#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/NestedMatcher.h"
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Debug.h"

#include <limits>
#include <numeric>
#include <optional>

#define DEBUG_TYPE "affine-loop-analysis"

using namespace mlir;
using namespace mlir::affine;
namespace {

/// A directed graph to model relationships between MLIR operations.
class DirectedOpGraph {
public:
  /// Add a node to the graph.
  void addNode(Operation *op) {
    assert(!hasNode(op) && "node already added");
    nodes.emplace_back(op);
    edges[op] = {};
  }

  /// Add an edge from `src` to `dest`.
  void addEdge(Operation *src, Operation *dest) {
    // This is a multi-graph.
    assert(hasNode(src) && "src node does not exist in graph");
    assert(hasNode(dest) && "dest node does not exist in graph");
    edges[src].push_back(&getNode(dest));
  }

  /// Returns true if the graph has a (directed) cycle.
  bool hasCycle() { return dfs(/*cycleCheck=*/true); }
  /// Prints the graph's adjacency list for debugging.
  void printEdges() {
    for (auto &en : edges) {
      llvm::dbgs() << *en.first << " (" << en.first << ")"
                   << " has " << en.second.size() << " edges:\n";
      for (auto *node : en.second)
        llvm::dbgs() << '\t' << *node->op << '\n';
    }
  }
private:
  /// A node of the directed graph of operations.
  struct DGNode {
    DGNode(Operation *op) : op(op) {}
    Operation *op;
    /// DFS start ("visit") and finish numbers; -1 means unset.
    int vn = -1;
    int fn = -1;
  };

  /// Get the internal node corresponding to `op`.
  DGNode &getNode(Operation *op) {
    auto *value = llvm::find_if(
        nodes, [&](const DGNode &node) { return node.op == op; });
    assert(value != nodes.end() && "node doesn't exist in graph");
    return *value;
  }

  /// Returns true if `key` already has a node in the graph.
  bool hasNode(Operation *key) const {
    return llvm::find_if(nodes, [&](const DGNode &node) {
             return node.op == key;
           }) != nodes.end();
  }
  /// Perform a depth-first traversal setting start and finish visit numbers.
  /// If `cycleCheck` is set, returns true as soon as the first cycle is
  /// detected and false if there are no cycles.
  bool dfs(bool cycleCheck = false) {
    for (DGNode &node : nodes) {
      node.vn = 0;
      node.fn = -1;
    }

    unsigned time = 0;
    for (DGNode &node : nodes) {
      if (node.vn == 0) {
        bool ret = dfsNode(node, cycleCheck, time);
        // Check if a cycle was already found.
        if (cycleCheck && ret)
          return true;
      } else if (cycleCheck && node.fn == -1) {
        // A node whose visit has started but not finished means a cycle.
        return true;
      }
    }
    return false;
  }
  /// Perform a depth-first traversal starting at `node`, updating `time`;
  /// returns true as soon as a cycle is found if `cycleCheck` is set.
  bool dfsNode(DGNode &node, bool cycleCheck, unsigned &time) const {
    auto nodeEdges = edges.find(node.op);
    assert(nodeEdges != edges.end() && "missing node in graph");
    node.vn = ++time;

    for (auto &neighbour : nodeEdges->second) {
      if (neighbour->vn == 0) {
        bool ret = dfsNode(*neighbour, cycleCheck, time);
        if (cycleCheck && ret)
          return true;
      } else if (cycleCheck && neighbour->fn == -1) {
        // The neighbour's visit has started but hasn't finished: a cycle.
        return true;
      }
    }

    // Set the finish visit number.
    node.fn = ++time;
    return false;
  }

  /// The list of nodes; the storage is owned by this class.
  SmallVector<DGNode> nodes;

  /// Edges as an adjacency list.
  DenseMap<Operation *, SmallVector<DGNode *>> edges;
};

} // namespace
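// Usage sketch for the graph above (illustrative only; `a`, `b`, and `c` are
// hypothetical Operation pointers):
//
//   DirectedOpGraph graph;
//   graph.addNode(a);
//   graph.addNode(b);
//   graph.addNode(c);
//   graph.addEdge(a, b);
//   graph.addEdge(b, c);
//   graph.addEdge(c, a);
//   assert(graph.hasCycle()); // a -> b -> c -> a closes a cycle.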
/// Returns the trip count of the loop as an affine map with its corresponding
/// operands, if the latter is expressible as an affine map.
void mlir::affine::getTripCountMapAndOperands(
    AffineForOp forOp, AffineMap *tripCountMap,
    SmallVectorImpl<Value> *tripCountOperands) {
  MLIRContext *context = forOp.getContext();
  int64_t step = forOp.getStepAsInt();
  if (forOp.hasConstantBounds()) {
    int64_t lb = forOp.getConstantLowerBound();
    int64_t ub = forOp.getConstantUpperBound();
    int64_t loopSpan = ub - lb;
    if (loopSpan < 0)
      loopSpan = 0;
    *tripCountMap = AffineMap::getConstantMap(
        llvm::divideCeilSigned(loopSpan, step), context);
    tripCountOperands->clear();
    return;
  }
  auto lbMap = forOp.getLowerBoundMap();
  auto ubMap = forOp.getUpperBoundMap();
  if (lbMap.getNumResults() != 1) {
    *tripCountMap = AffineMap();
    return;
  }

  // The difference of each upper bound expression from the single lower bound
  // expression (divided by the step) provides the expressions for the trip
  // count map.
  AffineValueMap ubValueMap(ubMap, forOp.getUpperBoundOperands());
  SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(),
                                         lbMap.getResult(0));
  auto lbMapSplat = AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(),
                                   lbSplatExpr, context);
  AffineValueMap lbSplatValueMap(lbMapSplat, forOp.getLowerBoundOperands());

  AffineValueMap tripCountValueMap;
  AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
  for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i)
    tripCountValueMap.setResult(i,
                                tripCountValueMap.getResult(i).ceilDiv(step));

  *tripCountMap = tripCountValueMap.getAffineMap();
  tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
                            tripCountValueMap.getOperands().end());
}
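// Illustrative sketch: for a hypothetical loop `affine.for %i = 5 to %N step
// 2`, the trip count map built above would be `()[s0] -> ((s0 - 5) ceildiv 2)`
// with `%N` as its single operand:
//
//   AffineMap tripCountMap;
//   SmallVector<Value, 4> tripCountOperands;
//   getTripCountMapAndOperands(forOp, &tripCountMap, &tripCountOperands);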
/// Returns the trip count of the loop if it's a constant, std::nullopt
/// otherwise.
std::optional<uint64_t> mlir::affine::getConstantTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);
  if (!map)
    return std::nullopt;

  // Take the min if all trip counts are constant.
  std::optional<uint64_t> tripCount;
  for (auto resultExpr : map.getResults()) {
    if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
      if (tripCount.has_value())
        tripCount =
            std::min(*tripCount, static_cast<uint64_t>(constExpr.getValue()));
      else
        tripCount = constExpr.getValue();
    } else {
      return std::nullopt;
    }
  }
  return tripCount;
}
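// Sketch of intended use (hypothetical `forOp`):
//
//   if (std::optional<uint64_t> tripCount = getConstantTripCount(forOp))
//     llvm::dbgs() << "constant trip count: " << *tripCount << '\n';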
/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through getTripCountMapAndOperands)
/// and is thus able to determine non-trivial divisors.
uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);
  if (!map)
    return 1;

  // The largest divisor of the trip count is the GCD of the individual
  // largest divisors.
  assert(map.getNumResults() >= 1 && "expected one or more results");
  std::optional<uint64_t> gcd;
  for (auto resultExpr : map.getResults()) {
    uint64_t thisGcd;
    if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
      uint64_t tripCount = constExpr.getValue();
      // 0-iteration loops: everything divides a zero trip count.
      if (tripCount == 0)
        thisGcd = std::numeric_limits<uint64_t>::max();
      else
        // The greatest divisor is the trip count itself.
        thisGcd = tripCount;
    } else {
      // The trip count isn't a known constant; use its largest known divisor.
      thisGcd = resultExpr.getLargestKnownDivisor();
    }
    gcd = gcd.has_value() ? std::gcd(*gcd, thisGcd) : thisGcd;
  }
  assert(gcd.has_value() && "value expected per above logic");
  return *gcd;
}
/// Given an affine.for `iv` and an access `index` of type index, returns true
/// if `index` is independent of `iv` and false otherwise.
static bool isAccessIndexInvariant(Value iv, Value index) {
  assert(isAffineForInductionVar(iv) && "iv must be an affine.for iv");
  assert(isa<IndexType>(index.getType()) && "index must be of 'index' type");
  auto map = AffineMap::getMultiDimIdentityMap(/*numDims=*/1, iv.getContext());
  SmallVector<Value> operands = {index};
  AffineValueMap avm(map, operands);
  avm.composeSimplifyAndCanonicalize();
  return !avm.isFunctionOf(0, iv);
}
// Pre-requisite: loop bounds should be in canonical form.
template <typename LoadOrStoreOp>
bool mlir::affine::isInvariantAccess(LoadOrStoreOp memOp, AffineForOp forOp) {
  AffineValueMap avm(memOp.getAffineMap(), memOp.getMapOperands());
  avm.composeSimplifyAndCanonicalize();
  return !llvm::is_contained(avm.getOperands(), forOp.getInductionVar());
}
DenseSet<Value> mlir::affine::getInvariantAccesses(Value iv,
                                                   ArrayRef<Value> indices) {
  DenseSet<Value> res;
  for (Value index : indices) {
    if (isAccessIndexInvariant(iv, index))
      res.insert(index);
  }
  return res;
}
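// Sketch of intended use (assuming `iv` is an affine.for induction variable
// and `indices` holds the index-typed subscripts of an access):
//
//   DenseSet<Value> invariants = getInvariantAccesses(iv, indices);
//   // `invariants` contains exactly the subscripts independent of `iv`.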
template <typename LoadOrStoreOp>
bool mlir::affine::isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
                                      int *memRefDim) {
  static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
                                AffineWriteOpInterface>::value,
                "Must be called on either an affine read or write op");
  assert(memRefDim && "memRefDim == nullptr");
  auto memRefType = memoryOp.getMemRefType();

  if (!memRefType.getLayout().isIdentity())
    return memoryOp.emitError("NYI: non-trivial layout map"), false;

  int uniqueVaryingIndexAlongIv = -1;
  auto accessMap = memoryOp.getAffineMap();
  SmallVector<Value> mapOperands(memoryOp.getMapOperands());
  unsigned numDims = accessMap.getNumDims();
  for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
    // Gather the map operands used in result expression 'i' in
    // 'exprOperands'.
    SmallVector<Value> exprOperands;
    auto resultExpr = accessMap.getResult(i);
    resultExpr.walk([&](AffineExpr expr) {
      if (auto dimExpr = dyn_cast<AffineDimExpr>(expr))
        exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
      else if (auto symExpr = dyn_cast<AffineSymbolExpr>(expr))
        exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
    });
    // Check access invariance of each operand in 'exprOperands'.
    for (Value exprOperand : exprOperands) {
      if (!isAccessIndexInvariant(iv, exprOperand)) {
        if (uniqueVaryingIndexAlongIv != -1) {
          // 2+ varying indices -> do not vectorize along iv.
          return false;
        }
        uniqueVaryingIndexAlongIv = i;
      }
    }
  }

  if (uniqueVaryingIndexAlongIv == -1)
    *memRefDim = -1;
  else
    *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1);
  return true;
}
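// Illustrative sketch: for a hypothetical innermost loop over %j performing
// `affine.load %A[%i, %j]` on a rank-2 identity-layout memref, only the last
// subscript varies with %j, so the call succeeds with memRefDim == 0:
//
//   int memRefDim = -1;
//   bool ok = isContiguousAccess(jLoop.getInductionVar(),
//                                cast<AffineReadOpInterface>(*loadOp),
//                                &memRefDim);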
// Explicitly instantiate the template so the compiler knows we need them.
template bool mlir::affine::isContiguousAccess(Value iv,
                                               AffineReadOpInterface loadOp,
                                               int *memRefDim);
template bool mlir::affine::isContiguousAccess(Value iv,
                                               AffineWriteOpInterface storeOp,
                                               int *memRefDim);
template <typename LoadOrStoreOp>
static bool isVectorElement(LoadOrStoreOp memoryOp) {
  auto memRefType = memoryOp.getMemRefType();
  return isa<VectorType>(memRefType.getElementType());
}

using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>;
static bool
isVectorizableLoopBodyWithOpCond(AffineForOp loop,
                                 const VectorizableOpFun &isVectorizableOp,
                                 NestedPattern &vectorTransferMatcher) {
  auto *forOp = loop.getOperation();

  // No vectorization across conditionals for now.
  auto conditionals = matcher::If();
  SmallVector<NestedMatch, 8> conditionalsMatched;
  conditionals.match(forOp, &conditionalsMatched);
  if (!conditionalsMatched.empty())
    return false;

  // No vectorization for ops with operand or result types that are not
  // vectorizable.
  auto types = matcher::Op([](Operation &op) -> bool {
    if (llvm::any_of(op.getOperandTypes(), [](Type type) {
          if (MemRefType t = dyn_cast<MemRefType>(type))
            return !VectorType::isValidElementType(t.getElementType());
          return !VectorType::isValidElementType(type);
        }))
      return true;
    return !llvm::all_of(op.getResultTypes(), VectorType::isValidElementType);
  });
  SmallVector<NestedMatch, 8> opsMatched;
  types.match(forOp, &opsMatched);
  if (!opsMatched.empty())
    return false;

  // No vectorization across unknown regions.
  auto regions = matcher::Op([](Operation &op) -> bool {
    return op.getNumRegions() != 0 && !isa<AffineIfOp, AffineForOp>(op);
  });
  SmallVector<NestedMatch, 8> regionsMatched;
  regions.match(forOp, &regionsMatched);
  if (!regionsMatched.empty())
    return false;

  // No vectorization if the loop already contains vector transfer operations.
  SmallVector<NestedMatch, 8> vectorTransfersMatched;
  vectorTransferMatcher.match(forOp, &vectorTransfersMatched);
  if (!vectorTransfersMatched.empty())
    return false;

  // All loads/stores must operate on scalar elements and satisfy the per-op
  // condition, if one was provided.
  auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
  SmallVector<NestedMatch, 8> loadAndStoresMatched;
  loadAndStores.match(forOp, &loadAndStoresMatched);
  for (auto ls : loadAndStoresMatched) {
    auto *op = ls.getMatchedOperation();
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    bool vector = load ? isVectorElement(load) : isVectorElement(store);
    if (vector)
      return false;
    if (isVectorizableOp && !isVectorizableOp(loop, *op))
      return false;
  }
  return true;
}
bool mlir::affine::isVectorizableLoopBody(
    AffineForOp loop, int *memRefDim, NestedPattern &vectorTransferMatcher) {
  *memRefDim = -1;
  VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) {
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    int thisOpMemRefDim = -1;
    bool isContiguous =
        load ? isContiguousAccess(loop.getInductionVar(),
                                  cast<AffineReadOpInterface>(*load),
                                  &thisOpMemRefDim)
             : isContiguousAccess(loop.getInductionVar(),
                                  cast<AffineWriteOpInterface>(*store),
                                  &thisOpMemRefDim);
    if (thisOpMemRefDim != -1) {
      // If memory accesses vary across different dimensions then the loop is
      // not vectorizable.
      if (*memRefDim != -1 && *memRefDim != thisOpMemRefDim)
        return false;
      *memRefDim = thisOpMemRefDim;
    }
    return isContiguous;
  });
  return isVectorizableLoopBodyWithOpCond(loop, fun, vectorTransferMatcher);
}
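// Sketch of intended use (hypothetical `loop` and transfer-op matcher):
//
//   int memRefDim = -1;
//   if (isVectorizableLoopBody(loop, &memRefDim, vectorTransferMatcher)) {
//     // `loop` is structurally vectorizable; all accesses are contiguous
//     // along memref dimension `memRefDim`.
//   }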
/// Checks whether SSA dominance would be violated if a for op's body
/// operations are shifted by the specified shifts.
bool mlir::affine::isOpwiseShiftValid(AffineForOp forOp,
                                      ArrayRef<uint64_t> shifts) {
  auto *forBody = forOp.getBody();
  assert(shifts.size() == forBody->getOperations().size());

  // Work backwards over the body of the block so that the shift of a use's
  // ancestor operation in the block gets recorded before it's looked up.
  DenseMap<Operation *, uint64_t> forBodyShift;
  for (const auto &it :
       llvm::enumerate(llvm::reverse(forBody->getOperations()))) {
    auto &op = it.value();

    // Get the index of the current operation; we are iterating in reverse,
    // so we need to fix it up.
    size_t index = shifts.size() - it.index() - 1;

    // Remember the shift of this operation.
    uint64_t shift = shifts[index];
    forBodyShift.try_emplace(&op, shift);

    // Validate the results of this operation if it were to be shifted.
    for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) {
      Value result = op.getResult(i);
      for (auto *user : result.getUsers()) {
        // If an ancestor operation doesn't lie in the block of forOp, there
        // is no shift to check.
        if (auto *ancOp = forBody->findAncestorOpInBlock(*user)) {
          assert(forBodyShift.count(ancOp) > 0 && "ancestor expected in map");
          if (shift != forBodyShift[ancOp])
            return false;
        }
      }
    }
  }
  return true;
}
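// Illustrative example: for a body `%v = "op1"() ; "op2"(%v)`, the shift
// vectors {0, 0} and {1, 1} keep the producer and consumer at the same shift
// and are valid, whereas {1, 0} would move the use of %v ahead of its
// definition and is rejected:
//
//   SmallVector<uint64_t> shifts(forOp.getBody()->getOperations().size(), 0);
//   bool valid = isOpwiseShiftValid(forOp, shifts); // trivially true here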
/// Checks whether hyper-rectangular loop tiling of the nest represented by
/// `loops` is valid.
bool mlir::affine::isTilingValid(ArrayRef<AffineForOp> loops) {
  assert(!loops.empty() && "no original loops provided");

  // We first find out all dependences we intend to check.
  SmallVector<Operation *, 8> loadAndStoreOps;
  loops[0]->walk([&](Operation *op) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
  });

  unsigned numOps = loadAndStoreOps.size();
  unsigned numLoops = loops.size();
  for (unsigned d = 1; d <= numLoops + 1; ++d) {
    for (unsigned i = 0; i < numOps; ++i) {
      Operation *srcOp = loadAndStoreOps[i];
      MemRefAccess srcAccess(srcOp);
      for (unsigned j = 0; j < numOps; ++j) {
        Operation *dstOp = loadAndStoreOps[j];
        MemRefAccess dstAccess(dstOp);

        SmallVector<DependenceComponent, 2> depComps;
        DependenceResult result = checkMemrefAccessDependence(
            srcAccess, dstAccess, d, /*dependenceConstraints=*/nullptr,
            &depComps);

        // Skip if there is no dependence in this case.
        if (!hasDependence(result))
          continue;

        // Check whether there is any negative direction vector in the
        // dependence components found above, which means that the dependence
        // is violated by the default hyper-rectangular tiling method.
        LLVM_DEBUG(llvm::dbgs() << "Checking whether tiling legality violated "
                                   "for dependence at depth: "
                                << Twine(d) << " between:\n";);
        LLVM_DEBUG(srcAccess.opInst->dump());
        LLVM_DEBUG(dstAccess.opInst->dump());
        for (const DependenceComponent &depComp : depComps) {
          if (depComp.lb.has_value() && depComp.ub.has_value() &&
              *depComp.lb < *depComp.ub && *depComp.ub < 0) {
            LLVM_DEBUG(llvm::dbgs()
                       << "Dependence component lb = " << Twine(*depComp.lb)
                       << " ub = " << Twine(*depComp.ub)
                       << " is negative at depth: " << Twine(d)
                       << " and thus violates the legality rule.\n");
            return false;
          }
        }
      }
    }
  }
  return true;
}
/// Returns true if the affine nest rooted at `root` has a cyclic dependence
/// among its affine memory accesses.
bool mlir::affine::hasCyclicDependence(AffineForOp root) {
  // Collect all the memory accesses in the nest.
  DirectedOpGraph graph;
  SmallVector<MemRefAccess> accesses;
  root->walk([&](Operation *op) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) {
      accesses.emplace_back(op);
      graph.addNode(op);
    }
  });

  // Construct the dependence graph for all the collected accesses.
  unsigned rootDepth = getNestingDepth(root);
  for (const auto &accA : accesses) {
    for (const auto &accB : accesses) {
      if (accA.memref != accB.memref)
        continue;
      // Perform the dependence check at all common surrounding loop depths.
      unsigned numCommonLoops =
          getNumCommonSurroundingLoops(*accA.opInst, *accB.opInst);
      for (unsigned d = rootDepth + 1; d <= numCommonLoops + 1; ++d) {
        if (!noDependence(checkMemrefAccessDependence(accA, accB, d)))
          graph.addEdge(accA.opInst, accB.opInst);
      }
    }
  }
  return graph.hasCycle();
}
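// Sketch of intended use (hypothetical `root` loop nest):
//
//   if (hasCyclicDependence(root))
//     return failure(); // bail out of transformations that require an
//                       // acyclic memory-access dependence graph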