21 #include "llvm/Support/MathExtras.h"
23 #include "llvm/ADT/DenseSet.h"
24 #include "llvm/ADT/SmallPtrSet.h"
25 #include "llvm/ADT/SmallString.h"
26 #include "llvm/Support/Debug.h"
29 #include <type_traits>
31 #define DEBUG_TYPE "affine-loop-analysis"
39 class DirectedOpGraph {
43 assert(!hasNode(op) &&
"node already added");
44 nodes.emplace_back(op);
51 assert(hasNode(src) &&
"src node does not exist in graph");
52 assert(hasNode(dest) &&
"dest node does not exist in graph");
53 edges[src].push_back(getNode(dest));
57 bool hasCycle() {
return dfs(
true); }
60 for (
auto &en : edges) {
61 llvm::dbgs() << *en.first <<
" (" << en.first <<
")"
62 <<
" has " << en.second.size() <<
" edges:\n";
63 for (
auto *node : en.second) {
64 llvm::dbgs() <<
'\t' << *node->op <<
'\n';
91 llvm::find_if(nodes, [&](
const DGNode &node) {
return node.op == op; });
92 assert(value != nodes.end() &&
"node doesn't exist in graph");
98 return llvm::find_if(nodes, [&](
const DGNode &node) {
99 return node.op == key;
108 bool dfs(
bool cycleCheck =
false) {
109 for (DGNode &node : nodes) {
115 for (DGNode &node : nodes) {
117 bool ret = dfsNode(node, cycleCheck, time);
119 if (cycleCheck && ret)
121 }
else if (cycleCheck && node.fn == -1) {
132 bool dfsNode(DGNode &node,
bool cycleCheck,
unsigned &time)
const {
133 auto nodeEdges = edges.find(node.op);
134 assert(nodeEdges != edges.end() &&
"missing node in graph");
137 for (
auto &neighbour : nodeEdges->second) {
138 if (neighbour->vn == 0) {
139 bool ret = dfsNode(*neighbour, cycleCheck, time);
140 if (cycleCheck && ret)
142 }
else if (cycleCheck && neighbour->fn == -1) {
170 AffineForOp forOp,
AffineMap *tripCountMap,
173 int64_t step = forOp.getStepAsInt();
175 if (forOp.hasConstantBounds()) {
176 int64_t lb = forOp.getConstantLowerBound();
177 int64_t ub = forOp.getConstantUpperBound();
182 llvm::divideCeilSigned(loopSpan, step), context);
183 tripCountOperands->clear();
186 auto lbMap = forOp.getLowerBoundMap();
187 auto ubMap = forOp.getUpperBoundMap();
188 if (lbMap.getNumResults() != 1) {
200 auto lbMapSplat =
AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(),
201 lbSplatExpr, context);
202 AffineValueMap lbSplatValueMap(lbMapSplat, forOp.getLowerBoundOperands());
206 for (
unsigned i = 0, e = tripCountValueMap.
getNumResults(); i < e; ++i)
211 tripCountOperands->assign(tripCountValueMap.
getOperands().begin(),
228 std::optional<uint64_t> tripCount;
230 if (
auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
231 if (tripCount.has_value())
233 std::min(*tripCount,
static_cast<uint64_t
>(constExpr.getValue()));
235 tripCount = constExpr.getValue();
256 assert(map.
getNumResults() >= 1 &&
"expected one or more results");
257 std::optional<uint64_t> gcd;
260 if (
auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
261 uint64_t tripCount = constExpr.getValue();
270 thisGcd = resultExpr.getLargestKnownDivisor();
273 gcd = std::gcd(*gcd, thisGcd);
277 assert(gcd.has_value() &&
"value expected per above logic");
287 assert(isa<IndexType>(index.
getType()) &&
"index must be of 'index' type");
296 template <
typename LoadOrStoreOp>
300 return !llvm::is_contained(avm.
getOperands(), forOp.getInductionVar());
314 for (
Value index : indices) {
322 template <
typename LoadOrStoreOp>
325 static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
326 AffineWriteOpInterface>::value,
327 "Must be called on either an affine read or write op");
328 assert(memRefDim &&
"memRefDim == nullptr");
329 auto memRefType = memoryOp.getMemRefType();
331 if (!memRefType.getLayout().isIdentity())
332 return memoryOp.emitError(
"NYI: non-trivial layout map"),
false;
334 int uniqueVaryingIndexAlongIv = -1;
335 auto accessMap = memoryOp.getAffineMap();
337 unsigned numDims = accessMap.getNumDims();
338 for (
unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
341 auto resultExpr = accessMap.getResult(i);
343 if (
auto dimExpr = dyn_cast<AffineDimExpr>(expr))
344 exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
345 else if (
auto symExpr = dyn_cast<AffineSymbolExpr>(expr))
346 exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
349 for (
Value exprOperand : exprOperands) {
351 if (uniqueVaryingIndexAlongIv != -1) {
355 uniqueVaryingIndexAlongIv = i;
360 if (uniqueVaryingIndexAlongIv == -1)
363 *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1);
368 AffineReadOpInterface loadOp,
371 AffineWriteOpInterface loadOp,
374 template <
typename LoadOrStoreOp>
376 auto memRefType = memoryOp.getMemRefType();
377 return isa<VectorType>(memRefType.getElementType());
386 auto *forOp = loop.getOperation();
391 conditionals.match(forOp, &conditionalsMatched);
392 if (!conditionalsMatched.empty()) {
400 if (MemRefType t = dyn_cast<MemRefType>(type))
401 return !VectorType::isValidElementType(t.getElementType());
402 return !VectorType::isValidElementType(type);
406 return !VectorType::isValidElementType(type);
410 types.match(forOp, &opsMatched);
411 if (!opsMatched.empty()) {
417 return op.
getNumRegions() != 0 && !isa<AffineIfOp, AffineForOp>(op);
420 regions.match(forOp, ®ionsMatched);
421 if (!regionsMatched.empty()) {
426 vectorTransferMatcher.
match(forOp, &vectorTransfersMatched);
427 if (!vectorTransfersMatched.empty()) {
433 loadAndStores.match(forOp, &loadAndStoresMatched);
434 for (
auto ls : loadAndStoresMatched) {
435 auto *op = ls.getMatchedOperation();
436 auto load = dyn_cast<AffineLoadOp>(op);
437 auto store = dyn_cast<AffineStoreOp>(op);
445 if (isVectorizableOp && !isVectorizableOp(loop, *op)) {
453 AffineForOp loop,
int *memRefDim,
NestedPattern &vectorTransferMatcher) {
456 auto load = dyn_cast<AffineLoadOp>(op);
457 auto store = dyn_cast<AffineStoreOp>(op);
458 int thisOpMemRefDim = -1;
461 cast<AffineReadOpInterface>(*load),
464 cast<AffineWriteOpInterface>(*store),
466 if (thisOpMemRefDim != -1) {
469 if (*memRefDim != -1 && *memRefDim != thisOpMemRefDim)
471 *memRefDim = thisOpMemRefDim;
490 auto *forBody = forOp.getBody();
491 assert(shifts.size() == forBody->getOperations().size());
496 for (
const auto &it :
498 auto &op = it.value();
502 size_t index = shifts.size() - it.index() - 1;
505 uint64_t shift = shifts[index];
506 forBodyShift.try_emplace(&op, shift);
509 for (
unsigned i = 0, e = op.getNumResults(); i < e; ++i) {
510 Value result = op.getResult(i);
511 for (
auto *user : result.
getUsers()) {
514 if (
auto *ancOp = forBody->findAncestorOpInBlock(*user)) {
515 assert(forBodyShift.count(ancOp) > 0 &&
"ancestor expected in map");
516 if (shift != forBodyShift[ancOp])
526 assert(!loops.empty() &&
"no original loops provided");
531 if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
532 loadAndStoreOps.push_back(op);
535 unsigned numOps = loadAndStoreOps.size();
536 unsigned numLoops = loops.size();
537 for (
unsigned d = 1; d <= numLoops + 1; ++d) {
538 for (
unsigned i = 0; i < numOps; ++i) {
541 for (
unsigned j = 0;
j < numOps; ++
j) {
547 srcAccess, dstAccess, d,
nullptr,
557 LLVM_DEBUG(llvm::dbgs() <<
"Checking whether tiling legality violated "
558 "for dependence at depth: "
559 << Twine(d) <<
" between:\n";);
563 if (depComp.lb.has_value() && depComp.ub.has_value() &&
564 *depComp.lb < *depComp.ub && *depComp.ub < 0) {
565 LLVM_DEBUG(llvm::dbgs()
566 <<
"Dependence component lb = " << Twine(*depComp.lb)
567 <<
" ub = " << Twine(*depComp.ub)
568 <<
" is negative at depth: " << Twine(d)
569 <<
" and thus violates the legality rule.\n");
583 DirectedOpGraph graph;
586 if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) {
587 accesses.emplace_back(op);
594 for (
const auto &accA : accesses) {
595 for (
const auto &accB : accesses) {
596 if (accA.memref != accB.memref)
599 unsigned numCommonLoops =
601 for (
unsigned d = rootDepth + 1; d <= numCommonLoops + 1; ++d) {
603 graph.addEdge(accA.opInst, accB.opInst);
607 return graph.hasCycle();
static bool isVectorizableLoopBodyWithOpCond(AffineForOp loop, const VectorizableOpFun &isVectorizableOp, NestedPattern &vectorTransferMatcher)
std::function< bool(AffineForOp, Operation &)> VectorizableOpFun
static bool isAccessIndexInvariant(Value iv, Value index)
Given an affine.for iv and an access index of type index, returns true if index is independent of iv ...
static bool isVectorElement(LoadOrStoreOp memoryOp)
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Base type for affine expression.
AffineExpr ceilDiv(uint64_t v) const
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
ArrayRef< AffineExpr > getResults() const
unsigned getNumResults() const
static AffineMap getConstantMap(int64_t val, MLIRContext *context)
Returns a single constant result affine map.
MLIRContext is the top-level object for a collection of MLIR operations.
Operation is the basic unit of execution within MLIR.
unsigned getNumRegions()
Returns the number of regions held by this operation.
operand_type_range getOperandTypes()
result_type_range getResultTypes()
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
MLIRContext * getContext() const
Utility to get the associated MLIRContext that this value is defined in.
Type getType() const
Return the type of this value.
user_range getUsers() const
An AffineValueMap is an affine map plus its ML value operands and results for analysis purposes.
void composeSimplifyAndCanonicalize()
Composes all incoming affine.apply ops and then simplifies and canonicalizes the map and operands.
ArrayRef< Value > getOperands() const
AffineExpr getResult(unsigned i)
AffineMap getAffineMap() const
bool isFunctionOf(unsigned idx, Value value) const
Return true if the idx^th result depends on 'value', false otherwise.
void setResult(unsigned i, AffineExpr e)
unsigned getNumResults() const
static void difference(const AffineValueMap &a, const AffineValueMap &b, AffineValueMap *res)
Return the value map that is the difference of value maps 'a' and 'b', represented as an affine map a...
void match(Operation *op, SmallVectorImpl< NestedMatch > *matches)
Returns all the top-level matches in op.
NestedPattern If(const NestedPattern &child)
bool isLoadOrStore(Operation &op)
NestedPattern Op(FilterFunctionType filter=defaultFilterFunction)
std::optional< uint64_t > getConstantTripCount(AffineForOp forOp)
Returns the trip count of the loop if it's a constant, std::nullopt otherwise.
bool isTilingValid(ArrayRef< AffineForOp > loops)
Checks whether hyper-rectangular loop tiling of the nest represented by loops is valid.
bool isVectorizableLoopBody(AffineForOp loop, NestedPattern &vectorTransferMatcher)
Checks whether the loop is structurally vectorizable; i.e.
unsigned getNumCommonSurroundingLoops(Operation &a, Operation &b)
Returns the number of surrounding loops common to both A and B.
DenseSet< Value, DenseMapInfo< Value > > getInvariantAccesses(Value iv, ArrayRef< Value > indices)
Given an induction variable iv of type AffineForOp and indices of type IndexType, returns the set of ...
void getTripCountMapAndOperands(AffineForOp forOp, AffineMap *map, SmallVectorImpl< Value > *operands)
Returns the trip count of the loop as an affine map with its corresponding operands if the latter is ...
bool isInvariantAccess(LoadOrStoreOp memOp, AffineForOp forOp)
Checks if an affine read or write operation depends on forOp's IV, i.e., if the memory access is inva...
DependenceResult checkMemrefAccessDependence(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints=nullptr, SmallVector< DependenceComponent, 2 > *dependenceComponents=nullptr, bool allowRAR=false)
bool isAffineForInductionVar(Value val)
Returns true if the provided value is the induction variable of an AffineForOp.
uint64_t getLargestDivisorOfTripCount(AffineForOp forOp)
Returns the greatest known integral divisor of the trip count.
bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp, int *memRefDim)
Given:
bool hasDependence(DependenceResult result)
Utility function that returns true if the provided DependenceResult corresponds to a dependence resul...
unsigned getNestingDepth(Operation *op)
Returns the nesting depth of this operation, i.e., the number of loops surrounding this operation.
bool isOpwiseShiftValid(AffineForOp forOp, ArrayRef< uint64_t > shifts)
Checks where SSA dominance would be violated if a for op's body operations are shifted by the specifi...
bool hasCyclicDependence(AffineForOp root)
Returns true if the affine nest rooted at root has a cyclic dependence among its affine memory access...
bool noDependence(DependenceResult result)
Returns true if the provided DependenceResult corresponds to the absence of a dependence.
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Include the generated interface declarations.
Checks whether two accesses to the same memref access the same element.
Encapsulates a memref load or store access information.
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.