#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugLog.h"
#include "llvm/Support/MathExtras.h"

#define DEBUG_TYPE "affine-loop-analysis"
namespace {

/// A directed graph to model relationships between MLIR operations.
class DirectedOpGraph {
public:
  /// Adds a node to the graph.
  void addNode(Operation *op) {
    assert(!hasNode(op) && "node already added");
    nodes.emplace_back(op);
    edges[op] = {};
  }

  /// Adds an edge from `src` to `dest`.
  void addEdge(Operation *src, Operation *dest) {
    assert(hasNode(src) && "src node does not exist in graph");
    assert(hasNode(dest) && "dest node does not exist in graph");
    edges[src].push_back(getNode(dest));
  }

  /// Returns true if the graph has a (directed) cycle.
  bool hasCycle() { return dfs(/*cycleCheck=*/true); }

  void printEdges() {
    for (auto &en : edges) {
      llvm::dbgs() << *en.first << " (" << en.first << ")"
                   << " has " << en.second.size() << " edges:\n";
      for (auto *node : en.second)
        llvm::dbgs() << '\t' << *node->op << '\n';
    }
  }
private:
  /// A node of the graph: the operation it stands for plus the DFS visit
  /// number `vn` and finish number `fn` used for cycle detection. Both are
  /// reset at the start of each traversal.
  struct DGNode {
    DGNode(Operation *op) : op(op) {}
    Operation *op;
    int vn{0};
    int fn{-1};
  };

  /// Returns the node for `op`; asserts if it doesn't exist.
  DGNode *getNode(Operation *op) {
    auto *value = llvm::find_if(
        nodes, [&](const DGNode &node) { return node.op == op; });
    assert(value != nodes.end() && "node doesn't exist in graph");
    return &*value;
  }

  /// Returns true if the graph already has a node for `key`.
  bool hasNode(Operation *key) const {
    return llvm::find_if(nodes, [&](const DGNode &node) {
             return node.op == key;
           }) != nodes.end();
  }
  /// Depth-first traversal assigning visit and finish numbers to all nodes.
  /// If `cycleCheck` is set, returns true as soon as a cycle is found;
  /// otherwise the return value carries no meaning.
  bool dfs(bool cycleCheck = false) {
    for (DGNode &node : nodes) {
      node.vn = 0;
      node.fn = -1;
    }

    unsigned time = 0;
    for (DGNode &node : nodes) {
      if (node.vn == 0) {
        bool ret = dfsNode(node, cycleCheck, time);
        // Check if a cycle was already found.
        if (cycleCheck && ret)
          return true;
      } else if (cycleCheck && node.fn == -1) {
        // A node whose visit has started but not finished: a cycle.
        return true;
      }
    }
    return false;
  }
  /// Depth-first traversal rooted at `node`; with `cycleCheck` set, returns
  /// true as soon as a cycle is detected.
  bool dfsNode(DGNode &node, bool cycleCheck, unsigned &time) const {
    auto nodeEdges = edges.find(node.op);
    assert(nodeEdges != edges.end() && "missing node in graph");
    node.vn = ++time;

    for (auto &neighbour : nodeEdges->second) {
      if (neighbour->vn == 0) {
        bool ret = dfsNode(*neighbour, cycleCheck, time);
        if (cycleCheck && ret)
          return true;
      } else if (cycleCheck && neighbour->fn == -1) {
        // A back edge to a node still being visited: a cycle.
        return true;
      }
    }

    // Mark this node's visit as finished.
    node.fn = ++time;
    return false;
  }

  /// The list of nodes.
  SmallVector<DGNode> nodes;
  /// Outgoing edges for each node, keyed by its operation.
  DenseMap<Operation *, SmallVector<DGNode *>> edges;
};

} // namespace
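// Usage sketch (illustrative, not part of the upstream file): a dependence
// loop among three memory accesses shows up as a directed cycle. The ops
// `a`, `b`, and `c` are hypothetical and assumed to be valid pointers.
static bool exampleHasDependenceCycle(Operation *a, Operation *b,
                                      Operation *c) {
  DirectedOpGraph graph;
  graph.addNode(a);
  graph.addNode(b);
  graph.addNode(c);
  // The edges a -> b -> c -> a close a cycle, so this returns true.
  graph.addEdge(a, b);
  graph.addEdge(b, c);
  graph.addEdge(c, a);
  return graph.hasCycle();
}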
/// Returns the trip count of the loop as an affine map, with its operands,
/// if expressible as such; otherwise sets the map to null. Only map
/// composition is used to construct the bound expressions.
void mlir::affine::getTripCountMapAndOperands(
    AffineForOp forOp, AffineMap *tripCountMap,
    SmallVectorImpl<Value> *tripCountOperands) {
  MLIRContext *context = forOp.getContext();
  int64_t step = forOp.getStepAsInt();
  if (forOp.hasConstantBounds()) {
    // Both bounds are constant: the trip count is ceil((ub - lb) / step).
    int64_t lb = forOp.getConstantLowerBound();
    int64_t ub = forOp.getConstantUpperBound();
    int64_t loopSpan = ub - lb;
    if (loopSpan < 0)
      loopSpan = 0;
    *tripCountMap = AffineMap::getConstantMap(
        llvm::divideCeilSigned(loopSpan, step), context);
    tripCountOperands->clear();
    return;
  }
  auto lbMap = forOp.getLowerBoundMap();
  auto ubMap = forOp.getUpperBoundMap();
  if (lbMap.getNumResults() != 1) {
    *tripCountMap = AffineMap();
    return;
  }

  // The difference of each upper-bound expression from the single lower-bound
  // expression, divided by the step, yields the trip count expressions.
  AffineValueMap ubValueMap(ubMap, forOp.getUpperBoundOperands());
  SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(),
                                         lbMap.getResult(0));
  auto lbMapSplat = AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(),
                                   lbSplatExpr, context);
  AffineValueMap lbSplatValueMap(lbMapSplat, forOp.getLowerBoundOperands());

  AffineValueMap tripCountValueMap;
  AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
  for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i)
    tripCountValueMap.setResult(i,
                                tripCountValueMap.getResult(i).ceilDiv(step));

  *tripCountMap = tripCountValueMap.getAffineMap();
  tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
                            tripCountValueMap.getOperands().end());
}
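// Sketch of a typical call site (illustrative, relying on this file's using
// directives): recover the trip count map and check whether it simplified to
// a single constant expression.
static void exampleInspectTripCount(AffineForOp forOp) {
  AffineMap tripCountMap;
  SmallVector<Value, 4> tripCountOperands;
  getTripCountMapAndOperands(forOp, &tripCountMap, &tripCountOperands);
  if (!tripCountMap)
    return; // Bounds were not expressible as a single-result affine map.
  // A non-null trip count map has at least one result expression.
  if (auto cst = dyn_cast<AffineConstantExpr>(tripCountMap.getResult(0)))
    llvm::dbgs() << "constant trip count: " << cst.getValue() << '\n';
}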
/// Returns the trip count of the loop if it's a constant, std::nullopt
/// otherwise.
std::optional<uint64_t> mlir::affine::getConstantTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);
  if (!map)
    return std::nullopt;

  // Take the minimum if all trip count expressions are constant.
  std::optional<uint64_t> tripCount;
  for (auto resultExpr : map.getResults()) {
    auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr);
    if (!constExpr)
      return std::nullopt;
    if (tripCount.has_value())
      tripCount =
          std::min(*tripCount, static_cast<uint64_t>(constExpr.getValue()));
    else
      tripCount = constExpr.getValue();
  }
  return tripCount;
}
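// Illustrative guard (not from the original source; `kUnrollLimit` is a
// hypothetical threshold): full unrolling is only safe to attempt when the
// trip count is a known constant.
static bool exampleCanFullyUnroll(AffineForOp forOp, uint64_t kUnrollLimit) {
  std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
  return tripCount.has_value() && *tripCount <= kUnrollLimit;
}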
/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used, so non-trivial divisors can be found even
/// when the trip count isn't a known constant.
uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);
  if (!map)
    return 1;

  // The largest divisor of the trip count is the GCD of the largest known
  // divisors of the individual result expressions.
  assert(map.getNumResults() >= 1 && "expected one or more results");
  std::optional<uint64_t> gcd;
  for (auto resultExpr : map.getResults()) {
    uint64_t thisGcd;
    if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
      uint64_t tripCount = constExpr.getValue();
      // A zero-iteration loop is divisible by anything.
      thisGcd =
          tripCount == 0 ? std::numeric_limits<uint64_t>::max() : tripCount;
    } else {
      // Not a known constant: use the expression's largest known divisor.
      thisGcd = resultExpr.getLargestKnownDivisor();
    }
    gcd = gcd.has_value() ? std::gcd(*gcd, thisGcd) : thisGcd;
  }
  assert(gcd.has_value() && "value expected per above logic");
  return *gcd;
}
/// Given an affine.for `iv` and an access `index` of type index, returns true
/// if `index` is independent of `iv` and false otherwise.
static bool isAccessIndexInvariant(Value iv, Value index) {
  assert(isAffineForInductionVar(iv) && "iv must be an affine.for IV");
  assert(isa<IndexType>(index.getType()) && "index must be of 'index' type");
  // Compose the chain of affine.apply ops feeding `index` and check whether
  // the simplified result still depends on `iv`.
  auto map = AffineMap::getMultiDimIdentityMap(/*numDims=*/1, iv.getContext());
  SmallVector<Value> operands = {index};
  AffineValueMap avm(map, operands);
  avm.composeSimplifyAndCanonicalize();
  return !avm.isFunctionOf(/*idx=*/0, iv);
}
/// Returns true if the affine read/write op's access is invariant with
/// respect to `forOp`'s IV. Pre-requisite: the loop bounds are in canonical
/// form.
template <typename LoadOrStoreOp>
bool mlir::affine::isInvariantAccess(LoadOrStoreOp memOp, AffineForOp forOp) {
  AffineValueMap avm(memOp.getAffineMap(), memOp.getMapOperands());
  avm.composeSimplifyAndCanonicalize();
  return !llvm::is_contained(avm.getOperands(), forOp.getInductionVar());
}
DenseSet<Value> mlir::affine::getInvariantAccesses(Value iv,
                                                   ArrayRef<Value> indices) {
  DenseSet<Value> res;
  for (Value index : indices)
    if (isAccessIndexInvariant(iv, index))
      res.insert(index);
  return res;
}
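// Sketch (illustrative): filtering a subscript list down to the indices that
// don't depend on the loop IV, e.g. to spot hoisting or broadcast
// opportunities. `indices` is assumed to hold 'index'-typed values.
static unsigned exampleCountInvariantIndices(AffineForOp forOp,
                                             ArrayRef<Value> indices) {
  DenseSet<Value> invariant =
      getInvariantAccesses(forOp.getInductionVar(), indices);
  return invariant.size();
}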
/// Given an affine.for IV `iv` and a memory op, returns true if the access is
/// contiguous along `iv`: either invariant, or varying along exactly one
/// memref dimension. On success, that dimension is written to `*memRefDim`
/// (-1 conveys that the access is invariant along `iv`). Conservatively
/// returns false for non-identity layout maps.
template <typename LoadOrStoreOp>
bool mlir::affine::isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
                                      int *memRefDim) {
  static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
                                AffineWriteOpInterface>::value,
                "Must be called on either an affine read or write op");
  assert(memRefDim && "memRefDim == nullptr");
  auto memRefType = memoryOp.getMemRefType();

  if (!memRefType.getLayout().isIdentity())
    return memoryOp.emitError("NYI: non-trivial layout map"), false;

  int uniqueVaryingIndexAlongIv = -1;
  auto accessMap = memoryOp.getAffineMap();
  auto mapOperands = memoryOp.getMapOperands();
  unsigned numDims = accessMap.getNumDims();
  for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
    // Gather the map operands used in result expression 'i' in
    // 'exprOperands'.
    SmallVector<Value, 4> exprOperands;
    auto resultExpr = accessMap.getResult(i);
    resultExpr.walk([&](AffineExpr expr) {
      if (auto dimExpr = dyn_cast<AffineDimExpr>(expr))
        exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
      else if (auto symExpr = dyn_cast<AffineSymbolExpr>(expr))
        exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
    });
    // Check access invariance of each operand in 'exprOperands'.
    for (Value exprOperand : exprOperands) {
      if (!isAccessIndexInvariant(iv, exprOperand)) {
        if (uniqueVaryingIndexAlongIv != -1) {
          // 2+ varying indices -> not contiguous along `iv`.
          return false;
        }
        uniqueVaryingIndexAlongIv = i;
      }
    }
  }

  if (uniqueVaryingIndexAlongIv == -1)
    *memRefDim = -1;
  else
    *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1);
  return true;
}
// Explicit template instantiations for the affine read/write interfaces.
template bool mlir::affine::isContiguousAccess(Value iv,
                                               AffineReadOpInterface loadOp,
                                               int *memRefDim);
template bool mlir::affine::isContiguousAccess(Value iv,
                                               AffineWriteOpInterface storeOp,
                                               int *memRefDim);
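// Sketch (illustrative): classifying a load's access pattern along a loop.
// Per the convention above, `memRefDim` counts from the fastest-varying
// memref dimension, so 0 means the access varies only along the minor
// dimension and -1 means it is invariant along the loop.
static void exampleClassifyAccess(AffineForOp forOp,
                                  AffineReadOpInterface loadOp) {
  int memRefDim = -1;
  if (!isContiguousAccess(forOp.getInductionVar(), loadOp, &memRefDim))
    return; // Varies along more than one memref dimension (or NYI layout).
  if (memRefDim == -1)
    llvm::dbgs() << "access invariant along the loop\n";
  else
    llvm::dbgs() << "access varies along memref dim " << memRefDim << '\n';
}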
/// Returns true if the memref's element type is itself a vector type.
template <typename LoadOrStoreOp>
static bool isVectorElement(LoadOrStoreOp memoryOp) {
  auto memRefType = memoryOp.getMemRefType();
  return isa<VectorType>(memRefType.getElementType());
}
using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>;

/// Checks whether `loop` is structurally vectorizable, additionally applying
/// the optional `isVectorizableOp` predicate to every load/store in its body.
static bool
isVectorizableLoopBodyWithOpCond(AffineForOp loop,
                                 const VectorizableOpFun &isVectorizableOp,
                                 NestedPattern &vectorTransferMatcher) {
  auto *forOp = loop.getOperation();

  // No vectorization across conditionals for now.
  auto conditionals = matcher::If();
  SmallVector<NestedMatch, 8> conditionalsMatched;
  conditionals.match(forOp, &conditionalsMatched);
  if (!conditionalsMatched.empty())
    return false;

  // No vectorization for ops with operand or result types that are not
  // vectorizable.
  auto types = matcher::Op([](Operation &op) -> bool {
    if (llvm::any_of(op.getOperandTypes(), [](Type type) {
          if (MemRefType t = dyn_cast<MemRefType>(type))
            return !VectorType::isValidElementType(t.getElementType());
          return !VectorType::isValidElementType(type);
        }))
      return true;
    return !llvm::all_of(op.getResultTypes(), VectorType::isValidElementType);
  });
  SmallVector<NestedMatch, 8> opsMatched;
  types.match(forOp, &opsMatched);
  if (!opsMatched.empty())
    return false;

  // No vectorization across unknown regions.
  auto regions = matcher::Op([](Operation &op) -> bool {
    return op.getNumRegions() != 0 && !isa<AffineIfOp, AffineForOp>(op);
  });
  SmallVector<NestedMatch, 8> regionsMatched;
  regions.match(forOp, &regionsMatched);
  if (!regionsMatched.empty())
    return false;

  // No vectorization if the loop already holds vector transfers.
  SmallVector<NestedMatch, 8> vectorTransfersMatched;
  vectorTransferMatcher.match(forOp, &vectorTransfersMatched);
  if (!vectorTransfersMatched.empty())
    return false;

  // All loads/stores must access scalar elements and pass the predicate.
  auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
  SmallVector<NestedMatch, 8> loadAndStoresMatched;
  loadAndStores.match(forOp, &loadAndStoresMatched);
  for (auto ls : loadAndStoresMatched) {
    auto *op = ls.getMatchedOperation();
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    bool vector = load ? isVectorElement(load) : isVectorElement(store);
    if (vector)
      return false;
    if (isVectorizableOp && !isVectorizableOp(loop, *op))
      return false;
  }
  return true;
}
bool mlir::affine::isVectorizableLoopBody(
    AffineForOp loop, int *memRefDim, NestedPattern &vectorTransferMatcher) {
  *memRefDim = -1;
  VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) {
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    int thisOpMemRefDim = -1;
    bool isContiguous =
        load ? isContiguousAccess(loop.getInductionVar(),
                                  cast<AffineReadOpInterface>(*load),
                                  &thisOpMemRefDim)
             : isContiguousAccess(loop.getInductionVar(),
                                  cast<AffineWriteOpInterface>(*store),
                                  &thisOpMemRefDim);
    if (thisOpMemRefDim != -1) {
      // If accesses vary along different memref dimensions, the loop is not
      // vectorizable.
      if (*memRefDim != -1 && *memRefDim != thisOpMemRefDim)
        return false;
      *memRefDim = thisOpMemRefDim;
    }
    return isContiguous;
  });
  return isVectorizableLoopBodyWithOpCond(loop, fun, vectorTransferMatcher);
}
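// Sketch (illustrative): a vectorization driver would combine the structural
// check with the contiguity result before picking a dimension to vectorize.
// The memRefDim == 0 test assumes the convention established above, where 0
// denotes the fastest-varying memref dimension.
static bool exampleLoopVectorizableAlongMinorDim(
    AffineForOp loop, NestedPattern &vectorTransferMatcher) {
  int memRefDim = -1;
  if (!isVectorizableLoopBody(loop, &memRefDim, vectorTransferMatcher))
    return false;
  // Accept loops whose accesses are invariant (-1) or vary only along the
  // minor dimension (0).
  return memRefDim == 0 || memRefDim == -1;
}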
/// Checks whether SSA dominance would be violated if the body ops of `forOp`
/// are shifted by the specified shifts.
bool mlir::affine::isOpwiseShiftValid(AffineForOp forOp,
                                      ArrayRef<uint64_t> shifts) {
  auto *forBody = forOp.getBody();
  assert(shifts.size() == forBody->getOperations().size());

  // Work backwards over the body so that the shift of a use's ancestor op in
  // the block gets recorded before it's looked up.
  DenseMap<Operation *, uint64_t> forBodyShift;
  for (const auto &it :
       llvm::enumerate(llvm::reverse(forBody->getOperations()))) {
    auto &op = it.value();

    // Get the index of the current operation; iteration is in reverse.
    size_t index = shifts.size() - it.index() - 1;

    // Remember the shift of this operation.
    uint64_t shift = shifts[index];
    forBodyShift.try_emplace(&op, shift);

    // Validate the results of this operation if it were to be shifted.
    for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) {
      Value result = op.getResult(i);
      for (auto *user : result.getUsers()) {
        // If the user's ancestor lies in the same block, it must carry the
        // same shift; if it lies outside, there is no shift to check.
        if (auto *ancOp = forBody->findAncestorOpInBlock(*user)) {
          assert(forBodyShift.count(ancOp) > 0 && "ancestor expected in map");
          if (shift != forBodyShift[ancOp])
            return false;
        }
      }
    }
  }
  return true;
}
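// Sketch (illustrative): an all-zero shift vector trivially preserves
// dominance, so it always validates; what gets rejected is a producer
// shifted differently from one of its in-body users.
static bool exampleZeroShiftsAreValid(AffineForOp forOp) {
  SmallVector<uint64_t, 8> shifts(forOp.getBody()->getOperations().size(), 0);
  return isOpwiseShiftValid(forOp, shifts); // Always true for zero shifts.
}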
/// Checks whether hyper-rectangular tiling of the nest represented by `loops`
/// is valid, i.e., not violated by any dependence.
bool mlir::affine::isTilingValid(ArrayRef<AffineForOp> loops) {
  assert(!loops.empty() && "no original loops provided");

  // First gather all the dependences we intend to check.
  SmallVector<Operation *, 8> loadAndStoreOps;
  loops[0]->walk([&](Operation *op) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
  });

  unsigned numOps = loadAndStoreOps.size();
  unsigned numLoops = loops.size();
  for (unsigned d = 1; d <= numLoops + 1; ++d) {
    for (unsigned i = 0; i < numOps; ++i) {
      Operation *srcOp = loadAndStoreOps[i];
      MemRefAccess srcAccess(srcOp);
      for (unsigned j = 0; j < numOps; ++j) {
        Operation *dstOp = loadAndStoreOps[j];
        MemRefAccess dstAccess(dstOp);

        SmallVector<DependenceComponent, 2> depComps;
        DependenceResult result = checkMemrefAccessDependence(
            srcAccess, dstAccess, d, /*dependenceConstraints=*/nullptr,
            &depComps);
        if (!hasDependence(result))
          continue;

        // A negative dependence component means the default hyper-rectangular
        // tiling method would violate the dependence.
        LDBG() << "Checking whether tiling legality violated "
                  "for dependence at depth: "
               << Twine(d) << " between:";
        LDBG() << OpWithFlags(srcAccess.opInst, OpPrintingFlags().skipRegions());
        LDBG() << OpWithFlags(dstAccess.opInst, OpPrintingFlags().skipRegions());
        for (const DependenceComponent &depComp : depComps) {
          if (depComp.lb.has_value() && depComp.ub.has_value() &&
              *depComp.lb < *depComp.ub && *depComp.ub < 0) {
            LDBG() << "Dependence component lb = " << Twine(*depComp.lb)
                   << " ub = " << Twine(*depComp.ub)
                   << " is negative at depth: " << Twine(d)
                   << " and thus violates the legality rule.";
            return false;
          }
        }
      }
    }
  }
  return true;
}
bool mlir::affine::hasCyclicDependence(AffineForOp root) {
  // Collect all the memory accesses in the nest rooted at `root`.
  DirectedOpGraph graph;
  SmallVector<MemRefAccess> accesses;
  root->walk([&](Operation *op) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) {
      accesses.emplace_back(op);
      graph.addNode(op);
    }
  });

  // Construct the dependence graph for all the collected accesses.
  unsigned rootDepth = getNestingDepth(root);
  for (const auto &accA : accesses) {
    for (const auto &accB : accesses) {
      if (accA.memref != accB.memref)
        continue;
      // Check the dependence at all common depths below the root.
      unsigned numCommonLoops =
          getNumCommonSurroundingLoops(*accA.opInst, *accB.opInst);
      for (unsigned d = rootDepth + 1; d <= numCommonLoops + 1; ++d) {
        if (!noDependence(checkMemrefAccessDependence(accA, accB, d)))
          graph.addEdge(accA.opInst, accB.opInst);
      }
    }
  }
  return graph.hasCycle();
}
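// Sketch (illustrative): transformations that reorder or fuse nests can use
// the cycle check as a conservative legality bail-out, since a dependence
// cycle among the accesses means no topological order of them exists.
static bool exampleCanReorderAccesses(AffineForOp root) {
  return !hasCyclicDependence(root);
}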