MLIR  18.0.0git
DataFlowFramework.h
Go to the documentation of this file.
1 //===- DataFlowFramework.h - A generic framework for data-flow analysis ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a generic framework for writing data-flow analysis in MLIR.
10 // The framework consists of a solver, which runs the fixed-point iteration and
11 // manages analysis dependencies, and a data-flow analysis class used to
12 // implement specific analyses.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef MLIR_ANALYSIS_DATAFLOWFRAMEWORK_H
17 #define MLIR_ANALYSIS_DATAFLOWFRAMEWORK_H
18 
19 #include "mlir/IR/Operation.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/Support/Compiler.h"
23 #include "llvm/Support/TypeName.h"
24 #include <queue>
25 
26 namespace mlir {
27 
28 //===----------------------------------------------------------------------===//
29 // ChangeResult
30 //===----------------------------------------------------------------------===//
31 
32 /// A result type used to indicate if a change happened. Boolean operations on
33 /// ChangeResult behave as though `Change` is truthy.
34 enum class ChangeResult {
35  NoChange,
36  Change,
37 };
39  return lhs == ChangeResult::Change ? lhs : rhs;
40 }
42  lhs = lhs | rhs;
43  return lhs;
44 }
46  return lhs == ChangeResult::NoChange ? lhs : rhs;
47 }
48 
49 /// Forward declare the analysis state class.
50 class AnalysisState;
51 
52 //===----------------------------------------------------------------------===//
53 // GenericProgramPoint
54 //===----------------------------------------------------------------------===//
55 
56 /// Abstract class for generic program points. In classical data-flow analysis,
57 /// programs points represent positions in a program to which lattice elements
58 /// are attached. In sparse data-flow analysis, these can be SSA values, and in
59 /// dense data-flow analysis, these are the program points before and after
60 /// every operation.
61 ///
62 /// In the general MLIR data-flow analysis framework, program points are an
63 /// extensible concept. Program points are uniquely identifiable objects to
64 /// which analysis states can be attached. The semantics of program points are
65 /// defined by the analyses that specify their transfer functions.
66 ///
67 /// Program points are implemented using MLIR's storage uniquer framework and
68 /// type ID system to provide RTTI.
70 public:
72 
73  /// Get the abstract program point's type identifier.
74  TypeID getTypeID() const { return typeID; }
75 
76  /// Get a derived source location for the program point.
77  virtual Location getLoc() const = 0;
78 
79  /// Print the program point.
80  virtual void print(raw_ostream &os) const = 0;
81 
82 protected:
83  /// Create an abstract program point with type identifier.
84  explicit GenericProgramPoint(TypeID typeID) : typeID(typeID) {}
85 
86 private:
87  /// The type identifier of the program point.
88  TypeID typeID;
89 };
90 
91 //===----------------------------------------------------------------------===//
92 // GenericProgramPointBase
93 //===----------------------------------------------------------------------===//
94 
95 /// Base class for generic program points based on a concrete program point
96 /// type and a content key. This class defines the common methods required for
97 /// operability with the storage uniquer framework.
98 ///
99 /// The provided key type uniquely identifies the concrete program point
100 /// instance and are the data members of the class.
101 template <typename ConcreteT, typename Value>
103 public:
104  /// The concrete key type used by the storage uniquer. This class is uniqued
105  /// by its contents.
106  using KeyTy = Value;
107  /// Alias for the base class.
109 
110  /// Construct an instance of the program point using the provided value and
111  /// the type ID of the concrete type.
112  template <typename ValueT>
113  explicit GenericProgramPointBase(ValueT &&value)
114  : GenericProgramPoint(TypeID::get<ConcreteT>()),
115  value(std::forward<ValueT>(value)) {}
116 
117  /// Get a uniqued instance of this program point class with the given
118  /// arguments.
119  template <typename... Args>
120  static ConcreteT *get(StorageUniquer &uniquer, Args &&...args) {
121  return uniquer.get<ConcreteT>(/*initFn=*/{}, std::forward<Args>(args)...);
122  }
123 
124  /// Allocate space for a program point and construct it in-place.
125  template <typename ValueT>
126  static ConcreteT *construct(StorageUniquer::StorageAllocator &alloc,
127  ValueT &&value) {
128  return new (alloc.allocate<ConcreteT>())
129  ConcreteT(std::forward<ValueT>(value));
130  }
131 
132  /// Two program points are equal if their values are equal.
133  bool operator==(const Value &value) const { return this->value == value; }
134 
135  /// Provide LLVM-style RTTI using type IDs.
136  static bool classof(const GenericProgramPoint *point) {
137  return point->getTypeID() == TypeID::get<ConcreteT>();
138  }
139 
140  /// Get the contents of the program point.
141  const Value &getValue() const { return value; }
142 
143 private:
144  /// The program point value.
145  Value value;
146 };
147 
148 //===----------------------------------------------------------------------===//
149 // ProgramPoint
150 //===----------------------------------------------------------------------===//
151 
152 /// Fundamental IR components are supported as first-class program points.
154  : public PointerUnion<GenericProgramPoint *, Operation *, Value, Block *> {
155  using ParentTy =
157  /// Inherit constructors.
158  using ParentTy::PointerUnion;
159  /// Allow implicit conversion from the parent type.
160  ProgramPoint(ParentTy point = nullptr) : ParentTy(point) {}
161  /// Allow implicit conversions from operation wrappers.
162  /// TODO: For Windows only. Find a better solution.
163  template <typename OpT, typename = std::enable_if_t<
164  std::is_convertible<OpT, Operation *>::value &&
165  !std::is_same<OpT, Operation *>::value>>
166  ProgramPoint(OpT op) : ParentTy(op) {}
167 
168  /// Print the program point.
169  void print(raw_ostream &os) const;
170 
171  /// Get the source location of the program point.
172  Location getLoc() const;
173 };
174 
175 /// Forward declaration of the data-flow analysis class.
176 class DataFlowAnalysis;
177 
178 //===----------------------------------------------------------------------===//
179 // DataFlowSolver
180 //===----------------------------------------------------------------------===//
181 
182 /// The general data-flow analysis solver. This class is responsible for
183 /// orchestrating child data-flow analyses, running the fixed-point iteration
184 /// algorithm, managing analysis state and program point memory, and tracking
185 /// dependencies between analyses, program points, and analysis states.
186 ///
187 /// Steps to run a data-flow analysis:
188 ///
189 /// 1. Load and initialize children analyses. Children analyses are instantiated
190 /// in the solver and initialized, building their dependency relations.
191 /// 2. Configure and run the analysis. The solver invokes the children analyses
192 /// according to their dependency relations until a fixed point is reached.
193 /// 3. Query analysis state results from the solver.
194 ///
195 /// TODO: Optimize the internal implementation of the solver.
197 public:
198  /// Load an analysis into the solver. Return the analysis instance.
199  template <typename AnalysisT, typename... Args>
200  AnalysisT *load(Args &&...args);
201 
202  /// Initialize the children analyses starting from the provided top-level
203  /// operation and run the analysis until fixpoint.
205 
206  /// Lookup an analysis state for the given program point. Returns null if one
207  /// does not exist.
208  template <typename StateT, typename PointT>
209  const StateT *lookupState(PointT point) const {
210  auto it = analysisStates.find({ProgramPoint(point), TypeID::get<StateT>()});
211  if (it == analysisStates.end())
212  return nullptr;
213  return static_cast<const StateT *>(it->second.get());
214  }
215 
216  /// Get a uniqued program point instance. If one is not present, it is
217  /// created with the provided arguments.
218  template <typename PointT, typename... Args>
219  PointT *getProgramPoint(Args &&...args) {
220  return PointT::get(uniquer, std::forward<Args>(args)...);
221  }
222 
223  /// A work item on the solver queue is a program point, child analysis pair.
224  /// Each item is processed by invoking the child analysis at the program
225  /// point.
226  using WorkItem = std::pair<ProgramPoint, DataFlowAnalysis *>;
227  /// Push a work item onto the worklist.
228  void enqueue(WorkItem item) { worklist.push(std::move(item)); }
229 
230  /// Get the state associated with the given program point. If it does not
231  /// exist, create an uninitialized state.
232  template <typename StateT, typename PointT>
233  StateT *getOrCreateState(PointT point);
234 
235  /// Propagate an update to an analysis state if it changed by pushing
236  /// dependent work items to the back of the queue.
237  void propagateIfChanged(AnalysisState *state, ChangeResult changed);
238 
239 private:
240  /// The solver's work queue. Work items can be inserted to the front of the
241  /// queue to be processed greedily, speeding up computations that otherwise
242  /// quickly degenerate to quadratic due to propagation of state updates.
243  std::queue<WorkItem> worklist;
244 
245  /// Type-erased instances of the children analyses.
247 
248  /// The storage uniquer instance that owns the memory of the allocated program
249  /// points.
250  StorageUniquer uniquer;
251 
252  /// A type-erased map of program points to associated analysis states for
253  /// first-class program points.
254  DenseMap<std::pair<ProgramPoint, TypeID>, std::unique_ptr<AnalysisState>>
255  analysisStates;
256 
257  /// Allow the base child analysis class to access the internals of the solver.
258  friend class DataFlowAnalysis;
259 };
260 
261 //===----------------------------------------------------------------------===//
262 // AnalysisState
263 //===----------------------------------------------------------------------===//
264 
265 /// Base class for generic analysis states. Analysis states contain data-flow
266 /// information that are attached to program points and which evolve as the
267 /// analysis iterates.
268 ///
269 /// This class places no restrictions on the semantics of analysis states beyond
270 /// these requirements.
271 ///
272 /// 1. Querying the state of a program point prior to visiting that point
273 /// results in uninitialized state. Analyses must be aware of unintialized
274 /// states.
275 /// 2. Analysis states can reach fixpoints, where subsequent updates will never
276 /// trigger a change in the state.
277 /// 3. Analysis states that are uninitialized can be forcefully initialized to a
278 /// default value.
280 public:
281  virtual ~AnalysisState();
282 
283  /// Create the analysis state at the given program point.
285 
286  /// Returns the program point this state is located at.
287  ProgramPoint getPoint() const { return point; }
288 
289  /// Print the contents of the analysis state.
290  virtual void print(raw_ostream &os) const = 0;
291  LLVM_DUMP_METHOD void dump() const;
292 
293  /// Add a dependency to this analysis state on a program point and an
294  /// analysis. If this state is updated, the analysis will be invoked on the
295  /// given program point again (in onUpdate()).
296  void addDependency(ProgramPoint dependent, DataFlowAnalysis *analysis);
297 
298 protected:
299  /// This function is called by the solver when the analysis state is updated
300  /// to enqueue more work items. For example, if a state tracks dependents
301  /// through the IR (e.g. use-def chains), this function can be implemented to
302  /// push those dependents on the worklist.
303  virtual void onUpdate(DataFlowSolver *solver) const {
304  for (const DataFlowSolver::WorkItem &item : dependents)
305  solver->enqueue(item);
306  }
307 
308  /// The program point to which the state belongs.
310 
311 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
312  /// When compiling with debugging, keep a name for the analysis state.
313  StringRef debugName;
314 #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS
315 
316 private:
317  /// The dependency relations originating from this analysis state. An entry
318  /// `state -> (analysis, point)` is created when `analysis` queries `state`
319  /// when updating `point`.
320  ///
321  /// When this state is updated, all dependent child analysis invocations are
322  /// pushed to the back of the queue. Use a `SetVector` to keep the analysis
323  /// deterministic.
324  ///
325  /// Store the dependents on the analysis state for efficiency.
327 
328  /// Allow the framework to access the dependents.
329  friend class DataFlowSolver;
330 };
331 
332 //===----------------------------------------------------------------------===//
333 // DataFlowAnalysis
334 //===----------------------------------------------------------------------===//
335 
336 /// Base class for all data-flow analyses. A child analysis is expected to build
337 /// an initial dependency graph (and optionally provide an initial state) when
338 /// initialized and define transfer functions when visiting program points.
339 ///
340 /// In classical data-flow analysis, the dependency graph is fixed and analyses
341 /// define explicit transfer functions between input states and output states.
342 /// In this framework, however, the dependency graph can change during the
343 /// analysis, and transfer functions are opaque such that the solver doesn't
344 /// know what states calling `visit` on an analysis will be updated. This allows
345 /// multiple analyses to plug in and provide values for the same state.
346 ///
347 /// Generally, when an analysis queries an uninitialized state, it is expected
348 /// to "bail out", i.e., not provide any updates. When the value is initialized,
349 /// the solver will re-invoke the analysis. If the solver exhausts its worklist,
350 /// however, and there are still uninitialized states, the solver "nudges" the
351 /// analyses by default-initializing those states.
353 public:
354  virtual ~DataFlowAnalysis();
355 
356  /// Create an analysis with a reference to the parent solver.
357  explicit DataFlowAnalysis(DataFlowSolver &solver);
358 
359  /// Initialize the analysis from the provided top-level operation by building
360  /// an initial dependency graph between all program points of interest. This
361  /// can be implemented by calling `visit` on all program points of interest
362  /// below the top-level operation.
363  ///
364  /// An analysis can optionally provide initial values to certain analysis
365  /// states to influence the evolution of the analysis.
366  virtual LogicalResult initialize(Operation *top) = 0;
367 
368  /// Visit the given program point. This function is invoked by the solver on
369  /// this analysis with a given program point when a dependent analysis state
370  /// is updated. The function is similar to a transfer function; it queries
371  /// certain analysis states and sets other states.
372  ///
373  /// The function is expected to create dependencies on queried states and
374  /// propagate updates on changed states. A dependency can be created by
375  /// calling `addDependency` between the input state and a program point,
376  /// indicating that, if the state is updated, the solver should invoke `solve`
377  /// on the program point. The dependent point does not have to be the same as
378  /// the provided point. An update to a state is propagated by calling
379  /// `propagateIfChange` on the state. If the state has changed, then all its
380  /// dependents are placed on the worklist.
381  ///
382  /// The dependency graph does not need to be static. Each invocation of
383  /// `visit` can add new dependencies, but these dependencies will not be
384  /// dynamically added to the worklist because the solver doesn't know what
385  /// will provide a value for then.
386  virtual LogicalResult visit(ProgramPoint point) = 0;
387 
388 protected:
389  /// Create a dependency between the given analysis state and program point
390  /// on this analysis.
391  void addDependency(AnalysisState *state, ProgramPoint point);
392 
393  /// Propagate an update to a state if it changed.
394  void propagateIfChanged(AnalysisState *state, ChangeResult changed);
395 
396  /// Register a custom program point class.
397  template <typename PointT>
399  solver.uniquer.registerParametricStorageType<PointT>();
400  }
401 
402  /// Get or create a custom program point.
403  template <typename PointT, typename... Args>
404  PointT *getProgramPoint(Args &&...args) {
405  return solver.getProgramPoint<PointT>(std::forward<Args>(args)...);
406  }
407 
408  /// Get the analysis state associated with the program point. The returned
409  /// state is expected to be "write-only", and any updates need to be
410  /// propagated by `propagateIfChanged`.
411  template <typename StateT, typename PointT>
412  StateT *getOrCreate(PointT point) {
413  return solver.getOrCreateState<StateT>(point);
414  }
415 
416  /// Get a read-only analysis state for the given point and create a dependency
417  /// on `dependent`. If the return state is updated elsewhere, this analysis is
418  /// re-invoked on the dependent.
419  template <typename StateT, typename PointT>
420  const StateT *getOrCreateFor(ProgramPoint dependent, PointT point) {
421  StateT *state = getOrCreate<StateT>(point);
422  addDependency(state, dependent);
423  return state;
424  }
425 
426 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
427  /// When compiling with debugging, keep a name for the analyis.
428  StringRef debugName;
429 #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS
430 
431 private:
432  /// The parent data-flow solver.
433  DataFlowSolver &solver;
434 
435  /// Allow the data-flow solver to access the internals of this class.
436  friend class DataFlowSolver;
437 };
438 
439 template <typename AnalysisT, typename... Args>
440 AnalysisT *DataFlowSolver::load(Args &&...args) {
441  childAnalyses.emplace_back(new AnalysisT(*this, std::forward<Args>(args)...));
442 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
443  childAnalyses.back().get()->debugName = llvm::getTypeName<AnalysisT>();
444 #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS
445  return static_cast<AnalysisT *>(childAnalyses.back().get());
446 }
447 
448 template <typename StateT, typename PointT>
449 StateT *DataFlowSolver::getOrCreateState(PointT point) {
450  std::unique_ptr<AnalysisState> &state =
451  analysisStates[{ProgramPoint(point), TypeID::get<StateT>()}];
452  if (!state) {
453  state = std::unique_ptr<StateT>(new StateT(point));
454 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
455  state->debugName = llvm::getTypeName<StateT>();
456 #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS
457  }
458  return static_cast<StateT *>(state.get());
459 }
460 
461 inline raw_ostream &operator<<(raw_ostream &os, const AnalysisState &state) {
462  state.print(os);
463  return os;
464 }
465 
466 inline raw_ostream &operator<<(raw_ostream &os, ProgramPoint point) {
467  point.print(os);
468  return os;
469 }
470 
471 } // end namespace mlir
472 
473 namespace llvm {
474 /// Allow hashing of program points.
475 template <>
476 struct DenseMapInfo<mlir::ProgramPoint>
477  : public DenseMapInfo<mlir::ProgramPoint::ParentTy> {};
478 
479 // Allow llvm::cast style functions.
480 template <typename To>
481 struct CastInfo<To, mlir::ProgramPoint>
482  : public CastInfo<To, mlir::ProgramPoint::PointerUnion> {};
483 
484 template <typename To>
485 struct CastInfo<To, const mlir::ProgramPoint>
486  : public CastInfo<To, const mlir::ProgramPoint::PointerUnion> {};
487 
488 } // end namespace llvm
489 
490 #endif // MLIR_ANALYSIS_DATAFLOWFRAMEWORK_H
Base class for generic analysis states.
ProgramPoint getPoint() const
Returns the program point this state is located at.
LLVM_DUMP_METHOD void dump() const
virtual void print(raw_ostream &os) const =0
Print the contents of the analysis state.
void addDependency(ProgramPoint dependent, DataFlowAnalysis *analysis)
Add a dependency to this analysis state on a program point and an analysis.
virtual void onUpdate(DataFlowSolver *solver) const
This function is called by the solver when the analysis state is updated to enqueue more work items.
virtual ~AnalysisState()
ProgramPoint point
The program point to which the state belongs.
AnalysisState(ProgramPoint point)
Create the analysis state at the given program point.
Base class for all data-flow analyses.
void registerPointKind()
Register a custom program point class.
void propagateIfChanged(AnalysisState *state, ChangeResult changed)
Propagate an update to a state if it changed.
PointT * getProgramPoint(Args &&...args)
Get or create a custom program point.
virtual LogicalResult visit(ProgramPoint point)=0
Visit the given program point.
const StateT * getOrCreateFor(ProgramPoint dependent, PointT point)
Get a read-only analysis state for the given point and create a dependency on dependent.
DataFlowAnalysis(DataFlowSolver &solver)
Create an analysis with a reference to the parent solver.
StateT * getOrCreate(PointT point)
Get the analysis state associated with the program point.
virtual LogicalResult initialize(Operation *top)=0
Initialize the analysis from the provided top-level operation by building an initial dependency graph...
void addDependency(AnalysisState *state, ProgramPoint point)
Create a dependency between the given analysis state and program point on this analysis.
The general data-flow analysis solver.
PointT * getProgramPoint(Args &&...args)
Get a uniqued program point instance.
StateT * getOrCreateState(PointT point)
Get the state associated with the given program point.
void enqueue(WorkItem item)
Push a work item onto the worklist.
void propagateIfChanged(AnalysisState *state, ChangeResult changed)
Propagate an update to an analysis state if it changed by pushing dependent work items to the back of...
const StateT * lookupState(PointT point) const
Lookup an analysis state for the given program point.
AnalysisT * load(Args &&...args)
Load an analysis into the solver. Return the analysis instance.
LogicalResult initializeAndRun(Operation *top)
Initialize the children analyses starting from the provided top-level operation and run the analysis ...
std::pair< ProgramPoint, DataFlowAnalysis * > WorkItem
A work item on the solver queue is a program point, child analysis pair.
Base class for generic program points based on a concrete program point type and a content key.
static ConcreteT * construct(StorageUniquer::StorageAllocator &alloc, ValueT &&value)
Allocate space for a program point and construct it in-place.
static ConcreteT * get(StorageUniquer &uniquer, Args &&...args)
Get a uniqued instance of this program point class with the given arguments.
static bool classof(const GenericProgramPoint *point)
Provide LLVM-style RTTI using type IDs.
const Value & getValue() const
Get the contents of the program point.
bool operator==(const Value &value) const
Two program points are equal if their values are equal.
GenericProgramPointBase(ValueT &&value)
Construct an instance of the program point using the provided value and the type ID of the concrete t...
Abstract class for generic program points.
virtual void print(raw_ostream &os) const =0
Print the program point.
virtual Location getLoc() const =0
Get a derived source location for the program point.
TypeID getTypeID() const
Get the abstract program point's type identifier.
GenericProgramPoint(TypeID typeID)
Create an abstract program point with type identifier.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:63
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
This class acts as the base storage that all storage classes must derived from.
This is a utility allocator used to allocate memory for instances of derived types.
T * allocate()
Allocate an instance of the provided type.
A utility class to get or create instances of "storage classes".
Storage * get(function_ref< void(Storage *)> initFn, TypeID id, Args &&...args)
Gets a uniqued instance of 'Storage'.
void registerParametricStorageType(TypeID id)
Register a new parametric storage class, this is necessary to create instances of this class type.
This class provides an efficient unique identifier for a specific C++ type.
Definition: TypeID.h:104
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Include the generated interface declarations.
Definition: CallGraph.h:229
Include the generated interface declarations.
ChangeResult & operator|=(ChangeResult &lhs, ChangeResult rhs)
ChangeResult
A result type used to indicate if a change happened.
ChangeResult operator&(ChangeResult lhs, ChangeResult rhs)
ChangeResult operator|(ChangeResult lhs, ChangeResult rhs)
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
raw_ostream & operator<<(raw_ostream &os, const AliasResult &result)
Definition: AliasAnalysis.h:78
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
Fundamental IR components are supported as first-class program points.
ProgramPoint(ParentTy point=nullptr)
Allow implicit conversion from the parent type.
Location getLoc() const
Get the source location of the program point.
ProgramPoint(OpT op)
Allow implicit conversions from operation wrappers.
void print(raw_ostream &os) const
Print the program point.