MLIR  16.0.0git
DataFlowFramework.h
Go to the documentation of this file.
1 //===- DataFlowFramework.h - A generic framework for data-flow analysis ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a generic framework for writing data-flow analysis in MLIR.
10 // The framework consists of a solver, which runs the fixed-point iteration and
11 // manages analysis dependencies, and a data-flow analysis class used to
12 // implement specific analyses.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef MLIR_ANALYSIS_DATAFLOWFRAMEWORK_H
17 #define MLIR_ANALYSIS_DATAFLOWFRAMEWORK_H
18 
19 #include "mlir/IR/Operation.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/Support/TypeName.h"
23 #include <queue>
24 
25 namespace mlir {
26 
27 //===----------------------------------------------------------------------===//
28 // ChangeResult
29 //===----------------------------------------------------------------------===//
30 
31 /// A result type used to indicate if a change happened. Boolean operations on
32 /// ChangeResult behave as though `Change` is truthy.
33 enum class ChangeResult {
34  NoChange,
35  Change,
36 };
38  return lhs == ChangeResult::Change ? lhs : rhs;
39 }
41  lhs = lhs | rhs;
42  return lhs;
43 }
45  return lhs == ChangeResult::NoChange ? lhs : rhs;
46 }
47 
48 /// Forward declare the analysis state class.
49 class AnalysisState;
50 
51 //===----------------------------------------------------------------------===//
52 // GenericProgramPoint
53 //===----------------------------------------------------------------------===//
54 
55 /// Abstract class for generic program points. In classical data-flow analysis,
56 /// programs points represent positions in a program to which lattice elements
57 /// are attached. In sparse data-flow analysis, these can be SSA values, and in
58 /// dense data-flow analysis, these are the program points before and after
59 /// every operation.
60 ///
61 /// In the general MLIR data-flow analysis framework, program points are an
62 /// extensible concept. Program points are uniquely identifiable objects to
63 /// which analysis states can be attached. The semantics of program points are
64 /// defined by the analyses that specify their transfer functions.
65 ///
66 /// Program points are implemented using MLIR's storage uniquer framework and
67 /// type ID system to provide RTTI.
69 public:
71 
72  /// Get the abstract program point's type identifier.
73  TypeID getTypeID() const { return typeID; }
74 
75  /// Get a derived source location for the program point.
76  virtual Location getLoc() const = 0;
77 
78  /// Print the program point.
79  virtual void print(raw_ostream &os) const = 0;
80 
81 protected:
82  /// Create an abstract program point with type identifier.
83  explicit GenericProgramPoint(TypeID typeID) : typeID(typeID) {}
84 
85 private:
86  /// The type identifier of the program point.
87  TypeID typeID;
88 };
89 
90 //===----------------------------------------------------------------------===//
91 // GenericProgramPointBase
92 //===----------------------------------------------------------------------===//
93 
94 /// Base class for generic program points based on a concrete program point
95 /// type and a content key. This class defines the common methods required for
96 /// operability with the storage uniquer framework.
97 ///
98 /// The provided key type uniquely identifies the concrete program point
99 /// instance and are the data members of the class.
100 template <typename ConcreteT, typename Value>
102 public:
103  /// The concrete key type used by the storage uniquer. This class is uniqued
104  /// by its contents.
105  using KeyTy = Value;
106  /// Alias for the base class.
108 
109  /// Construct an instance of the program point using the provided value and
110  /// the type ID of the concrete type.
111  template <typename ValueT>
112  explicit GenericProgramPointBase(ValueT &&value)
113  : GenericProgramPoint(TypeID::get<ConcreteT>()),
114  value(std::forward<ValueT>(value)) {}
115 
116  /// Get a uniqued instance of this program point class with the given
117  /// arguments.
118  template <typename... Args>
119  static ConcreteT *get(StorageUniquer &uniquer, Args &&...args) {
120  return uniquer.get<ConcreteT>(/*initFn=*/{}, std::forward<Args>(args)...);
121  }
122 
123  /// Allocate space for a program point and construct it in-place.
124  template <typename ValueT>
125  static ConcreteT *construct(StorageUniquer::StorageAllocator &alloc,
126  ValueT &&value) {
127  return new (alloc.allocate<ConcreteT>())
128  ConcreteT(std::forward<ValueT>(value));
129  }
130 
131  /// Two program points are equal if their values are equal.
132  bool operator==(const Value &value) const { return this->value == value; }
133 
134  /// Provide LLVM-style RTTI using type IDs.
135  static bool classof(const GenericProgramPoint *point) {
136  return point->getTypeID() == TypeID::get<ConcreteT>();
137  }
138 
139  /// Get the contents of the program point.
140  const Value &getValue() const { return value; }
141 
142 private:
143  /// The program point value.
144  Value value;
145 };
146 
147 //===----------------------------------------------------------------------===//
148 // ProgramPoint
149 //===----------------------------------------------------------------------===//
150 
151 /// Fundamental IR components are supported as first-class program points.
153  : public PointerUnion<GenericProgramPoint *, Operation *, Value, Block *> {
154  using ParentTy =
156  /// Inherit constructors.
157  using ParentTy::PointerUnion;
158  /// Allow implicit conversion from the parent type.
159  ProgramPoint(ParentTy point = nullptr) : ParentTy(point) {}
160  /// Allow implicit conversions from operation wrappers.
161  /// TODO: For Windows only. Find a better solution.
162  template <typename OpT, typename = std::enable_if_t<
165  ProgramPoint(OpT op) : ParentTy(op) {}
166 
167  /// Print the program point.
168  void print(raw_ostream &os) const;
169 
170  /// Get the source location of the program point.
171  Location getLoc() const;
172 };
173 
174 /// Forward declaration of the data-flow analysis class.
175 class DataFlowAnalysis;
176 
177 //===----------------------------------------------------------------------===//
178 // DataFlowSolver
179 //===----------------------------------------------------------------------===//
180 
181 /// The general data-flow analysis solver. This class is responsible for
182 /// orchestrating child data-flow analyses, running the fixed-point iteration
183 /// algorithm, managing analysis state and program point memory, and tracking
184 /// dependencies beteen analyses, program points, and analysis states.
185 ///
186 /// Steps to run a data-flow analysis:
187 ///
188 /// 1. Load and initialize children analyses. Children analyses are instantiated
189 /// in the solver and initialized, building their dependency relations.
190 /// 2. Configure and run the analysis. The solver invokes the children analyses
191 /// according to their dependency relations until a fixed point is reached.
192 /// 3. Query analysis state results from the solver.
193 ///
194 /// TODO: Optimize the internal implementation of the solver.
196 public:
197  /// Load an analysis into the solver. Return the analysis instance.
198  template <typename AnalysisT, typename... Args>
199  AnalysisT *load(Args &&...args);
200 
201  /// Initialize the children analyses starting from the provided top-level
202  /// operation and run the analysis until fixpoint.
204 
205  /// Lookup an analysis state for the given program point. Returns null if one
206  /// does not exist.
207  template <typename StateT, typename PointT>
208  const StateT *lookupState(PointT point) const {
209  auto it = analysisStates.find({ProgramPoint(point), TypeID::get<StateT>()});
210  if (it == analysisStates.end())
211  return nullptr;
212  return static_cast<const StateT *>(it->second.get());
213  }
214 
215  /// Get a uniqued program point instance. If one is not present, it is
216  /// created with the provided arguments.
217  template <typename PointT, typename... Args>
218  PointT *getProgramPoint(Args &&...args) {
219  return PointT::get(uniquer, std::forward<Args>(args)...);
220  }
221 
222  /// A work item on the solver queue is a program point, child analysis pair.
223  /// Each item is processed by invoking the child analysis at the program
224  /// point.
225  using WorkItem = std::pair<ProgramPoint, DataFlowAnalysis *>;
226  /// Push a work item onto the worklist.
227  void enqueue(WorkItem item) { worklist.push(std::move(item)); }
228 
229  /// Get the state associated with the given program point. If it does not
230  /// exist, create an uninitialized state.
231  template <typename StateT, typename PointT>
232  StateT *getOrCreateState(PointT point);
233 
234  /// Propagate an update to an analysis state if it changed by pushing
235  /// dependent work items to the back of the queue.
236  void propagateIfChanged(AnalysisState *state, ChangeResult changed);
237 
238  /// Add a dependency to an analysis state on a child analysis and program
239  /// point. If the state is updated, the child analysis must be invoked on the
240  /// given program point again.
241  void addDependency(AnalysisState *state, DataFlowAnalysis *analysis,
242  ProgramPoint point);
243 
244 private:
245  /// The solver's work queue. Work items can be inserted to the front of the
246  /// queue to be processed greedily, speeding up computations that otherwise
247  /// quickly degenerate to quadratic due to propagation of state updates.
248  std::queue<WorkItem> worklist;
249 
250  /// Type-erased instances of the children analyses.
252 
253  /// The storage uniquer instance that owns the memory of the allocated program
254  /// points.
255  StorageUniquer uniquer;
256 
257  /// A type-erased map of program points to associated analysis states for
258  /// first-class program points.
259  DenseMap<std::pair<ProgramPoint, TypeID>, std::unique_ptr<AnalysisState>>
260  analysisStates;
261 
262  /// Allow the base child analysis class to access the internals of the solver.
263  friend class DataFlowAnalysis;
264 };
265 
266 //===----------------------------------------------------------------------===//
267 // AnalysisState
268 //===----------------------------------------------------------------------===//
269 
270 /// Base class for generic analysis states. Analysis states contain data-flow
271 /// information that are attached to program points and which evolve as the
272 /// analysis iterates.
273 ///
274 /// This class places no restrictions on the semantics of analysis states beyond
275 /// these requirements.
276 ///
277 /// 1. Querying the state of a program point prior to visiting that point
278 /// results in uninitialized state. Analyses must be aware of unintialized
279 /// states.
280 /// 2. Analysis states can reach fixpoints, where subsequent updates will never
281 /// trigger a change in the state.
282 /// 3. Analysis states that are uninitialized can be forcefully initialized to a
283 /// default value.
285 public:
286  virtual ~AnalysisState();
287 
288  /// Create the analysis state at the given program point.
290 
291  /// Returns the program point this static is located at.
292  ProgramPoint getPoint() const { return point; }
293 
294  /// Print the contents of the analysis state.
295  virtual void print(raw_ostream &os) const = 0;
296 
297 protected:
298  /// This function is called by the solver when the analysis state is updated
299  /// to optionally enqueue more work items. For example, if a state tracks
300  /// dependents through the IR (e.g. use-def chains), this function can be
301  /// implemented to push those dependents on the worklist.
302  virtual void onUpdate(DataFlowSolver *solver) const {}
303 
304  /// The dependency relations originating from this analysis state. An entry
305  /// `state -> (analysis, point)` is created when `analysis` queries `state`
306  /// when updating `point`.
307  ///
308  /// When this state is updated, all dependent child analysis invocations are
309  /// pushed to the back of the queue. Use a `SetVector` to keep the analysis
310  /// deterministic.
311  ///
312  /// Store the dependents on the analysis state for efficiency.
314 
315  /// The program point to which the state belongs.
317 
318 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
319  /// When compiling with debugging, keep a name for the analysis state.
320  StringRef debugName;
321 #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS
322 
323  /// Allow the framework to access the dependents.
324  friend class DataFlowSolver;
325 };
326 
327 //===----------------------------------------------------------------------===//
328 // DataFlowAnalysis
329 //===----------------------------------------------------------------------===//
330 
331 /// Base class for all data-flow analyses. A child analysis is expected to build
332 /// an initial dependency graph (and optionally provide an initial state) when
333 /// initialized and define transfer functions when visiting program points.
334 ///
335 /// In classical data-flow analysis, the dependency graph is fixed and analyses
336 /// define explicit transfer functions between input states and output states.
337 /// In this framework, however, the dependency graph can change during the
338 /// analysis, and transfer functions are opaque such that the solver doesn't
339 /// know what states calling `visit` on an analysis will be updated. This allows
340 /// multiple analyses to plug in and provide values for the same state.
341 ///
342 /// Generally, when an analysis queries an uninitialized state, it is expected
343 /// to "bail out", i.e., not provide any updates. When the value is initialized,
344 /// the solver will re-invoke the analysis. If the solver exhausts its worklist,
345 /// however, and there are still uninitialized states, the solver "nudges" the
346 /// analyses by default-initializing those states.
348 public:
349  virtual ~DataFlowAnalysis();
350 
351  /// Create an analysis with a reference to the parent solver.
352  explicit DataFlowAnalysis(DataFlowSolver &solver);
353 
354  /// Initialize the analysis from the provided top-level operation by building
355  /// an initial dependency graph between all program points of interest. This
356  /// can be implemented by calling `visit` on all program points of interest
357  /// below the top-level operation.
358  ///
359  /// An analysis can optionally provide initial values to certain analysis
360  /// states to influence the evolution of the analysis.
361  virtual LogicalResult initialize(Operation *top) = 0;
362 
363  /// Visit the given program point. This function is invoked by the solver on
364  /// this analysis with a given program point when a dependent analysis state
365  /// is updated. The function is similar to a transfer function; it queries
366  /// certain analysis states and sets other states.
367  ///
368  /// The function is expected to create dependencies on queried states and
369  /// propagate updates on changed states. A dependency can be created by
370  /// calling `addDependency` between the input state and a program point,
371  /// indicating that, if the state is updated, the solver should invoke `solve`
372  /// on the program point. The dependent point does not have to be the same as
373  /// the provided point. An update to a state is propagated by calling
374  /// `propagateIfChange` on the state. If the state has changed, then all its
375  /// dependents are placed on the worklist.
376  ///
377  /// The dependency graph does not need to be static. Each invocation of
378  /// `visit` can add new dependencies, but these dependecies will not be
379  /// dynamically added to the worklist because the solver doesn't know what
380  /// will provide a value for then.
381  virtual LogicalResult visit(ProgramPoint point) = 0;
382 
383 protected:
384  /// Create a dependency between the given analysis state and program point
385  /// on this analysis.
386  void addDependency(AnalysisState *state, ProgramPoint point);
387 
388  /// Propagate an update to a state if it changed.
389  void propagateIfChanged(AnalysisState *state, ChangeResult changed);
390 
391  /// Register a custom program point class.
392  template <typename PointT>
394  solver.uniquer.registerParametricStorageType<PointT>();
395  }
396 
397  /// Get or create a custom program point.
398  template <typename PointT, typename... Args>
399  PointT *getProgramPoint(Args &&...args) {
400  return solver.getProgramPoint<PointT>(std::forward<Args>(args)...);
401  }
402 
403  /// Get the analysis state assiocated with the program point. The returned
404  /// state is expected to be "write-only", and any updates need to be
405  /// propagated by `propagateIfChanged`.
406  template <typename StateT, typename PointT>
407  StateT *getOrCreate(PointT point) {
408  return solver.getOrCreateState<StateT>(point);
409  }
410 
411  /// Get a read-only analysis state for the given point and create a dependency
412  /// on `dependent`. If the return state is updated elsewhere, this analysis is
413  /// re-invoked on the dependent.
414  template <typename StateT, typename PointT>
415  const StateT *getOrCreateFor(ProgramPoint dependent, PointT point) {
416  StateT *state = getOrCreate<StateT>(point);
417  addDependency(state, dependent);
418  return state;
419  }
420 
421 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
422  /// When compiling with debugging, keep a name for the analyis.
423  StringRef debugName;
424 #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS
425 
426 private:
427  /// The parent data-flow solver.
428  DataFlowSolver &solver;
429 
430  /// Allow the data-flow solver to access the internals of this class.
431  friend class DataFlowSolver;
432 };
433 
434 template <typename AnalysisT, typename... Args>
435 AnalysisT *DataFlowSolver::load(Args &&...args) {
436  childAnalyses.emplace_back(new AnalysisT(*this, std::forward<Args>(args)...));
437 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
438  childAnalyses.back().get()->debugName = llvm::getTypeName<AnalysisT>();
439 #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS
440  return static_cast<AnalysisT *>(childAnalyses.back().get());
441 }
442 
443 template <typename StateT, typename PointT>
444 StateT *DataFlowSolver::getOrCreateState(PointT point) {
445  std::unique_ptr<AnalysisState> &state =
446  analysisStates[{ProgramPoint(point), TypeID::get<StateT>()}];
447  if (!state) {
448  state = std::unique_ptr<StateT>(new StateT(point));
449 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
450  state->debugName = llvm::getTypeName<StateT>();
451 #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS
452  }
453  return static_cast<StateT *>(state.get());
454 }
455 
456 inline raw_ostream &operator<<(raw_ostream &os, const AnalysisState &state) {
457  state.print(os);
458  return os;
459 }
460 
461 inline raw_ostream &operator<<(raw_ostream &os, ProgramPoint point) {
462  point.print(os);
463  return os;
464 }
465 
466 } // end namespace mlir
467 
468 namespace llvm {
469 /// Allow hashing of program points.
470 template <>
471 struct DenseMapInfo<mlir::ProgramPoint>
472  : public DenseMapInfo<mlir::ProgramPoint::ParentTy> {};
473 } // end namespace llvm
474 
475 #endif // MLIR_ANALYSIS_DATAFLOWFRAMEWORK_H
static constexpr const bool value
Base class for generic analysis states.
ProgramPoint getPoint() const
Returns the program point this static is located at.
virtual void print(raw_ostream &os) const =0
Print the contents of the analysis state.
virtual void onUpdate(DataFlowSolver *solver) const
This function is called by the solver when the analysis state is updated to optionally enqueue more w...
SetVector< DataFlowSolver::WorkItem > dependents
The dependency relations originating from this analysis state.
virtual ~AnalysisState()
ProgramPoint point
The program point to which the state belongs.
AnalysisState(ProgramPoint point)
Create the analysis state at the given program point.
Base class for all data-flow analyses.
void registerPointKind()
Register a custom program point class.
void propagateIfChanged(AnalysisState *state, ChangeResult changed)
Propagate an update to a state if it changed.
PointT * getProgramPoint(Args &&...args)
Get or create a custom program point.
virtual LogicalResult visit(ProgramPoint point)=0
Visit the given program point.
const StateT * getOrCreateFor(ProgramPoint dependent, PointT point)
Get a read-only analysis state for the given point and create a dependency on dependent.
DataFlowAnalysis(DataFlowSolver &solver)
Create an analysis with a reference to the parent solver.
StateT * getOrCreate(PointT point)
Get the analysis state assiocated with the program point.
virtual LogicalResult initialize(Operation *top)=0
Initialize the analysis from the provided top-level operation by building an initial dependency graph...
void addDependency(AnalysisState *state, ProgramPoint point)
Create a dependency between the given analysis state and program point on this analysis.
The general data-flow analysis solver.
PointT * getProgramPoint(Args &&...args)
Get a uniqued program point instance.
StateT * getOrCreateState(PointT point)
Get the state associated with the given program point.
void addDependency(AnalysisState *state, DataFlowAnalysis *analysis, ProgramPoint point)
Add a dependency to an analysis state on a child analysis and program point.
void enqueue(WorkItem item)
Push a work item onto the worklist.
void propagateIfChanged(AnalysisState *state, ChangeResult changed)
Propagate an update to an analysis state if it changed by pushing dependent work items to the back of...
const StateT * lookupState(PointT point) const
Lookup an analysis state for the given program point.
AnalysisT * load(Args &&...args)
Load an analysis into the solver. Return the analysis instance.
LogicalResult initializeAndRun(Operation *top)
Initialize the children analyses starting from the provided top-level operation and run the analysis ...
std::pair< ProgramPoint, DataFlowAnalysis * > WorkItem
A work item on the solver queue is a program point, child analysis pair.
Base class for generic program points based on a concrete program point type and a content key.
static ConcreteT * construct(StorageUniquer::StorageAllocator &alloc, ValueT &&value)
Allocate space for a program point and construct it in-place.
static ConcreteT * get(StorageUniquer &uniquer, Args &&...args)
Get a uniqued instance of this program point class with the given arguments.
static bool classof(const GenericProgramPoint *point)
Provide LLVM-style RTTI using type IDs.
const Value & getValue() const
Get the contents of the program point.
bool operator==(const Value &value) const
Two program points are equal if their values are equal.
GenericProgramPointBase(ValueT &&value)
Construct an instance of the program point using the provided value and the type ID of the concrete t...
Abstract class for generic program points.
virtual void print(raw_ostream &os) const =0
Print the program point.
virtual Location getLoc() const =0
Get a derived source location for the program point.
TypeID getTypeID() const
Get the abstract program point's type identifier.
GenericProgramPoint(TypeID typeID)
Create an abstract program point with type identifier.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:64
Operation is a basic unit of execution within MLIR.
Definition: Operation.h:31
This class acts as the base storage that all storage classes must derived from.
This is a utility allocator used to allocate memory for instances of derived types.
T * allocate()
Allocate an instance of the provided type.
A utility class to get or create instances of "storage classes".
Storage * get(function_ref< void(Storage *)> initFn, TypeID id, Args &&...args)
Gets a uniqued instance of 'Storage'.
void registerParametricStorageType(TypeID id)
Register a new parametric storage class, this is necessary to create instances of this class type.
This class provides an efficient unique identifier for a specific C++ type.
Definition: TypeID.h:104
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:85
Include the generated interface declarations.
Definition: CallGraph.h:229
Include the generated interface declarations.
ChangeResult & operator|=(ChangeResult &lhs, ChangeResult rhs)
ChangeResult
A result type used to indicate if a change happened.
ChangeResult operator&(ChangeResult lhs, ChangeResult rhs)
ChangeResult operator|(ChangeResult lhs, ChangeResult rhs)
raw_ostream & operator<<(raw_ostream &os, const AliasResult &result)
Definition: AliasAnalysis.h:78
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
Fundamental IR components are supported as first-class program points.
ProgramPoint(ParentTy point=nullptr)
Allow implicit conversion from the parent type.
Location getLoc() const
Get the source location of the program point.
ProgramPoint(OpT op)
Allow implicit conversions from operation wrappers.
void print(raw_ostream &os) const
Print the program point.