MLIR  15.0.0git
Timing.cpp
Go to the documentation of this file.
1 //===- Timing.cpp - Execution time measurement facilities -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Facilities to measure and provide statistics on execution time.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "mlir/Support/Timing.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/StringMap.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Support/Allocator.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Support/Format.h"
22 #include "llvm/Support/FormatVariadic.h"
23 #include "llvm/Support/ManagedStatic.h"
24 #include "llvm/Support/RWMutex.h"
25 #include "llvm/Support/Threading.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 #include <atomic>
29 #include <chrono>
30 
31 using namespace mlir;
32 using namespace detail;
34 
35 constexpr llvm::StringLiteral kTimingDescription =
36  "... Execution time report ...";
37 
38 //===----------------------------------------------------------------------===//
39 // TimingManager
40 //===----------------------------------------------------------------------===//
41 
42 namespace mlir {
43 namespace detail {
44 /// Private implementation details of the `TimingManager`.
46 public:
47  // Identifier allocator, map, and mutex for thread safety.
48  llvm::BumpPtrAllocator identifierAllocator;
50  llvm::sys::SmartRWMutex<true> identifierMutex;
51 
52  /// A thread local cache of identifiers to reduce lock contention.
55 
56  TimingManagerImpl() : identifiers(identifierAllocator) {}
57 };
58 } // namespace detail
59 } // namespace mlir
60 
62 
64 
65 /// Get the root timer of this timing manager.
67  auto rt = rootTimer();
68  return rt.hasValue() ? Timer(*this, rt.getValue()) : Timer();
69 }
70 
71 /// Get the root timer of this timing manager wrapped in a `TimingScope`.
73  return TimingScope(getRootTimer());
74 }
75 
76 //===----------------------------------------------------------------------===//
77 // Identifier uniquing
78 //===----------------------------------------------------------------------===//
79 
80 /// Return an identifier for the specified string.
82  // Check for an existing instance in the local cache.
83  auto &impl = *tm.impl;
84  auto *&localEntry = (*impl.localIdentifierCache)[str];
85  if (localEntry)
86  return TimingIdentifier(localEntry);
87 
88  // Check for an existing identifier in read-only mode.
89  {
90  llvm::sys::SmartScopedReader<true> contextLock(impl.identifierMutex);
91  auto it = impl.identifiers.find(str);
92  if (it != impl.identifiers.end()) {
93  localEntry = &*it;
94  return TimingIdentifier(localEntry);
95  }
96  }
97 
98  // Acquire a writer-lock so that we can safely create the new instance.
99  llvm::sys::SmartScopedWriter<true> contextLock(impl.identifierMutex);
100  auto it = impl.identifiers.insert(str).first;
101  localEntry = &*it;
102  return TimingIdentifier(localEntry);
103 }
104 
105 //===----------------------------------------------------------------------===//
106 // Helpers for time record printing
107 //===----------------------------------------------------------------------===//
108 
109 namespace {
110 
111 /// Simple record class to record timing information.
112 struct TimeRecord {
113  TimeRecord(double wall = 0.0, double user = 0.0) : wall(wall), user(user) {}
114 
115  TimeRecord &operator+=(const TimeRecord &other) {
116  wall += other.wall;
117  user += other.user;
118  return *this;
119  }
120 
121  TimeRecord &operator-=(const TimeRecord &other) {
122  wall -= other.wall;
123  user -= other.user;
124  return *this;
125  }
126 
127  /// Print the current time record to 'os', with a breakdown showing
128  /// contributions to the give 'total' time record.
129  void print(raw_ostream &os, const TimeRecord &total) {
130  if (total.user != total.wall)
131  os << llvm::format(" %8.4f (%5.1f%%)", user, 100.0 * user / total.user);
132  os << llvm::format(" %8.4f (%5.1f%%) ", wall, 100.0 * wall / total.wall);
133  }
134 
135  double wall, user;
136 };
137 
138 } // namespace
139 
140 /// Utility to print a single line entry in the timer output.
141 static void printTimeEntry(raw_ostream &os, unsigned indent, StringRef name,
142  TimeRecord time, TimeRecord total) {
143  time.print(os, total);
144  os.indent(indent) << name << "\n";
145 }
146 
147 /// Utility to print the timer heading information.
148 static void printTimeHeader(raw_ostream &os, TimeRecord total) {
149  // Figure out how many spaces to description name.
150  unsigned padding = (80 - kTimingDescription.size()) / 2;
151  os << "===" << std::string(73, '-') << "===\n";
152  os.indent(padding) << kTimingDescription << '\n';
153  os << "===" << std::string(73, '-') << "===\n";
154 
155  // Print the total time followed by the section headers.
156  os << llvm::format(" Total Execution Time: %.4f seconds\n\n", total.wall);
157  if (total.user != total.wall)
158  os << " ----User Time----";
159  os << " ----Wall Time---- ----Name----\n";
160 }
161 
162 //===----------------------------------------------------------------------===//
163 // Timer Implementation for DefaultTimingManager
164 //===----------------------------------------------------------------------===//
165 
166 namespace {
167 
168 /// A timer used to sample execution time.
169 ///
170 /// Separately tracks wall time and user time to account for parallel threads of
171 /// execution. Timers are intended to be started and stopped multiple times.
172 /// Each start and stop will add to the timer's wall and user time.
173 class TimerImpl {
174 public:
175  using ChildrenMap = llvm::MapVector<const void *, std::unique_ptr<TimerImpl>>;
176  using AsyncChildrenMap = llvm::DenseMap<uint64_t, ChildrenMap>;
177 
178  TimerImpl(std::string &&name) : threadId(llvm::get_threadid()), name(name) {}
179 
180  /// Start the timer.
181  void start() { startTime = std::chrono::steady_clock::now(); }
182 
183  /// Stop the timer.
184  void stop() {
185  auto newTime = std::chrono::steady_clock::now() - startTime;
186  wallTime += newTime;
187  userTime += newTime;
188  }
189 
190  /// Create a child timer nested within this one. Multiple calls to this
191  /// function with the same unique identifier `id` will return the same child
192  /// timer.
193  ///
194  /// This function can be called from other threads, as long as this timer
195  /// outlives any uses of the child timer on the other thread.
196  TimerImpl *nest(const void *id, function_ref<std::string()> nameBuilder) {
197  auto tid = llvm::get_threadid();
198  if (tid == threadId)
199  return nestTail(children[id], nameBuilder);
200  std::unique_lock<std::mutex> lock(asyncMutex);
201  return nestTail(asyncChildren[tid][id], nameBuilder);
202  }
203 
204  /// Tail-called from `nest()`.
205  TimerImpl *nestTail(std::unique_ptr<TimerImpl> &child,
206  function_ref<std::string()> nameBuilder) {
207  if (!child)
208  child = std::make_unique<TimerImpl>(nameBuilder());
209  return child.get();
210  }
211 
212  /// Finalize this timer and all its children.
213  ///
214  /// If this timer has async children, which happens if `nest()` was called
215  /// from another thread, this function merges the async childr timers into the
216  /// main list of child timers.
217  ///
218  /// Caution: Call this function only after all nested timers running on other
219  /// threads no longer need their timers!
220  void finalize() {
221  addAsyncUserTime();
222  mergeAsyncChildren();
223  }
224 
225  /// Add the user time of all async children to this timer's user time. This is
226  /// necessary since the user time already contains all regular child timers,
227  /// but not the asynchronous ones (by the nesting nature of the timers).
228  std::chrono::nanoseconds addAsyncUserTime() {
229  auto added = std::chrono::nanoseconds(0);
230  for (auto &child : children)
231  added += child.second->addAsyncUserTime();
232  for (auto &thread : asyncChildren) {
233  for (auto &child : thread.second) {
234  child.second->addAsyncUserTime();
235  added += child.second->userTime;
236  }
237  }
238  userTime += added;
239  return added;
240  }
241 
242  /// Ensure that this timer and recursively all its children have their async
243  /// children folded into the main map of children.
244  void mergeAsyncChildren() {
245  for (auto &child : children)
246  child.second->mergeAsyncChildren();
247  mergeChildren(std::move(asyncChildren));
248  assert(asyncChildren.empty());
249  }
250 
251  /// Merge multiple child timers into this timer.
252  ///
253  /// Children in `other` are added as children to this timer, or, if this timer
254  /// already contains a child with the corresponding unique identifier, are
255  /// merged into the existing child.
256  void mergeChildren(ChildrenMap &&other) {
257  if (children.empty()) {
258  children = std::move(other);
259  for (auto &child : children)
260  child.second->mergeAsyncChildren();
261  } else {
262  for (auto &child : other)
263  mergeChild(child.first, std::move(child.second));
264  other.clear();
265  }
266  }
267 
268  /// See above.
269  void mergeChildren(AsyncChildrenMap &&other) {
270  for (auto &thread : other) {
271  mergeChildren(std::move(thread.second));
272  assert(thread.second.empty());
273  }
274  other.clear();
275  }
276 
277  /// Merge a child timer into this timer for a given unique identifier.
278  ///
279  /// Moves all child and async child timers of `other` into this timer's child
280  /// for the given unique identifier.
281  void mergeChild(const void *id, std::unique_ptr<TimerImpl> &&other) {
282  auto &into = children[id];
283  if (!into) {
284  into = std::move(other);
285  into->mergeAsyncChildren();
286  } else {
287  into->wallTime = std::max(into->wallTime, other->wallTime);
288  into->userTime += other->userTime;
289  into->mergeChildren(std::move(other->children));
290  into->mergeChildren(std::move(other->asyncChildren));
291  other.reset();
292  }
293  }
294 
295  /// Dump a human-readable tree representation of the timer and its children.
296  /// This is useful for debugging the timing mechanisms and structure of the
297  /// timers.
298  void dump(raw_ostream &os, unsigned indent = 0, unsigned markThreadId = 0) {
299  auto time = getTimeRecord();
300  os << std::string(indent * 2, ' ') << name << " [" << threadId << "]"
301  << llvm::format(" %7.4f / %7.4f", time.user, time.wall);
302  if (threadId != markThreadId && markThreadId != 0)
303  os << " (*)";
304  os << "\n";
305  for (auto &child : children)
306  child.second->dump(os, indent + 1, threadId);
307  for (auto &thread : asyncChildren)
308  for (auto &child : thread.second)
309  child.second->dump(os, indent + 1, threadId);
310  }
311 
312  /// Returns the time for this timer in seconds.
313  TimeRecord getTimeRecord() {
314  return TimeRecord(
315  std::chrono::duration_cast<std::chrono::duration<double>>(wallTime)
316  .count(),
317  std::chrono::duration_cast<std::chrono::duration<double>>(userTime)
318  .count());
319  }
320 
321  /// Print the timing result in list mode.
322  void printAsList(raw_ostream &os, TimeRecord total) {
323  // Flatten the leaf timers in the tree and merge them by name.
324  llvm::StringMap<TimeRecord> mergedTimers;
325  std::function<void(TimerImpl *)> addTimer = [&](TimerImpl *timer) {
326  mergedTimers[timer->name] += timer->getTimeRecord();
327  for (auto &children : timer->children)
328  addTimer(children.second.get());
329  };
330  addTimer(this);
331 
332  // Sort the timing information by wall time.
333  std::vector<std::pair<StringRef, TimeRecord>> timerNameAndTime;
334  for (auto &it : mergedTimers)
335  timerNameAndTime.emplace_back(it.first(), it.second);
336  llvm::array_pod_sort(timerNameAndTime.begin(), timerNameAndTime.end(),
337  [](const std::pair<StringRef, TimeRecord> *lhs,
338  const std::pair<StringRef, TimeRecord> *rhs) {
339  return llvm::array_pod_sort_comparator<double>(
340  &rhs->second.wall, &lhs->second.wall);
341  });
342 
343  // Print the timing information sequentially.
344  for (auto &timeData : timerNameAndTime)
345  printTimeEntry(os, 0, timeData.first, timeData.second, total);
346  }
347 
348  /// Print the timing result in tree mode.
349  void printAsTree(raw_ostream &os, TimeRecord total, unsigned indent = 0) {
350  unsigned childIndent = indent;
351  if (!hidden) {
352  printTimeEntry(os, indent, name, getTimeRecord(), total);
353  childIndent += 2;
354  }
355  for (auto &child : children) {
356  child.second->printAsTree(os, total, childIndent);
357  }
358  }
359 
360  /// Print the current timing information.
361  void print(raw_ostream &os, DisplayMode displayMode) {
362  // Print the banner.
363  auto total = getTimeRecord();
364  printTimeHeader(os, total);
365 
366  // Defer to a specialized printer for each display mode.
367  switch (displayMode) {
368  case DisplayMode::List:
369  printAsList(os, total);
370  break;
371  case DisplayMode::Tree:
372  printAsTree(os, total);
373  break;
374  }
375 
376  // Print the top-level time not accounted for by child timers, and the
377  // total.
378  auto rest = total;
379  for (auto &child : children)
380  rest -= child.second->getTimeRecord();
381  printTimeEntry(os, 0, "Rest", rest, total);
382  printTimeEntry(os, 0, "Total", total, total);
383  os.flush();
384  }
385 
386  /// The last time instant at which the timer was started.
387  std::chrono::time_point<std::chrono::steady_clock> startTime;
388 
389  /// Accumulated wall time. If multiple threads of execution are merged into
390  /// this timer, the wall time will hold the maximum wall time of each thread
391  /// of execution.
392  std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0);
393 
394  /// Accumulated user time. If multiple threads of execution are merged into
395  /// this timer, each thread's user time is added here.
396  std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0);
397 
398  /// The thread on which this timer is running.
399  uint64_t threadId;
400 
401  /// A descriptive name for this timer.
402  std::string name;
403 
404  /// Whether to omit this timer from reports and directly show its children.
405  bool hidden = false;
406 
407  /// Child timers on the same thread the timer itself. We keep at most one
408  /// timer per unique identifier.
409  ChildrenMap children;
410 
411  /// Child timers on other threads. We keep at most one timer per unique
412  /// identifier.
413  AsyncChildrenMap asyncChildren;
414 
415  /// Mutex for the async children.
416  std::mutex asyncMutex;
417 };
418 
419 } // namespace
420 
421 //===----------------------------------------------------------------------===//
422 // DefaultTimingManager
423 //===----------------------------------------------------------------------===//
424 
425 namespace mlir {
426 namespace detail {
427 
428 /// Implementation details of the `DefaultTimingManager`.
430 public:
431  /// Whether we should do our work or not.
432  bool enabled = false;
433 
434  /// The configured display mode.
435  DisplayMode displayMode = DisplayMode::Tree;
436 
437  /// The stream where we should print our output. This will always be non-null.
438  raw_ostream *output = &llvm::errs();
439 
440  /// The root timer.
441  std::unique_ptr<TimerImpl> rootTimer;
442 };
443 
444 } // namespace detail
445 } // namespace mlir
446 
448  : impl(std::make_unique<DefaultTimingManagerImpl>()) {
449  clear(); // initializes the root timer
450 }
451 
453 
454 /// Enable or disable execution time sampling.
455 void DefaultTimingManager::setEnabled(bool enabled) { impl->enabled = enabled; }
456 
457 /// Return whether execution time sampling is enabled.
458 bool DefaultTimingManager::isEnabled() const { return impl->enabled; }
459 
460 /// Change the display mode.
462  impl->displayMode = displayMode;
463 }
464 
465 /// Return the current display mode;
467  return impl->displayMode;
468 }
469 
470 /// Change the stream where the output will be printed to.
471 void DefaultTimingManager::setOutput(raw_ostream &os) { impl->output = &os; }
472 
473 /// Return the current output stream where the output will be printed to.
474 raw_ostream &DefaultTimingManager::getOutput() const {
475  assert(impl->output);
476  return *impl->output;
477 }
478 
479 /// Print and clear the timing results.
481  if (impl->enabled) {
482  impl->rootTimer->finalize();
483  impl->rootTimer->print(*impl->output, impl->displayMode);
484  }
485  clear();
486 }
487 
488 /// Clear the timing results.
490  impl->rootTimer = std::make_unique<TimerImpl>("root");
491  impl->rootTimer->hidden = true;
492 }
493 
494 /// Debug print the timer data structures to an output stream.
495 void DefaultTimingManager::dumpTimers(raw_ostream &os) {
496  impl->rootTimer->dump(os);
497 }
498 
499 /// Debug print the timers as a list.
500 void DefaultTimingManager::dumpAsList(raw_ostream &os) {
501  impl->rootTimer->finalize();
502  impl->rootTimer->print(os, DisplayMode::List);
503 }
504 
505 /// Debug print the timers as a tree.
506 void DefaultTimingManager::dumpAsTree(raw_ostream &os) {
507  impl->rootTimer->finalize();
508  impl->rootTimer->print(os, DisplayMode::Tree);
509 }
510 
512  if (impl->enabled)
513  return impl->rootTimer.get();
514  return llvm::None;
515 }
516 
518  static_cast<TimerImpl *>(handle)->start();
519 }
520 
522  static_cast<TimerImpl *>(handle)->stop();
523 }
524 
525 void *DefaultTimingManager::nestTimer(void *handle, const void *id,
526  function_ref<std::string()> nameBuilder) {
527  return static_cast<TimerImpl *>(handle)->nest(id, nameBuilder);
528 }
529 
531  static_cast<TimerImpl *>(handle)->hidden = true;
532 }
533 
534 //===----------------------------------------------------------------------===//
535 // DefaultTimingManager Command Line Options
536 //===----------------------------------------------------------------------===//
537 
538 namespace {
539 struct DefaultTimingManagerOptions {
540  llvm::cl::opt<bool> timing{"mlir-timing",
541  llvm::cl::desc("Display execution times"),
542  llvm::cl::init(false)};
543  llvm::cl::opt<DisplayMode> displayMode{
544  "mlir-timing-display", llvm::cl::desc("Display method for timing data"),
545  llvm::cl::init(DisplayMode::Tree),
546  llvm::cl::values(
547  clEnumValN(DisplayMode::List, "list",
548  "display the results in a list sorted by total time"),
549  clEnumValN(DisplayMode::Tree, "tree",
550  "display the results ina with a nested tree view"))};
551 };
552 } // namespace
553 
554 static llvm::ManagedStatic<DefaultTimingManagerOptions> options;
555 
557  // Make sure that the options struct has been constructed.
558  *options;
559 }
560 
562  if (!options.isConstructed())
563  return;
564  tm.setEnabled(options->timing);
565  tm.setDisplayMode(options->displayMode);
566 }
Include the generated interface declarations.
virtual ~TimingManager()
In this mode the results are displayed in a tree view, with child timers nested under their parents...
In this mode the results are displayed in a list sorted by total time, with timers aggregated into on...
llvm::StringSet< llvm::BumpPtrAllocator & > identifiers
Definition: Timing.cpp:49
void startTimer(void *handle) override
Start the timer with the given handle.
Definition: Timing.cpp:517
void setDisplayMode(DisplayMode displayMode)
Change the display mode.
Definition: Timing.cpp:461
virtual Optional< void * > rootTimer()=0
Return the root timer.
void setEnabled(bool enabled)
Enable or disable execution time sampling.
Definition: Timing.cpp:455
raw_ostream & getOutput() const
Return the current output stream where the output will be printed to.
Definition: Timing.cpp:474
constexpr llvm::StringLiteral kTimingDescription
Definition: Timing.cpp:35
llvm::BumpPtrAllocator identifierAllocator
Definition: Timing.cpp:48
void setOutput(raw_ostream &os)
Change the stream where the output will be printed to.
Definition: Timing.cpp:471
void clear()
Clear the timing results.
Definition: Timing.cpp:489
DisplayMode getDisplayMode() const
Return the current display mode;.
Definition: Timing.cpp:466
void dumpAsTree(raw_ostream &os=llvm::errs())
Debug print the timers as a tree.
Definition: Timing.cpp:506
This class represents facilities to measure execution time.
Definition: Timing.h:112
Implementation details of the DefaultTimingManager.
Definition: Timing.cpp:429
void * nestTimer(void *handle, const void *id, function_ref< std::string()> nameBuilder) override
Create a child timer nested within the one with the given handle.
Definition: Timing.cpp:525
Private implementation details of the TimingManager.
Definition: Timing.cpp:45
void dumpTimers(raw_ostream &os=llvm::errs())
Debug print the timer data structures to an output stream.
Definition: Timing.cpp:495
friend class TimingIdentifier
Definition: Timing.h:167
Facilities for time measurement and report printing to an output stream.
Definition: Timing.h:340
std::unique_ptr< TimerImpl > rootTimer
The root timer.
Definition: Timing.cpp:441
~DefaultTimingManager() override
Definition: Timing.cpp:452
llvm::sys::SmartRWMutex< true > identifierMutex
Definition: Timing.cpp:50
static void printTimeEntry(raw_ostream &os, unsigned indent, StringRef name, TimeRecord time, TimeRecord total)
Utility to print a single line entry in the timer output.
Definition: Timing.cpp:141
void print()
Print and clear the timing results.
Definition: Timing.cpp:480
bool isEnabled() const
Return whether execution time sampling is enabled.
Definition: Timing.cpp:458
const std::unique_ptr< detail::TimingManagerImpl > impl
Definition: Timing.h:164
static void printTimeHeader(raw_ostream &os, TimeRecord total)
Utility to print the timer heading information.
Definition: Timing.cpp:148
A handle for a timer in a TimingManager.
Definition: Timing.h:185
void applyDefaultTimingManagerCLOptions(DefaultTimingManager &tm)
Apply any values that were registered with &#39;registerDefaultTimingManagerOptions&#39; to a DefaultTimingMa...
Definition: Timing.cpp:561
static llvm::ManagedStatic< DefaultTimingManagerOptions > options
Definition: Timing.cpp:554
Optional< void * > rootTimer() override
Return the root timer.
Definition: Timing.cpp:511
void stopTimer(void *handle) override
Stop the timer with the given handle.
Definition: Timing.cpp:521
static void print(ArrayType type, DialectAsmPrinter &os)
Timer getRootTimer()
Get the root timer of this timing manager.
Definition: Timing.cpp:66
An RAII-style wrapper around a timer that ensures the timer is properly started and stopped...
Definition: Timing.h:271
void dumpAsList(raw_ostream &os=llvm::errs())
Debug print the timers as a list.
Definition: Timing.cpp:500
This class represesents a uniqued string owned by a TimingManager.
Definition: Timing.h:45
ThreadLocalCache< llvm::StringMap< llvm::StringMapEntry< llvm::NoneType > * > > localIdentifierCache
A thread local cache of identifiers to reduce lock contention.
Definition: Timing.cpp:54
void hideTimer(void *handle) override
Hide the timer in timing reports and directly show its children.
Definition: Timing.cpp:530
TimingScope getRootScope()
Get the root timer of this timing manager wrapped in a TimingScope for convenience.
Definition: Timing.cpp:72
void registerDefaultTimingManagerCLOptions()
Register a set of useful command-line options that can be used to configure a DefaultTimingManager.
Definition: Timing.cpp:556
static TimingIdentifier get(StringRef str, TimingManager &tm)
Return an identifier for the specified string.
Definition: Timing.cpp:81
DisplayMode
The different display modes for printing the timers.
Definition: Timing.h:343
friend class Timer
Definition: Timing.h:132
This class provides support for defining a thread local object with non static storage duration...
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)