15 #include "llvm/ADT/MapVector.h" 16 #include "llvm/ADT/Statistic.h" 17 #include "llvm/ADT/StringMap.h" 18 #include "llvm/ADT/StringSet.h" 19 #include "llvm/Support/Allocator.h" 20 #include "llvm/Support/CommandLine.h" 21 #include "llvm/Support/Format.h" 22 #include "llvm/Support/FormatVariadic.h" 23 #include "llvm/Support/ManagedStatic.h" 24 #include "llvm/Support/RWMutex.h" 25 #include "llvm/Support/Threading.h" 26 #include "llvm/Support/raw_ostream.h" 32 using namespace detail;
36 "... Execution time report ...";
68 return rt.hasValue() ?
Timer(*
this, rt.getValue()) :
Timer();
84 auto *&localEntry = (*
impl.localIdentifierCache)[str];
90 llvm::sys::SmartScopedReader<true> contextLock(
impl.identifierMutex);
91 auto it =
impl.identifiers.find(str);
92 if (it !=
impl.identifiers.end()) {
99 llvm::sys::SmartScopedWriter<true> contextLock(
impl.identifierMutex);
100 auto it =
impl.identifiers.insert(str).first;
113 TimeRecord(
double wall = 0.0,
double user = 0.0) : wall(wall), user(user) {}
115 TimeRecord &operator+=(
const TimeRecord &other) {
121 TimeRecord &operator-=(
const TimeRecord &other) {
129 void print(raw_ostream &os,
const TimeRecord &total) {
130 if (total.user != total.wall)
131 os << llvm::format(
" %8.4f (%5.1f%%)", user, 100.0 * user / total.user);
132 os << llvm::format(
" %8.4f (%5.1f%%) ", wall, 100.0 * wall / total.wall);
142 TimeRecord time, TimeRecord total) {
143 time.print(os, total);
144 os.indent(indent) << name <<
"\n";
151 os <<
"===" << std::string(73,
'-') <<
"===\n";
153 os <<
"===" << std::string(73,
'-') <<
"===\n";
156 os << llvm::format(
" Total Execution Time: %.4f seconds\n\n", total.wall);
157 if (total.user != total.wall)
158 os <<
" ----User Time----";
159 os <<
" ----Wall Time---- ----Name----\n";
175 using ChildrenMap = llvm::MapVector<const void *, std::unique_ptr<TimerImpl>>;
178 TimerImpl(std::string &&name) : threadId(llvm::get_threadid()), name(name) {}
181 void start() { startTime = std::chrono::steady_clock::now(); }
185 auto newTime = std::chrono::steady_clock::now() - startTime;
196 TimerImpl *nest(
const void *
id,
function_ref<std::string()> nameBuilder) {
197 auto tid = llvm::get_threadid();
199 return nestTail(children[
id], nameBuilder);
200 std::unique_lock<std::mutex> lock(asyncMutex);
201 return nestTail(asyncChildren[tid][
id], nameBuilder);
205 TimerImpl *nestTail(std::unique_ptr<TimerImpl> &child,
208 child = std::make_unique<TimerImpl>(nameBuilder());
222 mergeAsyncChildren();
228 std::chrono::nanoseconds addAsyncUserTime() {
229 auto added = std::chrono::nanoseconds(0);
230 for (
auto &child : children)
231 added += child.second->addAsyncUserTime();
232 for (
auto &thread : asyncChildren) {
233 for (
auto &child : thread.second) {
234 child.second->addAsyncUserTime();
235 added += child.second->userTime;
244 void mergeAsyncChildren() {
245 for (
auto &child : children)
246 child.second->mergeAsyncChildren();
247 mergeChildren(std::move(asyncChildren));
248 assert(asyncChildren.empty());
256 void mergeChildren(ChildrenMap &&other) {
257 if (children.empty()) {
258 children = std::move(other);
259 for (
auto &child : children)
260 child.second->mergeAsyncChildren();
262 for (
auto &child : other)
263 mergeChild(child.first, std::move(child.second));
269 void mergeChildren(AsyncChildrenMap &&other) {
270 for (
auto &thread : other) {
271 mergeChildren(std::move(thread.second));
272 assert(thread.second.empty());
281 void mergeChild(
const void *
id, std::unique_ptr<TimerImpl> &&other) {
282 auto &into = children[id];
284 into = std::move(other);
285 into->mergeAsyncChildren();
287 into->wallTime =
std::max(into->wallTime, other->wallTime);
288 into->userTime += other->userTime;
289 into->mergeChildren(std::move(other->children));
290 into->mergeChildren(std::move(other->asyncChildren));
298 void dump(raw_ostream &os,
unsigned indent = 0,
unsigned markThreadId = 0) {
299 auto time = getTimeRecord();
300 os << std::string(indent * 2,
' ') << name <<
" [" << threadId <<
"]" 301 << llvm::format(
" %7.4f / %7.4f", time.user, time.wall);
302 if (threadId != markThreadId && markThreadId != 0)
305 for (
auto &child : children)
306 child.second->dump(os, indent + 1, threadId);
307 for (
auto &thread : asyncChildren)
308 for (
auto &child : thread.second)
309 child.second->dump(os, indent + 1, threadId);
313 TimeRecord getTimeRecord() {
315 std::chrono::duration_cast<std::chrono::duration<double>>(wallTime)
317 std::chrono::duration_cast<std::chrono::duration<double>>(userTime)
322 void printAsList(raw_ostream &os, TimeRecord total) {
324 llvm::StringMap<TimeRecord> mergedTimers;
325 std::function<void(TimerImpl *)> addTimer = [&](TimerImpl *timer) {
326 mergedTimers[timer->name] += timer->getTimeRecord();
327 for (
auto &children : timer->children)
328 addTimer(children.second.get());
333 std::vector<std::pair<StringRef, TimeRecord>> timerNameAndTime;
334 for (
auto &it : mergedTimers)
335 timerNameAndTime.emplace_back(it.first(), it.second);
336 llvm::array_pod_sort(timerNameAndTime.begin(), timerNameAndTime.end(),
337 [](
const std::pair<StringRef, TimeRecord> *lhs,
338 const std::pair<StringRef, TimeRecord> *rhs) {
339 return llvm::array_pod_sort_comparator<double>(
340 &rhs->second.wall, &lhs->second.wall);
344 for (
auto &timeData : timerNameAndTime)
349 void printAsTree(raw_ostream &os, TimeRecord total,
unsigned indent = 0) {
350 unsigned childIndent = indent;
355 for (
auto &child : children) {
356 child.second->printAsTree(os, total, childIndent);
363 auto total = getTimeRecord();
367 switch (displayMode) {
369 printAsList(os, total);
371 case DisplayMode::Tree:
372 printAsTree(os, total);
379 for (
auto &child : children)
380 rest -= child.second->getTimeRecord();
387 std::chrono::time_point<std::chrono::steady_clock> startTime;
392 std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0);
396 std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0);
409 ChildrenMap children;
413 AsyncChildrenMap asyncChildren;
416 std::mutex asyncMutex;
432 bool enabled =
false;
438 raw_ostream *output = &llvm::errs();
462 impl->displayMode = displayMode;
467 return impl->displayMode;
475 assert(
impl->output);
476 return *
impl->output;
482 impl->rootTimer->finalize();
483 impl->rootTimer->print(*
impl->output,
impl->displayMode);
490 impl->rootTimer = std::make_unique<TimerImpl>(
"root");
491 impl->rootTimer->hidden =
true;
496 impl->rootTimer->dump(os);
501 impl->rootTimer->finalize();
507 impl->rootTimer->finalize();
513 return impl->rootTimer.get();
518 static_cast<TimerImpl *
>(handle)->start();
522 static_cast<TimerImpl *
>(handle)->stop();
527 return static_cast<TimerImpl *
>(handle)->nest(
id, nameBuilder);
531 static_cast<TimerImpl *
>(handle)->hidden =
true;
539 struct DefaultTimingManagerOptions {
540 llvm::cl::opt<bool> timing{
"mlir-timing",
541 llvm::cl::desc(
"Display execution times"),
542 llvm::cl::init(
false)};
543 llvm::cl::opt<DisplayMode> displayMode{
544 "mlir-timing-display", llvm::cl::desc(
"Display method for timing data"),
548 "display the results in a list sorted by total time"),
550 "display the results ina with a nested tree view"))};
554 static llvm::ManagedStatic<DefaultTimingManagerOptions>
options;
562 if (!options.isConstructed())
Include the generated interface declarations.
In this mode the results are displayed in a tree view, with child timers nested under their parents...
In this mode the results are displayed in a list sorted by total time, with timers aggregated into on...
llvm::StringSet< llvm::BumpPtrAllocator & > identifiers
void startTimer(void *handle) override
Start the timer with the given handle.
void setDisplayMode(DisplayMode displayMode)
Change the display mode.
virtual Optional< void * > rootTimer()=0
Return the root timer.
void setEnabled(bool enabled)
Enable or disable execution time sampling.
raw_ostream & getOutput() const
Return the current output stream where the output will be printed to.
constexpr llvm::StringLiteral kTimingDescription
llvm::BumpPtrAllocator identifierAllocator
void setOutput(raw_ostream &os)
Change the stream where the output will be printed to.
void clear()
Clear the timing results.
DisplayMode getDisplayMode() const
Return the current display mode.
void dumpAsTree(raw_ostream &os=llvm::errs())
Debug print the timers as a tree.
This class represents facilities to measure execution time.
Implementation details of the DefaultTimingManager.
void * nestTimer(void *handle, const void *id, function_ref< std::string()> nameBuilder) override
Create a child timer nested within the one with the given handle.
Private implementation details of the TimingManager.
void dumpTimers(raw_ostream &os=llvm::errs())
Debug print the timer data structures to an output stream.
friend class TimingIdentifier
Facilities for time measurement and report printing to an output stream.
std::unique_ptr< TimerImpl > rootTimer
The root timer.
~DefaultTimingManager() override
llvm::sys::SmartRWMutex< true > identifierMutex
static void printTimeEntry(raw_ostream &os, unsigned indent, StringRef name, TimeRecord time, TimeRecord total)
Utility to print a single line entry in the timer output.
void print()
Print and clear the timing results.
bool isEnabled() const
Return whether execution time sampling is enabled.
const std::unique_ptr< detail::TimingManagerImpl > impl
static void printTimeHeader(raw_ostream &os, TimeRecord total)
Utility to print the timer heading information.
A handle for a timer in a TimingManager.
void applyDefaultTimingManagerCLOptions(DefaultTimingManager &tm)
Apply any values that were registered with 'registerDefaultTimingManagerOptions' to a DefaultTimingMa...
static llvm::ManagedStatic< DefaultTimingManagerOptions > options
Optional< void * > rootTimer() override
Return the root timer.
void stopTimer(void *handle) override
Stop the timer with the given handle.
static void print(ArrayType type, DialectAsmPrinter &os)
Timer getRootTimer()
Get the root timer of this timing manager.
An RAII-style wrapper around a timer that ensures the timer is properly started and stopped...
void dumpAsList(raw_ostream &os=llvm::errs())
Debug print the timers as a list.
This class represents a uniqued string owned by a TimingManager.
ThreadLocalCache< llvm::StringMap< llvm::StringMapEntry< llvm::NoneType > * > > localIdentifierCache
A thread local cache of identifiers to reduce lock contention.
void hideTimer(void *handle) override
Hide the timer in timing reports and directly show its children.
TimingScope getRootScope()
Get the root timer of this timing manager wrapped in a TimingScope for convenience.
void registerDefaultTimingManagerCLOptions()
Register a set of useful command-line options that can be used to configure a DefaultTimingManager.
static TimingIdentifier get(StringRef str, TimingManager &tm)
Return an identifier for the specified string.
DisplayMode
The different display modes for printing the timers.
This class provides support for defining a thread local object with non static storage duration...
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)