15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/StringMap.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Support/Allocator.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Support/Format.h"
22 #include "llvm/Support/ManagedStatic.h"
23 #include "llvm/Support/RWMutex.h"
24 #include "llvm/Support/Threading.h"
25 #include "llvm/Support/raw_ostream.h"
31 using namespace detail;
36 "... Execution time report ...";
84 auto *&localEntry = (*
impl.localIdentifierCache)[
str];
90 llvm::sys::SmartScopedReader<true> contextLock(
impl.identifierMutex);
91 auto it =
impl.identifiers.find(
str);
92 if (it !=
impl.identifiers.end()) {
99 llvm::sys::SmartScopedWriter<true> contextLock(
impl.identifierMutex);
100 auto it =
impl.identifiers.insert(
str).first;
115 void printHeader(
const TimeRecord &total)
override {
118 os <<
"===" << std::string(73,
'-') <<
"===\n";
120 os <<
"===" << std::string(73,
'-') <<
"===\n";
123 os << llvm::format(
" Total Execution Time: %.4f seconds\n\n", total.
wall);
125 os <<
" ----User Time----";
126 os <<
" ----Wall Time---- ----Name----\n";
/// Finish the report. This override writes no footer text; it only flushes
/// the output stream `os`.
129 void printFooter()
override { os.flush(); }
133 os << llvm::format(
" %8.4f (%5.1f%%)", time.
user,
136 os << llvm::format(
" %8.4f (%5.1f%%) ", time.
wall,
140 void printListEntry(StringRef name,
const TimeRecord &time,
141 const TimeRecord &total,
bool lastEntry)
override {
142 printTime(time, total);
146 void printTreeEntry(
unsigned indent, StringRef name,
const TimeRecord &time,
148 printTime(time, total);
149 os.indent(indent) << name <<
"\n";
/// Called when a tree entry ends. Deliberately empty in this override:
/// nothing is written, and both `indent` and `lastEntry` are unused.
152 void printTreeEntryEnd(
unsigned indent,
bool lastEntry)
override {}
/// Begin the report by writing an opening "[" followed by a newline.
/// `total` is not used by this override.
/// NOTE(review): presumably this opens the top-level JSON array of entries;
/// confirm against the matching printFooter.
159 void printHeader(
const TimeRecord &total)
override { os <<
"[" <<
"\n"; }
161 void printFooter()
override {
169 os <<
"\"duration\": " << llvm::format(
"%8.4f", time.
user) <<
", ";
170 os <<
"\"percentage\": "
171 << llvm::format(
"%5.1f", 100.0 * time.
user / total.
user);
175 os <<
"\"duration\": " << llvm::format(
"%8.4f", time.
wall) <<
", ";
176 os <<
"\"percentage\": "
177 << llvm::format(
"%5.1f", 100.0 * time.
wall / total.
wall);
181 void printListEntry(StringRef name,
const TimeRecord &time,
182 const TimeRecord &total,
bool lastEntry)
override {
184 printTime(time, total);
185 os <<
", \"name\": " <<
"\"" << name <<
"\"";
192 void printTreeEntry(
unsigned indent, StringRef name,
const TimeRecord &time,
194 os.indent(indent) <<
"{";
195 printTime(time, total);
196 os <<
", \"name\": " <<
"\"" << name <<
"\"";
197 os <<
", \"passes\": [" <<
"\n";
200 void printTreeEntryEnd(
unsigned indent,
bool lastEntry)
override {
201 os.indent(indent) <<
"{}]";
/// Child timers keyed by an opaque id pointer; each entry owns its TimerImpl.
/// llvm::MapVector iterates in insertion order.
224 using ChildrenMap = llvm::MapVector<const void *, std::unique_ptr<TimerImpl>>;
/// Construct a timer called `name` that reports through `output`.
///
/// The id of the constructing thread (llvm::get_threadid()) is recorded in
/// `threadId`. `output` is stored as a reference to the caller's unique_ptr,
/// not a copy, so that pointer object must outlive this timer.
///
/// `name` is a named rvalue reference and therefore an lvalue inside the
/// constructor; it has to be moved explicitly, otherwise the member would be
/// copy-constructed from it.
227 TimerImpl(std::string &&name, std::unique_ptr<OutputStrategy> &output)
228 : threadId(
llvm::get_threadid()), name(std::move(name)), output(output) {}
/// Record the current std::chrono::steady_clock time in `startTime`.
231 void start() { startTime = std::chrono::steady_clock::now(); }
235 auto newTime = std::chrono::steady_clock::now() - startTime;
246 TimerImpl *nest(
const void *
id,
function_ref<std::string()> nameBuilder) {
247 auto tid = llvm::get_threadid();
249 return nestTail(children[
id], nameBuilder);
250 std::unique_lock<std::mutex> lock(asyncMutex);
251 return nestTail(asyncChildren[tid][
id], nameBuilder);
255 TimerImpl *nestTail(std::unique_ptr<TimerImpl> &child,
258 child = std::make_unique<TimerImpl>(nameBuilder(), output);
272 mergeAsyncChildren();
278 std::chrono::nanoseconds addAsyncUserTime() {
279 auto added = std::chrono::nanoseconds(0);
280 for (
auto &child : children)
281 added += child.second->addAsyncUserTime();
282 for (
auto &thread : asyncChildren) {
283 for (
auto &child : thread.second) {
284 child.second->addAsyncUserTime();
285 added += child.second->userTime;
294 void mergeAsyncChildren() {
295 for (
auto &child : children)
296 child.second->mergeAsyncChildren();
297 mergeChildren(std::move(asyncChildren));
298 assert(asyncChildren.empty());
306 void mergeChildren(ChildrenMap &&other) {
307 if (children.empty()) {
308 children = std::move(other);
309 for (
auto &child : children)
310 child.second->mergeAsyncChildren();
312 for (
auto &child : other)
313 mergeChild(child.first, std::move(child.second));
319 void mergeChildren(AsyncChildrenMap &&other) {
320 for (
auto &thread : other) {
321 mergeChildren(std::move(thread.second));
322 assert(thread.second.empty());
331 void mergeChild(
const void *
id, std::unique_ptr<TimerImpl> &&other) {
332 auto &into = children[id];
334 into = std::move(other);
335 into->mergeAsyncChildren();
337 into->wallTime =
std::max(into->wallTime, other->wallTime);
338 into->userTime += other->userTime;
339 into->mergeChildren(std::move(other->children));
340 into->mergeChildren(std::move(other->asyncChildren));
348 void dump(raw_ostream &os,
unsigned indent = 0,
unsigned markThreadId = 0) {
349 auto time = getTimeRecord();
350 os << std::string(indent * 2,
' ') << name <<
" [" << threadId <<
"]"
351 << llvm::format(
" %7.4f / %7.4f", time.
user, time.
wall);
352 if (threadId != markThreadId && markThreadId != 0)
355 for (
auto &child : children)
356 child.second->dump(os, indent + 1, threadId);
357 for (
auto &thread : asyncChildren)
358 for (
auto &child : thread.second)
359 child.second->dump(os, indent + 1, threadId);
365 std::chrono::duration_cast<std::chrono::duration<double>>(wallTime)
367 std::chrono::duration_cast<std::chrono::duration<double>>(userTime)
374 llvm::StringMap<TimeRecord> mergedTimers;
375 std::function<void(TimerImpl *)> addTimer = [&](TimerImpl *timer) {
376 mergedTimers[timer->name] += timer->getTimeRecord();
377 for (
auto &children : timer->children)
378 addTimer(children.second.get());
383 std::vector<std::pair<StringRef, TimeRecord>> timerNameAndTime;
384 for (
auto &it : mergedTimers)
385 timerNameAndTime.emplace_back(it.first(), it.second);
386 llvm::array_pod_sort(timerNameAndTime.begin(), timerNameAndTime.end(),
387 [](
const std::pair<StringRef, TimeRecord> *lhs,
388 const std::pair<StringRef, TimeRecord> *rhs) {
389 return llvm::array_pod_sort_comparator<double>(
390 &rhs->second.wall, &lhs->second.wall);
394 for (
auto &timeData : timerNameAndTime)
395 output->printListEntry(timeData.first, timeData.second, total);
399 void printAsTree(
TimeRecord total,
unsigned indent = 0) {
400 unsigned childIndent = indent;
402 output->printTreeEntry(indent, name, getTimeRecord(), total);
405 for (
auto &child : children) {
406 child.second->printAsTree(total, childIndent);
409 output->printTreeEntryEnd(indent);
416 auto total = getTimeRecord();
417 output->printHeader(total);
420 switch (displayMode) {
421 case DisplayMode::List:
424 case DisplayMode::Tree:
432 for (
auto &child : children)
433 rest -= child.second->getTimeRecord();
434 output->printListEntry(
"Rest", rest, total);
435 output->printListEntry(
"Total", total, total,
true);
436 output->printFooter();
/// Steady-clock time point written by start().
440 std::chrono::time_point<std::chrono::steady_clock> startTime;
/// Wall time in nanoseconds, initially zero. mergeChild() keeps the larger of
/// the two timers' values.
445 std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0);
/// User time in nanoseconds, initially zero. mergeChild() adds the merged
/// timer's value to this one.
449 std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0);
/// Child timers, keyed by id.
462 ChildrenMap children;
/// Child timers indexed first by thread id and then by timer id (see nest()).
466 AsyncChildrenMap asyncChildren;
/// Locked in nest() before `asyncChildren` is accessed.
469 std::mutex asyncMutex;
/// Reference to the output-strategy pointer passed to the constructor; used
/// by the print routines. The referenced unique_ptr is not owned here.
471 std::unique_ptr<OutputStrategy> &output;
501 out(std::make_unique<OutputTextStrategy>(
llvm::errs())) {
515 impl->displayMode = displayMode;
520 return impl->displayMode;
525 out = std::move(output);
531 impl->rootTimer->finalize();
532 impl->rootTimer->print(
impl->displayMode);
539 impl->rootTimer = std::make_unique<TimerImpl>(
"root", out);
540 impl->rootTimer->hidden =
true;
545 impl->rootTimer->dump(os);
550 impl->rootTimer->finalize();
556 impl->rootTimer->finalize();
562 return impl->rootTimer.get();
567 static_cast<TimerImpl *
>(handle)->start();
571 static_cast<TimerImpl *
>(handle)->stop();
576 return static_cast<TimerImpl *
>(handle)->nest(
id, nameBuilder);
580 static_cast<TimerImpl *
>(handle)->hidden =
true;
588 struct DefaultTimingManagerOptions {
589 llvm::cl::opt<bool> timing{
"mlir-timing",
590 llvm::cl::desc(
"Display execution times"),
591 llvm::cl::init(
false)};
592 llvm::cl::opt<DisplayMode> displayMode{
593 "mlir-timing-display", llvm::cl::desc(
"Display method for timing data"),
594 llvm::cl::init(DisplayMode::Tree),
596 clEnumValN(DisplayMode::List,
"list",
597 "display the results in a list sorted by total time"),
598 clEnumValN(DisplayMode::Tree,
"tree",
599 "display the results ina with a nested tree view"))};
600 llvm::cl::opt<OutputFormat> outputFormat{
601 "mlir-output-format", llvm::cl::desc(
"Output format for timing data"),
602 llvm::cl::init(OutputFormat::Text),
603 llvm::cl::values(clEnumValN(OutputFormat::Text,
"text",
604 "display the results in text format"),
605 clEnumValN(OutputFormat::Json,
"json",
606 "display the results in JSON format"))};
/// File-local holder for the DefaultTimingManagerOptions command-line options.
/// llvm::ManagedStatic constructs the object on first access.
610 static llvm::ManagedStatic<DefaultTimingManagerOptions>
options;
623 std::unique_ptr<OutputStrategy> printer;
624 if (
options->outputFormat == OutputFormat::Text)
625 printer = std::make_unique<OutputTextStrategy>(llvm::errs());
626 else if (
options->outputFormat == OutputFormat::Json)
627 printer = std::make_unique<OutputJsonStrategy>(llvm::errs());
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static void print(spirv::VerCapExtAttr triple, DialectAsmPrinter &printer)
static llvm::ManagedStatic< DefaultTimingManagerOptions > options
constexpr llvm::StringLiteral kTimingDescription
Facilities for time measurement and report printing to an output stream.
void setDisplayMode(DisplayMode displayMode)
Change the display mode.
void stopTimer(void *handle) override
Stop the timer with the given handle.
std::optional< void * > rootTimer() override
Return the root timer.
DisplayMode
The different display modes for printing the timers.
@ Tree
In this mode the results are displayed in a tree view, with child timers nested under their parents.
@ List
In this mode the results are displayed in a list sorted by total time, with timers aggregated into on...
void setOutput(std::unique_ptr< OutputStrategy > output)
Change the stream where the output will be printed to.
void dumpTimers(raw_ostream &os=llvm::errs())
Debug print the timer data structures to an output stream.
void * nestTimer(void *handle, const void *id, function_ref< std::string()> nameBuilder) override
Create a child timer nested within the one with the given handle.
void startTimer(void *handle) override
Start the timer with the given handle.
DisplayMode getDisplayMode() const
Return the current display mode.
void dumpAsList(raw_ostream &os=llvm::errs())
Debug print the timers as a list.
void dumpAsTree(raw_ostream &os=llvm::errs())
Debug print the timers as a tree.
void clear()
Clear the timing results.
~DefaultTimingManager() override
void print()
Print and clear the timing results.
void setEnabled(bool enabled)
Enable or disable execution time sampling.
OutputFormat
The different output formats for printing the timers.
bool isEnabled() const
Return whether execution time sampling is enabled.
void hideTimer(void *handle) override
Hide the timer in timing reports and directly show its children.
Facilities for printing timing reports to various output formats.
This class provides support for defining a thread local object with non static storage duration.
A handle for a timer in a TimingManager.
This class represents a uniqued string owned by a TimingManager.
std::string str() const
Return an std::string.
TimingIdentifier(const TimingIdentifier &)=default
static TimingIdentifier get(StringRef str, TimingManager &tm)
Return an identifier for the specified string.
This class represents facilities to measure execution time.
TimingScope getRootScope()
Get the root timer of this timing manager wrapped in a TimingScope for convenience.
const std::unique_ptr< detail::TimingManagerImpl > impl
virtual std::optional< void * > rootTimer()=0
Return the root timer.
Timer getRootTimer()
Get the root timer of this timing manager.
An RAII-style wrapper around a timer that ensures the timer is properly started and stopped.
Implementation details of the DefaultTimingManager.
bool enabled
Whether we should do our work or not.
std::unique_ptr< TimerImpl > rootTimer
The root timer.
DisplayMode displayMode
The configured display mode.
Private implementation details of the TimingManager.
llvm::sys::SmartRWMutex< true > identifierMutex
ThreadLocalCache< llvm::StringMap< llvm::StringMapEntry< std::nullopt_t > * > > localIdentifierCache
A thread local cache of identifiers to reduce lock contention.
llvm::StringSet< llvm::BumpPtrAllocator & > identifiers
llvm::BumpPtrAllocator identifierAllocator
The OpAsmOpInterface, see OpAsmInterface.td for more details.
Include the generated interface declarations.
void registerDefaultTimingManagerCLOptions()
Register a set of useful command-line options that can be used to configure a DefaultTimingManager.
void applyDefaultTimingManagerCLOptions(DefaultTimingManager &tm)
Apply any values that were registered with 'registerDefaultTimingManagerOptions' to a DefaultTimingMa...
Simple record class to record timing information.