15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/StringMap.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Support/Allocator.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Support/Format.h"
22 #include "llvm/Support/FormatVariadic.h"
23 #include "llvm/Support/ManagedStatic.h"
24 #include "llvm/Support/RWMutex.h"
25 #include "llvm/Support/Threading.h"
26 #include "llvm/Support/raw_ostream.h"
33 using namespace detail;
38 "... Execution time report ...";
86 auto *&localEntry = (*
impl.localIdentifierCache)[
str];
92 llvm::sys::SmartScopedReader<true> contextLock(
impl.identifierMutex);
93 auto it =
impl.identifiers.find(
str);
94 if (it !=
impl.identifiers.end()) {
101 llvm::sys::SmartScopedWriter<true> contextLock(
impl.identifierMutex);
102 auto it =
impl.identifiers.insert(
str).first;
117 void printHeader(
const TimeRecord &total)
override {
120 os <<
"===" << std::string(73,
'-') <<
"===\n";
122 os <<
"===" << std::string(73,
'-') <<
"===\n";
125 os << llvm::format(
" Total Execution Time: %.4f seconds\n\n", total.
wall);
127 os <<
" ----User Time----";
128 os <<
" ----Wall Time---- ----Name----\n";
131 void printFooter()
override { os.flush(); }
135 os << llvm::format(
" %8.4f (%5.1f%%)", time.
user,
138 os << llvm::format(
" %8.4f (%5.1f%%) ", time.
wall,
142 void printListEntry(StringRef name,
const TimeRecord &time,
143 const TimeRecord &total,
bool lastEntry)
override {
144 printTime(time, total);
148 void printTreeEntry(
unsigned indent, StringRef name,
const TimeRecord &time,
150 printTime(time, total);
151 os.indent(indent) << name <<
"\n";
154 void printTreeEntryEnd(
unsigned indent,
bool lastEntry)
override {}
161 void printHeader(
const TimeRecord &total)
override { os <<
"[" <<
"\n"; }
163 void printFooter()
override {
171 os <<
"\"duration\": " << llvm::format(
"%8.4f", time.
user) <<
", ";
172 os <<
"\"percentage\": "
173 << llvm::format(
"%5.1f", 100.0 * time.
user / total.
user);
177 os <<
"\"duration\": " << llvm::format(
"%8.4f", time.
wall) <<
", ";
178 os <<
"\"percentage\": "
179 << llvm::format(
"%5.1f", 100.0 * time.
wall / total.
wall);
183 void printListEntry(StringRef name,
const TimeRecord &time,
184 const TimeRecord &total,
bool lastEntry)
override {
186 printTime(time, total);
187 os <<
", \"name\": " <<
"\"" << name <<
"\"";
194 void printTreeEntry(
unsigned indent, StringRef name,
const TimeRecord &time,
196 os.indent(indent) <<
"{";
197 printTime(time, total);
198 os <<
", \"name\": " <<
"\"" << name <<
"\"";
199 os <<
", \"passes\": [" <<
"\n";
202 void printTreeEntryEnd(
unsigned indent,
bool lastEntry)
override {
203 os.indent(indent) <<
"{}]";
226 using ChildrenMap = llvm::MapVector<const void *, std::unique_ptr<TimerImpl>>;
229 TimerImpl(std::string &&name, std::unique_ptr<OutputStrategy> &output)
230 : threadId(
llvm::get_threadid()), name(name), output(output) {}
233 void start() { startTime = std::chrono::steady_clock::now(); }
237 auto newTime = std::chrono::steady_clock::now() - startTime;
248 TimerImpl *nest(
const void *
id,
function_ref<std::string()> nameBuilder) {
249 auto tid = llvm::get_threadid();
251 return nestTail(children[
id], nameBuilder);
252 std::unique_lock<std::mutex> lock(asyncMutex);
253 return nestTail(asyncChildren[tid][
id], nameBuilder);
257 TimerImpl *nestTail(std::unique_ptr<TimerImpl> &child,
260 child = std::make_unique<TimerImpl>(nameBuilder(), output);
274 mergeAsyncChildren();
280 std::chrono::nanoseconds addAsyncUserTime() {
281 auto added = std::chrono::nanoseconds(0);
282 for (
auto &child : children)
283 added += child.second->addAsyncUserTime();
284 for (
auto &thread : asyncChildren) {
285 for (
auto &child : thread.second) {
286 child.second->addAsyncUserTime();
287 added += child.second->userTime;
296 void mergeAsyncChildren() {
297 for (
auto &child : children)
298 child.second->mergeAsyncChildren();
299 mergeChildren(std::move(asyncChildren));
300 assert(asyncChildren.empty());
308 void mergeChildren(ChildrenMap &&other) {
309 if (children.empty()) {
310 children = std::move(other);
311 for (
auto &child : children)
312 child.second->mergeAsyncChildren();
314 for (
auto &child : other)
315 mergeChild(child.first, std::move(child.second));
321 void mergeChildren(AsyncChildrenMap &&other) {
322 for (
auto &thread : other) {
323 mergeChildren(std::move(thread.second));
324 assert(thread.second.empty());
333 void mergeChild(
const void *
id, std::unique_ptr<TimerImpl> &&other) {
334 auto &into = children[id];
336 into = std::move(other);
337 into->mergeAsyncChildren();
339 into->wallTime =
std::max(into->wallTime, other->wallTime);
340 into->userTime += other->userTime;
341 into->mergeChildren(std::move(other->children));
342 into->mergeChildren(std::move(other->asyncChildren));
350 void dump(raw_ostream &os,
unsigned indent = 0,
unsigned markThreadId = 0) {
351 auto time = getTimeRecord();
352 os << std::string(indent * 2,
' ') << name <<
" [" << threadId <<
"]"
353 << llvm::format(
" %7.4f / %7.4f", time.
user, time.
wall);
354 if (threadId != markThreadId && markThreadId != 0)
357 for (
auto &child : children)
358 child.second->dump(os, indent + 1, threadId);
359 for (
auto &thread : asyncChildren)
360 for (
auto &child : thread.second)
361 child.second->dump(os, indent + 1, threadId);
367 std::chrono::duration_cast<std::chrono::duration<double>>(wallTime)
369 std::chrono::duration_cast<std::chrono::duration<double>>(userTime)
376 llvm::StringMap<TimeRecord> mergedTimers;
377 std::function<void(TimerImpl *)> addTimer = [&](TimerImpl *timer) {
378 mergedTimers[timer->name] += timer->getTimeRecord();
379 for (
auto &children : timer->children)
380 addTimer(children.second.get());
385 std::vector<std::pair<StringRef, TimeRecord>> timerNameAndTime;
386 for (
auto &it : mergedTimers)
387 timerNameAndTime.emplace_back(it.first(), it.second);
388 llvm::array_pod_sort(timerNameAndTime.begin(), timerNameAndTime.end(),
389 [](
const std::pair<StringRef, TimeRecord> *lhs,
390 const std::pair<StringRef, TimeRecord> *rhs) {
391 return llvm::array_pod_sort_comparator<double>(
392 &rhs->second.wall, &lhs->second.wall);
396 for (
auto &timeData : timerNameAndTime)
397 output->printListEntry(timeData.first, timeData.second, total);
401 void printAsTree(
TimeRecord total,
unsigned indent = 0) {
402 unsigned childIndent = indent;
404 output->printTreeEntry(indent, name, getTimeRecord(), total);
407 for (
auto &child : children) {
408 child.second->printAsTree(total, childIndent);
411 output->printTreeEntryEnd(indent);
418 auto total = getTimeRecord();
419 output->printHeader(total);
422 switch (displayMode) {
423 case DisplayMode::List:
426 case DisplayMode::Tree:
434 for (
auto &child : children)
435 rest -= child.second->getTimeRecord();
436 output->printListEntry(
"Rest", rest, total);
437 output->printListEntry(
"Total", total, total,
true);
438 output->printFooter();
442 std::chrono::time_point<std::chrono::steady_clock> startTime;
447 std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0);
451 std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0);
464 ChildrenMap children;
468 AsyncChildrenMap asyncChildren;
471 std::mutex asyncMutex;
473 std::unique_ptr<OutputStrategy> &output;
503 out(std::make_unique<OutputTextStrategy>(
llvm::errs())) {
517 impl->displayMode = displayMode;
522 return impl->displayMode;
527 out = std::move(output);
533 impl->rootTimer->finalize();
534 impl->rootTimer->print(
impl->displayMode);
541 impl->rootTimer = std::make_unique<TimerImpl>(
"root", out);
542 impl->rootTimer->hidden =
true;
547 impl->rootTimer->dump(os);
552 impl->rootTimer->finalize();
558 impl->rootTimer->finalize();
564 return impl->rootTimer.get();
569 static_cast<TimerImpl *
>(handle)->start();
573 static_cast<TimerImpl *
>(handle)->stop();
578 return static_cast<TimerImpl *
>(handle)->nest(
id, nameBuilder);
582 static_cast<TimerImpl *
>(handle)->hidden =
true;
590 struct DefaultTimingManagerOptions {
591 llvm::cl::opt<bool> timing{
"mlir-timing",
592 llvm::cl::desc(
"Display execution times"),
593 llvm::cl::init(
false)};
594 llvm::cl::opt<DisplayMode> displayMode{
595 "mlir-timing-display", llvm::cl::desc(
"Display method for timing data"),
596 llvm::cl::init(DisplayMode::Tree),
598 clEnumValN(DisplayMode::List,
"list",
599 "display the results in a list sorted by total time"),
600 clEnumValN(DisplayMode::Tree,
"tree",
601 "display the results ina with a nested tree view"))};
602 llvm::cl::opt<OutputFormat> outputFormat{
603 "mlir-output-format", llvm::cl::desc(
"Output format for timing data"),
604 llvm::cl::init(OutputFormat::Text),
605 llvm::cl::values(clEnumValN(OutputFormat::Text,
"text",
606 "display the results in text format"),
607 clEnumValN(OutputFormat::Json,
"json",
608 "display the results in JSON format"))};
612 static llvm::ManagedStatic<DefaultTimingManagerOptions>
options;
625 std::unique_ptr<OutputStrategy> printer;
626 if (
options->outputFormat == OutputFormat::Text)
627 printer = std::make_unique<OutputTextStrategy>(llvm::errs());
628 else if (
options->outputFormat == OutputFormat::Json)
629 printer = std::make_unique<OutputJsonStrategy>(llvm::errs());
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static void print(spirv::VerCapExtAttr triple, DialectAsmPrinter &printer)
static llvm::ManagedStatic< DefaultTimingManagerOptions > options
constexpr llvm::StringLiteral kTimingDescription
Facilities for time measurement and report printing to an output stream.
void setDisplayMode(DisplayMode displayMode)
Change the display mode.
void stopTimer(void *handle) override
Stop the timer with the given handle.
std::optional< void * > rootTimer() override
Return the root timer.
DisplayMode
The different display modes for printing the timers.
@ Tree
In this mode the results are displayed in a tree view, with child timers nested under their parents.
@ List
In this mode the results are displayed in a list sorted by total time, with timers aggregated into on...
void setOutput(std::unique_ptr< OutputStrategy > output)
Change the stream where the output will be printed to.
void dumpTimers(raw_ostream &os=llvm::errs())
Debug print the timer data structures to an output stream.
void * nestTimer(void *handle, const void *id, function_ref< std::string()> nameBuilder) override
Create a child timer nested within the one with the given handle.
void startTimer(void *handle) override
Start the timer with the given handle.
DisplayMode getDisplayMode() const
Return the current display mode.
void dumpAsList(raw_ostream &os=llvm::errs())
Debug print the timers as a list.
void dumpAsTree(raw_ostream &os=llvm::errs())
Debug print the timers as a tree.
void clear()
Clear the timing results.
~DefaultTimingManager() override
void print()
Print and clear the timing results.
void setEnabled(bool enabled)
Enable or disable execution time sampling.
OutputFormat
The different output formats for printing the timers.
bool isEnabled() const
Return whether execution time sampling is enabled.
void hideTimer(void *handle) override
Hide the timer in timing reports and directly show its children.
Facilities for printing timing reports to various output formats.
This class provides support for defining a thread local object with non static storage duration.
A handle for a timer in a TimingManager.
This class represents a uniqued string owned by a TimingManager.
std::string str() const
Return an std::string.
TimingIdentifier(const TimingIdentifier &)=default
static TimingIdentifier get(StringRef str, TimingManager &tm)
Return an identifier for the specified string.
This class represents facilities to measure execution time.
TimingScope getRootScope()
Get the root timer of this timing manager wrapped in a TimingScope for convenience.
const std::unique_ptr< detail::TimingManagerImpl > impl
virtual std::optional< void * > rootTimer()=0
Return the root timer.
Timer getRootTimer()
Get the root timer of this timing manager.
An RAII-style wrapper around a timer that ensures the timer is properly started and stopped.
Implementation details of the DefaultTimingManager.
bool enabled
Whether we should do our work or not.
std::unique_ptr< TimerImpl > rootTimer
The root timer.
DisplayMode displayMode
The configured display mode.
Private implementation details of the TimingManager.
llvm::sys::SmartRWMutex< true > identifierMutex
ThreadLocalCache< llvm::StringMap< llvm::StringMapEntry< std::nullopt_t > * > > localIdentifierCache
A thread local cache of identifiers to reduce lock contention.
llvm::StringSet< llvm::BumpPtrAllocator & > identifiers
llvm::BumpPtrAllocator identifierAllocator
The OpAsmOpInterface, see OpAsmInterface.td for more details.
Include the generated interface declarations.
void registerDefaultTimingManagerCLOptions()
Register a set of useful command-line options that can be used to configure a DefaultTimingManager.
void applyDefaultTimingManagerCLOptions(DefaultTimingManager &tm)
Apply any values that were registered with 'registerDefaultTimingManagerOptions' to a DefaultTimingMa...
Simple record class to record timing information.