15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/StringMap.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Support/Allocator.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Support/Format.h"
22 #include "llvm/Support/FormatVariadic.h"
23 #include "llvm/Support/ManagedStatic.h"
24 #include "llvm/Support/RWMutex.h"
25 #include "llvm/Support/Threading.h"
26 #include "llvm/Support/raw_ostream.h"
33 using namespace detail;
37 "... Execution time report ...";
85 auto *&localEntry = (*
impl.localIdentifierCache)[
str];
91 llvm::sys::SmartScopedReader<true> contextLock(
impl.identifierMutex);
92 auto it =
impl.identifiers.find(
str);
93 if (it !=
impl.identifiers.end()) {
100 llvm::sys::SmartScopedWriter<true> contextLock(
impl.identifierMutex);
101 auto it =
impl.identifiers.insert(
str).first;
114 TimeRecord(
double wall = 0.0,
double user = 0.0) : wall(wall), user(user) {}
116 TimeRecord &
operator+=(
const TimeRecord &other) {
122 TimeRecord &
operator-=(
const TimeRecord &other) {
130 void print(raw_ostream &os,
const TimeRecord &total) {
131 if (total.user != total.wall)
132 os << llvm::format(
" %8.4f (%5.1f%%)", user, 100.0 * user / total.user);
133 os << llvm::format(
" %8.4f (%5.1f%%) ", wall, 100.0 * wall / total.wall);
143 TimeRecord time, TimeRecord total) {
144 time.print(os, total);
145 os.indent(indent) << name <<
"\n";
152 os <<
"===" << std::string(73,
'-') <<
"===\n";
154 os <<
"===" << std::string(73,
'-') <<
"===\n";
157 os << llvm::format(
" Total Execution Time: %.4f seconds\n\n", total.wall);
158 if (total.user != total.wall)
159 os <<
" ----User Time----";
160 os <<
" ----Wall Time---- ----Name----\n";
176 using ChildrenMap = llvm::MapVector<const void *, std::unique_ptr<TimerImpl>>;
179 TimerImpl(std::string &&name) : threadId(
llvm::get_threadid()), name(name) {}
182 void start() { startTime = std::chrono::steady_clock::now(); }
186 auto newTime = std::chrono::steady_clock::now() - startTime;
197 TimerImpl *nest(
const void *
id,
function_ref<std::string()> nameBuilder) {
198 auto tid = llvm::get_threadid();
200 return nestTail(children[
id], nameBuilder);
201 std::unique_lock<std::mutex> lock(asyncMutex);
202 return nestTail(asyncChildren[tid][
id], nameBuilder);
206 TimerImpl *nestTail(std::unique_ptr<TimerImpl> &child,
209 child = std::make_unique<TimerImpl>(nameBuilder());
223 mergeAsyncChildren();
229 std::chrono::nanoseconds addAsyncUserTime() {
230 auto added = std::chrono::nanoseconds(0);
231 for (
auto &child : children)
232 added += child.second->addAsyncUserTime();
233 for (
auto &thread : asyncChildren) {
234 for (
auto &child : thread.second) {
235 child.second->addAsyncUserTime();
236 added += child.second->userTime;
245 void mergeAsyncChildren() {
246 for (
auto &child : children)
247 child.second->mergeAsyncChildren();
248 mergeChildren(std::move(asyncChildren));
249 assert(asyncChildren.empty());
257 void mergeChildren(ChildrenMap &&other) {
258 if (children.empty()) {
259 children = std::move(other);
260 for (
auto &child : children)
261 child.second->mergeAsyncChildren();
263 for (
auto &child : other)
264 mergeChild(child.first, std::move(child.second));
270 void mergeChildren(AsyncChildrenMap &&other) {
271 for (
auto &thread : other) {
272 mergeChildren(std::move(thread.second));
273 assert(thread.second.empty());
282 void mergeChild(
const void *
id, std::unique_ptr<TimerImpl> &&other) {
283 auto &into = children[id];
285 into = std::move(other);
286 into->mergeAsyncChildren();
288 into->wallTime =
std::max(into->wallTime, other->wallTime);
289 into->userTime += other->userTime;
290 into->mergeChildren(std::move(other->children));
291 into->mergeChildren(std::move(other->asyncChildren));
299 void dump(raw_ostream &os,
unsigned indent = 0,
unsigned markThreadId = 0) {
300 auto time = getTimeRecord();
301 os << std::string(indent * 2,
' ') << name <<
" [" << threadId <<
"]"
302 << llvm::format(
" %7.4f / %7.4f", time.user, time.wall);
303 if (threadId != markThreadId && markThreadId != 0)
306 for (
auto &child : children)
307 child.second->dump(os, indent + 1, threadId);
308 for (
auto &thread : asyncChildren)
309 for (
auto &child : thread.second)
310 child.second->dump(os, indent + 1, threadId);
314 TimeRecord getTimeRecord() {
316 std::chrono::duration_cast<std::chrono::duration<double>>(wallTime)
318 std::chrono::duration_cast<std::chrono::duration<double>>(userTime)
323 void printAsList(raw_ostream &os, TimeRecord total) {
325 llvm::StringMap<TimeRecord> mergedTimers;
326 std::function<void(TimerImpl *)> addTimer = [&](TimerImpl *timer) {
327 mergedTimers[timer->name] += timer->getTimeRecord();
328 for (
auto &children : timer->children)
329 addTimer(children.second.get());
334 std::vector<std::pair<StringRef, TimeRecord>> timerNameAndTime;
335 for (
auto &it : mergedTimers)
336 timerNameAndTime.emplace_back(it.first(), it.second);
337 llvm::array_pod_sort(timerNameAndTime.begin(), timerNameAndTime.end(),
338 [](
const std::pair<StringRef, TimeRecord> *lhs,
339 const std::pair<StringRef, TimeRecord> *rhs) {
340 return llvm::array_pod_sort_comparator<double>(
341 &rhs->second.wall, &lhs->second.wall);
345 for (
auto &timeData : timerNameAndTime)
350 void printAsTree(raw_ostream &os, TimeRecord total,
unsigned indent = 0) {
351 unsigned childIndent = indent;
356 for (
auto &child : children) {
357 child.second->printAsTree(os, total, childIndent);
364 auto total = getTimeRecord();
368 switch (displayMode) {
369 case DisplayMode::List:
370 printAsList(os, total);
372 case DisplayMode::Tree:
373 printAsTree(os, total);
380 for (
auto &child : children)
381 rest -= child.second->getTimeRecord();
388 std::chrono::time_point<std::chrono::steady_clock> startTime;
393 std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0);
397 std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0);
410 ChildrenMap children;
414 AsyncChildrenMap asyncChildren;
417 std::mutex asyncMutex;
463 impl->displayMode = displayMode;
468 return impl->displayMode;
476 assert(
impl->output);
477 return *
impl->output;
483 impl->rootTimer->finalize();
484 impl->rootTimer->print(*
impl->output,
impl->displayMode);
491 impl->rootTimer = std::make_unique<TimerImpl>(
"root");
492 impl->rootTimer->hidden =
true;
497 impl->rootTimer->dump(os);
502 impl->rootTimer->finalize();
508 impl->rootTimer->finalize();
514 return impl->rootTimer.get();
519 static_cast<TimerImpl *
>(handle)->start();
523 static_cast<TimerImpl *
>(handle)->stop();
528 return static_cast<TimerImpl *
>(handle)->nest(
id, nameBuilder);
532 static_cast<TimerImpl *
>(handle)->hidden =
true;
540 struct DefaultTimingManagerOptions {
541 llvm::cl::opt<bool> timing{
"mlir-timing",
542 llvm::cl::desc(
"Display execution times"),
543 llvm::cl::init(
false)};
544 llvm::cl::opt<DisplayMode> displayMode{
545 "mlir-timing-display", llvm::cl::desc(
"Display method for timing data"),
546 llvm::cl::init(DisplayMode::Tree),
548 clEnumValN(DisplayMode::List,
"list",
549 "display the results in a list sorted by total time"),
550 clEnumValN(DisplayMode::Tree,
"tree",
551 "display the results ina with a nested tree view"))};
555 static llvm::ManagedStatic<DefaultTimingManagerOptions>
options;
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static void print(spirv::VerCapExtAttr triple, DialectAsmPrinter &printer)
static void printTimeEntry(raw_ostream &os, unsigned indent, StringRef name, TimeRecord time, TimeRecord total)
Utility to print a single line entry in the timer output.
static void printTimeHeader(raw_ostream &os, TimeRecord total)
Utility to print the timer heading information.
static llvm::ManagedStatic< DefaultTimingManagerOptions > options
constexpr llvm::StringLiteral kTimingDescription
Facilities for time measurement and report printing to an output stream.
void setDisplayMode(DisplayMode displayMode)
Change the display mode.
void stopTimer(void *handle) override
Stop the timer with the given handle.
std::optional< void * > rootTimer() override
Return the root timer.
DisplayMode
The different display modes for printing the timers.
@ Tree
In this mode the results are displayed in a tree view, with child timers nested under their parents.
@ List
In this mode the results are displayed in a list sorted by total time, with timers aggregated into on...
void dumpTimers(raw_ostream &os=llvm::errs())
Debug print the timer data structures to an output stream.
void * nestTimer(void *handle, const void *id, function_ref< std::string()> nameBuilder) override
Create a child timer nested within the one with the given handle.
void startTimer(void *handle) override
Start the timer with the given handle.
DisplayMode getDisplayMode() const
Return the current display mode.
void setOutput(raw_ostream &os)
Change the stream where the output will be printed to.
raw_ostream & getOutput() const
Return the current output stream where the output will be printed to.
void dumpAsList(raw_ostream &os=llvm::errs())
Debug print the timers as a list.
void dumpAsTree(raw_ostream &os=llvm::errs())
Debug print the timers as a tree.
void clear()
Clear the timing results.
~DefaultTimingManager() override
void print()
Print and clear the timing results.
void setEnabled(bool enabled)
Enable or disable execution time sampling.
bool isEnabled() const
Return whether execution time sampling is enabled.
void hideTimer(void *handle) override
Hide the timer in timing reports and directly show its children.
This class provides support for defining a thread local object with non static storage duration.
A handle for a timer in a TimingManager.
This class represents a uniqued string owned by a TimingManager.
std::string str() const
Return an std::string.
TimingIdentifier(const TimingIdentifier &)=default
static TimingIdentifier get(StringRef str, TimingManager &tm)
Return an identifier for the specified string.
This class represents facilities to measure execution time.
TimingScope getRootScope()
Get the root timer of this timing manager wrapped in a TimingScope for convenience.
const std::unique_ptr< detail::TimingManagerImpl > impl
virtual std::optional< void * > rootTimer()=0
Return the root timer.
Timer getRootTimer()
Get the root timer of this timing manager.
An RAII-style wrapper around a timer that ensures the timer is properly started and stopped.
Implementation details of the DefaultTimingManager.
bool enabled
Whether we should do our work or not.
std::unique_ptr< TimerImpl > rootTimer
The root timer.
raw_ostream * output
The stream where we should print our output. This will always be non-null.
DisplayMode displayMode
The configured display mode.
Private implementation details of the TimingManager.
llvm::sys::SmartRWMutex< true > identifierMutex
ThreadLocalCache< llvm::StringMap< llvm::StringMapEntry< std::nullopt_t > * > > localIdentifierCache
A thread local cache of identifiers to reduce lock contention.
llvm::StringSet< llvm::BumpPtrAllocator & > identifiers
llvm::BumpPtrAllocator identifierAllocator
Include the generated interface declarations.
LLVM_ATTRIBUTE_ALWAYS_INLINE MPInt & operator-=(MPInt &a, int64_t b)
LLVM_ATTRIBUTE_ALWAYS_INLINE MPInt & operator+=(MPInt &a, int64_t b)
This header declares functions that assist transformations in the MemRef dialect.
void registerDefaultTimingManagerCLOptions()
Register a set of useful command-line options that can be used to configure a DefaultTimingManager.
void applyDefaultTimingManagerCLOptions(DefaultTimingManager &tm)
Apply any values that were registered with 'registerDefaultTimingManagerCLOptions' to a DefaultTimingMa...