MLIR 22.0.0git
PassCrashRecovery.cpp
Go to the documentation of this file.
1//===- PassCrashRecovery.cpp - Pass Crash Recovery Implementation ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "PassDetail.h"
10#include "mlir/IR/Diagnostics.h"
11#include "mlir/IR/SymbolTable.h"
12#include "mlir/IR/Verifier.h"
13#include "mlir/Parser/Parser.h"
14#include "mlir/Pass/Pass.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/SetVector.h"
18#include "llvm/Support/CrashRecoveryContext.h"
19#include "llvm/Support/ManagedStatic.h"
20#include "llvm/Support/Mutex.h"
21#include "llvm/Support/Signals.h"
22#include "llvm/Support/Threading.h"
23#include "llvm/Support/ToolOutputFile.h"
24
25using namespace mlir;
26using namespace mlir::detail;
27
28//===----------------------------------------------------------------------===//
29// RecoveryReproducerContext
30//===----------------------------------------------------------------------===//
31
32namespace mlir {
33namespace detail {
34/// This class contains all of the context for generating a recovery reproducer.
35/// Each recovery context is registered globally to allow for generating
36/// reproducers when a signal is raised, such as a segfault.
38 RecoveryReproducerContext(std::string passPipelineStr, Operation *op,
39 ReproducerStreamFactory &streamFactory,
40 bool verifyPasses);
42
43 /// Generate a reproducer with the current context.
44 void generate(std::string &description);
45
46 /// Disable this reproducer context. This prevents the context from generating
47 /// a reproducer in the result of a crash.
48 void disable();
49
50 /// Enable a previously disabled reproducer context.
51 void enable();
52
53private:
54 /// This function is invoked in the event of a crash.
55 static void crashHandler(void *);
56
57 /// Register a signal handler to run in the event of a crash.
58 static void registerSignalHandler();
59
60 /// The textual description of the currently executing pipeline.
61 std::string pipelineElements;
62
63 /// The MLIR operation representing the IR before the crash.
64 Operation *preCrashOperation;
65
66 /// The factory for the reproducer output stream to use when generating the
67 /// reproducer.
68 ReproducerStreamFactory &streamFactory;
69
70 /// Various pass manager and context flags.
71 bool disableThreads;
72 bool verifyPasses;
73
74 /// The current set of active reproducer contexts. This is used in the event
75 /// of a crash. This is not thread_local as the pass manager may produce any
76 /// number of child threads. This uses a set to allow for multiple MLIR pass
77 /// managers to be running at the same time.
78 static llvm::ManagedStatic<llvm::sys::SmartMutex<true>> reproducerMutex;
79 static llvm::ManagedStatic<
80 llvm::SmallSetVector<RecoveryReproducerContext *, 1>>
81 reproducerSet;
82};
83} // namespace detail
84} // namespace mlir
85
86llvm::ManagedStatic<llvm::sys::SmartMutex<true>>
87 RecoveryReproducerContext::reproducerMutex;
88llvm::ManagedStatic<llvm::SmallSetVector<RecoveryReproducerContext *, 1>>
89 RecoveryReproducerContext::reproducerSet;
90
92 std::string passPipelineStr, Operation *op,
93 ReproducerStreamFactory &streamFactory, bool verifyPasses)
94 : pipelineElements(std::move(passPipelineStr)),
95 preCrashOperation(op->clone()), streamFactory(streamFactory),
96 disableThreads(!op->getContext()->isMultithreadingEnabled()),
97 verifyPasses(verifyPasses) {
98 enable();
99}
100
102 // Erase the cloned preCrash IR that we cached.
103 preCrashOperation->erase();
104 disable();
105}
106
107static void appendReproducer(std::string &description, Operation *op,
108 const ReproducerStreamFactory &factory,
109 const std::string &pipeline, bool disableThreads,
110 bool verifyPasses) {
111 llvm::raw_string_ostream descOS(description);
112
113 // Try to create a new output stream for this crash reproducer.
114 std::string error;
115 std::unique_ptr<ReproducerStream> stream = factory(error);
116 if (!stream) {
117 descOS << "failed to create output stream: " << error;
118 return;
119 }
120 descOS << "reproducer generated at `" << stream->description() << "`";
121
122 AsmState state(op);
124 "mlir_reproducer", [&](Operation *op, AsmResourceBuilder &builder) {
125 builder.buildString("pipeline", pipeline);
126 builder.buildBool("disable_threading", disableThreads);
127 builder.buildBool("verify_each", verifyPasses);
128 });
129
130 // Output the .mlir module.
131 op->print(stream->os(), state);
132}
133
134void RecoveryReproducerContext::generate(std::string &description) {
135 std::string pipeline = (preCrashOperation->getName().getStringRef() + "(" +
136 pipelineElements + ")")
137 .str();
138 appendReproducer(description, preCrashOperation, streamFactory, pipeline,
139 disableThreads, verifyPasses);
140}
141
143 llvm::sys::SmartScopedLock<true> lock(*reproducerMutex);
144 reproducerSet->remove(this);
145 if (reproducerSet->empty())
146 llvm::CrashRecoveryContext::Disable();
147}
148
150 llvm::sys::SmartScopedLock<true> lock(*reproducerMutex);
151 if (reproducerSet->empty())
152 llvm::CrashRecoveryContext::Enable();
153 registerSignalHandler();
154 reproducerSet->insert(this);
155}
156
157void RecoveryReproducerContext::crashHandler(void *) {
158 // Walk the current stack of contexts and generate a reproducer for each one.
159 // We can't know for certain which one was the cause, so we need to generate
160 // a reproducer for all of them.
161 for (RecoveryReproducerContext *context : *reproducerSet) {
162 std::string description;
163 context->generate(description);
164
165 // Emit an error using information only available within the context.
166 emitError(context->preCrashOperation->getLoc())
167 << "A signal was caught while processing the MLIR module:"
168 << description << "; marking pass as failed";
169 }
170}
171
172void RecoveryReproducerContext::registerSignalHandler() {
173 // Ensure that the handler is only registered once.
174 static bool registered =
175 (llvm::sys::AddSignalHandler(crashHandler, nullptr), false);
176 (void)registered;
177}
178
179//===----------------------------------------------------------------------===//
180// PassCrashReproducerGenerator
181//===----------------------------------------------------------------------===//
182
186
187 /// The factory to use when generating a crash reproducer.
189
190 /// Flag indicating if reproducer generation should be localized to the
191 /// failing pass.
192 bool localReproducer = false;
193
194 /// A record of all of the currently active reproducer contexts.
196
197 /// The set of all currently running passes. Note: This is not populated when
198 /// `localReproducer` is true, as each pass will get its own recovery context.
200
201 /// Various pass manager flags that get emitted when generating a reproducer.
202 bool pmFlagVerifyPasses = false;
203};
204
206 ReproducerStreamFactory &streamFactory, bool localReproducer)
207 : impl(std::make_unique<Impl>(streamFactory, localReproducer)) {}
209
212 bool pmFlagVerifyPasses) {
213 assert((!impl->localReproducer ||
215 "expected multi-threading to be disabled when generating a local "
216 "reproducer");
217
218 llvm::CrashRecoveryContext::Enable();
219 impl->pmFlagVerifyPasses = pmFlagVerifyPasses;
220
221 // If we aren't generating a local reproducer, prepare a reproducer for the
222 // given top-level operation.
223 if (!impl->localReproducer)
224 prepareReproducerFor(passes, op);
225}
226
227static void
229 std::pair<Pass *, Operation *> passOpPair) {
230 os << "`" << passOpPair.first->getName() << "` on "
231 << "'" << passOpPair.second->getName() << "' operation";
232 if (SymbolOpInterface symbol = dyn_cast<SymbolOpInterface>(passOpPair.second))
233 os << ": @" << symbol.getName();
234}
235
237 LogicalResult executionResult) {
238 // Don't generate a reproducer if we have no active contexts.
239 if (impl->activeContexts.empty())
240 return;
241
242 // If the pass manager execution succeeded, we don't generate any reproducers.
243 if (succeeded(executionResult))
244 return impl->activeContexts.clear();
245
247 << "Failures have been detected while "
248 "processing an MLIR pass pipeline";
249
250 // If we are generating a global reproducer, we include all of the running
251 // passes in the error message for the only active context.
252 if (!impl->localReproducer) {
253 assert(impl->activeContexts.size() == 1 && "expected one active context");
254
255 // Generate the reproducer.
256 std::string description;
257 impl->activeContexts.front()->generate(description);
258
259 // Emit an error to the user.
260 Diagnostic &note = diag.attachNote() << "Pipeline failed while executing [";
261 llvm::interleaveComma(impl->runningPasses, note,
262 [&](const std::pair<Pass *, Operation *> &value) {
263 formatPassOpReproducerMessage(note, value);
264 });
265 note << "]: " << description;
266 impl->runningPasses.clear();
267 impl->activeContexts.clear();
268 return;
269 }
270
271 // If we were generating a local reproducer, we generate a reproducer for the
272 // most recently executing pass using the matching entry from `runningPasses`
273 // to generate a localized diagnostic message.
274 assert(impl->activeContexts.size() == impl->runningPasses.size() &&
275 "expected running passes to match active contexts");
276
277 // Generate the reproducer.
278 RecoveryReproducerContext &reproducerContext = *impl->activeContexts.back();
279 std::string description;
280 reproducerContext.generate(description);
281
282 // Emit an error to the user.
283 Diagnostic &note = diag.attachNote() << "Pipeline failed while executing ";
284 formatPassOpReproducerMessage(note, impl->runningPasses.back());
285 note << ": " << description;
286
287 impl->activeContexts.clear();
288 impl->runningPasses.clear();
289}
290
292 Operation *op) {
293 // If not tracking local reproducers, we simply remember that this pass is
294 // running.
295 impl->runningPasses.insert(std::make_pair(pass, op));
296 if (!impl->localReproducer)
297 return;
298
299 // Disable the current pass recovery context, if there is one. This may happen
300 // in the case of dynamic pass pipelines.
301 if (!impl->activeContexts.empty())
302 impl->activeContexts.back()->disable();
303
304 // Collect all of the parent scopes of this operation.
306 while (Operation *parentOp = op->getParentOp()) {
307 scopes.push_back(op->getName());
308 op = parentOp;
309 }
310
311 // Emit a pass pipeline string for the current pass running on the current
312 // operation type.
313 std::string passStr;
314 llvm::raw_string_ostream passOS(passStr);
315 for (OperationName scope : llvm::reverse(scopes))
316 passOS << scope << "(";
317 pass->printAsTextualPipeline(passOS);
318 for (unsigned i = 0, e = scopes.size(); i < e; ++i)
319 passOS << ")";
320
321 impl->activeContexts.push_back(std::make_unique<RecoveryReproducerContext>(
322 passStr, op, impl->streamFactory, impl->pmFlagVerifyPasses));
323}
326 std::string passStr;
327 llvm::raw_string_ostream passOS(passStr);
328 llvm::interleaveComma(
329 passes, passOS, [&](Pass &pass) { pass.printAsTextualPipeline(passOS); });
330
331 impl->activeContexts.push_back(std::make_unique<RecoveryReproducerContext>(
332 passStr, op, impl->streamFactory, impl->pmFlagVerifyPasses));
333}
334
336 Operation *op) {
337 // We only pop the active context if we are tracking local reproducers.
338 impl->runningPasses.remove(std::make_pair(pass, op));
339 if (impl->localReproducer) {
340 impl->activeContexts.pop_back();
341
342 // Re-enable the previous pass recovery context, if there was one. This may
343 // happen in the case of dynamic pass pipelines.
344 if (!impl->activeContexts.empty())
345 impl->activeContexts.back()->enable();
346 }
347}
348
349//===----------------------------------------------------------------------===//
350// CrashReproducerInstrumentation
351//===----------------------------------------------------------------------===//
352
353namespace {
354struct CrashReproducerInstrumentation : public PassInstrumentation {
355 CrashReproducerInstrumentation(PassCrashReproducerGenerator &generator)
356 : generator(generator) {}
357 ~CrashReproducerInstrumentation() override = default;
358
359 void runBeforePass(Pass *pass, Operation *op) override {
360 if (!isa<OpToOpPassAdaptor>(pass))
361 generator.prepareReproducerFor(pass, op);
362 }
363
364 void runAfterPass(Pass *pass, Operation *op) override {
365 if (!isa<OpToOpPassAdaptor>(pass))
366 generator.removeLastReproducerFor(pass, op);
367 }
368
369 void runAfterPassFailed(Pass *pass, Operation *op) override {
370 // Only generate one reproducer per crash reproducer instrumentation.
371 if (alreadyFailed)
372 return;
373
374 alreadyFailed = true;
375 generator.finalize(op, /*executionResult=*/failure());
376 }
377
378private:
379 /// The generator used to create crash reproducers.
380 PassCrashReproducerGenerator &generator;
381 bool alreadyFailed = false;
382};
383} // namespace
384
385//===----------------------------------------------------------------------===//
386// FileReproducerStream
387//===----------------------------------------------------------------------===//
388
389namespace {
390/// This class represents a default instance of mlir::ReproducerStream
391/// that is backed by a file.
392struct FileReproducerStream : public mlir::ReproducerStream {
393 FileReproducerStream(std::unique_ptr<llvm::ToolOutputFile> outputFile)
394 : outputFile(std::move(outputFile)) {}
395 ~FileReproducerStream() override { outputFile->keep(); }
396
397 /// Returns a description of the reproducer stream.
398 StringRef description() override { return outputFile->getFilename(); }
399
400 /// Returns the stream on which to output the reproducer.
401 raw_ostream &os() override { return outputFile->os(); }
402
403private:
404 /// ToolOutputFile corresponding to opened `filename`.
405 std::unique_ptr<llvm::ToolOutputFile> outputFile = nullptr;
406};
407} // namespace
408
409//===----------------------------------------------------------------------===//
410// PassManager
411//===----------------------------------------------------------------------===//
412
413LogicalResult PassManager::runWithCrashRecovery(Operation *op,
414 AnalysisManager am) {
415 const bool threadingEnabled = getContext()->isMultithreadingEnabled();
416 crashReproGenerator->initialize(getPasses(), op, verifyPasses);
417
418 // Safely invoke the passes within a recovery context.
419 LogicalResult passManagerResult = failure();
420 llvm::CrashRecoveryContext recoveryContext;
421 const auto runPassesFn = [&] { passManagerResult = runPasses(op, am); };
422 if (threadingEnabled)
423 recoveryContext.RunSafelyOnThread(runPassesFn);
424 else
425 recoveryContext.RunSafely(runPassesFn);
426 crashReproGenerator->finalize(op, passManagerResult);
427
428 return passManagerResult;
429}
430
432makeReproducerStreamFactory(StringRef outputFile) {
433 // Capture the filename by value in case outputFile is out of scope when
434 // invoked.
435 std::string filename = outputFile.str();
436 return [filename](std::string &error) -> std::unique_ptr<ReproducerStream> {
437 std::unique_ptr<llvm::ToolOutputFile> outputFile =
438 mlir::openOutputFile(filename, &error);
439 if (!outputFile) {
440 error = "Failed to create reproducer stream: " + error;
441 return nullptr;
442 }
443 return std::make_unique<FileReproducerStream>(std::move(outputFile));
444 };
445}
446
448 raw_ostream &os, StringRef anchorName,
449 const llvm::iterator_range<OpPassManager::pass_iterator> &passes,
450 bool pretty = false);
451
453 StringRef anchorName,
455 Operation *op, StringRef outputFile, bool disableThreads,
456 bool verifyPasses) {
457
458 std::string description;
459 std::string pipelineStr;
460 llvm::raw_string_ostream passOS(pipelineStr);
461 ::printAsTextualPipeline(passOS, anchorName, passes);
462 appendReproducer(description, op, makeReproducerStreamFactory(outputFile),
463 pipelineStr, disableThreads, verifyPasses);
464 return description;
465}
466
468 bool genLocalReproducer) {
470 genLocalReproducer);
471}
472
474 ReproducerStreamFactory factory, bool genLocalReproducer) {
475 assert(!crashReproGenerator &&
476 "crash reproducer has already been initialized");
477 if (genLocalReproducer && getContext()->isMultithreadingEnabled())
478 llvm::report_fatal_error(
479 "Local crash reproduction can't be setup on a "
480 "pass-manager without disabling multi-threading first.");
481
482 crashReproGenerator = std::make_unique<PassCrashReproducerGenerator>(
483 factory, genLocalReproducer);
485 std::make_unique<CrashReproducerInstrumentation>(*crashReproGenerator));
486}
487
488//===----------------------------------------------------------------------===//
489// Asm Resource
490//===----------------------------------------------------------------------===//
491
493 auto parseFn = [this](AsmParsedResourceEntry &entry) -> LogicalResult {
494 if (entry.getKey() == "pipeline") {
495 FailureOr<std::string> value = entry.parseAsString();
496 if (succeeded(value))
497 this->pipeline = std::move(*value);
498 return value;
499 }
500 if (entry.getKey() == "disable_threading") {
501 FailureOr<bool> value = entry.parseAsBool();
502 if (succeeded(value))
503 this->disableThreading = *value;
504 return value;
505 }
506 if (entry.getKey() == "verify_each") {
507 FailureOr<bool> value = entry.parseAsBool();
508 if (succeeded(value))
509 this->verifyEach = *value;
510 return value;
511 }
512 return entry.emitError() << "unknown 'mlir_reproducer' resource key '"
513 << entry.getKey() << "'";
514 };
515 config.attachResourceParser("mlir_reproducer", parseFn);
516}
517
519 if (pipeline.has_value()) {
520 FailureOr<OpPassManager> reproPm = parsePassPipeline(*pipeline);
521 if (failed(reproPm))
522 return failure();
523 static_cast<OpPassManager &>(pm) = std::move(*reproPm);
524 }
525
526 if (disableThreading.has_value())
527 pm.getContext()->disableMultithreading(*disableThreading);
528
529 if (verifyEach.has_value())
530 pm.enableVerifier(*verifyEach);
531
532 return success();
533}
return success()
b getContext())
static const mlir::GenInfo * generator
static std::string diag(const llvm::Value &value)
static void appendReproducer(std::string &description, Operation *op, const ReproducerStreamFactory &factory, const std::string &pipeline, bool disableThreads, bool verifyPasses)
static void formatPassOpReproducerMessage(Diagnostic &os, std::pair< Pass *, Operation * > passOpPair)
static ReproducerStreamFactory makeReproducerStreamFactory(StringRef outputFile)
void printAsTextualPipeline(raw_indented_ostream &os, StringRef anchorName, const llvm::iterator_range< OpPassManager::pass_iterator > &passes, bool pretty=false)
Prints out the passes of the pass manager as the textual representation of pipelines.
Definition Pass.cpp:422
This class represents a single parsed resource entry.
Definition AsmState.h:291
This class is used to build resource entries for use by the printer.
Definition AsmState.h:247
virtual void buildString(StringRef key, StringRef data)=0
Build a resource entry represented by the given human-readable string value.
virtual void buildBool(StringRef key, bool data)=0
Build a resource entry represented by the given bool.
This class provides management for the lifetime of the state used when printing the IR.
Definition AsmState.h:542
void attachResourcePrinter(std::unique_ptr< AsmResourcePrinter > printer)
Attach the given resource printer to the AsmState.
This class contains all of the information necessary to report a diagnostic to the DiagnosticEngine.
This class represents a diagnostic that is inflight and set to be reported.
void disableMultithreading(bool disable=true)
Set the flag specifying if multi-threading is disabled by the context.
bool isMultithreadingEnabled()
Return true if multi-threading is enabled by the context.
This class represents a pass manager that runs passes on either a specific operation type, or any isolated operation.
Definition PassManager.h:46
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
Location getLoc()
The source location the operation was defined or derived from.
Definition Operation.h:223
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-level operation.
Definition Operation.h:234
OperationName getName()
The name of an operation is the key identifier for it.
Definition Operation.h:119
void print(raw_ostream &os, const OpPrintingFlags &flags={})
MLIRContext * getContext()
Return the context this operation is associated with.
Definition Operation.h:216
This class represents a configuration for the MLIR assembly parser.
Definition AsmState.h:469
PassInstrumentation provides several entry points into the pass manager infrastructure.
The main pass manager and pipeline builder.
MLIRContext * getContext() const
Return an instance of the context.
void addInstrumentation(std::unique_ptr< PassInstrumentation > pi)
Add the provided instrumentation to the pass manager.
Definition Pass.cpp:1114
void enableCrashReproducerGeneration(StringRef outputFile, bool genLocalReproducer=false)
Enable support for the pass manager to generate a reproducer on the event of a crash or a pass failure.
void enableVerifier(bool enabled=true)
Runs the verifier after each individual pass.
Definition Pass.cpp:1032
The abstract base pass class.
Definition Pass.h:52
void printAsTextualPipeline(raw_ostream &os, bool pretty=false)
Prints out the pass in the textual representation of pipelines.
Definition Pass.cpp:85
void initialize(iterator_range< PassManager::pass_iterator > passes, Operation *op, bool pmFlagVerifyPasses)
Initialize the generator in preparation for reproducer generation.
void removeLastReproducerFor(Pass *pass, Operation *op)
Remove the last recorded reproducer anchored at the given pass and operation.
void finalize(Operation *rootOp, LogicalResult executionResult)
Finalize the current run of the generator, generating any necessary reproducers if the provided execution result is a failure.
void prepareReproducerFor(Pass *pass, Operation *op)
Prepare a new reproducer for the given pass, operating on op.
PassCrashReproducerGenerator(ReproducerStreamFactory &streamFactory, bool localReproducer)
AttrTypeReplacer.
Include the generated interface declarations.
std::unique_ptr< llvm::ToolOutputFile > openOutputFile(llvm::StringRef outputFilename, std::string *errorMessage=nullptr)
Open the file specified by its name for writing.
const FrozenRewritePatternSet GreedyRewriteConfig config
std::string makeReproducer(StringRef anchorName, const llvm::iterator_range< OpPassManager::pass_iterator > &passes, Operation *op, StringRef outputFile, bool disableThreads=false, bool verifyPasses=false)
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
llvm::SetVector< T, Vector, Set, N > SetVector
Definition LLVM.h:131
Operation * clone(OpBuilder &b, Operation *op, TypeRange newResultTypes, ValueRange newOperands)
std::function< std::unique_ptr< ReproducerStream >(std::string &error)> ReproducerStreamFactory
Method type for constructing ReproducerStream.
LogicalResult parsePassPipeline(StringRef pipeline, OpPassManager &pm, raw_ostream &errorStream=llvm::errs())
Parse the textual representation of a pass pipeline, adding the result to 'pm' on success.
bool pmFlagVerifyPasses
Various pass manager flags that get emitted when generating a reproducer.
ReproducerStreamFactory streamFactory
The factory to use when generating a crash reproducer.
SetVector< std::pair< Pass *, Operation * > > runningPasses
The set of all currently running passes.
bool localReproducer
Flag indicating if reproducer generation should be localized to the failing pass.
Impl(ReproducerStreamFactory &streamFactory, bool localReproducer)
SmallVector< std::unique_ptr< RecoveryReproducerContext > > activeContexts
A record of all of the currently active reproducer contexts.
void attachResourceParser(ParserConfig &config)
Attach an assembly resource parser to 'config' that collects the MLIR reproducer configuration into this instance.
LogicalResult apply(PassManager &pm) const
Apply the reproducer options to 'pm' and its context.
This class contains all of the context for generating a recovery reproducer.
void disable()
Disable this reproducer context.
RecoveryReproducerContext(std::string passPipelineStr, Operation *op, ReproducerStreamFactory &streamFactory, bool verifyPasses)
void generate(std::string &description)
Generate a reproducer with the current context.
void enable()
Enable a previously disabled reproducer context.