Skip to content

[Explicit Module Builds] Add support for creating a reproducer when clang process crashes. #455

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions Sources/SWBCSupport/CLibclang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -859,6 +859,15 @@ extern "C" {
typedef struct CXOpaqueDependencyScannerServiceOptions
*CXDependencyScannerServiceOptions;

/**
 * Generate a self-contained reproducer in a specified location to re-run the compilation.
 *
 * This entry point is looked up as an optional symbol, so the pointer may be
 * null when the loaded libclang does not export it; callers must check it
 * before invoking.
 *
 * NOTE(review): the parameter descriptions below are inferred from the
 * parameter names and the visible call site; confirm against the libclang
 * header that declares this function.
 *
 * \param argc number of entries in \p argv.
 * \param argv the command line to reproduce.
 * \param WorkingDirectory directory the command line is evaluated in.
 * \param ReproducerLocation where to place the reproducer; the visible caller
 *        passes NULL (presumably letting libclang choose a location).
 * \param UseUniqueReproducerName presumably requests a unique name for the
 *        generated reproducer; the visible caller passes true.
 * \param messageOut receives a CXString describing the outcome; the visible
 *        caller releases it with clang_disposeString.
 * \returns a CXErrorCode; the visible caller treats CXError_Success as success.
 */
enum CXErrorCode
(*clang_experimental_DependencyScanner_generateReproducer)(
int argc, const char *const *argv, const char *WorkingDirectory,
const char *ReproducerLocation, bool UseUniqueReproducerName,
CXString *messageOut);

/**
* Creates a default set of service options.
* Must be disposed with \c
Expand Down Expand Up @@ -1423,6 +1432,7 @@ struct LibclangWrapper {
LOOKUP_OPTIONAL(clang_experimental_cas_replayCompilation);
LOOKUP_OPTIONAL(clang_experimental_cas_ReplayResult_dispose);
LOOKUP_OPTIONAL(clang_experimental_cas_ReplayResult_getStderr);
LOOKUP_OPTIONAL(clang_experimental_DependencyScanner_generateReproducer);
LOOKUP_OPTIONAL(clang_experimental_DependencyScannerServiceOptions_create);
LOOKUP_OPTIONAL(clang_experimental_DependencyScannerServiceOptions_dispose);
LOOKUP_OPTIONAL(clang_experimental_DependencyScannerServiceOptions_setDependencyMode);
Expand Down Expand Up @@ -1775,6 +1785,10 @@ extern "C" {
lib->wrapper->fns.clang_experimental_DependencyScannerServiceOptions_setCWDOptimization;
}

/// Reports whether the loaded libclang exports the reproducer-generation
/// entry point (i.e. the optional symbol was resolved to a non-null pointer).
bool libclang_has_reproducer_feature(libclang_t lib) {
  const auto &fns = lib->wrapper->fns;
  return fns.clang_experimental_DependencyScanner_generateReproducer != nullptr;
}

libclang_casoptions_t libclang_casoptions_create(libclang_t lib) {
auto opts = lib->wrapper->fns.clang_experimental_cas_Options_create();
return new libclang_casoptions_t_{{lib->wrapper, opts}};
Expand Down Expand Up @@ -2159,6 +2173,22 @@ extern "C" {
return depGraph != nullptr;
}

/// Asks libclang to generate a reproducer for the given driver command line.
///
/// \param scanner the scanner whose libclang instance is used.
/// \param argc number of entries in \p argv.
/// \param argv the Clang driver command line.
/// \param workingDirectory working directory to evaluate the command line in.
/// \param[out] message if non-null, receives a newly allocated copy of the
///        human-readable result message (produced via strdup_safe); the caller
///        is responsible for releasing it.
/// \returns true when libclang reports CXError_Success, false otherwise
///          (including when the loaded libclang lacks the entry point).
bool libclang_scanner_generate_reproducer(libclang_scanner_t scanner,
                                          int argc, char *const *argv,
                                          const char *workingDirectory,
                                          const char **message) {
  auto lib = scanner->scanner->lib;

  // The entry point is resolved with LOOKUP_OPTIONAL, so it may be missing in
  // older libclang builds. Fail gracefully instead of calling a null pointer.
  auto generateReproducer =
      lib->fns.clang_experimental_DependencyScanner_generateReproducer;
  if (!generateReproducer) {
    if (message) {
      *message = strdup_safe("libclang does not support reproducer generation");
    }
    return false;
  }

  // Zero-initialize so that reading/disposing the string is well-defined even
  // if libclang fails before writing to the out-parameter.
  LibclangFunctions::CXString messageString = {};
  auto result = generateReproducer(
      argc, const_cast<const char**>(argv), workingDirectory,
      /*ReproducerLocation=*/NULL, /*UseUniqueReproducerName=*/true, &messageString);
  if (message) {
    // Copy the text out before the CXString backing it is disposed below.
    *message = strdup_safe(lib->fns.clang_getCString(messageString));
  }
  lib->fns.clang_disposeString(messageString);
  return result == LibclangFunctions::CXError_Success;
}

bool libclang_driver_get_actions(libclang_t wrapped_lib,
int argc,
char* const* argv,
Expand Down
15 changes: 15 additions & 0 deletions Sources/SWBCSupport/CLibclang.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ CSUPPORT_EXPORT bool libclang_has_cas_up_to_date_checks_feature(libclang_t lib);
/// Whether the libclang has current working directory optimization support.
CSUPPORT_EXPORT bool libclang_has_current_working_directory_optimization(libclang_t lib);

/// Whether the libclang has reproducer generation support.
CSUPPORT_EXPORT bool libclang_has_reproducer_feature(libclang_t lib);

/// Create the CAS options object.
CSUPPORT_EXPORT libclang_casoptions_t libclang_casoptions_create(libclang_t lib);

Expand Down Expand Up @@ -203,6 +206,18 @@ CSUPPORT_EXPORT bool libclang_scanner_scan_dependencies(
void (^diagnostics_callback)(const libclang_diagnostic_set_t),
void (^error_callback)(const char *));

/// Generate a way to re-run the compilation without all the source files and the build system.
///
/// \param scanner - The scanner to use.
/// \param argc - The number of arguments.
/// \param argv - The Clang driver command line (including a program name in argv[0]).
/// \param workingDirectory - The working directory to use for evaluation.
/// \param[out] message - The human-readable message describing the result of the operation.
///        May be NULL if the caller is not interested in the message. When provided, it receives
///        a newly allocated copy of the message that the caller is responsible for releasing.
/// \returns True on success, false if something failed (see \p message for more details).
CSUPPORT_EXPORT bool libclang_scanner_generate_reproducer(
libclang_scanner_t scanner, int argc, char *const *argv, const char *workingDirectory,
const char **message);

/// Get the list of commands invoked by the given Clang driver command line.
///
/// \param argc - The number of arguments.
Expand Down
20 changes: 20 additions & 0 deletions Sources/SWBCore/LibclangVendored/Libclang.swift
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ public final class Libclang {
public var supportsCurrentWorkingDirectoryOptimization: Bool {
libclang_has_current_working_directory_optimization(lib)
}

/// Whether the loaded libclang is able to generate compilation reproducers.
public var supportsReproducerGeneration: Bool {
    let hasFeature = libclang_has_reproducer_feature(lib)
    return hasFeature
}
}

enum DependencyScanningError: Error {
Expand Down Expand Up @@ -269,6 +273,22 @@ public final class DependencyScanner {
}
return fileDeps
}

/// Asks libclang to generate a reproducer for the given compilation.
///
/// - Parameters:
///   - commandLine: The Clang driver command line, including the program name as its first element.
///   - workingDirectory: The working directory to evaluate the command line in.
/// - Returns: The human-readable message reported by libclang when generation succeeds.
/// - Throws: `Error.dependencyScanErrorString` carrying libclang's message when generation fails,
///   or `Error.dependencyScanUnknownError` when it fails without providing a message.
public func generateReproducer(
    commandLine: [String],
    workingDirectory: String
) throws -> String {
    let args = CStringArray(commandLine)
    // `CChar` (rather than a hard-coded `Int8`) matches how `const char *` is imported on every platform.
    var messageUnsafe: UnsafePointer<CChar>?
    // The C side hands over ownership of the message buffer; release it on every exit path.
    defer { messageUnsafe?.deallocate() }
    // The count is `- 1` here, because CStringArray appends a trailing nullptr.
    let success = libclang_scanner_generate_reproducer(
        scanner,
        CInt(args.cArray.count - 1),
        args.cArray,
        workingDirectory,
        &messageUnsafe
    )
    // The C side may leave the message unset; treat that as an empty message instead of trapping.
    let message = messageUnsafe.map { String(cString: $0) } ?? ""
    guard success else {
        throw message.isEmpty ? Error.dependencyScanUnknownError : Error.dependencyScanErrorString(message)
    }
    return message
}
}

fileprivate struct ClangDiagnosticSet {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ package final class ClangModuleDependencyGraph {
/// for example, when using `-save-temps`.
package let commands: [CompileCommand]

package let scanningCommandLine: [String]

package let transitiveIncludeTreeIDs: OrderedSet<String>
package let transitiveCompileCommandCacheKeys: OrderedSet<String>

Expand All @@ -121,6 +123,7 @@ package final class ClangModuleDependencyGraph {
moduleDependencies: OrderedSet<Path>,
workingDirectory: Path,
commands: [CompileCommand],
scanningCommandLine: [String],
transitiveIncludeTreeIDs: OrderedSet<String>,
transitiveCompileCommandCacheKeys: OrderedSet<String>,
usesSerializedDiagnostics: Bool
Expand All @@ -131,6 +134,7 @@ package final class ClangModuleDependencyGraph {
self.modules = moduleDependencies
self.workingDirectory = workingDirectory
self.commands = commands
self.scanningCommandLine = scanningCommandLine
self.transitiveIncludeTreeIDs = transitiveIncludeTreeIDs
self.transitiveCompileCommandCacheKeys = transitiveCompileCommandCacheKeys
self.usesSerializedDiagnostics = usesSerializedDiagnostics
Expand All @@ -143,6 +147,7 @@ package final class ClangModuleDependencyGraph {
moduleDependencies: OrderedSet<Path>,
workingDirectory: Path,
command: CompileCommand,
scanningCommandLine: [String],
transitiveIncludeTreeIDs: OrderedSet<String>,
transitiveCompileCommandCacheKeys: OrderedSet<String>,
usesSerializedDiagnostics: Bool
Expand All @@ -153,33 +158,36 @@ package final class ClangModuleDependencyGraph {
self.modules = moduleDependencies
self.workingDirectory = workingDirectory
self.commands = [command]
self.scanningCommandLine = scanningCommandLine
self.transitiveIncludeTreeIDs = transitiveIncludeTreeIDs
self.transitiveCompileCommandCacheKeys = transitiveCompileCommandCacheKeys
self.usesSerializedDiagnostics = usesSerializedDiagnostics
}

package func serialize<T>(to serializer: T) where T : Serializer {
serializer.serializeAggregate(9) {
serializer.serializeAggregate(10) {
serializer.serialize(kind)
serializer.serialize(files)
serializer.serialize(includeTreeID)
serializer.serialize(modules)
serializer.serialize(workingDirectory)
serializer.serialize(commands)
serializer.serialize(scanningCommandLine)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should try to measure the impact of serializing an extra command line here. It should be shorter than the cc1 command, but we do want to keep this data small...

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the impact in the consumed storage space? Just to know what to measure.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've tested the storage space changes on Sparkle and here are my findings. The build directory has grown by 2.3MB (2,441,200 bytes) which is 0.5% of the build directory size (growth from 450,234,890 bytes to 452,676,090). Most of the growth (1.8MB) is in ExplicitPrecompiledModules where each .scan file (out of 647) has grown by 2.5-3KB. For the record, the smallest .pcm in my testing is 20KB, average is 366KB. I wasn't using short names or shallow directory structure, so the measurements should be fairly representative.

Is it a sufficient measurement or would you like to see more data?

serializer.serialize(transitiveIncludeTreeIDs)
serializer.serialize(transitiveCompileCommandCacheKeys)
serializer.serialize(usesSerializedDiagnostics)
}
}

package init(from deserializer: any Deserializer) throws {
try deserializer.beginAggregate(9)
try deserializer.beginAggregate(10)
self.kind = try deserializer.deserialize()
self.files = try deserializer.deserialize()
self.includeTreeID = try deserializer.deserialize()
self.modules = try deserializer.deserialize()
self.workingDirectory = try deserializer.deserialize()
self.commands = try deserializer.deserialize()
self.scanningCommandLine = try deserializer.deserialize()
self.transitiveIncludeTreeIDs = try deserializer.deserialize()
self.transitiveCompileCommandCacheKeys = try deserializer.deserialize()
self.usesSerializedDiagnostics = try deserializer.deserialize()
Expand Down Expand Up @@ -334,12 +342,13 @@ package final class ClangModuleDependencyGraph {
var moduleTransitiveCacheKeys: [String: OrderedSet<String>] = [:]

let fileDeps: DependencyScanner.FileDependencies
let scanningCommandLine = [compiler] + originalFileArgs
let modulesCallbackErrors = LockedValue<[any Error]>([])
let dependencyPaths = LockedValue<Set<Path>>([])
let requiredTargetDependencies = LockedValue<Set<ScanResult.RequiredDependency>>([])
do {
fileDeps = try clangWithScanner.scanner.scanDependencies(
commandLine: [compiler] + originalFileArgs,
commandLine: scanningCommandLine,
workingDirectory: workingDirectory.str,
lookupOutput: { name, contextHash, kind in
let moduleOutputPath = outputPathForModule(name, contextHash)
Expand Down Expand Up @@ -432,6 +441,7 @@ package final class ClangModuleDependencyGraph {
// Cached builds do not rely on the process working directory, and different scanner working directories should not inhibit task deduplication. The same is true if the scanner reports the working directory can be ignored.
workingDirectory: module.cache_key != nil || module.is_cwd_ignored ? Path.root : workingDirectory,
command: DependencyInfo.CompileCommand(cacheKey: module.cache_key, arguments: commandLine),
scanningCommandLine: scanningCommandLine,
transitiveIncludeTreeIDs: transitiveIncludeTreeIDs,
transitiveCompileCommandCacheKeys: transitiveCommandCacheKeys,
usesSerializedDiagnostics: usesSerializedDiagnostics)
Expand Down Expand Up @@ -513,6 +523,7 @@ package final class ClangModuleDependencyGraph {
// Cached builds do not rely on the process working directory, and different scanner working directories should not inhibit task deduplication
workingDirectory: fileDeps.commands.allSatisfy { $0.cache_key != nil } ? Path.root : workingDirectory,
commands: commands,
scanningCommandLine: scanningCommandLine,
transitiveIncludeTreeIDs: transitiveIncludeTreeIDs,
transitiveCompileCommandCacheKeys: transitiveCommandCacheKeys,
usesSerializedDiagnostics: usesSerializedDiagnostics)
Expand Down Expand Up @@ -549,6 +560,21 @@ package final class ClangModuleDependencyGraph {
return clangWithScanner.casDBs
}

/// Generates a reproducer for a dependency whose compilation failed.
///
/// - Parameters:
///   - dependency: The dependency info whose recorded scanning command line and working directory are replayed.
///   - libclangPath: Path of the libclang to load the scanner from.
///   - casOptions: CAS options used when obtaining the scanner, if any.
/// - Returns: The message produced by the scanner, or `nil` when the libclang in use
///   does not support reproducer generation.
/// - Throws: Any error raised while obtaining the scanner or generating the reproducer.
///
/// NOTE(review): `dependency.workingDirectory` appears to be normalized to `Path.root` for cached
/// builds elsewhere in this type — confirm that is the directory the scanning command line expects.
package func generateReproducer(
    forFailedDependency dependency: DependencyInfo,
    libclangPath: Path,
    casOptions: CASOptions?
) throws -> String? {
    let scannerContext = try libclangWithScanner(
        forPath: libclangPath,
        casOptions: casOptions,
        cacheFallbackIfNotAvailable: false,
        core: core
    )
    if !scannerContext.libclang.supportsReproducerGeneration {
        return nil
    }
    let commandLine = dependency.scanningCommandLine
    let workingDirectory = dependency.workingDirectory.str
    return try scannerContext.scanner.generateReproducer(
        commandLine: commandLine,
        workingDirectory: workingDirectory
    )
}

package var isEmpty: Bool {
recordedDependencyInfoRegistry.isEmpty
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,19 @@ public final class ClangCompileTaskAction: TaskAction, BuildValueValidatingTaskA
outputDelegate.emitOutput("Failed frontend command:\n")
outputDelegate.emitOutput(ByteString(encodingAsUTF8: commandString) + "\n")
}

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Related to the question about serialization overhead, we should consider whether this should be opt-in or opt-out. I'm not sure I have a strong opinion right now

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We also have plans to add reproducer-on-error. That one should be opt-in. But crashes seem to be rare enough that generating a reproducer can be done unconditionally. Honestly, the crashes seem to be rare enough that this entire feature in its current form isn't particularly useful. The value should come from iterating on the feature.

if case .some(.failed) = lastResult, case .some(.exit(.uncaughtSignal, _)) = outputDelegate.result {
do {
if let reproducerMessage = try clangModuleDependencyGraph.generateReproducer(
forFailedDependency: dependencyInfo,
libclangPath: explicitModulesPayload.libclangPath,
casOptions: explicitModulesPayload.casOptions) {
outputDelegate.emitOutput(ByteString(encodingAsUTF8: reproducerMessage) + "\n")
}
} catch {
outputDelegate.error(error.localizedDescription)
}
}
return lastResult ?? .failed
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,16 +215,30 @@ final public class PrecompileClangModuleTaskAction: TaskAction, BuildValueValida
enableStrictCASErrors: key.casOptions!.enableStrictCASErrors
)
}
} else if result == .failed && !executionDelegate.userPreferences.enableDebugActivityLogs && !executionDelegate.emitFrontendCommandLines {
let commandString = UNIXShellCommandCodec(
encodingStrategy: .backslashes,
encodingBehavior: .fullCommandLine
).encode(commandLine)

// <rdar://59354519> We need to find a way to use the generic infrastructure for displaying the command line in
// the build log.
outputDelegate.emitOutput("Failed frontend command:\n")
outputDelegate.emitOutput(ByteString(encodingAsUTF8: commandString) + "\n")
} else if result == .failed {
if !executionDelegate.userPreferences.enableDebugActivityLogs && !executionDelegate.emitFrontendCommandLines {
let commandString = UNIXShellCommandCodec(
encodingStrategy: .backslashes,
encodingBehavior: .fullCommandLine
).encode(commandLine)

// <rdar://59354519> We need to find a way to use the generic infrastructure for displaying the command line in
// the build log.
outputDelegate.emitOutput("Failed frontend command:\n")
outputDelegate.emitOutput(ByteString(encodingAsUTF8: commandString) + "\n")
}
if case .some(.exit(.uncaughtSignal, _)) = outputDelegate.result {
do {
if let reproducerMessage = try clangModuleDependencyGraph.generateReproducer(
forFailedDependency: dependencyInfo,
libclangPath: key.libclangPath,
casOptions: key.casOptions) {
outputDelegate.emitOutput(ByteString(encodingAsUTF8: reproducerMessage) + "\n")
}
} catch {
outputDelegate.error(error.localizedDescription)
}
}
}
return result
} catch {
Expand Down
Loading