diff --git a/src/Cargo.lock b/src/Cargo.lock index 18d97972cd3e5..31742023d46f0 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -1517,11 +1517,11 @@ dependencies = [ name = "rustc_trans" version = "0.0.0" dependencies = [ - "crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", "flate2 0.2.19 (registry+https://github.com/rust-lang/crates.io-index)", "gcc 0.3.51 (registry+https://github.com/rust-lang/crates.io-index)", "jobserver 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", + "num_cpus 1.6.2 (registry+https://github.com/rust-lang/crates.io-index)", "owning_ref 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "rustc 0.0.0", "rustc-demangle 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/src/librustc/middle/cstore.rs b/src/librustc/middle/cstore.rs index 48bddf2f71759..b1f4aa69adb9f 100644 --- a/src/librustc/middle/cstore.rs +++ b/src/librustc/middle/cstore.rs @@ -50,7 +50,7 @@ pub use self::NativeLibraryKind::*; // lonely orphan structs and enums looking for a better home -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Copy)] pub struct LinkMeta { pub crate_hash: Svh, } @@ -161,15 +161,13 @@ pub struct ExternCrate { } pub struct EncodedMetadata { - pub raw_data: Vec, - pub hashes: EncodedMetadataHashes, + pub raw_data: Vec } impl EncodedMetadata { pub fn new() -> EncodedMetadata { EncodedMetadata { raw_data: Vec::new(), - hashes: EncodedMetadataHashes::new(), } } } @@ -294,7 +292,7 @@ pub trait CrateStore { tcx: TyCtxt<'a, 'tcx, 'tcx>, link_meta: &LinkMeta, reachable: &NodeSet) - -> EncodedMetadata; + -> (EncodedMetadata, EncodedMetadataHashes); fn metadata_encoding_version(&self) -> &[u8]; } @@ -424,7 +422,7 @@ impl CrateStore for DummyCrateStore { tcx: TyCtxt<'a, 'tcx, 'tcx>, link_meta: &LinkMeta, reachable: &NodeSet) - -> EncodedMetadata { + -> (EncodedMetadata, EncodedMetadataHashes) { bug!("encode_metadata") } fn metadata_encoding_version(&self) -> &[u8] { bug!("metadata_encoding_version") } diff --git a/src/librustc/session/config.rs b/src/librustc/session/config.rs index 8b55eb4c099ae..4a9fbbe6f157d 100644 --- a/src/librustc/session/config.rs +++ b/src/librustc/session/config.rs @@ -1059,6 +1059,8 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options, "choose which RELRO level to use"), nll: bool = (false, parse_bool, [UNTRACKED], "run the non-lexical lifetimes MIR pass"), + trans_time_graph: bool = (false, parse_bool, [UNTRACKED], + "generate a graphical HTML report of time spent in trans and LLVM"), } pub fn default_lib_output() -> CrateType { @@ -1498,6 +1500,23 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches) early_error(error_format, "Value for codegen units must be a positive nonzero integer"); } + // It's possible that we have `codegen_units > 1` but only one item in + // `trans.modules`. We could theoretically proceed and do LTO in that + // case, but it would be confusing to have the validity of + // `-Z lto -C codegen-units=2` depend on details of the crate being + // compiled, so we complain regardless. + if cg.lto && cg.codegen_units > 1 { + // This case is impossible to handle because LTO expects to be able + // to combine the entire crate and all its dependencies into a + // single compilation unit, but each codegen unit is in a separate + // LLVM context, so they can't easily be combined. + early_error(error_format, "can't perform LTO when using multiple codegen units"); + } + + if cg.lto && debugging_opts.incremental.is_some() { + early_error(error_format, "can't perform LTO when compiling incrementally"); + } + let mut prints = Vec::::new(); if cg.target_cpu.as_ref().map_or(false, |s| s == "help") { prints.push(PrintRequest::TargetCPUs); diff --git a/src/librustc/util/common.rs b/src/librustc/util/common.rs index 17564671a1e36..244b7f3596889 100644 --- a/src/librustc/util/common.rs +++ b/src/librustc/util/common.rs @@ -57,6 +57,32 @@ pub fn time(do_it: bool, what: &str, f: F) -> T where let rv = f(); let dur = start.elapsed(); + print_time_passes_entry_internal(what, dur); + + TIME_DEPTH.with(|slot| slot.set(old)); + + rv +} + +pub fn print_time_passes_entry(do_it: bool, what: &str, dur: Duration) { + if !do_it { + return + } + + let old = TIME_DEPTH.with(|slot| { + let r = slot.get(); + slot.set(r + 1); + r + }); + + print_time_passes_entry_internal(what, dur); + + TIME_DEPTH.with(|slot| slot.set(old)); +} + +fn print_time_passes_entry_internal(what: &str, dur: Duration) { + let indentation = TIME_DEPTH.with(|slot| slot.get()); + let mem_string = match get_resident() { Some(n) => { let mb = n as f64 / 1_000_000.0; @@ -65,14 +91,10 @@ pub fn time(do_it: bool, what: &str, f: F) -> T where None => "".to_owned(), }; println!("{}time: {}{}\t{}", - repeat(" ").take(old).collect::(), + repeat(" ").take(indentation).collect::(), duration_to_secs_str(dur), mem_string, what); - - TIME_DEPTH.with(|slot| slot.set(old)); - - rv } // Hack up our own formatting for the duration to make it easier for scripts diff --git a/src/librustc_driver/driver.rs b/src/librustc_driver/driver.rs index c592882a1e43b..ee9d30b58fef4 100644 --- a/src/librustc_driver/driver.rs +++ b/src/librustc_driver/driver.rs @@ -15,8 +15,7 @@ use rustc_data_structures::stable_hasher::StableHasher; use rustc_mir as mir; use rustc::session::{Session, CompileResult}; use rustc::session::CompileIncomplete; -use rustc::session::config::{self, Input, OutputFilenames, OutputType, - OutputTypes}; +use rustc::session::config::{self, Input, OutputFilenames, OutputType}; use rustc::session::search_paths::PathKind; use rustc::lint; use rustc::middle::{self, dependency_format, stability, reachable}; @@ -26,7 +25,6 @@ use rustc::ty::{self, TyCtxt, Resolutions, GlobalArenas}; use rustc::traits; use rustc::util::common::{ErrorReported, time}; use rustc::util::nodemap::NodeSet; -use rustc::util::fs::rename_or_copy_remove; use rustc_allocator as allocator; use rustc_borrowck as borrowck; use rustc_incremental::{self, IncrementalHashesMap}; @@ -208,7 +206,7 @@ pub fn compile_input(sess: &Session, println!("Pre-trans"); tcx.print_debug_stats(); } - let trans = phase_4_translate_to_llvm(tcx, analysis, &incremental_hashes_map, + let trans = phase_4_translate_to_llvm(tcx, analysis, incremental_hashes_map, &outputs); if log_enabled!(::log::LogLevel::Info) { @@ -231,7 +229,7 @@ pub fn compile_input(sess: &Session, sess.code_stats.borrow().print_type_sizes(); } - let phase5_result = phase_5_run_llvm_passes(sess, &trans, &outputs); + let (phase5_result, trans) = phase_5_run_llvm_passes(sess, trans); controller_entry_point!(after_llvm, sess, @@ -239,8 +237,6 @@ pub fn compile_input(sess: &Session, phase5_result); phase5_result?; - write::cleanup_llvm(&trans); - phase_6_link_output(sess, &trans, &outputs); // Now that we won't touch anything in the incremental compilation directory @@ -1055,9 +1051,9 @@ pub fn phase_3_run_analysis_passes<'tcx, F, R>(sess: &'tcx Session, /// be discarded. pub fn phase_4_translate_to_llvm<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, analysis: ty::CrateAnalysis, - incremental_hashes_map: &IncrementalHashesMap, + incremental_hashes_map: IncrementalHashesMap, output_filenames: &OutputFilenames) - -> trans::CrateTranslation { + -> write::OngoingCrateTranslation { let time_passes = tcx.sess.time_passes(); time(time_passes, @@ -1067,63 +1063,27 @@ pub fn phase_4_translate_to_llvm<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, let translation = time(time_passes, "translation", - move || trans::trans_crate(tcx, analysis, &incremental_hashes_map, output_filenames)); - - time(time_passes, - "assert dep graph", - || rustc_incremental::assert_dep_graph(tcx)); + move || trans::trans_crate(tcx, analysis, incremental_hashes_map, output_filenames)); - time(time_passes, - "serialize dep graph", - || rustc_incremental::save_dep_graph(tcx, - &incremental_hashes_map, - &translation.metadata.hashes, - translation.link.crate_hash)); translation } /// Run LLVM itself, producing a bitcode file, assembly file or object file /// as a side effect. pub fn phase_5_run_llvm_passes(sess: &Session, - trans: &trans::CrateTranslation, - outputs: &OutputFilenames) -> CompileResult { - if sess.opts.cg.no_integrated_as || - (sess.target.target.options.no_integrated_as && - (outputs.outputs.contains_key(&OutputType::Object) || - outputs.outputs.contains_key(&OutputType::Exe))) - { - let output_types = OutputTypes::new(&[(OutputType::Assembly, None)]); - time(sess.time_passes(), - "LLVM passes", - || write::run_passes(sess, trans, &output_types, outputs)); - - write::run_assembler(sess, outputs); - - // HACK the linker expects the object file to be named foo.0.o but - // `run_assembler` produces an object named just foo.o. Rename it if we - // are going to build an executable - if sess.opts.output_types.contains_key(&OutputType::Exe) { - let f = outputs.path(OutputType::Object); - rename_or_copy_remove(&f, - f.with_file_name(format!("{}.0.o", - f.file_stem().unwrap().to_string_lossy()))).unwrap(); - } + trans: write::OngoingCrateTranslation) + -> (CompileResult, trans::CrateTranslation) { + let trans = trans.join(sess); - // Remove assembly source, unless --save-temps was specified - if !sess.opts.cg.save_temps { - fs::remove_file(&outputs.temp_path(OutputType::Assembly, None)).unwrap(); - } - } else { - time(sess.time_passes(), - "LLVM passes", - || write::run_passes(sess, trans, &sess.opts.output_types, outputs)); + if sess.opts.debugging_opts.incremental_info { + write::dump_incremental_data(&trans); } time(sess.time_passes(), "serialize work products", move || rustc_incremental::save_work_products(sess)); - sess.compile_status() + (sess.compile_status(), trans) } /// Run the linker on any artifacts that resulted from the LLVM run. diff --git a/src/librustc_incremental/persist/save.rs b/src/librustc_incremental/persist/save.rs index 1bdd4f851fb13..339e2bdc15734 100644 --- a/src/librustc_incremental/persist/save.rs +++ b/src/librustc_incremental/persist/save.rs @@ -34,7 +34,7 @@ use super::file_format; use super::work_product; pub fn save_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, - incremental_hashes_map: &IncrementalHashesMap, + incremental_hashes_map: IncrementalHashesMap, metadata_hashes: &EncodedMetadataHashes, svh: Svh) { debug!("save_dep_graph()"); @@ -51,7 +51,7 @@ pub fn save_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, eprintln!("incremental: {} edges in dep-graph", query.graph.len_edges()); } - let mut hcx = HashContext::new(tcx, incremental_hashes_map); + let mut hcx = HashContext::new(tcx, &incremental_hashes_map); let preds = Predecessors::new(&query, &mut hcx); let mut current_metadata_hashes = FxHashMap(); diff --git a/src/librustc_metadata/cstore_impl.rs b/src/librustc_metadata/cstore_impl.rs index 25079613e586d..e8b0dea1e8ac0 100644 --- a/src/librustc_metadata/cstore_impl.rs +++ b/src/librustc_metadata/cstore_impl.rs @@ -15,7 +15,8 @@ use schema; use rustc::ty::maps::QueryConfig; use rustc::middle::cstore::{CrateStore, CrateSource, LibSource, DepKind, NativeLibrary, MetadataLoader, LinkMeta, - LinkagePreference, LoadedMacro, EncodedMetadata}; + LinkagePreference, LoadedMacro, EncodedMetadata, + EncodedMetadataHashes}; use rustc::hir::def; use rustc::middle::lang_items; use rustc::session::Session; @@ -443,7 +444,7 @@ impl CrateStore for cstore::CStore { tcx: TyCtxt<'a, 'tcx, 'tcx>, link_meta: &LinkMeta, reachable: &NodeSet) - -> EncodedMetadata + -> (EncodedMetadata, EncodedMetadataHashes) { encoder::encode_metadata(tcx, link_meta, reachable) } diff --git a/src/librustc_metadata/encoder.rs b/src/librustc_metadata/encoder.rs index 5d73abc3ee8b8..c35d8407c9d3c 100644 --- a/src/librustc_metadata/encoder.rs +++ b/src/librustc_metadata/encoder.rs @@ -1638,7 +1638,7 @@ impl<'a, 'tcx, 'v> ItemLikeVisitor<'v> for ImplVisitor<'a, 'tcx> { pub fn encode_metadata<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, link_meta: &LinkMeta, exported_symbols: &NodeSet) - -> EncodedMetadata + -> (EncodedMetadata, EncodedMetadataHashes) { let mut cursor = Cursor::new(vec![]); cursor.write_all(METADATA_HEADER).unwrap(); @@ -1681,10 +1681,7 @@ pub fn encode_metadata<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, result[header + 2] = (pos >> 8) as u8; result[header + 3] = (pos >> 0) as u8; - EncodedMetadata { - raw_data: result, - hashes: metadata_hashes, - } + (EncodedMetadata { raw_data: result }, metadata_hashes) } pub fn get_repr_options<'a, 'tcx, 'gcx>(tcx: &TyCtxt<'a, 'tcx, 'gcx>, did: DefId) -> ReprOptions { diff --git a/src/librustc_trans/Cargo.toml b/src/librustc_trans/Cargo.toml index c7db2a9a8ae7d..ed9321cc3f3a1 100644 --- a/src/librustc_trans/Cargo.toml +++ b/src/librustc_trans/Cargo.toml @@ -10,7 +10,7 @@ crate-type = ["dylib"] test = false [dependencies] -crossbeam = "0.2" +num_cpus = "1.0" flate2 = "0.2" jobserver = "0.1.5" log = "0.3" diff --git a/src/librustc_trans/assert_module_sources.rs b/src/librustc_trans/assert_module_sources.rs index b5ef4aac34c89..6e661a5a8c6a4 100644 --- a/src/librustc_trans/assert_module_sources.rs +++ b/src/librustc_trans/assert_module_sources.rs @@ -37,11 +37,22 @@ use rustc::ich::{ATTR_PARTITION_REUSED, ATTR_PARTITION_TRANSLATED}; const MODULE: &'static str = "module"; const CFG: &'static str = "cfg"; -#[derive(Debug, PartialEq)] -enum Disposition { Reused, Translated } +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum Disposition { Reused, Translated } + +impl ModuleTranslation { + pub fn disposition(&self) -> (String, Disposition) { + let disposition = match self.source { + ModuleSource::Preexisting(_) => Disposition::Reused, + ModuleSource::Translated(_) => Disposition::Translated, + }; + + (self.name.clone(), disposition) + } +} pub(crate) fn assert_module_sources<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, - modules: &[ModuleTranslation]) { + modules: &[(String, Disposition)]) { let _ignore = tcx.dep_graph.in_ignore(); if tcx.sess.opts.incremental.is_none() { @@ -56,7 +67,7 @@ pub(crate) fn assert_module_sources<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, struct AssertModuleSource<'a, 'tcx: 'a> { tcx: TyCtxt<'a, 'tcx, 'tcx>, - modules: &'a [ModuleTranslation], + modules: &'a [(String, Disposition)], } impl<'a, 'tcx> AssertModuleSource<'a, 'tcx> { @@ -75,15 +86,15 @@ impl<'a, 'tcx> AssertModuleSource<'a, 'tcx> { } let mname = self.field(attr, MODULE); - let mtrans = self.modules.iter().find(|mtrans| *mtrans.name == *mname.as_str()); + let mtrans = self.modules.iter().find(|&&(ref name, _)| name == mname.as_str()); let mtrans = match mtrans { Some(m) => m, None => { debug!("module name `{}` not found amongst:", mname); - for mtrans in self.modules { + for &(ref name, ref disposition) in self.modules { debug!("module named `{}` with disposition {:?}", - mtrans.name, - self.disposition(mtrans)); + name, + disposition); } self.tcx.sess.span_err( @@ -93,7 +104,7 @@ impl<'a, 'tcx> AssertModuleSource<'a, 'tcx> { } }; - let mtrans_disposition = self.disposition(mtrans); + let mtrans_disposition = mtrans.1; if disposition != mtrans_disposition { self.tcx.sess.span_err( attr.span, @@ -104,13 +115,6 @@ impl<'a, 'tcx> AssertModuleSource<'a, 'tcx> { } } - fn disposition(&self, mtrans: &ModuleTranslation) -> Disposition { - match mtrans.source { - ModuleSource::Preexisting(_) => Disposition::Reused, - ModuleSource::Translated(_) => Disposition::Translated, - } - } - fn field(&self, attr: &ast::Attribute, name: &str) -> ast::Name { for item in attr.meta_item_list().unwrap_or_else(Vec::new) { if item.check_name(name) { diff --git a/src/librustc_trans/back/lto.rs b/src/librustc_trans/back/lto.rs index feed127b0b60b..e160d6b6c6ab3 100644 --- a/src/librustc_trans/back/lto.rs +++ b/src/librustc_trans/back/lto.rs @@ -12,7 +12,7 @@ use back::link; use back::write; use back::symbol_export; use rustc::session::config; -use errors::FatalError; +use errors::{FatalError, Handler}; use llvm; use llvm::archive_ro::ArchiveRO; use llvm::{ModuleRef, TargetMachineRef, True, False}; @@ -41,24 +41,24 @@ pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool { } pub fn run(cgcx: &CodegenContext, + diag_handler: &Handler, llmod: ModuleRef, tm: TargetMachineRef, config: &ModuleConfig, temp_no_opt_bc_filename: &Path) -> Result<(), FatalError> { - let handler = cgcx.handler; if cgcx.opts.cg.prefer_dynamic { - handler.struct_err("cannot prefer dynamic linking when performing LTO") - .note("only 'staticlib', 'bin', and 'cdylib' outputs are \ - supported with LTO") - .emit(); + diag_handler.struct_err("cannot prefer dynamic linking when performing LTO") + .note("only 'staticlib', 'bin', and 'cdylib' outputs are \ + supported with LTO") + .emit(); return Err(FatalError) } // Make sure we actually can run LTO for crate_type in cgcx.crate_types.iter() { if !crate_type_allows_lto(*crate_type) { - let e = handler.fatal("lto can only be run for executables, cdylibs and \ - static library outputs"); + let e = diag_handler.fatal("lto can only be run for executables, cdylibs and \ + static library outputs"); return Err(e) } } @@ -116,13 +116,13 @@ pub fn run(cgcx: &CodegenContext, if res.is_err() { let msg = format!("failed to decompress bc of `{}`", name); - Err(handler.fatal(&msg)) + Err(diag_handler.fatal(&msg)) } else { Ok(inflated) } } else { - Err(handler.fatal(&format!("Unsupported bytecode format version {}", - version))) + Err(diag_handler.fatal(&format!("Unsupported bytecode format version {}", + version))) } })? } else { @@ -136,7 +136,7 @@ pub fn run(cgcx: &CodegenContext, if res.is_err() { let msg = format!("failed to decompress bc of `{}`", name); - Err(handler.fatal(&msg)) + Err(diag_handler.fatal(&msg)) } else { Ok(inflated) } @@ -152,7 +152,7 @@ pub fn run(cgcx: &CodegenContext, Ok(()) } else { let msg = format!("failed to load bc of `{}`", name); - Err(write::llvm_err(handler, msg)) + Err(write::llvm_err(&diag_handler, msg)) } })?; } diff --git a/src/librustc_trans/back/write.rs b/src/librustc_trans/back/write.rs index 26553c85023b7..0d5fe6c0ae95f 100644 --- a/src/librustc_trans/back/write.rs +++ b/src/librustc_trans/back/write.rs @@ -10,36 +10,42 @@ use back::lto; use back::link::{self, get_linker, remove}; +use back::linker::LinkerInfo; use back::symbol_export::ExportedSymbols; use rustc_incremental::{save_trans_partition, in_incr_comp_dir}; +use rustc::middle::cstore::{LinkMeta, EncodedMetadata}; use rustc::session::config::{self, OutputFilenames, OutputType, OutputTypes, Passes, SomePasses, AllPasses, Sanitizer}; use rustc::session::Session; +use time_graph::{self, TimeGraph}; use llvm; use llvm::{ModuleRef, TargetMachineRef, PassManagerRef, DiagnosticInfoRef}; use llvm::SMDiagnosticRef; -use {CrateTranslation, ModuleLlvm, ModuleSource, ModuleTranslation}; +use {CrateTranslation, ModuleSource, ModuleTranslation, CompiledModule, ModuleKind}; use rustc::hir::def_id::CrateNum; -use rustc::util::common::{time, time_depth, set_time_depth, path2cstr}; -use rustc::util::fs::link_or_copy; +use rustc::util::common::{time, time_depth, set_time_depth, path2cstr, print_time_passes_entry}; +use rustc::util::fs::{link_or_copy, rename_or_copy_remove}; use errors::{self, Handler, Level, DiagnosticBuilder, FatalError}; -use errors::emitter::Emitter; +use errors::emitter::{Emitter}; use syntax::ext::hygiene::Mark; use syntax_pos::MultiSpan; +use syntax_pos::symbol::Symbol; use context::{is_pie_binary, get_reloc_model}; use jobserver::{Client, Acquired}; -use crossbeam::{scope, Scope}; use rustc_demangle; -use std::cmp; use std::ffi::CString; +use std::fmt; use std::fs; use std::io; use std::io::Write; use std::path::{Path, PathBuf}; use std::str; -use std::sync::mpsc::{channel, Sender}; +use std::sync::Arc; +use std::sync::mpsc::{channel, Sender, Receiver}; use std::slice; +use std::time::Instant; +use std::thread; use libc::{c_uint, c_void, c_char, size_t}; pub const RELOC_MODEL_ARGS : [(&'static str, llvm::RelocMode); 7] = [ @@ -190,7 +196,6 @@ pub fn create_target_machine(sess: &Session) -> TargetMachineRef { /// Module-specific configuration for `optimize_and_codegen`. -#[derive(Clone)] pub struct ModuleConfig { /// LLVM TargetMachine to use for codegen. tm: TargetMachineRef, @@ -229,9 +234,9 @@ pub struct ModuleConfig { unsafe impl Send for ModuleConfig { } impl ModuleConfig { - fn new(tm: TargetMachineRef, passes: Vec) -> ModuleConfig { + fn new(sess: &Session, passes: Vec) -> ModuleConfig { ModuleConfig { - tm: tm, + tm: create_target_machine(sess), passes: passes, opt_level: None, opt_size: None, @@ -255,10 +260,10 @@ impl ModuleConfig { } } - fn set_flags(&mut self, sess: &Session, trans: &CrateTranslation) { + fn set_flags(&mut self, sess: &Session, no_builtins: bool) { self.no_verify = sess.no_verify(); self.no_prepopulate_passes = sess.opts.cg.no_prepopulate_passes; - self.no_builtins = trans.no_builtins; + self.no_builtins = no_builtins; self.time_passes = sess.time_passes(); self.inline_threshold = sess.opts.cg.inline_threshold; self.obj_is_bitcode = sess.target.target.options.obj_is_bitcode; @@ -279,20 +284,55 @@ impl ModuleConfig { self.merge_functions = sess.opts.optimize == config::OptLevel::Default || sess.opts.optimize == config::OptLevel::Aggressive; } + + fn clone(&self, sess: &Session) -> ModuleConfig { + ModuleConfig { + tm: create_target_machine(sess), + passes: self.passes.clone(), + opt_level: self.opt_level, + opt_size: self.opt_size, + + emit_no_opt_bc: self.emit_no_opt_bc, + emit_bc: self.emit_bc, + emit_lto_bc: self.emit_lto_bc, + emit_ir: self.emit_ir, + emit_asm: self.emit_asm, + emit_obj: self.emit_obj, + obj_is_bitcode: self.obj_is_bitcode, + + no_verify: self.no_verify, + no_prepopulate_passes: self.no_prepopulate_passes, + no_builtins: self.no_builtins, + time_passes: self.time_passes, + vectorize_loop: self.vectorize_loop, + vectorize_slp: self.vectorize_slp, + merge_functions: self.merge_functions, + inline_threshold: self.inline_threshold, + } + } +} + +impl Drop for ModuleConfig { + fn drop(&mut self) { + unsafe { + llvm::LLVMRustDisposeTargetMachine(self.tm); + } + } } /// Additional resources used by optimize_and_codegen (not module specific) -pub struct CodegenContext<'a> { +#[derive(Clone)] +pub struct CodegenContext { // Resouces needed when running LTO pub time_passes: bool, pub lto: bool, pub no_landing_pads: bool, - pub exported_symbols: &'a ExportedSymbols, - pub opts: &'a config::Options, + pub exported_symbols: Arc, + pub opts: Arc, pub crate_types: Vec, pub each_linked_rlib_for_lto: Vec<(CrateNum, PathBuf)>, // Handler to use for diagnostics produced during codegen. - pub handler: &'a Handler, + pub diag_emitter: SharedEmitter, // LLVM passes added by plugins. pub plugin_passes: Vec, // LLVM optimizations for which we want to print remarks. @@ -303,17 +343,27 @@ pub struct CodegenContext<'a> { // compiling incrementally pub incr_comp_session_dir: Option, // Channel back to the main control thread to send messages to - pub tx: Sender, + coordinator_send: Sender, + // A reference to the TimeGraph so we can register timings. None means that + // measuring is disabled. + time_graph: Option, +} + +impl CodegenContext { + fn create_diag_handler(&self) -> Handler { + Handler::with_emitter(true, false, Box::new(self.diag_emitter.clone())) + } } struct HandlerFreeVars<'a> { - cgcx: &'a CodegenContext<'a>, + cgcx: &'a CodegenContext, + diag_handler: &'a Handler, } -unsafe extern "C" fn report_inline_asm<'a, 'b>(cgcx: &'a CodegenContext<'a>, +unsafe extern "C" fn report_inline_asm<'a, 'b>(cgcx: &'a CodegenContext, msg: &'b str, cookie: c_uint) { - drop(cgcx.tx.send(Message::InlineAsmError(cookie as u32, msg.to_string()))); + cgcx.diag_emitter.inline_asm_error(cookie as u32, msg.to_string()); } unsafe extern "C" fn inline_asm_handler(diag: SMDiagnosticRef, @@ -328,7 +378,7 @@ unsafe extern "C" fn inline_asm_handler(diag: SMDiagnosticRef, } unsafe extern "C" fn diagnostic_handler(info: DiagnosticInfoRef, user: *mut c_void) { - let HandlerFreeVars { cgcx, .. } = *(user as *const HandlerFreeVars); + let HandlerFreeVars { cgcx, diag_handler, .. } = *(user as *const HandlerFreeVars); match llvm::diagnostic::Diagnostic::unpack(info) { llvm::diagnostic::InlineAsm(inline) => { @@ -344,7 +394,7 @@ unsafe extern "C" fn diagnostic_handler(info: DiagnosticInfoRef, user: *mut c_vo }; if enabled { - cgcx.handler.note_without_error(&format!("optimization {} for {} at {}:{}:{}: {}", + diag_handler.note_without_error(&format!("optimization {} for {} at {}:{}:{}: {}", opt.kind.describe(), opt.pass_name, opt.filename, @@ -360,25 +410,32 @@ unsafe extern "C" fn diagnostic_handler(info: DiagnosticInfoRef, user: *mut c_vo // Unsafe due to LLVM calls. unsafe fn optimize_and_codegen(cgcx: &CodegenContext, + diag_handler: &Handler, mtrans: ModuleTranslation, - mllvm: ModuleLlvm, config: ModuleConfig, output_names: OutputFilenames) - -> Result<(), FatalError> + -> Result { - let llmod = mllvm.llmod; - let llcx = mllvm.llcx; + let (llmod, llcx) = match mtrans.source { + ModuleSource::Translated(ref llvm) => (llvm.llmod, llvm.llcx), + ModuleSource::Preexisting(_) => { + bug!("optimize_and_codegen: called with ModuleSource::Preexisting") + } + }; + let tm = config.tm; let fv = HandlerFreeVars { cgcx: cgcx, + diag_handler: diag_handler, }; let fv = &fv as *const HandlerFreeVars as *mut c_void; llvm::LLVMRustSetInlineAsmDiagnosticHandler(llcx, inline_asm_handler, fv); llvm::LLVMContextSetDiagnosticHandler(llcx, diagnostic_handler, fv); - let module_name = Some(&mtrans.name[..]); + let module_name = mtrans.name.clone(); + let module_name = Some(&module_name[..]); if config.emit_no_opt_bc { let out = output_names.temp_path_ext("no-opt.bc", module_name); @@ -406,7 +463,7 @@ unsafe fn optimize_and_codegen(cgcx: &CodegenContext, llvm::PassKind::Function => fpm, llvm::PassKind::Module => mpm, llvm::PassKind::Other => { - cgcx.handler.err("Encountered LLVM pass kind we can't handle"); + diag_handler.err("Encountered LLVM pass kind we can't handle"); return true }, }; @@ -426,25 +483,25 @@ unsafe fn optimize_and_codegen(cgcx: &CodegenContext, for pass in &config.passes { if !addpass(pass) { - cgcx.handler.warn(&format!("unknown pass `{}`, ignoring", + diag_handler.warn(&format!("unknown pass `{}`, ignoring", pass)); } } for pass in &cgcx.plugin_passes { if !addpass(pass) { - cgcx.handler.err(&format!("a plugin asked for LLVM pass \ + diag_handler.err(&format!("a plugin asked for LLVM pass \ `{}` but LLVM does not \ recognize it", pass)); } } - cgcx.handler.abort_if_errors(); + diag_handler.abort_if_errors(); // Finally, run the actual optimization passes - time(config.time_passes, &format!("llvm function passes [{}]", cgcx.worker), || + time(config.time_passes, &format!("llvm function passes [{}]", module_name.unwrap()), || llvm::LLVMRustRunFunctionPassManager(fpm, llmod)); - time(config.time_passes, &format!("llvm module passes [{}]", cgcx.worker), || + time(config.time_passes, &format!("llvm module passes [{}]", module_name.unwrap()), || llvm::LLVMRunPassManager(mpm, llmod)); // Deallocate managers that we're now done with @@ -456,6 +513,7 @@ unsafe fn optimize_and_codegen(cgcx: &CodegenContext, let temp_no_opt_bc_filename = output_names.temp_path_ext("no-opt.lto.bc", module_name); lto::run(cgcx, + diag_handler, llmod, tm, &config, @@ -506,7 +564,7 @@ unsafe fn optimize_and_codegen(cgcx: &CodegenContext, llvm::LLVMWriteBitcodeToFile(llmod, bc_out_c.as_ptr()); } - time(config.time_passes, &format!("codegen passes [{}]", cgcx.worker), + time(config.time_passes, &format!("codegen passes [{}]", module_name.unwrap()), || -> Result<(), FatalError> { if config.emit_ir { let out = output_names.temp_path(OutputType::LlvmAssembly, module_name); @@ -561,7 +619,7 @@ unsafe fn optimize_and_codegen(cgcx: &CodegenContext, llmod }; with_codegen(tm, llmod, config.no_builtins, |cpm| { - write_output_file(cgcx.handler, tm, cpm, llmod, &path, + write_output_file(diag_handler, tm, cpm, llmod, &path, llvm::FileType::AssemblyFile) })?; if config.emit_obj { @@ -571,7 +629,7 @@ unsafe fn optimize_and_codegen(cgcx: &CodegenContext, if write_obj { with_codegen(tm, llmod, config.no_builtins, |cpm| { - write_output_file(cgcx.handler, tm, cpm, llmod, &obj_out, + write_output_file(diag_handler, tm, cpm, llmod, &obj_out, llvm::FileType::ObjectFile) })?; } @@ -582,67 +640,53 @@ unsafe fn optimize_and_codegen(cgcx: &CodegenContext, if copy_bc_to_obj { debug!("copying bitcode {:?} to obj {:?}", bc_out, obj_out); if let Err(e) = link_or_copy(&bc_out, &obj_out) { - cgcx.handler.err(&format!("failed to copy bitcode to object file: {}", e)); + diag_handler.err(&format!("failed to copy bitcode to object file: {}", e)); } } if rm_bc { debug!("removing_bitcode {:?}", bc_out); if let Err(e) = fs::remove_file(&bc_out) { - cgcx.handler.err(&format!("failed to remove bitcode: {}", e)); + diag_handler.err(&format!("failed to remove bitcode: {}", e)); } } - llvm::LLVMRustDisposeTargetMachine(tm); - Ok(()) + Ok(mtrans.into_compiled_module(config.emit_obj, config.emit_bc)) } - -pub fn cleanup_llvm(trans: &CrateTranslation) { - for module in trans.modules.iter() { - unsafe { - match module.source { - ModuleSource::Translated(llvm) => { - llvm::LLVMDisposeModule(llvm.llmod); - llvm::LLVMContextDispose(llvm.llcx); - } - ModuleSource::Preexisting(_) => { - } - } - } - } +pub struct CompiledModules { + pub modules: Vec, + pub metadata_module: CompiledModule, + pub allocator_module: Option, } -pub fn run_passes(sess: &Session, - trans: &CrateTranslation, - output_types: &OutputTypes, - crate_output: &OutputFilenames) { - // It's possible that we have `codegen_units > 1` but only one item in - // `trans.modules`. We could theoretically proceed and do LTO in that - // case, but it would be confusing to have the validity of - // `-Z lto -C codegen-units=2` depend on details of the crate being - // compiled, so we complain regardless. - if sess.lto() && sess.opts.cg.codegen_units > 1 { - // This case is impossible to handle because LTO expects to be able - // to combine the entire crate and all its dependencies into a - // single compilation unit, but each codegen unit is in a separate - // LLVM context, so they can't easily be combined. - sess.fatal("can't perform LTO when using multiple codegen units"); - } - - // Sanity check - assert!(trans.modules.len() == sess.opts.cg.codegen_units || - sess.opts.debugging_opts.incremental.is_some() || - !sess.opts.output_types.should_trans() || - sess.opts.debugging_opts.no_trans); +fn need_crate_bitcode_for_rlib(sess: &Session) -> bool { + sess.crate_types.borrow().contains(&config::CrateTypeRlib) && + sess.opts.output_types.contains_key(&OutputType::Exe) +} - let tm = create_target_machine(sess); +pub fn start_async_translation(sess: &Session, + crate_output: &OutputFilenames, + time_graph: Option, + crate_name: Symbol, + link: LinkMeta, + metadata: EncodedMetadata, + exported_symbols: Arc, + no_builtins: bool, + windows_subsystem: Option, + linker_info: LinkerInfo, + no_integrated_as: bool) + -> OngoingCrateTranslation { + let output_types_override = if no_integrated_as { + OutputTypes::new(&[(OutputType::Assembly, None)]) + } else { + sess.opts.output_types.clone() + }; // Figure out what we actually need to build. - - let mut modules_config = ModuleConfig::new(tm, sess.opts.cg.passes.clone()); - let mut metadata_config = ModuleConfig::new(tm, vec![]); - let mut allocator_config = ModuleConfig::new(tm, vec![]); + let mut modules_config = ModuleConfig::new(sess, sess.opts.cg.passes.clone()); + let mut metadata_config = ModuleConfig::new(sess, vec![]); + let mut allocator_config = ModuleConfig::new(sess, vec![]); if let Some(ref sanitizer) = sess.opts.debugging_opts.sanitizer { match *sanitizer { @@ -679,16 +723,11 @@ pub fn run_passes(sess: &Session, // Emit bitcode files for the crate if we're emitting an rlib. // Whenever an rlib is created, the bitcode is inserted into the // archive in order to allow LTO against it. - let needs_crate_bitcode = - sess.crate_types.borrow().contains(&config::CrateTypeRlib) && - sess.opts.output_types.contains_key(&OutputType::Exe); - let needs_crate_object = - sess.opts.output_types.contains_key(&OutputType::Exe); - if needs_crate_bitcode { + if need_crate_bitcode_for_rlib(sess) { modules_config.emit_bc = true; } - for output_type in output_types.keys() { + for output_type in output_types_override.keys() { match *output_type { OutputType::Bitcode => { modules_config.emit_bc = true; } OutputType::LlvmAssembly => { modules_config.emit_ir = true; } @@ -714,76 +753,86 @@ pub fn run_passes(sess: &Session, } } - modules_config.set_flags(sess, trans); - metadata_config.set_flags(sess, trans); - allocator_config.set_flags(sess, trans); - + modules_config.set_flags(sess, no_builtins); + metadata_config.set_flags(sess, no_builtins); + allocator_config.set_flags(sess, no_builtins); - // Populate a buffer with a list of codegen threads. Items are processed in - // LIFO order, just because it's a tiny bit simpler that way. (The order - // doesn't actually matter.) - let mut work_items = Vec::with_capacity(1 + trans.modules.len()); - - { - let work = build_work_item(sess, - trans.metadata_module.clone(), - metadata_config.clone(), - crate_output.clone()); - work_items.push(work); - } - - if let Some(allocator) = trans.allocator_module.clone() { - let work = build_work_item(sess, - allocator, - allocator_config.clone(), - crate_output.clone()); - work_items.push(work); - } - - for mtrans in trans.modules.iter() { - let work = build_work_item(sess, - mtrans.clone(), - modules_config.clone(), - crate_output.clone()); - work_items.push(work); - } - - if sess.opts.debugging_opts.incremental_info { - dump_incremental_data(&trans); - } + // Exclude metadata and allocator modules from time_passes output, since + // they throw off the "LLVM passes" measurement. + metadata_config.time_passes = false; + allocator_config.time_passes = false; let client = sess.jobserver_from_env.clone().unwrap_or_else(|| { // Pick a "reasonable maximum" if we don't otherwise have a jobserver in // our environment, capping out at 32 so we don't take everything down // by hogging the process run queue. - let num_workers = cmp::min(work_items.len() - 1, 32); - Client::new(num_workers).expect("failed to create jobserver") - }); - scope(|scope| { - execute_work(sess, work_items, client, &trans.exported_symbols, scope); + Client::new(32).expect("failed to create jobserver") }); - // If in incr. comp. mode, preserve the `.o` files for potential re-use - for mtrans in trans.modules.iter() { + let (shared_emitter, shared_emitter_main) = SharedEmitter::new(); + let (trans_worker_send, trans_worker_receive) = channel(); + let (coordinator_send, coordinator_receive) = channel(); + + let coordinator_thread = start_executing_work(sess, + shared_emitter, + trans_worker_send, + coordinator_send.clone(), + coordinator_receive, + client, + time_graph.clone(), + exported_symbols.clone()); + OngoingCrateTranslation { + crate_name, + link, + metadata, + exported_symbols, + no_builtins, + windows_subsystem, + linker_info, + no_integrated_as, + + regular_module_config: modules_config, + metadata_module_config: metadata_config, + allocator_module_config: allocator_config, + + time_graph, + output_filenames: crate_output.clone(), + coordinator_send, + trans_worker_receive, + shared_emitter_main, + future: coordinator_thread + } +} + +fn copy_module_artifacts_into_incr_comp_cache(sess: &Session, + compiled_modules: &CompiledModules, + crate_output: &OutputFilenames) { + if sess.opts.incremental.is_none() { + return; + } + + for module in compiled_modules.modules.iter() { let mut files = vec![]; - if modules_config.emit_obj { - let path = crate_output.temp_path(OutputType::Object, Some(&mtrans.name)); + if module.emit_obj { + let path = crate_output.temp_path(OutputType::Object, Some(&module.name)); files.push((OutputType::Object, path)); } - if modules_config.emit_bc { - let path = crate_output.temp_path(OutputType::Bitcode, Some(&mtrans.name)); + if module.emit_bc { + let path = crate_output.temp_path(OutputType::Bitcode, Some(&module.name)); files.push((OutputType::Bitcode, path)); } - save_trans_partition(sess, &mtrans.name, mtrans.symbol_name_hash, &files); + save_trans_partition(sess, &module.name, module.symbol_name_hash, &files); } +} - // All codegen is finished. - unsafe { - llvm::LLVMRustDisposeTargetMachine(tm); - } +fn produce_final_output_artifacts(sess: &Session, + compiled_modules: &CompiledModules, + crate_output: &OutputFilenames) { + let mut user_wants_bitcode = false; + let mut user_wants_objects = false; // Produce final compile outputs. let copy_gracefully = |from: &Path, to: &Path| { @@ -794,10 +843,10 @@ pub fn run_passes(sess: &Session, let copy_if_one_unit = |output_type: OutputType, keep_numbered: bool| { - if trans.modules.len() == 1 { + if compiled_modules.modules.len() == 1 { // 1) Only one codegen unit. In this case it's no difficulty // to copy `foo.0.x` to `foo.x`. - let module_name = Some(&trans.modules[0].name[..]); + let module_name = Some(&compiled_modules.modules[0].name[..]); let path = crate_output.temp_path(output_type, module_name); copy_gracefully(&path, &crate_output.path(output_type)); @@ -834,9 +883,7 @@ pub fn run_passes(sess: &Session, // Flag to indicate whether the user explicitly requested bitcode. // Otherwise, we produced it only as a temporary output, and will need // to get rid of it. - let mut user_wants_bitcode = false; - let mut user_wants_objects = false; - for output_type in output_types.keys() { + for output_type in crate_output.outputs.keys() { match *output_type { OutputType::Bitcode => { user_wants_bitcode = true; @@ -861,7 +908,6 @@ pub fn run_passes(sess: &Session, OutputType::DepInfo => {} } } - let user_wants_bitcode = user_wants_bitcode; // Clean up unwanted temporary files. @@ -893,33 +939,39 @@ pub fn run_passes(sess: &Session, // If you change how this works, also update back::link::link_rlib, // where .#module-name#.bc files are (maybe) deleted after making an // rlib. + let needs_crate_bitcode = need_crate_bitcode_for_rlib(sess); + let needs_crate_object = crate_output.outputs.contains_key(&OutputType::Exe); + let keep_numbered_bitcode = needs_crate_bitcode || (user_wants_bitcode && sess.opts.cg.codegen_units > 1); let keep_numbered_objects = needs_crate_object || (user_wants_objects && sess.opts.cg.codegen_units > 1); - for module_name in trans.modules.iter().map(|m| Some(&m.name[..])) { - if modules_config.emit_obj && !keep_numbered_objects { + for module in compiled_modules.modules.iter() { + let module_name = Some(&module.name[..]); + + if module.emit_obj && !keep_numbered_objects { let path = crate_output.temp_path(OutputType::Object, module_name); remove(sess, &path); } - if modules_config.emit_bc && !keep_numbered_bitcode { + if module.emit_bc && !keep_numbered_bitcode { let path = crate_output.temp_path(OutputType::Bitcode, module_name); remove(sess, &path); } } - if metadata_config.emit_bc && !user_wants_bitcode { + if compiled_modules.metadata_module.emit_bc && !user_wants_bitcode { let path = crate_output.temp_path(OutputType::Bitcode, - Some(&trans.metadata_module.name)); + Some(&compiled_modules.metadata_module.name)); remove(sess, &path); } - if allocator_config.emit_bc && !user_wants_bitcode { - if let Some(ref module) = trans.allocator_module { + + if let Some(ref allocator_module) = compiled_modules.allocator_module { + if allocator_module.emit_bc && !user_wants_bitcode { let path = crate_output.temp_path(OutputType::Bitcode, - Some(&module.name)); + Some(&allocator_module.name)); remove(sess, &path); } } @@ -930,20 +982,13 @@ pub fn run_passes(sess: &Session, // - #crate#.crate.metadata.o // - #crate#.bc // These are used in linking steps and will be cleaned up afterward. - - // FIXME: time_llvm_passes support - does this use a global context or - // something? - if sess.opts.cg.codegen_units == 1 && sess.time_llvm_passes() { - unsafe { llvm::LLVMRustPrintPassTimings(); } - } } -fn dump_incremental_data(trans: &CrateTranslation) { +pub fn dump_incremental_data(trans: &CrateTranslation) { let mut reuse = 0; for mtrans in trans.modules.iter() { - match mtrans.source { - ModuleSource::Preexisting(..) => reuse += 1, - ModuleSource::Translated(..) => (), + if mtrans.pre_existing { + reuse += 1; } } eprintln!("incremental: re-using {} out of {} modules", reuse, trans.modules.len()); @@ -955,14 +1000,17 @@ struct WorkItem { output_names: OutputFilenames } -fn build_work_item(sess: &Session, - mtrans: ModuleTranslation, +impl fmt::Debug for WorkItem { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "WorkItem({})", self.mtrans.name) + } +} + +fn build_work_item(mtrans: ModuleTranslation, config: ModuleConfig, output_names: OutputFilenames) -> WorkItem { - let mut config = config; - config.tm = create_target_machine(sess); WorkItem { mtrans: mtrans, config: config, @@ -971,70 +1019,98 @@ fn build_work_item(sess: &Session, } fn execute_work_item(cgcx: &CodegenContext, work_item: WorkItem) - -> Result<(), FatalError> + -> Result { - unsafe { - match work_item.mtrans.source { - ModuleSource::Translated(mllvm) => { - debug!("llvm-optimizing {:?}", work_item.mtrans.name); - optimize_and_codegen(cgcx, - work_item.mtrans, - mllvm, - work_item.config, - work_item.output_names)?; - } - ModuleSource::Preexisting(wp) => { - let incr_comp_session_dir = cgcx.incr_comp_session_dir - .as_ref() - .unwrap(); - let name = &work_item.mtrans.name; - for (kind, saved_file) in wp.saved_files { - let obj_out = work_item.output_names.temp_path(kind, Some(name)); - let source_file = in_incr_comp_dir(&incr_comp_session_dir, - &saved_file); - debug!("copying pre-existing module `{}` from {:?} to {}", - work_item.mtrans.name, - source_file, - obj_out.display()); - match link_or_copy(&source_file, &obj_out) { - Ok(_) => { } - Err(err) => { - cgcx.handler.err(&format!("unable to copy {} to {}: {}", - source_file.display(), - obj_out.display(), - err)); - } - } + let diag_handler = cgcx.create_diag_handler(); + let module_name = work_item.mtrans.name.clone(); + + let pre_existing = match work_item.mtrans.source { + ModuleSource::Translated(_) => None, + ModuleSource::Preexisting(ref wp) => Some(wp.clone()), + }; + + if let Some(wp) = pre_existing { + let incr_comp_session_dir = cgcx.incr_comp_session_dir + .as_ref() + .unwrap(); + let name = &work_item.mtrans.name; + for (kind, saved_file) in wp.saved_files { + let obj_out = work_item.output_names.temp_path(kind, Some(name)); + let source_file = in_incr_comp_dir(&incr_comp_session_dir, + &saved_file); + debug!("copying pre-existing module `{}` from {:?} to {}", + work_item.mtrans.name, + source_file, + obj_out.display()); + match link_or_copy(&source_file, &obj_out) { + Ok(_) => { } + Err(err) => { + diag_handler.err(&format!("unable to copy {} to {}: {}", + source_file.display(), + obj_out.display(), + err)); } } } - } - Ok(()) + Ok(CompiledModule { + name: module_name, + kind: ModuleKind::Regular, + pre_existing: true, + symbol_name_hash: work_item.mtrans.symbol_name_hash, + emit_bc: work_item.config.emit_bc, + emit_obj: work_item.config.emit_obj, + }) + } else { + debug!("llvm-optimizing {:?}", module_name); + + unsafe { + optimize_and_codegen(cgcx, + &diag_handler, + work_item.mtrans, + work_item.config, + work_item.output_names) + } + } } -pub enum Message { +#[derive(Debug)] +enum Message { Token(io::Result), - Diagnostic(Diagnostic), - Done { success: bool }, - InlineAsmError(u32, String), - AbortIfErrors, + Done { + result: Result, + worker_id: usize, + }, + TranslationDone { + llvm_work_item: WorkItem, + cost: u64, + is_last: bool, + }, + TranslateItem, } -pub struct Diagnostic { +struct Diagnostic { msg: String, code: Option, lvl: Level, } -fn execute_work<'a>(sess: &'a Session, - mut work_items: Vec, - jobserver: Client, - exported_symbols: &'a ExportedSymbols, - scope: &Scope<'a>) { - let (tx, rx) = channel(); - let tx2 = tx.clone(); +#[derive(PartialEq, Clone, Copy, Debug)] +enum MainThreadWorkerState { + Idle, + Translating, + LLVMing, +} +fn start_executing_work(sess: &Session, + shared_emitter: SharedEmitter, + trans_worker_send: Sender, + coordinator_send: Sender, + coordinator_receive: Receiver, + jobserver: Client, + time_graph: Option, + exported_symbols: Arc) + -> thread::JoinHandle { // First up, convert our jobserver into a helper thread so we can use normal // mpsc channels to manage our messages and such. Once we've got the helper // thread then request `n-1` tokens because all of our work items are ready @@ -1045,27 +1121,144 @@ fn execute_work<'a>(sess: &'a Session, // // After we've requested all these tokens then we'll, when we can, get // tokens on `rx` above which will get managed in the main loop below. + let coordinator_send2 = coordinator_send.clone(); let helper = jobserver.into_helper_thread(move |token| { - drop(tx2.send(Message::Token(token))); + drop(coordinator_send2.send(Message::Token(token))); }).expect("failed to spawn helper thread"); - for _ in 0..work_items.len() - 1 { - helper.request_token(); - } + + let mut each_linked_rlib_for_lto = Vec::new(); + drop(link::each_linked_rlib(sess, &mut |cnum, path| { + if link::ignored_for_lto(sess, cnum) { + return + } + each_linked_rlib_for_lto.push((cnum, path.to_path_buf())); + })); + + let cgcx = CodegenContext { + crate_types: sess.crate_types.borrow().clone(), + each_linked_rlib_for_lto: each_linked_rlib_for_lto, + lto: sess.lto(), + no_landing_pads: sess.no_landing_pads(), + opts: Arc::new(sess.opts.clone()), + time_passes: sess.time_passes(), + exported_symbols: exported_symbols, + plugin_passes: sess.plugin_llvm_passes.borrow().clone(), + remark: sess.opts.cg.remark.clone(), + worker: 0, + incr_comp_session_dir: sess.incr_comp_session_dir_opt().map(|r| r.clone()), + coordinator_send: coordinator_send, + diag_emitter: shared_emitter.clone(), + time_graph, + }; // This is the "main loop" of parallel work happening for parallel codegen. // It's here that we manage parallelism, schedule work, and work with // messages coming from clients. // - // Our channel `rx` created above is a channel of messages coming from our - // various worker threads. This includes the jobserver helper thread above - // as well as the work we'll spawn off here. Each turn of this loop starts - // off by trying to spawn as much work as possible. After we've done that we - // then wait for an event and dispatch accordingly once the event is - // received. We're only done once all our work items have been drained and - // nothing is running, at which point we return back up the stack. + // There are a few environmental pre-conditions that shape how the system + // is set up: + // + // - Error reporting only can happen on the main thread because that's the + // only place where we have access to the compiler `Session`. + // - LLVM work can be done on any thread. + // - Translation can only happen on the main thread. + // - Each thread doing substantial work most be in possession of a `Token` + // from the `Jobserver`. + // - The compiler process always holds one `Token`. Any additional `Tokens` + // have to be requested from the `Jobserver`. + // + // Error Reporting + // =============== + // The error reporting restriction is handled separately from the rest: We + // set up a `SharedEmitter` the holds an open channel to the main thread. + // When an error occurs on any thread, the shared emitter will send the + // error message to the receiver main thread (`SharedEmitterMain`). The + // main thread will periodically query this error message queue and emit + // any error messages it has received. It might even abort compilation if + // has received a fatal error. In this case we rely on all other threads + // being torn down automatically with the main thread. + // Since the main thread will often be busy doing translation work, error + // reporting will be somewhat delayed, since the message queue can only be + // checked in between to work packages. + // + // Work Processing Infrastructure + // ============================== + // The work processing infrastructure knows three major actors: + // + // - the coordinator thread, + // - the main thread, and + // - LLVM worker threads + // + // The coordinator thread is running a message loop. It instructs the main + // thread about what work to do when, and it will spawn off LLVM worker + // threads as open LLVM WorkItems become available. + // + // The job of the main thread is to translate CGUs into LLVM work package + // (since the main thread is the only thread that can do this). The main + // thread will block until it receives a message from the coordinator, upon + // which it will translate one CGU, send it to the coordinator and block + // again. This way the coordinator can control what the main thread is + // doing. + // + // The coordinator keeps a queue of LLVM WorkItems, and when a `Token` is + // available, it will spawn off a new LLVM worker thread and let it process + // that a WorkItem. When a LLVM worker thread is done with its WorkItem, + // it will just shut down, which also frees all resources associated with + // the given LLVM module, and sends a message to the coordinator that the + // has been completed. + // + // Work Scheduling + // =============== + // The scheduler's goal is to minimize the time it takes to complete all + // work there is, however, we also want to keep memory consumption low + // if possible. These two goals are at odds with each other: If memory + // consumption were not an issue, we could just let the main thread produce + // LLVM WorkItems at full speed, assuring maximal utilization of + // Tokens/LLVM worker threads. However, since translation usual is faster + // than LLVM processing, the queue of LLVM WorkItems would fill up and each + // WorkItem potentially holds on to a substantial amount of memory. + // + // So the actual goal is to always produce just enough LLVM WorkItems as + // not to starve our LLVM worker threads. That means, once we have enough + // WorkItems in our queue, we can block the main thread, so it does not + // produce more until we need them. // - // ## Parallelism management + // Doing LLVM Work on the Main Thread + // ---------------------------------- + // Since the main thread owns the compiler processes implicit `Token`, it is + // wasteful to keep it blocked without doing any work. Therefore, what we do + // in this case is: We spawn off an additional LLVM worker thread that helps + // reduce the queue. The work it is doing corresponds to the implicit + // `Token`. The coordinator will mark the main thread as being busy with + // LLVM work. (The actual work happens on another OS thread but we just care + // about `Tokens`, not actual threads). // + // When any LLVM worker thread finishes while the main thread is marked as + // "busy with LLVM work", we can do a little switcheroo: We give the Token + // of the just finished thread to the LLVM worker thread that is working on + // behalf of the main thread's implicit Token, thus freeing up the main + // thread again. The coordinator can then again decide what the main thread + // should do. This allows the coordinator to make decisions at more points + // in time. + // + // Striking a Balance between Throughput and Memory Consumption + // ------------------------------------------------------------ + // Since our two goals, (1) use as many Tokens as possible and (2) keep + // memory consumption as low as possible, are in conflict with each other, + // we have to find a trade off between them. Right now, the goal is to keep + // all workers busy, which means that no worker should find the queue empty + // when it is ready to start. + // How do we do achieve this? Good question :) We actually never know how + // many `Tokens` are potentially available so it's hard to say how much to + // fill up the queue before switching the main thread to LLVM work. Also we + // currently don't have a means to estimate how long a running LLVM worker + // will still be busy with it's current WorkItem. However, we know the + // maximal count of available Tokens that makes sense (=the number of CPU + // cores), so we can take a conservative guess. The heuristic we use here + // is implemented in the `queue_full_enough()` function. + // + // Some Background on Jobservers + // ----------------------------- // It's worth also touching on the management of parallelism here. We don't // want to just spawn a thread per work item because while that's optimal // parallelism it may overload a system with too many threads or violate our @@ -1078,193 +1271,302 @@ fn execute_work<'a>(sess: &'a Session, // and whenever we're done with that work we release the semaphore. In this // manner we can ensure that the maximum number of parallel workers is // capped at any one point in time. - // - // The jobserver protocol is a little unique, however. We, as a running - // process, already have an ephemeral token assigned to us. We're not going - // to be doing any productive work in this thread though so we're going to - // give this token to a worker thread (there's no actual token to give, this - // is just conceptually). As a result you'll see a few `+1` and `-1` - // instances below, and it's about working with this ephemeral token. - // - // To acquire tokens we have our `helper` thread above which is just in a - // loop acquiring tokens and sending them to us. We then store all tokens - // locally in a `tokens` vector once they're acquired. Currently we don't - // literally send a token to a worker thread to assist with management of - // our "ephemeral token". - // - // As a result, our "spawn as much work as possible" basically means that we - // fill up the `running` counter up to the limit of the `tokens` list. - // Whenever we get a new token this'll mean a new unit of work is spawned, - // and then whenever a unit of work finishes we relinquish a token, if we - // had one, to maybe get re-acquired later. - // - // Note that there's a race which may mean that we acquire more tokens than - // we originally anticipated. For example let's say we have 2 units of work. - // First we request one token from the helper thread and then we - // immediately spawn one unit of work with our ephemeral token after. We may - // then finish the first piece of work before the token is acquired, but we - // can continue to spawn the second piece of work with our ephemeral token. - // Before that work finishes, however, we may acquire a token. In that case - // we actually wastefully acquired the token, so we relinquish it back to - // the jobserver. - let mut tokens = Vec::new(); - let mut running = 0; - while work_items.len() > 0 || running > 0 { - - // Spin up what work we can, only doing this while we've got available - // parallelism slots and work left to spawn. - while work_items.len() > 0 && running < tokens.len() + 1 { - let item = work_items.pop().unwrap(); - let index = work_items.len(); - spawn_work(sess, exported_symbols, scope, tx.clone(), item, index); - running += 1; - } - - // Relinquish accidentally acquired extra tokens - tokens.truncate(running.saturating_sub(1)); - - match rx.recv().unwrap() { - // Save the token locally and the next turn of the loop will use - // this to spawn a new unit of work, or it may get dropped - // immediately if we have no more work to spawn. - Message::Token(token) => { - tokens.push(token.expect("failed to acquire jobserver token")); + return thread::spawn(move || { + // We pretend to be within the top-level LLVM time-passes task here: + set_time_depth(1); + + let max_workers = ::num_cpus::get(); + let mut worker_id_counter = 0; + let mut free_worker_ids = Vec::new(); + let mut get_worker_id = |free_worker_ids: &mut Vec| { + if let Some(id) = free_worker_ids.pop() { + id + } else { + let id = worker_id_counter; + worker_id_counter += 1; + id } + }; - // If a thread exits successfully then we drop a token associated - // with that worker and update our `running` count. We may later - // re-acquire a token to continue running more work. We may also not - // actually drop a token here if the worker was running with an - // "ephemeral token" - // - // Note that if the thread failed that means it panicked, so we - // abort immediately. - Message::Done { success: true } => { - drop(tokens.pop()); - running -= 1; + // This is where we collect codegen units that have gone all the way + // through translation and LLVM. + let mut compiled_modules = vec![]; + let mut compiled_metadata_module = None; + let mut compiled_allocator_module = None; + + // This flag tracks whether all items have gone through translations + let mut translation_done = false; + + // This is the queue of LLVM work items that still need processing. + let mut work_items = Vec::new(); + + // This are the Jobserver Tokens we currently hold. Does not include + // the implicit Token the compiler process owns no matter what. + let mut tokens = Vec::new(); + + let mut main_thread_worker_state = MainThreadWorkerState::Idle; + let mut running = 0; + + let mut llvm_start_time = None; + + // Run the message loop while there's still anything that needs message + // processing: + while !translation_done || + work_items.len() > 0 || + running > 0 || + main_thread_worker_state != MainThreadWorkerState::Idle { + + // While there are still CGUs to be translated, the coordinator has + // to decide how to utilize the compiler processes implicit Token: + // For translating more CGU or for running them through LLVM. + if !translation_done { + if main_thread_worker_state == MainThreadWorkerState::Idle { + if !queue_full_enough(work_items.len(), running, max_workers) { + // The queue is not full enough, translate more items: + if let Err(_) = trans_worker_send.send(Message::TranslateItem) { + panic!("Could not send Message::TranslateItem to main thread") + } + main_thread_worker_state = MainThreadWorkerState::Translating; + } else { + // The queue is full enough to not let the worker + // threads starve. Use the implicit Token to do some + // LLVM work too. + let (item, _) = work_items.pop() + .expect("queue empty - queue_full_enough() broken?"); + let cgcx = CodegenContext { + worker: get_worker_id(&mut free_worker_ids), + .. cgcx.clone() + }; + maybe_start_llvm_timer(&item, &mut llvm_start_time); + main_thread_worker_state = MainThreadWorkerState::LLVMing; + spawn_work(cgcx, item); + } + } + } else { + // In this branch, we know that everything has been translated, + // so it's just a matter of determining whether the implicit + // Token is free to use for LLVM work. + match main_thread_worker_state { + MainThreadWorkerState::Idle => { + if let Some((item, _)) = work_items.pop() { + let cgcx = CodegenContext { + worker: get_worker_id(&mut free_worker_ids), + .. cgcx.clone() + }; + maybe_start_llvm_timer(&item, &mut llvm_start_time); + main_thread_worker_state = MainThreadWorkerState::LLVMing; + spawn_work(cgcx, item); + } + } + MainThreadWorkerState::Translating => { + bug!("trans worker should not be translating after \ + translation was already completed") + } + MainThreadWorkerState::LLVMing => { + // Already making good use of that token + } + } } - Message::Done { success: false } => { - sess.fatal("aborting due to worker thread panic"); + + // Spin up what work we can, only doing this while we've got available + // parallelism slots and work left to spawn. + while work_items.len() > 0 && running < tokens.len() { + let (item, _) = work_items.pop().unwrap(); + + maybe_start_llvm_timer(&item, &mut llvm_start_time); + + let cgcx = CodegenContext { + worker: get_worker_id(&mut free_worker_ids), + .. cgcx.clone() + }; + + spawn_work(cgcx, item); + running += 1; } - // Our worker wants us to emit an error message, so get ahold of our - // `sess` and print it out - Message::Diagnostic(diag) => { - let handler = sess.diagnostic(); - match diag.code { - Some(ref code) => { - handler.emit_with_code(&MultiSpan::new(), - &diag.msg, - &code, - diag.lvl); + // Relinquish accidentally acquired extra tokens + tokens.truncate(running); + + match coordinator_receive.recv().unwrap() { + // Save the token locally and the next turn of the loop will use + // this to spawn a new unit of work, or it may get dropped + // immediately if we have no more work to spawn. + Message::Token(token) => { + match token { + Ok(token) => { + tokens.push(token); + + if main_thread_worker_state == MainThreadWorkerState::LLVMing { + // If the main thread token is used for LLVM work + // at the moment, we turn that thread into a regular + // LLVM worker thread, so the main thread is free + // to react to translation demand. + main_thread_worker_state = MainThreadWorkerState::Idle; + running += 1; + } + } + Err(e) => { + let msg = &format!("failed to acquire jobserver token: {}", e); + shared_emitter.fatal(msg); + // Exit the coordinator thread + panic!("{}", msg) + } } - None => { - handler.emit(&MultiSpan::new(), - &diag.msg, - diag.lvl); + } + + Message::TranslationDone { llvm_work_item, cost, is_last } => { + // We keep the queue sorted by estimated processing cost, + // so that more expensive items are processed earlier. This + // is good for throughput as it gives the main thread more + // time to fill up the queue and it avoids scheduling + // expensive items to the end. + // Note, however, that this is not ideal for memory + // consumption, as LLVM module sizes are not evenly + // distributed. + let insertion_index = + work_items.binary_search_by_key(&cost, |&(_, cost)| cost); + let insertion_index = match insertion_index { + Ok(idx) | Err(idx) => idx + }; + work_items.insert(insertion_index, (llvm_work_item, cost)); + + if is_last { + // If this is the last, don't request a token because + // the trans worker thread will be free to handle this + // immediately. + translation_done = true; + } else { + helper.request_token(); } + + assert_eq!(main_thread_worker_state, + MainThreadWorkerState::Translating); + main_thread_worker_state = MainThreadWorkerState::Idle; } - } - Message::InlineAsmError(cookie, msg) => { - match Mark::from_u32(cookie).expn_info() { - Some(ei) => sess.span_err(ei.call_site, &msg), - None => sess.err(&msg), + + // If a thread exits successfully then we drop a token associated + // with that worker and update our `running` count. We may later + // re-acquire a token to continue running more work. We may also not + // actually drop a token here if the worker was running with an + // "ephemeral token" + // + // Note that if the thread failed that means it panicked, so we + // abort immediately. + Message::Done { result: Ok(compiled_module), worker_id } => { + if main_thread_worker_state == MainThreadWorkerState::LLVMing { + main_thread_worker_state = MainThreadWorkerState::Idle; + } else { + running -= 1; + } + + free_worker_ids.push(worker_id); + + match compiled_module.kind { + ModuleKind::Regular => { + compiled_modules.push(compiled_module); + } + ModuleKind::Metadata => { + assert!(compiled_metadata_module.is_none()); + compiled_metadata_module = Some(compiled_module); + } + ModuleKind::Allocator => { + assert!(compiled_allocator_module.is_none()); + compiled_allocator_module = Some(compiled_module); + } + } + } + Message::Done { result: Err(()), worker_id: _ } => { + shared_emitter.fatal("aborting due to worker thread panic"); + // Exit the coordinator thread + panic!("aborting due to worker thread panic") + } + Message::TranslateItem => { + bug!("the coordinator should not receive translation requests") } } + } - // Sent to us after a worker sends us a batch of error messages, and - // it's the point at which we check for errors. - Message::AbortIfErrors => sess.diagnostic().abort_if_errors(), + if let Some(llvm_start_time) = llvm_start_time { + let total_llvm_time = Instant::now().duration_since(llvm_start_time); + // This is the top-level timing for all of LLVM, set the time-depth + // to zero. + set_time_depth(0); + print_time_passes_entry(cgcx.time_passes, + "LLVM passes", + total_llvm_time); } - } - // Just in case, check this on the way out. - sess.diagnostic().abort_if_errors(); -} + let compiled_metadata_module = compiled_metadata_module + .expect("Metadata module not compiled?"); -struct SharedEmitter { - tx: Sender, -} + CompiledModules { + modules: compiled_modules, + metadata_module: compiled_metadata_module, + allocator_module: compiled_allocator_module, + } + }); -impl Emitter for SharedEmitter { - fn emit(&mut self, db: &DiagnosticBuilder) { - drop(self.tx.send(Message::Diagnostic(Diagnostic { - msg: db.message(), - code: db.code.clone(), - lvl: db.level, - }))); - for child in &db.children { - drop(self.tx.send(Message::Diagnostic(Diagnostic { - msg: child.message(), - code: None, - lvl: child.level, - }))); + // A heuristic that determines if we have enough LLVM WorkItems in the + // queue so that the main thread can do LLVM work instead of translation + fn queue_full_enough(items_in_queue: usize, + workers_running: usize, + max_workers: usize) -> bool { + // Tune me, plz. + items_in_queue > 0 && + items_in_queue >= max_workers.saturating_sub(workers_running / 2) + } + + fn maybe_start_llvm_timer(work_item: &WorkItem, + llvm_start_time: &mut Option) { + // We keep track of the -Ztime-passes output manually, + // since the closure-based interface does not fit well here. + if work_item.config.time_passes { + if llvm_start_time.is_none() { + *llvm_start_time = Some(Instant::now()); + } } - drop(self.tx.send(Message::AbortIfErrors)); } } -fn spawn_work<'a>(sess: &'a Session, - exported_symbols: &'a ExportedSymbols, - scope: &Scope<'a>, - tx: Sender, - work: WorkItem, - idx: usize) { - let plugin_passes = sess.plugin_llvm_passes.borrow().clone(); - let remark = sess.opts.cg.remark.clone(); - let incr_comp_session_dir = sess.incr_comp_session_dir_opt().map(|r| r.clone()); +pub const TRANS_WORKER_ID: usize = ::std::usize::MAX; +pub const TRANS_WORKER_TIMELINE: time_graph::TimelineId = + time_graph::TimelineId(TRANS_WORKER_ID); +pub const TRANS_WORK_PACKAGE_KIND: time_graph::WorkPackageKind = + time_graph::WorkPackageKind(&["#DE9597", "#FED1D3", "#FDC5C7", "#B46668", "#88494B"]); +const LLVM_WORK_PACKAGE_KIND: time_graph::WorkPackageKind = + time_graph::WorkPackageKind(&["#7DB67A", "#C6EEC4", "#ACDAAA", "#579354", "#3E6F3C"]); + +fn spawn_work(cgcx: CodegenContext, work: WorkItem) { let depth = time_depth(); - let lto = sess.lto(); - let crate_types = sess.crate_types.borrow().clone(); - let mut each_linked_rlib_for_lto = Vec::new(); - drop(link::each_linked_rlib(sess, &mut |cnum, path| { - if link::ignored_for_lto(sess, cnum) { - return - } - each_linked_rlib_for_lto.push((cnum, path.to_path_buf())); - })); - let time_passes = sess.time_passes(); - let no_landing_pads = sess.no_landing_pads(); - let opts = &sess.opts; - scope.spawn(move || { + thread::spawn(move || { set_time_depth(depth); // Set up a destructor which will fire off a message that we're done as // we exit. struct Bomb { - tx: Sender, - success: bool, + coordinator_send: Sender, + result: Option, + worker_id: usize, } impl Drop for Bomb { fn drop(&mut self) { - drop(self.tx.send(Message::Done { success: self.success })); + let result = match self.result.take() { + Some(compiled_module) => Ok(compiled_module), + None => Err(()) + }; + + drop(self.coordinator_send.send(Message::Done { + result, + worker_id: self.worker_id, + })); } } - let mut bomb = Bomb { - tx: tx.clone(), - success: false, - }; - // Set up our non-`Send` `CodegenContext` now that we're in a helper - // thread and have all our info available to us. - let emitter = SharedEmitter { tx: tx.clone() }; - let diag_handler = Handler::with_emitter(true, false, Box::new(emitter)); - - let cgcx = CodegenContext { - crate_types: crate_types, - each_linked_rlib_for_lto: each_linked_rlib_for_lto, - lto: lto, - no_landing_pads: no_landing_pads, - opts: opts, - time_passes: time_passes, - exported_symbols: exported_symbols, - handler: &diag_handler, - plugin_passes: plugin_passes, - remark: remark, - worker: idx, - incr_comp_session_dir: incr_comp_session_dir, - tx: tx.clone(), + let mut bomb = Bomb { + coordinator_send: cgcx.coordinator_send.clone(), + result: None, + worker_id: cgcx.worker, }; // Execute the work itself, and if it finishes successfully then flag @@ -1280,8 +1582,13 @@ fn spawn_work<'a>(sess: &'a Session, // we just ignore the result and then send off our message saying that // we're done, which if `execute_work_item` failed is unlikely to be // seen by the main thread, but hey we might as well try anyway. - drop(execute_work_item(&cgcx, work).is_err()); - bomb.success = true; + bomb.result = { + let _timing_guard = cgcx.time_graph + .as_ref() + .map(|tg| tg.start(time_graph::TimelineId(cgcx.worker), + LLVM_WORK_PACKAGE_KIND)); + Some(execute_work_item(&cgcx, work).unwrap()) + }; }); } @@ -1375,3 +1682,249 @@ pub unsafe fn with_llvm_pmb(llmod: ModuleRef, f(builder); llvm::LLVMPassManagerBuilderDispose(builder); } + + +enum SharedEmitterMessage { + Diagnostic(Diagnostic), + InlineAsmError(u32, String), + AbortIfErrors, + Fatal(String), +} + +#[derive(Clone)] +pub struct SharedEmitter { + sender: Sender, +} + +pub struct SharedEmitterMain { + receiver: Receiver, +} + +impl SharedEmitter { + pub fn new() -> (SharedEmitter, SharedEmitterMain) { + let (sender, receiver) = channel(); + + (SharedEmitter { sender }, SharedEmitterMain { receiver }) + } + + fn inline_asm_error(&self, cookie: u32, msg: String) { + drop(self.sender.send(SharedEmitterMessage::InlineAsmError(cookie, msg))); + } + + fn fatal(&self, msg: &str) { + drop(self.sender.send(SharedEmitterMessage::Fatal(msg.to_string()))); + } +} + +impl Emitter for SharedEmitter { + fn emit(&mut self, db: &DiagnosticBuilder) { + drop(self.sender.send(SharedEmitterMessage::Diagnostic(Diagnostic { + msg: db.message(), + code: db.code.clone(), + lvl: db.level, + }))); + for child in &db.children { + drop(self.sender.send(SharedEmitterMessage::Diagnostic(Diagnostic { + msg: child.message(), + code: None, + lvl: child.level, + }))); + } + drop(self.sender.send(SharedEmitterMessage::AbortIfErrors)); + } +} + +impl SharedEmitterMain { + pub fn check(&self, sess: &Session, blocking: bool) { + loop { + let message = if blocking { + match self.receiver.recv() { + Ok(message) => Ok(message), + Err(_) => Err(()), + } + } else { + match self.receiver.try_recv() { + Ok(message) => Ok(message), + Err(_) => Err(()), + } + }; + + match message { + Ok(SharedEmitterMessage::Diagnostic(diag)) => { + let handler = sess.diagnostic(); + match diag.code { + Some(ref code) => { + handler.emit_with_code(&MultiSpan::new(), + &diag.msg, + &code, + diag.lvl); + } + None => { + handler.emit(&MultiSpan::new(), + &diag.msg, + diag.lvl); + } + } + } + Ok(SharedEmitterMessage::InlineAsmError(cookie, msg)) => { + match Mark::from_u32(cookie).expn_info() { + Some(ei) => sess.span_err(ei.call_site, &msg), + None => sess.err(&msg), + } + } + Ok(SharedEmitterMessage::AbortIfErrors) => { + sess.abort_if_errors(); + } + Ok(SharedEmitterMessage::Fatal(msg)) => { + sess.fatal(&msg); + } + Err(_) => { + break; + } + } + + } + } +} + +pub struct OngoingCrateTranslation { + crate_name: Symbol, + link: LinkMeta, + metadata: EncodedMetadata, + exported_symbols: Arc, + no_builtins: bool, + windows_subsystem: Option, + linker_info: LinkerInfo, + no_integrated_as: bool, + + output_filenames: OutputFilenames, + regular_module_config: ModuleConfig, + metadata_module_config: ModuleConfig, + allocator_module_config: ModuleConfig, + + time_graph: Option, + coordinator_send: Sender, + trans_worker_receive: Receiver, + shared_emitter_main: SharedEmitterMain, + future: thread::JoinHandle, +} + +impl OngoingCrateTranslation { + pub fn join(self, sess: &Session) -> CrateTranslation { + self.shared_emitter_main.check(sess, true); + let compiled_modules = match self.future.join() { + Ok(compiled_modules) => compiled_modules, + Err(_) => { + sess.fatal("Error during translation/LLVM phase."); + } + }; + + sess.abort_if_errors(); + + if let Some(time_graph) = self.time_graph { + time_graph.dump(&format!("{}-timings", self.crate_name)); + } + + copy_module_artifacts_into_incr_comp_cache(sess, + &compiled_modules, + &self.output_filenames); + produce_final_output_artifacts(sess, + &compiled_modules, + &self.output_filenames); + + // FIXME: time_llvm_passes support - does this use a global context or + // something? + if sess.opts.cg.codegen_units == 1 && sess.time_llvm_passes() { + unsafe { llvm::LLVMRustPrintPassTimings(); } + } + + let trans = CrateTranslation { + crate_name: self.crate_name, + link: self.link, + metadata: self.metadata, + exported_symbols: self.exported_symbols, + no_builtins: self.no_builtins, + windows_subsystem: self.windows_subsystem, + linker_info: self.linker_info, + + modules: compiled_modules.modules, + metadata_module: compiled_modules.metadata_module, + allocator_module: compiled_modules.allocator_module, + }; + + if self.no_integrated_as { + run_assembler(sess, &self.output_filenames); + + // HACK the linker expects the object file to be named foo.0.o but + // `run_assembler` produces an object named just foo.o. Rename it if we + // are going to build an executable + if sess.opts.output_types.contains_key(&OutputType::Exe) { + let f = self.output_filenames.path(OutputType::Object); + rename_or_copy_remove(&f, + f.with_file_name(format!("{}.0.o", + f.file_stem().unwrap().to_string_lossy()))).unwrap(); + } + + // Remove assembly source, unless --save-temps was specified + if !sess.opts.cg.save_temps { + fs::remove_file(&self.output_filenames + .temp_path(OutputType::Assembly, None)).unwrap(); + } + } + + trans + } + + pub fn submit_translated_module_to_llvm(&self, + sess: &Session, + mtrans: ModuleTranslation, + cost: u64, + is_last: bool) { + let module_config = match mtrans.kind { + ModuleKind::Regular => self.regular_module_config.clone(sess), + ModuleKind::Metadata => self.metadata_module_config.clone(sess), + ModuleKind::Allocator => self.allocator_module_config.clone(sess), + }; + + let llvm_work_item = build_work_item(mtrans, + module_config, + self.output_filenames.clone()); + + drop(self.coordinator_send.send(Message::TranslationDone { + llvm_work_item, + cost, + is_last + })); + } + + pub fn submit_pre_translated_module_to_llvm(&self, + sess: &Session, + mtrans: ModuleTranslation, + is_last: bool) { + self.wait_for_signal_to_translate_item(); + self.check_for_errors(sess); + + // These are generally cheap and won't through off scheduling. + let cost = 0; + self.submit_translated_module_to_llvm(sess, mtrans, cost, is_last); + } + + pub fn check_for_errors(&self, sess: &Session) { + self.shared_emitter_main.check(sess, false); + } + + pub fn wait_for_signal_to_translate_item(&self) { + match self.trans_worker_receive.recv() { + Ok(Message::TranslateItem) => { + // Nothing to do + } + Ok(message) => { + panic!("unexpected message: {:?}", message) + } + Err(_) => { + // One of the LLVM threads must have panicked, fall through so + // error handling can be reached. + } + } + } +} diff --git a/src/librustc_trans/base.rs b/src/librustc_trans/base.rs index 7b836399f9cb5..14c73de64bc79 100644 --- a/src/librustc_trans/base.rs +++ b/src/librustc_trans/base.rs @@ -23,29 +23,30 @@ //! but one TypeRef corresponds to many `Ty`s; for instance, tup(int, int, //! int) and rec(x=int, y=int, z=int) will have the same TypeRef. -use super::CrateTranslation; use super::ModuleLlvm; use super::ModuleSource; use super::ModuleTranslation; +use super::ModuleKind; use assert_module_sources; use back::link; use back::linker::LinkerInfo; use back::symbol_export::{self, ExportedSymbols}; +use back::write::{self, OngoingCrateTranslation}; use llvm::{ContextRef, Linkage, ModuleRef, ValueRef, Vector, get_param}; use llvm; use metadata; use rustc::hir::def_id::LOCAL_CRATE; use rustc::middle::lang_items::StartFnLangItem; -use rustc::middle::cstore::EncodedMetadata; +use rustc::middle::cstore::{EncodedMetadata, EncodedMetadataHashes}; use rustc::ty::{self, Ty, TyCtxt}; use rustc::dep_graph::AssertDepGraphSafe; use rustc::middle::cstore::LinkMeta; use rustc::hir::map as hir_map; -use rustc::util::common::time; -use rustc::session::config::{self, NoDebugInfo, OutputFilenames}; +use rustc::util::common::{time, print_time_passes_entry}; +use rustc::session::config::{self, NoDebugInfo, OutputFilenames, OutputType}; use rustc::session::Session; -use rustc_incremental::IncrementalHashesMap; +use rustc_incremental::{self, IncrementalHashesMap}; use abi; use allocator; use mir::lvalue::LvalueRef; @@ -68,6 +69,7 @@ use mir; use monomorphize::{self, Instance}; use partitioning::{self, PartitioningStrategy, CodegenUnit}; use symbol_names_test; +use time_graph; use trans_item::{TransItem, DefPathBasedNames}; use type_::Type; use type_of; @@ -78,6 +80,7 @@ use libc::c_uint; use std::ffi::{CStr, CString}; use std::str; use std::sync::Arc; +use std::time::{Instant, Duration}; use std::i32; use syntax_pos::Span; use syntax::attr; @@ -647,9 +650,23 @@ pub fn set_link_section(ccx: &CrateContext, } } +// check for the #[rustc_error] annotation, which forces an +// error in trans. This is used to write compile-fail tests +// that actually test that compilation succeeds without +// reporting an error. +fn check_for_rustc_errors_attr(tcx: TyCtxt) { + if let Some((id, span)) = *tcx.sess.entry_fn.borrow() { + let main_def_id = tcx.hir.local_def_id(id); + + if tcx.has_attr(main_def_id, "rustc_error") { + tcx.sess.span_fatal(span, "compilation successful"); + } + } +} + /// Create the `main` function which will initialise the rust runtime and call /// users main function. -pub fn maybe_create_entry_wrapper(ccx: &CrateContext) { +fn maybe_create_entry_wrapper(ccx: &CrateContext) { let (main_def_id, span) = match *ccx.sess().entry_fn.borrow() { Some((id, span)) => { (ccx.tcx().hir.local_def_id(id), span) @@ -657,14 +674,6 @@ pub fn maybe_create_entry_wrapper(ccx: &CrateContext) { None => return, }; - // check for the #[rustc_error] annotation, which forces an - // error in trans. This is used to write compile-fail tests - // that actually test that compilation succeeds without - // reporting an error. - if ccx.tcx().has_attr(main_def_id, "rustc_error") { - ccx.tcx().sess.span_fatal(span, "compilation successful"); - } - let instance = Instance::mono(ccx.tcx(), main_def_id); if !ccx.codegen_unit().contains_item(&TransItem::Fn(instance)) { @@ -728,7 +737,8 @@ fn contains_null(s: &str) -> bool { fn write_metadata<'a, 'gcx>(tcx: TyCtxt<'a, 'gcx, 'gcx>, link_meta: &LinkMeta, exported_symbols: &NodeSet) - -> (ContextRef, ModuleRef, EncodedMetadata) { + -> (ContextRef, ModuleRef, + EncodedMetadata, EncodedMetadataHashes) { use std::io::Write; use flate2::Compression; use flate2::write::DeflateEncoder; @@ -758,15 +768,18 @@ fn write_metadata<'a, 'gcx>(tcx: TyCtxt<'a, 'gcx, 'gcx>, }).max().unwrap(); if kind == MetadataKind::None { - return (metadata_llcx, metadata_llmod, EncodedMetadata::new()); + return (metadata_llcx, + metadata_llmod, + EncodedMetadata::new(), + EncodedMetadataHashes::new()); } let cstore = &tcx.sess.cstore; - let metadata = cstore.encode_metadata(tcx, - &link_meta, - exported_symbols); + let (metadata, hashes) = cstore.encode_metadata(tcx, + &link_meta, + exported_symbols); if kind == MetadataKind::Uncompressed { - return (metadata_llcx, metadata_llmod, metadata); + return (metadata_llcx, metadata_llmod, metadata, hashes); } assert!(kind == MetadataKind::Compressed); @@ -794,7 +807,7 @@ fn write_metadata<'a, 'gcx>(tcx: TyCtxt<'a, 'gcx, 'gcx>, let directive = CString::new(directive).unwrap(); llvm::LLVMSetModuleInlineAsm(metadata_llmod, directive.as_ptr()) } - return (metadata_llcx, metadata_llmod, metadata); + return (metadata_llcx, metadata_llmod, metadata, hashes); } // Create a `__imp_ = &symbol` global for every public static `symbol`. @@ -803,7 +816,7 @@ fn write_metadata<'a, 'gcx>(tcx: TyCtxt<'a, 'gcx, 'gcx>, // code references on its own. // See #26591, #27438 fn create_imps(sess: &Session, - llvm_modules: &[ModuleLlvm]) { + llvm_module: &ModuleLlvm) { // The x86 ABI seems to require that leading underscores are added to symbol // names, so we need an extra underscore on 32-bit. There's also a leading // '\x01' here which disables LLVM's symbol mangling (e.g. no extra @@ -814,28 +827,26 @@ fn create_imps(sess: &Session, "\x01__imp_" }; unsafe { - for ll in llvm_modules { - let exported: Vec<_> = iter_globals(ll.llmod) - .filter(|&val| { - llvm::LLVMRustGetLinkage(val) == - llvm::Linkage::ExternalLinkage && - llvm::LLVMIsDeclaration(val) == 0 - }) - .collect(); - - let i8p_ty = Type::i8p_llcx(ll.llcx); - for val in exported { - let name = CStr::from_ptr(llvm::LLVMGetValueName(val)); - let mut imp_name = prefix.as_bytes().to_vec(); - imp_name.extend(name.to_bytes()); - let imp_name = CString::new(imp_name).unwrap(); - let imp = llvm::LLVMAddGlobal(ll.llmod, - i8p_ty.to_ref(), - imp_name.as_ptr() as *const _); - let init = llvm::LLVMConstBitCast(val, i8p_ty.to_ref()); - llvm::LLVMSetInitializer(imp, init); - llvm::LLVMRustSetLinkage(imp, llvm::Linkage::ExternalLinkage); - } + let exported: Vec<_> = iter_globals(llvm_module.llmod) + .filter(|&val| { + llvm::LLVMRustGetLinkage(val) == + llvm::Linkage::ExternalLinkage && + llvm::LLVMIsDeclaration(val) == 0 + }) + .collect(); + + let i8p_ty = Type::i8p_llcx(llvm_module.llcx); + for val in exported { + let name = CStr::from_ptr(llvm::LLVMGetValueName(val)); + let mut imp_name = prefix.as_bytes().to_vec(); + imp_name.extend(name.to_bytes()); + let imp_name = CString::new(imp_name).unwrap(); + let imp = llvm::LLVMAddGlobal(llvm_module.llmod, + i8p_ty.to_ref(), + imp_name.as_ptr() as *const _); + let init = llvm::LLVMConstBitCast(val, i8p_ty.to_ref()); + llvm::LLVMSetInitializer(imp, init); + llvm::LLVMRustSetLinkage(imp, llvm::Linkage::ExternalLinkage); } } } @@ -920,27 +931,26 @@ pub fn find_exported_symbols(tcx: TyCtxt, reachable: &NodeSet) -> NodeSet { pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, analysis: ty::CrateAnalysis, - incremental_hashes_map: &IncrementalHashesMap, + incremental_hashes_map: IncrementalHashesMap, output_filenames: &OutputFilenames) - -> CrateTranslation { + -> OngoingCrateTranslation { + check_for_rustc_errors_attr(tcx); + // Be careful with this krate: obviously it gives access to the // entire contents of the krate. So if you push any subtasks of // `TransCrate`, you need to be careful to register "reads" of the // particular items that will be processed. let krate = tcx.hir.krate(); - let ty::CrateAnalysis { reachable, .. } = analysis; - let check_overflow = tcx.sess.overflow_checks(); - - let link_meta = link::build_link_meta(incremental_hashes_map); - + let link_meta = link::build_link_meta(&incremental_hashes_map); let exported_symbol_node_ids = find_exported_symbols(tcx, &reachable); + let shared_ccx = SharedCrateContext::new(tcx, check_overflow, output_filenames); // Translate the metadata. - let (metadata_llcx, metadata_llmod, metadata) = + let (metadata_llcx, metadata_llmod, metadata, metadata_incr_hashes) = time(tcx.sess.time_passes(), "write metadata", || { write_metadata(tcx, &link_meta, &exported_symbol_node_ids) }); @@ -952,27 +962,44 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, llcx: metadata_llcx, llmod: metadata_llmod, }), + kind: ModuleKind::Metadata, }; let no_builtins = attr::contains_name(&krate.attrs, "no_builtins"); + let time_graph = if tcx.sess.opts.debugging_opts.trans_time_graph { + Some(time_graph::TimeGraph::new()) + } else { + None + }; // Skip crate items and just output metadata in -Z no-trans mode. if tcx.sess.opts.debugging_opts.no_trans || !tcx.sess.opts.output_types.should_trans() { let empty_exported_symbols = ExportedSymbols::empty(); let linker_info = LinkerInfo::new(&shared_ccx, &empty_exported_symbols); - return CrateTranslation { - crate_name: tcx.crate_name(LOCAL_CRATE), - modules: vec![], - metadata_module: metadata_module, - allocator_module: None, - link: link_meta, - metadata: metadata, - exported_symbols: empty_exported_symbols, - no_builtins: no_builtins, - linker_info: linker_info, - windows_subsystem: None, - }; + let ongoing_translation = write::start_async_translation( + tcx.sess, + output_filenames, + time_graph.clone(), + tcx.crate_name(LOCAL_CRATE), + link_meta, + metadata, + Arc::new(empty_exported_symbols), + no_builtins, + None, + linker_info, + false); + + ongoing_translation.submit_pre_translated_module_to_llvm(tcx.sess, metadata_module, true); + + assert_and_save_dep_graph(tcx, + incremental_hashes_map, + metadata_incr_hashes, + link_meta); + + ongoing_translation.check_for_errors(tcx.sess); + + return ongoing_translation; } let exported_symbols = Arc::new(ExportedSymbols::compute(tcx, @@ -983,12 +1010,110 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, let (translation_items, codegen_units) = collect_and_partition_translation_items(&shared_ccx, &exported_symbols); + assert!(codegen_units.len() <= 1 || !tcx.sess.lto()); + + let linker_info = LinkerInfo::new(&shared_ccx, &exported_symbols); + let subsystem = attr::first_attr_value_str_by_name(&krate.attrs, + "windows_subsystem"); + let windows_subsystem = subsystem.map(|subsystem| { + if subsystem != "windows" && subsystem != "console" { + tcx.sess.fatal(&format!("invalid windows subsystem `{}`, only \ + `windows` and `console` are allowed", + subsystem)); + } + subsystem.to_string() + }); + + let no_integrated_as = tcx.sess.opts.cg.no_integrated_as || + (tcx.sess.target.target.options.no_integrated_as && + (output_filenames.outputs.contains_key(&OutputType::Object) || + output_filenames.outputs.contains_key(&OutputType::Exe))); + + let ongoing_translation = write::start_async_translation( + tcx.sess, + output_filenames, + time_graph.clone(), + tcx.crate_name(LOCAL_CRATE), + link_meta, + metadata, + exported_symbols.clone(), + no_builtins, + windows_subsystem, + linker_info, + no_integrated_as); + + // Translate an allocator shim, if any + // + // If LTO is enabled and we've got some previous LLVM module we translated + // above, then we can just translate directly into that LLVM module. If not, + // however, we need to create a separate module and trans into that. Note + // that the separate translation is critical for the standard library where + // the rlib's object file doesn't have allocator functions but the dylib + // links in an object file that has allocator functions. When we're + // compiling a final LTO artifact, though, there's no need to worry about + // this as we're not working with this dual "rlib/dylib" functionality. + let allocator_module = if tcx.sess.lto() { + None + } else if let Some(kind) = tcx.sess.allocator_kind.get() { + unsafe { + let (llcx, llmod) = + context::create_context_and_module(tcx.sess, "allocator"); + let modules = ModuleLlvm { + llmod: llmod, + llcx: llcx, + }; + time(tcx.sess.time_passes(), "write allocator module", || { + allocator::trans(tcx, &modules, kind) + }); + + Some(ModuleTranslation { + name: link::ALLOCATOR_MODULE_NAME.to_string(), + symbol_name_hash: 0, // we always rebuild allocator shims + source: ModuleSource::Translated(modules), + kind: ModuleKind::Allocator, + }) + } + } else { + None + }; + + if let Some(allocator_module) = allocator_module { + ongoing_translation.submit_pre_translated_module_to_llvm(tcx.sess, allocator_module, false); + } + + let codegen_unit_count = codegen_units.len(); + ongoing_translation.submit_pre_translated_module_to_llvm(tcx.sess, + metadata_module, + codegen_unit_count == 0); + let translation_items = Arc::new(translation_items); let mut all_stats = Stats::default(); - let modules: Vec = codegen_units - .into_iter() - .map(|cgu| { + let mut module_dispositions = tcx.sess.opts.incremental.as_ref().map(|_| Vec::new()); + + // We sort the codegen units by size. This way we can schedule work for LLVM + // a bit more efficiently. Note that "size" is defined rather crudely at the + // moment as it is just the number of TransItems in the CGU, not taking into + // account the size of each TransItem. + let codegen_units = { + let mut codegen_units = codegen_units; + codegen_units.sort_by_key(|cgu| -(cgu.items().len() as isize)); + codegen_units + }; + + let mut total_trans_time = Duration::new(0, 0); + + for (cgu_index, cgu) in codegen_units.into_iter().enumerate() { + ongoing_translation.wait_for_signal_to_translate_item(); + ongoing_translation.check_for_errors(tcx.sess); + + let start_time = Instant::now(); + + let module = { + let _timing_guard = time_graph + .as_ref() + .map(|time_graph| time_graph.start(write::TRANS_WORKER_TIMELINE, + write::TRANS_WORK_PACKAGE_KIND)); let dep_node = cgu.work_product_dep_node(); let ((stats, module), _) = tcx.dep_graph.with_task(dep_node, @@ -998,9 +1123,41 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, exported_symbols.clone())), module_translation); all_stats.extend(stats); + + if let Some(ref mut module_dispositions) = module_dispositions { + module_dispositions.push(module.disposition()); + } + module - }) - .collect(); + }; + + let time_to_translate = Instant::now().duration_since(start_time); + + // We assume that the cost to run LLVM on a CGU is proportional to + // the time we needed for translating it. + let cost = time_to_translate.as_secs() * 1_000_000_000 + + time_to_translate.subsec_nanos() as u64; + + total_trans_time += time_to_translate; + + let is_last_cgu = (cgu_index + 1) == codegen_unit_count; + + ongoing_translation.submit_translated_module_to_llvm(tcx.sess, + module, + cost, + is_last_cgu); + ongoing_translation.check_for_errors(tcx.sess); + } + + // Since the main thread is sometimes blocked during trans, we keep track + // -Ztime-passes output manually. + print_time_passes_entry(tcx.sess.time_passes(), + "translate to LLVM IR", + total_trans_time); + + if let Some(module_dispositions) = module_dispositions { + assert_module_sources::assert_module_sources(tcx, &module_dispositions); + } fn module_translation<'a, 'tcx>( scx: AssertDepGraphSafe<&SharedCrateContext<'a, 'tcx>>, @@ -1044,7 +1201,8 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, let module = ModuleTranslation { name: cgu_name, symbol_name_hash, - source: ModuleSource::Preexisting(buf.clone()) + source: ModuleSource::Preexisting(buf.clone()), + kind: ModuleKind::Regular, }; return (Stats::default(), module); } @@ -1099,21 +1257,40 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, debuginfo::finalize(&ccx); } + let llvm_module = ModuleLlvm { + llcx: ccx.llcx(), + llmod: ccx.llmod(), + }; + + // In LTO mode we inject the allocator shim into the existing + // module. + if ccx.sess().lto() { + if let Some(kind) = ccx.sess().allocator_kind.get() { + time(ccx.sess().time_passes(), "write allocator module", || { + unsafe { + allocator::trans(ccx.tcx(), &llvm_module, kind); + } + }); + } + } + + // Adjust exported symbols for MSVC dllimport + if ccx.sess().target.target.options.is_like_msvc && + ccx.sess().crate_types.borrow().iter().any(|ct| *ct == config::CrateTypeRlib) { + create_imps(ccx.sess(), &llvm_module); + } + ModuleTranslation { name: cgu_name, symbol_name_hash, - source: ModuleSource::Translated(ModuleLlvm { - llcx: ccx.llcx(), - llmod: ccx.llmod(), - }) + source: ModuleSource::Translated(llvm_module), + kind: ModuleKind::Regular, } }; (lcx.into_stats(), module) } - assert_module_sources::assert_module_sources(tcx, &modules); - symbol_names_test::report_symbol_names(tcx); if shared_ccx.sess().trans_stats() { @@ -1144,85 +1321,29 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, } } - let sess = shared_ccx.sess(); - - // Get the list of llvm modules we created. We'll do a few wacky - // transforms on them now. - - let llvm_modules: Vec<_> = - modules.iter() - .filter_map(|module| match module.source { - ModuleSource::Translated(llvm) => Some(llvm), - _ => None, - }) - .collect(); - - if sess.target.target.options.is_like_msvc && - sess.crate_types.borrow().iter().any(|ct| *ct == config::CrateTypeRlib) { - create_imps(sess, &llvm_modules); - } - - // Translate an allocator shim, if any - // - // If LTO is enabled and we've got some previous LLVM module we translated - // above, then we can just translate directly into that LLVM module. If not, - // however, we need to create a separate module and trans into that. Note - // that the separate translation is critical for the standard library where - // the rlib's object file doesn't have allocator functions but the dylib - // links in an object file that has allocator functions. When we're - // compiling a final LTO artifact, though, there's no need to worry about - // this as we're not working with this dual "rlib/dylib" functionality. - let allocator_module = tcx.sess.allocator_kind.get().and_then(|kind| unsafe { - if sess.lto() && llvm_modules.len() > 0 { - time(tcx.sess.time_passes(), "write allocator module", || { - allocator::trans(tcx, &llvm_modules[0], kind) - }); - None - } else { - let (llcx, llmod) = - context::create_context_and_module(tcx.sess, "allocator"); - let modules = ModuleLlvm { - llmod: llmod, - llcx: llcx, - }; - time(tcx.sess.time_passes(), "write allocator module", || { - allocator::trans(tcx, &modules, kind) - }); - - Some(ModuleTranslation { - name: link::ALLOCATOR_MODULE_NAME.to_string(), - symbol_name_hash: 0, // we always rebuild allocator shims - source: ModuleSource::Translated(modules), - }) - } - }); - - let linker_info = LinkerInfo::new(&shared_ccx, &exported_symbols); + ongoing_translation.check_for_errors(tcx.sess); - let subsystem = attr::first_attr_value_str_by_name(&krate.attrs, - "windows_subsystem"); - let windows_subsystem = subsystem.map(|subsystem| { - if subsystem != "windows" && subsystem != "console" { - tcx.sess.fatal(&format!("invalid windows subsystem `{}`, only \ - `windows` and `console` are allowed", - subsystem)); - } - subsystem.to_string() - }); + assert_and_save_dep_graph(tcx, + incremental_hashes_map, + metadata_incr_hashes, + link_meta); + ongoing_translation +} - CrateTranslation { - crate_name: tcx.crate_name(LOCAL_CRATE), - modules: modules, - metadata_module: metadata_module, - allocator_module: allocator_module, - link: link_meta, - metadata: metadata, - exported_symbols: Arc::try_unwrap(exported_symbols) - .expect("There's still a reference to exported_symbols?"), - no_builtins: no_builtins, - linker_info: linker_info, - windows_subsystem: windows_subsystem, - } +fn assert_and_save_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, + incremental_hashes_map: IncrementalHashesMap, + metadata_incr_hashes: EncodedMetadataHashes, + link_meta: LinkMeta) { + time(tcx.sess.time_passes(), + "assert dep graph", + || rustc_incremental::assert_dep_graph(tcx)); + + time(tcx.sess.time_passes(), + "serialize dep graph", + || rustc_incremental::save_dep_graph(tcx, + incremental_hashes_map, + &metadata_incr_hashes, + link_meta.crate_hash)); } #[inline(never)] // give this a place in the profiler diff --git a/src/librustc_trans/lib.rs b/src/librustc_trans/lib.rs index 70337a91731d7..5a4a5b95cf90a 100644 --- a/src/librustc_trans/lib.rs +++ b/src/librustc_trans/lib.rs @@ -36,9 +36,9 @@ use rustc::dep_graph::WorkProduct; use syntax_pos::symbol::Symbol; +use std::sync::Arc; extern crate flate2; -extern crate crossbeam; extern crate libc; extern crate owning_ref; #[macro_use] extern crate rustc; @@ -54,6 +54,7 @@ extern crate rustc_const_math; extern crate rustc_bitflags; extern crate rustc_demangle; extern crate jobserver; +extern crate num_cpus; #[macro_use] extern crate log; #[macro_use] extern crate syntax; @@ -124,13 +125,13 @@ mod mir; mod monomorphize; mod partitioning; mod symbol_names_test; +mod time_graph; mod trans_item; mod tvec; mod type_; mod type_of; mod value; -#[derive(Clone)] pub struct ModuleTranslation { /// The name of the module. When the crate may be saved between /// compilations, incremental compilation requires that name be @@ -140,6 +141,58 @@ pub struct ModuleTranslation { pub name: String, pub symbol_name_hash: u64, pub source: ModuleSource, + pub kind: ModuleKind, +} + +#[derive(Copy, Clone, Debug)] +pub enum ModuleKind { + Regular, + Metadata, + Allocator, +} + +impl ModuleTranslation { + pub fn into_compiled_module(self, emit_obj: bool, emit_bc: bool) -> CompiledModule { + let pre_existing = match self.source { + ModuleSource::Preexisting(_) => true, + ModuleSource::Translated(_) => false, + }; + + CompiledModule { + name: self.name.clone(), + kind: self.kind, + symbol_name_hash: self.symbol_name_hash, + pre_existing, + emit_obj, + emit_bc, + } + } +} + +impl Drop for ModuleTranslation { + fn drop(&mut self) { + match self.source { + ModuleSource::Preexisting(_) => { + // Nothing to dispose. + }, + ModuleSource::Translated(llvm) => { + unsafe { + llvm::LLVMDisposeModule(llvm.llmod); + llvm::LLVMContextDispose(llvm.llcx); + } + }, + } + } +} + +#[derive(Debug)] +pub struct CompiledModule { + pub name: String, + pub kind: ModuleKind, + pub symbol_name_hash: u64, + pub pre_existing: bool, + pub emit_obj: bool, + pub emit_bc: bool, } #[derive(Clone)] @@ -151,7 +204,7 @@ pub enum ModuleSource { Translated(ModuleLlvm), } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub struct ModuleLlvm { pub llcx: llvm::ContextRef, pub llmod: llvm::ModuleRef, @@ -162,12 +215,12 @@ unsafe impl Sync for ModuleTranslation { } pub struct CrateTranslation { pub crate_name: Symbol, - pub modules: Vec, - pub metadata_module: ModuleTranslation, - pub allocator_module: Option, + pub modules: Vec, + pub metadata_module: CompiledModule, + pub allocator_module: Option, pub link: rustc::middle::cstore::LinkMeta, pub metadata: rustc::middle::cstore::EncodedMetadata, - pub exported_symbols: back::symbol_export::ExportedSymbols, + pub exported_symbols: Arc, pub no_builtins: bool, pub windows_subsystem: Option, pub linker_info: back::linker::LinkerInfo diff --git a/src/librustc_trans/time_graph.rs b/src/librustc_trans/time_graph.rs new file mode 100644 index 0000000000000..e0ebe8a0933f1 --- /dev/null +++ b/src/librustc_trans/time_graph.rs @@ -0,0 +1,181 @@ +// Copyright 2017 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::collections::HashMap; +use std::marker::PhantomData; +use std::sync::{Arc, Mutex}; +use std::time::Instant; +use std::io::prelude::*; +use std::fs::File; + +const OUTPUT_WIDTH_IN_PX: u64 = 1000; +const TIME_LINE_HEIGHT_IN_PX: u64 = 7; +const TIME_LINE_HEIGHT_STRIDE_IN_PX: usize = 10; + +#[derive(Clone)] +struct Timing { + start: Instant, + end: Instant, + work_package_kind: WorkPackageKind, +} + +#[derive(Clone, Copy, Hash, Eq, PartialEq, Debug)] +pub struct TimelineId(pub usize); + +#[derive(Clone)] +struct PerThread { + timings: Vec, + open_work_package: Option<(Instant, WorkPackageKind)>, +} + +#[derive(Clone)] +pub struct TimeGraph { + data: Arc>>, +} + +#[derive(Clone, Copy)] +pub struct WorkPackageKind(pub &'static [&'static str]); + +pub struct RaiiToken { + graph: TimeGraph, + timeline: TimelineId, + // The token must not be Send: + _marker: PhantomData<*const ()> +} + + +impl Drop for RaiiToken { + fn drop(&mut self) { + self.graph.end(self.timeline); + } +} + +impl TimeGraph { + pub fn new() -> TimeGraph { + TimeGraph { + data: Arc::new(Mutex::new(HashMap::new())) + } + } + + pub fn start(&self, + timeline: TimelineId, + work_package_kind: WorkPackageKind) -> RaiiToken { + { + let mut table = self.data.lock().unwrap(); + + let mut data = table.entry(timeline).or_insert(PerThread { + timings: Vec::new(), + open_work_package: None, + }); + + assert!(data.open_work_package.is_none()); + data.open_work_package = Some((Instant::now(), work_package_kind)); + } + + RaiiToken { + graph: self.clone(), + timeline, + _marker: PhantomData, + } + } + + fn end(&self, timeline: TimelineId) { + let end = Instant::now(); + + let mut table = self.data.lock().unwrap(); + let mut data = table.get_mut(&timeline).unwrap(); + + if let Some((start, work_package_kind)) = data.open_work_package { + data.timings.push(Timing { + start, + end, + work_package_kind, + }); + } else { + bug!("end timing without start?") + } + + data.open_work_package = None; + } + + pub fn dump(&self, output_filename: &str) { + let table = self.data.lock().unwrap(); + + for data in table.values() { + assert!(data.open_work_package.is_none()); + } + + let mut timelines: Vec = + table.values().map(|data| data.clone()).collect(); + + timelines.sort_by_key(|timeline| timeline.timings[0].start); + + let earliest_instant = timelines[0].timings[0].start; + let latest_instant = timelines.iter() + .map(|timeline| timeline.timings + .last() + .unwrap() + .end) + .max() + .unwrap(); + let max_distance = distance(earliest_instant, latest_instant); + + let mut file = File::create(format!("{}.html", output_filename)).unwrap(); + + writeln!(file, "").unwrap(); + writeln!(file, "").unwrap(); + writeln!(file, "").unwrap(); + + let mut color = 0; + + for (line_index, timeline) in timelines.iter().enumerate() { + let line_top = line_index * TIME_LINE_HEIGHT_STRIDE_IN_PX; + + for span in &timeline.timings { + let start = distance(earliest_instant, span.start); + let end = distance(earliest_instant, span.end); + + let start = normalize(start, max_distance, OUTPUT_WIDTH_IN_PX); + let end = normalize(end, max_distance, OUTPUT_WIDTH_IN_PX); + + let colors = span.work_package_kind.0; + + writeln!(file, "
", + line_top, + start, + end - start, + TIME_LINE_HEIGHT_IN_PX, + colors[color % colors.len()] + ).unwrap(); + + color += 1; + } + } + + writeln!(file, "").unwrap(); + writeln!(file, "").unwrap(); + } +} + +fn distance(zero: Instant, x: Instant) -> u64 { + + let duration = x.duration_since(zero); + (duration.as_secs() * 1_000_000_000 + duration.subsec_nanos() as u64) // / div +} + +fn normalize(distance: u64, max: u64, max_pixels: u64) -> u64 { + (max_pixels * distance) / max +} + diff --git a/src/test/run-make/llvm-phase/test.rs b/src/test/run-make/llvm-phase/test.rs index a75dc7e57a9a2..7a63871f19e38 100644 --- a/src/test/run-make/llvm-phase/test.rs +++ b/src/test/run-make/llvm-phase/test.rs @@ -54,11 +54,7 @@ impl<'a> CompilerCalls<'a> for JitCalls { state.session.abort_if_errors(); let trans = state.trans.unwrap(); assert_eq!(trans.modules.len(), 1); - let rs_llmod = match trans.modules[0].source { - ModuleSource::Preexisting(_) => unimplemented!(), - ModuleSource::Translated(llvm) => llvm.llmod, - }; - unsafe { rustc_llvm::LLVMDumpModule(rs_llmod) }; + println!("name of compiled module = {}", trans.modules[0].name); }); cc }