Skip to content

improve cgu schedule with greedy partition #112766

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion compiler/rustc_middle/src/mir/mono.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ impl<'tcx> fmt::Display for MonoItem<'tcx> {
}
}

#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct CodegenUnit<'tcx> {
/// A name for this CGU. Incremental compilation requires that
/// name be unique amongst **all** crates. Therefore, it should
Expand All @@ -236,6 +236,7 @@ pub struct CodegenUnit<'tcx> {
/// True if this CGU is used to hold code coverage information for dead code,
/// false otherwise.
is_code_coverage_dead_code_cgu: bool,
was_merged: bool,
}

/// Specifies the linkage type for a `MonoItem`.
Expand Down Expand Up @@ -272,6 +273,7 @@ impl<'tcx> CodegenUnit<'tcx> {
size_estimate: None,
primary: false,
is_code_coverage_dead_code_cgu: false,
was_merged: false,
}
}

Expand All @@ -291,6 +293,14 @@ impl<'tcx> CodegenUnit<'tcx> {
self.primary = true;
}

/// Returns the current value of the `was_merged` flag.
///
/// The flag is `false` on a freshly constructed CGU and becomes `true`
/// once `make_merged` has been called on it.
pub fn was_merged(&self) -> bool {
self.was_merged
}

/// Sets the `was_merged` flag to `true`, so that later calls to
/// `was_merged` report this CGU as merged.
pub fn make_merged(&mut self) {
self.was_merged = true;
}

/// The order of these items is non-deterministic.
pub fn items(&self) -> &FxHashMap<MonoItem<'tcx>, (Linkage, Visibility)> {
&self.items
Expand Down Expand Up @@ -411,6 +421,7 @@ impl<'a, 'tcx> HashStable<StableHashingContext<'a>> for CodegenUnit<'tcx> {
size_estimate: _,
primary: _,
is_code_coverage_dead_code_cgu,
was_merged: _,
} = *self;

name.hash_stable(hcx, hasher);
Expand Down
66 changes: 45 additions & 21 deletions compiler/rustc_monomorphize/src/partitioning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ fn merge_codegen_units<'tcx>(
let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> =
codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();

let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
// Having multiple CGUs can drastically speed up compilation. But for
// non-incremental builds, tiny CGUs slow down compilation *and* result in
// worse generated code. So we don't allow CGUs smaller than this (unless
Expand All @@ -326,36 +327,59 @@ fn merge_codegen_units<'tcx>(
// the `compiler_builtins` crate sets `codegen-units = 10000` and it's
// critical they aren't merged. Also, some tests use explicit small values
// and likewise won't work if small CGUs are merged.
while codegen_units.len() > cx.tcx.sess.codegen_units().as_usize()

if codegen_units.len() > cx.tcx.sess.codegen_units().as_usize()
|| (cx.tcx.sess.opts.incremental.is_none()
&& matches!(cx.tcx.sess.codegen_units(), CodegenUnits::Default(_))
&& codegen_units.len() > 1
&& codegen_units.iter().any(|cgu| cgu.size_estimate() < NON_INCR_MIN_CGU_SIZE))
&& codegen_units.len() > 1)
{
// Sort small cgus to the back.
codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate()));
while codegen_units.len() > 1
&& codegen_units.iter().any(|cgu| cgu.size_estimate() < NON_INCR_MIN_CGU_SIZE)
{
// Sort small cgus to the back.
codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate()));

let mut smallest = codegen_units.pop().unwrap();
let second_smallest = codegen_units.last_mut().unwrap();
let Some((mut smallest, second_smallest)) = codegen_units.pop().zip(codegen_units.last_mut()) else { break; };

// Move the mono-items from `smallest` to `second_smallest`
second_smallest.modify_size_estimate(smallest.size_estimate());
second_smallest.items_mut().extend(smallest.items_mut().drain());
// Move the mono-items from `smallest` to `second_smallest`
second_smallest.modify_size_estimate(smallest.size_estimate());
second_smallest.items_mut().extend(smallest.items_mut().drain());

// Record that `second_smallest` now contains all the stuff that was
// in `smallest` before.
let mut consumed_cgu_names = cgu_contents.remove(&smallest.name()).unwrap();
cgu_contents.get_mut(&second_smallest.name()).unwrap().append(&mut consumed_cgu_names);
// Record that `second_smallest` now contains all the stuff that was
// in `smallest` before.
let mut consumed_cgu_names = cgu_contents.remove(&smallest.name()).unwrap();
cgu_contents.get_mut(&second_smallest.name()).unwrap().append(&mut consumed_cgu_names);
}

debug!(
"CodegenUnit {} merged into CodegenUnit {}",
smallest.name(),
second_smallest.name()
);
codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate()));
let fallback_cgu_name = fallback_cgu_name(cgu_name_builder);
let mut default = CodegenUnit::new(fallback_cgu_name);
default.create_size_estimate(cx.tcx);
let mut merged_subsets: Vec<CodegenUnit<'_>> =
vec![default; cx.tcx.sess.codegen_units().as_usize().min(codegen_units.len())];

codegen_units.iter_mut().for_each(|cgu| {
let min = merged_subsets
.iter()
.enumerate()
.min_by_key(|(_, cgu)| cgu.size_estimate())
.map(|(i, _)| i)
.unwrap_or(0);
let min_cgu = &mut merged_subsets[min];
if !min_cgu.was_merged() {
*min_cgu = std::mem::replace(cgu, CodegenUnit::new(fallback_cgu_name));
min_cgu.make_merged();
} else {
min_cgu.modify_size_estimate(cgu.size_estimate());
min_cgu.items_mut().extend(cgu.items_mut().drain());
let mut consumed_cgu_names = cgu_contents.remove(&cgu.name()).unwrap();
cgu_contents.get_mut(&min_cgu.name()).unwrap().append(&mut consumed_cgu_names);
debug!("CodegenUnit {} merged into CodegenUnit {}", cgu.name(), min_cgu.name());
}
});
*codegen_units = merged_subsets;
}

let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);

if cx.tcx.sess.opts.incremental.is_some() {
// If we are doing incremental compilation, we want CGU names to
// reflect the path of the source level module they correspond to.
Expand Down
25 changes: 23 additions & 2 deletions compiler/rustc_ty_utils/src/ty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -454,12 +454,33 @@ fn instance_def_size_estimate<'tcx>(
tcx: TyCtxt<'tcx>,
instance_def: ty::InstanceDef<'tcx>,
) -> usize {
use rustc_middle::mir::StatementKind;
use ty::InstanceDef;

match instance_def {
InstanceDef::Item(..) | InstanceDef::DropGlue(..) => {
let mir = tcx.instance_mir(instance_def);
mir.basic_blocks.iter().map(|bb| bb.statements.len() + 1).sum()
mir.basic_blocks
.iter()
.map(|bb| {
bb.statements
.iter()
.filter(|s| {
// Do not count non-codegen statements.
!matches!(
s.kind,
StatementKind::Deinit(..)
| StatementKind::FakeRead(..)
| StatementKind::Retag { .. }
| StatementKind::AscribeUserType(..)
| StatementKind::ConstEvalCounter
| StatementKind::PlaceMention(..)
| StatementKind::Nop
)
})
.count()
+ 1
})
.sum()
}
// Estimate the size of other compiler-generated shims to be 1.
_ => 1,
Expand Down