Skip to content

Commit 15b9834

Browse files
committed
Don't copy ByRef passed types to local stack slot when not necessary
E.g., the copy is unnecessary when the local is immutable **and** its type is `Freeze`. This makes the simple raytracer runtime benchmark 1% faster than cg_llvm without optimizations; before this change it was 2% slower. cc #691 cc #684
1 parent 76d2e08 commit 15b9834

File tree

4 files changed

+49
-1
lines changed

4 files changed

+49
-1
lines changed

example/example.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,3 +202,7 @@ fn get_sized_field_ref_from_unsized_type(u: &Unsized) -> &u8 {
202202
fn get_unsized_field_ref_from_unsized_type(u: &Unsized) -> &str {
203203
&u.1
204204
}
205+
206+
/// Returns the first field of the tuple argument.
///
/// The argument is only read, never written. Presumably this serves as a
/// codegen example for an argument whose incoming storage can be reused
/// without a copy (going by the function name) -- confirm against the ABI
/// code that consumes this example.
pub fn reuse_byref_argument_storage(a: (u8, u16, u32)) -> u8 {
    let (first, _, _) = a;
    first
}

src/abi/comments.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,15 @@ pub fn add_local_place_comments<'tcx>(
9494
align.abi.bytes(),
9595
align.pref.bytes(),
9696
)),
97-
CPlaceInner::Addr(_, _) => unreachable!(),
97+
CPlaceInner::Addr(addr, None) => fx.add_global_comment(format!(
98+
"reuse {:5} {:20} {:4}b {}, {} storage={}",
99+
format!("{:?}", local),
100+
format!("{:?}", ty),
101+
size.bytes(),
102+
align.abi.bytes(),
103+
align.pref.bytes(),
104+
addr,
105+
)),
106+
CPlaceInner::Addr(_, Some(_)) => unreachable!(),
98107
}
99108
}

src/abi/mod.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,34 @@ pub fn codegen_fn_prelude(
282282
.unwrap()
283283
.contains(crate::analyze::Flags::NOT_SSA);
284284

285+
// Fast path: when an argument value is backed by an address (`try_to_addr`
// returns `Some`), bind the local directly to that address instead of
// allocating a separate place for it and copying the value in.
match arg_kind {
    ArgKind::Normal(Some(val)) => {
        if let Some(addr) = val.try_to_addr() {
            let local_decl = &fx.mir.local_decls[local];
            // `internally_mutable` is the negation of `is_freeze`: it is true
            // when the type is NOT freeze. The leading `!` is important.
            let internally_mutable = !val.layout().ty.is_freeze(
                fx.tcx,
                ParamEnv::reveal_all(),
                local_decl.source_info.span,
            );
            // Reuse the storage only when the local is declared immutable AND
            // the type is freeze. The previous condition tested
            // `internally_mutable` without negation, which selected exactly
            // the non-freeze types and skipped the freeze ones.
            if local_decl.mutability == mir::Mutability::Not && !internally_mutable {
                // We won't mutate this argument, so it is fine to borrow the backing storage
                // of this argument, to prevent a copy.

                let place = CPlace::for_addr(addr, val.layout());

                #[cfg(debug_assertions)]
                self::comments::add_local_place_comments(fx, place, local);

                // Each local must be registered exactly once.
                let prev_place = fx.local_map.insert(local, place);
                debug_assert!(prev_place.is_none());
                // The local is fully set up; skip the generic path below.
                continue;
            }
        }
    }
    // Every other argument kind goes through the regular path below.
    _ => {}
}
312+
285313
let place = local_place(fx, local, layout, is_ssa);
286314

287315
match arg_kind {

src/value_and_place.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,13 @@ impl<'tcx> CValue<'tcx> {
6363
}
6464
}
6565

66+
pub fn try_to_addr(self) -> Option<Value> {
67+
match self.0 {
68+
CValueInner::ByRef(addr) => Some(addr),
69+
CValueInner::ByVal(_) | CValueInner::ByValPair(_, _) => None,
70+
}
71+
}
72+
6673
/// Load a value with layout.abi of scalar
6774
pub fn load_scalar<'a>(self, fx: &mut FunctionCx<'_, 'tcx, impl Backend>) -> Value {
6875
let layout = self.1;

0 commit comments

Comments
 (0)