Skip to content

Commit 6bb51e7

Browse files
committed
Use a 5-byte length for tables
1 parent b23a5ad commit 6bb51e7

File tree

1 file changed

+88
-26
lines changed
  • compiler/rustc_metadata/src/rmeta

1 file changed

+88
-26
lines changed

compiler/rustc_metadata/src/rmeta/table.rs

Lines changed: 88 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,70 @@ use rustc_span::hygiene::MacroKind;
1010
use std::marker::PhantomData;
1111
use std::num::NonZeroUsize;
1212

13+
/// Table elements in the rmeta format must have fixed size, but we also want to encode offsets in
/// the file inside of table elements. If we use 4-byte offsets, it is too easy for crates to run
/// over that limit; see #112934. Switching to an 8-byte offset increases the size of some rlibs by
/// 30%. So for the time being we are using 5 bytes, which lets us encode offsets up to 2^40 - 1
/// (just under 1 TiB). It seems unlikely that anyone will need offsets larger than that, but if
/// you do, simply adjust this constant.
const OFFSET_SIZE_BYTES: usize = 5;

/// A `usize` that is known to fit in `OFFSET_SIZE_BYTES` little-endian bytes.
///
/// Values are expected to be created through `TryFrom<usize>`, which checks against
/// [`Offset::MAX`]; the derived `Default` is the zero offset.
#[derive(Default)]
struct Offset(usize);

impl Offset {
    /// Largest value that survives a round trip through `OFFSET_SIZE_BYTES` bytes.
    ///
    /// The shift amount is expressed in *bits*: we drop the high
    /// `size_of::<usize>() - OFFSET_SIZE_BYTES` bytes of `usize::MAX`, i.e. eight bits per byte.
    /// (Shifting by the byte count alone would leave `MAX` far larger than what can actually be
    /// written, and oversized offsets would be truncated silently.)
    ///
    /// We technically waste 1 byte per offset if the compiler is compiled for a 32-bit target, but
    /// that waste keeps the format described in this module portable. On such targets `usize` is
    /// narrower than the encoding, so `saturating_sub` yields a shift of zero and every `usize`
    /// is representable.
    const MAX: usize =
        usize::MAX >> (8 * std::mem::size_of::<usize>().saturating_sub(OFFSET_SIZE_BYTES));
}
33+
34+
#[derive(Debug)]
35+
pub struct OffsetTooLarge;
36+
37+
impl TryFrom<usize> for Offset {
38+
type Error = OffsetTooLarge;
39+
40+
#[inline]
41+
fn try_from(value: usize) -> Result<Self, Self::Error> {
42+
if value > Self::MAX { Err(OffsetTooLarge) } else { Ok(Self(value)) }
43+
}
44+
}
45+
46+
impl From<Offset> for usize {
47+
#[inline]
48+
fn from(v: Offset) -> usize {
49+
v.0
50+
}
51+
}
52+
53+
impl FixedSizeEncoding for Offset {
54+
type ByteArray = [u8; OFFSET_SIZE_BYTES];
55+
56+
#[inline]
57+
fn from_bytes(b: &Self::ByteArray) -> Self {
58+
let mut buf = [0u8; std::mem::size_of::<usize>()];
59+
buf[..OFFSET_SIZE_BYTES].copy_from_slice(b);
60+
let inner = usize::from_le_bytes(buf);
61+
debug_assert!(inner <= Self::MAX);
62+
Self(inner)
63+
}
64+
65+
#[inline]
66+
fn write_to_bytes(self, b: &mut Self::ByteArray) {
67+
b.copy_from_slice(&self.0.to_le_bytes()[..OFFSET_SIZE_BYTES]);
68+
}
69+
}
70+
71+
impl IsDefault for Offset {
72+
fn is_default(&self) -> bool {
73+
self.0 == 0
74+
}
75+
}
76+
1377
pub(super) trait IsDefault: Default {
1478
fn is_default(&self) -> bool;
1579
}
@@ -73,8 +137,6 @@ pub(super) trait FixedSizeEncoding: IsDefault {
73137
fn write_to_bytes(self, b: &mut Self::ByteArray);
74138
}
75139

76-
/// This implementation is not used generically, but for reading/writing
77-
/// concrete `u32` fields in `Lazy*` structures, which may be zero.
78140
impl FixedSizeEncoding for u32 {
79141
type ByteArray = [u8; 4];
80142

@@ -296,25 +358,22 @@ impl FixedSizeEncoding for UnusedGenericParams {
296358
}
297359
}
298360

299-
// NOTE(eddyb) there could be an impl for `usize`, which would enable a more
300-
// generic `LazyValue<T>` impl, but in the general case we might not need / want
301-
// to fit every `usize` in `u32`.
302361
impl<T> FixedSizeEncoding for Option<LazyValue<T>> {
303-
type ByteArray = [u8; 4];
362+
type ByteArray = [u8; OFFSET_SIZE_BYTES];
304363

305364
#[inline]
306-
fn from_bytes(b: &[u8; 4]) -> Self {
307-
let position = NonZeroUsize::new(u32::from_bytes(b) as usize)?;
365+
fn from_bytes(b: &Self::ByteArray) -> Self {
366+
let position = NonZeroUsize::new(Offset::from_bytes(b).try_into().unwrap())?;
308367
Some(LazyValue::from_position(position))
309368
}
310369

311370
#[inline]
312-
fn write_to_bytes(self, b: &mut [u8; 4]) {
371+
fn write_to_bytes(self, b: &mut Self::ByteArray) {
313372
match self {
314373
None => unreachable!(),
315374
Some(lazy) => {
316375
let position = lazy.position.get();
317-
let position: u32 = position.try_into().unwrap();
376+
let position: Offset = position.try_into().unwrap();
318377
position.write_to_bytes(b)
319378
}
320379
}
@@ -323,55 +382,58 @@ impl<T> FixedSizeEncoding for Option<LazyValue<T>> {
323382

324383
impl<T> LazyArray<T> {
325384
#[inline]
326-
fn write_to_bytes_impl(self, b: &mut [u8; 8]) {
327-
let ([position_bytes, meta_bytes],[])= b.as_chunks_mut::<4>() else { panic!() };
385+
fn write_to_bytes_impl(self, b: &mut [u8; OFFSET_SIZE_BYTES * 2]) {
386+
let ([position_bytes, meta_bytes],[])= b.as_chunks_mut::<OFFSET_SIZE_BYTES>() else { panic!() };
328387

329388
let position = self.position.get();
330-
let position: u32 = position.try_into().unwrap();
389+
let position: Offset = position.try_into().unwrap();
331390
position.write_to_bytes(position_bytes);
332391

333392
let len = self.num_elems;
334-
let len: u32 = len.try_into().unwrap();
393+
let len: Offset = len.try_into().unwrap();
335394
len.write_to_bytes(meta_bytes);
336395
}
337396

338-
fn from_bytes_impl(position_bytes: &[u8; 4], meta_bytes: &[u8; 4]) -> Option<LazyArray<T>> {
339-
let position = NonZeroUsize::new(u32::from_bytes(position_bytes) as usize)?;
340-
let len = u32::from_bytes(meta_bytes) as usize;
397+
fn from_bytes_impl(
398+
position_bytes: &[u8; OFFSET_SIZE_BYTES],
399+
meta_bytes: &[u8; OFFSET_SIZE_BYTES],
400+
) -> Option<LazyArray<T>> {
401+
let position = NonZeroUsize::new(Offset::from_bytes(position_bytes).0)?;
402+
let len = Offset::from_bytes(meta_bytes).0;
341403
Some(LazyArray::from_position_and_num_elems(position, len))
342404
}
343405
}
344406

345407
impl<T> FixedSizeEncoding for LazyArray<T> {
346-
type ByteArray = [u8; 8];
408+
type ByteArray = [u8; OFFSET_SIZE_BYTES * 2];
347409

348410
#[inline]
349-
fn from_bytes(b: &[u8; 8]) -> Self {
350-
let ([position_bytes, meta_bytes],[])= b.as_chunks::<4>() else { panic!() };
351-
if *meta_bytes == [0; 4] {
411+
fn from_bytes(b: &Self::ByteArray) -> Self {
412+
let ([position_bytes, meta_bytes],[])= b.as_chunks::<OFFSET_SIZE_BYTES>() else { panic!() };
413+
if *meta_bytes == [0u8; OFFSET_SIZE_BYTES] {
352414
return Default::default();
353415
}
354416
LazyArray::from_bytes_impl(position_bytes, meta_bytes).unwrap()
355417
}
356418

357419
#[inline]
358-
fn write_to_bytes(self, b: &mut [u8; 8]) {
420+
fn write_to_bytes(self, b: &mut Self::ByteArray) {
359421
assert!(!self.is_default());
360422
self.write_to_bytes_impl(b)
361423
}
362424
}
363425

364426
impl<T> FixedSizeEncoding for Option<LazyArray<T>> {
365-
type ByteArray = [u8; 8];
427+
type ByteArray = [u8; OFFSET_SIZE_BYTES * 2];
366428

367429
#[inline]
368-
fn from_bytes(b: &[u8; 8]) -> Self {
369-
let ([position_bytes, meta_bytes],[])= b.as_chunks::<4>() else { panic!() };
430+
fn from_bytes(b: &Self::ByteArray) -> Self {
431+
let ([position_bytes, meta_bytes],[])= b.as_chunks::<OFFSET_SIZE_BYTES>() else { panic!() };
370432
LazyArray::from_bytes_impl(position_bytes, meta_bytes)
371433
}
372434

373435
#[inline]
374-
fn write_to_bytes(self, b: &mut [u8; 8]) {
436+
fn write_to_bytes(self, b: &mut Self::ByteArray) {
375437
match self {
376438
None => unreachable!(),
377439
Some(lazy) => lazy.write_to_bytes_impl(b),

0 commit comments

Comments
 (0)