Skip to content

Commit c434acd

Browse files
committed
Avoid some extra bounds checks in read_{u8,u16}
1 parent 0534655 commit c434acd

File tree

4 files changed

+61
-34
lines changed

4 files changed

+61
-34
lines changed

compiler/rustc_serialize/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ Core encoding and decoding interfaces.
1111
)]
1212
#![feature(never_type)]
1313
#![feature(associated_type_bounds)]
14+
#![feature(iter_advance_by)]
1415
#![feature(min_specialization)]
1516
#![feature(core_intrinsics)]
1617
#![feature(maybe_uninit_slice)]

compiler/rustc_serialize/src/opaque.rs

+51-30
Original file line numberDiff line numberDiff line change
@@ -535,34 +535,55 @@ impl Encoder for FileEncoder {
535535
// -----------------------------------------------------------------------------
536536

537537
pub struct MemDecoder<'a> {
538+
// Previously this type stored `position: usize`, but because it's staying
539+
// safe code, that meant that reading `n` bytes meant a bounds check both
540+
// for `position + n` *and* `position`, since there's nothing saying that
541+
// the additions didn't wrap. Storing an iterator like this instead means
542+
// there's no offsetting needed to get to the data, and the iterator instead
543+
// of a slice means only increasing the start pointer on reads, rather than
544+
// also needing to decrease the count in a slice.
545+
// This field is first because it's touched more than `data`.
546+
reader: std::slice::Iter<'a, u8>,
538547
pub data: &'a [u8],
539-
position: usize,
540548
}
541549

542550
impl<'a> MemDecoder<'a> {
543551
#[inline]
544552
pub fn new(data: &'a [u8], position: usize) -> MemDecoder<'a> {
545-
MemDecoder { data, position }
553+
let reader = data[position..].iter();
554+
MemDecoder { data, reader }
546555
}
547556

548557
#[inline]
549558
pub fn position(&self) -> usize {
550-
self.position
559+
self.data.len() - self.reader.len()
551560
}
552561

553562
#[inline]
554563
pub fn set_position(&mut self, pos: usize) {
555-
self.position = pos
564+
self.reader = self.data[pos..].iter();
556565
}
557566

558567
#[inline]
559568
pub fn advance(&mut self, bytes: usize) {
560-
self.position += bytes;
569+
self.reader.advance_by(bytes).unwrap();
570+
}
571+
572+
#[cold]
573+
fn panic_insufficient_data(&self) -> ! {
574+
let pos = self.position();
575+
let len = self.data.len();
576+
panic!("Insufficient remaining data at position {pos} (length {len})");
561577
}
562578
}
563579

564580
macro_rules! read_leb128 {
565-
($dec:expr, $fun:ident) => {{ leb128::$fun($dec.data, &mut $dec.position) }};
581+
($dec:expr, $fun:ident) => {{
582+
let mut position = 0_usize;
583+
let val = leb128::$fun($dec.reader.as_slice(), &mut position);
584+
let _ = $dec.reader.advance_by(position);
585+
val
586+
}};
566587
}
567588

568589
impl<'a> Decoder for MemDecoder<'a> {
@@ -583,17 +604,14 @@ impl<'a> Decoder for MemDecoder<'a> {
583604

584605
#[inline]
585606
fn read_u16(&mut self) -> u16 {
586-
let bytes = [self.data[self.position], self.data[self.position + 1]];
587-
let value = u16::from_le_bytes(bytes);
588-
self.position += 2;
589-
value
607+
let bytes = self.read_raw_bytes_array::<2>();
608+
u16::from_le_bytes(*bytes)
590609
}
591610

592611
#[inline]
593612
fn read_u8(&mut self) -> u8 {
594-
let value = self.data[self.position];
595-
self.position += 1;
596-
value
613+
let bytes = self.read_raw_bytes_array::<1>();
614+
u8::from_le_bytes(*bytes)
597615
}
598616

599617
#[inline]
@@ -618,17 +636,14 @@ impl<'a> Decoder for MemDecoder<'a> {
618636

619637
#[inline]
620638
fn read_i16(&mut self) -> i16 {
621-
let bytes = [self.data[self.position], self.data[self.position + 1]];
622-
let value = i16::from_le_bytes(bytes);
623-
self.position += 2;
624-
value
639+
let bytes = self.read_raw_bytes_array::<2>();
640+
i16::from_le_bytes(*bytes)
625641
}
626642

627643
#[inline]
628644
fn read_i8(&mut self) -> i8 {
629-
let value = self.data[self.position];
630-
self.position += 1;
631-
value as i8
645+
let bytes = self.read_raw_bytes_array::<1>();
646+
i8::from_le_bytes(*bytes)
632647
}
633648

634649
#[inline]
@@ -663,20 +678,26 @@ impl<'a> Decoder for MemDecoder<'a> {
663678
#[inline]
664679
fn read_str(&mut self) -> &'a str {
665680
let len = self.read_usize();
666-
let sentinel = self.data[self.position + len];
667-
assert!(sentinel == STR_SENTINEL);
668-
let s = unsafe {
669-
std::str::from_utf8_unchecked(&self.data[self.position..self.position + len])
670-
};
671-
self.position += len + 1;
672-
s
681+
682+
// This cannot reuse `read_raw_bytes` as that runs into lifetime issues
683+
// where the slice gets tied to `'b` instead of just to `'a`.
684+
if self.reader.len() <= len {
685+
self.panic_insufficient_data();
686+
}
687+
let slice = self.reader.as_slice();
688+
assert!(slice[len] == STR_SENTINEL);
689+
self.reader.advance_by(len + 1).unwrap();
690+
unsafe { std::str::from_utf8_unchecked(&slice[..len]) }
673691
}
674692

675693
#[inline]
676694
fn read_raw_bytes(&mut self, bytes: usize) -> &'a [u8] {
677-
let start = self.position;
678-
self.position += bytes;
679-
&self.data[start..self.position]
695+
if self.reader.len() < bytes {
696+
self.panic_insufficient_data();
697+
}
698+
let slice = self.reader.as_slice();
699+
self.reader.advance_by(bytes).unwrap();
700+
&slice[..bytes]
680701
}
681702
}
682703

compiler/rustc_serialize/src/serialize.rs

+5
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,11 @@ pub trait Decoder {
7878
fn read_char(&mut self) -> char;
7979
fn read_str(&mut self) -> &str;
8080
fn read_raw_bytes(&mut self, len: usize) -> &[u8];
81+
82+
#[inline]
83+
fn read_raw_bytes_array<const N: usize>(&mut self) -> &[u8; N] {
84+
self.read_raw_bytes(N).try_into().unwrap()
85+
}
8186
}
8287

8388
/// Trait for types that can be serialized

compiler/rustc_serialize/tests/opaque.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,15 @@ fn test_unit() {
5555
#[test]
5656
fn test_u8() {
5757
let mut vec = vec![];
58-
for i in u8::MIN..u8::MAX {
58+
for i in u8::MIN..=u8::MAX {
5959
vec.push(i);
6060
}
6161
check_round_trip(vec);
6262
}
6363

6464
#[test]
6565
fn test_u16() {
66-
for i in u16::MIN..u16::MAX {
66+
for i in u16::MIN..=u16::MAX {
6767
check_round_trip(vec![1, 2, 3, i, i, i]);
6868
}
6969
}
@@ -86,15 +86,15 @@ fn test_usize() {
8686
#[test]
8787
fn test_i8() {
8888
let mut vec = vec![];
89-
for i in i8::MIN..i8::MAX {
89+
for i in i8::MIN..=i8::MAX {
9090
vec.push(i);
9191
}
9292
check_round_trip(vec);
9393
}
9494

9595
#[test]
9696
fn test_i16() {
97-
for i in i16::MIN..i16::MAX {
97+
for i in i16::MIN..=i16::MAX {
9898
check_round_trip(vec![-1, 2, -3, i, i, i, 2]);
9999
}
100100
}

0 commit comments

Comments
 (0)