diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs
index f4f2e3f243457..2cbdd027036d8 100644
--- a/library/std/src/io/mod.rs
+++ b/library/std/src/io/mod.rs
@@ -1923,6 +1923,35 @@ fn read_until<R: BufRead + ?Sized>(r: &mut R, delim: u8, buf: &mut Vec<u8>) -> R
     }
 }
 
+/// Consumes and discards bytes from `r` up to and including the first `delim`
+/// byte, or until EOF if `delim` never appears.
+///
+/// Returns the number of bytes consumed. `ErrorKind::Interrupted` errors from
+/// `fill_buf` are retried; any other error is returned to the caller.
+fn skip_until<R: BufRead + ?Sized>(r: &mut R, delim: u8) -> Result<usize> {
+    let mut read = 0;
+    loop {
+        let (done, used) = {
+            let available = match r.fill_buf() {
+                Ok(n) => n,
+                Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
+                Err(e) => return Err(e),
+            };
+            match memchr::memchr(delim, available) {
+                // `i + 1` so that the delimiter itself is consumed too
+                Some(i) => (true, i + 1),
+                None => (false, available.len()),
+            }
+        };
+        r.consume(used);
+        read += used;
+        // `used == 0` means `fill_buf` returned an empty buffer, i.e. EOF
+        if done || used == 0 {
+            return Ok(read);
+        }
+    }
+}
+
 /// A `BufRead` is a type of `Read`er which has an internal buffer, allowing it
 /// to perform extra ways of reading.
 ///
@@ -2126,6 +2155,68 @@ pub trait BufRead: Read {
         read_until(self, byte, buf)
     }
 
+    /// Skips all bytes until the delimiter `byte` or EOF is reached.
+    ///
+    /// This function will read (and discard) bytes from the underlying stream until the
+    /// delimiter or EOF is found.
+    ///
+    /// If successful, this function will return the total number of bytes read,
+    /// including the delimiter byte if found.
+    ///
+    /// This is useful for efficiently skipping data such as NUL-terminated strings
+    /// in binary file formats without buffering.
+    ///
+    /// This function is blocking and should be used carefully: it is possible for
+    /// an attacker to continuously send bytes without ever sending the delimiter
+    /// or EOF.
+    ///
+    /// # Errors
+    ///
+    /// This function will ignore all instances of [`ErrorKind::Interrupted`] and
+    /// will otherwise return any errors returned by [`fill_buf`].
+    ///
+    /// If an I/O error is encountered then all bytes skipped so far will have
+    /// already been consumed from the reader, and their count is not returned.
+    ///
+    /// [`fill_buf`]: BufRead::fill_buf
+    ///
+    /// # Examples
+    ///
+    /// [`std::io::Cursor`][`Cursor`] is a type that implements `BufRead`. In
+    /// this example, we use [`Cursor`] to read some NUL-terminated information
+    /// about Ferris from a binary string, skipping the fun fact:
+    ///
+    /// ```
+    /// #![feature(bufread_skip_until)]
+    ///
+    /// use std::io::{self, BufRead};
+    ///
+    /// let mut cursor = io::Cursor::new(b"Ferris\0Likes long walks on the beach\0Crustacean\0");
+    ///
+    /// // read name
+    /// let mut name = Vec::new();
+    /// let num_bytes = cursor.read_until(b'\0', &mut name)
+    ///     .expect("reading from cursor won't fail");
+    /// assert_eq!(num_bytes, 7);
+    /// assert_eq!(name, b"Ferris\0");
+    ///
+    /// // skip fun fact
+    /// let num_bytes = cursor.skip_until(b'\0')
+    ///     .expect("reading from cursor won't fail");
+    /// assert_eq!(num_bytes, 30);
+    ///
+    /// // read animal type
+    /// let mut animal = Vec::new();
+    /// let num_bytes = cursor.read_until(b'\0', &mut animal)
+    ///     .expect("reading from cursor won't fail");
+    /// assert_eq!(num_bytes, 11);
+    /// assert_eq!(animal, b"Crustacean\0");
+    /// ```
+    #[unstable(feature = "bufread_skip_until", issue = "111735")]
+    fn skip_until(&mut self, byte: u8) -> Result<usize> {
+        skip_until(self, byte)
+    }
+
     /// Read all bytes until a newline (the `0xA` byte) is reached, and append
     /// them to the provided buffer. You do not need to clear the buffer before
     /// appending.
diff --git a/library/std/src/io/tests.rs b/library/std/src/io/tests.rs
index d5a8c93b0ce9f..4c5f86fe43163 100644
--- a/library/std/src/io/tests.rs
+++ b/library/std/src/io/tests.rs
@@ -25,6 +25,42 @@ fn read_until() {
     assert_eq!(v, []);
 }
 
+#[test]
+fn skip_until() {
+    let bytes: &[u8] = b"read\0ignore\0read\0ignore\0read\0ignore\0";
+    let mut reader = BufReader::new(bytes);
+
+    // read from the bytes, alternating between
+    // consuming `read\0`s and skipping `ignore\0`s
+    loop {
+        // consume `read\0`
+        let mut out = Vec::new();
+        let read = reader.read_until(0, &mut out).unwrap();
+        if read == 0 {
+            // eof
+            break;
+        } else {
+            assert_eq!(out, b"read\0");
+            assert_eq!(read, b"read\0".len());
+        }
+
+        // skip past `ignore\0`
+        let skipped = reader.skip_until(0).unwrap();
+        assert_eq!(skipped, b"ignore\0".len());
+    }
+
+    // ensure we are at the end of the byte slice and that we can skip no further
+    // also ensure skip_until matches the behavior of read_until at EOF
+    let skipped = reader.skip_until(0).unwrap();
+    assert_eq!(skipped, 0);
+
+    // a delimiter that never appears skips everything up to EOF; the tiny
+    // capacity forces the skip to span several `fill_buf` calls
+    let mut reader = BufReader::with_capacity(2, &b"no delimiter"[..]);
+    let skipped = reader.skip_until(0).unwrap();
+    assert_eq!(skipped, b"no delimiter".len());
+}
+
 #[test]
 fn split() {
     let buf = Cursor::new(&b"12"[..]);