From 1460667ec7dc55ed77de6a633b7e60b8fd45fdb9 Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Tue, 19 Jan 2016 11:21:48 +0100 Subject: [PATCH 1/4] Extract stepping from `EscapeUnicode::next` Extract a function that updates the iterator state and returns the result of an arbitrary step of iteration. This implements the same logic as `next`, but it can be shared with `nth`. --- src/libcore/char.rs | 79 ++++++++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 30 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index d80b456181ae4..cb43b3051e9ee 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -429,35 +429,10 @@ impl Iterator for EscapeUnicode { type Item = char; fn next(&mut self) -> Option { - match self.state { - EscapeUnicodeState::Backslash => { - self.state = EscapeUnicodeState::Type; - Some('\\') - } - EscapeUnicodeState::Type => { - self.state = EscapeUnicodeState::LeftBrace; - Some('u') - } - EscapeUnicodeState::LeftBrace => { - self.state = EscapeUnicodeState::Value; - Some('{') - } - EscapeUnicodeState::Value => { - let hex_digit = ((self.c as u32) >> (self.hex_digit_idx * 4)) & 0xf; - let c = from_digit(hex_digit, 16).unwrap(); - if self.hex_digit_idx == 0 { - self.state = EscapeUnicodeState::RightBrace; - } else { - self.hex_digit_idx -= 1; - } - Some(c) - } - EscapeUnicodeState::RightBrace => { - self.state = EscapeUnicodeState::Done; - Some('}') - } - EscapeUnicodeState::Done => None, - } + let state = self.state_len(); + let hex_digit_idx = self.hex_digit_idx; + + self.step(state, hex_digit_idx) } #[inline] @@ -488,8 +463,15 @@ impl Iterator for EscapeUnicode { impl ExactSizeIterator for EscapeUnicode { #[inline] fn len(&self) -> usize { + self.hex_digit_idx + self.state_len() + } +} + +impl EscapeUnicode { + #[inline] + fn state_len(&self) -> usize { // The match is a single memory access with no branching - self.hex_digit_idx + match self.state { + match self.state { EscapeUnicodeState::Done 
=> 0, EscapeUnicodeState::RightBrace => 1, EscapeUnicodeState::Value => 2, @@ -498,6 +480,43 @@ impl ExactSizeIterator for EscapeUnicode { EscapeUnicodeState::Backslash => 5, } } + + #[inline] + fn step(&mut self, state: usize, hex_digit_idx: usize) -> Option { + self.hex_digit_idx = hex_digit_idx; + + match state { + 5 => { + self.state = EscapeUnicodeState::Type; + Some('\\') + } + 4 => { + self.state = EscapeUnicodeState::LeftBrace; + Some('u') + } + 3 => { + self.state = EscapeUnicodeState::Value; + Some('{') + } + 2 => { + self.state = if hex_digit_idx == 0 { + EscapeUnicodeState::RightBrace + } else { + self.hex_digit_idx -= 1; + EscapeUnicodeState::Value + }; + from_digit(((self.c as u32) >> (hex_digit_idx * 4)) & 0xf, 16) + } + 1 => { + self.state = EscapeUnicodeState::Done; + Some('}') + } + _ => { + self.state = EscapeUnicodeState::Done; + None + } + } + } } /// An iterator that yields the literal escape code of a `char`. From 21bb7b863b0846c5a46cbaa172fb8cbbd4001452 Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Tue, 19 Jan 2016 11:22:00 +0100 Subject: [PATCH 2/4] Implement `EscapeUnicode::nth` as a step from the appropriate state. Part of #24214. 
--- src/libcore/char.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index cb43b3051e9ee..629dc5d48b13b 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -446,6 +446,23 @@ impl Iterator for EscapeUnicode { self.len() } + fn nth(&mut self, n: usize) -> Option { + let remaining = self.len().saturating_sub(n); + + // hex_digit_idx = (number of hex digits still to be emitted) - 1 + // It can be computed from the remaining number of items by taking + // into account that: + // - hex_digit_idx can never increase + // - the last 2 items (last hex digit, '}') are not counted in hex_digit_idx + let hex_digit_idx = ::cmp::min(self.hex_digit_idx, remaining.saturating_sub(2)); + + // state = number of items to be emitted for the state (as per state_len()) + // It can be computed because (remaining number of items) = state + hex_digit_idx + let state = remaining - hex_digit_idx; + + self.step(state, hex_digit_idx) + } + fn last(self) -> Option { match self.state { EscapeUnicodeState::Done => None, From 0c71e95efdfe76352608ccfccfbdea9a21103044 Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Wed, 20 Jan 2016 14:17:34 +0100 Subject: [PATCH 3/4] Use the same structure for non-Unicode variants of `EscapeDefaultState` This makes it easier to have a unique path for handling all of them. 
--- src/libcore/char.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 629dc5d48b13b..58df6ff7660d6 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -551,7 +551,7 @@ pub struct EscapeDefault { #[derive(Clone, Debug)] enum EscapeDefaultState { - Done, + Done(char), Char(char), Backslash(char), Unicode(EscapeUnicode), @@ -568,10 +568,10 @@ impl Iterator for EscapeDefault { Some('\\') } EscapeDefaultState::Char(c) => { - self.state = EscapeDefaultState::Done; + self.state = EscapeDefaultState::Done(c); Some(c) } - EscapeDefaultState::Done => None, + EscapeDefaultState::Done(_) => None, EscapeDefaultState::Unicode(ref mut iter) => iter.next(), } } @@ -594,15 +594,15 @@ impl Iterator for EscapeDefault { Some('\\') }, EscapeDefaultState::Backslash(c) if n == 1 => { - self.state = EscapeDefaultState::Done; + self.state = EscapeDefaultState::Done(c); Some(c) }, - EscapeDefaultState::Backslash(_) => { - self.state = EscapeDefaultState::Done; + EscapeDefaultState::Backslash(c) => { + self.state = EscapeDefaultState::Done(c); None }, EscapeDefaultState::Char(c) => { - self.state = EscapeDefaultState::Done; + self.state = EscapeDefaultState::Done(c); if n == 0 { Some(c) @@ -610,7 +610,7 @@ impl Iterator for EscapeDefault { None } }, - EscapeDefaultState::Done => return None, + EscapeDefaultState::Done(_) => return None, EscapeDefaultState::Unicode(ref mut i) => return i.nth(n), } } @@ -618,7 +618,7 @@ impl Iterator for EscapeDefault { fn last(self) -> Option { match self.state { EscapeDefaultState::Unicode(iter) => iter.last(), - EscapeDefaultState::Done => None, + EscapeDefaultState::Done(_) => None, EscapeDefaultState::Backslash(c) | EscapeDefaultState::Char(c) => Some(c), } } @@ -628,7 +628,7 @@ impl Iterator for EscapeDefault { impl ExactSizeIterator for EscapeDefault { fn len(&self) -> usize { match self.state { - EscapeDefaultState::Done => 0, + 
EscapeDefaultState::Done(_) => 0, EscapeDefaultState::Char(_) => 1, EscapeDefaultState::Backslash(_) => 2, EscapeDefaultState::Unicode(ref iter) => iter.len(), From 5d5b35c2ffc38765df7336f7330fa59733e89b46 Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Wed, 20 Jan 2016 14:19:32 +0100 Subject: [PATCH 4/4] Unify `EscapeDefault::next` and `EscapeDefault::nth` by extracting a shared `step` function. --- src/libcore/char.rs | 68 +++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 36 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 58df6ff7660d6..6ca29bc61d269 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -562,17 +562,10 @@ impl Iterator for EscapeDefault { type Item = char; fn next(&mut self) -> Option { - match self.state { - EscapeDefaultState::Backslash(c) => { - self.state = EscapeDefaultState::Char(c); - Some('\\') - } - EscapeDefaultState::Char(c) => { - self.state = EscapeDefaultState::Done(c); - Some(c) - } - EscapeDefaultState::Done(_) => None, - EscapeDefaultState::Unicode(ref mut iter) => iter.next(), + if let EscapeDefaultState::Unicode(ref mut iter) = self.state { + iter.next() + } else { + self.step(0) } } @@ -588,31 +581,7 @@ impl Iterator for EscapeDefault { } fn nth(&mut self, n: usize) -> Option { - match self.state { - EscapeDefaultState::Backslash(c) if n == 0 => { - self.state = EscapeDefaultState::Char(c); - Some('\\') - }, - EscapeDefaultState::Backslash(c) if n == 1 => { - self.state = EscapeDefaultState::Done(c); - Some(c) - }, - EscapeDefaultState::Backslash(c) => { - self.state = EscapeDefaultState::Done(c); - None - }, - EscapeDefaultState::Char(c) => { - self.state = EscapeDefaultState::Done(c); - - if n == 0 { - Some(c) - } else { - None - } - }, - EscapeDefaultState::Done(_) => return None, - EscapeDefaultState::Unicode(ref mut i) => return i.nth(n), - } + self.step(n) } fn last(self) -> Option { @@ -636,6 +605,33 @@ impl ExactSizeIterator for EscapeDefault { } } 
+impl EscapeDefault { + #[inline] + fn step(&mut self, n: usize) -> Option { + let (remaining, c) = match self.state { + EscapeDefaultState::Done(c) => (0usize, c), + EscapeDefaultState::Char(c) => (1, c), + EscapeDefaultState::Backslash(c) => (2, c), + EscapeDefaultState::Unicode(ref mut iter) => return iter.nth(n), + }; + + match remaining.saturating_sub(n) { + 2 => { + self.state = EscapeDefaultState::Char(c); + Some('\\') + } + 1 => { + self.state = EscapeDefaultState::Done(c); + Some(c) + } + _ => { + self.state = EscapeDefaultState::Done(c); + None + } + } + } +} + /// An iterator over `u8` entries represending the UTF-8 encoding of a `char` /// value. ///