From 73c4839dfa606f38d13afea37d3419f323945937 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Wed, 8 Oct 2014 17:03:04 -0700 Subject: [PATCH 01/14] core: Add from_u32 to the Char trait This is the only free function not part of the trait. --- src/libcore/char.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index f507556909c8f..3f61df5d3b529 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -270,6 +270,9 @@ pub trait Char { /// Fails if given a radix > 36. fn from_digit(num: uint, radix: uint) -> Option; + /// Converts from `u32` to a `char` + fn from_u32(i: u32) -> Option; + /// Returns the hexadecimal Unicode escape of a character. /// /// The rules are as follows: @@ -319,6 +322,9 @@ impl Char for char { fn from_digit(num: uint, radix: uint) -> Option { from_digit(num, radix) } + #[inline] + fn from_u32(i: u32) -> Option { from_u32(i) } + fn escape_unicode(&self, f: |char|) { escape_unicode(*self, f) } fn escape_default(&self, f: |char|) { escape_default(*self, f) } From e56dc519a795f831114c628c927e12c8d54b91d8 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Wed, 8 Oct 2014 17:05:12 -0700 Subject: [PATCH 02/14] core: Mark Char trait experimental --- src/libcore/char.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 3f61df5d3b529..39d250f26b705 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -229,6 +229,7 @@ pub fn len_utf8_bytes(c: char) -> uint { } /// Basic `char` manipulations. +#[experimental = "trait organization may change"] pub trait Char { /// Checks if a `char` parses as a numeric digit in the given radix. 
/// @@ -315,6 +316,7 @@ pub trait Char { fn encode_utf16(&self, dst: &mut [u16]) -> Option; } +#[experimental = "trait is experimental"] impl Char for char { fn is_digit_radix(&self, radix: uint) -> bool { is_digit_radix(*self, radix) } From 9f5e4e2974b5392d10c44f31f4415a8e3966d0da Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Wed, 8 Oct 2014 17:06:19 -0700 Subject: [PATCH 03/14] char: Mark the MAX constant stable --- src/libcore/char.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 39d250f26b705..2113ef78c2c4e 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -63,6 +63,7 @@ static MAX_THREE_B: u32 = 0x10000u32; */ /// The highest valid code point +#[stable] pub const MAX: char = '\U0010ffff'; /// Converts from `u32` to a `char` From f0cc17b65f000c7e91dafc77563c313ba4b3ba74 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Wed, 8 Oct 2014 17:15:27 -0700 Subject: [PATCH 04/14] unicode: Rename UnicodeChar::is_digit to is_numeric 'Numeric' is the proper name of the unicode character class, and this frees up the word 'digit' for ascii use in libcore. Since I'm going to rename `Char::is_digit_radix` to `is_digit`, I am not leaving a deprecated method in place, because that would just cause name clashes, as both `Char` and `UnicodeChar` are in the prelude. 
[breaking-change] --- src/compiletest/runtest.rs | 2 +- src/libcollections/str.rs | 6 +++--- src/libcore/str.rs | 14 +++++++------- src/libcoretest/char.rs | 12 ++++++------ src/libhexfloat/lib.rs | 2 +- src/librustc/lint/builtin.rs | 2 +- src/libstd/ascii.rs | 8 ++++---- src/libstd/rt/backtrace.rs | 4 ++-- src/libunicode/u_char.rs | 4 ++-- 9 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/compiletest/runtest.rs b/src/compiletest/runtest.rs index ff6b1b1533324..dd0a14dfcdbc7 100644 --- a/src/compiletest/runtest.rs +++ b/src/compiletest/runtest.rs @@ -1515,7 +1515,7 @@ fn _arm_exec_compiled_test(config: &Config, let mut exitcode: int = 0; for c in exitcode_out.as_slice().chars() { - if !c.is_digit() { break; } + if !c.is_numeric() { break; } exitcode = exitcode * 10 + match c { '0' ... '9' => c as int - ('0' as int), _ => 101, diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index f49371b8e8862..1c40a299b9e7d 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -1273,7 +1273,7 @@ mod tests { assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11"); let chars: &[char] = &['1', '2']; assert_eq!("12foo1bar12".trim_left_chars(chars), "foo1bar12"); - assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123"); + assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_numeric()), "foo1bar123"); } #[test] @@ -1288,7 +1288,7 @@ mod tests { assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar"); let chars: &[char] = &['1', '2']; assert_eq!("12foo1bar12".trim_right_chars(chars), "12foo1bar"); - assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar"); + assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_numeric()), "123foo1bar"); } #[test] @@ -1303,7 +1303,7 @@ mod tests { assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar"); let chars: &[char] = &['1', '2']; assert_eq!("12foo1bar12".trim_chars(chars), "foo1bar"); - 
assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar"); + assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_numeric()), "foo1bar"); } #[test] diff --git a/src/libcore/str.rs b/src/libcore/str.rs index e8cd93ba7dc42..4cd291b0ae0b2 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -1266,7 +1266,7 @@ pub trait StrSlice<'a> { /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect(); /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]); /// - /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_digit()).collect(); + /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_numeric()).collect(); /// assert_eq!(v, vec!["abc", "def", "ghi"]); /// /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect(); @@ -1287,7 +1287,7 @@ pub trait StrSlice<'a> { /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect(); /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]); /// - /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |c: char| c.is_digit()).collect(); + /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |c: char| c.is_numeric()).collect(); /// assert_eq!(v, vec!["abc", "def2ghi"]); /// /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect(); @@ -1319,7 +1319,7 @@ pub trait StrSlice<'a> { /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect(); /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]); /// - /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_digit()).rev().collect(); + /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_numeric()).rev().collect(); /// assert_eq!(v, vec!["ghi", "def", "abc"]); /// /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect(); @@ -1337,7 +1337,7 @@ pub trait StrSlice<'a> { /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect(); /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]); /// - /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |c: char| 
c.is_digit()).collect(); + /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |c: char| c.is_numeric()).collect(); /// assert_eq!(v, vec!["ghi", "abc1def"]); /// /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect(); @@ -1547,7 +1547,7 @@ pub trait StrSlice<'a> { /// assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar") /// let x: &[_] = &['1', '2']; /// assert_eq!("12foo1bar12".trim_chars(x), "foo1bar") - /// assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar") + /// assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_numeric()), "foo1bar") /// ``` fn trim_chars(&self, to_trim: C) -> &'a str; @@ -1563,7 +1563,7 @@ pub trait StrSlice<'a> { /// assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11") /// let x: &[_] = &['1', '2']; /// assert_eq!("12foo1bar12".trim_left_chars(x), "foo1bar12") - /// assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123") + /// assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_numeric()), "foo1bar123") /// ``` fn trim_left_chars(&self, to_trim: C) -> &'a str; @@ -1579,7 +1579,7 @@ pub trait StrSlice<'a> { /// assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar") /// let x: &[_] = &['1', '2']; /// assert_eq!("12foo1bar12".trim_right_chars(x), "12foo1bar") - /// assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar") + /// assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_numeric()), "123foo1bar") /// ``` fn trim_right_chars(&self, to_trim: C) -> &'a str; diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs index 8807756d01b7e..8b80a4493eebd 100644 --- a/src/libcoretest/char.rs +++ b/src/libcoretest/char.rs @@ -105,12 +105,12 @@ fn test_is_control() { #[test] fn test_is_digit() { - assert!('2'.is_digit()); - assert!('7'.is_digit()); - assert!(!'c'.is_digit()); - assert!(!'i'.is_digit()); - assert!(!'z'.is_digit()); - assert!(!'Q'.is_digit()); + assert!('2'.is_numeric()); + assert!('7'.is_numeric()); + 
assert!(!'c'.is_numeric()); + assert!(!'i'.is_numeric()); + assert!(!'z'.is_numeric()); + assert!(!'Q'.is_numeric()); } #[test] diff --git a/src/libhexfloat/lib.rs b/src/libhexfloat/lib.rs index 8335cc16d649f..4ffab6fba099d 100644 --- a/src/libhexfloat/lib.rs +++ b/src/libhexfloat/lib.rs @@ -93,7 +93,7 @@ fn hex_float_lit_err(s: &str) -> Option<(uint, String)> { } i+=1; if chars.peek() == Some(&'-') { chars.next(); i+= 1 } let mut e_len = 0i; - for _ in chars.take_while(|c| c.is_digit()) { chars.next(); i+=1; e_len += 1} + for _ in chars.take_while(|c| c.is_numeric()) { chars.next(); i+=1; e_len += 1} if e_len == 0 { return Some((i, "Expected exponent digits".to_string())); } diff --git a/src/librustc/lint/builtin.rs b/src/librustc/lint/builtin.rs index 988b128e31d5c..942d1ab96aa87 100644 --- a/src/librustc/lint/builtin.rs +++ b/src/librustc/lint/builtin.rs @@ -861,7 +861,7 @@ impl NonSnakeCase { let mut allow_underscore = true; ident.chars().all(|c| { allow_underscore = match c { - c if c.is_lowercase() || c.is_digit() => true, + c if c.is_lowercase() || c.is_numeric() => true, '_' if allow_underscore => false, _ => return false, }; diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index 71d38ac66597f..9dd6bf709349e 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -592,10 +592,10 @@ mod tests { assert_eq!('`'.to_ascii().to_uppercase().to_char(), '`'); assert_eq!('{'.to_ascii().to_uppercase().to_char(), '{'); - assert!('0'.to_ascii().is_digit()); - assert!('9'.to_ascii().is_digit()); - assert!(!'/'.to_ascii().is_digit()); - assert!(!':'.to_ascii().is_digit()); + assert!('0'.to_ascii().is_numeric()); + assert!('9'.to_ascii().is_numeric()); + assert!(!'/'.to_ascii().is_numeric()); + assert!(!':'.to_ascii().is_numeric()); assert!((0x1fu8).to_ascii().is_control()); assert!(!' 
'.to_ascii().is_control()); diff --git a/src/libstd/rt/backtrace.rs b/src/libstd/rt/backtrace.rs index e05e533be56c5..250a3e7efafda 100644 --- a/src/libstd/rt/backtrace.rs +++ b/src/libstd/rt/backtrace.rs @@ -73,7 +73,7 @@ fn demangle(writer: &mut Writer, s: &str) -> IoResult<()> { while valid { let mut i = 0; for c in chars { - if c.is_digit() { + if c.is_numeric() { i = i * 10 + c as uint - '0' as uint; } else { break @@ -103,7 +103,7 @@ fn demangle(writer: &mut Writer, s: &str) -> IoResult<()> { first = false; } let mut rest = s; - while rest.char_at(0).is_digit() { + while rest.char_at(0).is_numeric() { rest = rest.slice_from(1); } let i: uint = from_str(s.slice_to(s.len() - rest.len())).unwrap(); diff --git a/src/libunicode/u_char.rs b/src/libunicode/u_char.rs index f725cdba64ef5..1dec001c3d026 100644 --- a/src/libunicode/u_char.rs +++ b/src/libunicode/u_char.rs @@ -217,7 +217,7 @@ pub trait UnicodeChar { fn is_control(&self) -> bool; /// Indicates whether the character is numeric (Nd, Nl, or No). - fn is_digit(&self) -> bool; + fn is_numeric(&self) -> bool; /// Converts a character to its lowercase equivalent. /// @@ -279,7 +279,7 @@ impl UnicodeChar for char { fn is_control(&self) -> bool { is_control(*self) } - fn is_digit(&self) -> bool { is_digit(*self) } + fn is_numeric(&self) -> bool { is_digit(*self) } fn to_lowercase(&self) -> char { to_lowercase(*self) } From bce3b67927b536cffdc878774da1091b1db9080c Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Wed, 8 Oct 2014 17:24:15 -0700 Subject: [PATCH 05/14] core: Rename Char::is_digit_radix to is_digit This fits the naming of `to_digit` and `from_digit`. Leave the old name deprecated. --- src/libcore/char.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 2113ef78c2c4e..b331ae148463c 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -245,8 +245,24 @@ pub trait Char { /// # Failure /// /// Fails if given a radix > 36. 
+ #[deprecated = "use is_digit"] fn is_digit_radix(&self, radix: uint) -> bool; + /// Checks if a `char` parses as a numeric digit in the given radix. + /// + /// Compared to `is_digit()`, this function only recognizes the characters + /// `0-9`, `a-z` and `A-Z`. + /// + /// # Return value + /// + /// Returns `true` if `c` is a valid digit under `radix`, and `false` + /// otherwise. + /// + /// # Failure + /// + /// Fails if given a radix > 36. + fn is_digit(&self, radix: uint) -> bool; + /// Converts a character to the corresponding digit. /// /// # Return value @@ -319,8 +335,11 @@ pub trait Char { #[experimental = "trait is experimental"] impl Char for char { + #[deprecated = "use is_digit"] fn is_digit_radix(&self, radix: uint) -> bool { is_digit_radix(*self, radix) } + fn is_digit(&self, radix: uint) -> bool { is_digit_radix(*self, radix) } + fn to_digit(&self, radix: uint) -> Option { to_digit(*self, radix) } fn from_digit(num: uint, radix: uint) -> Option { from_digit(num, radix) } From 1668e0b743647320352473fad08e978fcf600712 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Wed, 8 Oct 2014 17:33:02 -0700 Subject: [PATCH 06/14] core: Rename Char::len_utf8_bytes to Char::len_utf8 "bytes" is redundant. Deprecate the old. --- src/libcore/char.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index b331ae148463c..a1e8a541e7ea7 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -316,8 +316,13 @@ pub trait Char { /// Returns the amount of bytes this character would need if encoded in /// UTF-8. + #[deprecated = "use len_utf8"] fn len_utf8_bytes(&self) -> uint; + /// Returns the amount of bytes this character would need if encoded in + /// UTF-8. + fn len_utf8(&self) -> uint; + /// Encodes this character as UTF-8 into the provided byte buffer, /// and then returns the number of bytes written. 
/// @@ -352,8 +357,12 @@ impl Char for char { fn escape_default(&self, f: |char|) { escape_default(*self, f) } #[inline] + #[deprecated = "use len_utf8"] fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) } + #[inline] + fn len_utf8(&self) -> uint { len_utf8_bytes(*self) } + #[inline] fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> Option { // Marked #[inline] to allow llvm optimizing it away From 2ddfce0d61bdfb688c2b6ae724a1ce091a0d895f Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Wed, 8 Oct 2014 17:40:31 -0700 Subject: [PATCH 07/14] core: Add Char::len_utf16 Missing method to pair with len_utf8. --- src/libcore/char.rs | 10 ++++++++++ src/libcoretest/char.rs | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index a1e8a541e7ea7..7f646e1b34f00 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -323,6 +323,10 @@ pub trait Char { /// UTF-8. fn len_utf8(&self) -> uint; + /// Returns the amount of bytes this character would need if encoded in + /// UTF-16. + fn len_utf16(&self) -> uint; + /// Encodes this character as UTF-8 into the provided byte buffer, /// and then returns the number of bytes written. 
/// @@ -363,6 +367,12 @@ impl Char for char { #[inline] fn len_utf8(&self) -> uint { len_utf8_bytes(*self) } + #[inline] + fn len_utf16(&self) -> uint { + let ch = *self as u32; + if (ch & 0xFFFF_u32) == ch { 1 } else { 2 } + } + #[inline] fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> Option { // Marked #[inline] to allow llvm optimizing it away diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs index 8b80a4493eebd..9cba57110d756 100644 --- a/src/libcoretest/char.rs +++ b/src/libcoretest/char.rs @@ -197,6 +197,14 @@ fn test_encode_utf16() { check('\U0001f4a9', [0xd83d, 0xdca9]); } +#[test] +fn test_len_utf16() { + assert!('x'.len_utf16() == 1); + assert!('\u00e9'.len_utf16() == 1); + assert!('\ua66e'.len_utf16() == 1); + assert!('\U0001f4a9'.len_utf16() == 2); +} + #[test] fn test_width() { assert_eq!('\x00'.width(false),Some(0)); From cf12e00fa43541d8bcc913c1046753c4da93e1ee Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Fri, 10 Oct 2014 14:55:11 -0700 Subject: [PATCH 08/14] core: Add stability attributes to char::from_digit and from_u32 For now we are preferring free functions for primitive ctors, so they are marked 'unstable' pending final decision. The methods on `Char` are 'deprecated'. --- src/libcore/char.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 7f646e1b34f00..bc027d98a06ff 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -68,6 +68,7 @@ pub const MAX: char = '\U0010ffff'; /// Converts from `u32` to a `char` #[inline] +#[unstable = "pending decisions about costructors for primitives"] pub fn from_u32(i: u32) -> Option { // catch out-of-bounds and surrogates if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { @@ -146,6 +147,7 @@ pub fn to_digit(c: char, radix: uint) -> Option { /// Fails if given an `radix` > 36. 
/// #[inline] +#[unstable = "pending decisions about costructors for primitives"] pub fn from_digit(num: uint, radix: uint) -> Option { if radix > 36 { fail!("from_digit: radix is too high (maximum 36)"); @@ -286,9 +288,11 @@ pub trait Char { /// # Failure /// /// Fails if given a radix > 36. + #[deprecated = "use the char::from_digit free function"] fn from_digit(num: uint, radix: uint) -> Option; /// Converts from `u32` to a `char` + #[deprecated = "use the char::from_u32 free function"] fn from_u32(i: u32) -> Option; /// Returns the hexadecimal Unicode escape of a character. @@ -351,9 +355,11 @@ impl Char for char { fn to_digit(&self, radix: uint) -> Option { to_digit(*self, radix) } + #[deprecated = "use the char::from_digit free function"] fn from_digit(num: uint, radix: uint) -> Option { from_digit(num, radix) } #[inline] + #[deprecated = "use the char::from_u32 free function"] fn from_u32(i: u32) -> Option { from_u32(i) } fn escape_unicode(&self, f: |char|) { escape_unicode(*self, f) } From 7da9791445e8604db896d92a7bbf8cec56464cfd Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Mon, 13 Oct 2014 10:54:18 -0700 Subject: [PATCH 09/14] core: Deprecated remaining free functions in `char` Prefer the methods. --- src/libcore/char.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index bc027d98a06ff..4aaf1011a5e7e 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -98,6 +98,7 @@ pub fn from_u32(i: u32) -> Option { /// This just wraps `to_digit()`. /// #[inline] +#[deprecated = "use the Char::is_digit method"] pub fn is_digit_radix(c: char, radix: uint) -> bool { match to_digit(c, radix) { Some(_) => true, @@ -120,6 +121,7 @@ pub fn is_digit_radix(c: char, radix: uint) -> bool { /// Fails if given a `radix` outside the range `[0..36]`. 
/// #[inline] +#[deprecated = "use the Char::to_digit method"] pub fn to_digit(c: char, radix: uint) -> Option { if radix > 36 { fail!("to_digit: radix is too high (maximum 36)"); @@ -174,6 +176,7 @@ pub fn from_digit(num: uint, radix: uint) -> Option { /// - chars in [0x100,0xffff] get 4-digit escapes: `\\uNNNN` /// - chars above 0x10000 get 8-digit escapes: `\\UNNNNNNNN` /// +#[deprecated = "use the Char::escape_unicode method"] pub fn escape_unicode(c: char, f: |char|) { // avoid calling str::to_str_radix because we don't really need to allocate // here. @@ -206,6 +209,7 @@ pub fn escape_unicode(c: char, f: |char|) { /// - Any other chars in the range [0x20,0x7e] are not escaped. /// - Any other chars are given hex Unicode escapes; see `escape_unicode`. /// +#[deprecated = "use the Char::escape_default method"] pub fn escape_default(c: char, f: |char|) { match c { '\t' => { f('\\'); f('t'); } @@ -221,6 +225,7 @@ pub fn escape_default(c: char, f: |char|) { /// Returns the amount of bytes this `char` would need if encoded in UTF-8 #[inline] +#[deprecated = "use the Char::len_utf8 method"] pub fn len_utf8_bytes(c: char) -> uint { let code = c as u32; match () { From 11f5fa426749f2ae8ad214648005b82042f9147c Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Mon, 13 Oct 2014 10:57:49 -0700 Subject: [PATCH 10/14] core: Mark remaining Char methods unstable The `Char` trait itself may go away in favor of primitive inherent methods. Still some questions about whether the preconditions are following the final error handling conventions. --- src/libcore/char.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 4aaf1011a5e7e..427f00c2fdd1d 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -268,6 +268,7 @@ pub trait Char { /// # Failure /// /// Fails if given a radix > 36. 
+ #[unstable = "pending error conventions"] fn is_digit(&self, radix: uint) -> bool; /// Converts a character to the corresponding digit. @@ -281,6 +282,7 @@ pub trait Char { /// # Failure /// /// Fails if given a radix outside the range [0..36]. + #[unstable = "pending error conventions, trait organization"] fn to_digit(&self, radix: uint) -> Option; /// Converts a number to the character representing it. @@ -307,6 +309,7 @@ pub trait Char { /// * Characters in [0,0xff] get 2-digit escapes: `\\xNN` /// * Characters in [0x100,0xffff] get 4-digit escapes: `\\uNNNN`. /// * Characters above 0x10000 get 8-digit escapes: `\\UNNNNNNNN`. + #[unstable = "pending error conventions, trait organization"] fn escape_unicode(&self, f: |char|); /// Returns a 'default' ASCII and C++11-like literal escape of a @@ -321,6 +324,7 @@ pub trait Char { /// escaped. /// * Any other chars in the range [0x20,0x7e] are not escaped. /// * Any other chars are given hex Unicode escapes; see `escape_unicode`. + #[unstable = "pending error conventions, trait organization"] fn escape_default(&self, f: |char|); /// Returns the amount of bytes this character would need if encoded in @@ -330,10 +334,12 @@ pub trait Char { /// Returns the amount of bytes this character would need if encoded in /// UTF-8. + #[unstable = "pending trait organization"] fn len_utf8(&self) -> uint; /// Returns the amount of bytes this character would need if encoded in /// UTF-16. + #[unstable = "pending trait organization"] fn len_utf16(&self) -> uint; /// Encodes this character as UTF-8 into the provided byte buffer, @@ -341,6 +347,7 @@ pub trait Char { /// /// If the buffer is not large enough, nothing will be written into it /// and a `None` will be returned. 
+ #[unstable = "pending trait organization"] fn encode_utf8(&self, dst: &mut [u8]) -> Option; /// Encodes this character as UTF-16 into the provided `u16` buffer, @@ -348,6 +355,7 @@ pub trait Char { /// /// If the buffer is not large enough, nothing will be written into it /// and a `None` will be returned. + #[unstable = "pending trait organization"] fn encode_utf16(&self, dst: &mut [u16]) -> Option; } @@ -356,8 +364,10 @@ impl Char for char { #[deprecated = "use is_digit"] fn is_digit_radix(&self, radix: uint) -> bool { is_digit_radix(*self, radix) } + #[unstable = "pending trait organization"] fn is_digit(&self, radix: uint) -> bool { is_digit_radix(*self, radix) } + #[unstable = "pending trait organization"] fn to_digit(&self, radix: uint) -> Option { to_digit(*self, radix) } #[deprecated = "use the char::from_digit free function"] @@ -367,8 +377,10 @@ impl Char for char { #[deprecated = "use the char::from_u32 free function"] fn from_u32(i: u32) -> Option { from_u32(i) } + #[unstable = "pending error conventions, trait organization"] fn escape_unicode(&self, f: |char|) { escape_unicode(*self, f) } + #[unstable = "pending error conventions, trait organization"] fn escape_default(&self, f: |char|) { escape_default(*self, f) } #[inline] @@ -376,15 +388,18 @@ impl Char for char { fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) } #[inline] + #[unstable = "pending trait organization"] fn len_utf8(&self) -> uint { len_utf8_bytes(*self) } #[inline] + #[unstable = "pending trait organization"] fn len_utf16(&self) -> uint { let ch = *self as u32; if (ch & 0xFFFF_u32) == ch { 1 } else { 2 } } #[inline] + #[unstable = "pending error conventions, trait organization"] fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> Option { // Marked #[inline] to allow llvm optimizing it away let code = *self as u32; @@ -412,6 +427,7 @@ impl Char for char { } #[inline] + #[unstable = "pending error conventions, trait organization"] fn encode_utf16(&self, dst: &mut [u16]) -> Option 
{ // Marked #[inline] to allow llvm optimizing it away let mut ch = *self as u32; From a911b35505180f018c71e59b39f2b58cc2b96e14 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Mon, 13 Oct 2014 13:03:42 -0700 Subject: [PATCH 11/14] Fix various deprecation warnings from char changes --- src/libcore/char.rs | 122 +++++++++++++++++++---------------- src/libcore/fmt/float.rs | 3 +- src/libdebug/repr.rs | 3 +- src/libfmt_macros/lib.rs | 2 +- src/libstd/num/strconv.rs | 5 +- src/libterm/terminfo/parm.rs | 3 +- 6 files changed, 74 insertions(+), 64 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 427f00c2fdd1d..cd6fe3c559c76 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -100,10 +100,7 @@ pub fn from_u32(i: u32) -> Option { #[inline] #[deprecated = "use the Char::is_digit method"] pub fn is_digit_radix(c: char, radix: uint) -> bool { - match to_digit(c, radix) { - Some(_) => true, - None => false, - } + c.is_digit(radix) } /// @@ -123,17 +120,7 @@ pub fn is_digit_radix(c: char, radix: uint) -> bool { #[inline] #[deprecated = "use the Char::to_digit method"] pub fn to_digit(c: char, radix: uint) -> Option { - if radix > 36 { - fail!("to_digit: radix is too high (maximum 36)"); - } - let val = match c { - '0' ... '9' => c as uint - ('0' as uint), - 'a' ... 'z' => c as uint + 10u - ('a' as uint), - 'A' ... 'Z' => c as uint + 10u - ('A' as uint), - _ => return None, - }; - if val < radix { Some(val) } - else { None } + c.to_digit(radix) } /// @@ -178,23 +165,7 @@ pub fn from_digit(num: uint, radix: uint) -> Option { /// #[deprecated = "use the Char::escape_unicode method"] pub fn escape_unicode(c: char, f: |char|) { - // avoid calling str::to_str_radix because we don't really need to allocate - // here. 
- f('\\'); - let pad = match () { - _ if c <= '\xff' => { f('x'); 2 } - _ if c <= '\uffff' => { f('u'); 4 } - _ => { f('U'); 8 } - }; - for offset in range_step::(4 * (pad - 1), -1, -4) { - let offset = offset as uint; - unsafe { - match ((c as i32) >> offset) & 0xf { - i @ 0 ... 9 => { f(transmute('0' as i32 + i)); } - i => { f(transmute('a' as i32 + (i - 10))); } - } - } - } + c.escape_unicode(f) } /// @@ -211,29 +182,14 @@ pub fn escape_unicode(c: char, f: |char|) { /// #[deprecated = "use the Char::escape_default method"] pub fn escape_default(c: char, f: |char|) { - match c { - '\t' => { f('\\'); f('t'); } - '\r' => { f('\\'); f('r'); } - '\n' => { f('\\'); f('n'); } - '\\' => { f('\\'); f('\\'); } - '\'' => { f('\\'); f('\''); } - '"' => { f('\\'); f('"'); } - '\x20' ... '\x7e' => { f(c); } - _ => c.escape_unicode(f), - } + c.escape_default(f) } /// Returns the amount of bytes this `char` would need if encoded in UTF-8 #[inline] #[deprecated = "use the Char::len_utf8 method"] pub fn len_utf8_bytes(c: char) -> uint { - let code = c as u32; - match () { - _ if code < MAX_ONE_B => 1u, - _ if code < MAX_TWO_B => 2u, - _ if code < MAX_THREE_B => 3u, - _ => 4u, - } + c.len_utf8() } /// Basic `char` manipulations. 
@@ -362,13 +318,30 @@ pub trait Char { #[experimental = "trait is experimental"] impl Char for char { #[deprecated = "use is_digit"] - fn is_digit_radix(&self, radix: uint) -> bool { is_digit_radix(*self, radix) } + fn is_digit_radix(&self, radix: uint) -> bool { self.is_digit(radix) } #[unstable = "pending trait organization"] - fn is_digit(&self, radix: uint) -> bool { is_digit_radix(*self, radix) } + fn is_digit(&self, radix: uint) -> bool { + match self.to_digit(radix) { + Some(_) => true, + None => false, + } + } #[unstable = "pending trait organization"] - fn to_digit(&self, radix: uint) -> Option { to_digit(*self, radix) } + fn to_digit(&self, radix: uint) -> Option { + if radix > 36 { + fail!("to_digit: radix is too high (maximum 36)"); + } + let val = match *self { + '0' ... '9' => *self as uint - ('0' as uint), + 'a' ... 'z' => *self as uint + 10u - ('a' as uint), + 'A' ... 'Z' => *self as uint + 10u - ('A' as uint), + _ => return None, + }; + if val < radix { Some(val) } + else { None } + } #[deprecated = "use the char::from_digit free function"] fn from_digit(num: uint, radix: uint) -> Option { from_digit(num, radix) } @@ -378,18 +351,55 @@ impl Char for char { fn from_u32(i: u32) -> Option { from_u32(i) } #[unstable = "pending error conventions, trait organization"] - fn escape_unicode(&self, f: |char|) { escape_unicode(*self, f) } + fn escape_unicode(&self, f: |char|) { + // avoid calling str::to_str_radix because we don't really need to allocate + // here. + f('\\'); + let pad = match () { + _ if *self <= '\xff' => { f('x'); 2 } + _ if *self <= '\uffff' => { f('u'); 4 } + _ => { f('U'); 8 } + }; + for offset in range_step::(4 * (pad - 1), -1, -4) { + let offset = offset as uint; + unsafe { + match ((*self as i32) >> offset) & 0xf { + i @ 0 ... 
9 => { f(transmute('0' as i32 + i)); } + i => { f(transmute('a' as i32 + (i - 10))); } + } + } + } + } #[unstable = "pending error conventions, trait organization"] - fn escape_default(&self, f: |char|) { escape_default(*self, f) } + fn escape_default(&self, f: |char|) { + match *self { + '\t' => { f('\\'); f('t'); } + '\r' => { f('\\'); f('r'); } + '\n' => { f('\\'); f('n'); } + '\\' => { f('\\'); f('\\'); } + '\'' => { f('\\'); f('\''); } + '"' => { f('\\'); f('"'); } + '\x20' ... '\x7e' => { f(*self); } + _ => self.escape_unicode(f), + } + } #[inline] #[deprecated = "use len_utf8"] - fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) } + fn len_utf8_bytes(&self) -> uint { self.len_utf8() } #[inline] #[unstable = "pending trait organization"] - fn len_utf8(&self) -> uint { len_utf8_bytes(*self) } + fn len_utf8(&self) -> uint { + let code = *self as u32; + match () { + _ if code < MAX_ONE_B => 1u, + _ if code < MAX_TWO_B => 2u, + _ if code < MAX_THREE_B => 3u, + _ => 4u, + } + } #[inline] #[unstable = "pending trait organization"] diff --git a/src/libcore/fmt/float.rs b/src/libcore/fmt/float.rs index 343ab7cfd28b9..f86a0bbbf3ff6 100644 --- a/src/libcore/fmt/float.rs +++ b/src/libcore/fmt/float.rs @@ -11,6 +11,7 @@ #![allow(missing_doc)] use char; +use char::Char; use collections::Collection; use fmt; use iter::{range, DoubleEndedIterator}; @@ -220,7 +221,7 @@ pub fn float_to_str_bytes_common( // round the remaining ones. 
if limit_digits && dig == digit_count { let ascii2value = |chr: u8| { - char::to_digit(chr as char, radix).unwrap() + (chr as char).to_digit(radix).unwrap() }; let value2ascii = |val: uint| { char::from_digit(val, radix).unwrap() as u8 diff --git a/src/libdebug/repr.rs b/src/libdebug/repr.rs index e27816c816539..6a3c009964d19 100644 --- a/src/libdebug/repr.rs +++ b/src/libdebug/repr.rs @@ -14,7 +14,6 @@ More runtime type reflection */ -use std::char; use std::intrinsics::{Disr, Opaque, TyDesc, TyVisitor, get_tydesc, visit_tydesc}; use std::io; use std::mem; @@ -229,7 +228,7 @@ impl<'a> ReprVisitor<'a> { } '\x20'...'\x7e' => self.writer.write([ch as u8]), _ => { - char::escape_unicode(ch, |c| { + ch.escape_unicode(|c| { let _ = self.writer.write([c as u8]); }); Ok(()) diff --git a/src/libfmt_macros/lib.rs b/src/libfmt_macros/lib.rs index a9f34e1195ce6..f8d4534a4d4c9 100644 --- a/src/libfmt_macros/lib.rs +++ b/src/libfmt_macros/lib.rs @@ -406,7 +406,7 @@ impl<'a> Parser<'a> { loop { match self.cur.clone().next() { Some((_, c)) => { - match char::to_digit(c, 10) { + match c.to_digit(10) { Some(i) => { cur = cur * 10 + i; found = true; diff --git a/src/libstd/num/strconv.rs b/src/libstd/num/strconv.rs index 48ee7664c16ac..04f71fb7759e2 100644 --- a/src/libstd/num/strconv.rs +++ b/src/libstd/num/strconv.rs @@ -13,6 +13,7 @@ #![allow(missing_doc)] use char; +use char::Char; use clone::Clone; use collections::{Collection, MutableSeq}; use num::{NumCast, Zero, One, cast, Int}; @@ -618,7 +619,7 @@ pub fn from_str_bytes_common+ while i < len { let c = buf[i] as char; - match char::to_digit(c, radix) { + match c.to_digit(radix) { Some(digit) => { // shift accum one digit left accum = accum * radix_gen.clone(); @@ -673,7 +674,7 @@ pub fn from_str_bytes_common+ while i < len { let c = buf[i] as char; - match char::to_digit(c, radix) { + match c.to_digit(radix) { Some(digit) => { // Decrease power one order of magnitude power = power / radix_gen; diff --git 
a/src/libterm/terminfo/parm.rs b/src/libterm/terminfo/parm.rs index a1bce6e8e8b04..bba29420b6334 100644 --- a/src/libterm/terminfo/parm.rs +++ b/src/libterm/terminfo/parm.rs @@ -10,7 +10,6 @@ //! Parameterized string expansion -use std::char; use std::mem::replace; #[deriving(PartialEq)] @@ -293,7 +292,7 @@ pub fn expand(cap: &[u8], params: &[Param], vars: &mut Variables) }, PushParam => { // params are 1-indexed - stack.push(mparams[match char::to_digit(cur, 10) { + stack.push(mparams[match cur.to_digit(10) { Some(d) => d - 1, None => return Err("bad param number".to_string()) }].clone()); From bb2f7d4493488fd95a4ec60ab40a265710db63d6 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Mon, 13 Oct 2014 16:59:26 -0700 Subject: [PATCH 12/14] core: Change Char::encode_utf8 and encode_utf16 to return iterators Consistent with current design guidelines, though it does result in a performance hit, particularly for multi-byte cases. test iter_1bytes ... bench: 341 ns/iter (+/- 90) test iter_4bytes ... bench: 944 ns/iter (+/- 204) test noiter_1bytes ... bench: 324 ns/iter (+/- 52) test noiter_4bytes ... 
bench: 510 ns/iter (+/- 227) [breaking-change] --- src/libcollections/string.rs | 24 ++++--- src/libcore/char.rs | 120 +++++++++++++++++++++++++---------- src/libcore/fmt/mod.rs | 20 ++++-- src/libcore/str.rs | 37 +++++++---- src/libcoretest/char.rs | 10 ++- src/libserialize/json.rs | 8 ++- src/libstd/io/mod.rs | 8 ++- 7 files changed, 150 insertions(+), 77 deletions(-) diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs index 061064ff803d5..8622267483e80 100644 --- a/src/libcollections/string.rs +++ b/src/libcollections/string.rs @@ -19,8 +19,6 @@ use core::fmt; use core::mem; use core::ptr; use core::ops; -// FIXME: ICE's abound if you import the `Slice` type while importing `Slice` trait -use core::raw::Slice as RawSlice; use {Mutable, MutableSeq}; use hash; @@ -540,12 +538,13 @@ impl String { unsafe { // Attempt to not use an intermediate buffer by just pushing bytes // directly onto this string. - let slice = RawSlice { - data: self.vec.as_ptr().offset(cur_len as int), - len: 4, - }; - let used = ch.encode_utf8(mem::transmute(slice)).unwrap_or(0); - self.vec.set_len(cur_len + used); + let buf = self.vec.as_mut_ptr().offset(cur_len as int); + let mut used = 0; + for byte in ch.encode_utf8() { + *buf.offset(used) = byte; + used += 1; + } + self.vec.set_len(cur_len + (used as uint)); } } @@ -798,16 +797,15 @@ impl String { assert!(idx <= len); assert!(self.as_slice().is_char_boundary(idx)); self.vec.reserve_additional(4); - let mut bits = [0, ..4]; - let amt = ch.encode_utf8(bits).unwrap(); + let amt = ch.len_utf8(); unsafe { ptr::copy_memory(self.vec.as_mut_ptr().offset((idx + amt) as int), self.vec.as_ptr().offset(idx as int), len - idx); - ptr::copy_memory(self.vec.as_mut_ptr().offset(idx as int), - bits.as_ptr(), - amt); + for (i, byte) in ch.encode_utf8().enumerate() { + *self.vec.as_mut_ptr().offset((idx + i) as int) = byte + } self.vec.set_len(len + amt); } } diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 
cd6fe3c559c76..0c8952d2a0d38 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -15,9 +15,10 @@ #![allow(non_snake_case)] #![doc(primitive = "char")] +use clone::Clone; use mem::transmute; use option::{None, Option, Some}; -use iter::range_step; +use iter::{range_step, Iterator}; use collections::Collection; // UTF-8 ranges and tags for encoding characters @@ -304,7 +305,7 @@ pub trait Char { /// If the buffer is not large enough, nothing will be written into it /// and a `None` will be returned. #[unstable = "pending trait organization"] - fn encode_utf8(&self, dst: &mut [u8]) -> Option; + fn encode_utf8(&self) -> Utf8CodeUnits; /// Encodes this character as UTF-16 into the provided `u16` buffer, /// and then returns the number of `u16`s written. @@ -312,7 +313,7 @@ pub trait Char { /// If the buffer is not large enough, nothing will be written into it /// and a `None` will be returned. #[unstable = "pending trait organization"] - fn encode_utf16(&self, dst: &mut [u16]) -> Option; + fn encode_utf16(&self) -> Utf16CodeUnits; } #[experimental = "trait is experimental"] @@ -410,49 +411,102 @@ impl Char for char { #[inline] #[unstable = "pending error conventions, trait organization"] - fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> Option { - // Marked #[inline] to allow llvm optimizing it away + fn encode_utf8(&self) -> Utf8CodeUnits { let code = *self as u32; - if code < MAX_ONE_B && dst.len() >= 1 { - dst[0] = code as u8; - Some(1) - } else if code < MAX_TWO_B && dst.len() >= 2 { - dst[0] = (code >> 6u & 0x1F_u32) as u8 | TAG_TWO_B; - dst[1] = (code & 0x3F_u32) as u8 | TAG_CONT; - Some(2) - } else if code < MAX_THREE_B && dst.len() >= 3 { - dst[0] = (code >> 12u & 0x0F_u32) as u8 | TAG_THREE_B; - dst[1] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT; - dst[2] = (code & 0x3F_u32) as u8 | TAG_CONT; - Some(3) - } else if dst.len() >= 4 { - dst[0] = (code >> 18u & 0x07_u32) as u8 | TAG_FOUR_B; - dst[1] = (code >> 12u & 0x3F_u32) as u8 | TAG_CONT; - dst[2] = 
(code >> 6u & 0x3F_u32) as u8 | TAG_CONT; - dst[3] = (code & 0x3F_u32) as u8 | TAG_CONT; - Some(4) + let (len, buf) = if code < MAX_ONE_B { + (1, [code as u8, 0, 0, 0]) + } else if code < MAX_TWO_B { + (2, [(code >> 6u & 0x1F_u32) as u8 | TAG_TWO_B, + (code & 0x3F_u32) as u8 | TAG_CONT, + 0, 0]) + } else if code < MAX_THREE_B { + (3, [(code >> 12u & 0x0F_u32) as u8 | TAG_THREE_B, + (code >> 6u & 0x3F_u32) as u8 | TAG_CONT, + (code & 0x3F_u32) as u8 | TAG_CONT, + 0]) } else { - None - } + (4, [(code >> 18u & 0x07_u32) as u8 | TAG_FOUR_B, + (code >> 12u & 0x3F_u32) as u8 | TAG_CONT, + (code >> 6u & 0x3F_u32) as u8 | TAG_CONT, + (code & 0x3F_u32) as u8 | TAG_CONT]) + }; + + Utf8CodeUnits { pos: 0, len: len, buf: buf } } #[inline] #[unstable = "pending error conventions, trait organization"] - fn encode_utf16(&self, dst: &mut [u16]) -> Option { + fn encode_utf16(&self) -> Utf16CodeUnits { // Marked #[inline] to allow llvm optimizing it away let mut ch = *self as u32; - if (ch & 0xFFFF_u32) == ch && dst.len() >= 1 { + let (len, buf) = if (ch & 0xFFFF_u32) == ch { // The BMP falls through (assuming non-surrogate, as it should) - dst[0] = ch as u16; - Some(1) - } else if dst.len() >= 2 { + (1, [ch as u16, 0]) + } else { // Supplementary planes break into surrogates. 
ch -= 0x1_0000_u32; - dst[0] = 0xD800_u16 | ((ch >> 10) as u16); - dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16); - Some(2) + (2, [0xD800_u16 | ((ch >> 10) as u16), + 0xDC00_u16 | ((ch as u16) & 0x3FF_u16)]) + }; + + Utf16CodeUnits { pos: 0, len: len, buf: buf } + } +} + +/// An iterator over the bytes of a char encoded as UTF-8 +#[unstable = "pending error conventions, trait organization"] +pub struct Utf8CodeUnits { + pos: uint, + len: uint, + buf: [u8, ..4] +} + +#[unstable = "struct is unstable"] +impl Iterator for Utf8CodeUnits { + #[inline] + fn next(&mut self) -> Option { + if self.pos != self.len { + let next = self.buf[self.pos]; + self.pos += 1; + Some(next) } else { None } } } + +#[unstable = "struct is unstable"] +impl Clone for Utf8CodeUnits { + fn clone(&self) -> Utf8CodeUnits { + Utf8CodeUnits { pos: self.pos, len: self.len, buf: self.buf } + } +} + +/// An iterator over the `u16` code units of a char encoded as UTF-16 +#[unstable = "pending error conventions, trait organization"] +pub struct Utf16CodeUnits { + pos: uint, + len: uint, + buf: [u16, ..2] +} + +#[unstable = "struct is unstable"] +impl Iterator for Utf16CodeUnits { + #[inline] + fn next(&mut self) -> Option { + if self.pos != self.len { + let next = self.buf[self.pos]; + self.pos += 1; + Some(next) + } else { + None + } + } +} + +#[unstable = "struct is unstable"] +impl Clone for Utf16CodeUnits { + fn clone(&self) -> Utf16CodeUnits { + Utf16CodeUnits { pos: self.pos, len: self.len, buf: self.buf } + } +} diff --git a/src/libcore/fmt/mod.rs b/src/libcore/fmt/mod.rs index 093f5896aad2d..5c441dfa888b3 100644 --- a/src/libcore/fmt/mod.rs +++ b/src/libcore/fmt/mod.rs @@ -421,9 +421,9 @@ impl<'a> Formatter<'a> { // Writes the sign if it exists, and then the prefix if it was requested let write_prefix = |f: &mut Formatter| { for c in sign.into_iter() { - let mut b = [0, ..4]; - let n = c.encode_utf8(b).unwrap_or(0); - try!(f.buf.write(b[..n])); + for byte in c.encode_utf8() { + try!(f.buf.write([byte]));
+ } } if prefixed { f.buf.write(prefix.as_bytes()) } else { Ok(()) } @@ -527,7 +527,11 @@ impl<'a> Formatter<'a> { }; let mut fill = [0u8, ..4]; - let len = self.fill.encode_utf8(fill).unwrap_or(0); + let mut len = 0; + for byte in self.fill.encode_utf8() { + fill[len] = byte; + len += 1; + } for _ in range(0, pre_pad) { try!(self.buf.write(fill[..len])); @@ -610,8 +614,12 @@ impl Char for char { use char::Char; let mut utf8 = [0u8, ..4]; - let amt = self.encode_utf8(utf8).unwrap_or(0); - let s: &str = unsafe { mem::transmute(utf8[..amt]) }; + let mut len = 0; + for byte in self.encode_utf8() { + utf8[len] = byte; + len += 1; + } + let s: &str = unsafe { mem::transmute(utf8[..len]) }; secret_string(&s, f) } } diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 4cd291b0ae0b2..614b4bd22e0ad 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -724,27 +724,36 @@ impl<'a> Iterator<&'a str> for StrSplits<'a> { /// External iterator for a string's UTF16 codeunits. /// Use with the `std::iter` module. +// NB: This could be implemented as a flatmap from Chars +// to char::Utf16CodeUnits, except that FlatMap is not +// Clone. 
#[deriving(Clone)] pub struct Utf16CodeUnits<'a> { chars: Chars<'a>, - extra: u16 + char_units: Option } impl<'a> Iterator for Utf16CodeUnits<'a> { #[inline] fn next(&mut self) -> Option { - if self.extra != 0 { - let tmp = self.extra; - self.extra = 0; - return Some(tmp); + match self.char_units { + Some(ref mut char_units) => { + match char_units.next() { + Some(unit) => Some(unit), + None => { + let next_char = self.chars.next(); + match next_char { + Some(next_char) => { + *char_units = next_char.encode_utf16(); + char_units.next() + } + None => None + } + } + } + } + None => None } - - let mut buf = [0u16, ..2]; - self.chars.next().map(|ch| { - let n = ch.encode_utf16(buf[mut]).unwrap_or(0); - if n == 2 { self.extra = buf[1]; } - buf[0] - }) } #[inline] @@ -2176,7 +2185,9 @@ impl<'a> StrSlice<'a> for &'a str { #[inline] fn utf16_units(&self) -> Utf16CodeUnits<'a> { - Utf16CodeUnits{ chars: self.chars(), extra: 0} + let mut chars = self.chars(); + let first_char_units = chars.next().map(|c| c.encode_utf16()); + Utf16CodeUnits{ chars: chars, char_units: first_char_units } } } diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs index 9cba57110d756..bd84903a09214 100644 --- a/src/libcoretest/char.rs +++ b/src/libcoretest/char.rs @@ -172,9 +172,8 @@ fn test_escape_unicode() { #[test] fn test_encode_utf8() { fn check(input: char, expect: &[u8]) { - let mut buf = [0u8, ..4]; - let n = input.encode_utf8(buf.as_mut_slice()).unwrap_or(0); - assert_eq!(buf[..n], expect); + let buf: Vec = input.encode_utf8().collect(); + assert_eq!(buf[], expect); } check('x', [0x78]); @@ -186,9 +185,8 @@ fn test_encode_utf8() { #[test] fn test_encode_utf16() { fn check(input: char, expect: &[u16]) { - let mut buf = [0u16, ..2]; - let n = input.encode_utf16(buf.as_mut_slice()).unwrap_or(0); - assert_eq!(buf[..n], expect); + let buf: Vec = input.encode_utf16().collect(); + assert_eq!(buf[], expect); } check('x', [0x0078]); diff --git a/src/libserialize/json.rs 
b/src/libserialize/json.rs index eda38e96cbb1a..b288f7cb96924 100644 --- a/src/libserialize/json.rs +++ b/src/libserialize/json.rs @@ -354,9 +354,11 @@ fn escape_str(writer: &mut io::Writer, v: &str) -> Result<(), io::IoError> { } fn escape_char(writer: &mut io::Writer, v: char) -> Result<(), io::IoError> { - let mut buf = [0, .. 4]; - v.encode_utf8(buf); - escape_bytes(writer, buf) + for byte in v.encode_utf8() { + try!(escape_bytes(writer, [byte])); + } + + Ok(()) } fn spaces(wr: &mut io::Writer, mut n: uint) -> Result<(), io::IoError> { diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index 8592d48974a25..7fcbb6dd960ce 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -1110,9 +1110,11 @@ pub trait Writer { /// Write a single char, encoded as UTF-8. #[inline] fn write_char(&mut self, c: char) -> IoResult<()> { - let mut buf = [0u8, ..4]; - let n = c.encode_utf8(buf[mut]).unwrap_or(0); - self.write(buf[..n]) + for byte in c.encode_utf8() { + try!(self.write_u8(byte)); + } + + Ok(()) } /// Write the result of passing n through `int::to_str_bytes`. From 8db525243b129dd5d21b18652add4b6b342d134d Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Tue, 14 Oct 2014 13:08:54 -0700 Subject: [PATCH 13/14] core: Convert Char methods to by-val self Methods on primitive Copy types generally should take `self`. [breaking-change] --- src/libcore/char.rs | 66 ++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 0c8952d2a0d38..53bebc6c8e98d 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -210,7 +210,7 @@ pub trait Char { /// /// Fails if given a radix > 36. #[deprecated = "use is_digit"] - fn is_digit_radix(&self, radix: uint) -> bool; + fn is_digit_radix(self, radix: uint) -> bool; /// Checks if a `char` parses as a numeric digit in the given radix. /// @@ -226,7 +226,7 @@ pub trait Char { /// /// Fails if given a radix > 36.
#[unstable = "pending error conventions"] - fn is_digit(&self, radix: uint) -> bool; + fn is_digit(self, radix: uint) -> bool; /// Converts a character to the corresponding digit. /// @@ -240,7 +240,7 @@ pub trait Char { /// /// Fails if given a radix outside the range [0..36]. #[unstable = "pending error conventions, trait organization"] - fn to_digit(&self, radix: uint) -> Option; + fn to_digit(self, radix: uint) -> Option; /// Converts a number to the character representing it. /// @@ -267,7 +267,7 @@ pub trait Char { /// * Characters in [0x100,0xffff] get 4-digit escapes: `\\uNNNN`. /// * Characters above 0x10000 get 8-digit escapes: `\\UNNNNNNNN`. #[unstable = "pending error conventions, trait organization"] - fn escape_unicode(&self, f: |char|); + fn escape_unicode(self, f: |char|); /// Returns a 'default' ASCII and C++11-like literal escape of a /// character. @@ -282,22 +282,22 @@ pub trait Char { /// * Any other chars in the range [0x20,0x7e] are not escaped. /// * Any other chars are given hex Unicode escapes; see `escape_unicode`. #[unstable = "pending error conventions, trait organization"] - fn escape_default(&self, f: |char|); + fn escape_default(self, f: |char|); /// Returns the amount of bytes this character would need if encoded in /// UTF-8. #[deprecated = "use len_utf8"] - fn len_utf8_bytes(&self) -> uint; + fn len_utf8_bytes(self) -> uint; /// Returns the amount of bytes this character would need if encoded in /// UTF-8. #[unstable = "pending trait organization"] - fn len_utf8(&self) -> uint; + fn len_utf8(self) -> uint; /// Returns the amount of bytes this character would need if encoded in /// UTF-16. #[unstable = "pending trait organization"] - fn len_utf16(&self) -> uint; + fn len_utf16(self) -> uint; /// Encodes this character as UTF-8 into the provided byte buffer, /// and then returns the number of bytes written. 
@@ -305,7 +305,7 @@ pub trait Char { /// If the buffer is not large enough, nothing will be written into it /// and a `None` will be returned. #[unstable = "pending trait organization"] - fn encode_utf8(&self) -> Utf8CodeUnits; + fn encode_utf8(self) -> Utf8CodeUnits; /// Encodes this character as UTF-16 into the provided `u16` buffer, /// and then returns the number of `u16`s written. @@ -313,16 +313,16 @@ pub trait Char { /// If the buffer is not large enough, nothing will be written into it /// and a `None` will be returned. #[unstable = "pending trait organization"] - fn encode_utf16(&self) -> Utf16CodeUnits; + fn encode_utf16(self) -> Utf16CodeUnits; } #[experimental = "trait is experimental"] impl Char for char { #[deprecated = "use is_digit"] - fn is_digit_radix(&self, radix: uint) -> bool { self.is_digit(radix) } + fn is_digit_radix(self, radix: uint) -> bool { self.is_digit(radix) } #[unstable = "pending trait organization"] - fn is_digit(&self, radix: uint) -> bool { + fn is_digit(self, radix: uint) -> bool { match self.to_digit(radix) { Some(_) => true, None => false, @@ -330,14 +330,14 @@ impl Char for char { } #[unstable = "pending trait organization"] - fn to_digit(&self, radix: uint) -> Option { + fn to_digit(self, radix: uint) -> Option { if radix > 36 { fail!("to_digit: radix is too high (maximum 36)"); } - let val = match *self { - '0' ... '9' => *self as uint - ('0' as uint), - 'a' ... 'z' => *self as uint + 10u - ('a' as uint), - 'A' ... 'Z' => *self as uint + 10u - ('A' as uint), + let val = match self { + '0' ... '9' => self as uint - ('0' as uint), + 'a' ... 'z' => self as uint + 10u - ('a' as uint), + 'A' ... 
'Z' => self as uint + 10u - ('A' as uint), _ => return None, }; if val < radix { Some(val) } @@ -352,19 +352,19 @@ impl Char for char { fn from_u32(i: u32) -> Option { from_u32(i) } #[unstable = "pending error conventions, trait organization"] - fn escape_unicode(&self, f: |char|) { + fn escape_unicode(self, f: |char|) { // avoid calling str::to_str_radix because we don't really need to allocate // here. f('\\'); let pad = match () { - _ if *self <= '\xff' => { f('x'); 2 } - _ if *self <= '\uffff' => { f('u'); 4 } + _ if self <= '\xff' => { f('x'); 2 } + _ if self <= '\uffff' => { f('u'); 4 } _ => { f('U'); 8 } }; for offset in range_step::(4 * (pad - 1), -1, -4) { let offset = offset as uint; unsafe { - match ((*self as i32) >> offset) & 0xf { + match ((self as i32) >> offset) & 0xf { i @ 0 ... 9 => { f(transmute('0' as i32 + i)); } i => { f(transmute('a' as i32 + (i - 10))); } } @@ -373,27 +373,27 @@ impl Char for char { } #[unstable = "pending error conventions, trait organization"] - fn escape_default(&self, f: |char|) { - match *self { + fn escape_default(self, f: |char|) { + match self { '\t' => { f('\\'); f('t'); } '\r' => { f('\\'); f('r'); } '\n' => { f('\\'); f('n'); } '\\' => { f('\\'); f('\\'); } '\'' => { f('\\'); f('\''); } '"' => { f('\\'); f('"'); } - '\x20' ... '\x7e' => { f(*self); } + '\x20' ... 
'\x7e' => { f(self); } _ => self.escape_unicode(f), } } #[inline] #[deprecated = "use len_utf8"] - fn len_utf8_bytes(&self) -> uint { self.len_utf8() } + fn len_utf8_bytes(self) -> uint { self.len_utf8() } #[inline] #[unstable = "pending trait organization"] - fn len_utf8(&self) -> uint { - let code = *self as u32; + fn len_utf8(self) -> uint { + let code = self as u32; match () { _ if code < MAX_ONE_B => 1u, _ if code < MAX_TWO_B => 2u, @@ -404,15 +404,15 @@ impl Char for char { #[inline] #[unstable = "pending trait organization"] - fn len_utf16(&self) -> uint { - let ch = *self as u32; + fn len_utf16(self) -> uint { + let ch = self as u32; if (ch & 0xFFFF_u32) == ch { 1 } else { 2 } } #[inline] #[unstable = "pending error conventions, trait organization"] - fn encode_utf8(&self) -> Utf8CodeUnits { - let code = *self as u32; + fn encode_utf8(self) -> Utf8CodeUnits { + let code = self as u32; let (len, buf) = if code < MAX_ONE_B { (1, [code as u8, 0, 0, 0]) } else if code < MAX_TWO_B { @@ -436,9 +436,9 @@ impl Char for char { #[inline] #[unstable = "pending error conventions, trait organization"] - fn encode_utf16(&self) -> Utf16CodeUnits { + fn encode_utf16(self) -> Utf16CodeUnits { // Marked #[inline] to allow llvm optimizing it away - let mut ch = *self as u32; + let mut ch = self as u32; let (len, buf) = if (ch & 0xFFFF_u32) == ch { // The BMP falls through (assuming non-surrogate, as it should) (1, [ch as u16, 0]) From d48b5bf88b98689e57c1ecb42d5dbed7be78a82e Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Tue, 14 Oct 2014 13:45:20 -0700 Subject: [PATCH 14/14] wip --- src/libcollections/str.rs | 8 ++- src/libcore/char.rs | 130 +++++++++++++++++++++++++--------- src/libdebug/repr.rs | 4 +- src/libgraphviz/lib.rs | 2 +- src/librustdoc/clean/mod.rs | 4 +- src/libsyntax/print/pprust.rs | 8 ++- 6 files changed, 113 insertions(+), 43 deletions(-) diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index 1c40a299b9e7d..bc08c4f034ca2 100644 --- 
a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -698,7 +698,9 @@ pub trait StrAllocating: Str { let me = self.as_slice(); let mut out = String::with_capacity(me.len()); for c in me.chars() { - c.escape_default(|c| out.push(c)); + for c in c.escape_default() { + out.push(c); + } } out } @@ -708,7 +710,9 @@ pub trait StrAllocating: Str { let me = self.as_slice(); let mut out = String::with_capacity(me.len()); for c in me.chars() { - c.escape_unicode(|c| out.push(c)); + for c in c.escape_unicode() { + out.push(c); + } } out } diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 53bebc6c8e98d..13522e3aa7ec3 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -18,7 +18,7 @@ use clone::Clone; use mem::transmute; use option::{None, Option, Some}; -use iter::{range_step, Iterator}; +use iter::{range_step, Iterator, RangeStep}; use collections::Collection; // UTF-8 ranges and tags for encoding characters @@ -166,7 +166,9 @@ pub fn from_digit(num: uint, radix: uint) -> Option { /// #[deprecated = "use the Char::escape_unicode method"] pub fn escape_unicode(c: char, f: |char|) { - c.escape_unicode(f) + for char in c.escape_unicode() { + f(char); + } } /// @@ -183,7 +185,9 @@ pub fn escape_unicode(c: char, f: |char|) { /// #[deprecated = "use the Char::escape_default method"] pub fn escape_default(c: char, f: |char|) { - c.escape_default(f) + for c in c.escape_default() { + f(c); + } } /// Returns the amount of bytes this `char` would need if encoded in UTF-8 @@ -267,7 +271,7 @@ pub trait Char { /// * Characters in [0x100,0xffff] get 4-digit escapes: `\\uNNNN`. /// * Characters above 0x10000 get 8-digit escapes: `\\UNNNNNNNN`. #[unstable = "pending error conventions, trait organization"] - fn escape_unicode(self, f: |char|); + fn escape_unicode(self) -> UnicodeEscapedChars; /// Returns a 'default' ASCII and C++11-like literal escape of a /// character. 
@@ -282,7 +286,7 @@ pub trait Char { /// * Any other chars in the range [0x20,0x7e] are not escaped. /// * Any other chars are given hex Unicode escapes; see `escape_unicode`. #[unstable = "pending error conventions, trait organization"] - fn escape_default(self, f: |char|); + fn escape_default(self) -> DefaultEscapedChars; /// Returns the amount of bytes this character would need if encoded in /// UTF-8. @@ -352,38 +356,23 @@ impl Char for char { fn from_u32(i: u32) -> Option { from_u32(i) } #[unstable = "pending error conventions, trait organization"] - fn escape_unicode(self, f: |char|) { - // avoid calling str::to_str_radix because we don't really need to allocate - // here. - f('\\'); - let pad = match () { - _ if self <= '\xff' => { f('x'); 2 } - _ if self <= '\uffff' => { f('u'); 4 } - _ => { f('U'); 8 } - }; - for offset in range_step::(4 * (pad - 1), -1, -4) { - let offset = offset as uint; - unsafe { - match ((self as i32) >> offset) & 0xf { - i @ 0 ... 9 => { f(transmute('0' as i32 + i)); } - i => { f(transmute('a' as i32 + (i - 10))); } - } - } - } + fn escape_unicode(self) -> UnicodeEscapedChars { + UnicodeEscapedChars { c: self, state: EscapeBackslash } } #[unstable = "pending error conventions, trait organization"] - fn escape_default(self, f: |char|) { - match self { - '\t' => { f('\\'); f('t'); } - '\r' => { f('\\'); f('r'); } - '\n' => { f('\\'); f('n'); } - '\\' => { f('\\'); f('\\'); } - '\'' => { f('\\'); f('\''); } - '"' => { f('\\'); f('"'); } - '\x20' ... '\x7e' => { f(self); } - _ => self.escape_unicode(f), - } + fn escape_default(self) -> DefaultEscapedChars { + let init_state = match self { + '\t' => DefaultEscapeBackslash('t'), + '\r' => DefaultEscapeBackslash('r'), + '\n' => DefaultEscapeBackslash('n'), + '\\' => DefaultEscapeBackslash('\\'), + '\'' => DefaultEscapeBackslash('\''), + '"' => DefaultEscapeBackslash('"'), + '\x20' ... 
'\x7e' => DefaultEscapeChar(self), + _ => DefaultEscapeUnicode(self.escape_unicode()) + }; + DefaultEscapedChars { state: init_state } } #[inline] @@ -510,3 +499,76 @@ impl Clone for Utf16CodeUnits { Utf16CodeUnits { pos: self.pos, len: self.len, buf: self.buf } } } + +/// An iterator over the characters that represent a `char`, as escaped by +/// Rust's unicode escaping rules. +pub struct UnicodeEscapedChars { + c: char, + state: UnicodeEscapedCharsState +} + +enum UnicodeEscapedCharsState { + EscapeBackslash, + EscapeType, + EscapeValue(RangeStep), +} + +impl Iterator for UnicodeEscapedChars { + fn next(&mut self) -> Option { + match self.state { + EscapeBackslash => { + self.state = EscapeType; + Some('\\') + } + EscapeType => { + let (typechar, pad) = if self.c <= '\xff' { ('x', 2) } + else if self.c <= '\uffff' { ('u', 4) } + else { ('U', 8) }; + self.state = EscapeValue(range_step(4 * (pad - 1), -1, -4i32)); + Some(typechar) + } + EscapeValue(ref mut range_step) => match range_step.next() { + Some(offset) => { + let offset = offset as uint; + let v = match ((self.c as i32) >> offset) & 0xf { + i @ 0 ... 9 => '0' as i32 + i, + i => 'a' as i32 + (i - 10) + }; + Some(unsafe { transmute(v) }) + } + None => None + } + } + } +} + +/// An iterator over the characters that represent a `char`, escaped +/// for maximum portability. 
+pub struct DefaultEscapedChars { + state: DefaultEscapedCharsState +} + +enum DefaultEscapedCharsState { + DefaultEscapeBackslash(char), + DefaultEscapeChar(char), + DefaultEscapeDone, + DefaultEscapeUnicode(UnicodeEscapedChars), +} + +impl Iterator for DefaultEscapedChars { + fn next(&mut self) -> Option { + match self.state { + DefaultEscapeBackslash(c) => { + self.state = DefaultEscapeChar(c); + Some('\\') + } + DefaultEscapeChar(c) => { + self.state = DefaultEscapeDone; + Some(c) + } + DefaultEscapeDone => None, + DefaultEscapeUnicode(ref mut iter) => iter.next() + } + } +} + diff --git a/src/libdebug/repr.rs b/src/libdebug/repr.rs index 6a3c009964d19..430e36340fb8d 100644 --- a/src/libdebug/repr.rs +++ b/src/libdebug/repr.rs @@ -228,9 +228,9 @@ impl<'a> ReprVisitor<'a> { } '\x20'...'\x7e' => self.writer.write([ch as u8]), _ => { - ch.escape_unicode(|c| { + for c in ch.escape_unicode() { let _ = self.writer.write([c as u8]); - }); + } Ok(()) } }); diff --git a/src/libgraphviz/lib.rs b/src/libgraphviz/lib.rs index e21186a5fc879..c9c53459e5589 100644 --- a/src/libgraphviz/lib.rs +++ b/src/libgraphviz/lib.rs @@ -420,7 +420,7 @@ impl<'a> LabelText<'a> { // not escaping \\, since Graphviz escString needs to // interpret backslashes; see EscStr above. 
'\\' => f(c), - _ => c.escape_default(f) + _ => for c in c.escape_default() { f(c) } } } fn escape_str(s: &str) -> String { diff --git a/src/librustdoc/clean/mod.rs b/src/librustdoc/clean/mod.rs index 7e9bb2844a7c7..7d782c66acaa6 100644 --- a/src/librustdoc/clean/mod.rs +++ b/src/librustdoc/clean/mod.rs @@ -2020,9 +2020,9 @@ fn lit_to_string(lit: &ast::Lit) -> String { ast::LitBinary(ref data) => format!("{:?}", data.as_slice()), ast::LitByte(b) => { let mut res = String::from_str("b'"); - (b as char).escape_default(|c| { + for c in (b as char).escape_default() { res.push(c); - }); + } res.push('\''); res }, diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index d32828192e996..898cbe6f72b81 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -2663,13 +2663,17 @@ impl<'a> State<'a> { ast::LitStr(ref st, style) => self.print_string(st.get(), style), ast::LitByte(byte) => { let mut res = String::from_str("b'"); - (byte as char).escape_default(|c| res.push_char(c)); + for c in (byte as char).escape_default() { + res.push_char(c); + } res.push_char('\''); word(&mut self.s, res.as_slice()) } ast::LitChar(ch) => { let mut res = String::from_str("'"); - ch.escape_default(|c| res.push_char(c)); + for c in ch.escape_default() { + res.push_char(c); + } res.push_char('\''); word(&mut self.s, res.as_slice()) }