|
8 | 8 | // option. This file may not be copied, modified, or distributed
|
9 | 9 | // except according to those terms.
|
10 | 10 |
|
11 |
| -//! Unicode-intensive `char` methods. |
| 11 | +//! Unicode-intensive `char` methods along with the `core` methods. |
12 | 12 | //!
|
13 | 13 | //! These methods implement functionality for `char` that requires knowledge of
|
14 | 14 | //! Unicode definitions, including normalization, categorization, and display information.
|
15 | 15 |
|
| 16 | +use core::char; |
| 17 | +use core::char::CharExt as C; |
16 | 18 | use core::option::Option;
|
17 | 19 | use tables::{derived_property, property, general_category, conversions, charwidth};
|
18 | 20 |
|
19 |
| -/// Useful functions for Unicode characters. |
| 21 | +/// Functionality for manipulating `char`. |
20 | 22 | #[experimental = "pending prelude organization"]
|
21 |
| -pub trait UnicodeChar { |
| 23 | +pub trait CharExt { |
| 24 | + /// Checks if a `char` parses as a numeric digit in the given radix. |
| 25 | + /// |
| 26 | + /// Compared to `is_numeric()`, this function only recognizes the characters |
| 27 | + /// `0-9`, `a-z` and `A-Z`. |
| 28 | + /// |
| 29 | + /// # Return value |
| 30 | + /// |
| 31 | + /// Returns `true` if the character is a valid digit under `radix`, and `false` |
| 32 | + /// otherwise. |
| 33 | + /// |
| 34 | + /// # Panics |
| 35 | + /// |
| 36 | + /// Panics if given a radix > 36. |
| 37 | + #[unstable = "pending integer conventions"] |
| 38 | + fn is_digit(self, radix: uint) -> bool; |
| 39 | + |
| 40 | + /// Converts a character to the corresponding digit. |
| 41 | + /// |
| 42 | + /// # Return value |
| 43 | + /// |
| 44 | + /// If the character is between '0' and '9', the corresponding value between 0 and |
| 45 | + /// 9. If the character is 'a' or 'A', 10. If it is 'b' or 'B', 11, etc. Returns |
| 46 | + /// `None` if the character does not refer to a digit in the given radix. |
| 47 | + /// |
| 48 | + /// # Panics |
| 49 | + /// |
| 50 | + /// Panics if given a radix > 36. |
| 51 | + #[unstable = "pending integer conventions"] |
| 52 | + fn to_digit(self, radix: uint) -> Option<uint>; |
| 53 | + |
| 54 | + /// Returns an iterator that yields the hexadecimal Unicode escape |
| 55 | + /// of a character, as `char`s. |
| 56 | + /// |
| 57 | + /// All characters are escaped with Rust syntax of the form `\u{NNNN}` |
| 58 | + /// where `NNNN` is the shortest hexadecimal representation of the code |
| 59 | + /// point. |
| 60 | + #[stable] |
| 61 | + fn escape_unicode(self) -> char::EscapeUnicode; |
| 62 | + |
| 63 | + /// Returns an iterator that yields the 'default' ASCII and |
| 64 | + /// C++11-like literal escape of a character, as `char`s. |
| 65 | + /// |
| 66 | + /// The default is chosen with a bias toward producing literals that are |
| 67 | + /// legal in a variety of languages, including C++11 and similar C-family |
| 68 | + /// languages. The exact rules are: |
| 69 | + /// |
| 70 | + /// * Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively. |
| 71 | + /// * Single-quote, double-quote and backslash chars are backslash- |
| 72 | + /// escaped. |
| 73 | + /// * Any other chars in the range [0x20,0x7e] are not escaped. |
| 74 | + /// * Any other chars are given hex Unicode escapes; see `escape_unicode`. |
| 75 | + #[stable] |
| 76 | + fn escape_default(self) -> char::EscapeDefault; |
| 77 | + |
| 78 | + /// Returns the number of bytes this character would need if encoded in |
| 79 | + /// UTF-8. |
| 80 | + #[stable] |
| 81 | + fn len_utf8(self) -> uint; |
| 82 | + |
| 83 | + /// Returns the number of 16-bit code units this character would need if |
| 84 | + /// encoded in UTF-16. |
| 85 | + #[stable] |
| 86 | + fn len_utf16(self) -> uint; |
| 87 | + |
| 88 | + /// Encodes this character as UTF-8 into the provided byte buffer, |
| 89 | + /// and then returns the number of bytes written. |
| 90 | + /// |
| 91 | + /// If the buffer is not large enough, nothing will be written into it |
| 92 | + /// and a `None` will be returned. |
| 93 | + #[unstable = "pending decision about Iterator/Writer/Reader"] |
| 94 | + fn encode_utf8(self, dst: &mut [u8]) -> Option<uint>; |
| 95 | + |
| 96 | + /// Encodes this character as UTF-16 into the provided `u16` buffer, |
| 97 | + /// and then returns the number of `u16`s written. |
| 98 | + /// |
| 99 | + /// If the buffer is not large enough, nothing will be written into it |
| 100 | + /// and a `None` will be returned. |
| 101 | + #[unstable = "pending decision about Iterator/Writer/Reader"] |
| 102 | + fn encode_utf16(self, dst: &mut [u16]) -> Option<uint>; |
| 103 | + |
22 | 104 | /// Returns whether the specified character is considered a Unicode
|
23 | 105 | /// alphabetic code point.
|
24 | 106 | fn is_alphabetic(self) -> bool;
|
@@ -118,7 +200,24 @@ pub trait UnicodeChar {
|
118 | 200 | }
|
119 | 201 |
|
120 | 202 | #[experimental = "pending prelude organization"]
|
121 |
| -impl UnicodeChar for char { |
| 203 | +impl CharExt for char { |
| 204 | + #[unstable = "pending integer conventions"] |
| 205 | + fn is_digit(self, radix: uint) -> bool { C::is_digit(self, radix) } |
| 206 | + #[unstable = "pending integer conventions"] |
| 207 | + fn to_digit(self, radix: uint) -> Option<uint> { C::to_digit(self, radix) } |
| 208 | + #[stable] |
| 209 | + fn escape_unicode(self) -> char::EscapeUnicode { C::escape_unicode(self) } |
| 210 | + #[stable] |
| 211 | + fn escape_default(self) -> char::EscapeDefault { C::escape_default(self) } |
| 212 | + #[stable] |
| 213 | + fn len_utf8(self) -> uint { C::len_utf8(self) } |
| 214 | + #[stable] |
| 215 | + fn len_utf16(self) -> uint { C::len_utf16(self) } |
| 216 | + #[unstable = "pending decision about Iterator/Writer/Reader"] |
| 217 | + fn encode_utf8(self, dst: &mut [u8]) -> Option<uint> { C::encode_utf8(self, dst) } |
| 218 | + #[unstable = "pending decision about Iterator/Writer/Reader"] |
| 219 | + fn encode_utf16(self, dst: &mut [u16]) -> Option<uint> { C::encode_utf16(self, dst) } |
| 220 | + |
122 | 221 | fn is_alphabetic(self) -> bool {
|
123 | 222 | match self {
|
124 | 223 | 'a' ... 'z' | 'A' ... 'Z' => true,
|
|
0 commit comments