Skip to content

Commit 503e5df

Browse files
committed
auto merge of #10621 : Florob/rust/unicode63, r=cmr
This updates the unicode.rs file to the latest Unicode version, released 2013-09-30.
2 parents d2c405e + dfe38db commit 503e5df

File tree

5 files changed

+1479
-814
lines changed

5 files changed

+1479
-814
lines changed

src/etc/unicode.py

+17-15
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# code covering the core properties. Since this is a pretty rare event we
66
# just store this out-of-line and check the unicode.rs file into git.
77
#
8-
# The emitted code is "the minimum we think is necessary for libcore", that
8+
# The emitted code is "the minimum we think is necessary for libstd", that
99
# is, to support basic operations of the compiler and "most nontrivial rust
1010
# programs". It is not meant to be a complete implementation of unicode.
1111
# For that we recommend you use a proper binding to libicu.
@@ -41,7 +41,7 @@ def load_unicode_data(f):
4141
continue
4242
[code, name, gencat, combine, bidi,
4343
decomp, deci, digit, num, mirror,
44-
old, iso, upcase, lowcsae, titlecase ] = fields
44+
old, iso, upcase, lowcase, titlecase ] = fields
4545

4646
code = int(code, 16)
4747

@@ -89,11 +89,9 @@ def load_unicode_data(f):
8989

9090
return (canon_decomp, compat_decomp, gencats, combines)
9191

92-
93-
def load_derived_core_properties(f):
92+
def load_properties(f, interestingprops):
9493
fetch(f)
95-
derivedprops = {}
96-
interestingprops = ["XID_Start", "XID_Continue", "Alphabetic"]
94+
props = {}
9795
re1 = re.compile("^([0-9A-F]+) +; (\w+)")
9896
re2 = re.compile("^([0-9A-F]+)\.\.([0-9A-F]+) +; (\w+)")
9997

@@ -118,10 +116,10 @@ def load_derived_core_properties(f):
118116
continue
119117
d_lo = int(d_lo, 16)
120118
d_hi = int(d_hi, 16)
121-
if prop not in derivedprops:
122-
derivedprops[prop] = []
123-
derivedprops[prop].append((d_lo, d_hi))
124-
return derivedprops
119+
if prop not in props:
120+
props[prop] = []
121+
props[prop].append((d_lo, d_hi))
122+
return props
125123

126124
def escape_char(c):
127125
if c <= 0xff:
@@ -144,7 +142,7 @@ def emit_bsearch_range_table(f):
144142
use cmp::{Equal, Less, Greater};
145143
use vec::ImmutableVector;
146144
use option::None;
147-
(do r.bsearch |&(lo,hi)| {
145+
r.bsearch(|&(lo,hi)| {
148146
if lo <= c && c <= hi { Equal }
149147
else if hi < c { Less }
150148
else { Greater }
@@ -302,14 +300,14 @@ def emit_decomp_module(f, canon, compat, combine):
302300
ix += 1
303301
f.write("\n ];\n")
304302

305-
f.write(" pub fn canonical(c: char, i: &fn(char)) "
303+
f.write(" pub fn canonical(c: char, i: |char|) "
306304
+ "{ d(c, i, false); }\n\n")
307-
f.write(" pub fn compatibility(c: char, i: &fn(char)) "
305+
f.write(" pub fn compatibility(c: char, i: |char|) "
308306
+"{ d(c, i, true); }\n\n")
309307
f.write(" pub fn canonical_combining_class(c: char) -> u8 {\n"
310308
+ " bsearch_range_value_table(c, combining_class_table)\n"
311309
+ " }\n\n")
312-
f.write(" fn d(c: char, i: &fn(char), k: bool) {\n")
310+
f.write(" fn d(c: char, i: |char|, k: bool) {\n")
313311
f.write(" use iter::Iterator;\n");
314312

315313
f.write(" if c <= '\\x7f' { i(c); return; }\n")
@@ -376,5 +374,9 @@ def emit_decomp_module(f, canon, compat, combine):
376374

377375
emit_decomp_module(rf, canon_decomp, compat_decomp, combines)
378376

379-
derived = load_derived_core_properties("DerivedCoreProperties.txt")
377+
derived = load_properties("DerivedCoreProperties.txt",
378+
["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"])
380379
emit_property_module(rf, "derived_property", derived)
380+
381+
props = load_properties("PropList.txt", ["White_Space"])
382+
emit_property_module(rf, "property", props)

src/libstd/char.rs

+8-10
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use cast::transmute;
1414
use option::{None, Option, Some};
1515
use iter::{Iterator, range_step};
1616
use str::StrSlice;
17-
use unicode::{derived_property, general_category, decompose};
17+
use unicode::{derived_property, property, general_category, decompose};
1818
use to_str::ToStr;
1919
use str;
2020

@@ -89,30 +89,28 @@ pub fn is_XID_continue(c: char) -> bool { derived_property::XID_Continue(c) }
8989

9090
///
9191
/// Indicates whether a character is in lower case, defined
92-
/// in terms of the Unicode General Category 'Ll'
92+
/// in terms of the Unicode Derived Core Property 'Lowercase'.
9393
///
9494
#[inline]
95-
pub fn is_lowercase(c: char) -> bool { general_category::Ll(c) }
95+
pub fn is_lowercase(c: char) -> bool { derived_property::Lowercase(c) }
9696

9797
///
9898
/// Indicates whether a character is in upper case, defined
99-
/// in terms of the Unicode General Category 'Lu'.
99+
/// in terms of the Unicode Derived Core Property 'Uppercase'.
100100
///
101101
#[inline]
102-
pub fn is_uppercase(c: char) -> bool { general_category::Lu(c) }
102+
pub fn is_uppercase(c: char) -> bool { derived_property::Uppercase(c) }
103103

104104
///
105105
/// Indicates whether a character is whitespace. Whitespace is defined in
106-
/// terms of the Unicode General Categories 'Zs', 'Zl', 'Zp'
107-
/// additional 'Cc'-category control codes in the range [0x09, 0x0d]
106+
/// terms of the Unicode Property 'White_Space'.
108107
///
109108
#[inline]
110109
pub fn is_whitespace(c: char) -> bool {
110+
// As an optimization ASCII whitespace characters are checked separately
111111
c == ' '
112112
|| ('\x09' <= c && c <= '\x0d')
113-
|| general_category::Zs(c)
114-
|| general_category::Zl(c)
115-
|| general_category::Zp(c)
113+
|| property::White_Space(c)
116114
}
117115

118116
///

0 commit comments

Comments
 (0)