Skip to content

Make the width function of char return u32 #23539

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/etc/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,13 +457,13 @@ def emit_charwidth_module(f, width_table):
""")

f.write("""
pub fn width(c: char, is_cjk: bool) -> Option<usize> {
match c as usize {
pub fn width(c: char, is_cjk: bool) -> Option<u32> {
match c as u32 {
_c @ 0 => Some(0), // null is zero width
cu if cu < 0x20 => None, // control sequences have no width
cu if cu < 0x7F => Some(1), // ASCII
cu if cu < 0xA0 => None, // more control sequences
_ => Some(bsearch_range_value_table(c, is_cjk, charwidth_table) as usize)
_ => Some(bsearch_range_value_table(c, is_cjk, charwidth_table) as u32)
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/libcollections/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1456,7 +1456,7 @@ impl str {
/// characters be treated as 1 column (i.e., `is_cjk = false`) if the locale is unknown.
#[unstable(feature = "unicode",
reason = "this functionality may only be provided by libunicode")]
pub fn width(&self, is_cjk: bool) -> usize {
pub fn width(&self, is_cjk: bool) -> u32 {
UnicodeStr::width(&self[..], is_cjk)
}

Expand Down
2 changes: 1 addition & 1 deletion src/libcore/char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ enum EscapeUnicodeState {
Backslash,
Type,
LeftBrace,
Value(usize),
Value(u8),
RightBrace,
Done,
}
Expand Down
4 changes: 2 additions & 2 deletions src/librustc_driver/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ Available lint options:
.map(|&s| s.name.width(true))
.max().unwrap_or(0);
let padded = |x: &str| {
let mut s = repeat(" ").take(max_name_len - x.chars().count())
let mut s = repeat(" ").take((max_name_len - x.width(true)) as usize)
.collect::<String>();
s.push_str(x);
s
Expand All @@ -603,7 +603,7 @@ Available lint options:
.map(|&(s, _)| s.width(true))
.max().unwrap_or(0);
let padded = |x: &str| {
let mut s = repeat(" ").take(max_name_len - x.chars().count())
let mut s = repeat(" ").take((max_name_len - x.width(true)) as usize)
.collect::<String>();
s.push_str(x);
s
Expand Down
4 changes: 2 additions & 2 deletions src/libsyntax/diagnostic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,7 @@ fn highlight_lines(err: &mut EmitterWriter,
_ => lastc.width(false).unwrap_or(0),
};
col += count;
s.extend(::std::iter::repeat('~').take(count));
s.extend(::std::iter::repeat('~').take(count as usize));

let hi = cm.lookup_char_pos(sp.hi);
if hi.col != lo.col {
Expand All @@ -556,7 +556,7 @@ fn highlight_lines(err: &mut EmitterWriter,
_ => ch.width(false).unwrap_or(0),
};
col += count;
s.extend(::std::iter::repeat('~').take(count));
s.extend(::std::iter::repeat('~').take(count as usize));
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/libunicode/char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -447,5 +447,5 @@ impl char {
/// `is_cjk` = `false`) if the context cannot be reliably determined.
#[unstable(feature = "unicode",
reason = "needs expert opinion. is_cjk flag stands out as ugly")]
pub fn width(self, is_cjk: bool) -> Option<usize> { charwidth::width(self, is_cjk) }
pub fn width(self, is_cjk: bool) -> Option<u32> { charwidth::width(self, is_cjk) }
}
6 changes: 3 additions & 3 deletions src/libunicode/tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7603,13 +7603,13 @@ pub mod charwidth {
}
}

pub fn width(c: char, is_cjk: bool) -> Option<usize> {
match c as usize {
pub fn width(c: char, is_cjk: bool) -> Option<u32> {
match c as u32 {
_c @ 0 => Some(0), // null is zero width
cu if cu < 0x20 => None, // control sequences have no width
cu if cu < 0x7F => Some(1), // ASCII
cu if cu < 0xA0 => None, // more control sequences
_ => Some(bsearch_range_value_table(c, is_cjk, charwidth_table) as usize)
_ => Some(bsearch_range_value_table(c, is_cjk, charwidth_table) as u32)
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/libunicode/u_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ pub trait UnicodeStr {
fn words<'a>(&'a self) -> Words<'a>;
fn is_whitespace(&self) -> bool;
fn is_alphanumeric(&self) -> bool;
fn width(&self, is_cjk: bool) -> usize;
fn width(&self, is_cjk: bool) -> u32;
fn trim<'a>(&'a self) -> &'a str;
fn trim_left<'a>(&'a self) -> &'a str;
fn trim_right<'a>(&'a self) -> &'a str;
Expand Down Expand Up @@ -76,7 +76,7 @@ impl UnicodeStr for str {
fn is_alphanumeric(&self) -> bool { self.chars().all(|c| c.is_alphanumeric()) }

#[inline]
fn width(&self, is_cjk: bool) -> usize {
fn width(&self, is_cjk: bool) -> u32 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, this seems like somewhat of an edge case: the width of a string will typically be fairly similar (at least, proportional) to its size in memory.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, maybe this is true.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe that calls for `u64`, for integers that do not clearly fit within the range of `u32`?

self.chars().map(|c| c.width(is_cjk).unwrap_or(0)).sum()
}

Expand Down