Skip to content

Commit 6048ba8

Browse files
committed
Auto merge of #23249 - tbu-:pr_rm_core_str_macros, r=alexcrichton
2 parents d193233 + fb297d1 commit 6048ba8

File tree

1 file changed

+40
-41
lines changed

1 file changed

+40
-41
lines changed

src/libcore/str/mod.rs

Lines changed: 40 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -335,21 +335,20 @@ pub struct Chars<'a> {
335335
iter: slice::Iter<'a, u8>
336336
}
337337

338-
// Return the initial codepoint accumulator for the first byte.
339-
// The first byte is special, only want bottom 5 bits for width 2, 4 bits
340-
// for width 3, and 3 bits for width 4
341-
macro_rules! utf8_first_byte {
342-
($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
343-
}
338+
/// Return the initial codepoint accumulator for the first byte.
339+
/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
340+
/// for width 3, and 3 bits for width 4.
341+
#[inline]
342+
fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    // Shifting 0x7F right by `width` leaves exactly the payload bits of a
    // lead byte: 5 bits for width 2, 4 bits for width 3, 3 bits for width 4.
    let payload_mask: u8 = 0x7F >> width;
    let payload = byte & payload_mask;
    payload as u32
}
344343

345-
// return the value of $ch updated with continuation byte $byte
346-
macro_rules! utf8_acc_cont_byte {
347-
($ch:expr, $byte:expr) => (($ch << 6) | ($byte & CONT_MASK) as u32)
348-
}
344+
/// Return the value of `ch` updated with continuation byte `byte`.
345+
#[inline]
346+
fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
    // Make room for six more bits in the accumulator, then merge in the
    // bits of `byte` selected by `CONT_MASK`.
    let shifted = ch << 6;
    let payload = (byte & CONT_MASK) as u32;
    shifted | payload
}
349347

350-
macro_rules! utf8_is_cont_byte {
351-
($byte:expr) => (($byte & !CONT_MASK) == TAG_CONT_U8)
352-
}
348+
/// Checks whether the byte is a UTF-8 continuation byte (i.e. starts with the
349+
/// bits `10`).
350+
#[inline]
351+
fn utf8_is_cont_byte(byte: u8) -> bool {
    // Keep only the bits outside `CONT_MASK` and compare them with the
    // continuation-byte tag.
    let tag_bits = byte & !CONT_MASK;
    tag_bits == TAG_CONT_U8
}
353352

354353
#[inline]
355354
fn unwrap_or_0(opt: Option<&u8>) -> u8 {
@@ -374,20 +373,20 @@ pub fn next_code_point(bytes: &mut slice::Iter<u8>) -> Option<u32> {
374373
// Multibyte case follows
375374
// Decode from a byte combination out of: [[[x y] z] w]
376375
// NOTE: Performance is sensitive to the exact formulation here
377-
let init = utf8_first_byte!(x, 2);
376+
let init = utf8_first_byte(x, 2);
378377
let y = unwrap_or_0(bytes.next());
379-
let mut ch = utf8_acc_cont_byte!(init, y);
378+
let mut ch = utf8_acc_cont_byte(init, y);
380379
if x >= 0xE0 {
381380
// [[x y z] w] case
382381
// 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
383382
let z = unwrap_or_0(bytes.next());
384-
let y_z = utf8_acc_cont_byte!((y & CONT_MASK) as u32, z);
383+
let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
385384
ch = init << 12 | y_z;
386385
if x >= 0xF0 {
387386
// [x y z w] case
388387
// use only the lower 3 bits of `init`
389388
let w = unwrap_or_0(bytes.next());
390-
ch = (init & 7) << 18 | utf8_acc_cont_byte!(y_z, w);
389+
ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
391390
}
392391
}
393392

@@ -410,18 +409,18 @@ pub fn next_code_point_reverse(bytes: &mut slice::Iter<u8>) -> Option<u32> {
410409
// Decode from a byte combination out of: [x [y [z w]]]
411410
let mut ch;
412411
let z = unwrap_or_0(bytes.next_back());
413-
ch = utf8_first_byte!(z, 2);
414-
if utf8_is_cont_byte!(z) {
412+
ch = utf8_first_byte(z, 2);
413+
if utf8_is_cont_byte(z) {
415414
let y = unwrap_or_0(bytes.next_back());
416-
ch = utf8_first_byte!(y, 3);
417-
if utf8_is_cont_byte!(y) {
415+
ch = utf8_first_byte(y, 3);
416+
if utf8_is_cont_byte(y) {
418417
let x = unwrap_or_0(bytes.next_back());
419-
ch = utf8_first_byte!(x, 4);
420-
ch = utf8_acc_cont_byte!(ch, y);
418+
ch = utf8_first_byte(x, 4);
419+
ch = utf8_acc_cont_byte(ch, y);
421420
}
422-
ch = utf8_acc_cont_byte!(ch, z);
421+
ch = utf8_acc_cont_byte(ch, z);
423422
}
424-
ch = utf8_acc_cont_byte!(ch, w);
423+
ch = utf8_acc_cont_byte(ch, w);
425424

426425
Some(ch)
427426
}
@@ -1040,7 +1039,7 @@ fn run_utf8_validation_iterator(iter: &mut slice::Iter<u8>)
10401039
// ASCII characters are always valid, so only large
10411040
// bytes need more examination.
10421041
if first >= 128 {
1043-
let w = UTF8_CHAR_WIDTH[first as usize] as usize;
1042+
let w = UTF8_CHAR_WIDTH[first as usize];
10441043
let second = next!();
10451044
// 2-byte encoding is for codepoints \u{0080} to \u{07ff}
10461045
// first C2 80 last DF BF
@@ -1594,14 +1593,14 @@ impl StrExt for str {
15941593
i -= 1;
15951594
}
15961595

1597-
let mut val = s.as_bytes()[i] as u32;
1598-
let w = UTF8_CHAR_WIDTH[val as usize] as usize;
1599-
assert!((w != 0));
1596+
let first= s.as_bytes()[i];
1597+
let w = UTF8_CHAR_WIDTH[first as usize];
1598+
assert!(w != 0);
16001599

1601-
val = utf8_first_byte!(val, w);
1602-
val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 1]);
1603-
if w > 2 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 2]); }
1604-
if w > 3 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 3]); }
1600+
let mut val = utf8_first_byte(first, w as u32);
1601+
val = utf8_acc_cont_byte(val, s.as_bytes()[i + 1]);
1602+
if w > 2 { val = utf8_acc_cont_byte(val, s.as_bytes()[i + 2]); }
1603+
if w > 3 { val = utf8_acc_cont_byte(val, s.as_bytes()[i + 3]); }
16051604

16061605
return CharRange {ch: unsafe { mem::transmute(val) }, next: i};
16071606
}
@@ -1686,16 +1685,16 @@ pub fn char_range_at_raw(bytes: &[u8], i: usize) -> (u32, usize) {
16861685

16871686
// Multibyte case is a fn to allow char_range_at to inline cleanly
16881687
fn multibyte_char_range_at(bytes: &[u8], i: usize) -> (u32, usize) {
1689-
let mut val = bytes[i] as u32;
1690-
let w = UTF8_CHAR_WIDTH[val as usize] as usize;
1691-
assert!((w != 0));
1688+
let first = bytes[i];
1689+
let w = UTF8_CHAR_WIDTH[first as usize];
1690+
assert!(w != 0);
16921691

1693-
val = utf8_first_byte!(val, w);
1694-
val = utf8_acc_cont_byte!(val, bytes[i + 1]);
1695-
if w > 2 { val = utf8_acc_cont_byte!(val, bytes[i + 2]); }
1696-
if w > 3 { val = utf8_acc_cont_byte!(val, bytes[i + 3]); }
1692+
let mut val = utf8_first_byte(first, w as u32);
1693+
val = utf8_acc_cont_byte(val, bytes[i + 1]);
1694+
if w > 2 { val = utf8_acc_cont_byte(val, bytes[i + 2]); }
1695+
if w > 3 { val = utf8_acc_cont_byte(val, bytes[i + 3]); }
16971696

1698-
return (val, i + w);
1697+
return (val, i + w as usize);
16991698
}
17001699

17011700
multibyte_char_range_at(bytes, i)

0 commit comments

Comments
 (0)