Skip to content

Commit 108b8b6

Browse files
SimonSapin authored and alexcrichton committed
Deprecate the bytes!() macro.
Replace its usage with byte string literals, except in `bytes!()` tests. Also add a new snapshot, to be able to use the new b"foo" syntax. The src/etc/2014-06-rewrite-bytes-macros.py script automatically rewrites `bytes!()` invocations into byte string literals. Pass it filenames as arguments to generate a diff that you can inspect, or `--apply` followed by filenames to apply the changes in place. Diffs can be piped into `tip` or `pygmentize -l diff` for coloring.
1 parent abf7e93 commit 108b8b6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+498
-355
lines changed

src/compiletest/runtest.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1269,7 +1269,7 @@ fn make_out_name(config: &Config, testfile: &Path, extension: &str) -> Path {
12691269

12701270
fn aux_output_dir_name(config: &Config, testfile: &Path) -> Path {
12711271
let mut f = output_base_name(config, testfile);
1272-
match f.filename().map(|s| Vec::from_slice(s).append(bytes!(".libaux"))) {
1272+
match f.filename().map(|s| Vec::from_slice(s).append(b".libaux")) {
12731273
Some(v) => f.set_filename(v),
12741274
None => ()
12751275
}
@@ -1490,7 +1490,7 @@ fn append_suffix_to_stem(p: &Path, suffix: &str) -> Path {
14901490
(*p).clone()
14911491
} else {
14921492
let stem = p.filestem().unwrap();
1493-
p.with_filename(Vec::from_slice(stem).append(bytes!("-")).append(suffix.as_bytes()))
1493+
p.with_filename(Vec::from_slice(stem).append(b"-").append(suffix.as_bytes()))
14941494
}
14951495
}
14961496

src/doc/complement-cheatsheet.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ character.
7676
~~~
7777
use std::str;
7878
79-
let x = bytes!(72u8,"ello ",0xF0,0x90,0x80,"World!");
79+
let x = b"Hello \xF0\x90\x80World!";
8080
let y = str::from_utf8_lossy(x);
8181
~~~
8282

src/doc/rust.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,19 @@ the characters `U+0022` (double-quote) (except when followed by at least as
378378
many `U+0023` (`#`) characters as were used to start the raw string literal) or
379379
`U+005C` (`\`) do not have any special meaning.
380380

381+
Examples for byte string literals:
382+
383+
~~~~
384+
b"foo"; br"foo"; // foo
385+
b"\"foo\""; br#""foo""#; // "foo"
386+
387+
b"foo #\"# bar";
388+
br##"foo #"# bar"##; // foo #"# bar
389+
390+
b"\x52"; b"R"; br"R"; // R
391+
b"\\x52"; br"\x52"; // \x52
392+
~~~~
393+
381394
#### Number literals
382395

383396
~~~~ {.ebnf .gram}
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
#!/bin/env python
2+
#
3+
# Copyright 2014 The Rust Project Developers. See the COPYRIGHT
4+
# file at the top-level directory of this distribution and at
5+
# http://rust-lang.org/COPYRIGHT.
6+
#
7+
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
8+
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
9+
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
10+
# option. This file may not be copied, modified, or distributed
11+
# except according to those terms.
12+
13+
import sys
14+
import subprocess
15+
import re
16+
17+
18+
def main():
    """Command-line entry point.

    With no arguments, print a usage line.  With ``--apply`` as the first
    argument, call `patch` on every following filename; otherwise call
    `diff` on every filename given.
    """
    args = sys.argv[1:]
    if not args:
        print('Usage: %s [ --apply ] filename1.rs filename2.rs ...'
              % sys.argv[0])
        return

    if args[0] == '--apply':
        action, filenames = patch, args[1:]
    else:
        action, filenames = diff, args

    for filename in filenames:
        action(filename)
28+
29+
30+
def patch(filename):
    """Rewrite ``bytes!()`` invocations in `filename` in place.

    The file is only written back when `rewrite_bytes_macros` returns a
    result that differs from the content that was read.
    """
    original = read(filename)
    updated = rewrite_bytes_macros(original)
    if updated is None or updated == original:
        return
    write(filename, updated)
35+
36+
37+
def diff(filename):
    """Show a unified diff between `filename` and its rewritten form.

    Runs the external ``diff -u`` program, feeding it the rewritten source on
    standard input.  Nothing is run when `rewrite_bytes_macros` returns None.
    """
    rewritten = rewrite_bytes_macros(read(filename))
    if rewritten is None:
        return

    child = subprocess.Popen(['diff', '-u', filename, '-'],
                             stdin=subprocess.PIPE)
    child.stdin.write(rewritten)
    # Closing stdin signals end-of-input so that diff can finish.
    child.stdin.close()
    child.wait()
45+
46+
47+
def read(filename):
    """Return the entire content of `filename` as a byte string."""
    with open(filename, 'rb') as source_file:
        content = source_file.read()
    return content
50+
51+
52+
def write(filename, content):
    """Replace the content of `filename` with the byte string `content`."""
    with open(filename, 'wb') as target_file:
        target_file.write(content)
55+
56+
57+
def rewrite_bytes_macros(source):
    """Apply `rewrite_one_macro` to every BYTES_MACRO_RE match in `source`.

    Returns the resulting byte string, or None when the pattern did not
    match anywhere in `source`.
    """
    result, count = BYTES_MACRO_RE.subn(rewrite_one_macro, source)
    if count <= 0:
        return None
    return result
61+
62+
63+
# Matches one `bytes!( ... )` invocation; the `args` group captures the text
# between the parentheses.  re.VERBOSE makes the spaces inside the pattern
# insignificant.  `[^)]*` stops at the first `)`, so an invocation whose
# arguments themselves contain a `)` is not captured in full.
BYTES_MACRO_RE = re.compile(br'bytes!\( (?P<args> [^)]* ) \)', re.VERBOSE)
64+
65+
66+
def rewrite_one_macro(match):
    """Turn one ``bytes!(...)`` regex match into a ``b"..."`` literal.

    Returns the replacement byte string.  If SkipThisRewrite is raised while
    the arguments are processed, a 'Skipped' notice is printed and the
    matched text is returned unchanged.
    """
    original = match.group(0)
    try:
        values = parse_bytes(split_args(match.group('args')))
        # parse_bytes is lazy, so the join must stay inside the try block.
        body = b''.join(escape(value) for value in values)
    except SkipThisRewrite:
        print('Skipped: %s' % original.decode('utf8', 'replace'))
        return original
    return b'b"' + body + b'"'
73+
74+
75+
class SkipThisRewrite(Exception):
    """Raised when a ``bytes!()`` invocation should be left as it is."""
77+
78+
79+
def split_args(args):
    """Split a macro argument list on commas that are outside string quotes.

    `args` is a byte string.  Comma-separated pieces are glued back together
    (comma included) until the accumulated text holds an even number of
    double quotes, and each such chunk is yielded.  Whatever is still
    pending at the end, with an odd number of quotes, is yielded as is.
    """
    pending = []
    for piece in args.split(b','):
        pending.append(piece)
        candidate = b','.join(pending)
        if candidate.count(b'"') % 2:
            # Odd number of quotes: the comma was inside a string literal.
            continue
        yield candidate
        pending = []
    if pending:
        yield b','.join(pending)
91+
92+
93+
def parse_bytes(args):
    """Yield the byte values (integers) denoted by ``bytes!()`` arguments.

    `args` is an iterable of byte strings, one per macro argument.  An
    argument wrapped in double or single quotes contributes the UTF-8
    encoding of its evaluated content; any other argument is evaluated as an
    integer literal (after dropping an optional ``u8`` suffix) and
    contributes that single value.

    Raises SkipThisRewrite for a quoted argument that contains a backslash
    immediately followed by a newline.  An integer outside 0..0xFF fails an
    assertion.
    """
    for arg in args:
        arg = arg.strip()
        if (arg.startswith(b'"') and arg.endswith(b'"')) or (
                arg.startswith(b"'") and arg.endswith(b"'")):
            # Escaped newline means something different in Rust and Python.
            if b'\\\n' in arg:
                raise SkipThisRewrite
            # NOTE(review): eval() executes text taken from the file being
            # rewritten; only run this script on trusted sources.
            text = eval(b'u' + arg)
            # A bytearray iterates as integers on both Python 2 and 3, so no
            # version-specific ord() handling is needed here.
            for value in bytearray(text.encode('utf8')):
                yield value
        else:
            if arg.endswith(b'u8'):
                arg = arg[:-2]
            # Assume that all Rust integer literals
            # are valid Python integer literals
            # NOTE(review): eval() on file content, see the caveat above.
            value = int(eval(arg))
            assert 0 <= value <= 0xFF, 'not a byte value: %r' % (value,)
            yield value
111+
112+
113+
def escape(byte):
    """Return the text that represents `byte` inside a Rust ``b"..."`` literal.

    `byte` is an integer byte value.  A fixed set of bytes maps to
    backslash escapes, other bytes from space to ``~`` are returned
    unchanged, and everything else becomes a ``\\xHH`` escape.  The result
    is a byte string.
    """
    # bytes(bytearray([n])) is a one-byte string on both Python 2 and 3.
    c = bytes(bytearray([byte]))
    escaped = {
        b'\0': br'\0',
        b'\t': br'\t',
        b'\n': br'\n',
        b'\r': br'\r',
        # The generated literal is delimited by double quotes, so a raw
        # double quote would terminate it early and must be escaped.
        b'"': b'\\"',
        b'\'': b'\\\'',
        b'\\': br'\\',
    }.get(c)
    if escaped is not None:
        return escaped
    elif b' ' <= c <= b'~':
        return c
    else:
        return ('\\x%02X' % byte).encode('ascii')
129+
130+
131+
if str is not bytes:
    # Python 3.x: `str` and `bytes` are distinct types there.  Rebind the
    # module-level names so that `ord` passes its argument through unchanged
    # and `chr` builds a one-byte bytes object from an integer.
    def ord(x):
        return x

    def chr(x):
        return bytes([x])
135+
136+
137+
if __name__ == '__main__':
138+
main()

src/libcollections/slice.rs

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1957,30 +1957,30 @@ mod tests {
19571957

19581958
#[test]
19591959
fn test_starts_with() {
1960-
assert!(bytes!("foobar").starts_with(bytes!("foo")));
1961-
assert!(!bytes!("foobar").starts_with(bytes!("oob")));
1962-
assert!(!bytes!("foobar").starts_with(bytes!("bar")));
1963-
assert!(!bytes!("foo").starts_with(bytes!("foobar")));
1964-
assert!(!bytes!("bar").starts_with(bytes!("foobar")));
1965-
assert!(bytes!("foobar").starts_with(bytes!("foobar")));
1960+
assert!(b"foobar".starts_with(b"foo"));
1961+
assert!(!b"foobar".starts_with(b"oob"));
1962+
assert!(!b"foobar".starts_with(b"bar"));
1963+
assert!(!b"foo".starts_with(b"foobar"));
1964+
assert!(!b"bar".starts_with(b"foobar"));
1965+
assert!(b"foobar".starts_with(b"foobar"));
19661966
let empty: &[u8] = [];
19671967
assert!(empty.starts_with(empty));
1968-
assert!(!empty.starts_with(bytes!("foo")));
1969-
assert!(bytes!("foobar").starts_with(empty));
1968+
assert!(!empty.starts_with(b"foo"));
1969+
assert!(b"foobar".starts_with(empty));
19701970
}
19711971

19721972
#[test]
19731973
fn test_ends_with() {
1974-
assert!(bytes!("foobar").ends_with(bytes!("bar")));
1975-
assert!(!bytes!("foobar").ends_with(bytes!("oba")));
1976-
assert!(!bytes!("foobar").ends_with(bytes!("foo")));
1977-
assert!(!bytes!("foo").ends_with(bytes!("foobar")));
1978-
assert!(!bytes!("bar").ends_with(bytes!("foobar")));
1979-
assert!(bytes!("foobar").ends_with(bytes!("foobar")));
1974+
assert!(b"foobar".ends_with(b"bar"));
1975+
assert!(!b"foobar".ends_with(b"oba"));
1976+
assert!(!b"foobar".ends_with(b"foo"));
1977+
assert!(!b"foo".ends_with(b"foobar"));
1978+
assert!(!b"bar".ends_with(b"foobar"));
1979+
assert!(b"foobar".ends_with(b"foobar"));
19801980
let empty: &[u8] = [];
19811981
assert!(empty.ends_with(empty));
1982-
assert!(!empty.ends_with(bytes!("foo")));
1983-
assert!(bytes!("foobar").ends_with(empty));
1982+
assert!(!empty.ends_with(b"foo"));
1983+
assert!(b"foobar".ends_with(empty));
19841984
}
19851985

19861986
#[test]

src/libcollections/str.rs

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ static TAG_CONT_U8: u8 = 128u8;
382382
/// # Example
383383
///
384384
/// ```rust
385-
/// let input = bytes!("Hello ", 0xF0, 0x90, 0x80, "World");
385+
/// let input = b"Hello \xF0\x90\x80World";
386386
/// let output = std::str::from_utf8_lossy(input);
387387
/// assert_eq!(output.as_slice(), "Hello \uFFFDWorld");
388388
/// ```
@@ -391,7 +391,7 @@ pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
391391
return Slice(unsafe { mem::transmute(v) })
392392
}
393393

394-
static REPLACEMENT: &'static [u8] = bytes!(0xEF, 0xBF, 0xBD); // U+FFFD in UTF-8
394+
static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
395395
let mut i = 0;
396396
let total = v.len();
397397
fn unsafe_get(xs: &[u8], i: uint) -> u8 {
@@ -994,7 +994,7 @@ mod tests {
994994
fn test_into_bytes() {
995995
let data = "asdf".to_string();
996996
let buf = data.into_bytes();
997-
assert_eq!(bytes!("asdf"), buf.as_slice());
997+
assert_eq!(b"asdf", buf.as_slice());
998998
}
999999

10001000
#[test]
@@ -2050,58 +2050,58 @@ mod tests {
20502050

20512051
#[test]
20522052
fn test_str_from_utf8() {
2053-
let xs = bytes!("hello");
2053+
let xs = b"hello";
20542054
assert_eq!(from_utf8(xs), Some("hello"));
20552055

2056-
let xs = bytes!("ศไทย中华Việt Nam");
2056+
let xs = "ศไทย中华Việt Nam".as_bytes();
20572057
assert_eq!(from_utf8(xs), Some("ศไทย中华Việt Nam"));
20582058

2059-
let xs = bytes!("hello", 0xff);
2059+
let xs = b"hello\xFF";
20602060
assert_eq!(from_utf8(xs), None);
20612061
}
20622062

20632063
#[test]
20642064
fn test_str_from_utf8_owned() {
2065-
let xs = Vec::from_slice(bytes!("hello"));
2065+
let xs = Vec::from_slice(b"hello");
20662066
assert_eq!(from_utf8_owned(xs), Ok("hello".to_string()));
20672067

2068-
let xs = Vec::from_slice(bytes!("ศไทย中华Việt Nam"));
2068+
let xs = Vec::from_slice("ศไทย中华Việt Nam".as_bytes());
20692069
assert_eq!(from_utf8_owned(xs), Ok("ศไทย中华Việt Nam".to_string()));
20702070

2071-
let xs = Vec::from_slice(bytes!("hello", 0xff));
2071+
let xs = Vec::from_slice(b"hello\xFF");
20722072
assert_eq!(from_utf8_owned(xs),
2073-
Err(Vec::from_slice(bytes!("hello", 0xff))));
2073+
Err(Vec::from_slice(b"hello\xFF")));
20742074
}
20752075

20762076
#[test]
20772077
fn test_str_from_utf8_lossy() {
2078-
let xs = bytes!("hello");
2078+
let xs = b"hello";
20792079
assert_eq!(from_utf8_lossy(xs), Slice("hello"));
20802080

2081-
let xs = bytes!("ศไทย中华Việt Nam");
2081+
let xs = "ศไทย中华Việt Nam".as_bytes();
20822082
assert_eq!(from_utf8_lossy(xs), Slice("ศไทย中华Việt Nam"));
20832083

2084-
let xs = bytes!("Hello", 0xC2, " There", 0xFF, " Goodbye");
2084+
let xs = b"Hello\xC2 There\xFF Goodbye";
20852085
assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD There\uFFFD Goodbye".to_string()));
20862086

2087-
let xs = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
2087+
let xs = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
20882088
assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD\uFFFD There\uFFFD Goodbye".to_string()));
20892089

2090-
let xs = bytes!(0xF5, "foo", 0xF5, 0x80, "bar");
2090+
let xs = b"\xF5foo\xF5\x80bar";
20912091
assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFD\uFFFDbar".to_string()));
20922092

2093-
let xs = bytes!(0xF1, "foo", 0xF1, 0x80, "bar", 0xF1, 0x80, 0x80, "baz");
2093+
let xs = b"\xF1foo\xF1\x80bar\xF1\x80\x80baz";
20942094
assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFDbaz".to_string()));
20952095

2096-
let xs = bytes!(0xF4, "foo", 0xF4, 0x80, "bar", 0xF4, 0xBF, "baz");
2096+
let xs = b"\xF4foo\xF4\x80bar\xF4\xBFbaz";
20972097
assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFD\uFFFDbaz".to_string()));
20982098

2099-
let xs = bytes!(0xF0, 0x80, 0x80, 0x80, "foo", 0xF0, 0x90, 0x80, 0x80, "bar");
2099+
let xs = b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar";
21002100
assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFD\uFFFD\
21012101
foo\U00010000bar".to_string()));
21022102

21032103
// surrogates
2104-
let xs = bytes!(0xED, 0xA0, 0x80, "foo", 0xED, 0xBF, 0xBF, "bar");
2104+
let xs = b"\xED\xA0\x80foo\xED\xBF\xBFbar";
21052105
assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFDfoo\
21062106
\uFFFD\uFFFD\uFFFDbar".to_string()));
21072107
}
@@ -2298,8 +2298,8 @@ mod bench {
22982298
#[bench]
22992299
fn is_utf8_100_ascii(b: &mut Bencher) {
23002300

2301-
let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
2302-
Lorem ipsum dolor sit amet, consectetur. ");
2301+
let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
2302+
Lorem ipsum dolor sit amet, consectetur. ";
23032303

23042304
assert_eq!(100, s.len());
23052305
b.iter(|| {
@@ -2309,7 +2309,7 @@ mod bench {
23092309

23102310
#[bench]
23112311
fn is_utf8_100_multibyte(b: &mut Bencher) {
2312-
let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
2312+
let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
23132313
assert_eq!(100, s.len());
23142314
b.iter(|| {
23152315
is_utf8(s)
@@ -2318,8 +2318,8 @@ mod bench {
23182318

23192319
#[bench]
23202320
fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
2321-
let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
2322-
Lorem ipsum dolor sit amet, consectetur. ");
2321+
let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
2322+
Lorem ipsum dolor sit amet, consectetur. ";
23232323

23242324
assert_eq!(100, s.len());
23252325
b.iter(|| {
@@ -2329,7 +2329,7 @@ mod bench {
23292329

23302330
#[bench]
23312331
fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
2332-
let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
2332+
let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
23332333
assert_eq!(100, s.len());
23342334
b.iter(|| {
23352335
let _ = from_utf8_lossy(s);
@@ -2338,7 +2338,7 @@ mod bench {
23382338

23392339
#[bench]
23402340
fn from_utf8_lossy_invalid(b: &mut Bencher) {
2341-
let s = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
2341+
let s = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
23422342
b.iter(|| {
23432343
let _ = from_utf8_lossy(s);
23442344
});

0 commit comments

Comments
 (0)