diff --git a/compiler/rustc_expand/src/lib.rs b/compiler/rustc_expand/src/lib.rs
index 7f631cb71afa8..5436b1ef737f5 100644
--- a/compiler/rustc_expand/src/lib.rs
+++ b/compiler/rustc_expand/src/lib.rs
@@ -39,11 +39,6 @@ mod tests;
 mod parse {
     #[cfg(test)]
     mod tests;
-    #[cfg(test)]
-    mod lexer {
-        #[cfg(test)]
-        mod tests;
-    }
 }
 #[cfg(test)]
 mod tokenstream {
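
The nested `#[cfg(test)] mod lexer` stub existed only to mount the lexer's unit tests inside rustc_expand, which sits high enough in the crate graph to depend on rustc_parse, rustc_session, and rustc_span at once. With the tests moving into rustc_lexer itself, the stub can go. For orientation, a rough sketch of what this hunk leaves behind, reconstructed only from the context lines above (everything past the `mod tokenstream {` brace is elided by the hunk and assumed here):

mod parse {
    #[cfg(test)]
    mod tests;
}
#[cfg(test)]
mod tokenstream {
    // ... (assumed: a `mod tests;` item, cut off by the hunk)
}
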
diff --git a/compiler/rustc_expand/src/parse/lexer/tests.rs b/compiler/rustc_expand/src/parse/lexer/tests.rs
deleted file mode 100644
index 871844442839c..0000000000000
--- a/compiler/rustc_expand/src/parse/lexer/tests.rs
+++ /dev/null
@@ -1,252 +0,0 @@
-use rustc_ast::ast::AttrStyle;
-use rustc_ast::token::{self, CommentKind, Token, TokenKind};
-use rustc_data_structures::sync::Lrc;
-use rustc_errors::{emitter::EmitterWriter, Handler};
-use rustc_parse::lexer::StringReader;
-use rustc_session::parse::ParseSess;
-use rustc_span::source_map::{FilePathMapping, SourceMap};
-use rustc_span::symbol::Symbol;
-use rustc_span::with_default_session_globals;
-use rustc_span::{BytePos, Span};
-
-use std::io;
-use std::path::PathBuf;
-
-fn mk_sess(sm: Lrc<SourceMap>) -> ParseSess {
-    let emitter = EmitterWriter::new(
-        Box::new(io::sink()),
-        Some(sm.clone()),
-        false,
-        false,
-        false,
-        None,
-        false,
-    );
-    ParseSess::with_span_handler(Handler::with_emitter(true, None, Box::new(emitter)), sm)
-}
-
-// Creates a string reader for the given string.
-fn setup<'a>(sm: &SourceMap, sess: &'a ParseSess, teststr: String) -> StringReader<'a> {
-    let sf = sm.new_source_file(PathBuf::from(teststr.clone()).into(), teststr);
-    StringReader::new(sess, sf, None)
-}
-
-#[test]
-fn t1() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        let mut string_reader = setup(
-            &sm,
-            &sh,
-            "/* my source file */ fn main() { println!(\"zebra\"); }\n".to_string(),
-        );
-        assert_eq!(string_reader.next_token(), token::Comment);
-        assert_eq!(string_reader.next_token(), token::Whitespace);
-        let tok1 = string_reader.next_token();
-        let tok2 = Token::new(mk_ident("fn"), Span::with_root_ctxt(BytePos(21), BytePos(23)));
-        assert_eq!(tok1.kind, tok2.kind);
-        assert_eq!(tok1.span, tok2.span);
-        assert_eq!(string_reader.next_token(), token::Whitespace);
-        // Read another token.
-        let tok3 = string_reader.next_token();
-        assert_eq!(string_reader.pos(), BytePos(28));
-        let tok4 = Token::new(mk_ident("main"), Span::with_root_ctxt(BytePos(24), BytePos(28)));
-        assert_eq!(tok3.kind, tok4.kind);
-        assert_eq!(tok3.span, tok4.span);
-
-        assert_eq!(string_reader.next_token(), token::OpenDelim(token::Paren));
-        assert_eq!(string_reader.pos(), BytePos(29))
-    })
-}
-
-// Checks that the given reader produces the desired stream
-// of tokens (stop checking after exhausting `expected`).
-fn check_tokenization(mut string_reader: StringReader<'_>, expected: Vec<TokenKind>) {
-    for expected_tok in &expected {
-        assert_eq!(&string_reader.next_token(), expected_tok);
-    }
-}
-
-// Makes the identifier by looking up the string in the interner.
-fn mk_ident(id: &str) -> TokenKind {
-    token::Ident(Symbol::intern(id), false)
-}
-
-fn mk_lit(kind: token::LitKind, symbol: &str, suffix: Option<&str>) -> TokenKind {
-    TokenKind::lit(kind, Symbol::intern(symbol), suffix.map(Symbol::intern))
-}
-
-#[test]
-fn doublecolon_parsing() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        check_tokenization(
-            setup(&sm, &sh, "a b".to_string()),
-            vec![mk_ident("a"), token::Whitespace, mk_ident("b")],
-        );
-    })
-}
-
-#[test]
-fn doublecolon_parsing_2() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        check_tokenization(
-            setup(&sm, &sh, "a::b".to_string()),
-            vec![mk_ident("a"), token::Colon, token::Colon, mk_ident("b")],
-        );
-    })
-}
-
-#[test]
-fn doublecolon_parsing_3() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        check_tokenization(
-            setup(&sm, &sh, "a ::b".to_string()),
-            vec![mk_ident("a"), token::Whitespace, token::Colon, token::Colon, mk_ident("b")],
-        );
-    })
-}
-
-#[test]
-fn doublecolon_parsing_4() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        check_tokenization(
-            setup(&sm, &sh, "a:: b".to_string()),
-            vec![mk_ident("a"), token::Colon, token::Colon, token::Whitespace, mk_ident("b")],
-        );
-    })
-}
-
-#[test]
-fn character_a() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        assert_eq!(setup(&sm, &sh, "'a'".to_string()).next_token(), mk_lit(token::Char, "a", None),);
-    })
-}
-
-#[test]
-fn character_space() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        assert_eq!(setup(&sm, &sh, "' '".to_string()).next_token(), mk_lit(token::Char, " ", None),);
-    })
-}
-
-#[test]
-fn character_escaped() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        assert_eq!(
-            setup(&sm, &sh, "'\\n'".to_string()).next_token(),
-            mk_lit(token::Char, "\\n", None),
-        );
-    })
-}
-
-#[test]
-fn lifetime_name() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        assert_eq!(
-            setup(&sm, &sh, "'abc".to_string()).next_token(),
-            token::Lifetime(Symbol::intern("'abc")),
-        );
-    })
-}
-
-#[test]
-fn raw_string() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        assert_eq!(
-            setup(&sm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string()).next_token(),
-            mk_lit(token::StrRaw(3), "\"#a\\b\x00c\"", None),
-        );
-    })
-}
-
-#[test]
-fn literal_suffixes() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        macro_rules! test {
-            ($input: expr, $tok_type: ident, $tok_contents: expr) => {{
-                assert_eq!(
-                    setup(&sm, &sh, format!("{}suffix", $input)).next_token(),
-                    mk_lit(token::$tok_type, $tok_contents, Some("suffix")),
-                );
-                // with a whitespace separator
-                assert_eq!(
-                    setup(&sm, &sh, format!("{} suffix", $input)).next_token(),
-                    mk_lit(token::$tok_type, $tok_contents, None),
-                );
-            }};
-        }
-
-        test!("'a'", Char, "a");
-        test!("b'a'", Byte, "a");
-        test!("\"a\"", Str, "a");
-        test!("b\"a\"", ByteStr, "a");
-        test!("1234", Integer, "1234");
-        test!("0b101", Integer, "0b101");
-        test!("0xABC", Integer, "0xABC");
-        test!("1.0", Float, "1.0");
-        test!("1.0e10", Float, "1.0e10");
-
-        assert_eq!(
-            setup(&sm, &sh, "2us".to_string()).next_token(),
-            mk_lit(token::Integer, "2", Some("us")),
-        );
-        assert_eq!(
-            setup(&sm, &sh, "r###\"raw\"###suffix".to_string()).next_token(),
-            mk_lit(token::StrRaw(3), "raw", Some("suffix")),
-        );
-        assert_eq!(
-            setup(&sm, &sh, "br###\"raw\"###suffix".to_string()).next_token(),
-            mk_lit(token::ByteStrRaw(3), "raw", Some("suffix")),
-        );
-    })
-}
-
-#[test]
-fn nested_block_comments() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        let mut lexer = setup(&sm, &sh, "/* /* */ */'a'".to_string());
-        assert_eq!(lexer.next_token(), token::Comment);
-        assert_eq!(lexer.next_token(), mk_lit(token::Char, "a", None));
-    })
-}
-
-#[test]
-fn crlf_comments() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        let mut lexer = setup(&sm, &sh, "// test\r\n/// test\r\n".to_string());
-        let comment = lexer.next_token();
-        assert_eq!(comment.kind, token::Comment);
-        assert_eq!((comment.span.lo(), comment.span.hi()), (BytePos(0), BytePos(7)));
-        assert_eq!(lexer.next_token(), token::Whitespace);
-        assert_eq!(
-            lexer.next_token(),
-            token::DocComment(CommentKind::Line, AttrStyle::Outer, Symbol::intern(" test"))
-        );
-    })
-}
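
Every test above pays the same setup tax: a fresh SourceMap, a ParseSess whose diagnostics go to io::sink(), and symbol interning, all to compare a handful of tokens. The replacements below drive rustc_lexer directly, which is a pure function from the input string to a token iterator. A minimal sketch of that low-level API, assuming the in-tree rustc_lexer crate as of this change (`tokenize` yielding `Token { kind, len }`); byte positions, which `t1` asserted via `pos()`, are reconstructed by the caller from the lengths:

use rustc_lexer::tokenize;

fn print_spans(src: &str) {
    let mut pos = 0;
    for token in tokenize(src) {
        // A raw token records only its kind and byte length; offsets are
        // accumulated by the consumer, so no session or source map is needed.
        println!("{}..{}: {:?}", pos, pos + token.len, token.kind);
        pos += token.len;
    }
}

Calling print_spans("a::b") would print four one-byte tokens (Ident, Colon, Colon, Ident), which is what the doublecolon_parsing tests checked through the full StringReader stack.
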
diff --git a/compiler/rustc_lexer/src/tests.rs b/compiler/rustc_lexer/src/tests.rs
index a1ea5ceb1f612..94017b7b286e2 100644
--- a/compiler/rustc_lexer/src/tests.rs
+++ b/compiler/rustc_lexer/src/tests.rs
@@ -128,6 +128,34 @@ fn check_lexing(src: &str, expect: Expect) {
     expect.assert_eq(&actual)
 }
 
+#[test]
+fn smoke_test() {
+    check_lexing(
+        "/* my source file */ fn main() { println!(\"zebra\"); }\n",
+        expect![[r#"
+            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 20 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Ident, len: 2 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Ident, len: 4 }
+            Token { kind: OpenParen, len: 1 }
+            Token { kind: CloseParen, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: OpenBrace, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Ident, len: 7 }
+            Token { kind: Bang, len: 1 }
+            Token { kind: OpenParen, len: 1 }
+            Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 7 }, len: 7 }
+            Token { kind: CloseParen, len: 1 }
+            Token { kind: Semi, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: CloseBrace, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+        "#]],
+    )
+}
+
 #[test]
 fn comment_flavors() {
     check_lexing(
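
The body of check_lexing is elided by the hunk context, but given the signature and the trailing expect.assert_eq(&actual), it is presumably along these lines:

// Sketch of the harness (the middle line is assumed): Debug-format each
// raw token, one per line, and compare against the inline snapshot.
fn check_lexing(src: &str, expect: Expect) {
    let actual: String = tokenize(src).map(|token| format!("{:?}\n", token)).collect();
    expect.assert_eq(&actual)
}

This is what makes the move cheap: on mismatch expect_test prints a diff, and running the suite with the UPDATE_EXPECT=1 environment variable rewrites the expect![[...]] literals in place, so snapshots like smoke_test's can be blessed from real lexer output rather than written by hand.
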
@@ -143,25 +171,117 @@ fn comment_flavors() {
 /*! inner doc block */
 ",
         expect![[r#"
-        Token { kind: Whitespace, len: 1 }
-        Token { kind: LineComment { doc_style: None }, len: 7 }
-        Token { kind: Whitespace, len: 1 }
-        Token { kind: LineComment { doc_style: None }, len: 17 }
-        Token { kind: Whitespace, len: 1 }
-        Token { kind: LineComment { doc_style: Some(Outer) }, len: 18 }
-        Token { kind: Whitespace, len: 1 }
-        Token { kind: LineComment { doc_style: Some(Inner) }, len: 18 }
-        Token { kind: Whitespace, len: 1 }
-        Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
-        Token { kind: Whitespace, len: 1 }
-        Token { kind: BlockComment { doc_style: None, terminated: true }, len: 4 }
-        Token { kind: Whitespace, len: 1 }
-        Token { kind: BlockComment { doc_style: None, terminated: true }, len: 18 }
-        Token { kind: Whitespace, len: 1 }
-        Token { kind: BlockComment { doc_style: Some(Outer), terminated: true }, len: 22 }
-        Token { kind: Whitespace, len: 1 }
-        Token { kind: BlockComment { doc_style: Some(Inner), terminated: true }, len: 22 }
-        Token { kind: Whitespace, len: 1 }
-        "#]],
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: LineComment { doc_style: None }, len: 7 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: LineComment { doc_style: None }, len: 17 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: LineComment { doc_style: Some(Outer) }, len: 18 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: LineComment { doc_style: Some(Inner) }, len: 18 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 4 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 18 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: BlockComment { doc_style: Some(Outer), terminated: true }, len: 22 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: BlockComment { doc_style: Some(Inner), terminated: true }, len: 22 }
+            Token { kind: Whitespace, len: 1 }
+        "#]],
+    )
+}
+
+#[test]
+fn nested_block_comments() {
+    check_lexing(
+        "/* /* */ */'a'",
+        expect![[r#"
+            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
+            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
+        "#]],
+    )
+}
+
+#[test]
+fn characters() {
+    check_lexing(
+        "'a' ' ' '\\n'",
+        expect![[r#"
+            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 4 }, len: 4 }
+        "#]],
+    );
+}
+
+#[test]
+fn lifetime() {
+    check_lexing(
+        "'abc",
+        expect![[r#"
+            Token { kind: Lifetime { starts_with_number: false }, len: 4 }
+        "#]],
+    );
+}
+
+#[test]
+fn raw_string() {
+    check_lexing(
+        "r###\"\"#a\\b\x00c\"\"###",
+        expect![[r#"
+            Token { kind: Literal { kind: RawStr { n_hashes: 3, err: None }, suffix_start: 17 }, len: 17 }
+        "#]],
+    )
+}
+
+#[test]
+fn literal_suffixes() {
+    check_lexing(
+        r####"
+'a'
+b'a'
+"a"
+b"a"
+1234
+0b101
+0xABC
+1.0
+1.0e10
+2us
+r###"raw"###suffix
+br###"raw"###suffix
+"####,
+        expect![[r#"
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Byte { terminated: true }, suffix_start: 4 }, len: 4 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 3 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: ByteStr { terminated: true }, suffix_start: 4 }, len: 4 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 4 }, len: 4 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Int { base: Binary, empty_int: false }, suffix_start: 5 }, len: 5 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Int { base: Hexadecimal, empty_int: false }, suffix_start: 5 }, len: 5 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Float { base: Decimal, empty_exponent: false }, suffix_start: 3 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Float { base: Decimal, empty_exponent: false }, suffix_start: 6 }, len: 6 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: RawStr { n_hashes: 3, err: None }, suffix_start: 12 }, len: 18 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: RawByteStr { n_hashes: 3, err: None }, suffix_start: 13 }, len: 19 }
+            Token { kind: Whitespace, len: 1 }
+        "#]],
     )
 }
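
One representational difference against the deleted tests is worth noting: mk_lit split a literal into (kind, symbol, suffix), while a raw rustc_lexer Literal is a single token whose suffix_start marks the byte where the suffix begins. So "2us" lexes as len: 3, suffix_start: 1, carrying the same information as the old mk_lit(token::Integer, "2", Some("us")). A sketch of recovering the pieces (split_suffix is a hypothetical helper, and suffix_start is assumed to be a usize, as the Debug output above suggests):

use rustc_lexer::{tokenize, TokenKind};

// Split a single-token literal into (literal text, suffix).
fn split_suffix(src: &str) -> Option<(&str, &str)> {
    let token = tokenize(src).next()?;
    match token.kind {
        // src[..suffix_start] is the literal proper; the suffix follows it.
        TokenKind::Literal { suffix_start, .. } => {
            Some((&src[..suffix_start], &src[suffix_start..token.len]))
        }
        _ => None,
    }
}

fn main() {
    assert_eq!(split_suffix("2us"), Some(("2", "us")));
    // r###"raw"###suffix: len 18, suffix_start 12, per the snapshot above.
    assert_eq!(split_suffix(r####"r###"raw"###suffix"####), Some((r####"r###"raw"###"####, "suffix")));
}
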
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index a65d344681924..c4ef35bc30c70 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -439,10 +439,6 @@ impl<'a> StringReader<'a> {
         (lit_kind, id)
     }
 
-    pub fn pos(&self) -> BytePos {
-        self.pos
-    }
-
     #[inline]
    fn src_index(&self, pos: BytePos) -> usize {
         (pos - self.start_pos).to_usize()
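
With t1 deleted, pos() has no callers left, so it drops out of StringReader's public surface; the lexer's cursor is now observable only through the spans it puts on tokens. The assertion t1 made is still expressible span-wise, e.g. (a sketch reusing the names from the deleted test):

// After lexing `main` (bytes 24..28), the span's hi() carries the same
// fact that string_reader.pos() used to report:
let tok = string_reader.next_token();
assert_eq!(tok.span.hi(), BytePos(28)); // was: assert_eq!(string_reader.pos(), BytePos(28));
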