From c3ce245ba68f62edfc5818f003b2b78a02ce5c03 Mon Sep 17 00:00:00 2001 From: "Felix S. Klock II" Date: Fri, 1 Aug 2014 17:11:53 +0200 Subject: [PATCH 1/2] quote_expr macro: embed Ident using special encoding that preserves hygiene. This adds support to `quote_expr!` and friends for round-trip hygienic preservation of Ident. Here are the pieces of the puzzle: * adding a method for encoding Ident for re-reading into token tree. * Support for reading such encoded Idents in the lexer. Note that one must peek ahead for MOD_SEP after scan_embedded_hygienic_ident. * To ensure that encoded Idents are only read when we are in the midst of expanding a `quote_expr` or similar, added a `read_embedded_ident` flag on `StringReader`. * pprust support for encoding Ident's as (uint,uint) pairs (for hygiene). --- src/libsyntax/ast.rs | 6 ++ src/libsyntax/ext/quote.rs | 99 ++++++++++++++++++++++++++++- src/libsyntax/parse/lexer/mod.rs | 105 +++++++++++++++++++++++++++++++ src/libsyntax/parse/mod.rs | 69 ++++++++++++++++++++ src/libsyntax/print/pprust.rs | 97 +++++++++++++++++++--------- 5 files changed, 345 insertions(+), 31 deletions(-) diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 629b21875c9b6..0ea57e41b0ffc 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -55,6 +55,12 @@ impl Ident { pub fn as_str<'a>(&'a self) -> &'a str { self.name.as_str() } + + pub fn encode_with_hygiene(&self) -> String { + format!("\x00name_{:u},ctxt_{:u}\x00", + self.name.uint(), + self.ctxt) + } } impl Show for Ident { diff --git a/src/libsyntax/ext/quote.rs b/src/libsyntax/ext/quote.rs index cc07b5312589f..d7d6c20b475a6 100644 --- a/src/libsyntax/ext/quote.rs +++ b/src/libsyntax/ext/quote.rs @@ -97,6 +97,15 @@ pub mod rt { fn to_source(&self) -> String; } + // FIXME (Issue #16472): This should go away after ToToken impls + // are revised to go directly to token-trees. 
+ trait ToSourceWithHygiene : ToSource { + // Takes a thing and generates a string containing rust code + // for it, encoding Idents as special byte sequences to + // maintain hygiene across serialization and deserialization. + fn to_source_with_hygiene(&self) -> String; + } + macro_rules! impl_to_source( (Gc<$t:ty>, $pp:ident) => ( impl ToSource for Gc<$t> { @@ -104,6 +113,11 @@ pub mod rt { pprust::$pp(&**self) } } + impl ToSourceWithHygiene for Gc<$t> { + fn to_source_with_hygiene(&self) -> String { + pprust::with_hygiene::$pp(&**self) + } + } ); ($t:ty, $pp:ident) => ( impl ToSource for $t { @@ -111,6 +125,11 @@ pub mod rt { pprust::$pp(self) } } + impl ToSourceWithHygiene for $t { + fn to_source_with_hygiene(&self) -> String { + pprust::with_hygiene::$pp(self) + } + } ); ) @@ -122,6 +141,15 @@ pub mod rt { .to_string() } + fn slice_to_source_with_hygiene<'a, T: ToSourceWithHygiene>( + sep: &'static str, xs: &'a [T]) -> String { + xs.iter() + .map(|i| i.to_source_with_hygiene()) + .collect::>() + .connect(sep) + .to_string() + } + macro_rules! 
impl_to_source_slice( ($t:ty, $sep:expr) => ( impl<'a> ToSource for &'a [$t] { @@ -129,6 +157,12 @@ pub mod rt { slice_to_source($sep, *self) } } + + impl<'a> ToSourceWithHygiene for &'a [$t] { + fn to_source_with_hygiene(&self) -> String { + slice_to_source_with_hygiene($sep, *self) + } + } ) ) @@ -138,6 +172,12 @@ pub mod rt { } } + impl ToSourceWithHygiene for ast::Ident { + fn to_source_with_hygiene(&self) -> String { + self.encode_with_hygiene() + } + } + impl_to_source!(ast::Ty, ty_to_string) impl_to_source!(ast::Block, block_to_string) impl_to_source!(ast::Arg, arg_to_string) @@ -156,6 +196,11 @@ pub mod rt { pprust::attribute_to_string(&dummy_spanned(*self)) } } + impl ToSourceWithHygiene for ast::Attribute_ { + fn to_source_with_hygiene(&self) -> String { + self.to_source() + } + } impl<'a> ToSource for &'a str { fn to_source(&self) -> String { @@ -164,12 +209,22 @@ pub mod rt { pprust::lit_to_string(&lit) } } + impl<'a> ToSourceWithHygiene for &'a str { + fn to_source_with_hygiene(&self) -> String { + self.to_source() + } + } impl ToSource for () { fn to_source(&self) -> String { "()".to_string() } } + impl ToSourceWithHygiene for () { + fn to_source_with_hygiene(&self) -> String { + self.to_source() + } + } impl ToSource for bool { fn to_source(&self) -> String { @@ -177,6 +232,11 @@ pub mod rt { pprust::lit_to_string(&lit) } } + impl ToSourceWithHygiene for bool { + fn to_source_with_hygiene(&self) -> String { + self.to_source() + } + } impl ToSource for char { fn to_source(&self) -> String { @@ -184,6 +244,11 @@ pub mod rt { pprust::lit_to_string(&lit) } } + impl ToSourceWithHygiene for char { + fn to_source_with_hygiene(&self) -> String { + self.to_source() + } + } macro_rules! 
impl_to_source_int( (signed, $t:ty, $tag:ident) => ( @@ -194,6 +259,11 @@ pub mod rt { pprust::lit_to_string(&dummy_spanned(lit)) } } + impl ToSourceWithHygiene for $t { + fn to_source_with_hygiene(&self) -> String { + self.to_source() + } + } ); (unsigned, $t:ty, $tag:ident) => ( impl ToSource for $t { @@ -202,6 +272,11 @@ pub mod rt { pprust::lit_to_string(&dummy_spanned(lit)) } } + impl ToSourceWithHygiene for $t { + fn to_source_with_hygiene(&self) -> String { + self.to_source() + } + } ); ) @@ -223,7 +298,7 @@ pub mod rt { ($t:ty) => ( impl ToTokens for $t { fn to_tokens(&self, cx: &ExtCtxt) -> Vec { - cx.parse_tts(self.to_source()) + cx.parse_tts_with_hygiene(self.to_source_with_hygiene()) } } ) @@ -233,7 +308,7 @@ pub mod rt { ($t:ty) => ( impl<'a> ToTokens for $t { fn to_tokens(&self, cx: &ExtCtxt) -> Vec { - cx.parse_tts(self.to_source()) + cx.parse_tts_with_hygiene(self.to_source_with_hygiene()) } } ) @@ -272,7 +347,13 @@ pub mod rt { fn parse_item(&self, s: String) -> Gc; fn parse_expr(&self, s: String) -> Gc; fn parse_stmt(&self, s: String) -> Gc; - fn parse_tts(&self, s: String) -> Vec ; + fn parse_tts(&self, s: String) -> Vec; + } + + trait ExtParseUtilsWithHygiene { + // FIXME (Issue #16472): This should go away after ToToken impls + // are revised to go directly to token-trees. 
+ fn parse_tts_with_hygiene(&self, s: String) -> Vec; } impl<'a> ExtParseUtils for ExtCtxt<'a> { @@ -315,6 +396,18 @@ pub mod rt { } } + impl<'a> ExtParseUtilsWithHygiene for ExtCtxt<'a> { + + fn parse_tts_with_hygiene(&self, s: String) -> Vec { + use parse::with_hygiene::parse_tts_from_source_str; + parse_tts_from_source_str("".to_string(), + s, + self.cfg(), + self.parse_sess()) + } + + } + } pub fn expand_quote_tokens(cx: &mut ExtCtxt, diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 625c03ec13db9..e5275af5ccabe 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -17,7 +17,9 @@ use parse::token; use parse::token::{str_to_ident}; use std::char; +use std::fmt; use std::mem::replace; +use std::num; use std::rc::Rc; use std::str; @@ -55,6 +57,11 @@ pub struct StringReader<'a> { /* cached: */ pub peek_tok: token::Token, pub peek_span: Span, + + // FIXME (Issue #16472): This field should go away after ToToken impls + // are revised to go directly to token-trees. + /// Whether `\x00name_N,ctxt_C\x00` is interpreted as an encoded ast::Ident. + read_embedded_ident: bool, } impl<'a> Reader for StringReader<'a> { @@ -106,6 +113,17 @@ impl<'a> Reader for TtReader<'a> { } } +// FIXME (Issue #16472): This function should go away after +// ToToken impls are revised to go directly to token-trees. 
+pub fn make_reader_with_embedded_idents<'b>(span_diagnostic: &'b SpanHandler, + filemap: Rc) + -> StringReader<'b> { + let mut sr = StringReader::new_raw(span_diagnostic, filemap); + sr.read_embedded_ident = true; + sr.advance_token(); + sr +} + impl<'a> StringReader<'a> { /// For comments.rs, which hackily pokes into pos and curr pub fn new_raw<'b>(span_diagnostic: &'b SpanHandler, @@ -120,6 +138,7 @@ impl<'a> StringReader<'a> { /* dummy values; not read */ peek_tok: token::EOF, peek_span: codemap::DUMMY_SP, + read_embedded_ident: false, }; sr.bump(); sr @@ -512,6 +531,81 @@ impl<'a> StringReader<'a> { }) } + // FIXME (Issue #16472): The scan_embedded_hygienic_ident function + // should go away after we revise the syntax::ext::quote::ToToken + // impls to go directly to token-trees instead of thing -> string + // -> token-trees. (The function is currently used to resolve + // Issues #15750 and #15962.) + // + // Since this function is only used for certain internal macros, + // and the functionality it provides is not exposed to end user + // programs, pnkfelix deliberately chose to write it in a way that + // favors rustc debugging effectiveness over runtime efficiency. + + /// Scan through input of form \x00name_NNNNNN,ctxt_CCCCCCC\x00 + /// where: `NNNNNN` is a string of characters forming an integer + /// (the name) and `CCCCCCC` is a string of characters forming an + /// integer (the ctxt), separated by a comma and delimited by a + /// `\x00` marker. 
+ #[inline(never)] + fn scan_embedded_hygienic_ident(&mut self) -> ast::Ident { + fn bump_expecting_char<'a,D:fmt::Show>(r: &mut StringReader<'a>, + c: char, + described_c: D, + where: &str) { + match r.curr { + Some(r_c) if r_c == c => r.bump(), + Some(r_c) => fail!("expected {}, hit {}, {}", described_c, r_c, where), + None => fail!("expected {}, hit EOF, {}", described_c, where), + } + } + + let where = "while scanning embedded hygienic ident"; + + // skip over the leading `\x00` + bump_expecting_char(self, '\x00', "nul-byte", where); + + // skip over the "name_" + for c in "name_".chars() { + bump_expecting_char(self, c, c, where); + } + + let start_bpos = self.last_pos; + let base = 10; + + // find the integer representing the name + self.scan_digits(base); + let encoded_name : u32 = self.with_str_from(start_bpos, |s| { + num::from_str_radix(s, 10).unwrap_or_else(|| { + fail!("expected digits representing a name, got `{}`, {}, range [{},{}]", + s, where, start_bpos, self.last_pos); + }) + }); + + // skip over the `,` + bump_expecting_char(self, ',', "comma", where); + + // skip over the "ctxt_" + for c in "ctxt_".chars() { + bump_expecting_char(self, c, c, where); + } + + // find the integer representing the ctxt + let start_bpos = self.last_pos; + self.scan_digits(base); + let encoded_ctxt : ast::SyntaxContext = self.with_str_from(start_bpos, |s| { + num::from_str_radix(s, 10).unwrap_or_else(|| { + fail!("expected digits representing a ctxt, got `{}`, {}", s, where); + }) + }); + + // skip over the `\x00` + bump_expecting_char(self, '\x00', "nul-byte", where); + + ast::Ident { name: ast::Name(encoded_name), + ctxt: encoded_ctxt, } + } + /// Scan through any digits (base `radix`) or underscores, and return how /// many digits there were. 
fn scan_digits(&mut self, radix: uint) -> uint { @@ -839,6 +933,17 @@ impl<'a> StringReader<'a> { return self.scan_number(c.unwrap()); } + if self.read_embedded_ident { + match (c.unwrap(), self.nextch(), self.nextnextch()) { + ('\x00', Some('n'), Some('a')) => { + let ast_ident = self.scan_embedded_hygienic_ident(); + let is_mod_name = self.curr_is(':') && self.nextch_is(':'); + return token::IDENT(ast_ident, is_mod_name); + } + _ => {} + } + } + match c.expect("next_token_inner called at EOF") { // One-byte tokens. ';' => { self.bump(); return token::SEMI; } diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index 5b70ed609d98f..af1f296a6ca31 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -144,6 +144,8 @@ pub fn parse_stmt_from_source_str(name: String, maybe_aborted(p.parse_stmt(attrs),p) } +// Note: keep in sync with `with_hygiene::parse_tts_from_source_str` +// until #16472 is resolved. pub fn parse_tts_from_source_str(name: String, source: String, cfg: ast::CrateConfig, @@ -160,6 +162,8 @@ pub fn parse_tts_from_source_str(name: String, maybe_aborted(p.parse_all_token_trees(),p) } +// Note: keep in sync with `with_hygiene::new_parser_from_source_str` +// until #16472 is resolved. // Create a new parser from a source string pub fn new_parser_from_source_str<'a>(sess: &'a ParseSess, cfg: ast::CrateConfig, @@ -192,6 +196,8 @@ pub fn new_sub_parser_from_file<'a>(sess: &'a ParseSess, p } +// Note: keep this in sync with `with_hygiene::filemap_to_parser` until +// #16472 is resolved. /// Given a filemap and config, return a parser pub fn filemap_to_parser<'a>(sess: &'a ParseSess, filemap: Rc, @@ -248,6 +254,8 @@ pub fn string_to_filemap(sess: &ParseSess, source: String, path: String) sess.span_diagnostic.cm.new_filemap(path, source) } +// Note: keep this in sync with `with_hygiene::filemap_to_tts` (apart +// from the StringReader constructor), until #16472 is resolved. 
/// Given a filemap, produce a sequence of token-trees pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc) -> Vec { @@ -267,6 +275,67 @@ pub fn tts_to_parser<'a>(sess: &'a ParseSess, Parser::new(sess, cfg, box trdr) } +// FIXME (Issue #16472): The `with_hygiene` mod should go away after +// ToToken impls are revised to go directly to token-trees. +pub mod with_hygiene { + use ast; + use codemap::FileMap; + use parse::parser::Parser; + use std::rc::Rc; + use super::ParseSess; + use super::{maybe_aborted, string_to_filemap, tts_to_parser}; + + // Note: keep this in sync with `super::parse_tts_from_source_str` until + // #16472 is resolved. + pub fn parse_tts_from_source_str(name: String, + source: String, + cfg: ast::CrateConfig, + sess: &ParseSess) -> Vec { + let mut p = new_parser_from_source_str( + sess, + cfg, + name, + source + ); + p.quote_depth += 1u; + // right now this is re-creating the token trees from ... token trees. + maybe_aborted(p.parse_all_token_trees(),p) + } + + // Note: keep this in sync with `super::new_parser_from_source_str` until + // #16472 is resolved. + // Create a new parser from a source string + fn new_parser_from_source_str<'a>(sess: &'a ParseSess, + cfg: ast::CrateConfig, + name: String, + source: String) -> Parser<'a> { + filemap_to_parser(sess, string_to_filemap(sess, source, name), cfg) + } + + // Note: keep this in sync with `super::filemap_to_parser` until + // #16472 is resolved. + /// Given a filemap and config, return a parser + fn filemap_to_parser<'a>(sess: &'a ParseSess, + filemap: Rc, + cfg: ast::CrateConfig) -> Parser<'a> { + tts_to_parser(sess, filemap_to_tts(sess, filemap), cfg) + } + + // Note: keep this in sync with `super::filemap_to_tts` until + // #16472 is resolved. + /// Given a filemap, produce a sequence of token-trees + fn filemap_to_tts(sess: &ParseSess, filemap: Rc) + -> Vec { + // it appears to me that the cfg doesn't matter here... indeed, + // parsing tt's probably shouldn't require a parser at all. 
+ use make_reader = super::lexer::make_reader_with_embedded_idents; + let cfg = Vec::new(); + let srdr = make_reader(&sess.span_diagnostic, filemap); + let mut p1 = Parser::new(sess, cfg, box srdr); + p1.parse_all_token_trees() + } +} + /// Abort if necessary pub fn maybe_aborted(result: T, mut p: Parser) -> T { p.abort_if_errors(); diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index 9d4b7343c8a15..9c1b97f94a825 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -58,7 +58,8 @@ pub struct State<'a> { literals: Option >, cur_cmnt_and_lit: CurrentCommentAndLiteral, boxes: Vec, - ann: &'a PpAnn + ann: &'a PpAnn, + encode_idents_with_hygiene: bool, } pub fn rust_printer(writer: Box) -> State<'static> { @@ -78,7 +79,8 @@ pub fn rust_printer_annotated<'a>(writer: Box, cur_lit: 0 }, boxes: Vec::new(), - ann: ann + ann: ann, + encode_idents_with_hygiene: false, } } @@ -148,7 +150,8 @@ impl<'a> State<'a> { cur_lit: 0 }, boxes: Vec::new(), - ann: ann + ann: ann, + encode_idents_with_hygiene: false, } } } @@ -169,70 +172,77 @@ pub fn to_string(f: |&mut State| -> IoResult<()>) -> String { } } +// FIXME (Issue #16472): the thing_to_string_impls macro should go away +// after we revise the syntax::ext::quote::ToToken impls to go directly +// to token-trees instead of thing -> string -> token-trees. + +macro_rules! 
thing_to_string_impls { + ($to_string:ident) => { + pub fn ty_to_string(ty: &ast::Ty) -> String { - to_string(|s| s.print_type(ty)) + $to_string(|s| s.print_type(ty)) } pub fn pat_to_string(pat: &ast::Pat) -> String { - to_string(|s| s.print_pat(pat)) + $to_string(|s| s.print_pat(pat)) } pub fn arm_to_string(arm: &ast::Arm) -> String { - to_string(|s| s.print_arm(arm)) + $to_string(|s| s.print_arm(arm)) } pub fn expr_to_string(e: &ast::Expr) -> String { - to_string(|s| s.print_expr(e)) + $to_string(|s| s.print_expr(e)) } pub fn lifetime_to_string(e: &ast::Lifetime) -> String { - to_string(|s| s.print_lifetime(e)) + $to_string(|s| s.print_lifetime(e)) } pub fn tt_to_string(tt: &ast::TokenTree) -> String { - to_string(|s| s.print_tt(tt)) + $to_string(|s| s.print_tt(tt)) } pub fn tts_to_string(tts: &[ast::TokenTree]) -> String { - to_string(|s| s.print_tts(tts)) + $to_string(|s| s.print_tts(tts)) } pub fn stmt_to_string(stmt: &ast::Stmt) -> String { - to_string(|s| s.print_stmt(stmt)) + $to_string(|s| s.print_stmt(stmt)) } pub fn item_to_string(i: &ast::Item) -> String { - to_string(|s| s.print_item(i)) + $to_string(|s| s.print_item(i)) } pub fn generics_to_string(generics: &ast::Generics) -> String { - to_string(|s| s.print_generics(generics)) + $to_string(|s| s.print_generics(generics)) } pub fn ty_method_to_string(p: &ast::TypeMethod) -> String { - to_string(|s| s.print_ty_method(p)) + $to_string(|s| s.print_ty_method(p)) } pub fn method_to_string(p: &ast::Method) -> String { - to_string(|s| s.print_method(p)) + $to_string(|s| s.print_method(p)) } pub fn fn_block_to_string(p: &ast::FnDecl) -> String { - to_string(|s| s.print_fn_block_args(p, false)) + $to_string(|s| s.print_fn_block_args(p, false)) } pub fn path_to_string(p: &ast::Path) -> String { - to_string(|s| s.print_path(p, false)) + $to_string(|s| s.print_path(p, false)) } pub fn ident_to_string(id: &ast::Ident) -> String { - to_string(|s| s.print_ident(*id)) + $to_string(|s| s.print_ident(*id)) } pub fn 
fun_to_string(decl: &ast::FnDecl, fn_style: ast::FnStyle, name: ast::Ident, opt_explicit_self: Option, generics: &ast::Generics) -> String { - to_string(|s| { + $to_string(|s| { try!(s.print_fn(decl, Some(fn_style), abi::Rust, name, generics, opt_explicit_self, ast::Inherited)); try!(s.end()); // Close the head box @@ -241,7 +251,7 @@ pub fn fun_to_string(decl: &ast::FnDecl, fn_style: ast::FnStyle, name: ast::Iden } pub fn block_to_string(blk: &ast::Block) -> String { - to_string(|s| { + $to_string(|s| { // containing cbox, will be closed by print-block at } try!(s.cbox(indent_unit)); // head-ibox, will be closed by print-block after { @@ -251,31 +261,57 @@ pub fn block_to_string(blk: &ast::Block) -> String { } pub fn meta_item_to_string(mi: &ast::MetaItem) -> String { - to_string(|s| s.print_meta_item(mi)) + $to_string(|s| s.print_meta_item(mi)) } pub fn attribute_to_string(attr: &ast::Attribute) -> String { - to_string(|s| s.print_attribute(attr)) + $to_string(|s| s.print_attribute(attr)) } pub fn lit_to_string(l: &ast::Lit) -> String { - to_string(|s| s.print_literal(l)) + $to_string(|s| s.print_literal(l)) } pub fn explicit_self_to_string(explicit_self: ast::ExplicitSelf_) -> String { - to_string(|s| s.print_explicit_self(explicit_self, ast::MutImmutable).map(|_| {})) + $to_string(|s| s.print_explicit_self(explicit_self, ast::MutImmutable).map(|_| {})) } pub fn variant_to_string(var: &ast::Variant) -> String { - to_string(|s| s.print_variant(var)) + $to_string(|s| s.print_variant(var)) } pub fn arg_to_string(arg: &ast::Arg) -> String { - to_string(|s| s.print_arg(arg)) + $to_string(|s| s.print_arg(arg)) } pub fn mac_to_string(arg: &ast::Mac) -> String { - to_string(|s| s.print_mac(arg)) + $to_string(|s| s.print_mac(arg)) +} + +} } + +thing_to_string_impls!(to_string) + +// FIXME (Issue #16472): the whole `with_hygiene` mod should go away +// after we revise the syntax::ext::quote::ToToken impls to go directly +// to token-trees instead of thing -> string -> 
token-trees. + +pub mod with_hygiene { + use abi; + use ast; + use std::io::IoResult; + use super::indent_unit; + + // This function is the trick that all the rest of the routines + // hang on. + pub fn to_string_hyg(f: |&mut super::State| -> IoResult<()>) -> String { + super::to_string(|s| { + s.encode_idents_with_hygiene = true; + f(s) + }) + } + + thing_to_string_impls!(to_string_hyg) } pub fn visibility_qualified(vis: ast::Visibility, s: &str) -> String { @@ -1672,7 +1708,12 @@ impl<'a> State<'a> { } pub fn print_ident(&mut self, ident: ast::Ident) -> IoResult<()> { - word(&mut self.s, token::get_ident(ident).get()) + if self.encode_idents_with_hygiene { + let encoded = ident.encode_with_hygiene(); + word(&mut self.s, encoded.as_slice()) + } else { + word(&mut self.s, token::get_ident(ident).get()) + } } pub fn print_name(&mut self, name: ast::Name) -> IoResult<()> { From 9434920b24405588d22c17436b184736881ed933 Mon Sep 17 00:00:00 2001 From: "Felix S. Klock II" Date: Wed, 13 Aug 2014 13:15:31 +0200 Subject: [PATCH 2/2] rustc lexer: regression tests for embedded Idents. I chose to make two of them because I wanted something close to an "end-to-end" test (*), but at the same time I wanted a test that would run on Windows (**). (*) The run-make test serves as the end-to-end: It constructs an input that is trying to subvert the hack and we are going to check that it fails in the attempt). (**) The compile-fail-fulldeps test serves as a more narrow test that will be tested on all platforms. It also attempts to subvert the hack, testing that when you use `new_parser_from_tts`, the resulting parser does not support reading embedded Idents. 
--- src/test/auxiliary/macro_crate_test.rs | 26 +++++++++++ .../macro-crate-cannot-read-embedded-ident.rs | 31 +++++++++++++ .../cannot-read-embedded-idents/Makefile | 28 ++++++++++++ .../create_and_compile.rs | 44 +++++++++++++++++++ 4 files changed, 129 insertions(+) create mode 100644 src/test/compile-fail-fulldeps/macro-crate-cannot-read-embedded-ident.rs create mode 100644 src/test/run-make/cannot-read-embedded-idents/Makefile create mode 100644 src/test/run-make/cannot-read-embedded-idents/create_and_compile.rs diff --git a/src/test/auxiliary/macro_crate_test.rs b/src/test/auxiliary/macro_crate_test.rs index 4936bff5768c9..f6e96cca5c19c 100644 --- a/src/test/auxiliary/macro_crate_test.rs +++ b/src/test/auxiliary/macro_crate_test.rs @@ -31,6 +31,7 @@ macro_rules! unexported_macro (() => (3i)) #[plugin_registrar] pub fn plugin_registrar(reg: &mut Registry) { reg.register_macro("make_a_1", expand_make_a_1); + reg.register_macro("forged_ident", expand_forged_ident); reg.register_syntax_extension( token::intern("into_foo"), ItemModifier(expand_into_foo)); @@ -52,4 +53,29 @@ fn expand_into_foo(cx: &mut ExtCtxt, sp: Span, attr: Gc, it: Gc) } } +fn expand_forged_ident(cx: &mut ExtCtxt, sp: Span, tts: &[TokenTree]) -> Box { + use syntax::ext::quote::rt::*; + + if !tts.is_empty() { + cx.span_fatal(sp, "forged_ident takes no arguments"); + } + + // Most of this is modelled after the expansion of the `quote_expr!` + // macro ... + let parse_sess = cx.parse_sess(); + let cfg = cx.cfg(); + + // ... except this is where we inject a forged identifier, + // and deliberately do not call `cx.parse_tts_with_hygiene` + // (because we are testing that this will be *rejected* + // by the default parser). 
+ + let expr = { + let tt = cx.parse_tts("\x00name_2,ctxt_0\x00".to_string()); + let mut parser = new_parser_from_tts(parse_sess, cfg, tt); + parser.parse_expr() + }; + MacExpr::new(expr) +} + pub fn foo() {} diff --git a/src/test/compile-fail-fulldeps/macro-crate-cannot-read-embedded-ident.rs b/src/test/compile-fail-fulldeps/macro-crate-cannot-read-embedded-ident.rs new file mode 100644 index 0000000000000..268b6e6aa0fa6 --- /dev/null +++ b/src/test/compile-fail-fulldeps/macro-crate-cannot-read-embedded-ident.rs @@ -0,0 +1,31 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// aux-build:macro_crate_test.rs +// ignore-stage1 +// ignore-android +// error-pattern: unknown start of token: \x00 + +// Issue #15750 and #15962 : this test is checking that the standard +// parser rejects embedded idents. pnkfelix did not want to attempt +// to make a test file that itself used the embedded ident input form, +// since he worried that would be difficult to work with in many text +// editors, so instead he made a macro that expands into the embedded +// ident form. + +#![feature(phase)] + +#[phase(plugin)] +extern crate macro_crate_test; + +fn main() { + let x = 0; + assert_eq!(3, forged_ident!()); +} diff --git a/src/test/run-make/cannot-read-embedded-idents/Makefile b/src/test/run-make/cannot-read-embedded-idents/Makefile new file mode 100644 index 0000000000000..0d047be02ca1d --- /dev/null +++ b/src/test/run-make/cannot-read-embedded-idents/Makefile @@ -0,0 +1,28 @@ +-include ../tools.mk + +# Issue #15750, #15962 : This test ensures that our special embedded +# ident syntax hack is not treated as legitimate input by the lexer in +# normal mode. 
+# +# It is modelled after the `unicode-input/` test, since we need to +# create files with syntax that can trip up normal text editing tools +# (namely text with embedded nul-bytes). + +# This test attempts to run rustc itself from the compiled binary; but +# that means that you need to set the LD_LIBRARY_PATH for rustc itself +# while running create_and_compile, and that won't work for stage1. + +# FIXME ignore windows +ifndef IS_WINDOWS +ifeq ($(RUST_BUILD_STAGE),1) +DOTEST= +else +DOTEST=dotest +endif +endif + +all: $(DOTEST) + +dotest: + $(RUSTC) create_and_compile.rs + $(call RUN,create_and_compile) "$(RUSTC)" "$(TMPDIR)" diff --git a/src/test/run-make/cannot-read-embedded-idents/create_and_compile.rs b/src/test/run-make/cannot-read-embedded-idents/create_and_compile.rs new file mode 100644 index 0000000000000..2d8fa352e2713 --- /dev/null +++ b/src/test/run-make/cannot-read-embedded-idents/create_and_compile.rs @@ -0,0 +1,44 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +use std::os; +use std::io::{File, Command}; + +// creates broken.rs, which has the Ident \x00name_0,ctxt_0\x00 +// embedded within it, and then attempts to compile broken.rs with the +// provided `rustc` + +fn main() { + let args = os::args(); + let rustc = args[1].as_slice(); + let tmpdir = Path::new(args[2].as_slice()); + + let main_file = tmpdir.join("broken.rs"); + let _ = File::create(&main_file).unwrap() + .write_str("pub fn main() { + let \x00name_0,ctxt_0\x00 = 3i; + println!(\"{}\", \x00name_0,ctxt_0\x00); + }"); + + // rustc is passed to us with --out-dir and -L etc., so we + // can't exec it directly + let result = Command::new("sh") + .arg("-c") + .arg(format!("{} {}", + rustc, + main_file.as_str() + .unwrap()).as_slice()) + .output().unwrap(); + let err = String::from_utf8_lossy(result.error.as_slice()); + + // positive test so that this test will be updated when the + // compiler changes. + assert!(err.as_slice().contains("unknown start of token")) +}