diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 6ce0b66ef6a45..34b9968cebc66 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -125,13 +125,9 @@ symbols! { AcqRel, Acquire, AddToDiagnostic, - Alignment, Any, Arc, - Argument, - ArgumentV1, ArgumentV1Methods, - Arguments, AsMut, AsRef, AssertParamIsClone, @@ -160,14 +156,12 @@ symbols! { Break, C, CStr, - CString, Capture, Center, Clone, Context, Continue, Copy, - Count, Cow, Debug, Decodable, @@ -188,15 +182,11 @@ symbols! { Error, File, FileType, - FormatSpec, Formatter, From, FromIterator, FromResidual, - Future, FutureOutput, - FxHashMap, - FxHashSet, GlobalAlloc, Hash, HashMap, @@ -223,7 +213,6 @@ symbols! { LocalKey, Mutex, MutexGuard, - N, NonZeroI128, NonZeroI16, NonZeroI32, @@ -248,7 +237,6 @@ symbols! { Path, PathBuf, Pending, - Pin, Pointer, Poll, ProcMacro, @@ -284,7 +272,6 @@ symbols! { StructuralPartialEq, SubdiagnosticMessage, Sync, - T, Target, ToOwned, ToString, @@ -298,7 +285,6 @@ symbols! { TyCtxt, TyKind, Unknown, - UnsafeArg, Vec, VecDeque, Wrapper, @@ -352,7 +338,6 @@ symbols! { allow_fail, allow_internal_unsafe, allow_internal_unstable, - allowed, alu32, always, and, @@ -368,7 +353,6 @@ symbols! { arm, arm_target_feature, array, - arrays, as_ptr, as_ref, as_str, @@ -396,7 +380,6 @@ symbols! { async_await, async_closure, async_fn_in_trait, - atomic, atomic_mod, atomics, att_syntax, @@ -439,7 +422,6 @@ symbols! { breakpoint, bridge, bswap, - c_str, c_unwind, c_variadic, call, @@ -540,7 +522,6 @@ symbols! { const_try, constant, constructor, - context, copy, copy_closures, copy_nonoverlapping, @@ -583,9 +564,7 @@ symbols! { debug_assert_macro, debug_assert_ne_macro, debug_assertions, - debug_struct, debug_struct_fields_finish, - debug_tuple, debug_tuple_fields_finish, debugger_visualizer, decl_macro, @@ -610,7 +589,6 @@ symbols! 
{ derive_default_enum, destruct, destructuring_assignment, - diagnostic, direct, discriminant_kind, discriminant_type, @@ -701,15 +679,11 @@ symbols! { fence, ferris: "🦀", fetch_update, - ffi, ffi_const, ffi_pure, ffi_returns_twice, - field, field_init_shorthand, file, - fill, - flags, float, float_to_int_unchecked, floorf32, @@ -727,7 +701,6 @@ symbols! { fn_ptr_trait, forbid, forget, - format, format_alignment, format_args, format_args_capture, @@ -754,7 +727,6 @@ symbols! { from_yeet, fsub_fast, fundamental, - future, future_trait, gdb_script_file, ge, @@ -947,7 +919,6 @@ symbols! { minnumf64, mips_target_feature, miri, - misc, mmx_reg, modifiers, module, @@ -1041,7 +1012,6 @@ symbols! { on_unimplemented, oom, opaque, - ops, opt_out_copy, optimize, optimize_attribute, @@ -1105,7 +1075,6 @@ symbols! { powif64, pre_dash_lto: "pre-lto", precise_pointer_size_matching, - precision, pref_align_of, prefetch_read_data, prefetch_read_instruction, @@ -1115,7 +1084,6 @@ symbols! { prelude, prelude_import, preserves_flags, - primitive, print_macro, println_macro, proc_dash_macro: "proc-macro", @@ -1450,7 +1418,6 @@ symbols! { structural_match, structural_peq, structural_teq, - sty, sub, sub_assign, sub_with_overflow, @@ -1589,7 +1556,6 @@ symbols! { used_with_arg, using, usize, - v1, va_arg, va_copy, va_end, @@ -1606,7 +1572,6 @@ symbols! { vfp2, vis, visible_private_types, - volatile, volatile_copy_memory, volatile_copy_nonoverlapping_memory, volatile_load, @@ -2006,7 +1971,7 @@ pub mod kw { // This module has a very short name because it's used a lot. /// This module contains all the defined non-keyword `Symbol`s. /// -/// Given that `sym` is imported, use them like `sym::symbol_name`. +/// Given that `sym` is imported, use them like `sym::<symbol_name>`. /// For example `sym::rustfmt` or `sym::u8`. 
pub mod sym { use super::Symbol; diff --git a/src/tools/tidy/src/lib.rs b/src/tools/tidy/src/lib.rs index 35000320d1abf..81826cd8646cd 100644 --- a/src/tools/tidy/src/lib.rs +++ b/src/tools/tidy/src/lib.rs @@ -64,6 +64,7 @@ pub mod pal; pub mod primitive_docs; pub mod rustdoc_gui_tests; pub mod style; +pub mod symbols; pub mod target_specific_tests; pub mod tests_placement; pub mod ui_tests; diff --git a/src/tools/tidy/src/main.rs b/src/tools/tidy/src/main.rs index f59406c404bab..c05856794eee6 100644 --- a/src/tools/tidy/src/main.rs +++ b/src/tools/tidy/src/main.rs @@ -11,7 +11,6 @@ use std::env; use std::num::NonZeroUsize; use std::path::PathBuf; use std::process; -use std::str::FromStr; use std::sync::atomic::{AtomicBool, Ordering}; use std::thread::{self, scope, ScopedJoinHandle}; @@ -20,15 +19,18 @@ fn main() { let cargo: PathBuf = env::args_os().nth(2).expect("need path to cargo").into(); let output_directory: PathBuf = env::args_os().nth(3).expect("need path to output directory").into(); - let concurrency: NonZeroUsize = - FromStr::from_str(&env::args().nth(4).expect("need concurrency")) - .expect("concurrency must be a number"); + let concurrency: NonZeroUsize = env::args() + .nth(4) + .expect("need concurrency") + .parse() + .expect("concurrency must be a non-zero number"); let src_path = root_path.join("src"); let tests_path = root_path.join("tests"); let library_path = root_path.join("library"); let compiler_path = root_path.join("compiler"); let librustdoc_path = src_path.join("librustdoc"); + let tools_path = src_path.join("tools"); let args: Vec = env::args().skip(1).collect(); @@ -96,6 +98,7 @@ fn main() { // Checks that only make sense for the compiler. check!(error_codes, &root_path, &[&compiler_path, &librustdoc_path], verbose); + check!(symbols, &compiler_path, &librustdoc_path, &tools_path); // Checks that only make sense for the std libs. 
check!(pal, &library_path);
diff --git a/src/tools/tidy/src/symbols.rs b/src/tools/tidy/src/symbols.rs
new file mode 100644
index 0000000000000..b7de4d865627d
--- /dev/null
+++ b/src/tools/tidy/src/symbols.rs
@@ -0,0 +1,194 @@
+//! Tidy check to ensure that there are no unused pre-interned symbols.
+//!
+//! Symbols are defined in the `symbols!` macro call in `compiler/rustc_span/src/symbol.rs`.
+
+use crate::walk::{filter_not_rust, walk_many};
+use regex::Regex;
+use std::{collections::HashMap, path::Path};
+
+/// Symbols that are not used as `sym::*`, but should be explicitly ignored by
+/// this check.
+static IGNORELIST: &[&str] = &[
+    // a
+];
+
+/// Symbols that are used, but not picked up by this check.
+///
+/// This can happen if a symbol is used in macro interpolation and the macro
+/// is not explicitly handled below.
+static ALLOWLIST: &[&str] = &[
+    "Hasher",
+    "StructuralEq",
+    "StructuralPartialEq",
+    "alu32",
+    "avx",
+    "avx512bw",
+    "avx512f",
+    "cr",
+    "d32",
+    "derive_const",
+    "local",
+    "neon",
+    "position",
+    "rust_begin_unwind",
+    "rust_eh_catch_typeinfo",
+    "rust_eh_personality",
+    "rustc_dump_env_program_clauses",
+    "rustc_dump_program_clauses",
+    "sse",
+    "vfp2",
+    "width",
+    "xer",
+];
+
+/// Reports every pre-interned symbol that none of the scanned sources refers to.
+///
+/// The symbol list is read from `rustc_span/src/symbol.rs` below `compiler`. A symbol counts
+/// as used when any of the following holds:
+///
+/// * its name contains `reg`, or it is the special case `integer`;
+/// * it is listed in [`ALLOWLIST`];
+/// * it is picked up by `find_features` or `find_builtins`;
+/// * it is written as `sym::` followed by its name in a file visited by `find_sym` below
+///   `compiler`, `librustdoc`, or the `clippy`, `rustfmt` and `miri` directories of `tools`.
+///
+/// Every remaining symbol that is not in [`IGNORELIST`] is listed in a single `tidy_error!`
+/// report, which is handed `bad`.
+///
+/// # Panics
+///
+/// Panics if `symbol.rs` cannot be read, if its `Symbols` block cannot be located, or if one
+/// of the scans names a symbol that is not part of the symbol map.
+pub fn check(compiler: &Path, librustdoc: &Path, tools: &Path, bad: &mut bool) {
+    // Find the `Symbols { ... }` block in `rustc_span::symbol`.
+    let symbols_path = compiler.join("rustc_span/src/symbol.rs");
+    let symbols_file_contents: String = std::fs::read_to_string(&symbols_path)
+        .unwrap_or_else(|err| panic!("failed to read `{}`: {err}", symbols_path.display()));
+    let symbols_block = {
+        let not_found = || panic!("Couldn't find `Symbols` block in `{}`", symbols_path.display());
+
+        // NOTE(review): both patterns assume the `Symbols` block is indented by four spaces
+        // inside `symbols! { ... }` -- confirm against the formatting of `symbol.rs`.
+        let start_pat = "    Symbols {\n";
+        let start =
+            symbols_file_contents.find(start_pat).unwrap_or_else(not_found) + start_pat.len();
+
+        // The block ends at its own closing brace, directly followed by the one of the macro.
+        let end_pat = "\n    }\n}";
+        let relative_end = symbols_file_contents[start..].find(end_pat).unwrap_or_else(not_found);
+
+        &symbols_file_contents[start..start + relative_end]
+    };
+
+    // Extract all the symbol identifiers from the block, mapping each one to "seen in use".
+    let mut symbols: HashMap<&str, bool> = symbols_block
+        .split(',')
+        .filter_map(|item| {
+            let item = item.trim();
+            // `name: "literal"` entries are keyed by the identifier on the left of the colon.
+            let ident = item.split_once(':').map_or(item, |(lhs, _)| lhs.trim());
+            // Skip multi-line literals or empty strings (`sym::unstable_location_reason_default`)
+            if ident.is_empty() || ident.contains(char::is_whitespace) {
+                None
+            } else {
+                // Allow all assembly registers: anything containing `reg` starts out as used.
+                let used = ident.contains("reg");
+                Some((ident, used))
+            }
+        })
+        // Add special cases that are not in the `Symbols` block.
+        .chain([("macro_rules", false), ("integer", true)])
+        .collect();
+
+    // Add the symbols from the allowlist.
+    for symbol in ALLOWLIST {
+        set(&mut symbols, symbol);
+    }
+
+    // Add the symbols used in `declare_features!` macro calls.
+    find_features(&mut symbols, compiler);
+
+    // Add the symbols used in `compiler/rustc_builtin_macros/src/lib.rs`.
+    find_builtins(&mut symbols, compiler);
+
+    // Find all the symbol identifiers in `rustc_span` users.
+    let clippy = tools.join("clippy");
+    let rustfmt = tools.join("rustfmt");
+    let miri = tools.join("miri");
+    let paths = &[compiler, librustdoc, &clippy, &rustfmt, &miri];
+    find_sym(&mut symbols, paths);
+
+    let mut unused = symbols
+        .iter()
+        .filter(|&(name, &used)| !used && !IGNORELIST.contains(name))
+        .map(|(&name, _)| name)
+        .collect::<Vec<_>>();
+    if !unused.is_empty() {
+        // `HashMap` iteration order is unspecified; sort so the report is stable.
+        unused.sort_unstable();
+        tidy_error!(
+            bad,
+            "found {} unused pre-interned symbols in `{}`:\n {}",
+            unused.len(),
+            symbols_path.display(),
+            unused.join("\n "),
+        );
+    }
+}
+
+/// Marks as used every symbol written as `sym::` followed by its name in the files that
+/// `walk_many` visits below `paths` (filtered by `filter_not_rust`).
+///
+/// Panics (through `set`) if such a name is not a key of `symbols`.
+fn find_sym(symbols: &mut HashMap<&str, bool>, paths: &[&Path]) {
+    let sym_re = Regex::new(r"\bsym::\w+").unwrap();
+    walk_many(paths, |path, _| filter_not_rust(path), &mut |_entry, contents| {
+        for m in sym_re.find_iter(contents) {
+            // Every match starts with the literal `sym::`; skip it to get the bare name.
+            let symbol = &m.as_str()["sym::".len()..];
+            set(symbols, symbol);
+        }
+    });
+}
+
+/// Finds the symbols used in `declare_features!` macro calls and marks them as
+/// used in the `symbols` map.
+fn find_features(symbols: &mut HashMap<&str, bool>, compiler: &Path) {
+    // A block runs from the opening delimiter of `declare_features!` up to the first closing
+    // delimiter that is followed by `;` and a newline.
+    let start = Regex::new(r"declare_features!\s?(\(|\[|\{)").unwrap();
+    let end = Regex::new(r"(\)|\]|\});\n").unwrap();
+    let non_word = Regex::new(r"\W").unwrap();
+
+    let rustc_feature_src = compiler.join("rustc_feature/src");
+    for file_name in ["accepted.rs", "active.rs", "removed.rs"] {
+        let file = rustc_feature_src.join(file_name);
+        let contents = std::fs::read_to_string(&file)
+            .unwrap_or_else(|err| panic!("failed to read `{}`: {err}", file.display()));
+
+        for line in regex_blocks(&start, &end, &contents).flat_map(str::lines) {
+            // Only lines that open a parenthesised entry are considered.
+            let line = line.trim_start();
+            if !line.starts_with('(') {
+                continue;
+            }
+            // An entry needs at least four comma-separated fields; the second one is taken
+            // as the symbol name.
+            let fields: Vec<&str> = line.split(',').collect();
+            if fields.len() < 4 {
+                continue;
+            }
+            let symbol = fields[1].trim();
+            // Anything that is not a plain word cannot be a symbol identifier.
+            if symbol.is_empty() || non_word.is_match(symbol) {
+                continue;
+            }
+            set(symbols, symbol);
+        }
+    }
+}
+
+/// Finds the symbols used in `compiler/rustc_builtin_macros/src/lib.rs`
+/// and adds them to the `symbols` map.
+fn find_builtins(symbols: &mut HashMap<&str, bool>, compiler: &Path) {
+    let path = compiler.join("rustc_builtin_macros/src/lib.rs");
+    let contents = std::fs::read_to_string(&path)
+        .unwrap_or_else(|err| panic!("failed to read `{}`: {err}", path.display()));
+
+    // A registration block ends at the first closing delimiter of any kind.
+    let end = Regex::new(r"(\)|\]|\})\s*").unwrap();
+    for mac in ["register_bang", "register_attr", "register_derive"] {
+        let start = Regex::new(&format!(r"{mac}!\s?(\(|\[|\{{)")).unwrap();
+        // Only the first invocation of each macro is inspected.
+        let macro_block = regex_blocks(&start, &end, &contents)
+            .next()
+            .unwrap_or_else(|| panic!("Couldn't find `{mac}!` block in `{}`", path.display()));
+        for line in macro_block.lines() {
+            // Lines are read as `symbol: ...`; lines without a colon are skipped.
+            let Some((symbol, _)) = line.trim().split_once(':') else { continue };
+            set(symbols, symbol);
+        }
+    }
+}
+
+/// Yields the text between each match of `start` and the next match of `end` in `s`.
+///
+/// For every match of `start`, the yielded slice begins right after that match and stops
+/// right before the first match of `end` that follows it. The iterator is lazy: a block is
+/// only located when it is requested.
+///
+/// # Panics
+///
+/// Advancing the iterator panics if no match of `end` follows a match of `start`.
+fn regex_blocks<'a>(
+    start: &'a Regex,
+    end: &'a Regex,
+    s: &'a str,
+) -> impl Iterator<Item = &'a str> + 'a {
+    start.find_iter(s).map(move |m| {
+        let start_idx = m.end();
+        // `end` is searched in the remainder only, so its offset is relative to `start_idx`.
+        let end_idx = end
+            .find(&s[start_idx..])
+            .unwrap_or_else(|| panic!("no closing delimiter after byte offset {start_idx}"))
+            .start();
+        &s[start_idx..start_idx + end_idx]
+    })
+}
+
+/// Marks `symbol` as used in `symbols`.
+///
+/// # Panics
+///
+/// Panics if `symbol` is not a key of the map.
+fn set(symbols: &mut HashMap<&str, bool>, symbol: &str) {
+    match symbols.get_mut(symbol) {
+        Some(used) => *used = true,
+        None => panic!("Symbols map doesn't contain `sym::{}`", symbol),
+    }
+}