From 37d9ea745b358a5b9f48560600841fc6619e545d Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Thu, 24 Feb 2022 16:10:36 +1100
Subject: [PATCH 1/2] Improve `scan_escape`.

`scan_escape` currently has a fast path (for when the first char isn't
'\\') and a slow path.

This commit changes `scan_escape` so it only handles the slow path, i.e.
the actual escaping code. The fast path is inlined into the two call
sites.

This change makes the code faster, because there is no function call
overhead on the fast path. (`scan_escape` is a big function and doesn't
get inlined.)

This change also improves readability, because it removes a bunch of
mode checks on the fast paths.
---
 compiler/rustc_lexer/src/unescape.rs | 45 ++++++++++++++--------------
 1 file changed, 22 insertions(+), 23 deletions(-)
diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs
index d789237e692d2..97f9588ae1ef5 100644
--- a/compiler/rustc_lexer/src/unescape.rs
+++ b/compiler/rustc_lexer/src/unescape.rs
@@ -159,26 +159,8 @@ impl Mode {
     }
 }
 
-fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
-    if first_char != '\\' {
-        // Previous character was not a slash, and we don't expect it to be
-        // an escape-only character.
-        return match first_char {
-            '\t' | '\n' => Err(EscapeError::EscapeOnlyChar),
-            '\r' => Err(EscapeError::BareCarriageReturn),
-            '\'' if mode.in_single_quotes() => Err(EscapeError::EscapeOnlyChar),
-            '"' if mode.in_double_quotes() => Err(EscapeError::EscapeOnlyChar),
-            _ => {
-                if mode.is_bytes() && !first_char.is_ascii() {
-                    // Byte literal can't be a non-ascii character.
-                    return Err(EscapeError::NonAsciiCharInByte);
-                }
-                Ok(first_char)
-            }
-        };
-    }
-
-    // Previous character is '\\', try to unescape it.
+fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
+    // Previous character was '\\', unescape what follows.
 
     let second_char = chars.next().ok_or(EscapeError::LoneSlash)?;
 
@@ -270,9 +252,24 @@ fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<ch
     Ok(res)
 }
 
+#[inline]
+fn ascii_check(first_char: char, mode: Mode) -> Result<char, EscapeError> {
+    if mode.is_bytes() && !first_char.is_ascii() {
+        // Byte literal can't be a non-ascii character.
+        Err(EscapeError::NonAsciiCharInByte)
+    } else {
+        Ok(first_char)
+    }
+}
+
 fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
     let first_char = chars.next().ok_or(EscapeError::ZeroChars)?;
-    let res = scan_escape(first_char, chars, mode)?;
+    let res = match first_char {
+        '\\' => scan_escape(chars, mode),
+        '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar),
+        '\r' => Err(EscapeError::BareCarriageReturn),
+        _ => ascii_check(first_char, mode),
+    }?;
     if chars.next().is_some() {
         return Err(EscapeError::MoreThanOneChar);
     }
@@ -303,12 +300,14 @@ where
                         skip_ascii_whitespace(&mut chars, start, callback);
                         continue;
                     }
-                    _ => scan_escape(first_char, &mut chars, mode),
+                    _ => scan_escape(&mut chars, mode),
                 }
             }
             '\n' => Ok('\n'),
             '\t' => Ok('\t'),
-            _ => scan_escape(first_char, &mut chars, mode),
+            '"' => Err(EscapeError::EscapeOnlyChar),
+            '\r' => Err(EscapeError::BareCarriageReturn),
+            _ => ascii_check(first_char, mode),
         };
         let end = initial_len - chars.as_str().len();
         callback(start..end, unescaped_char);

From 44308dc3489e39958b2ce6dd297b895514b6f425 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Thu, 24 Feb 2022 16:49:37 +1100
Subject: [PATCH 2/2] Inline a hot closure in `from_lit_token`.

The change looks big because `rustfmt` rearranges things, but the only
real change is the inlining annotation.
---
 compiler/rustc_ast/src/lib.rs          |  1 +
 compiler/rustc_ast/src/util/literal.rs | 39 +++++++++++++++-----------
 2 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/compiler/rustc_ast/src/lib.rs b/compiler/rustc_ast/src/lib.rs
index 84fe9ad26720e..21183121e15a0 100644
--- a/compiler/rustc_ast/src/lib.rs
+++ b/compiler/rustc_ast/src/lib.rs
@@ -16,6 +16,7 @@
 #![feature(min_specialization)]
 #![recursion_limit = "256"]
 #![feature(slice_internals)]
+#![feature(stmt_expr_attributes)]
 
 #[macro_use]
 extern crate rustc_macros;
diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs
index 1cc5ddfd8ee29..224afbd553fb8 100644
--- a/compiler/rustc_ast/src/util/literal.rs
+++ b/compiler/rustc_ast/src/util/literal.rs
@@ -56,25 +56,30 @@ impl LitKind {
                 // new symbol because the string in the LitKind is different to the
                 // string in the token.
                 let s = symbol.as_str();
-                let symbol =
-                    if s.contains(&['\\', '\r']) {
-                        let mut buf = String::with_capacity(s.len());
-                        let mut error = Ok(());
-                        unescape_literal(&s, Mode::Str, &mut |_, unescaped_char| {
-                            match unescaped_char {
-                                Ok(c) => buf.push(c),
-                                Err(err) => {
-                                    if err.is_fatal() {
-                                        error = Err(LitError::LexerError);
-                                    }
+                let symbol = if s.contains(&['\\', '\r']) {
+                    let mut buf = String::with_capacity(s.len());
+                    let mut error = Ok(());
+                    // Force-inlining here is aggressive but the closure is
+                    // called on every char in the string, so it can be
+                    // hot in programs with many long strings.
+                    unescape_literal(
+                        &s,
+                        Mode::Str,
+                        &mut #[inline(always)]
+                        |_, unescaped_char| match unescaped_char {
+                            Ok(c) => buf.push(c),
+                            Err(err) => {
+                                if err.is_fatal() {
+                                    error = Err(LitError::LexerError);
                                 }
                             }
-                        });
-                        error?;
-                        Symbol::intern(&buf)
-                    } else {
-                        symbol
-                    };
+                        },
+                    );
+                    error?;
+                    Symbol::intern(&buf)
+                } else {
+                    symbol
+                };
                 LitKind::Str(symbol, ast::StrStyle::Cooked)
             }
             token::StrRaw(n) => {