Skip to content

Commit e64610d

Browse files
committed
Auto merge of rust-lang#17037 - davidsemakula:token-set-collisions, r=Veykril
internal: improve `TokenSet` implementation and add reserved keywords The current `TokenSet` type represents "A bit-set of `SyntaxKind`s" as a newtype `u128`. Internally, the flag for each `SyntaxKind` variant in the bit-set is set as the n-th LSB (least significant bit) via a bit-wise left shift operation, where n is the discriminant. Edit: This is problematic because there are currently ~121 token `SyntaxKind`s, so adding new token kinds for missing reserved keywords increases the number of token `SyntaxKind`s above 128, thus making this ["mask"](https://github.com/rust-lang/rust-analyzer/blob/7a8374c162c64c17e865b98aad282d16b16e96d6/crates/parser/src/token_set.rs#L31-L33) operation overflow. ~~This is problematic because there are currently 266 `SyntaxKind`s, so this ["mask"](https://github.com/rust-lang/rust-analyzer/blob/7a8374c162c64c17e865b98aad282d16b16e96d6/crates/parser/src/token_set.rs#L31-L33) operation silently overflows in release mode.~~ ~~This leads to a single flag/bit in the bit-set being shared by multiple `SyntaxKind`s~~. This PR: - Changes the wrapped type for `TokenSet` from `u128` to `[u64; 3]` ~~`[u*; N]` (currently `[u16; 17]`) where `u*` can be any desirable unsigned integer type and `N` is the minimum array length needed to represent all token `SyntaxKind`s without any collisions~~. - Edit: Add assertion that `TokenSet`s only include token `SyntaxKind`s - Edit: Add ~7 missing [reserved keywords](https://doc.rust-lang.org/stable/reference/keywords.html#reserved-keywords) - ~~Moves the definition of the `TokenSet` type to grammar codegen in xtask, so that `N` is adjusted automatically (depending on the chosen `u*` "base" type) when new `SyntaxKind`s are added~~. 
- ~~Updates the `token_set_works_for_tokens` unit test to include the `__LAST` `SyntaxKind` as a way of catching overflows in tests.~~ ~~Currently `u16` is arbitrarily chosen as the `u*` "base" type mostly because it strikes a good balance (IMO) between unused bits and readability of the generated `TokenSet` code (especially the [`union` method](https://github.com/rust-lang/rust-analyzer/blob/7a8374c162c64c17e865b98aad282d16b16e96d6/crates/parser/src/token_set.rs#L26-L28)), but I'm open to other suggestions or a better methodology for choosing the `u*` type.~~ ~~I considered using a third-party crate for the bit-set, but a direct implementation seems simple enough without adding any new dependencies. I'm not strongly opposed to using a third-party crate though, if that's preferred.~~ ~~Finally, I haven't had the chance to review issues, to figure out if there are any parser issues caused by collisions due to the current implementation that may be fixed by this PR - I just stumbled upon the issue while adding "new" keywords to solve rust-lang#16858~~ Edit: fixes rust-lang#16858
2 parents dcbb27a + 69fe457 commit e64610d

File tree

4 files changed

+101
-20
lines changed

4 files changed

+101
-20
lines changed

crates/ide-assists/src/handlers/auto_import.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1620,4 +1620,50 @@ mod foo {
16201620
"#,
16211621
);
16221622
}
1623+
1624+
#[test]
1625+
fn preserve_raw_identifiers_strict() {
1626+
check_assist(
1627+
auto_import,
1628+
r"
1629+
r#as$0
1630+
1631+
pub mod ffi_mod {
1632+
pub fn r#as() {};
1633+
}
1634+
",
1635+
r"
1636+
use ffi_mod::r#as;
1637+
1638+
r#as
1639+
1640+
pub mod ffi_mod {
1641+
pub fn r#as() {};
1642+
}
1643+
",
1644+
);
1645+
}
1646+
1647+
#[test]
1648+
fn preserve_raw_identifiers_reserved() {
1649+
check_assist(
1650+
auto_import,
1651+
r"
1652+
r#abstract$0
1653+
1654+
pub mod ffi_mod {
1655+
pub fn r#abstract() {};
1656+
}
1657+
",
1658+
r"
1659+
use ffi_mod::r#abstract;
1660+
1661+
r#abstract
1662+
1663+
pub mod ffi_mod {
1664+
pub fn r#abstract() {};
1665+
}
1666+
",
1667+
);
1668+
}
16231669
}

crates/parser/src/syntax_kind/generated.rs

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,11 @@ pub enum SyntaxKind {
6161
SHR,
6262
SHLEQ,
6363
SHREQ,
64+
ABSTRACT_KW,
6465
AS_KW,
6566
ASYNC_KW,
6667
AWAIT_KW,
68+
BECOME_KW,
6769
BOX_KW,
6870
BREAK_KW,
6971
CONST_KW,
@@ -75,6 +77,7 @@ pub enum SyntaxKind {
7577
ENUM_KW,
7678
EXTERN_KW,
7779
FALSE_KW,
80+
FINAL_KW,
7881
FN_KW,
7982
FOR_KW,
8083
IF_KW,
@@ -87,10 +90,11 @@ pub enum SyntaxKind {
8790
MOD_KW,
8891
MOVE_KW,
8992
MUT_KW,
93+
OVERRIDE_KW,
94+
PRIV_KW,
9095
PUB_KW,
9196
REF_KW,
9297
RETURN_KW,
93-
BECOME_KW,
9498
SELF_KW,
9599
SELF_TYPE_KW,
96100
STATIC_KW,
@@ -100,8 +104,11 @@ pub enum SyntaxKind {
100104
TRUE_KW,
101105
TRY_KW,
102106
TYPE_KW,
107+
TYPEOF_KW,
103108
UNSAFE_KW,
109+
UNSIZED_KW,
104110
USE_KW,
111+
VIRTUAL_KW,
105112
WHERE_KW,
106113
WHILE_KW,
107114
YIELD_KW,
@@ -280,9 +287,11 @@ impl SyntaxKind {
280287
pub fn is_keyword(self) -> bool {
281288
matches!(
282289
self,
283-
AS_KW
290+
ABSTRACT_KW
291+
| AS_KW
284292
| ASYNC_KW
285293
| AWAIT_KW
294+
| BECOME_KW
286295
| BOX_KW
287296
| BREAK_KW
288297
| CONST_KW
@@ -294,6 +303,7 @@ impl SyntaxKind {
294303
| ENUM_KW
295304
| EXTERN_KW
296305
| FALSE_KW
306+
| FINAL_KW
297307
| FN_KW
298308
| FOR_KW
299309
| IF_KW
@@ -306,10 +316,11 @@ impl SyntaxKind {
306316
| MOD_KW
307317
| MOVE_KW
308318
| MUT_KW
319+
| OVERRIDE_KW
320+
| PRIV_KW
309321
| PUB_KW
310322
| REF_KW
311323
| RETURN_KW
312-
| BECOME_KW
313324
| SELF_KW
314325
| SELF_TYPE_KW
315326
| STATIC_KW
@@ -319,8 +330,11 @@ impl SyntaxKind {
319330
| TRUE_KW
320331
| TRY_KW
321332
| TYPE_KW
333+
| TYPEOF_KW
322334
| UNSAFE_KW
335+
| UNSIZED_KW
323336
| USE_KW
337+
| VIRTUAL_KW
324338
| WHERE_KW
325339
| WHILE_KW
326340
| YIELD_KW
@@ -399,9 +413,11 @@ impl SyntaxKind {
399413
}
400414
pub fn from_keyword(ident: &str) -> Option<SyntaxKind> {
401415
let kw = match ident {
416+
"abstract" => ABSTRACT_KW,
402417
"as" => AS_KW,
403418
"async" => ASYNC_KW,
404419
"await" => AWAIT_KW,
420+
"become" => BECOME_KW,
405421
"box" => BOX_KW,
406422
"break" => BREAK_KW,
407423
"const" => CONST_KW,
@@ -413,6 +429,7 @@ impl SyntaxKind {
413429
"enum" => ENUM_KW,
414430
"extern" => EXTERN_KW,
415431
"false" => FALSE_KW,
432+
"final" => FINAL_KW,
416433
"fn" => FN_KW,
417434
"for" => FOR_KW,
418435
"if" => IF_KW,
@@ -425,10 +442,11 @@ impl SyntaxKind {
425442
"mod" => MOD_KW,
426443
"move" => MOVE_KW,
427444
"mut" => MUT_KW,
445+
"override" => OVERRIDE_KW,
446+
"priv" => PRIV_KW,
428447
"pub" => PUB_KW,
429448
"ref" => REF_KW,
430449
"return" => RETURN_KW,
431-
"become" => BECOME_KW,
432450
"self" => SELF_KW,
433451
"Self" => SELF_TYPE_KW,
434452
"static" => STATIC_KW,
@@ -438,8 +456,11 @@ impl SyntaxKind {
438456
"true" => TRUE_KW,
439457
"try" => TRY_KW,
440458
"type" => TYPE_KW,
459+
"typeof" => TYPEOF_KW,
441460
"unsafe" => UNSAFE_KW,
461+
"unsized" => UNSIZED_KW,
442462
"use" => USE_KW,
463+
"virtual" => VIRTUAL_KW,
443464
"where" => WHERE_KW,
444465
"while" => WHILE_KW,
445466
"yield" => YIELD_KW,
@@ -500,4 +521,4 @@ impl SyntaxKind {
500521
}
501522
}
502523
#[macro_export]
503-
macro_rules ! T { [;] => { $ crate :: SyntaxKind :: SEMICOLON } ; [,] => { $ crate :: SyntaxKind :: COMMA } ; ['('] => { $ crate :: SyntaxKind :: L_PAREN } ; [')'] => { $ crate :: SyntaxKind :: R_PAREN } ; ['{'] => { $ crate :: SyntaxKind :: L_CURLY } ; ['}'] => { $ crate :: SyntaxKind :: R_CURLY } ; ['['] => { $ crate :: SyntaxKind :: L_BRACK } ; [']'] => { $ crate :: SyntaxKind :: R_BRACK } ; [<] => { $ crate :: SyntaxKind :: L_ANGLE } ; [>] => { $ crate :: SyntaxKind :: R_ANGLE } ; [@] => { $ crate :: SyntaxKind :: AT } ; [#] => { $ crate :: SyntaxKind :: POUND } ; [~] => { $ crate :: SyntaxKind :: TILDE } ; [?] => { $ crate :: SyntaxKind :: QUESTION } ; [$] => { $ crate :: SyntaxKind :: DOLLAR } ; [&] => { $ crate :: SyntaxKind :: AMP } ; [|] => { $ crate :: SyntaxKind :: PIPE } ; [+] => { $ crate :: SyntaxKind :: PLUS } ; [*] => { $ crate :: SyntaxKind :: STAR } ; [/] => { $ crate :: SyntaxKind :: SLASH } ; [^] => { $ crate :: SyntaxKind :: CARET } ; [%] => { $ crate :: SyntaxKind :: PERCENT } ; [_] => { $ crate :: SyntaxKind :: UNDERSCORE } ; [.] => { $ crate :: SyntaxKind :: DOT } ; [..] => { $ crate :: SyntaxKind :: DOT2 } ; [...] => { $ crate :: SyntaxKind :: DOT3 } ; [..=] => { $ crate :: SyntaxKind :: DOT2EQ } ; [:] => { $ crate :: SyntaxKind :: COLON } ; [::] => { $ crate :: SyntaxKind :: COLON2 } ; [=] => { $ crate :: SyntaxKind :: EQ } ; [==] => { $ crate :: SyntaxKind :: EQ2 } ; [=>] => { $ crate :: SyntaxKind :: FAT_ARROW } ; [!] 
=> { $ crate :: SyntaxKind :: BANG } ; [!=] => { $ crate :: SyntaxKind :: NEQ } ; [-] => { $ crate :: SyntaxKind :: MINUS } ; [->] => { $ crate :: SyntaxKind :: THIN_ARROW } ; [<=] => { $ crate :: SyntaxKind :: LTEQ } ; [>=] => { $ crate :: SyntaxKind :: GTEQ } ; [+=] => { $ crate :: SyntaxKind :: PLUSEQ } ; [-=] => { $ crate :: SyntaxKind :: MINUSEQ } ; [|=] => { $ crate :: SyntaxKind :: PIPEEQ } ; [&=] => { $ crate :: SyntaxKind :: AMPEQ } ; [^=] => { $ crate :: SyntaxKind :: CARETEQ } ; [/=] => { $ crate :: SyntaxKind :: SLASHEQ } ; [*=] => { $ crate :: SyntaxKind :: STAREQ } ; [%=] => { $ crate :: SyntaxKind :: PERCENTEQ } ; [&&] => { $ crate :: SyntaxKind :: AMP2 } ; [||] => { $ crate :: SyntaxKind :: PIPE2 } ; [<<] => { $ crate :: SyntaxKind :: SHL } ; [>>] => { $ crate :: SyntaxKind :: SHR } ; [<<=] => { $ crate :: SyntaxKind :: SHLEQ } ; [>>=] => { $ crate :: SyntaxKind :: SHREQ } ; [as] => { $ crate :: SyntaxKind :: AS_KW } ; [async] => { $ crate :: SyntaxKind :: ASYNC_KW } ; [await] => { $ crate :: SyntaxKind :: AWAIT_KW } ; [box] => { $ crate :: SyntaxKind :: BOX_KW } ; [break] => { $ crate :: SyntaxKind :: BREAK_KW } ; [const] => { $ crate :: SyntaxKind :: CONST_KW } ; [continue] => { $ crate :: SyntaxKind :: CONTINUE_KW } ; [crate] => { $ crate :: SyntaxKind :: CRATE_KW } ; [do] => { $ crate :: SyntaxKind :: DO_KW } ; [dyn] => { $ crate :: SyntaxKind :: DYN_KW } ; [else] => { $ crate :: SyntaxKind :: ELSE_KW } ; [enum] => { $ crate :: SyntaxKind :: ENUM_KW } ; [extern] => { $ crate :: SyntaxKind :: EXTERN_KW } ; [false] => { $ crate :: SyntaxKind :: FALSE_KW } ; [fn] => { $ crate :: SyntaxKind :: FN_KW } ; [for] => { $ crate :: SyntaxKind :: FOR_KW } ; [if] => { $ crate :: SyntaxKind :: IF_KW } ; [impl] => { $ crate :: SyntaxKind :: IMPL_KW } ; [in] => { $ crate :: SyntaxKind :: IN_KW } ; [let] => { $ crate :: SyntaxKind :: LET_KW } ; [loop] => { $ crate :: SyntaxKind :: LOOP_KW } ; [macro] => { $ crate :: SyntaxKind :: MACRO_KW } ; [match] => { $ 
crate :: SyntaxKind :: MATCH_KW } ; [mod] => { $ crate :: SyntaxKind :: MOD_KW } ; [move] => { $ crate :: SyntaxKind :: MOVE_KW } ; [mut] => { $ crate :: SyntaxKind :: MUT_KW } ; [pub] => { $ crate :: SyntaxKind :: PUB_KW } ; [ref] => { $ crate :: SyntaxKind :: REF_KW } ; [return] => { $ crate :: SyntaxKind :: RETURN_KW } ; [become] => { $ crate :: SyntaxKind :: BECOME_KW } ; [self] => { $ crate :: SyntaxKind :: SELF_KW } ; [Self] => { $ crate :: SyntaxKind :: SELF_TYPE_KW } ; [static] => { $ crate :: SyntaxKind :: STATIC_KW } ; [struct] => { $ crate :: SyntaxKind :: STRUCT_KW } ; [super] => { $ crate :: SyntaxKind :: SUPER_KW } ; [trait] => { $ crate :: SyntaxKind :: TRAIT_KW } ; [true] => { $ crate :: SyntaxKind :: TRUE_KW } ; [try] => { $ crate :: SyntaxKind :: TRY_KW } ; [type] => { $ crate :: SyntaxKind :: TYPE_KW } ; [unsafe] => { $ crate :: SyntaxKind :: UNSAFE_KW } ; [use] => { $ crate :: SyntaxKind :: USE_KW } ; [where] => { $ crate :: SyntaxKind :: WHERE_KW } ; [while] => { $ crate :: SyntaxKind :: WHILE_KW } ; [yield] => { $ crate :: SyntaxKind :: YIELD_KW } ; [auto] => { $ crate :: SyntaxKind :: AUTO_KW } ; [builtin] => { $ crate :: SyntaxKind :: BUILTIN_KW } ; [default] => { $ crate :: SyntaxKind :: DEFAULT_KW } ; [existential] => { $ crate :: SyntaxKind :: EXISTENTIAL_KW } ; [union] => { $ crate :: SyntaxKind :: UNION_KW } ; [raw] => { $ crate :: SyntaxKind :: RAW_KW } ; [macro_rules] => { $ crate :: SyntaxKind :: MACRO_RULES_KW } ; [yeet] => { $ crate :: SyntaxKind :: YEET_KW } ; [offset_of] => { $ crate :: SyntaxKind :: OFFSET_OF_KW } ; [asm] => { $ crate :: SyntaxKind :: ASM_KW } ; [format_args] => { $ crate :: SyntaxKind :: FORMAT_ARGS_KW } ; [lifetime_ident] => { $ crate :: SyntaxKind :: LIFETIME_IDENT } ; [ident] => { $ crate :: SyntaxKind :: IDENT } ; [shebang] => { $ crate :: SyntaxKind :: SHEBANG } ; }
524+
macro_rules ! T { [;] => { $ crate :: SyntaxKind :: SEMICOLON } ; [,] => { $ crate :: SyntaxKind :: COMMA } ; ['('] => { $ crate :: SyntaxKind :: L_PAREN } ; [')'] => { $ crate :: SyntaxKind :: R_PAREN } ; ['{'] => { $ crate :: SyntaxKind :: L_CURLY } ; ['}'] => { $ crate :: SyntaxKind :: R_CURLY } ; ['['] => { $ crate :: SyntaxKind :: L_BRACK } ; [']'] => { $ crate :: SyntaxKind :: R_BRACK } ; [<] => { $ crate :: SyntaxKind :: L_ANGLE } ; [>] => { $ crate :: SyntaxKind :: R_ANGLE } ; [@] => { $ crate :: SyntaxKind :: AT } ; [#] => { $ crate :: SyntaxKind :: POUND } ; [~] => { $ crate :: SyntaxKind :: TILDE } ; [?] => { $ crate :: SyntaxKind :: QUESTION } ; [$] => { $ crate :: SyntaxKind :: DOLLAR } ; [&] => { $ crate :: SyntaxKind :: AMP } ; [|] => { $ crate :: SyntaxKind :: PIPE } ; [+] => { $ crate :: SyntaxKind :: PLUS } ; [*] => { $ crate :: SyntaxKind :: STAR } ; [/] => { $ crate :: SyntaxKind :: SLASH } ; [^] => { $ crate :: SyntaxKind :: CARET } ; [%] => { $ crate :: SyntaxKind :: PERCENT } ; [_] => { $ crate :: SyntaxKind :: UNDERSCORE } ; [.] => { $ crate :: SyntaxKind :: DOT } ; [..] => { $ crate :: SyntaxKind :: DOT2 } ; [...] => { $ crate :: SyntaxKind :: DOT3 } ; [..=] => { $ crate :: SyntaxKind :: DOT2EQ } ; [:] => { $ crate :: SyntaxKind :: COLON } ; [::] => { $ crate :: SyntaxKind :: COLON2 } ; [=] => { $ crate :: SyntaxKind :: EQ } ; [==] => { $ crate :: SyntaxKind :: EQ2 } ; [=>] => { $ crate :: SyntaxKind :: FAT_ARROW } ; [!] 
=> { $ crate :: SyntaxKind :: BANG } ; [!=] => { $ crate :: SyntaxKind :: NEQ } ; [-] => { $ crate :: SyntaxKind :: MINUS } ; [->] => { $ crate :: SyntaxKind :: THIN_ARROW } ; [<=] => { $ crate :: SyntaxKind :: LTEQ } ; [>=] => { $ crate :: SyntaxKind :: GTEQ } ; [+=] => { $ crate :: SyntaxKind :: PLUSEQ } ; [-=] => { $ crate :: SyntaxKind :: MINUSEQ } ; [|=] => { $ crate :: SyntaxKind :: PIPEEQ } ; [&=] => { $ crate :: SyntaxKind :: AMPEQ } ; [^=] => { $ crate :: SyntaxKind :: CARETEQ } ; [/=] => { $ crate :: SyntaxKind :: SLASHEQ } ; [*=] => { $ crate :: SyntaxKind :: STAREQ } ; [%=] => { $ crate :: SyntaxKind :: PERCENTEQ } ; [&&] => { $ crate :: SyntaxKind :: AMP2 } ; [||] => { $ crate :: SyntaxKind :: PIPE2 } ; [<<] => { $ crate :: SyntaxKind :: SHL } ; [>>] => { $ crate :: SyntaxKind :: SHR } ; [<<=] => { $ crate :: SyntaxKind :: SHLEQ } ; [>>=] => { $ crate :: SyntaxKind :: SHREQ } ; [abstract] => { $ crate :: SyntaxKind :: ABSTRACT_KW } ; [as] => { $ crate :: SyntaxKind :: AS_KW } ; [async] => { $ crate :: SyntaxKind :: ASYNC_KW } ; [await] => { $ crate :: SyntaxKind :: AWAIT_KW } ; [become] => { $ crate :: SyntaxKind :: BECOME_KW } ; [box] => { $ crate :: SyntaxKind :: BOX_KW } ; [break] => { $ crate :: SyntaxKind :: BREAK_KW } ; [const] => { $ crate :: SyntaxKind :: CONST_KW } ; [continue] => { $ crate :: SyntaxKind :: CONTINUE_KW } ; [crate] => { $ crate :: SyntaxKind :: CRATE_KW } ; [do] => { $ crate :: SyntaxKind :: DO_KW } ; [dyn] => { $ crate :: SyntaxKind :: DYN_KW } ; [else] => { $ crate :: SyntaxKind :: ELSE_KW } ; [enum] => { $ crate :: SyntaxKind :: ENUM_KW } ; [extern] => { $ crate :: SyntaxKind :: EXTERN_KW } ; [false] => { $ crate :: SyntaxKind :: FALSE_KW } ; [final] => { $ crate :: SyntaxKind :: FINAL_KW } ; [fn] => { $ crate :: SyntaxKind :: FN_KW } ; [for] => { $ crate :: SyntaxKind :: FOR_KW } ; [if] => { $ crate :: SyntaxKind :: IF_KW } ; [impl] => { $ crate :: SyntaxKind :: IMPL_KW } ; [in] => { $ crate :: SyntaxKind :: IN_KW } ; [let] 
=> { $ crate :: SyntaxKind :: LET_KW } ; [loop] => { $ crate :: SyntaxKind :: LOOP_KW } ; [macro] => { $ crate :: SyntaxKind :: MACRO_KW } ; [match] => { $ crate :: SyntaxKind :: MATCH_KW } ; [mod] => { $ crate :: SyntaxKind :: MOD_KW } ; [move] => { $ crate :: SyntaxKind :: MOVE_KW } ; [mut] => { $ crate :: SyntaxKind :: MUT_KW } ; [override] => { $ crate :: SyntaxKind :: OVERRIDE_KW } ; [priv] => { $ crate :: SyntaxKind :: PRIV_KW } ; [pub] => { $ crate :: SyntaxKind :: PUB_KW } ; [ref] => { $ crate :: SyntaxKind :: REF_KW } ; [return] => { $ crate :: SyntaxKind :: RETURN_KW } ; [self] => { $ crate :: SyntaxKind :: SELF_KW } ; [Self] => { $ crate :: SyntaxKind :: SELF_TYPE_KW } ; [static] => { $ crate :: SyntaxKind :: STATIC_KW } ; [struct] => { $ crate :: SyntaxKind :: STRUCT_KW } ; [super] => { $ crate :: SyntaxKind :: SUPER_KW } ; [trait] => { $ crate :: SyntaxKind :: TRAIT_KW } ; [true] => { $ crate :: SyntaxKind :: TRUE_KW } ; [try] => { $ crate :: SyntaxKind :: TRY_KW } ; [type] => { $ crate :: SyntaxKind :: TYPE_KW } ; [typeof] => { $ crate :: SyntaxKind :: TYPEOF_KW } ; [unsafe] => { $ crate :: SyntaxKind :: UNSAFE_KW } ; [unsized] => { $ crate :: SyntaxKind :: UNSIZED_KW } ; [use] => { $ crate :: SyntaxKind :: USE_KW } ; [virtual] => { $ crate :: SyntaxKind :: VIRTUAL_KW } ; [where] => { $ crate :: SyntaxKind :: WHERE_KW } ; [while] => { $ crate :: SyntaxKind :: WHILE_KW } ; [yield] => { $ crate :: SyntaxKind :: YIELD_KW } ; [auto] => { $ crate :: SyntaxKind :: AUTO_KW } ; [builtin] => { $ crate :: SyntaxKind :: BUILTIN_KW } ; [default] => { $ crate :: SyntaxKind :: DEFAULT_KW } ; [existential] => { $ crate :: SyntaxKind :: EXISTENTIAL_KW } ; [union] => { $ crate :: SyntaxKind :: UNION_KW } ; [raw] => { $ crate :: SyntaxKind :: RAW_KW } ; [macro_rules] => { $ crate :: SyntaxKind :: MACRO_RULES_KW } ; [yeet] => { $ crate :: SyntaxKind :: YEET_KW } ; [offset_of] => { $ crate :: SyntaxKind :: OFFSET_OF_KW } ; [asm] => { $ crate :: SyntaxKind :: ASM_KW } ; 
[format_args] => { $ crate :: SyntaxKind :: FORMAT_ARGS_KW } ; [lifetime_ident] => { $ crate :: SyntaxKind :: LIFETIME_IDENT } ; [ident] => { $ crate :: SyntaxKind :: IDENT } ; [shebang] => { $ crate :: SyntaxKind :: SHEBANG } ; }

crates/parser/src/token_set.rs

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,34 +4,48 @@ use crate::SyntaxKind;
44

55
/// A bit-set of `SyntaxKind`s
66
#[derive(Clone, Copy)]
7-
pub(crate) struct TokenSet(u128);
7+
pub(crate) struct TokenSet([u64; 3]);
8+
9+
/// `TokenSet`s should only include token `SyntaxKind`s, so the discriminant of any passed/included
10+
/// `SyntaxKind` must *not* be greater than that of the last token `SyntaxKind`.
11+
/// See #17037.
12+
const LAST_TOKEN_KIND_DISCRIMINANT: usize = SyntaxKind::SHEBANG as usize;
813

914
impl TokenSet {
10-
pub(crate) const EMPTY: TokenSet = TokenSet(0);
15+
pub(crate) const EMPTY: TokenSet = TokenSet([0; 3]);
1116

1217
pub(crate) const fn new(kinds: &[SyntaxKind]) -> TokenSet {
13-
let mut res = 0u128;
18+
let mut res = [0; 3];
1419
let mut i = 0;
1520
while i < kinds.len() {
16-
res |= mask(kinds[i]);
21+
let discriminant = kinds[i] as usize;
22+
debug_assert!(
23+
discriminant <= LAST_TOKEN_KIND_DISCRIMINANT,
24+
"Expected a token `SyntaxKind`"
25+
);
26+
let idx = discriminant / 64;
27+
res[idx] |= 1 << (discriminant % 64);
1728
i += 1;
1829
}
1930
TokenSet(res)
2031
}
2132

2233
pub(crate) const fn union(self, other: TokenSet) -> TokenSet {
23-
TokenSet(self.0 | other.0)
34+
TokenSet([self.0[0] | other.0[0], self.0[1] | other.0[1], self.0[2] | other.0[2]])
2435
}
2536

2637
pub(crate) const fn contains(&self, kind: SyntaxKind) -> bool {
27-
self.0 & mask(kind) != 0
38+
let discriminant = kind as usize;
39+
debug_assert!(
40+
discriminant <= LAST_TOKEN_KIND_DISCRIMINANT,
41+
"Expected a token `SyntaxKind`"
42+
);
43+
let idx = discriminant / 64;
44+
let mask = 1 << (discriminant % 64);
45+
self.0[idx] & mask != 0
2846
}
2947
}
3048

31-
const fn mask(kind: SyntaxKind) -> u128 {
32-
1u128 << (kind as usize)
33-
}
34-
3549
#[test]
3650
fn token_set_works_for_tokens() {
3751
use crate::SyntaxKind::*;

xtask/src/codegen/grammar/ast_src.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,11 +65,11 @@ pub(crate) const KINDS_SRC: KindsSrc<'_> = KindsSrc {
6565
(">>=", "SHREQ"),
6666
],
6767
keywords: &[
68-
"as", "async", "await", "box", "break", "const", "continue", "crate", "do", "dyn", "else",
69-
"enum", "extern", "false", "fn", "for", "if", "impl", "in", "let", "loop", "macro",
70-
"match", "mod", "move", "mut", "pub", "ref", "return", "become", "self", "Self", "static",
71-
"struct", "super", "trait", "true", "try", "type", "unsafe", "use", "where", "while",
72-
"yield",
68+
"abstract", "as", "async", "await", "become", "box", "break", "const", "continue", "crate",
69+
"do", "dyn", "else", "enum", "extern", "false", "final", "fn", "for", "if", "impl", "in",
70+
"let", "loop", "macro", "match", "mod", "move", "mut", "override", "priv", "pub", "ref",
71+
"return", "self", "Self", "static", "struct", "super", "trait", "true", "try", "type",
72+
"typeof", "unsafe", "unsized", "use", "virtual", "where", "while", "yield",
7373
],
7474
contextual_keywords: &[
7575
"auto",

0 commit comments

Comments
 (0)