From 0269357c5a3535f973079d2a02318bd531c9d01c Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Sun, 14 Sep 2025 06:41:59 -0400 Subject: [PATCH] feat(generate): allow more characters for keywords --- crates/generate/src/build_tables.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/crates/generate/src/build_tables.rs b/crates/generate/src/build_tables.rs index f5709419..ea61834d 100644 --- a/crates/generate/src/build_tables.rs +++ b/crates/generate/src/build_tables.rs @@ -334,7 +334,7 @@ fn identify_keywords( .enumerate() .filter_map(|(i, variable)| { cursor.reset(vec![variable.start_state]); - if all_chars_are_alphabetical(&cursor) + if all_chars_are_valid_for_keywords(&cursor) && token_conflict_map.does_match_same_string(i, word_token.index) && !token_conflict_map.does_match_different_string(i, word_token.index) { @@ -531,12 +531,17 @@ fn report_state_info<'a>( } } -fn all_chars_are_alphabetical(cursor: &NfaCursor) -> bool { +/// Returns whether every non-separator character the cursor can transition on is one of +/// the characters typically allowed in programming language keywords. Note that this set +/// is provisional and can be adjusted if necessary. +fn all_chars_are_valid_for_keywords(cursor: &NfaCursor) -> bool { cursor.transition_chars().all(|(chars, is_sep)| { if is_sep { true } else { - chars.chars().all(|c| c.is_alphabetic() || c == '_') + chars + .chars() + .all(|c| c.is_alphanumeric() || "_!@#$-:.?/`".contains(c)) } }) }