fix(generate): do not generate large character sets for unused variables

Amaan Qureshi 2024-09-07 21:15:18 -04:00
parent cbd3bb5b9a
commit d8ab779df4
3 changed files with 6 additions and 1 deletion
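
In short: build_lex_table used to compute a large character set for every lexical variable, even terminals that no syntax rule references. This commit makes symbol_is_used in flatten_grammar public, re-exports it from prepare_grammar, and calls it in the lex-table builder to skip those unused variables. The three hunks below are, in order: the new guard at its call site, the visibility change, and the re-export.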


@@ -10,6 +10,7 @@ use crate::generate::{
     dedup::split_state_id_groups,
     grammars::{LexicalGrammar, SyntaxGrammar},
     nfa::{CharacterSet, NfaCursor},
+    prepare_grammar::symbol_is_used,
     rules::{Symbol, TokenSet},
     tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable},
 };
@@ -93,6 +94,9 @@ pub fn build_lex_table(
     let mut large_character_sets = Vec::new();
     for (variable_ix, _variable) in lexical_grammar.variables.iter().enumerate() {
         let symbol = Symbol::terminal(variable_ix);
+        if !symbol_is_used(&syntax_grammar.variables, symbol) {
+            continue;
+        }
         builder.reset();
         builder.add_state_for_tokens(&TokenSet::from_iter([symbol]));
         for state in &builder.table.states {

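The guard runs once per lexical variable, before builder.reset() and the state construction that produces the large character sets named in the commit title, so an unused terminal now costs only a linear scan of the syntax grammar's production steps. A self-contained sketch of the whole pattern follows the next hunk.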

@@ -173,7 +173,7 @@ fn flatten_variable(variable: Variable) -> SyntaxVariable {
     }
 }
 
-fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
+pub fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
     for variable in variables {
         for production in &variable.productions {
             for step in &production.steps {

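The hunk above cuts off inside symbol_is_used. Below is a minimal, self-contained sketch of the whole pattern with simplified stand-in types (Symbol, ProductionStep, Production, and SyntaxVariable here are hypothetical stubs, not tree-sitter's real definitions), and the innermost step.symbol == symbol comparison is an assumption, since the diff does not show it:

// Sketch only: stub types standing in for tree-sitter's grammar structures.
#[derive(Clone, Copy, PartialEq, Eq)]
struct Symbol(usize);

struct ProductionStep {
    symbol: Symbol,
}

struct Production {
    steps: Vec<ProductionStep>,
}

struct SyntaxVariable {
    productions: Vec<Production>,
}

// Same shape as the function in the hunk above; the innermost comparison
// is assumed, since the diff cuts off before the loop body.
pub fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
    for variable in variables {
        for production in &variable.productions {
            for step in &production.steps {
                if step.symbol == symbol {
                    return true;
                }
            }
        }
    }
    false
}

fn main() {
    // One syntax variable whose single production references Symbol(0) only.
    let variables = vec![SyntaxVariable {
        productions: vec![Production {
            steps: vec![ProductionStep { symbol: Symbol(0) }],
        }],
    }];

    for ix in 0..2 {
        let symbol = Symbol(ix);
        if !symbol_is_used(&variables, symbol) {
            // Symbol(1) is never referenced by any production step, so the
            // expensive character-set construction is skipped for it.
            continue;
        }
        println!("would build a large character set for symbol {ix}");
    }
}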

@@ -13,6 +13,7 @@ use std::{
 };
 
 use anyhow::{anyhow, Result};
 
+pub(super) use flatten_grammar::symbol_is_used;
 pub use self::expand_tokens::expand_tokens;
 use self::{