From 99a0ddc4c23d3f6be77ca0db02a3e1cdd3f4e55c Mon Sep 17 00:00:00 2001
From: Amaan Qureshi
Date: Thu, 3 Oct 2024 14:39:41 -0400
Subject: [PATCH] fix(generate): remove unused rules

Drop grammar rules that can never be reached while parsing the grammar
JSON, instead of skipping unused terminals later in `build_lex_table`.

`parse_grammar` now parses `extras` and `externals` first, then keeps a
rule only if `variable_is_used` reports it as used: the rule is the first
rule in the grammar, is referenced from `extras` or `externals`, or is
referenced by another rule that is itself used. Rules that are dropped
are also removed from `supertypes`.

`symbol_is_used` is no longer re-exported from `prepare_grammar` and
becomes private to `flatten_grammar.rs`.
---
Review notes (ignored by `git am`):
- `variable_is_used` returns `true` as soon as any extra or external
  references the rule, so in the `!variable_is_used(..)` branch the two
  `retain` calls on `extra_symbols` / `external_tokens` cannot remove
  anything; only the `supertypes` filter has an effect there.
- `in_progress` holds the names on the current recursion path so that
  mutually recursive rules do not recurse forever.

 .../src/build_tables/build_lex_table.rs       |   4 -
 cli/generate/src/parse_grammar.rs             | 117 ++++++++++++++----
 .../src/prepare_grammar/flatten_grammar.rs    |   2 +-
 cli/generate/src/prepare_grammar/mod.rs       |   1 -
 4 files changed, 97 insertions(+), 27 deletions(-)

diff --git a/cli/generate/src/build_tables/build_lex_table.rs b/cli/generate/src/build_tables/build_lex_table.rs
index 9e079008..c96e7013 100644
--- a/cli/generate/src/build_tables/build_lex_table.rs
+++ b/cli/generate/src/build_tables/build_lex_table.rs
@@ -10,7 +10,6 @@ use crate::{
     dedup::split_state_id_groups,
     grammars::{LexicalGrammar, SyntaxGrammar},
     nfa::{CharacterSet, NfaCursor},
-    prepare_grammar::symbol_is_used,
     rules::{Symbol, TokenSet},
     tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable},
 };
@@ -94,9 +93,6 @@ pub fn build_lex_table(
     let mut large_character_sets = Vec::new();
     for (variable_ix, _variable) in lexical_grammar.variables.iter().enumerate() {
         let symbol = Symbol::terminal(variable_ix);
-        if !symbol_is_used(&syntax_grammar.variables, symbol) {
-            continue;
-        }
         builder.reset();
         builder.add_state_for_tokens(&TokenSet::from_iter([symbol]));
         for state in &builder.table.states {
diff --git a/cli/generate/src/parse_grammar.rs b/cli/generate/src/parse_grammar.rs
index d2f37c39..2771d52e 100644
--- a/cli/generate/src/parse_grammar.rs
+++ b/cli/generate/src/parse_grammar.rs
@@ -1,3 +1,5 @@
+use std::collections::HashSet;
+
 use anyhow::{anyhow, Result};
 use serde::Deserialize;
 use serde_json::{Map, Value};
@@ -94,15 +96,106 @@ pub struct GrammarJSON {
     word: Option<String>,
 }

+fn rule_is_referenced(rule: &Rule, target: &str) -> bool {
+    match rule {
+        Rule::NamedSymbol(name) => name == target,
+        Rule::Choice(rules) | Rule::Seq(rules) => {
+            rules.iter().any(|r| rule_is_referenced(r, target))
+        }
+        Rule::Metadata { rule, .. } => rule_is_referenced(rule, target),
+        Rule::Repeat(inner) => rule_is_referenced(inner, target),
+        Rule::Blank | Rule::String(_) | Rule::Pattern(_, _) | Rule::Symbol(_) => false,
+    }
+}
+
+fn variable_is_used(
+    grammar_rules: &[(String, Rule)],
+    extras: &[Rule],
+    externals: &[Rule],
+    target_name: &str,
+    in_progress: &mut HashSet<String>,
+) -> bool {
+    let root = &grammar_rules.first().unwrap().0;
+    if target_name == root {
+        return true;
+    }
+
+    if extras
+        .iter()
+        .chain(externals.iter())
+        .any(|rule| rule_is_referenced(rule, target_name))
+    {
+        return true;
+    }
+
+    in_progress.insert(target_name.to_string());
+    let result = grammar_rules
+        .iter()
+        .filter(|(key, _)| *key != target_name)
+        .any(|(name, rule)| {
+            if !rule_is_referenced(rule, target_name) || in_progress.contains(name) {
+                return false;
+            }
+            variable_is_used(grammar_rules, extras, externals, name, in_progress)
+        });
+    in_progress.remove(target_name);
+
+    result
+}
+
 pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
-    let grammar_json = serde_json::from_str::<GrammarJSON>(input)?;
+    let mut grammar_json = serde_json::from_str::<GrammarJSON>(input)?;
+
+    let mut extra_symbols =
+        grammar_json
+            .extras
+            .into_iter()
+            .try_fold(Vec::new(), |mut acc, item| {
+                let rule = parse_rule(item);
+                if let Rule::String(ref value) = rule {
+                    if value.is_empty() {
+                        return Err(anyhow!(
+                            "Rules in the `extras` array must not contain empty strings"
+                        ));
+                    }
+                }
+                acc.push(rule);
+                Ok(acc)
+            })?;
+
+    let mut external_tokens = grammar_json
+        .externals
+        .into_iter()
+        .map(parse_rule)
+        .collect::<Vec<_>>();

     let mut variables = Vec::with_capacity(grammar_json.rules.len());
-    for (name, value) in grammar_json.rules {
+
+    let rules = grammar_json
+        .rules
+        .into_iter()
+        .map(|(n, r)| Ok((n, parse_rule(serde_json::from_value(r)?))))
+        .collect::<Result<Vec<_>>>()?;
+
+    let mut in_progress = HashSet::new();
+
+    for (name, rule) in &rules {
+        if !variable_is_used(
+            &rules,
+            &extra_symbols,
+            &external_tokens,
+            name,
+            &mut in_progress,
+        ) {
+            extra_symbols.retain(|r| !rule_is_referenced(r, name));
+            external_tokens.retain(|r| !rule_is_referenced(r, name));
+            grammar_json.supertypes.retain(|r| r != name);
+            continue;
+        }
         variables.push(Variable {
             name: name.clone(),
             kind: VariableType::Named,
-            rule: parse_rule(serde_json::from_value(value)?),
+            rule: rule.clone(),
         });
     }

@@ -123,24 +216,6 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
         precedence_orderings.push(ordering);
     }

-    let extra_symbols = grammar_json
-        .extras
-        .into_iter()
-        .try_fold(Vec::new(), |mut acc, item| {
-            let rule = parse_rule(item);
-            if let Rule::String(ref value) = rule {
-                if value.is_empty() {
-                    return Err(anyhow!(
-                        "Rules in the `extras` array must not contain empty strings"
-                    ));
-                }
-            }
-            acc.push(rule);
-            Ok(acc)
-        })?;
-
-    let external_tokens = grammar_json.externals.into_iter().map(parse_rule).collect();
-
     Ok(InputGrammar {
         name: grammar_json.name,
         word_token: grammar_json.word,
diff --git a/cli/generate/src/prepare_grammar/flatten_grammar.rs b/cli/generate/src/prepare_grammar/flatten_grammar.rs
index 1a48706c..e01bc0b0 100644
--- a/cli/generate/src/prepare_grammar/flatten_grammar.rs
+++ b/cli/generate/src/prepare_grammar/flatten_grammar.rs
@@ -173,7 +173,7 @@ fn flatten_variable(variable: Variable) -> SyntaxVariable {
     }
 }

-pub fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
+fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
     for variable in variables {
         for production in &variable.productions {
             for step in &production.steps {
diff --git a/cli/generate/src/prepare_grammar/mod.rs b/cli/generate/src/prepare_grammar/mod.rs
index 5e0a0471..ea97ac1b 100644
--- a/cli/generate/src/prepare_grammar/mod.rs
+++ b/cli/generate/src/prepare_grammar/mod.rs
@@ -13,7 +13,6 @@ use std::{
 };

 use anyhow::{anyhow, Result};
-pub(super) use flatten_grammar::symbol_is_used;

 pub use self::expand_tokens::expand_tokens;
 use self::{