fix(generate): remove unused rules

Amaan Qureshi 2024-10-03 14:39:41 -04:00
parent 19c33b1ef7
commit 99a0ddc4c2
4 changed files with 97 additions and 27 deletions

View file

@@ -10,7 +10,6 @@ use crate::{
     dedup::split_state_id_groups,
     grammars::{LexicalGrammar, SyntaxGrammar},
     nfa::{CharacterSet, NfaCursor},
-    prepare_grammar::symbol_is_used,
    rules::{Symbol, TokenSet},
    tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable},
};
@@ -94,9 +93,6 @@ pub fn build_lex_table(
     let mut large_character_sets = Vec::new();
     for (variable_ix, _variable) in lexical_grammar.variables.iter().enumerate() {
         let symbol = Symbol::terminal(variable_ix);
-        if !symbol_is_used(&syntax_grammar.variables, symbol) {
-            continue;
-        }
         builder.reset();
         builder.add_state_for_tokens(&TokenSet::from_iter([symbol]));
         for state in &builder.table.states {

View file

@@ -1,3 +1,5 @@
+use std::collections::HashSet;
+
 use anyhow::{anyhow, Result};
 use serde::Deserialize;
 use serde_json::{Map, Value};
@@ -94,15 +96,106 @@ pub struct GrammarJSON {
     word: Option<String>,
 }
 
+fn rule_is_referenced(rule: &Rule, target: &str) -> bool {
+    match rule {
+        Rule::NamedSymbol(name) => name == target,
+        Rule::Choice(rules) | Rule::Seq(rules) => {
+            rules.iter().any(|r| rule_is_referenced(r, target))
+        }
+        Rule::Metadata { rule, .. } => rule_is_referenced(rule, target),
+        Rule::Repeat(inner) => rule_is_referenced(inner, target),
+        Rule::Blank | Rule::String(_) | Rule::Pattern(_, _) | Rule::Symbol(_) => false,
+    }
+}
+
+fn variable_is_used(
+    grammar_rules: &[(String, Rule)],
+    extras: &[Rule],
+    externals: &[Rule],
+    target_name: &str,
+    in_progress: &mut HashSet<String>,
+) -> bool {
+    let root = &grammar_rules.first().unwrap().0;
+    if target_name == root {
+        return true;
+    }
+    if extras
+        .iter()
+        .chain(externals.iter())
+        .any(|rule| rule_is_referenced(rule, target_name))
+    {
+        return true;
+    }
+    in_progress.insert(target_name.to_string());
+    let result = grammar_rules
+        .iter()
+        .filter(|(key, _)| *key != target_name)
+        .any(|(name, rule)| {
+            if !rule_is_referenced(rule, target_name) || in_progress.contains(name) {
+                return false;
+            }
+            variable_is_used(grammar_rules, extras, externals, name, in_progress)
+        });
+    in_progress.remove(target_name);
+    result
+}
+
 pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
-    let grammar_json = serde_json::from_str::<GrammarJSON>(input)?;
+    let mut grammar_json = serde_json::from_str::<GrammarJSON>(input)?;
+
+    let mut extra_symbols =
+        grammar_json
+            .extras
+            .into_iter()
+            .try_fold(Vec::new(), |mut acc, item| {
+                let rule = parse_rule(item);
+                if let Rule::String(ref value) = rule {
+                    if value.is_empty() {
+                        return Err(anyhow!(
+                            "Rules in the `extras` array must not contain empty strings"
+                        ));
+                    }
+                }
+                acc.push(rule);
+                Ok(acc)
+            })?;
+
+    let mut external_tokens = grammar_json
+        .externals
+        .into_iter()
+        .map(parse_rule)
+        .collect::<Vec<_>>();
 
     let mut variables = Vec::with_capacity(grammar_json.rules.len());
-    for (name, value) in grammar_json.rules {
+    let rules = grammar_json
+        .rules
+        .into_iter()
+        .map(|(n, r)| Ok((n, parse_rule(serde_json::from_value(r)?))))
+        .collect::<Result<Vec<_>>>()?;
+    let mut in_progress = HashSet::new();
+    for (name, rule) in &rules {
+        if !variable_is_used(
+            &rules,
+            &extra_symbols,
+            &external_tokens,
+            name,
+            &mut in_progress,
+        ) {
+            extra_symbols.retain(|r| !rule_is_referenced(r, name));
+            external_tokens.retain(|r| !rule_is_referenced(r, name));
+            grammar_json.supertypes.retain(|r| r != name);
+            continue;
+        }
         variables.push(Variable {
             name: name.clone(),
             kind: VariableType::Named,
-            rule: parse_rule(serde_json::from_value(value)?),
+            rule: rule.clone(),
         });
     }
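A minimal usage sketch of the reachability check above (an illustration, not part of this commit; the toy rule names are made up): `a` is the root and therefore always used, `b` is reachable from it, while `c` and `d` only reference each other, so the `in_progress` guard stops the cycle and both count as unused.

    // Hypothetical toy rule set; the root is the first entry.
    let rules = vec![
        ("a".to_string(), Rule::NamedSymbol("b".to_string())),
        ("b".to_string(), Rule::String("x".to_string())),
        ("c".to_string(), Rule::NamedSymbol("d".to_string())),
        ("d".to_string(), Rule::NamedSymbol("c".to_string())),
    ];
    let mut in_progress = HashSet::new();
    assert!(variable_is_used(&rules, &[], &[], "a", &mut in_progress)); // root is always used
    assert!(variable_is_used(&rules, &[], &[], "b", &mut in_progress)); // referenced by the root
    assert!(!variable_is_used(&rules, &[], &[], "c", &mut in_progress)); // only part of a dead cycle
    assert!(!variable_is_used(&rules, &[], &[], "d", &mut in_progress)); // only part of a dead cycle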
@@ -123,24 +216,6 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
         precedence_orderings.push(ordering);
     }
 
-    let extra_symbols = grammar_json
-        .extras
-        .into_iter()
-        .try_fold(Vec::new(), |mut acc, item| {
-            let rule = parse_rule(item);
-            if let Rule::String(ref value) = rule {
-                if value.is_empty() {
-                    return Err(anyhow!(
-                        "Rules in the `extras` array must not contain empty strings"
-                    ));
-                }
-            }
-            acc.push(rule);
-            Ok(acc)
-        })?;
-
-    let external_tokens = grammar_json.externals.into_iter().map(parse_rule).collect();
-
     Ok(InputGrammar {
         name: grammar_json.name,
         word_token: grammar_json.word,
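An end-to-end sketch of the new `parse_grammar` behavior (a hypothetical test, not from this commit; the test name and the grammar JSON are assumptions): a rule reachable from neither the first (root) rule nor `extras`/`externals` no longer appears in the returned grammar's variables.

    #[test]
    fn drops_unreferenced_rules() {
        // `unused` is never referenced, so the pruning pass drops it;
        // `used` survives because the root rule `main` refers to it.
        let input = r#"{
            "name": "example",
            "rules": {
                "main": {"type": "SYMBOL", "name": "used"},
                "used": {"type": "STRING", "value": "x"},
                "unused": {"type": "STRING", "value": "y"}
            }
        }"#;
        let grammar = parse_grammar(input).unwrap();
        let names: Vec<_> = grammar.variables.iter().map(|v| v.name.as_str()).collect();
        assert_eq!(names, ["main", "used"]);
    }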

View file

@@ -173,7 +173,7 @@ fn flatten_variable(variable: Variable) -> SyntaxVariable {
     }
 }
 
-pub fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
+fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
     for variable in variables {
         for production in &variable.productions {
             for step in &production.steps {

View file

@@ -13,7 +13,6 @@ use std::{
 };
 
 use anyhow::{anyhow, Result};
 
-pub(super) use flatten_grammar::symbol_is_used;
 pub use self::expand_tokens::expand_tokens;
 use self::{