From 70aa4c2b2d97fbcf6e330f85e4d4fd0df026cfce Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 4 Jan 2019 09:11:44 -0800 Subject: [PATCH] Add a --no-minimize flag to suppress table minimization for debugging --- src/build_tables/build_lex_table.rs | 22 ++++++++++++++----- src/build_tables/build_parse_table.rs | 13 ++++++++++- ...parse_table.rs => minimize_parse_table.rs} | 14 ++++++------ src/build_tables/mod.rs | 22 +++++++++++++------ src/generate.rs | 5 +++-- src/main.rs | 6 +++-- 6 files changed, 58 insertions(+), 24 deletions(-) rename src/build_tables/{shrink_parse_table.rs => minimize_parse_table.rs} (97%) diff --git a/src/build_tables/build_lex_table.rs b/src/build_tables/build_lex_table.rs index 60810f83..9c440f4e 100644 --- a/src/build_tables/build_lex_table.rs +++ b/src/build_tables/build_lex_table.rs @@ -12,6 +12,7 @@ pub(crate) fn build_lex_table( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, keywords: &LookaheadSet, + minimize: bool, ) -> (LexTable, LexTable) { let keyword_lex_table; if syntax_grammar.word_token.is_some() { @@ -41,7 +42,10 @@ pub(crate) fn build_lex_table( } let mut table = builder.table; - shrink_lex_table(&mut table, parse_table); + + if minimize { + minimize_lex_table(&mut table, parse_table); + } (table, keyword_lex_table) } @@ -147,14 +151,20 @@ impl<'a> LexTableBuilder<'a> { completion = Some((id, prec)); } + info!( + "lex state: {}, completion: {:?}", + state_id, + completion.map(|(id, prec)| (&self.lexical_grammar.variables[id].name, prec)) + ); + let successors = self.cursor.grouped_successors(); - info!("populate state: {}, successors: {:?}", state_id, successors); + info!("lex state: {}, successors: {:?}", state_id, successors); // If EOF is a valid lookahead token, add a transition predicated on the null // character that leads to the empty set of NFA states. if eof_valid { let (next_state_id, _) = self.add_state(Vec::new(), false); - info!("populate state: {}, character: EOF", state_id); + info!("lex state: {}, successor: EOF", state_id); self.table.states[state_id].advance_actions.push(( CharacterSet::empty().add_char('\0'), AdvanceAction { @@ -166,7 +176,9 @@ impl<'a> LexTableBuilder<'a> { for (chars, advance_precedence, next_states, is_sep) in successors { if let Some((_, completed_precedence)) = completion { - if advance_precedence < completed_precedence { + if advance_precedence < completed_precedence + || (advance_precedence == completed_precedence && is_sep) + { continue; } } @@ -188,7 +200,7 @@ impl<'a> LexTableBuilder<'a> { } } -fn shrink_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { +fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { let mut state_replacements = BTreeMap::new(); let mut done = false; while !done { diff --git a/src/build_tables/build_parse_table.rs b/src/build_tables/build_parse_table.rs index 6f930463..9bccf238 100644 --- a/src/build_tables/build_parse_table.rs +++ b/src/build_tables/build_parse_table.rs @@ -67,7 +67,7 @@ impl<'a> ParseTableBuilder<'a> { // info!( // "state: {}, item set: {}", // entry.state_id, - // ParseItemSetDisplay( + // super::item::ParseItemSetDisplay( // &self.item_sets_by_state_id[entry.state_id], // self.syntax_grammar, // self.lexical_grammar, @@ -77,6 +77,17 @@ impl<'a> ParseTableBuilder<'a> { let item_set = self .item_set_builder .transitive_closure(&self.item_sets_by_state_id[entry.state_id]); + + // info!( + // "state: {}, closed item set: {}", + // entry.state_id, + // super::item::ParseItemSetDisplay( + // &item_set, + // self.syntax_grammar, + // self.lexical_grammar, + // ) + // ); + self.add_actions( entry.preceding_symbols, entry.preceding_auxiliary_symbols, diff --git a/src/build_tables/shrink_parse_table.rs b/src/build_tables/minimize_parse_table.rs similarity index 97% rename from src/build_tables/shrink_parse_table.rs rename to src/build_tables/minimize_parse_table.rs index 64a4b259..573bf974 100644 --- a/src/build_tables/shrink_parse_table.rs +++ b/src/build_tables/minimize_parse_table.rs @@ -5,26 +5,26 @@ use crate::rules::{AliasMap, Symbol}; use crate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry}; use hashbrown::{HashMap, HashSet}; -pub(crate) fn shrink_parse_table( +pub(crate) fn minimize_parse_table( parse_table: &mut ParseTable, syntax_grammar: &SyntaxGrammar, simple_aliases: &AliasMap, token_conflict_map: &TokenConflictMap, keywords: &LookaheadSet, ) { - let mut optimizer = Optimizer { + let mut minimizer = Minimizer { parse_table, syntax_grammar, token_conflict_map, keywords, simple_aliases, }; - optimizer.remove_unit_reductions(); - optimizer.merge_compatible_states(); - optimizer.remove_unused_states(); + minimizer.remove_unit_reductions(); + minimizer.merge_compatible_states(); + minimizer.remove_unused_states(); } -struct Optimizer<'a> { +struct Minimizer<'a> { parse_table: &'a mut ParseTable, syntax_grammar: &'a SyntaxGrammar, token_conflict_map: &'a TokenConflictMap<'a>, @@ -32,7 +32,7 @@ struct Optimizer<'a> { simple_aliases: &'a AliasMap, } -impl<'a> Optimizer<'a> { +impl<'a> Minimizer<'a> { fn remove_unit_reductions(&mut self) { let mut aliased_symbols = HashSet::new(); for variable in &self.syntax_grammar.variables { diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs index 84659600..886594f8 100644 --- a/src/build_tables/mod.rs +++ b/src/build_tables/mod.rs @@ -3,14 +3,14 @@ mod build_parse_table; mod coincident_tokens; mod item; mod item_set_builder; -mod shrink_parse_table; +mod minimize_parse_table; mod token_conflicts; use self::build_lex_table::build_lex_table; use self::build_parse_table::build_parse_table; use self::coincident_tokens::CoincidentTokenIndex; use self::item::LookaheadSet; -use self::shrink_parse_table::shrink_parse_table; +use self::minimize_parse_table::minimize_parse_table; use self::token_conflicts::TokenConflictMap; use crate::error::Result; use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; @@ -23,6 +23,7 @@ pub(crate) fn build_tables( lexical_grammar: &LexicalGrammar, simple_aliases: &AliasMap, inlines: &InlinedProductionMap, + minimize: bool, ) -> Result<(ParseTable, LexTable, LexTable, Option)> { let (mut parse_table, following_tokens) = build_parse_table(syntax_grammar, lexical_grammar, inlines)?; @@ -42,15 +43,22 @@ pub(crate) fn build_tables( &coincident_token_index, &token_conflict_map, ); - shrink_parse_table( + if minimize { + minimize_parse_table( + &mut parse_table, + syntax_grammar, + simple_aliases, + &token_conflict_map, + &keywords, + ); + } + let (main_lex_table, keyword_lex_table) = build_lex_table( &mut parse_table, syntax_grammar, - simple_aliases, - &token_conflict_map, + lexical_grammar, &keywords, + minimize, ); - let (main_lex_table, keyword_lex_table) = - build_lex_table(&mut parse_table, syntax_grammar, lexical_grammar, &keywords); Ok(( parse_table, main_lex_table, diff --git a/src/generate.rs b/src/generate.rs index cdbbea4f..d574c165 100644 --- a/src/generate.rs +++ b/src/generate.rs @@ -4,14 +4,15 @@ use crate::prepare_grammar::prepare_grammar; use crate::build_tables::build_tables; use crate::render::render_c_code; -pub fn generate_parser_for_grammar(input: &str) -> Result { +pub fn generate_parser_for_grammar(input: &str, minimize: bool) -> Result { let input_grammar = parse_grammar(input)?; let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = prepare_grammar(&input_grammar)?; let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables( &syntax_grammar, &lexical_grammar, &simple_aliases, - &inlines + &inlines, + minimize )?; let c_code = render_c_code( &input_grammar.name, diff --git a/src/main.rs b/src/main.rs index a08922b7..10820ed1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -33,7 +33,8 @@ fn main() -> error::Result<()> { .subcommand( SubCommand::with_name("generate") .about("Generate a parser") - .arg(Arg::with_name("log").long("log")), + .arg(Arg::with_name("log").long("log")) + .arg(Arg::with_name("no-minimize").long("no-minimize")), ) .subcommand( SubCommand::with_name("parse") @@ -54,10 +55,11 @@ fn main() -> error::Result<()> { logger::init(); } + let minimize = !matches.is_present("no-minimize"); let mut grammar_path = env::current_dir().expect("Failed to read CWD"); grammar_path.push("grammar.js"); let grammar_json = load_js_grammar_file(grammar_path); - let code = generate::generate_parser_for_grammar(&grammar_json)?; + let code = generate::generate_parser_for_grammar(&grammar_json, minimize)?; println!("{}", code); }