Add a --no-minimize flag to suppress table minimization for debugging

This commit is contained in:
Max Brunsfeld 2019-01-04 09:11:44 -08:00
parent bf9556dadc
commit 70aa4c2b2d
6 changed files with 58 additions and 24 deletions

View file

@@ -12,6 +12,7 @@ pub(crate) fn build_lex_table(
syntax_grammar: &SyntaxGrammar, syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar, lexical_grammar: &LexicalGrammar,
keywords: &LookaheadSet, keywords: &LookaheadSet,
minimize: bool,
) -> (LexTable, LexTable) { ) -> (LexTable, LexTable) {
let keyword_lex_table; let keyword_lex_table;
if syntax_grammar.word_token.is_some() { if syntax_grammar.word_token.is_some() {
@@ -41,7 +42,10 @@ pub(crate) fn build_lex_table(
} }
let mut table = builder.table; let mut table = builder.table;
shrink_lex_table(&mut table, parse_table);
if minimize {
minimize_lex_table(&mut table, parse_table);
}
(table, keyword_lex_table) (table, keyword_lex_table)
} }
@@ -147,14 +151,20 @@ impl<'a> LexTableBuilder<'a> {
completion = Some((id, prec)); completion = Some((id, prec));
} }
info!(
"lex state: {}, completion: {:?}",
state_id,
completion.map(|(id, prec)| (&self.lexical_grammar.variables[id].name, prec))
);
let successors = self.cursor.grouped_successors(); let successors = self.cursor.grouped_successors();
info!("populate state: {}, successors: {:?}", state_id, successors); info!("lex state: {}, successors: {:?}", state_id, successors);
// If EOF is a valid lookahead token, add a transition predicated on the null // If EOF is a valid lookahead token, add a transition predicated on the null
// character that leads to the empty set of NFA states. // character that leads to the empty set of NFA states.
if eof_valid { if eof_valid {
let (next_state_id, _) = self.add_state(Vec::new(), false); let (next_state_id, _) = self.add_state(Vec::new(), false);
info!("populate state: {}, character: EOF", state_id); info!("lex state: {}, successor: EOF", state_id);
self.table.states[state_id].advance_actions.push(( self.table.states[state_id].advance_actions.push((
CharacterSet::empty().add_char('\0'), CharacterSet::empty().add_char('\0'),
AdvanceAction { AdvanceAction {
@@ -166,7 +176,9 @@ impl<'a> LexTableBuilder<'a> {
for (chars, advance_precedence, next_states, is_sep) in successors { for (chars, advance_precedence, next_states, is_sep) in successors {
if let Some((_, completed_precedence)) = completion { if let Some((_, completed_precedence)) = completion {
if advance_precedence < completed_precedence { if advance_precedence < completed_precedence
|| (advance_precedence == completed_precedence && is_sep)
{
continue; continue;
} }
} }
@@ -188,7 +200,7 @@ impl<'a> LexTableBuilder<'a> {
} }
} }
fn shrink_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
let mut state_replacements = BTreeMap::new(); let mut state_replacements = BTreeMap::new();
let mut done = false; let mut done = false;
while !done { while !done {

View file

@@ -67,7 +67,7 @@ impl<'a> ParseTableBuilder<'a> {
// info!( // info!(
// "state: {}, item set: {}", // "state: {}, item set: {}",
// entry.state_id, // entry.state_id,
// ParseItemSetDisplay( // super::item::ParseItemSetDisplay(
// &self.item_sets_by_state_id[entry.state_id], // &self.item_sets_by_state_id[entry.state_id],
// self.syntax_grammar, // self.syntax_grammar,
// self.lexical_grammar, // self.lexical_grammar,
@@ -77,6 +77,17 @@ impl<'a> ParseTableBuilder<'a> {
let item_set = self let item_set = self
.item_set_builder .item_set_builder
.transitive_closure(&self.item_sets_by_state_id[entry.state_id]); .transitive_closure(&self.item_sets_by_state_id[entry.state_id]);
// info!(
// "state: {}, closed item set: {}",
// entry.state_id,
// super::item::ParseItemSetDisplay(
// &item_set,
// self.syntax_grammar,
// self.lexical_grammar,
// )
// );
self.add_actions( self.add_actions(
entry.preceding_symbols, entry.preceding_symbols,
entry.preceding_auxiliary_symbols, entry.preceding_auxiliary_symbols,

View file

@@ -5,26 +5,26 @@ use crate::rules::{AliasMap, Symbol};
use crate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry}; use crate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry};
use hashbrown::{HashMap, HashSet}; use hashbrown::{HashMap, HashSet};
pub(crate) fn shrink_parse_table( pub(crate) fn minimize_parse_table(
parse_table: &mut ParseTable, parse_table: &mut ParseTable,
syntax_grammar: &SyntaxGrammar, syntax_grammar: &SyntaxGrammar,
simple_aliases: &AliasMap, simple_aliases: &AliasMap,
token_conflict_map: &TokenConflictMap, token_conflict_map: &TokenConflictMap,
keywords: &LookaheadSet, keywords: &LookaheadSet,
) { ) {
let mut optimizer = Optimizer { let mut minimizer = Minimizer {
parse_table, parse_table,
syntax_grammar, syntax_grammar,
token_conflict_map, token_conflict_map,
keywords, keywords,
simple_aliases, simple_aliases,
}; };
optimizer.remove_unit_reductions(); minimizer.remove_unit_reductions();
optimizer.merge_compatible_states(); minimizer.merge_compatible_states();
optimizer.remove_unused_states(); minimizer.remove_unused_states();
} }
struct Optimizer<'a> { struct Minimizer<'a> {
parse_table: &'a mut ParseTable, parse_table: &'a mut ParseTable,
syntax_grammar: &'a SyntaxGrammar, syntax_grammar: &'a SyntaxGrammar,
token_conflict_map: &'a TokenConflictMap<'a>, token_conflict_map: &'a TokenConflictMap<'a>,
@@ -32,7 +32,7 @@ struct Optimizer<'a> {
simple_aliases: &'a AliasMap, simple_aliases: &'a AliasMap,
} }
impl<'a> Optimizer<'a> { impl<'a> Minimizer<'a> {
fn remove_unit_reductions(&mut self) { fn remove_unit_reductions(&mut self) {
let mut aliased_symbols = HashSet::new(); let mut aliased_symbols = HashSet::new();
for variable in &self.syntax_grammar.variables { for variable in &self.syntax_grammar.variables {

View file

@@ -3,14 +3,14 @@ mod build_parse_table;
mod coincident_tokens; mod coincident_tokens;
mod item; mod item;
mod item_set_builder; mod item_set_builder;
mod shrink_parse_table; mod minimize_parse_table;
mod token_conflicts; mod token_conflicts;
use self::build_lex_table::build_lex_table; use self::build_lex_table::build_lex_table;
use self::build_parse_table::build_parse_table; use self::build_parse_table::build_parse_table;
use self::coincident_tokens::CoincidentTokenIndex; use self::coincident_tokens::CoincidentTokenIndex;
use self::item::LookaheadSet; use self::item::LookaheadSet;
use self::shrink_parse_table::shrink_parse_table; use self::minimize_parse_table::minimize_parse_table;
use self::token_conflicts::TokenConflictMap; use self::token_conflicts::TokenConflictMap;
use crate::error::Result; use crate::error::Result;
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
@@ -23,6 +23,7 @@ pub(crate) fn build_tables(
lexical_grammar: &LexicalGrammar, lexical_grammar: &LexicalGrammar,
simple_aliases: &AliasMap, simple_aliases: &AliasMap,
inlines: &InlinedProductionMap, inlines: &InlinedProductionMap,
minimize: bool,
) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> { ) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
let (mut parse_table, following_tokens) = let (mut parse_table, following_tokens) =
build_parse_table(syntax_grammar, lexical_grammar, inlines)?; build_parse_table(syntax_grammar, lexical_grammar, inlines)?;
@@ -42,15 +43,22 @@ pub(crate) fn build_tables(
&coincident_token_index, &coincident_token_index,
&token_conflict_map, &token_conflict_map,
); );
shrink_parse_table( if minimize {
minimize_parse_table(
&mut parse_table,
syntax_grammar,
simple_aliases,
&token_conflict_map,
&keywords,
);
}
let (main_lex_table, keyword_lex_table) = build_lex_table(
&mut parse_table, &mut parse_table,
syntax_grammar, syntax_grammar,
simple_aliases, lexical_grammar,
&token_conflict_map,
&keywords, &keywords,
minimize,
); );
let (main_lex_table, keyword_lex_table) =
build_lex_table(&mut parse_table, syntax_grammar, lexical_grammar, &keywords);
Ok(( Ok((
parse_table, parse_table,
main_lex_table, main_lex_table,

View file

@@ -4,14 +4,15 @@ use crate::prepare_grammar::prepare_grammar;
use crate::build_tables::build_tables; use crate::build_tables::build_tables;
use crate::render::render_c_code; use crate::render::render_c_code;
pub fn generate_parser_for_grammar(input: &str) -> Result<String> { pub fn generate_parser_for_grammar(input: &str, minimize: bool) -> Result<String> {
let input_grammar = parse_grammar(input)?; let input_grammar = parse_grammar(input)?;
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = prepare_grammar(&input_grammar)?; let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = prepare_grammar(&input_grammar)?;
let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables( let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
&syntax_grammar, &syntax_grammar,
&lexical_grammar, &lexical_grammar,
&simple_aliases, &simple_aliases,
&inlines &inlines,
minimize
)?; )?;
let c_code = render_c_code( let c_code = render_c_code(
&input_grammar.name, &input_grammar.name,

View file

@@ -33,7 +33,8 @@ fn main() -> error::Result<()> {
.subcommand( .subcommand(
SubCommand::with_name("generate") SubCommand::with_name("generate")
.about("Generate a parser") .about("Generate a parser")
.arg(Arg::with_name("log").long("log")), .arg(Arg::with_name("log").long("log"))
.arg(Arg::with_name("no-minimize").long("no-minimize")),
) )
.subcommand( .subcommand(
SubCommand::with_name("parse") SubCommand::with_name("parse")
@@ -54,10 +55,11 @@ fn main() -> error::Result<()> {
logger::init(); logger::init();
} }
let minimize = !matches.is_present("no-minimize");
let mut grammar_path = env::current_dir().expect("Failed to read CWD"); let mut grammar_path = env::current_dir().expect("Failed to read CWD");
grammar_path.push("grammar.js"); grammar_path.push("grammar.js");
let grammar_json = load_js_grammar_file(grammar_path); let grammar_json = load_js_grammar_file(grammar_path);
let code = generate::generate_parser_for_grammar(&grammar_json)?; let code = generate::generate_parser_for_grammar(&grammar_json, minimize)?;
println!("{}", code); println!("{}", code);
} }