Add a --no-minimize flag to suppress table minimization for debugging
This commit is contained in:
parent
bf9556dadc
commit
70aa4c2b2d
6 changed files with 58 additions and 24 deletions
|
|
@ -12,6 +12,7 @@ pub(crate) fn build_lex_table(
|
||||||
syntax_grammar: &SyntaxGrammar,
|
syntax_grammar: &SyntaxGrammar,
|
||||||
lexical_grammar: &LexicalGrammar,
|
lexical_grammar: &LexicalGrammar,
|
||||||
keywords: &LookaheadSet,
|
keywords: &LookaheadSet,
|
||||||
|
minimize: bool,
|
||||||
) -> (LexTable, LexTable) {
|
) -> (LexTable, LexTable) {
|
||||||
let keyword_lex_table;
|
let keyword_lex_table;
|
||||||
if syntax_grammar.word_token.is_some() {
|
if syntax_grammar.word_token.is_some() {
|
||||||
|
|
@ -41,7 +42,10 @@ pub(crate) fn build_lex_table(
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut table = builder.table;
|
let mut table = builder.table;
|
||||||
shrink_lex_table(&mut table, parse_table);
|
|
||||||
|
if minimize {
|
||||||
|
minimize_lex_table(&mut table, parse_table);
|
||||||
|
}
|
||||||
|
|
||||||
(table, keyword_lex_table)
|
(table, keyword_lex_table)
|
||||||
}
|
}
|
||||||
|
|
@ -147,14 +151,20 @@ impl<'a> LexTableBuilder<'a> {
|
||||||
completion = Some((id, prec));
|
completion = Some((id, prec));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
info!(
|
||||||
|
"lex state: {}, completion: {:?}",
|
||||||
|
state_id,
|
||||||
|
completion.map(|(id, prec)| (&self.lexical_grammar.variables[id].name, prec))
|
||||||
|
);
|
||||||
|
|
||||||
let successors = self.cursor.grouped_successors();
|
let successors = self.cursor.grouped_successors();
|
||||||
info!("populate state: {}, successors: {:?}", state_id, successors);
|
info!("lex state: {}, successors: {:?}", state_id, successors);
|
||||||
|
|
||||||
// If EOF is a valid lookahead token, add a transition predicated on the null
|
// If EOF is a valid lookahead token, add a transition predicated on the null
|
||||||
// character that leads to the empty set of NFA states.
|
// character that leads to the empty set of NFA states.
|
||||||
if eof_valid {
|
if eof_valid {
|
||||||
let (next_state_id, _) = self.add_state(Vec::new(), false);
|
let (next_state_id, _) = self.add_state(Vec::new(), false);
|
||||||
info!("populate state: {}, character: EOF", state_id);
|
info!("lex state: {}, successor: EOF", state_id);
|
||||||
self.table.states[state_id].advance_actions.push((
|
self.table.states[state_id].advance_actions.push((
|
||||||
CharacterSet::empty().add_char('\0'),
|
CharacterSet::empty().add_char('\0'),
|
||||||
AdvanceAction {
|
AdvanceAction {
|
||||||
|
|
@ -166,7 +176,9 @@ impl<'a> LexTableBuilder<'a> {
|
||||||
|
|
||||||
for (chars, advance_precedence, next_states, is_sep) in successors {
|
for (chars, advance_precedence, next_states, is_sep) in successors {
|
||||||
if let Some((_, completed_precedence)) = completion {
|
if let Some((_, completed_precedence)) = completion {
|
||||||
if advance_precedence < completed_precedence {
|
if advance_precedence < completed_precedence
|
||||||
|
|| (advance_precedence == completed_precedence && is_sep)
|
||||||
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -188,7 +200,7 @@ impl<'a> LexTableBuilder<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn shrink_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
|
fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
|
||||||
let mut state_replacements = BTreeMap::new();
|
let mut state_replacements = BTreeMap::new();
|
||||||
let mut done = false;
|
let mut done = false;
|
||||||
while !done {
|
while !done {
|
||||||
|
|
|
||||||
|
|
@ -67,7 +67,7 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
// info!(
|
// info!(
|
||||||
// "state: {}, item set: {}",
|
// "state: {}, item set: {}",
|
||||||
// entry.state_id,
|
// entry.state_id,
|
||||||
// ParseItemSetDisplay(
|
// super::item::ParseItemSetDisplay(
|
||||||
// &self.item_sets_by_state_id[entry.state_id],
|
// &self.item_sets_by_state_id[entry.state_id],
|
||||||
// self.syntax_grammar,
|
// self.syntax_grammar,
|
||||||
// self.lexical_grammar,
|
// self.lexical_grammar,
|
||||||
|
|
@ -77,6 +77,17 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
let item_set = self
|
let item_set = self
|
||||||
.item_set_builder
|
.item_set_builder
|
||||||
.transitive_closure(&self.item_sets_by_state_id[entry.state_id]);
|
.transitive_closure(&self.item_sets_by_state_id[entry.state_id]);
|
||||||
|
|
||||||
|
// info!(
|
||||||
|
// "state: {}, closed item set: {}",
|
||||||
|
// entry.state_id,
|
||||||
|
// super::item::ParseItemSetDisplay(
|
||||||
|
// &item_set,
|
||||||
|
// self.syntax_grammar,
|
||||||
|
// self.lexical_grammar,
|
||||||
|
// )
|
||||||
|
// );
|
||||||
|
|
||||||
self.add_actions(
|
self.add_actions(
|
||||||
entry.preceding_symbols,
|
entry.preceding_symbols,
|
||||||
entry.preceding_auxiliary_symbols,
|
entry.preceding_auxiliary_symbols,
|
||||||
|
|
|
||||||
|
|
@ -5,26 +5,26 @@ use crate::rules::{AliasMap, Symbol};
|
||||||
use crate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry};
|
use crate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry};
|
||||||
use hashbrown::{HashMap, HashSet};
|
use hashbrown::{HashMap, HashSet};
|
||||||
|
|
||||||
pub(crate) fn shrink_parse_table(
|
pub(crate) fn minimize_parse_table(
|
||||||
parse_table: &mut ParseTable,
|
parse_table: &mut ParseTable,
|
||||||
syntax_grammar: &SyntaxGrammar,
|
syntax_grammar: &SyntaxGrammar,
|
||||||
simple_aliases: &AliasMap,
|
simple_aliases: &AliasMap,
|
||||||
token_conflict_map: &TokenConflictMap,
|
token_conflict_map: &TokenConflictMap,
|
||||||
keywords: &LookaheadSet,
|
keywords: &LookaheadSet,
|
||||||
) {
|
) {
|
||||||
let mut optimizer = Optimizer {
|
let mut minimizer = Minimizer {
|
||||||
parse_table,
|
parse_table,
|
||||||
syntax_grammar,
|
syntax_grammar,
|
||||||
token_conflict_map,
|
token_conflict_map,
|
||||||
keywords,
|
keywords,
|
||||||
simple_aliases,
|
simple_aliases,
|
||||||
};
|
};
|
||||||
optimizer.remove_unit_reductions();
|
minimizer.remove_unit_reductions();
|
||||||
optimizer.merge_compatible_states();
|
minimizer.merge_compatible_states();
|
||||||
optimizer.remove_unused_states();
|
minimizer.remove_unused_states();
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Optimizer<'a> {
|
struct Minimizer<'a> {
|
||||||
parse_table: &'a mut ParseTable,
|
parse_table: &'a mut ParseTable,
|
||||||
syntax_grammar: &'a SyntaxGrammar,
|
syntax_grammar: &'a SyntaxGrammar,
|
||||||
token_conflict_map: &'a TokenConflictMap<'a>,
|
token_conflict_map: &'a TokenConflictMap<'a>,
|
||||||
|
|
@ -32,7 +32,7 @@ struct Optimizer<'a> {
|
||||||
simple_aliases: &'a AliasMap,
|
simple_aliases: &'a AliasMap,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Optimizer<'a> {
|
impl<'a> Minimizer<'a> {
|
||||||
fn remove_unit_reductions(&mut self) {
|
fn remove_unit_reductions(&mut self) {
|
||||||
let mut aliased_symbols = HashSet::new();
|
let mut aliased_symbols = HashSet::new();
|
||||||
for variable in &self.syntax_grammar.variables {
|
for variable in &self.syntax_grammar.variables {
|
||||||
|
|
@ -3,14 +3,14 @@ mod build_parse_table;
|
||||||
mod coincident_tokens;
|
mod coincident_tokens;
|
||||||
mod item;
|
mod item;
|
||||||
mod item_set_builder;
|
mod item_set_builder;
|
||||||
mod shrink_parse_table;
|
mod minimize_parse_table;
|
||||||
mod token_conflicts;
|
mod token_conflicts;
|
||||||
|
|
||||||
use self::build_lex_table::build_lex_table;
|
use self::build_lex_table::build_lex_table;
|
||||||
use self::build_parse_table::build_parse_table;
|
use self::build_parse_table::build_parse_table;
|
||||||
use self::coincident_tokens::CoincidentTokenIndex;
|
use self::coincident_tokens::CoincidentTokenIndex;
|
||||||
use self::item::LookaheadSet;
|
use self::item::LookaheadSet;
|
||||||
use self::shrink_parse_table::shrink_parse_table;
|
use self::minimize_parse_table::minimize_parse_table;
|
||||||
use self::token_conflicts::TokenConflictMap;
|
use self::token_conflicts::TokenConflictMap;
|
||||||
use crate::error::Result;
|
use crate::error::Result;
|
||||||
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
|
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
|
||||||
|
|
@ -23,6 +23,7 @@ pub(crate) fn build_tables(
|
||||||
lexical_grammar: &LexicalGrammar,
|
lexical_grammar: &LexicalGrammar,
|
||||||
simple_aliases: &AliasMap,
|
simple_aliases: &AliasMap,
|
||||||
inlines: &InlinedProductionMap,
|
inlines: &InlinedProductionMap,
|
||||||
|
minimize: bool,
|
||||||
) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
|
) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
|
||||||
let (mut parse_table, following_tokens) =
|
let (mut parse_table, following_tokens) =
|
||||||
build_parse_table(syntax_grammar, lexical_grammar, inlines)?;
|
build_parse_table(syntax_grammar, lexical_grammar, inlines)?;
|
||||||
|
|
@ -42,15 +43,22 @@ pub(crate) fn build_tables(
|
||||||
&coincident_token_index,
|
&coincident_token_index,
|
||||||
&token_conflict_map,
|
&token_conflict_map,
|
||||||
);
|
);
|
||||||
shrink_parse_table(
|
if minimize {
|
||||||
|
minimize_parse_table(
|
||||||
|
&mut parse_table,
|
||||||
|
syntax_grammar,
|
||||||
|
simple_aliases,
|
||||||
|
&token_conflict_map,
|
||||||
|
&keywords,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let (main_lex_table, keyword_lex_table) = build_lex_table(
|
||||||
&mut parse_table,
|
&mut parse_table,
|
||||||
syntax_grammar,
|
syntax_grammar,
|
||||||
simple_aliases,
|
lexical_grammar,
|
||||||
&token_conflict_map,
|
|
||||||
&keywords,
|
&keywords,
|
||||||
|
minimize,
|
||||||
);
|
);
|
||||||
let (main_lex_table, keyword_lex_table) =
|
|
||||||
build_lex_table(&mut parse_table, syntax_grammar, lexical_grammar, &keywords);
|
|
||||||
Ok((
|
Ok((
|
||||||
parse_table,
|
parse_table,
|
||||||
main_lex_table,
|
main_lex_table,
|
||||||
|
|
|
||||||
|
|
@ -4,14 +4,15 @@ use crate::prepare_grammar::prepare_grammar;
|
||||||
use crate::build_tables::build_tables;
|
use crate::build_tables::build_tables;
|
||||||
use crate::render::render_c_code;
|
use crate::render::render_c_code;
|
||||||
|
|
||||||
pub fn generate_parser_for_grammar(input: &str) -> Result<String> {
|
pub fn generate_parser_for_grammar(input: &str, minimize: bool) -> Result<String> {
|
||||||
let input_grammar = parse_grammar(input)?;
|
let input_grammar = parse_grammar(input)?;
|
||||||
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = prepare_grammar(&input_grammar)?;
|
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = prepare_grammar(&input_grammar)?;
|
||||||
let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
|
let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
|
||||||
&syntax_grammar,
|
&syntax_grammar,
|
||||||
&lexical_grammar,
|
&lexical_grammar,
|
||||||
&simple_aliases,
|
&simple_aliases,
|
||||||
&inlines
|
&inlines,
|
||||||
|
minimize
|
||||||
)?;
|
)?;
|
||||||
let c_code = render_c_code(
|
let c_code = render_c_code(
|
||||||
&input_grammar.name,
|
&input_grammar.name,
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,8 @@ fn main() -> error::Result<()> {
|
||||||
.subcommand(
|
.subcommand(
|
||||||
SubCommand::with_name("generate")
|
SubCommand::with_name("generate")
|
||||||
.about("Generate a parser")
|
.about("Generate a parser")
|
||||||
.arg(Arg::with_name("log").long("log")),
|
.arg(Arg::with_name("log").long("log"))
|
||||||
|
.arg(Arg::with_name("no-minimize").long("no-minimize")),
|
||||||
)
|
)
|
||||||
.subcommand(
|
.subcommand(
|
||||||
SubCommand::with_name("parse")
|
SubCommand::with_name("parse")
|
||||||
|
|
@ -54,10 +55,11 @@ fn main() -> error::Result<()> {
|
||||||
logger::init();
|
logger::init();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let minimize = !matches.is_present("no-minimize");
|
||||||
let mut grammar_path = env::current_dir().expect("Failed to read CWD");
|
let mut grammar_path = env::current_dir().expect("Failed to read CWD");
|
||||||
grammar_path.push("grammar.js");
|
grammar_path.push("grammar.js");
|
||||||
let grammar_json = load_js_grammar_file(grammar_path);
|
let grammar_json = load_js_grammar_file(grammar_path);
|
||||||
let code = generate::generate_parser_for_grammar(&grammar_json)?;
|
let code = generate::generate_parser_for_grammar(&grammar_json, minimize)?;
|
||||||
println!("{}", code);
|
println!("{}", code);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue