diff --git a/cli/src/build_tables/build_lex_table.rs b/cli/src/build_tables/build_lex_table.rs index 9fc8edc6..0f828f5c 100644 --- a/cli/src/build_tables/build_lex_table.rs +++ b/cli/src/build_tables/build_lex_table.rs @@ -1,9 +1,10 @@ +use super::coincident_tokens::CoincidentTokenIndex; use super::item::TokenSet; use super::token_conflicts::TokenConflictMap; use crate::grammars::{LexicalGrammar, SyntaxGrammar}; use crate::nfa::{CharacterSet, NfaCursor, NfaTransition}; use crate::rules::Symbol; -use crate::tables::{AdvanceAction, LexState, LexTable, ParseTable}; +use crate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable}; use std::collections::hash_map::Entry; use std::collections::{BTreeMap, HashMap, VecDeque}; @@ -12,6 +13,8 @@ pub(crate) fn build_lex_table( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, keywords: &TokenSet, + coincident_token_index: &CoincidentTokenIndex, + token_conflict_map: &TokenConflictMap, minimize: bool, ) -> (LexTable, LexTable) { let keyword_lex_table; @@ -23,8 +26,8 @@ pub(crate) fn build_lex_table( keyword_lex_table = LexTable::default(); } - let mut builder = LexTableBuilder::new(lexical_grammar); - for state in parse_table.states.iter_mut() { + let mut parse_state_ids_by_token_set: Vec<(TokenSet, Vec)> = Vec::new(); + for (i, state) in parse_table.states.iter().enumerate() { let tokens = state .terminal_entries .keys() @@ -42,7 +45,33 @@ pub(crate) fn build_lex_table( } }) .collect(); - state.lex_state_id = builder.add_state_for_tokens(&tokens); + + let mut did_merge = false; + for entry in parse_state_ids_by_token_set.iter_mut() { + if merge_token_set( + &mut entry.0, + &tokens, + lexical_grammar, + token_conflict_map, + coincident_token_index, + ) { + did_merge = true; + entry.1.push(i); + break; + } + } + + if !did_merge { + parse_state_ids_by_token_set.push((tokens, vec![i])); + } + } + + let mut builder = LexTableBuilder::new(lexical_grammar); + for (tokens, parse_state_ids) in 
parse_state_ids_by_token_set { + let lex_state_id = builder.add_state_for_tokens(&tokens); + for id in parse_state_ids { + parse_table.states[id].lex_state_id = lex_state_id; + } } let mut table = builder.table; @@ -215,6 +244,34 @@ impl<'a> LexTableBuilder<'a> { } } /* merge_token_set: attempts to fold other into tokens. For each terminal index below lexical_grammar.variables.len() that is present in exactly one of the two sets, it is checked against every terminal of the set that lacks it; if token_conflict_map.does_conflict reports a conflict or coincident_token_index.contains is false for the pair, the function returns false before tokens is modified. Otherwise tokens.insert_all(other) runs and true is returned. */+fn merge_token_set( + tokens: &mut TokenSet, + other: &TokenSet, + lexical_grammar: &LexicalGrammar, + token_conflict_map: &TokenConflictMap, + coincident_token_index: &CoincidentTokenIndex, +) -> bool { + for i in 0..lexical_grammar.variables.len() { + let symbol = Symbol::terminal(i); /* Indices present in both sets or in neither are skipped via continue. */+ let set_without_terminal = match (tokens.contains_terminal(i), other.contains_terminal(i)) { + (true, false) => other, + (false, true) => tokens, + _ => continue, + }; + + for existing_token in set_without_terminal.terminals() { + if token_conflict_map.does_conflict(i, existing_token.index) + || !coincident_token_index.contains(symbol, existing_token) + { + return false; + } + } + } + + tokens.insert_all(other); + true +} + fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { let mut state_replacements = BTreeMap::new(); let mut done = false; diff --git a/cli/src/build_tables/item.rs b/cli/src/build_tables/item.rs index 5d6edc2f..2be331b0 100644 --- a/cli/src/build_tables/item.rs +++ b/cli/src/build_tables/item.rs @@ -93,6 +93,19 @@ impl TokenSet { .chain(if self.eof { Some(Symbol::end()) } else { None }) } /* terminals: iterates terminal_bits by index and yields Symbol::terminal(i) for each index whose bit is set. */+ pub fn terminals<'a>(&'a self) -> impl Iterator + 'a { + self.terminal_bits + .iter() + .enumerate() + .filter_map(|(i, value)| { + if value { + Some(Symbol::terminal(i)) + } else { + None + } + }) + } + pub fn contains(&self, symbol: &Symbol) -> bool { match symbol.kind { SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"), @@ -102,6 +115,10 @@ impl TokenSet { } } /* contains_terminal: returns the bit stored at index in terminal_bits, or false when get(index) yields None. */+ pub fn contains_terminal(&self, index: usize) -> bool { + self.terminal_bits.get(index).unwrap_or(false) + } + pub fn insert(&mut self, other: Symbol) { let vec = match other.kind { 
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"), diff --git a/cli/src/build_tables/mod.rs b/cli/src/build_tables/mod.rs index c632aa7b..1f9acc14 100644 --- a/cli/src/build_tables/mod.rs +++ b/cli/src/build_tables/mod.rs @@ -59,6 +59,8 @@ pub(crate) fn build_tables( syntax_grammar, lexical_grammar, &keywords, + &coincident_token_index, + &token_conflict_map, minimize, ); Ok((