From 29bc26ecd51c39d73a00cc7c97997aa71da5063d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 18 Feb 2021 15:43:01 -0800 Subject: [PATCH] Fix test failure after non-terminal extras change --- .../build_tables/build_parse_table.rs | 11 ++++--- .../generate/build_tables/item_set_builder.rs | 4 +-- .../build_tables/minimize_parse_table.rs | 8 +++++ .../extract_default_aliases.rs | 8 +++-- cli/src/generate/render.rs | 8 +++-- cli/src/generate/rules.rs | 33 ++++++++++++++++++- cli/src/generate/tables.rs | 17 +++------- 7 files changed, 65 insertions(+), 24 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 4fe7a922..205c8f0c 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -90,7 +90,10 @@ impl<'a> ParseTableBuilder<'a> { production, step_index: 1, }, - &[Symbol::end()].iter().cloned().collect(), + &[Symbol::end_of_nonterminal_extra()] + .iter() + .cloned() + .collect(), ); } } @@ -304,7 +307,7 @@ impl<'a> ParseTableBuilder<'a> { // Finally, add actions for the grammar's `extra` symbols. let state = &mut self.parse_table.states[state_id]; - let is_end_of_non_terminal_extra = state.is_end_of_non_terminal_extra(&self.syntax_grammar); + let is_end_of_non_terminal_extra = state.is_end_of_non_terminal_extra(); // If this state represents the end of a non-terminal extra rule, then make sure that // it doesn't have other successor states. Non-terminal extra rules must have @@ -315,7 +318,7 @@ impl<'a> ParseTableBuilder<'a> { .entries .iter() .filter_map(|(item, _)| { - if item.step_index > 0 { + if !item.is_augmented() && item.step_index > 0 { Some(item.variable_index) } else { None @@ -817,7 +820,7 @@ impl<'a> ParseTableBuilder<'a> { fn symbol_name(&self, symbol: &Symbol) -> String { match symbol.kind { - SymbolType::End => "EOF".to_string(), + SymbolType::End | SymbolType::EndOfNonTerminalExtra => "EOF".to_string(), SymbolType::External => self.syntax_grammar.external_tokens[symbol.index] .name .clone(), diff --git a/cli/src/generate/build_tables/item_set_builder.rs b/cli/src/generate/build_tables/item_set_builder.rs index ec8f368b..29690829 100644 --- a/cli/src/generate/build_tables/item_set_builder.rs +++ b/cli/src/generate/build_tables/item_set_builder.rs @@ -306,7 +306,7 @@ impl<'a> fmt::Debug for ParseItemSetBuilder<'a> { SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name, SymbolType::External => &self.syntax_grammar.external_tokens[symbol.index].name, SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name, - SymbolType::End => "END", + SymbolType::End | SymbolType::EndOfNonTerminalExtra => "END", }; write!( f, @@ -323,7 +323,7 @@ impl<'a> fmt::Debug for ParseItemSetBuilder<'a> { SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name, SymbolType::External => &self.syntax_grammar.external_tokens[symbol.index].name, SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name, - SymbolType::End => "END", + SymbolType::End | SymbolType::EndOfNonTerminalExtra => "END", }; write!( f, diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index 0458df94..4c2224c4 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -366,6 +366,14 @@ impl<'a> Minimizer<'a> { existing_tokens: impl Iterator, new_token: Symbol, ) -> bool { + if new_token == Symbol::end_of_nonterminal_extra() { + info!( + "split states {} {} - end of non-terminal extra", + left_id, right_id, + ); + return true; + } + // Do not add external tokens; they could conflict lexically with any of the state's // existing lookahead tokens. if new_token.is_external() { diff --git a/cli/src/generate/prepare_grammar/extract_default_aliases.rs b/cli/src/generate/prepare_grammar/extract_default_aliases.rs index 3e08e3ad..9dabdf60 100644 --- a/cli/src/generate/prepare_grammar/extract_default_aliases.rs +++ b/cli/src/generate/prepare_grammar/extract_default_aliases.rs @@ -35,7 +35,9 @@ pub(super) fn extract_default_aliases( SymbolType::External => &mut external_status_list[step.symbol.index], SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index], SymbolType::Terminal => &mut terminal_status_list[step.symbol.index], - SymbolType::End => panic!("Unexpected end token"), + SymbolType::End | SymbolType::EndOfNonTerminalExtra => { + panic!("Unexpected end token") + } }; // Default aliases don't work for inlined variables. @@ -111,7 +113,9 @@ pub(super) fn extract_default_aliases( SymbolType::External => &mut external_status_list[step.symbol.index], SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index], SymbolType::Terminal => &mut terminal_status_list[step.symbol.index], - SymbolType::End => panic!("Unexpected end token"), + SymbolType::End | SymbolType::EndOfNonTerminalExtra => { + panic!("Unexpected end token") + } }; // If this step is aliased as the symbol's default alias, then remove that alias. diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index b99e8d6d..1ce3ce55 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -142,6 +142,10 @@ impl Generator { for i in 0..self.parse_table.symbols.len() { self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers); } + self.symbol_ids.insert( + Symbol::end_of_nonterminal_extra(), + self.symbol_ids[&Symbol::end()].clone(), + ); self.symbol_map = self .parse_table @@ -970,7 +974,7 @@ impl Generator { add_line!(self, "static TSLexMode ts_lex_modes[STATE_COUNT] = {{"); indent!(self); for (i, state) in self.parse_table.states.iter().enumerate() { - if state.is_end_of_non_terminal_extra(&self.syntax_grammar) { + if state.is_end_of_non_terminal_extra() { add_line!(self, "[{}] = {{(TSStateId)(-1)}},", i,); } else if state.external_lex_state_id > 0 { add_line!( @@ -1479,7 +1483,7 @@ impl Generator { fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) { match symbol.kind { - SymbolType::End => ("end", VariableType::Hidden), + SymbolType::End | SymbolType::EndOfNonTerminalExtra => ("end", VariableType::Hidden), SymbolType::NonTerminal => { let variable = &self.syntax_grammar.variables[symbol.index]; (&variable.name, variable.kind) diff --git a/cli/src/generate/rules.rs b/cli/src/generate/rules.rs index 7676d61d..e99f0197 100644 --- a/cli/src/generate/rules.rs +++ b/cli/src/generate/rules.rs @@ -7,6 +7,7 @@ use std::iter::FromIterator; pub(crate) enum SymbolType { External, End, + EndOfNonTerminalExtra, Terminal, NonTerminal, } @@ -69,6 +70,7 @@ pub(crate) struct TokenSet { terminal_bits: SmallBitVec, external_bits: SmallBitVec, eof: bool, + end_of_nonterminal_extra: bool, } impl Rule { @@ -221,6 +223,13 @@ impl Symbol { index: 0, } } + + pub fn end_of_nonterminal_extra() -> Self { + Symbol { + kind: SymbolType::EndOfNonTerminalExtra, + index: 0, + } + } } impl From for Rule { @@ -235,6 +244,7 @@ impl TokenSet { terminal_bits: SmallBitVec::new(), external_bits: SmallBitVec::new(), eof: false, + end_of_nonterminal_extra: false, } } @@ -262,6 +272,11 @@ impl TokenSet { }), ) .chain(if self.eof { Some(Symbol::end()) } else { None }) + .chain(if self.end_of_nonterminal_extra { + Some(Symbol::end_of_nonterminal_extra()) + } else { + None + }) } pub fn terminals<'a>(&'a self) -> impl Iterator + 'a { @@ -283,6 +298,7 @@ impl TokenSet { SymbolType::Terminal => self.terminal_bits.get(symbol.index).unwrap_or(false), SymbolType::External => self.external_bits.get(symbol.index).unwrap_or(false), SymbolType::End => self.eof, + SymbolType::EndOfNonTerminalExtra => self.end_of_nonterminal_extra, } } @@ -299,6 +315,10 @@ impl TokenSet { self.eof = true; return; } + SymbolType::EndOfNonTerminalExtra => { + self.end_of_nonterminal_extra = true; + return; + } }; if other.index >= vec.len() { vec.resize(other.index + 1, false); @@ -315,6 +335,10 @@ impl TokenSet { self.eof = false; return; } + SymbolType::EndOfNonTerminalExtra => { + self.end_of_nonterminal_extra = false; + return; + } }; if other.index < vec.len() { vec.set(other.index, false); @@ -322,7 +346,10 @@ impl TokenSet { } pub fn is_empty(&self) -> bool { - !self.eof && !self.terminal_bits.iter().any(|a| a) && !self.external_bits.iter().any(|a| a) + !self.eof + && !self.end_of_nonterminal_extra + && !self.terminal_bits.iter().any(|a| a) + && !self.external_bits.iter().any(|a| a) } pub fn insert_all_terminals(&mut self, other: &TokenSet) -> bool { @@ -359,6 +386,10 @@ impl TokenSet { result |= !self.eof; self.eof = true; } + if other.end_of_nonterminal_extra { + result |= !self.end_of_nonterminal_extra; + self.end_of_nonterminal_extra = true; + } result |= self.insert_all_terminals(other); result |= self.insert_all_externals(other); result diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index 1071bbf4..e74ec977 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -1,5 +1,5 @@ +use super::nfa::CharacterSet; use super::rules::{Alias, Associativity, Symbol, TokenSet}; -use super::{grammars::SyntaxGrammar, nfa::CharacterSet}; use std::collections::{BTreeMap, HashMap}; pub(crate) type ProductionInfoId = usize; pub(crate) type ParseStateId = usize; @@ -101,18 +101,9 @@ impl Default for LexTable { } impl ParseState { - pub fn is_end_of_non_terminal_extra(&self, grammar: &SyntaxGrammar) -> bool { - if let Some(eof_entry) = self.terminal_entries.get(&Symbol::end()) { - eof_entry.actions.iter().any(|action| { - if let ParseAction::Reduce { symbol, .. } = action { - grammar.extra_symbols.contains(&symbol) - } else { - false - } - }) - } else { - false - } + pub fn is_end_of_non_terminal_extra(&self) -> bool { + self.terminal_entries + .contains_key(&Symbol::end_of_nonterminal_extra()) } pub fn referenced_states<'a>(&'a self) -> impl Iterator + 'a {