diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index bfb01736..4fe7a922 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -55,7 +55,7 @@ impl<'a> ParseTableBuilder<'a> { .push(ProductionInfo::default()); // Add the error state at index 0. - self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default(), false); + self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); // Add the starting state at index 1. self.add_parse_state( @@ -69,7 +69,6 @@ impl<'a> ParseTableBuilder<'a> { .iter() .cloned(), ), - false, ); // Compute the possible item sets for non-terminal extras. @@ -100,7 +99,7 @@ impl<'a> ParseTableBuilder<'a> { for (terminal, item_set) in non_terminal_extra_item_sets_by_first_terminal { self.non_terminal_extra_states .push((terminal, self.parse_table.states.len())); - self.add_parse_state(&Vec::new(), &Vec::new(), item_set, true); + self.add_parse_state(&Vec::new(), &Vec::new(), item_set); } while let Some(entry) = self.parse_state_queue.pop_front() { @@ -126,7 +125,6 @@ impl<'a> ParseTableBuilder<'a> { preceding_symbols: &SymbolSequence, preceding_auxiliary_symbols: &AuxiliarySymbolSequence, item_set: ParseItemSet<'a>, - is_non_terminal_extra: bool, ) -> ParseStateId { match self.state_ids_by_item_set.entry(item_set) { // If an equivalent item set has already been processed, then return @@ -157,7 +155,6 @@ impl<'a> ParseTableBuilder<'a> { terminal_entries: HashMap::new(), nonterminal_entries: HashMap::new(), core_id, - is_non_terminal_extra, }); self.parse_state_queue.push_back(ParseStateQueueEntry { state_id, @@ -256,7 +253,6 @@ impl<'a> ParseTableBuilder<'a> { &preceding_symbols, &preceding_auxiliary_symbols, next_item_set, - self.parse_table.states[state_id].is_non_terminal_extra, ); preceding_symbols.pop(); @@ -284,7 +280,6 @@ impl<'a> ParseTableBuilder<'a> { &preceding_symbols, &preceding_auxiliary_symbols, next_item_set, - self.parse_table.states[state_id].is_non_terminal_extra, ); preceding_symbols.pop(); self.parse_table.states[state_id] @@ -309,15 +304,37 @@ impl<'a> ParseTableBuilder<'a> { // Finally, add actions for the grammar's `extra` symbols. let state = &mut self.parse_table.states[state_id]; - let is_non_terminal_extra = state.is_non_terminal_extra; - let is_end_of_non_terminal_extra = - is_non_terminal_extra && state.terminal_entries.len() == 1; + let is_end_of_non_terminal_extra = state.is_end_of_non_terminal_extra(&self.syntax_grammar); + // If this state represents the end of a non-terminal extra rule, then make sure that + // it doesn't have other successor states. Non-terminal extra rules must have + // unambiguous endings. + if is_end_of_non_terminal_extra { + if state.terminal_entries.len() > 1 { + let parent_symbols = item_set + .entries + .iter() + .filter_map(|(item, _)| { + if item.step_index > 0 { + Some(item.variable_index) + } else { + None + } + }) + .collect::>(); + let mut message = + "Extra rules must have unambiguous endings. Conflicting rules: ".to_string(); + for (i, variable_index) in parent_symbols.iter().enumerate() { + if i > 0 { + message += ", "; + } + message += &self.syntax_grammar.variables[*variable_index as usize].name; + } + return Err(Error::new(message)); + } + } // Add actions for the start tokens of each non-terminal extra rule. - // These actions are added to every state except for the states that are - // alread within non-terminal extras. Non-terminal extras are not allowed - // to nest within each other. - if !is_non_terminal_extra { + else { for (terminal, state_id) in &self.non_terminal_extra_states { state .terminal_entries @@ -330,12 +347,10 @@ impl<'a> ParseTableBuilder<'a> { }], }); } - } - // Add ShiftExtra actions for the terminal extra tokens. These actions - // are added to every state except for those at the ends of non-terminal - // extras. - if !is_end_of_non_terminal_extra { + // Add ShiftExtra actions for the terminal extra tokens. These actions + // are added to every state except for those at the ends of non-terminal + // extras. for extra_token in &self.syntax_grammar.extra_symbols { if extra_token.is_non_terminal() { state diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index d159a2c4..0458df94 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -200,9 +200,6 @@ impl<'a> Minimizer<'a> { right_state: &ParseState, group_ids_by_state_id: &Vec, ) -> bool { - if left_state.is_non_terminal_extra != right_state.is_non_terminal_extra { - return true; - } for (token, left_entry) in &left_state.terminal_entries { if let Some(right_entry) = right_state.terminal_entries.get(token) { if self.entries_conflict( diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index d6e32cf3..b99e8d6d 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -970,10 +970,7 @@ impl Generator { add_line!(self, "static TSLexMode ts_lex_modes[STATE_COUNT] = {{"); indent!(self); for (i, state) in self.parse_table.states.iter().enumerate() { - if state.is_non_terminal_extra - && state.terminal_entries.len() == 1 - && *state.terminal_entries.iter().next().unwrap().0 == Symbol::end() - { + if state.is_end_of_non_terminal_extra(&self.syntax_grammar) { add_line!(self, "[{}] = {{(TSStateId)(-1)}},", i,); } else if state.external_lex_state_id > 0 { add_line!( diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index 15b18a97..1071bbf4 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -1,5 +1,5 @@ -use super::nfa::CharacterSet; use super::rules::{Alias, Associativity, Symbol, TokenSet}; +use super::{grammars::SyntaxGrammar, nfa::CharacterSet}; use std::collections::{BTreeMap, HashMap}; pub(crate) type ProductionInfoId = usize; pub(crate) type ParseStateId = usize; @@ -44,7 +44,6 @@ pub(crate) struct ParseState { pub lex_state_id: usize, pub external_lex_state_id: usize, pub core_id: usize, - pub is_non_terminal_extra: bool, } #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] @@ -102,6 +101,20 @@ impl Default for LexTable { } impl ParseState { + pub fn is_end_of_non_terminal_extra(&self, grammar: &SyntaxGrammar) -> bool { + if let Some(eof_entry) = self.terminal_entries.get(&Symbol::end()) { + eof_entry.actions.iter().any(|action| { + if let ParseAction::Reduce { symbol, .. } = action { + grammar.extra_symbols.contains(&symbol) + } else { + false + } + }) + } else { + false + } + } + pub fn referenced_states<'a>(&'a self) -> impl Iterator + 'a { self.terminal_entries .iter()