Fix bugs in parser generation for non-terminal extras

Previously, we attempted to completely separate the parse states
for item sets with non-terminal extras from the parse states
for other rules. But there was not a complete separation.

It actually isn't necessary to separate the parse states in this way.
The only special behavior for parse states with non-terminal extra rules
is what happens at the *end* of the rule: these parse states need to
perform an unconditional reduction.

Luckily, it's possible to distinguish these *non-terminal extra ending*
states from other states just based on their normal structure, with
no additional state.
This commit is contained in:
Max Brunsfeld 2021-02-18 14:14:20 -08:00
parent b46d51f224
commit 86a891fa63
4 changed files with 50 additions and 28 deletions

View file

@ -55,7 +55,7 @@ impl<'a> ParseTableBuilder<'a> {
.push(ProductionInfo::default());
// Add the error state at index 0.
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default(), false);
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
// Add the starting state at index 1.
self.add_parse_state(
@ -69,7 +69,6 @@ impl<'a> ParseTableBuilder<'a> {
.iter()
.cloned(),
),
false,
);
// Compute the possible item sets for non-terminal extras.
@ -100,7 +99,7 @@ impl<'a> ParseTableBuilder<'a> {
for (terminal, item_set) in non_terminal_extra_item_sets_by_first_terminal {
self.non_terminal_extra_states
.push((terminal, self.parse_table.states.len()));
self.add_parse_state(&Vec::new(), &Vec::new(), item_set, true);
self.add_parse_state(&Vec::new(), &Vec::new(), item_set);
}
while let Some(entry) = self.parse_state_queue.pop_front() {
@ -126,7 +125,6 @@ impl<'a> ParseTableBuilder<'a> {
preceding_symbols: &SymbolSequence,
preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
item_set: ParseItemSet<'a>,
is_non_terminal_extra: bool,
) -> ParseStateId {
match self.state_ids_by_item_set.entry(item_set) {
// If an equivalent item set has already been processed, then return
@ -157,7 +155,6 @@ impl<'a> ParseTableBuilder<'a> {
terminal_entries: HashMap::new(),
nonterminal_entries: HashMap::new(),
core_id,
is_non_terminal_extra,
});
self.parse_state_queue.push_back(ParseStateQueueEntry {
state_id,
@ -256,7 +253,6 @@ impl<'a> ParseTableBuilder<'a> {
&preceding_symbols,
&preceding_auxiliary_symbols,
next_item_set,
self.parse_table.states[state_id].is_non_terminal_extra,
);
preceding_symbols.pop();
@ -284,7 +280,6 @@ impl<'a> ParseTableBuilder<'a> {
&preceding_symbols,
&preceding_auxiliary_symbols,
next_item_set,
self.parse_table.states[state_id].is_non_terminal_extra,
);
preceding_symbols.pop();
self.parse_table.states[state_id]
@ -309,15 +304,37 @@ impl<'a> ParseTableBuilder<'a> {
// Finally, add actions for the grammar's `extra` symbols.
let state = &mut self.parse_table.states[state_id];
let is_non_terminal_extra = state.is_non_terminal_extra;
let is_end_of_non_terminal_extra =
is_non_terminal_extra && state.terminal_entries.len() == 1;
let is_end_of_non_terminal_extra = state.is_end_of_non_terminal_extra(&self.syntax_grammar);
// If this state represents the end of a non-terminal extra rule, then make sure that
// it doesn't have other successor states. Non-terminal extra rules must have
// unambiguous endings.
if is_end_of_non_terminal_extra {
if state.terminal_entries.len() > 1 {
let parent_symbols = item_set
.entries
.iter()
.filter_map(|(item, _)| {
if item.step_index > 0 {
Some(item.variable_index)
} else {
None
}
})
.collect::<HashSet<_>>();
let mut message =
"Extra rules must have unambiguous endings. Conflicting rules: ".to_string();
for (i, variable_index) in parent_symbols.iter().enumerate() {
if i > 0 {
message += ", ";
}
message += &self.syntax_grammar.variables[*variable_index as usize].name;
}
return Err(Error::new(message));
}
}
// Add actions for the start tokens of each non-terminal extra rule.
// These actions are added to every state except for the states that are
// already within non-terminal extras. Non-terminal extras are not allowed
// to nest within each other.
if !is_non_terminal_extra {
else {
for (terminal, state_id) in &self.non_terminal_extra_states {
state
.terminal_entries
@ -330,12 +347,10 @@ impl<'a> ParseTableBuilder<'a> {
}],
});
}
}
// Add ShiftExtra actions for the terminal extra tokens. These actions
// are added to every state except for those at the ends of non-terminal
// extras.
if !is_end_of_non_terminal_extra {
// Add ShiftExtra actions for the terminal extra tokens. These actions
// are added to every state except for those at the ends of non-terminal
// extras.
for extra_token in &self.syntax_grammar.extra_symbols {
if extra_token.is_non_terminal() {
state

View file

@ -200,9 +200,6 @@ impl<'a> Minimizer<'a> {
right_state: &ParseState,
group_ids_by_state_id: &Vec<ParseStateId>,
) -> bool {
if left_state.is_non_terminal_extra != right_state.is_non_terminal_extra {
return true;
}
for (token, left_entry) in &left_state.terminal_entries {
if let Some(right_entry) = right_state.terminal_entries.get(token) {
if self.entries_conflict(

View file

@ -970,10 +970,7 @@ impl Generator {
add_line!(self, "static TSLexMode ts_lex_modes[STATE_COUNT] = {{");
indent!(self);
for (i, state) in self.parse_table.states.iter().enumerate() {
if state.is_non_terminal_extra
&& state.terminal_entries.len() == 1
&& *state.terminal_entries.iter().next().unwrap().0 == Symbol::end()
{
if state.is_end_of_non_terminal_extra(&self.syntax_grammar) {
add_line!(self, "[{}] = {{(TSStateId)(-1)}},", i,);
} else if state.external_lex_state_id > 0 {
add_line!(

View file

@ -1,5 +1,5 @@
use super::nfa::CharacterSet;
use super::rules::{Alias, Associativity, Symbol, TokenSet};
use super::{grammars::SyntaxGrammar, nfa::CharacterSet};
use std::collections::{BTreeMap, HashMap};
pub(crate) type ProductionInfoId = usize;
pub(crate) type ParseStateId = usize;
@ -44,7 +44,6 @@ pub(crate) struct ParseState {
pub lex_state_id: usize,
pub external_lex_state_id: usize,
pub core_id: usize,
pub is_non_terminal_extra: bool,
}
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
@ -102,6 +101,20 @@ impl Default for LexTable {
}
impl ParseState {
/// Reports whether this state sits at the end of a non-terminal extra rule.
///
/// The check looks up this state's terminal entry for `Symbol::end()`. The
/// result is `true` when that entry exists and at least one of its actions
/// is a `ParseAction::Reduce` whose `symbol` appears in
/// `grammar.extra_symbols`; in every other case it is `false`.
pub fn is_end_of_non_terminal_extra(&self, grammar: &SyntaxGrammar) -> bool {
    self.terminal_entries
        .get(&Symbol::end())
        .map_or(false, |end_entry| {
            end_entry.actions.iter().any(|candidate| match candidate {
                // Only reductions to one of the grammar's extra symbols count.
                ParseAction::Reduce { symbol, .. } => grammar.extra_symbols.contains(symbol),
                _ => false,
            })
        })
}
pub fn referenced_states<'a>(&'a self) -> impl Iterator<Item = ParseStateId> + 'a {
self.terminal_entries
.iter()