Fix bugs in parser generation for non-terminal extras
Previously, we attempted to completely separate the parse states for item sets with non-terminal extras from the parse states for other rules. But there was not a complete separation. It actually isn't necessary to separate the parse states in this way. The only special behavior for parse states with non-terminal extra rules is what happens at the *end* of the rule: these parse states need to perform an unconditional reduction. Luckily, it's possible to distinguish these *non-terminal extra ending* states from other states just based on their normal structure, with no additional state.
This commit is contained in:
parent
b46d51f224
commit
86a891fa63
4 changed files with 50 additions and 28 deletions
|
|
@ -55,7 +55,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.push(ProductionInfo::default());
|
||||
|
||||
// Add the error state at index 0.
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default(), false);
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
|
||||
|
||||
// Add the starting state at index 1.
|
||||
self.add_parse_state(
|
||||
|
|
@ -69,7 +69,6 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.iter()
|
||||
.cloned(),
|
||||
),
|
||||
false,
|
||||
);
|
||||
|
||||
// Compute the possible item sets for non-terminal extras.
|
||||
|
|
@ -100,7 +99,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
for (terminal, item_set) in non_terminal_extra_item_sets_by_first_terminal {
|
||||
self.non_terminal_extra_states
|
||||
.push((terminal, self.parse_table.states.len()));
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), item_set, true);
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), item_set);
|
||||
}
|
||||
|
||||
while let Some(entry) = self.parse_state_queue.pop_front() {
|
||||
|
|
@ -126,7 +125,6 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
preceding_symbols: &SymbolSequence,
|
||||
preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
|
||||
item_set: ParseItemSet<'a>,
|
||||
is_non_terminal_extra: bool,
|
||||
) -> ParseStateId {
|
||||
match self.state_ids_by_item_set.entry(item_set) {
|
||||
// If an equivalent item set has already been processed, then return
|
||||
|
|
@ -157,7 +155,6 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
terminal_entries: HashMap::new(),
|
||||
nonterminal_entries: HashMap::new(),
|
||||
core_id,
|
||||
is_non_terminal_extra,
|
||||
});
|
||||
self.parse_state_queue.push_back(ParseStateQueueEntry {
|
||||
state_id,
|
||||
|
|
@ -256,7 +253,6 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
&preceding_symbols,
|
||||
&preceding_auxiliary_symbols,
|
||||
next_item_set,
|
||||
self.parse_table.states[state_id].is_non_terminal_extra,
|
||||
);
|
||||
preceding_symbols.pop();
|
||||
|
||||
|
|
@ -284,7 +280,6 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
&preceding_symbols,
|
||||
&preceding_auxiliary_symbols,
|
||||
next_item_set,
|
||||
self.parse_table.states[state_id].is_non_terminal_extra,
|
||||
);
|
||||
preceding_symbols.pop();
|
||||
self.parse_table.states[state_id]
|
||||
|
|
@ -309,15 +304,37 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
|
||||
// Finally, add actions for the grammar's `extra` symbols.
|
||||
let state = &mut self.parse_table.states[state_id];
|
||||
let is_non_terminal_extra = state.is_non_terminal_extra;
|
||||
let is_end_of_non_terminal_extra =
|
||||
is_non_terminal_extra && state.terminal_entries.len() == 1;
|
||||
let is_end_of_non_terminal_extra = state.is_end_of_non_terminal_extra(&self.syntax_grammar);
|
||||
|
||||
// If this state represents the end of a non-terminal extra rule, then make sure that
|
||||
// it doesn't have other successor states. Non-terminal extra rules must have
|
||||
// unambiguous endings.
|
||||
if is_end_of_non_terminal_extra {
|
||||
if state.terminal_entries.len() > 1 {
|
||||
let parent_symbols = item_set
|
||||
.entries
|
||||
.iter()
|
||||
.filter_map(|(item, _)| {
|
||||
if item.step_index > 0 {
|
||||
Some(item.variable_index)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<HashSet<_>>();
|
||||
let mut message =
|
||||
"Extra rules must have unambiguous endings. Conflicting rules: ".to_string();
|
||||
for (i, variable_index) in parent_symbols.iter().enumerate() {
|
||||
if i > 0 {
|
||||
message += ", ";
|
||||
}
|
||||
message += &self.syntax_grammar.variables[*variable_index as usize].name;
|
||||
}
|
||||
return Err(Error::new(message));
|
||||
}
|
||||
}
|
||||
// Add actions for the start tokens of each non-terminal extra rule.
|
||||
// These actions are added to every state except for the states that are
|
||||
// already within non-terminal extras. Non-terminal extras are not allowed
|
||||
// to nest within each other.
|
||||
if !is_non_terminal_extra {
|
||||
else {
|
||||
for (terminal, state_id) in &self.non_terminal_extra_states {
|
||||
state
|
||||
.terminal_entries
|
||||
|
|
@ -330,12 +347,10 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Add ShiftExtra actions for the terminal extra tokens. These actions
|
||||
// are added to every state except for those at the ends of non-terminal
|
||||
// extras.
|
||||
if !is_end_of_non_terminal_extra {
|
||||
// Add ShiftExtra actions for the terminal extra tokens. These actions
|
||||
// are added to every state except for those at the ends of non-terminal
|
||||
// extras.
|
||||
for extra_token in &self.syntax_grammar.extra_symbols {
|
||||
if extra_token.is_non_terminal() {
|
||||
state
|
||||
|
|
|
|||
|
|
@ -200,9 +200,6 @@ impl<'a> Minimizer<'a> {
|
|||
right_state: &ParseState,
|
||||
group_ids_by_state_id: &Vec<ParseStateId>,
|
||||
) -> bool {
|
||||
if left_state.is_non_terminal_extra != right_state.is_non_terminal_extra {
|
||||
return true;
|
||||
}
|
||||
for (token, left_entry) in &left_state.terminal_entries {
|
||||
if let Some(right_entry) = right_state.terminal_entries.get(token) {
|
||||
if self.entries_conflict(
|
||||
|
|
|
|||
|
|
@ -970,10 +970,7 @@ impl Generator {
|
|||
add_line!(self, "static TSLexMode ts_lex_modes[STATE_COUNT] = {{");
|
||||
indent!(self);
|
||||
for (i, state) in self.parse_table.states.iter().enumerate() {
|
||||
if state.is_non_terminal_extra
|
||||
&& state.terminal_entries.len() == 1
|
||||
&& *state.terminal_entries.iter().next().unwrap().0 == Symbol::end()
|
||||
{
|
||||
if state.is_end_of_non_terminal_extra(&self.syntax_grammar) {
|
||||
add_line!(self, "[{}] = {{(TSStateId)(-1)}},", i,);
|
||||
} else if state.external_lex_state_id > 0 {
|
||||
add_line!(
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
use super::nfa::CharacterSet;
|
||||
use super::rules::{Alias, Associativity, Symbol, TokenSet};
|
||||
use super::{grammars::SyntaxGrammar, nfa::CharacterSet};
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
pub(crate) type ProductionInfoId = usize;
|
||||
pub(crate) type ParseStateId = usize;
|
||||
|
|
@ -44,7 +44,6 @@ pub(crate) struct ParseState {
|
|||
pub lex_state_id: usize,
|
||||
pub external_lex_state_id: usize,
|
||||
pub core_id: usize,
|
||||
pub is_non_terminal_extra: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
|
||||
|
|
@ -102,6 +101,20 @@ impl Default for LexTable {
|
|||
}
|
||||
|
||||
impl ParseState {
|
||||
pub fn is_end_of_non_terminal_extra(&self, grammar: &SyntaxGrammar) -> bool {
|
||||
if let Some(eof_entry) = self.terminal_entries.get(&Symbol::end()) {
|
||||
eof_entry.actions.iter().any(|action| {
|
||||
if let ParseAction::Reduce { symbol, .. } = action {
|
||||
grammar.extra_symbols.contains(&symbol)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub fn referenced_states<'a>(&'a self) -> impl Iterator<Item = ParseStateId> + 'a {
|
||||
self.terminal_entries
|
||||
.iter()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue