feat: add 'reserved word' construct
Co-authored-by: Amaan Qureshi <amaanq12@gmail.com>
This commit is contained in:
parent
2a63077cac
commit
201b41cf11
31 changed files with 2367 additions and 1628 deletions
|
|
@ -170,17 +170,12 @@ impl Minimizer<'_> {
|
|||
let mut new_states = Vec::with_capacity(state_ids_by_group_id.len());
|
||||
for state_ids in &state_ids_by_group_id {
|
||||
// Initialize the new state based on the first old state in the group.
|
||||
let mut parse_state = ParseState::default();
|
||||
mem::swap(&mut parse_state, &mut self.parse_table.states[state_ids[0]]);
|
||||
let mut parse_state = mem::take(&mut self.parse_table.states[state_ids[0]]);
|
||||
|
||||
// Extend the new state with all of the actions from the other old states
|
||||
// in the group.
|
||||
for state_id in &state_ids[1..] {
|
||||
let mut other_parse_state = ParseState::default();
|
||||
mem::swap(
|
||||
&mut other_parse_state,
|
||||
&mut self.parse_table.states[*state_id],
|
||||
);
|
||||
let other_parse_state = mem::take(&mut self.parse_table.states[*state_id]);
|
||||
|
||||
parse_state
|
||||
.terminal_entries
|
||||
|
|
@ -188,6 +183,12 @@ impl Minimizer<'_> {
|
|||
parse_state
|
||||
.nonterminal_entries
|
||||
.extend(other_parse_state.nonterminal_entries);
|
||||
parse_state
|
||||
.reserved_words
|
||||
.insert_all(&other_parse_state.reserved_words);
|
||||
for symbol in parse_state.terminal_entries.keys() {
|
||||
parse_state.reserved_words.remove(symbol);
|
||||
}
|
||||
}
|
||||
|
||||
// Update the new state's outgoing references using the new grouping.
|
||||
|
|
@ -216,24 +217,14 @@ impl Minimizer<'_> {
|
|||
) {
|
||||
return true;
|
||||
}
|
||||
} else if self.token_conflicts(
|
||||
left_state.id,
|
||||
right_state.id,
|
||||
right_state.terminal_entries.keys(),
|
||||
*token,
|
||||
) {
|
||||
} else if self.token_conflicts(left_state.id, right_state.id, right_state, *token) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
for token in right_state.terminal_entries.keys() {
|
||||
if !left_state.terminal_entries.contains_key(token)
|
||||
&& self.token_conflicts(
|
||||
left_state.id,
|
||||
right_state.id,
|
||||
left_state.terminal_entries.keys(),
|
||||
*token,
|
||||
)
|
||||
&& self.token_conflicts(left_state.id, right_state.id, left_state, *token)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
|
@ -350,11 +341,11 @@ impl Minimizer<'_> {
|
|||
false
|
||||
}
|
||||
|
||||
fn token_conflicts<'b>(
|
||||
fn token_conflicts(
|
||||
&self,
|
||||
left_id: ParseStateId,
|
||||
right_id: ParseStateId,
|
||||
existing_tokens: impl Iterator<Item = &'b Symbol>,
|
||||
right_state: &ParseState,
|
||||
new_token: Symbol,
|
||||
) -> bool {
|
||||
if new_token == Symbol::end_of_nonterminal_extra() {
|
||||
|
|
@ -372,6 +363,10 @@ impl Minimizer<'_> {
|
|||
return true;
|
||||
}
|
||||
|
||||
if right_state.reserved_words.contains(&new_token) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Do not add tokens which are both internal and external. Their validity could
|
||||
// influence the behavior of the external scanner.
|
||||
if self
|
||||
|
|
@ -388,23 +383,30 @@ impl Minimizer<'_> {
|
|||
}
|
||||
|
||||
// Do not add a token if it conflicts with an existing token.
|
||||
for token in existing_tokens {
|
||||
if token.is_terminal()
|
||||
&& !(self.syntax_grammar.word_token == Some(*token)
|
||||
&& self.keywords.contains(&new_token))
|
||||
&& !(self.syntax_grammar.word_token == Some(new_token)
|
||||
&& self.keywords.contains(token))
|
||||
&& (self
|
||||
for token in right_state.terminal_entries.keys().copied() {
|
||||
if !token.is_terminal() {
|
||||
continue;
|
||||
}
|
||||
if self.syntax_grammar.word_token == Some(token) && self.keywords.contains(&new_token) {
|
||||
continue;
|
||||
}
|
||||
if self.syntax_grammar.word_token == Some(new_token) && self.keywords.contains(&token) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if self
|
||||
.token_conflict_map
|
||||
.does_conflict(new_token.index, token.index)
|
||||
|| self
|
||||
.token_conflict_map
|
||||
.does_conflict(new_token.index, token.index)
|
||||
|| self
|
||||
.token_conflict_map
|
||||
.does_match_same_string(new_token.index, token.index))
|
||||
.does_match_same_string(new_token.index, token.index)
|
||||
{
|
||||
info!(
|
||||
"split states {left_id} {right_id} - token {} conflicts with {}",
|
||||
"split states {} {} - token {} conflicts with {}",
|
||||
left_id,
|
||||
right_id,
|
||||
self.symbol_name(&new_token),
|
||||
self.symbol_name(token),
|
||||
self.symbol_name(&token),
|
||||
);
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue