Allow non-terminal extras
This commit is contained in:
parent
49c632ae90
commit
fcaabea0cf
8 changed files with 274 additions and 83 deletions
|
|
@ -7,7 +7,7 @@ use crate::generate::grammars::{
|
|||
use crate::generate::node_types::VariableInfo;
|
||||
use crate::generate::rules::{Associativity, Symbol, SymbolType, TokenSet};
|
||||
use crate::generate::tables::{
|
||||
FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
ProductionInfo, ProductionInfoId,
|
||||
};
|
||||
use core::ops::Range;
|
||||
|
|
@ -16,17 +16,19 @@ use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
|
|||
use std::fmt::Write;
|
||||
use std::u32;
|
||||
|
||||
// For conflict reporting, each parse state is associated with an example
|
||||
// sequence of symbols that could lead to that parse state.
|
||||
type SymbolSequence = Vec<Symbol>;
|
||||
|
||||
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
|
||||
pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
|
||||
|
||||
#[derive(Clone)]
|
||||
struct AuxiliarySymbolInfo {
|
||||
auxiliary_symbol: Symbol,
|
||||
parent_symbols: Vec<Symbol>,
|
||||
}
|
||||
|
||||
type SymbolSequence = Vec<Symbol>;
|
||||
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
|
||||
|
||||
pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
|
||||
|
||||
struct ParseStateQueueEntry {
|
||||
state_id: ParseStateId,
|
||||
preceding_auxiliary_symbols: AuxiliarySymbolSequence,
|
||||
|
|
@ -41,6 +43,7 @@ struct ParseTableBuilder<'a> {
|
|||
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
|
||||
parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
|
||||
parse_state_queue: VecDeque<ParseStateQueueEntry>,
|
||||
non_terminal_extra_states: Vec<(Symbol, usize)>,
|
||||
parse_table: ParseTable,
|
||||
}
|
||||
|
||||
|
|
@ -52,7 +55,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.push(ProductionInfo::default());
|
||||
|
||||
// Add the error state at index 0.
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default(), false);
|
||||
|
||||
// Add the starting state at index 1.
|
||||
self.add_parse_state(
|
||||
|
|
@ -66,8 +69,40 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.iter()
|
||||
.cloned(),
|
||||
),
|
||||
false,
|
||||
);
|
||||
|
||||
// Compute the possible item sets for non-terminal extras.
|
||||
let mut non_terminal_extra_item_sets_by_first_terminal = BTreeMap::new();
|
||||
for extra_non_terminal in self
|
||||
.syntax_grammar
|
||||
.extra_tokens
|
||||
.iter()
|
||||
.filter(|s| s.is_non_terminal())
|
||||
{
|
||||
let variable = &self.syntax_grammar.variables[extra_non_terminal.index];
|
||||
for production in &variable.productions {
|
||||
non_terminal_extra_item_sets_by_first_terminal
|
||||
.entry(production.first_symbol().unwrap())
|
||||
.or_insert(ParseItemSet::default())
|
||||
.insert(
|
||||
ParseItem {
|
||||
variable_index: extra_non_terminal.index as u32,
|
||||
production,
|
||||
step_index: 1,
|
||||
},
|
||||
&[Symbol::end()].iter().cloned().collect(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Add a state for each starting terminal of a non-terminal extra rule.
|
||||
for (terminal, item_set) in non_terminal_extra_item_sets_by_first_terminal {
|
||||
self.non_terminal_extra_states
|
||||
.push((terminal, self.parse_table.states.len()));
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), item_set, true);
|
||||
}
|
||||
|
||||
while let Some(entry) = self.parse_state_queue.pop_front() {
|
||||
let item_set = self
|
||||
.item_set_builder
|
||||
|
|
@ -91,9 +126,15 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
preceding_symbols: &SymbolSequence,
|
||||
preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
|
||||
item_set: ParseItemSet<'a>,
|
||||
is_non_terminal_extra: bool,
|
||||
) -> ParseStateId {
|
||||
match self.state_ids_by_item_set.entry(item_set) {
|
||||
// If an equivalent item set has already been processed, then return
|
||||
// the existing parse state index.
|
||||
Entry::Occupied(o) => *o.get(),
|
||||
|
||||
// Otherwise, insert a new parse state and add it to the queue of
|
||||
// parse states to populate.
|
||||
Entry::Vacant(v) => {
|
||||
let core = v.key().core();
|
||||
let core_count = self.core_ids_by_core.len();
|
||||
|
|
@ -116,6 +157,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
terminal_entries: HashMap::new(),
|
||||
nonterminal_entries: HashMap::new(),
|
||||
core_id,
|
||||
is_non_terminal_extra,
|
||||
});
|
||||
self.parse_state_queue.push_back(ParseStateQueueEntry {
|
||||
state_id,
|
||||
|
|
@ -138,7 +180,12 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
let mut non_terminal_successors = BTreeMap::new();
|
||||
let mut lookaheads_with_conflicts = TokenSet::new();
|
||||
|
||||
// Each item in the item set contributes to either a Shift action or a Reduce
|
||||
// action in this state.
|
||||
for (item, lookaheads) in &item_set.entries {
|
||||
// If the item is unfinished, then this state has a transition for the item's
|
||||
// next symbol. Advance the item to its next step and insert the resulting
|
||||
// item into the successor item set.
|
||||
if let Some(next_symbol) = item.symbol() {
|
||||
let successor = item.successor();
|
||||
if next_symbol.is_non_terminal() {
|
||||
|
|
@ -160,7 +207,10 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.or_insert_with(|| ParseItemSet::default())
|
||||
.insert(successor, lookaheads);
|
||||
}
|
||||
} else {
|
||||
}
|
||||
// If the item is finished, then add a Reduce action to this state based
|
||||
// on this item.
|
||||
else {
|
||||
let action = if item.is_augmented() {
|
||||
ParseAction::Accept
|
||||
} else {
|
||||
|
|
@ -179,6 +229,10 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.terminal_entries
|
||||
.entry(lookahead);
|
||||
let entry = entry.or_insert_with(|| ParseTableEntry::new());
|
||||
|
||||
// While inserting Reduce actions, eagerly resolve conflicts related
|
||||
// to precedence: avoid inserting lower-precedence reductions, and
|
||||
// clear the action list when inserting higher-precedence reductions.
|
||||
if entry.actions.is_empty() {
|
||||
entry.actions.push(action);
|
||||
} else if action.precedence() > entry.actions[0].precedence() {
|
||||
|
|
@ -193,12 +247,16 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
// Having computed the successor item sets for each symbol, add a new
|
||||
// parse state for each of these item sets, and add a corresponding Shift
|
||||
// action to this state.
|
||||
for (symbol, next_item_set) in terminal_successors {
|
||||
preceding_symbols.push(symbol);
|
||||
let next_state_id = self.add_parse_state(
|
||||
&preceding_symbols,
|
||||
&preceding_auxiliary_symbols,
|
||||
next_item_set,
|
||||
self.parse_table.states[state_id].is_non_terminal_extra,
|
||||
);
|
||||
preceding_symbols.pop();
|
||||
|
||||
|
|
@ -226,13 +284,19 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
&preceding_symbols,
|
||||
&preceding_auxiliary_symbols,
|
||||
next_item_set,
|
||||
self.parse_table.states[state_id].is_non_terminal_extra,
|
||||
);
|
||||
preceding_symbols.pop();
|
||||
self.parse_table.states[state_id]
|
||||
.nonterminal_entries
|
||||
.insert(symbol, next_state_id);
|
||||
.insert(symbol, GotoAction::Goto(next_state_id));
|
||||
}
|
||||
|
||||
// For any symbol with multiple actions, perform conflict resolution.
|
||||
// This will either
|
||||
// * choose one action over the others using precedence or associativity
|
||||
// * keep multiple actions if this conflict has been whitelisted in the grammar
|
||||
// * fail, terminating the parser generation process
|
||||
for symbol in lookaheads_with_conflicts.iter() {
|
||||
self.handle_conflict(
|
||||
&item_set,
|
||||
|
|
@ -243,15 +307,50 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
)?;
|
||||
}
|
||||
|
||||
// Finally, add actions for the grammar's `extra` symbols.
|
||||
let state = &mut self.parse_table.states[state_id];
|
||||
for extra_token in &self.syntax_grammar.extra_tokens {
|
||||
state
|
||||
.terminal_entries
|
||||
.entry(*extra_token)
|
||||
.or_insert(ParseTableEntry {
|
||||
reusable: true,
|
||||
actions: vec![ParseAction::ShiftExtra],
|
||||
});
|
||||
let is_non_terminal_extra = state.is_non_terminal_extra;
|
||||
let is_end_of_non_terminal_extra =
|
||||
is_non_terminal_extra && state.terminal_entries.len() == 1;
|
||||
|
||||
// Add actions for the start tokens of each non-terminal extra rule.
|
||||
// These actions are added to every state except for the states that are
|
||||
// already within non-terminal extras. Non-terminal extras are not allowed
|
||||
// to nest within each other.
|
||||
if !is_non_terminal_extra {
|
||||
for (terminal, state_id) in &self.non_terminal_extra_states {
|
||||
state
|
||||
.terminal_entries
|
||||
.entry(*terminal)
|
||||
.or_insert(ParseTableEntry {
|
||||
reusable: true,
|
||||
actions: vec![ParseAction::Shift {
|
||||
state: *state_id,
|
||||
is_repetition: false,
|
||||
}],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Add ShiftExtra actions for the grammar's extra symbols. These actions
|
||||
// are added to every state except for those at the ends of non-terminal
|
||||
// extras.
|
||||
if !is_end_of_non_terminal_extra {
|
||||
for extra_token in &self.syntax_grammar.extra_tokens {
|
||||
if extra_token.is_non_terminal() {
|
||||
state
|
||||
.nonterminal_entries
|
||||
.insert(*extra_token, GotoAction::ShiftExtra);
|
||||
} else {
|
||||
state
|
||||
.terminal_entries
|
||||
.entry(*extra_token)
|
||||
.or_insert(ParseTableEntry {
|
||||
reusable: true,
|
||||
actions: vec![ParseAction::ShiftExtra],
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
|
@ -362,8 +461,8 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
// If all reduce actions are left associative, remove the SHIFT action.
|
||||
// If all reduce actions are right associative, remove the REDUCE actions.
|
||||
// If all Reduce actions are left associative, remove the SHIFT action.
|
||||
// If all Reduce actions are right associative, remove the REDUCE actions.
|
||||
match (has_left, has_non, has_right) {
|
||||
(true, false, false) => {
|
||||
entry.actions.pop();
|
||||
|
|
@ -774,6 +873,7 @@ pub(crate) fn build_parse_table<'a>(
|
|||
lexical_grammar,
|
||||
item_set_builder,
|
||||
variable_info,
|
||||
non_terminal_extra_states: Vec::new(),
|
||||
state_ids_by_item_set: HashMap::new(),
|
||||
core_ids_by_core: HashMap::new(),
|
||||
parse_state_info_by_id: Vec::new(),
|
||||
|
|
|
|||
|
|
@ -2,7 +2,9 @@ use super::token_conflicts::TokenConflictMap;
|
|||
use crate::generate::dedup::split_state_id_groups;
|
||||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
|
||||
use crate::generate::rules::{AliasMap, Symbol, TokenSet};
|
||||
use crate::generate::tables::{ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry};
|
||||
use crate::generate::tables::{
|
||||
GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
};
|
||||
use log::info;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::mem;
|
||||
|
|
@ -101,7 +103,10 @@ impl<'a> Minimizer<'a> {
|
|||
state.update_referenced_states(|other_state_id, state| {
|
||||
if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) {
|
||||
done = false;
|
||||
state.nonterminal_entries[symbol]
|
||||
match state.nonterminal_entries.get(symbol) {
|
||||
Some(GotoAction::Goto(state_id)) => *state_id,
|
||||
_ => other_state_id,
|
||||
}
|
||||
} else {
|
||||
other_state_id
|
||||
}
|
||||
|
|
@ -262,18 +267,24 @@ impl<'a> Minimizer<'a> {
|
|||
|
||||
for (symbol, s1) in &state1.nonterminal_entries {
|
||||
if let Some(s2) = state2.nonterminal_entries.get(symbol) {
|
||||
let group1 = group_ids_by_state_id[*s1];
|
||||
let group2 = group_ids_by_state_id[*s2];
|
||||
if group1 != group2 {
|
||||
info!(
|
||||
"split states {} {} - successors for {} are split: {} {}",
|
||||
state1.id,
|
||||
state2.id,
|
||||
self.symbol_name(symbol),
|
||||
s1,
|
||||
s2,
|
||||
);
|
||||
return true;
|
||||
match (s1, s2) {
|
||||
(GotoAction::ShiftExtra, GotoAction::ShiftExtra) => continue,
|
||||
(GotoAction::Goto(s1), GotoAction::Goto(s2)) => {
|
||||
let group1 = group_ids_by_state_id[*s1];
|
||||
let group2 = group_ids_by_state_id[*s2];
|
||||
if group1 != group2 {
|
||||
info!(
|
||||
"split states {} {} - successors for {} are split: {} {}",
|
||||
state1.id,
|
||||
state2.id,
|
||||
self.symbol_name(symbol),
|
||||
s1,
|
||||
s2,
|
||||
);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
_ => return true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -93,15 +93,7 @@ pub(super) fn extract_tokens(
|
|||
let mut extra_tokens = Vec::new();
|
||||
for rule in grammar.extra_tokens {
|
||||
if let Rule::Symbol(symbol) = rule {
|
||||
let new_symbol = symbol_replacer.replace_symbol(symbol);
|
||||
if new_symbol.is_non_terminal() {
|
||||
return Error::err(format!(
|
||||
"Non-token symbol '{}' cannot be used as an extra token",
|
||||
&variables[new_symbol.index].name
|
||||
));
|
||||
} else {
|
||||
extra_tokens.push(new_symbol);
|
||||
}
|
||||
extra_tokens.push(symbol_replacer.replace_symbol(symbol));
|
||||
} else {
|
||||
if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) {
|
||||
extra_tokens.push(Symbol::terminal(index));
|
||||
|
|
@ -472,28 +464,6 @@ mod test {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_error_on_non_terminal_symbol_extras() {
|
||||
let mut grammar = build_grammar(vec![
|
||||
Variable::named("rule_0", Rule::non_terminal(1)),
|
||||
Variable::named("rule_1", Rule::non_terminal(2)),
|
||||
Variable::named("rule_2", Rule::string("x")),
|
||||
]);
|
||||
grammar.extra_tokens = vec![Rule::non_terminal(1)];
|
||||
|
||||
match extract_tokens(grammar) {
|
||||
Err(e) => {
|
||||
assert_eq!(
|
||||
e.message(),
|
||||
"Non-token symbol 'rule_1' cannot be used as an extra token"
|
||||
);
|
||||
}
|
||||
_ => {
|
||||
panic!("Expected an error but got no error");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_error_on_external_with_same_name_as_non_terminal() {
|
||||
let mut grammar = build_grammar(vec![
|
||||
|
|
|
|||
|
|
@ -2,7 +2,8 @@ use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType
|
|||
use super::nfa::CharacterSet;
|
||||
use super::rules::{Alias, AliasMap, Symbol, SymbolType};
|
||||
use super::tables::{
|
||||
AdvanceAction, FieldLocation, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry,
|
||||
AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable,
|
||||
ParseTableEntry,
|
||||
};
|
||||
use core::ops::Range;
|
||||
use std::cmp;
|
||||
|
|
@ -678,7 +679,12 @@ impl Generator {
|
|||
add_line!(self, "static TSLexMode ts_lex_modes[STATE_COUNT] = {{");
|
||||
indent!(self);
|
||||
for (i, state) in self.parse_table.states.iter().enumerate() {
|
||||
if state.external_lex_state_id > 0 {
|
||||
if state.is_non_terminal_extra
|
||||
&& state.terminal_entries.len() == 1
|
||||
&& *state.terminal_entries.iter().next().unwrap().0 == Symbol::end()
|
||||
{
|
||||
add_line!(self, "[{}] = {{-1}},", i,);
|
||||
} else if state.external_lex_state_id > 0 {
|
||||
add_line!(
|
||||
self,
|
||||
"[{}] = {{.lex_state = {}, .external_lex_state = {}}},",
|
||||
|
|
@ -807,12 +813,15 @@ impl Generator {
|
|||
terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0));
|
||||
nonterminal_entries.sort_unstable_by_key(|k| k.0);
|
||||
|
||||
for (symbol, state_id) in &nonterminal_entries {
|
||||
for (symbol, action) in &nonterminal_entries {
|
||||
add_line!(
|
||||
self,
|
||||
"[{}] = STATE({}),",
|
||||
self.symbol_ids[symbol],
|
||||
*state_id
|
||||
match action {
|
||||
GotoAction::Goto(state) => *state,
|
||||
GotoAction::ShiftExtra => i,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -865,9 +874,15 @@ impl Generator {
|
|||
.or_default()
|
||||
.push(**symbol);
|
||||
}
|
||||
for (symbol, state_id) in &state.nonterminal_entries {
|
||||
for (symbol, action) in &state.nonterminal_entries {
|
||||
let state_id = match action {
|
||||
GotoAction::Goto(i) => *i,
|
||||
GotoAction::ShiftExtra => {
|
||||
self.large_state_count + small_state_indices.len() - 1
|
||||
}
|
||||
};
|
||||
symbols_by_value
|
||||
.entry((*state_id, SymbolType::NonTerminal))
|
||||
.entry((state_id, SymbolType::NonTerminal))
|
||||
.or_default()
|
||||
.push(*symbol);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,12 @@ pub(crate) enum ParseAction {
|
|||
},
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum GotoAction {
|
||||
Goto(ParseStateId),
|
||||
ShiftExtra,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ParseTableEntry {
|
||||
pub actions: Vec<ParseAction>,
|
||||
|
|
@ -34,10 +40,11 @@ pub(crate) struct ParseTableEntry {
|
|||
pub(crate) struct ParseState {
|
||||
pub id: ParseStateId,
|
||||
pub terminal_entries: HashMap<Symbol, ParseTableEntry>,
|
||||
pub nonterminal_entries: HashMap<Symbol, ParseStateId>,
|
||||
pub nonterminal_entries: HashMap<Symbol, GotoAction>,
|
||||
pub lex_state_id: usize,
|
||||
pub external_lex_state_id: usize,
|
||||
pub core_id: usize,
|
||||
pub is_non_terminal_extra: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
|
||||
|
|
@ -103,7 +110,13 @@ impl ParseState {
|
|||
_ => None,
|
||||
})
|
||||
})
|
||||
.chain(self.nonterminal_entries.iter().map(|(_, state)| *state))
|
||||
.chain(self.nonterminal_entries.iter().filter_map(|(_, action)| {
|
||||
if let GotoAction::Goto(state) = action {
|
||||
Some(*state)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn update_referenced_states<F>(&mut self, mut f: F)
|
||||
|
|
@ -121,15 +134,18 @@ impl ParseState {
|
|||
}
|
||||
}
|
||||
}
|
||||
for (symbol, other_state) in &self.nonterminal_entries {
|
||||
let result = f(*other_state, self);
|
||||
if result != *other_state {
|
||||
updates.push((*symbol, 0, result));
|
||||
for (symbol, action) in &self.nonterminal_entries {
|
||||
if let GotoAction::Goto(other_state) = action {
|
||||
let result = f(*other_state, self);
|
||||
if result != *other_state {
|
||||
updates.push((*symbol, 0, result));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (symbol, action_index, new_state) in updates {
|
||||
if symbol.is_non_terminal() {
|
||||
self.nonterminal_entries.insert(symbol, new_state);
|
||||
self.nonterminal_entries
|
||||
.insert(symbol, GotoAction::Goto(new_state));
|
||||
} else {
|
||||
let entry = self.terminal_entries.get_mut(&symbol).unwrap();
|
||||
if let ParseAction::Shift { is_repetition, .. } = entry.actions[action_index] {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue