Merge branch 'master' into HEAD

commit 026231e93d
173 changed files with 22878 additions and 6961 deletions
@@ -2,7 +2,7 @@ use super::coincident_tokens::CoincidentTokenIndex;
use super::token_conflicts::TokenConflictMap;
use crate::generate::dedup::split_state_id_groups;
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::nfa::{CharacterSet, NfaCursor};
use crate::generate::nfa::NfaCursor;
use crate::generate::rules::{Symbol, TokenSet};
use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable};
use log::info;

@@ -189,13 +189,10 @@ impl<'a> LexTableBuilder<'a> {
// character that leads to the empty set of NFA states.
if eof_valid {
let (next_state_id, _) = self.add_state(Vec::new(), false);
self.table.states[state_id].advance_actions.push((
CharacterSet::empty().add_char('\0'),
AdvanceAction {
state: next_state_id,
in_main_token: true,
},
));
self.table.states[state_id].eof_action = Some(AdvanceAction {
state: next_state_id,
in_main_token: true,
});
}

for transition in transitions {

@@ -273,6 +270,7 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
let signature = (
i == 0,
state.accept_action,
state.eof_action.is_some(),
state
.advance_actions
.iter()

@@ -320,6 +318,9 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
for (_, advance_action) in new_state.advance_actions.iter_mut() {
advance_action.state = group_ids_by_state_id[advance_action.state];
}
if let Some(eof_action) = &mut new_state.eof_action {
eof_action.state = group_ids_by_state_id[eof_action.state];
}
new_states.push(new_state);
}

@@ -364,6 +365,9 @@ fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) {
for (_, advance_action) in state.advance_actions.iter_mut() {
advance_action.state = new_ids_by_old_id[advance_action.state];
}
if let Some(eof_action) = &mut state.eof_action {
eof_action.state = new_ids_by_old_id[eof_action.state];
}
state
})
.collect();
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ use crate::generate::grammars::{
|
|||
use crate::generate::node_types::VariableInfo;
|
||||
use crate::generate::rules::{Associativity, Symbol, SymbolType, TokenSet};
|
||||
use crate::generate::tables::{
|
||||
FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
ProductionInfo, ProductionInfoId,
|
||||
};
|
||||
use core::ops::Range;
|
||||
|
|
@ -16,17 +16,19 @@ use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
|
|||
use std::fmt::Write;
|
||||
use std::u32;
|
||||
|
||||
// For conflict reporting, each parse state is associated with an example
|
||||
// sequence of symbols that could lead to that parse state.
|
||||
type SymbolSequence = Vec<Symbol>;
|
||||
|
||||
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
|
||||
pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
|
||||
|
||||
#[derive(Clone)]
|
||||
struct AuxiliarySymbolInfo {
|
||||
auxiliary_symbol: Symbol,
|
||||
parent_symbols: Vec<Symbol>,
|
||||
}
|
||||
|
||||
type SymbolSequence = Vec<Symbol>;
|
||||
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
|
||||
|
||||
pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
|
||||
|
||||
struct ParseStateQueueEntry {
|
||||
state_id: ParseStateId,
|
||||
preceding_auxiliary_symbols: AuxiliarySymbolSequence,
|
||||
|
|
@ -41,6 +43,7 @@ struct ParseTableBuilder<'a> {
|
|||
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
|
||||
parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
|
||||
parse_state_queue: VecDeque<ParseStateQueueEntry>,
|
||||
non_terminal_extra_states: Vec<(Symbol, usize)>,
|
||||
parse_table: ParseTable,
|
||||
}
|
||||
|
||||
|
|
@ -52,7 +55,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.push(ProductionInfo::default());
|
||||
|
||||
// Add the error state at index 0.
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default(), false);
|
||||
|
||||
// Add the starting state at index 1.
|
||||
self.add_parse_state(
|
||||
|
|
@ -66,8 +69,40 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.iter()
|
||||
.cloned(),
|
||||
),
|
||||
false,
|
||||
);
|
||||
|
||||
// Compute the possible item sets for non-terminal extras.
|
||||
let mut non_terminal_extra_item_sets_by_first_terminal = BTreeMap::new();
|
||||
for extra_non_terminal in self
|
||||
.syntax_grammar
|
||||
.extra_symbols
|
||||
.iter()
|
||||
.filter(|s| s.is_non_terminal())
|
||||
{
|
||||
let variable = &self.syntax_grammar.variables[extra_non_terminal.index];
|
||||
for production in &variable.productions {
|
||||
non_terminal_extra_item_sets_by_first_terminal
|
||||
.entry(production.first_symbol().unwrap())
|
||||
.or_insert(ParseItemSet::default())
|
||||
.insert(
|
||||
ParseItem {
|
||||
variable_index: extra_non_terminal.index as u32,
|
||||
production,
|
||||
step_index: 1,
|
||||
},
|
||||
&[Symbol::end()].iter().cloned().collect(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Add a state for each starting terminal of a non-terminal extra rule.
|
||||
for (terminal, item_set) in non_terminal_extra_item_sets_by_first_terminal {
|
||||
self.non_terminal_extra_states
|
||||
.push((terminal, self.parse_table.states.len()));
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), item_set, true);
|
||||
}
|
||||
|
||||
while let Some(entry) = self.parse_state_queue.pop_front() {
|
||||
let item_set = self
|
||||
.item_set_builder
|
||||
|
|
@ -91,9 +126,15 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
preceding_symbols: &SymbolSequence,
|
||||
preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
|
||||
item_set: ParseItemSet<'a>,
|
||||
is_non_terminal_extra: bool,
|
||||
) -> ParseStateId {
|
||||
match self.state_ids_by_item_set.entry(item_set) {
|
||||
// If an equivalent item set has already been processed, then return
|
||||
// the existing parse state index.
|
||||
Entry::Occupied(o) => *o.get(),
|
||||
|
||||
// Otherwise, insert a new parse state and add it to the queue of
|
||||
// parse states to populate.
|
||||
Entry::Vacant(v) => {
|
||||
let core = v.key().core();
|
||||
let core_count = self.core_ids_by_core.len();
|
||||
|
|
@ -116,6 +157,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
terminal_entries: HashMap::new(),
|
||||
nonterminal_entries: HashMap::new(),
|
||||
core_id,
|
||||
is_non_terminal_extra,
|
||||
});
|
||||
self.parse_state_queue.push_back(ParseStateQueueEntry {
|
||||
state_id,
|
||||
|
|
@ -138,7 +180,12 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
let mut non_terminal_successors = BTreeMap::new();
|
||||
let mut lookaheads_with_conflicts = TokenSet::new();
|
||||
|
||||
// Each item in the item set contributes either a Shift action or a Reduce
// action in this state.
|
||||
for (item, lookaheads) in &item_set.entries {
|
||||
// If the item is unfinished, then this state has a transition for the item's
|
||||
// next symbol. Advance the item to its next step and insert the resulting
|
||||
// item into the successor item set.
|
||||
if let Some(next_symbol) = item.symbol() {
|
||||
let successor = item.successor();
|
||||
if next_symbol.is_non_terminal() {
|
||||
|
|
@ -160,7 +207,10 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.or_insert_with(|| ParseItemSet::default())
|
||||
.insert(successor, lookaheads);
|
||||
}
|
||||
} else {
|
||||
}
|
||||
// If the item is finished, then add a Reduce action to this state based
|
||||
// on this item.
|
||||
else {
|
||||
let action = if item.is_augmented() {
|
||||
ParseAction::Accept
|
||||
} else {
|
||||
|
|
@ -179,6 +229,10 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.terminal_entries
|
||||
.entry(lookahead);
|
||||
let entry = entry.or_insert_with(|| ParseTableEntry::new());
|
||||
|
||||
// While inserting Reduce actions, eagerly resolve conflicts related
|
||||
// to precedence: avoid inserting lower-precedence reductions, and
|
||||
// clear the action list when inserting higher-precedence reductions.
|
||||
if entry.actions.is_empty() {
|
||||
entry.actions.push(action);
|
||||
} else if action.precedence() > entry.actions[0].precedence() {
|
||||
|
|
@ -193,12 +247,16 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
// Having computed the successor item sets for each symbol, add a new
// parse state for each of these item sets, and add a corresponding Shift
// action to this state.
|
||||
for (symbol, next_item_set) in terminal_successors {
|
||||
preceding_symbols.push(symbol);
|
||||
let next_state_id = self.add_parse_state(
|
||||
&preceding_symbols,
|
||||
&preceding_auxiliary_symbols,
|
||||
next_item_set,
|
||||
self.parse_table.states[state_id].is_non_terminal_extra,
|
||||
);
|
||||
preceding_symbols.pop();
|
||||
|
||||
|
|
@ -226,13 +284,19 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
&preceding_symbols,
|
||||
&preceding_auxiliary_symbols,
|
||||
next_item_set,
|
||||
self.parse_table.states[state_id].is_non_terminal_extra,
|
||||
);
|
||||
preceding_symbols.pop();
|
||||
self.parse_table.states[state_id]
|
||||
.nonterminal_entries
|
||||
.insert(symbol, next_state_id);
|
||||
.insert(symbol, GotoAction::Goto(next_state_id));
|
||||
}
|
||||
|
||||
// For any symbol with multiple actions, perform conflict resolution.
|
||||
// This will either
|
||||
// * choose one action over the others using precedence or associativity
|
||||
// * keep multiple actions if this conflict has been whitelisted in the grammar
|
||||
// * fail, terminating the parser generation process
|
||||
for symbol in lookaheads_with_conflicts.iter() {
|
||||
self.handle_conflict(
|
||||
&item_set,
|
||||
|
|
@ -243,15 +307,50 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
)?;
|
||||
}
|
||||
|
||||
// Finally, add actions for the grammar's `extra` symbols.
|
||||
let state = &mut self.parse_table.states[state_id];
|
||||
for extra_token in &self.syntax_grammar.extra_tokens {
|
||||
state
|
||||
.terminal_entries
|
||||
.entry(*extra_token)
|
||||
.or_insert(ParseTableEntry {
|
||||
reusable: true,
|
||||
actions: vec![ParseAction::ShiftExtra],
|
||||
});
|
||||
let is_non_terminal_extra = state.is_non_terminal_extra;
|
||||
let is_end_of_non_terminal_extra =
|
||||
is_non_terminal_extra && state.terminal_entries.len() == 1;
|
||||
|
||||
// Add actions for the start tokens of each non-terminal extra rule.
// These actions are added to every state except for the states that are
// already within non-terminal extras. Non-terminal extras are not allowed
// to nest within each other.
|
||||
if !is_non_terminal_extra {
|
||||
for (terminal, state_id) in &self.non_terminal_extra_states {
|
||||
state
|
||||
.terminal_entries
|
||||
.entry(*terminal)
|
||||
.or_insert(ParseTableEntry {
|
||||
reusable: true,
|
||||
actions: vec![ParseAction::Shift {
|
||||
state: *state_id,
|
||||
is_repetition: false,
|
||||
}],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Add ShiftExtra actions for the terminal extra tokens. These actions
|
||||
// are added to every state except for those at the ends of non-terminal
|
||||
// extras.
|
||||
if !is_end_of_non_terminal_extra {
|
||||
for extra_token in &self.syntax_grammar.extra_symbols {
|
||||
if extra_token.is_non_terminal() {
|
||||
state
|
||||
.nonterminal_entries
|
||||
.insert(*extra_token, GotoAction::ShiftExtra);
|
||||
} else {
|
||||
state
|
||||
.terminal_entries
|
||||
.entry(*extra_token)
|
||||
.or_insert(ParseTableEntry {
|
||||
reusable: true,
|
||||
actions: vec![ParseAction::ShiftExtra],
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
|
@ -362,8 +461,8 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
// If all reduce actions are left associative, remove the SHIFT action.
|
||||
// If all reduce actions are right associative, remove the REDUCE actions.
|
||||
// If all Reduce actions are left associative, remove the SHIFT action.
|
||||
// If all Reduce actions are right associative, remove the REDUCE actions.
|
||||
match (has_left, has_non, has_right) {
|
||||
(true, false, false) => {
|
||||
entry.actions.pop();
|
||||
|
|
@ -744,7 +843,7 @@ fn populate_following_tokens(
|
|||
}
|
||||
}
|
||||
}
|
||||
for extra in &grammar.extra_tokens {
|
||||
for extra in &grammar.extra_symbols {
|
||||
if extra.is_terminal() {
|
||||
for entry in result.iter_mut() {
|
||||
entry.insert(*extra);
|
||||
|
|
@ -774,6 +873,7 @@ pub(crate) fn build_parse_table<'a>(
|
|||
lexical_grammar,
|
||||
item_set_builder,
|
||||
variable_info,
|
||||
non_terminal_extra_states: Vec::new(),
|
||||
state_ids_by_item_set: HashMap::new(),
|
||||
core_ids_by_core: HashMap::new(),
|
||||
parse_state_info_by_id: Vec::new(),
|
||||
|
|
|
|||
|
|
@ -2,7 +2,9 @@ use super::token_conflicts::TokenConflictMap;
|
|||
use crate::generate::dedup::split_state_id_groups;
|
||||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
|
||||
use crate::generate::rules::{AliasMap, Symbol, TokenSet};
|
||||
use crate::generate::tables::{ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry};
|
||||
use crate::generate::tables::{
|
||||
GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
};
|
||||
use log::info;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::mem;
|
||||
|
|
@ -66,6 +68,7 @@ impl<'a> Minimizer<'a> {
|
|||
..
|
||||
} => {
|
||||
if !self.simple_aliases.contains_key(&symbol)
|
||||
&& !self.syntax_grammar.supertype_symbols.contains(&symbol)
|
||||
&& !aliased_symbols.contains(&symbol)
|
||||
&& self.syntax_grammar.variables[symbol.index].kind
|
||||
!= VariableType::Named
|
||||
|
|
@ -101,7 +104,10 @@ impl<'a> Minimizer<'a> {
|
|||
state.update_referenced_states(|other_state_id, state| {
|
||||
if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) {
|
||||
done = false;
|
||||
state.nonterminal_entries[symbol]
|
||||
match state.nonterminal_entries.get(symbol) {
|
||||
Some(GotoAction::Goto(state_id)) => *state_id,
|
||||
_ => other_state_id,
|
||||
}
|
||||
} else {
|
||||
other_state_id
|
||||
}
|
||||
|
|
@ -194,6 +200,9 @@ impl<'a> Minimizer<'a> {
|
|||
right_state: &ParseState,
|
||||
group_ids_by_state_id: &Vec<ParseStateId>,
|
||||
) -> bool {
|
||||
if left_state.is_non_terminal_extra != right_state.is_non_terminal_extra {
|
||||
return true;
|
||||
}
|
||||
for (token, left_entry) in &left_state.terminal_entries {
|
||||
if let Some(right_entry) = right_state.terminal_entries.get(token) {
|
||||
if self.entries_conflict(
|
||||
|
|
@ -262,18 +271,24 @@ impl<'a> Minimizer<'a> {
|
|||
|
||||
for (symbol, s1) in &state1.nonterminal_entries {
|
||||
if let Some(s2) = state2.nonterminal_entries.get(symbol) {
|
||||
let group1 = group_ids_by_state_id[*s1];
|
||||
let group2 = group_ids_by_state_id[*s2];
|
||||
if group1 != group2 {
|
||||
info!(
|
||||
"split states {} {} - successors for {} are split: {} {}",
|
||||
state1.id,
|
||||
state2.id,
|
||||
self.symbol_name(symbol),
|
||||
s1,
|
||||
s2,
|
||||
);
|
||||
return true;
|
||||
match (s1, s2) {
|
||||
(GotoAction::ShiftExtra, GotoAction::ShiftExtra) => continue,
|
||||
(GotoAction::Goto(s1), GotoAction::Goto(s2)) => {
|
||||
let group1 = group_ids_by_state_id[*s1];
|
||||
let group2 = group_ids_by_state_id[*s2];
|
||||
if group1 != group2 {
|
||||
info!(
|
||||
"split states {} {} - successors for {} are split: {} {}",
|
||||
state1.id,
|
||||
state2.id,
|
||||
self.symbol_name(symbol),
|
||||
s1,
|
||||
s2,
|
||||
);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
_ => return true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -271,6 +271,7 @@ fn identify_keywords(
cursor.reset(vec![variable.start_state]);
if all_chars_are_alphabetical(&cursor)
&& token_conflict_map.does_match_same_string(i, word_token.index)
&& !token_conflict_map.does_match_different_string(i, word_token.index)
{
info!(
"Keywords - add candidate {}",
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
use crate::generate::build_tables::item::{TokenSetDisplay};
|
||||
use crate::generate::build_tables::item::TokenSetDisplay;
|
||||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition};
|
||||
use crate::generate::rules::TokenSet;
|
||||
use std::collections::HashSet;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::HashSet;
|
||||
use std::fmt;
|
||||
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
|
|
@ -13,6 +13,7 @@ struct TokenConflictStatus {
|
|||
does_match_valid_continuation: bool,
|
||||
does_match_separators: bool,
|
||||
matches_same_string: bool,
|
||||
matches_different_string: bool,
|
||||
}
|
||||
|
||||
pub(crate) struct TokenConflictMap<'a> {
|
||||
|
|
@ -25,6 +26,12 @@ pub(crate) struct TokenConflictMap<'a> {
|
|||
}
|
||||
|
||||
impl<'a> TokenConflictMap<'a> {
|
||||
/// Create a token conflict map based on a lexical grammar, which describes the structure
/// of each token, and a `following_tokens` map, which indicates which tokens may appear
/// immediately after each other token.
///
/// This analyzes the possible kinds of overlap between each pair of tokens and stores
/// them in a matrix.
|
||||
pub fn new(grammar: &'a LexicalGrammar, following_tokens: Vec<TokenSet>) -> Self {
|
||||
let mut cursor = NfaCursor::new(&grammar.nfa, Vec::new());
|
||||
let starting_chars = get_starting_chars(&mut cursor, grammar);
|
||||
|
|
@ -50,12 +57,21 @@ impl<'a> TokenConflictMap<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Do tokens `a` and `b` have the same conflict status with respect to token `other`?
pub fn has_same_conflict_status(&self, a: usize, b: usize, other: usize) -> bool {
|
||||
let left = &self.status_matrix[matrix_index(self.n, a, other)];
|
||||
let right = &self.status_matrix[matrix_index(self.n, b, other)];
|
||||
left == right
|
||||
}
|
||||
|
||||
/// Does token `i` match any strings that token `j` does *not* match?
|
||||
pub fn does_match_different_string(&self, i: usize, j: usize) -> bool {
|
||||
self.status_matrix[matrix_index(self.n, i, j)].matches_different_string
|
||||
}
|
||||
|
||||
/// Does token `i` match any strings that token `j` also matches, where
|
||||
/// token `i` is preferred over token `j`?
|
||||
pub fn does_match_same_string(&self, i: usize, j: usize) -> bool {
|
||||
self.status_matrix[matrix_index(self.n, i, j)].matches_same_string
|
||||
}
|
||||
|
|
@ -67,6 +83,7 @@ impl<'a> TokenConflictMap<'a> {
|
|||
|| entry.matches_same_string
|
||||
}
|
||||
|
||||
/// Does token `i` match any strings that are *prefixes* of strings matched by `j`?
|
||||
pub fn does_match_prefix(&self, i: usize, j: usize) -> bool {
|
||||
self.status_matrix[matrix_index(self.n, i, j)].matches_prefix
|
||||
}
|
||||
|
|
@ -239,19 +256,29 @@ fn compute_conflict_status(
|
|||
);
|
||||
|
||||
while let Some(state_set) = state_set_queue.pop() {
|
||||
// Don't pursue states where there's no potential for conflict.
|
||||
if grammar.variable_indices_for_nfa_states(&state_set).count() > 1 {
|
||||
cursor.reset(state_set);
|
||||
} else {
|
||||
let mut live_variable_indices = grammar.variable_indices_for_nfa_states(&state_set);
|
||||
|
||||
// If only one of the two tokens could possibly match from this state, then
|
||||
// there is no reason to analyze any of its successors. Just record the fact
|
||||
// that the token matches a string that the other token does not match.
|
||||
let first_live_variable_index = live_variable_indices.next().unwrap();
|
||||
if live_variable_indices.count() == 0 {
|
||||
if first_live_variable_index == i {
|
||||
result.0.matches_different_string = true;
|
||||
} else {
|
||||
result.1.matches_different_string = true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
let has_sep = cursor.transition_chars().any(|(_, sep)| sep);
|
||||
// Don't pursue states where there's no potential for conflict.
|
||||
cursor.reset(state_set);
|
||||
let within_separator = cursor.transition_chars().any(|(_, sep)| sep);
|
||||
|
||||
// Examine each possible completed token in this state.
|
||||
let mut completion = None;
|
||||
for (id, precedence) in cursor.completions() {
|
||||
if has_sep {
|
||||
if within_separator {
|
||||
if id == i {
|
||||
result.0.does_match_separators = true;
|
||||
} else {
|
||||
|
|
@ -316,7 +343,7 @@ fn compute_conflict_status(
|
|||
&transition,
|
||||
completed_id,
|
||||
completed_precedence,
|
||||
has_sep,
|
||||
within_separator,
|
||||
) {
|
||||
can_advance = true;
|
||||
if advanced_id == i {
|
||||
|
|
|
|||
|
|
@@ -292,7 +292,12 @@ function grammar(baseGrammar, options) {
extras = options.extras
.call(ruleBuilder, ruleBuilder, baseGrammar.extras)
.map(normalize);

if (!Array.isArray(extras)) {
throw new Error("Grammar's 'extras' function must return an array.")
}

extras = extras.map(normalize);
}

let word = baseGrammar.word;
|
||||
|
|
|
|||
|
|
@ -1,15 +1,15 @@
|
|||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "tree-sitter grammar specification",
|
||||
"type": "object",
|
||||
|
||||
"required": [
|
||||
"name",
|
||||
"rules"
|
||||
],
|
||||
"required": ["name", "rules"],
|
||||
|
||||
"additionalProperties": false,
|
||||
|
||||
"properties": {
|
||||
"name": {
|
||||
"description": "the name of the grammar",
|
||||
"type": "string",
|
||||
"pattern": "^[a-zA-Z_]\\w*"
|
||||
},
|
||||
|
|
@ -60,6 +60,15 @@
|
|||
"word": {
|
||||
"type": "string",
|
||||
"pattern": "^[a-zA-Z_]\\w*"
|
||||
},
|
||||
|
||||
"supertypes": {
|
||||
"description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"description": "the name of a rule in `rules` or `extras`",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
|
|
@ -96,20 +105,19 @@
|
|||
"type": "string",
|
||||
"pattern": "^PATTERN$"
|
||||
},
|
||||
"value": {"type": "string"}
|
||||
"value": { "type": "string" }
|
||||
},
|
||||
"required": ["type", "value"]
|
||||
},
|
||||
|
||||
"symbol-rule": {
|
||||
"required": ["name"],
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^SYMBOL$"
|
||||
},
|
||||
"name": {"type": "string"}
|
||||
"name": { "type": "string" }
|
||||
},
|
||||
"required": ["type", "name"]
|
||||
},
|
||||
|
|
@ -210,6 +218,20 @@
|
|||
"required": ["type", "content"]
|
||||
},
|
||||
|
||||
"field-rule": {
|
||||
"properties": {
|
||||
"name": { "type": "string" },
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^FIELD$"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/definitions/rule"
|
||||
}
|
||||
},
|
||||
"required": ["name", "type", "content"]
|
||||
},
|
||||
|
||||
"prec-rule": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -239,6 +261,7 @@
|
|||
{ "$ref": "#/definitions/repeat1-rule" },
|
||||
{ "$ref": "#/definitions/repeat-rule" },
|
||||
{ "$ref": "#/definitions/token-rule" },
|
||||
{ "$ref": "#/definitions/field-rule" },
|
||||
{ "$ref": "#/definitions/prec-rule" }
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -23,7 +23,7 @@ pub(crate) struct Variable {
pub(crate) struct InputGrammar {
pub name: String,
pub variables: Vec<Variable>,
pub extra_tokens: Vec<Rule>,
pub extra_symbols: Vec<Rule>,
pub expected_conflicts: Vec<Vec<String>>,
pub external_tokens: Vec<Rule>,
pub variables_to_inline: Vec<String>,

@@ -87,7 +87,7 @@ pub(crate) struct ExternalToken {
#[derive(Debug, Default)]
pub(crate) struct SyntaxGrammar {
pub variables: Vec<SyntaxVariable>,
pub extra_tokens: Vec<Symbol>,
pub extra_symbols: Vec<Symbol>,
pub expected_conflicts: Vec<Vec<Symbol>>,
pub external_tokens: Vec<ExternalToken>,
pub supertype_symbols: Vec<Symbol>,
|
||||
|
|
|
|||
|
|
@ -6,13 +6,12 @@ mod node_types;
|
|||
mod npm_files;
|
||||
pub mod parse_grammar;
|
||||
mod prepare_grammar;
|
||||
pub mod properties;
|
||||
mod render;
|
||||
mod rules;
|
||||
mod tables;
|
||||
|
||||
use self::build_tables::build_tables;
|
||||
use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType};
|
||||
use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
|
||||
use self::parse_grammar::parse_grammar;
|
||||
use self::prepare_grammar::prepare_grammar;
|
||||
use self::render::render_c_code;
|
||||
|
|
@ -20,9 +19,8 @@ use self::rules::AliasMap;
|
|||
use crate::error::{Error, Result};
|
||||
use lazy_static::lazy_static;
|
||||
use regex::{Regex, RegexBuilder};
|
||||
use std::collections::HashSet;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{BufWriter, Write};
|
||||
use std::fs;
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, Stdio};
|
||||
|
||||
|
|
@ -33,15 +31,9 @@ lazy_static! {
|
|||
.unwrap();
|
||||
}
|
||||
|
||||
const NEW_HEADER_PARTS: [&'static str; 2] = [
|
||||
"
|
||||
uint32_t large_state_count;
|
||||
const uint16_t *small_parse_table;
|
||||
const uint32_t *small_parse_table_map;",
|
||||
"
|
||||
#define SMALL_STATE(id) id - LARGE_STATE_COUNT
|
||||
",
|
||||
];
|
||||
const NEW_HEADER_PARTS: &[&'static str] = &["
|
||||
const uint16_t *alias_map;
|
||||
uint32_t state_count;"];
|
||||
|
||||
struct GeneratedParser {
|
||||
c_code: String,
|
||||
|
|
@ -51,13 +43,11 @@ struct GeneratedParser {
|
|||
pub fn generate_parser_in_directory(
|
||||
repo_path: &PathBuf,
|
||||
grammar_path: Option<&str>,
|
||||
properties_only: bool,
|
||||
next_abi: bool,
|
||||
report_symbol_name: Option<&str>,
|
||||
) -> Result<()> {
|
||||
let src_path = repo_path.join("src");
|
||||
let header_path = src_path.join("tree_sitter");
|
||||
let properties_dir_path = repo_path.join("properties");
|
||||
|
||||
// Ensure that the output directories exist.
|
||||
fs::create_dir_all(&src_path)?;
|
||||
|
|
@ -82,71 +72,48 @@ pub fn generate_parser_in_directory(
|
|||
prepare_grammar(&input_grammar)?;
|
||||
let language_name = input_grammar.name;
|
||||
|
||||
// If run with no arguments, read all of the property sheets and compile them to JSON.
|
||||
if grammar_path.is_none() {
|
||||
let token_names = get_token_names(&syntax_grammar, &lexical_grammar);
|
||||
if let Ok(entries) = fs::read_dir(properties_dir_path) {
|
||||
for entry in entries {
|
||||
let css_path = entry?.path();
|
||||
let css = fs::read_to_string(&css_path)?;
|
||||
let sheet = properties::generate_property_sheet(&css_path, &css, &token_names)?;
|
||||
let property_sheet_json_path = src_path
|
||||
.join(css_path.file_name().unwrap())
|
||||
.with_extension("json");
|
||||
let property_sheet_json_file =
|
||||
File::create(&property_sheet_json_path).map_err(Error::wrap(|| {
|
||||
format!("Failed to create {:?}", property_sheet_json_path)
|
||||
}))?;
|
||||
let mut writer = BufWriter::new(property_sheet_json_file);
|
||||
serde_json::to_writer_pretty(&mut writer, &sheet)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Generate the parser and related files.
|
||||
if !properties_only {
|
||||
let GeneratedParser {
|
||||
c_code,
|
||||
node_types_json,
|
||||
} = generate_parser_for_grammar_with_opts(
|
||||
&language_name,
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
inlines,
|
||||
simple_aliases,
|
||||
next_abi,
|
||||
report_symbol_name,
|
||||
)?;
|
||||
let GeneratedParser {
|
||||
c_code,
|
||||
node_types_json,
|
||||
} = generate_parser_for_grammar_with_opts(
|
||||
&language_name,
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
inlines,
|
||||
simple_aliases,
|
||||
next_abi,
|
||||
report_symbol_name,
|
||||
)?;
|
||||
|
||||
write_file(&src_path.join("parser.c"), c_code)?;
|
||||
write_file(&src_path.join("node-types.json"), node_types_json)?;
|
||||
write_file(&src_path.join("parser.c"), c_code)?;
|
||||
write_file(&src_path.join("node-types.json"), node_types_json)?;
|
||||
|
||||
if next_abi {
|
||||
write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;
|
||||
} else {
|
||||
let mut header = tree_sitter::PARSER_HEADER.to_string();
|
||||
if next_abi {
|
||||
write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;
|
||||
} else {
|
||||
let mut header = tree_sitter::PARSER_HEADER.to_string();
|
||||
|
||||
for part in &NEW_HEADER_PARTS {
|
||||
let pos = header
|
||||
.find(part)
|
||||
.expect("Missing expected part of parser.h header");
|
||||
header.replace_range(pos..(pos + part.len()), "");
|
||||
}
|
||||
|
||||
write_file(&header_path.join("parser.h"), header)?;
|
||||
for part in NEW_HEADER_PARTS.iter() {
|
||||
let pos = header
|
||||
.find(part)
|
||||
.expect("Missing expected part of parser.h header");
|
||||
header.replace_range(pos..(pos + part.len()), "");
|
||||
}
|
||||
|
||||
ensure_file(&repo_path.join("index.js"), || {
|
||||
npm_files::index_js(&language_name)
|
||||
})?;
|
||||
ensure_file(&src_path.join("binding.cc"), || {
|
||||
npm_files::binding_cc(&language_name)
|
||||
})?;
|
||||
ensure_file(&repo_path.join("binding.gyp"), || {
|
||||
npm_files::binding_gyp(&language_name)
|
||||
})?;
|
||||
write_file(&header_path.join("parser.h"), header)?;
|
||||
}
|
||||
|
||||
ensure_file(&repo_path.join("index.js"), || {
|
||||
npm_files::index_js(&language_name)
|
||||
})?;
|
||||
ensure_file(&src_path.join("binding.cc"), || {
|
||||
npm_files::binding_cc(&language_name)
|
||||
})?;
|
||||
ensure_file(&repo_path.join("binding.gyp"), || {
|
||||
npm_files::binding_gyp(&language_name)
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -176,7 +143,8 @@ fn generate_parser_for_grammar_with_opts(
|
|||
next_abi: bool,
|
||||
report_symbol_name: Option<&str>,
|
||||
) -> Result<GeneratedParser> {
|
||||
let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &inlines)?;
|
||||
let variable_info =
|
||||
node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?;
|
||||
let node_types_json = node_types::generate_node_types_json(
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
|
|
@ -208,35 +176,6 @@ fn generate_parser_for_grammar_with_opts(
|
|||
})
|
||||
}
|
||||
|
||||
fn get_token_names(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
) -> HashSet<String> {
|
||||
let mut result = HashSet::new();
|
||||
for variable in &lexical_grammar.variables {
|
||||
if variable.kind == VariableType::Named {
|
||||
result.insert(variable.name.clone());
|
||||
}
|
||||
}
|
||||
for token in &syntax_grammar.external_tokens {
|
||||
if token.kind == VariableType::Named {
|
||||
result.insert(token.name.clone());
|
||||
}
|
||||
}
|
||||
for variable in &syntax_grammar.variables {
|
||||
for production in &variable.productions {
|
||||
for step in &production.steps {
|
||||
if let Some(alias) = &step.alias {
|
||||
if !step.symbol.is_non_terminal() && alias.is_named {
|
||||
result.insert(alias.value.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn load_grammar_file(grammar_path: &Path) -> Result<String> {
|
||||
match grammar_path.extension().and_then(|e| e.to_str()) {
|
||||
Some("js") => Ok(load_js_grammar_file(grammar_path)?),
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
use std::char;
|
||||
use std::cmp::max;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::HashSet;
|
||||
use std::fmt;
|
||||
use std::mem::swap;
|
||||
use std::ops::Range;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum CharacterSet {
|
||||
|
|
@ -178,6 +180,40 @@ impl CharacterSet {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn ranges<'a>(
|
||||
chars: &'a Vec<char>,
|
||||
ruled_out_characters: &'a HashSet<u32>,
|
||||
) -> impl Iterator<Item = Range<char>> + 'a {
|
||||
let mut prev_range: Option<Range<char>> = None;
|
||||
chars
|
||||
.iter()
|
||||
.map(|c| (*c, false))
|
||||
.chain(Some(('\0', true)))
|
||||
.filter_map(move |(c, done)| {
|
||||
if done {
|
||||
return prev_range.clone();
|
||||
}
|
||||
if ruled_out_characters.contains(&(c as u32)) {
|
||||
return None;
|
||||
}
|
||||
if let Some(range) = prev_range.clone() {
|
||||
let mut prev_range_successor = range.end as u32 + 1;
|
||||
while prev_range_successor < c as u32 {
|
||||
if !ruled_out_characters.contains(&prev_range_successor) {
|
||||
prev_range = Some(c..c);
|
||||
return Some(range);
|
||||
}
|
||||
prev_range_successor += 1;
|
||||
}
|
||||
prev_range = Some(range.start..c);
|
||||
None
|
||||
} else {
|
||||
prev_range = Some(c..c);
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn contains(&self, c: char) -> bool {
|
||||
match self {
|
||||
|
|
@ -266,6 +302,13 @@ fn compare_chars(left: &Vec<char>, right: &Vec<char>) -> SetComparision {
|
|||
result.common = true;
|
||||
}
|
||||
}
|
||||
|
||||
match (i, j) {
|
||||
(Some(_), _) => result.left_only = true,
|
||||
(_, Some(_)) => result.right_only = true,
|
||||
_ => {}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
|
|
@ -718,7 +761,7 @@ mod tests {
|
|||
.add_range('d', 'e')
|
||||
);
|
||||
|
||||
// A whitelist and an intersecting blacklist.
|
||||
// An inclusion and an intersecting exclusion.
|
||||
// Both sets contain 'e', 'f', and 'm'
|
||||
let mut a = CharacterSet::empty()
|
||||
.add_range('c', 'h')
|
||||
|
|
@ -748,7 +791,7 @@ mod tests {
|
|||
assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l']));
|
||||
assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate());
|
||||
|
||||
// A blacklist and an overlapping blacklist.
|
||||
// An exclusion and an overlapping inclusion.
|
||||
// Both sets exclude 'c', 'd', and 'e'
|
||||
let mut a = CharacterSet::empty().add_range('a', 'e').negate();
|
||||
let mut b = CharacterSet::empty().add_range('c', 'h').negate();
|
||||
|
|
@ -759,7 +802,7 @@ mod tests {
|
|||
assert_eq!(a, CharacterSet::Include(vec!['f', 'g', 'h']));
|
||||
assert_eq!(b, CharacterSet::Include(vec!['a', 'b']));
|
||||
|
||||
// A blacklist and a larger blacklist.
|
||||
// An exclusion and a larger exclusion.
|
||||
let mut a = CharacterSet::empty().add_range('b', 'c').negate();
|
||||
let mut b = CharacterSet::empty().add_range('a', 'd').negate();
|
||||
assert_eq!(
|
||||
|
|
@ -810,5 +853,53 @@ mod tests {
|
|||
);
|
||||
assert!(a.does_intersect(&b));
|
||||
assert!(b.does_intersect(&a));
|
||||
|
||||
let (a, b) = (
|
||||
CharacterSet::Include(vec!['c']),
|
||||
CharacterSet::Exclude(vec!['a']),
|
||||
);
|
||||
assert!(a.does_intersect(&b));
|
||||
assert!(b.does_intersect(&a));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_character_set_get_ranges() {
|
||||
struct Row {
|
||||
chars: Vec<char>,
|
||||
ruled_out_chars: Vec<char>,
|
||||
expected_ranges: Vec<Range<char>>,
|
||||
}
|
||||
|
||||
let table = [
|
||||
Row {
|
||||
chars: vec!['a'],
|
||||
ruled_out_chars: vec![],
|
||||
expected_ranges: vec!['a'..'a'],
|
||||
},
|
||||
Row {
|
||||
chars: vec!['a', 'b', 'c', 'e', 'z'],
|
||||
ruled_out_chars: vec![],
|
||||
expected_ranges: vec!['a'..'c', 'e'..'e', 'z'..'z'],
|
||||
},
|
||||
Row {
|
||||
chars: vec!['a', 'b', 'c', 'e', 'h', 'z'],
|
||||
ruled_out_chars: vec!['d', 'f', 'g'],
|
||||
expected_ranges: vec!['a'..'h', 'z'..'z'],
|
||||
},
|
||||
];
|
||||
|
||||
for Row {
|
||||
chars,
|
||||
ruled_out_chars,
|
||||
expected_ranges,
|
||||
} in table.iter()
|
||||
{
|
||||
let ruled_out_chars = ruled_out_chars
|
||||
.into_iter()
|
||||
.map(|c: &char| *c as u32)
|
||||
.collect();
|
||||
let ranges = CharacterSet::ranges(chars, &ruled_out_chars).collect::<Vec<_>>();
|
||||
assert_eq!(ranges, *expected_ranges);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
|
|
@@ -87,7 +87,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
})
}

let extra_tokens = grammar_json
let extra_symbols = grammar_json
.extras
.unwrap_or(Vec::new())
.into_iter()

@@ -107,7 +107,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
name: grammar_json.name,
word_token: grammar_json.word,
variables,
extra_tokens,
extra_symbols,
expected_conflicts,
external_tokens,
supertype_symbols,
|
||||
|
|
|
|||
|
|
@@ -283,7 +283,7 @@ mod tests {
fn build_grammar(variables: Vec<Variable>) -> ExtractedSyntaxGrammar {
ExtractedSyntaxGrammar {
variables,
extra_tokens: Vec::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
|
||||
|
|
|
|||
cli/src/generate/prepare_grammar/extract_default_aliases.rs (new file, 293 lines)
|
|
@ -0,0 +1,293 @@
|
|||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
struct SymbolStatus {
|
||||
aliases: Vec<(Alias, usize)>,
|
||||
appears_unaliased: bool,
|
||||
}
|
||||
|
||||
// Update the grammar by finding symbols that are always aliased, and for each such symbol,
// promoting one of its aliases to a "default alias", which is applied globally instead
|
||||
// of in a context-specific way.
|
||||
//
|
||||
// This has two benefits:
|
||||
// * It reduces the overhead of storing production-specific alias info in the parse table.
|
||||
// * Within an `ERROR` node, no context-specific aliases will be applied. This transformation
|
||||
// ensures that the children of an `ERROR` node have symbols that are consistent with the
|
||||
// way that they would appear in a valid syntax tree.
|
||||
pub(super) fn extract_default_aliases(
|
||||
syntax_grammar: &mut SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
) -> AliasMap {
|
||||
let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
|
||||
let mut non_terminal_status_list =
|
||||
vec![SymbolStatus::default(); syntax_grammar.variables.len()];
|
||||
let mut external_status_list =
|
||||
vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];
|
||||
|
||||
// For each grammar symbol, find all of the aliases under which the symbol appears,
|
||||
// and determine whether or not the symbol ever appears *unaliased*.
|
||||
for variable in syntax_grammar.variables.iter() {
|
||||
for production in variable.productions.iter() {
|
||||
for step in production.steps.iter() {
|
||||
let mut status = match step.symbol.kind {
|
||||
SymbolType::External => &mut external_status_list[step.symbol.index],
|
||||
SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
|
||||
SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
|
||||
SymbolType::End => panic!("Unexpected end token"),
|
||||
};
|
||||
|
||||
// Default aliases don't work for inlined variables.
|
||||
if syntax_grammar.variables_to_inline.contains(&step.symbol) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(alias) = &step.alias {
|
||||
if let Some(count_for_alias) = status
|
||||
.aliases
|
||||
.iter_mut()
|
||||
.find_map(|(a, count)| if a == alias { Some(count) } else { None })
|
||||
{
|
||||
*count_for_alias += 1;
|
||||
} else {
|
||||
status.aliases.push((alias.clone(), 1));
|
||||
}
|
||||
} else {
|
||||
status.appears_unaliased = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let symbols_with_statuses = (terminal_status_list
|
||||
.iter_mut()
|
||||
.enumerate()
|
||||
.map(|(i, status)| (Symbol::terminal(i), status)))
|
||||
.chain(
|
||||
non_terminal_status_list
|
||||
.iter_mut()
|
||||
.enumerate()
|
||||
.map(|(i, status)| (Symbol::non_terminal(i), status)),
|
||||
)
|
||||
.chain(
|
||||
external_status_list
|
||||
.iter_mut()
|
||||
.enumerate()
|
||||
.map(|(i, status)| (Symbol::external(i), status)),
|
||||
);
|
||||
|
||||
// For each symbol that always appears aliased, find the alias that occurs most often,
// and designate that alias as the symbol's "default alias". Store all of these
// default aliases in a map that will be returned.
|
||||
let mut result = AliasMap::new();
|
||||
for (symbol, status) in symbols_with_statuses {
|
||||
if status.appears_unaliased {
|
||||
status.aliases.clear();
|
||||
} else {
|
||||
if let Some(default_entry) = status
|
||||
.aliases
|
||||
.iter()
|
||||
.enumerate()
|
||||
.max_by_key(|(i, (_, count))| (count, -(*i as i64)))
|
||||
.map(|(_, entry)| entry.clone())
|
||||
{
|
||||
status.aliases.clear();
|
||||
status.aliases.push(default_entry.clone());
|
||||
result.insert(symbol, default_entry.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wherever a symbol is aliased as its default alias, remove the usage of the alias,
|
||||
// because it will now be redundant.
|
||||
let mut alias_positions_to_clear = Vec::new();
|
||||
for variable in syntax_grammar.variables.iter_mut() {
|
||||
alias_positions_to_clear.clear();
|
||||
|
||||
for (i, production) in variable.productions.iter().enumerate() {
|
||||
for (j, step) in production.steps.iter().enumerate() {
|
||||
let status = match step.symbol.kind {
|
||||
SymbolType::External => &mut external_status_list[step.symbol.index],
|
||||
SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
|
||||
SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
|
||||
SymbolType::End => panic!("Unexpected end token"),
|
||||
};
|
||||
|
||||
// If this step is aliased as the symbol's default alias, then remove that alias.
|
||||
if step.alias.is_some()
|
||||
&& step.alias.as_ref() == status.aliases.get(0).map(|t| &t.0)
|
||||
{
|
||||
let mut other_productions_must_use_this_alias_at_this_index = false;
|
||||
for (other_i, other_production) in variable.productions.iter().enumerate() {
|
||||
if other_i != i
|
||||
&& other_production.steps.len() > j
|
||||
&& other_production.steps[j].alias == step.alias
|
||||
&& result.get(&other_production.steps[j].symbol) != step.alias.as_ref()
|
||||
{
|
||||
other_productions_must_use_this_alias_at_this_index = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if !other_productions_must_use_this_alias_at_this_index {
|
||||
alias_positions_to_clear.push((i, j));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (production_index, step_index) in &alias_positions_to_clear {
|
||||
variable.productions[*production_index].steps[*step_index].alias = None;
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::generate::grammars::{
|
||||
LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
|
||||
};
|
||||
use crate::generate::nfa::Nfa;
|
||||
|
||||
#[test]
|
||||
fn test_extract_simple_aliases() {
|
||||
let mut syntax_grammar = SyntaxGrammar {
|
||||
variables: vec![
|
||||
SyntaxVariable {
|
||||
name: "v1".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
|
||||
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
|
||||
ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true),
|
||||
],
|
||||
}],
|
||||
},
|
||||
SyntaxVariable {
|
||||
name: "v2".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
// Token 0 is always aliased as "a1".
|
||||
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
|
||||
// Token 1 is aliased within rule `v1` above, but not here.
|
||||
ProductionStep::new(Symbol::terminal(1)),
|
||||
// Token 2 is aliased differently here than in `v1`. The alias from
|
||||
// `v1` should be promoted to the default alias, because `v1` appears
|
||||
// first in the grammar.
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true),
|
||||
// Token 3 is also aliased differently here than in `v1`. In this case,
|
||||
// this alias should be promoted to the default alias, because it is
|
||||
// used a greater number of times (twice).
|
||||
ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true),
|
||||
ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true),
|
||||
],
|
||||
}],
|
||||
},
|
||||
],
|
||||
extra_symbols: Vec::new(),
|
||||
expected_conflicts: Vec::new(),
|
||||
variables_to_inline: Vec::new(),
|
||||
supertype_symbols: Vec::new(),
|
||||
external_tokens: Vec::new(),
|
||||
word_token: None,
|
||||
};
|
||||
|
||||
let lexical_grammar = LexicalGrammar {
|
||||
nfa: Nfa::new(),
|
||||
variables: vec![
|
||||
LexicalVariable {
|
||||
name: "t0".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
LexicalVariable {
|
||||
name: "t1".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
LexicalVariable {
|
||||
name: "t2".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
LexicalVariable {
|
||||
name: "t3".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
|
||||
assert_eq!(default_aliases.len(), 3);
|
||||
|
||||
assert_eq!(
|
||||
default_aliases.get(&Symbol::terminal(0)),
|
||||
Some(&Alias {
|
||||
value: "a1".to_string(),
|
||||
is_named: true,
|
||||
})
|
||||
);
|
||||
assert_eq!(
|
||||
default_aliases.get(&Symbol::terminal(2)),
|
||||
Some(&Alias {
|
||||
value: "a3".to_string(),
|
||||
is_named: true,
|
||||
})
|
||||
);
|
||||
assert_eq!(
|
||||
default_aliases.get(&Symbol::terminal(3)),
|
||||
Some(&Alias {
|
||||
value: "a6".to_string(),
|
||||
is_named: true,
|
||||
})
|
||||
);
|
||||
assert_eq!(default_aliases.get(&Symbol::terminal(1)), None);
|
||||
|
||||
assert_eq!(
|
||||
syntax_grammar.variables,
|
||||
vec![
|
||||
SyntaxVariable {
|
||||
name: "v1".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
|
||||
ProductionStep::new(Symbol::terminal(2)),
|
||||
ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true),
|
||||
],
|
||||
},],
|
||||
},
|
||||
SyntaxVariable {
|
||||
name: "v2".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::terminal(1)),
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true),
|
||||
ProductionStep::new(Symbol::terminal(3)),
|
||||
ProductionStep::new(Symbol::terminal(3)),
|
||||
],
|
||||
},],
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,223 +0,0 @@
|
|||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
struct SymbolStatus {
|
||||
alias: Option<Alias>,
|
||||
conflicting: bool,
|
||||
}
|
||||
|
||||
pub(super) fn extract_simple_aliases(
|
||||
syntax_grammar: &mut SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
) -> AliasMap {
|
||||
// Determine which symbols in the grammars are *always* aliased to a single name.
|
||||
let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
|
||||
let mut non_terminal_status_list =
|
||||
vec![SymbolStatus::default(); syntax_grammar.variables.len()];
|
||||
let mut external_status_list =
|
||||
vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];
|
||||
for variable in syntax_grammar.variables.iter() {
|
||||
for production in variable.productions.iter() {
|
||||
for step in production.steps.iter() {
|
||||
let mut status = match step.symbol {
|
||||
Symbol {
|
||||
kind: SymbolType::External,
|
||||
index,
|
||||
} => &mut external_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::NonTerminal,
|
||||
index,
|
||||
} => &mut non_terminal_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::Terminal,
|
||||
index,
|
||||
} => &mut terminal_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::End,
|
||||
..
|
||||
} => panic!("Unexpected end token"),
|
||||
};
|
||||
|
||||
if step.alias.is_none() {
|
||||
status.alias = None;
|
||||
status.conflicting = true;
|
||||
}
|
||||
|
||||
if !status.conflicting {
|
||||
if status.alias.is_none() {
|
||||
status.alias = step.alias.clone();
|
||||
} else if status.alias != step.alias {
|
||||
status.alias = None;
|
||||
status.conflicting = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove the aliases for those symbols.
|
||||
for variable in syntax_grammar.variables.iter_mut() {
|
||||
for production in variable.productions.iter_mut() {
|
||||
for step in production.steps.iter_mut() {
|
||||
let status = match step.symbol {
|
||||
Symbol {
|
||||
kind: SymbolType::External,
|
||||
index,
|
||||
} => &external_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::NonTerminal,
|
||||
index,
|
||||
} => &non_terminal_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::Terminal,
|
||||
index,
|
||||
} => &terminal_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::End,
|
||||
..
|
||||
} => panic!("Unexpected end token"),
|
||||
};
|
||||
|
||||
if status.alias.is_some() {
|
||||
step.alias = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Populate a map of the symbols to their aliases.
|
||||
let mut result = AliasMap::new();
|
||||
for (i, status) in terminal_status_list.into_iter().enumerate() {
|
||||
if let Some(alias) = status.alias {
|
||||
result.insert(Symbol::terminal(i), alias);
|
||||
}
|
||||
}
|
||||
for (i, status) in non_terminal_status_list.into_iter().enumerate() {
|
||||
if let Some(alias) = status.alias {
|
||||
result.insert(Symbol::non_terminal(i), alias);
|
||||
}
|
||||
}
|
||||
for (i, status) in external_status_list.into_iter().enumerate() {
|
||||
if let Some(alias) = status.alias {
|
||||
result.insert(Symbol::external(i), alias);
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::generate::grammars::{
|
||||
LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
|
||||
};
|
||||
use crate::generate::nfa::Nfa;
|
||||
|
||||
#[test]
|
||||
fn test_extract_simple_aliases() {
|
||||
let mut syntax_grammar = SyntaxGrammar {
|
||||
variables: vec![
|
||||
SyntaxVariable {
|
||||
name: "v1".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
|
||||
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
|
||||
],
|
||||
}],
|
||||
},
|
||||
SyntaxVariable {
|
||||
name: "v2".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
// Token 0 is always aliased as "a1".
|
||||
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
|
||||
// Token 1 is aliased above, but not here.
|
||||
ProductionStep::new(Symbol::terminal(1)),
|
||||
// Token 2 is aliased differently than above.
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
|
||||
],
|
||||
}],
|
||||
},
|
||||
],
|
||||
extra_tokens: Vec::new(),
|
||||
expected_conflicts: Vec::new(),
|
||||
variables_to_inline: Vec::new(),
|
||||
supertype_symbols: Vec::new(),
|
||||
external_tokens: Vec::new(),
|
||||
word_token: None,
|
||||
};
|
||||
|
||||
let lexical_grammar = LexicalGrammar {
|
||||
nfa: Nfa::new(),
|
||||
variables: vec![
|
||||
LexicalVariable {
|
||||
name: "t1".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
LexicalVariable {
|
||||
name: "t2".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
LexicalVariable {
|
||||
name: "t3".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
|
||||
assert_eq!(simple_aliases.len(), 1);
|
||||
assert_eq!(
|
||||
simple_aliases[&Symbol::terminal(0)],
|
||||
Alias {
|
||||
value: "a1".to_string(),
|
||||
is_named: true,
|
||||
}
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
syntax_grammar.variables,
|
||||
vec![
|
||||
SyntaxVariable {
|
||||
name: "v1".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
// 'Simple' alias removed
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
// Other aliases unchanged
|
||||
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
|
||||
],
|
||||
},],
|
||||
},
|
||||
SyntaxVariable {
|
||||
name: "v2".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::terminal(1)),
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
|
||||
],
|
||||
},],
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -90,21 +90,13 @@ pub(super) fn extract_tokens(
|
|||
.collect();
|
||||
|
||||
let mut separators = Vec::new();
|
||||
let mut extra_tokens = Vec::new();
|
||||
for rule in grammar.extra_tokens {
|
||||
let mut extra_symbols = Vec::new();
|
||||
for rule in grammar.extra_symbols {
|
||||
if let Rule::Symbol(symbol) = rule {
|
||||
let new_symbol = symbol_replacer.replace_symbol(symbol);
|
||||
if new_symbol.is_non_terminal() {
|
||||
return Error::err(format!(
|
||||
"Non-token symbol '{}' cannot be used as an extra token",
|
||||
&variables[new_symbol.index].name
|
||||
));
|
||||
} else {
|
||||
extra_tokens.push(new_symbol);
|
||||
}
|
||||
extra_symbols.push(symbol_replacer.replace_symbol(symbol));
|
||||
} else {
|
||||
if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) {
|
||||
extra_tokens.push(Symbol::terminal(index));
|
||||
extra_symbols.push(Symbol::terminal(index));
|
||||
} else {
|
||||
separators.push(rule);
|
||||
}
|
||||
|
|
@ -158,7 +150,7 @@ pub(super) fn extract_tokens(
|
|||
ExtractedSyntaxGrammar {
|
||||
variables,
|
||||
expected_conflicts,
|
||||
extra_tokens,
|
||||
extra_symbols,
|
||||
variables_to_inline,
|
||||
supertype_symbols,
|
||||
external_tokens,
|
||||
|
|
@ -415,15 +407,15 @@ mod test {
    }

    #[test]
    fn test_extracting_extra_tokens() {
    fn test_extracting_extra_symbols() {
        let mut grammar = build_grammar(vec![
            Variable::named("rule_0", Rule::string("x")),
            Variable::named("comment", Rule::pattern("//.*")),
        ]);
        grammar.extra_tokens = vec![Rule::string(" "), Rule::non_terminal(1)];
        grammar.extra_symbols = vec![Rule::string(" "), Rule::non_terminal(1)];

        let (syntax_grammar, lexical_grammar) = extract_tokens(grammar).unwrap();
        assert_eq!(syntax_grammar.extra_tokens, vec![Symbol::terminal(1),]);
        assert_eq!(syntax_grammar.extra_symbols, vec![Symbol::terminal(1),]);
        assert_eq!(lexical_grammar.separators, vec![Rule::string(" "),]);
    }

@ -472,28 +464,6 @@ mod test {
        );
    }

    #[test]
    fn test_error_on_non_terminal_symbol_extras() {
        let mut grammar = build_grammar(vec![
            Variable::named("rule_0", Rule::non_terminal(1)),
            Variable::named("rule_1", Rule::non_terminal(2)),
            Variable::named("rule_2", Rule::string("x")),
        ]);
        grammar.extra_tokens = vec![Rule::non_terminal(1)];

        match extract_tokens(grammar) {
            Err(e) => {
                assert_eq!(
                    e.message(),
                    "Non-token symbol 'rule_1' cannot be used as an extra token"
                );
            }
            _ => {
                panic!("Expected an error but got no error");
            }
        }
    }

    #[test]
    fn test_error_on_external_with_same_name_as_non_terminal() {
        let mut grammar = build_grammar(vec![

@ -522,7 +492,7 @@ mod test {
    fn build_grammar(variables: Vec<Variable>) -> InternedGrammar {
        InternedGrammar {
            variables,
            extra_tokens: Vec::new(),
            extra_symbols: Vec::new(),
            external_tokens: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),

@ -199,7 +199,7 @@ unless they are used only as the grammar's start rule.
        }
    }
    Ok(SyntaxGrammar {
        extra_tokens: grammar.extra_tokens,
        extra_symbols: grammar.extra_symbols,
        expected_conflicts: grammar.expected_conflicts,
        variables_to_inline: grammar.variables_to_inline,
        external_tokens: grammar.external_tokens,

@ -30,9 +30,9 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
        external_tokens.push(Variable { name, kind, rule });
    }

    let mut extra_tokens = Vec::with_capacity(grammar.extra_tokens.len());
    for extra_token in grammar.extra_tokens.iter() {
        extra_tokens.push(interner.intern_rule(extra_token)?);
    let mut extra_symbols = Vec::with_capacity(grammar.extra_symbols.len());
    for extra_token in grammar.extra_symbols.iter() {
        extra_symbols.push(interner.intern_rule(extra_token)?);
    }

    let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len());

@ -73,10 +73,16 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
        );
    }

    for (i, variable) in variables.iter_mut().enumerate() {
        if supertype_symbols.contains(&Symbol::non_terminal(i)) {
            variable.kind = VariableType::Hidden;
        }
    }

    Ok(InternedGrammar {
        variables,
        external_tokens,
        extra_tokens,
        extra_symbols,
        expected_conflicts,
        variables_to_inline,
        supertype_symbols,

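The added loop marks every variable that appears in `supertype_symbols` as hidden. A small self-contained sketch of that post-processing step, using a plain enum in place of the crate's `VariableType`:

```rust
// Hypothetical reduced type: only the Named/Hidden distinction matters here.
#[derive(Clone, Copy, Debug, PartialEq)]
enum VariableType {
    Named,
    Hidden,
}

// Re-mark any variable that is referenced as a supertype as hidden, regardless of
// how it was originally declared.
fn hide_supertypes(kinds: &mut [VariableType], supertype_indices: &[usize]) {
    for (i, kind) in kinds.iter_mut().enumerate() {
        if supertype_indices.contains(&i) {
            *kind = VariableType::Hidden;
        }
    }
}

fn main() {
    let mut kinds = vec![VariableType::Named, VariableType::Named, VariableType::Named];
    hide_supertypes(&mut kinds, &[1]);
    assert_eq!(kinds[1], VariableType::Hidden);
    assert_eq!(kinds[0], VariableType::Named);
}
```
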
@ -236,7 +242,7 @@ mod tests {
        InputGrammar {
            variables,
            name: "the_language".to_string(),
            extra_tokens: Vec::new(),
            extra_symbols: Vec::new(),
            external_tokens: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),

@ -1,6 +1,6 @@
mod expand_repeats;
mod expand_tokens;
mod extract_simple_aliases;
mod extract_default_aliases;
mod extract_tokens;
mod flatten_grammar;
mod intern_symbols;

@ -8,7 +8,7 @@ mod process_inlines;

use self::expand_repeats::expand_repeats;
pub(crate) use self::expand_tokens::expand_tokens;
use self::extract_simple_aliases::extract_simple_aliases;
use self::extract_default_aliases::extract_default_aliases;
use self::extract_tokens::extract_tokens;
use self::flatten_grammar::flatten_grammar;
use self::intern_symbols::intern_symbols;

@ -21,7 +21,7 @@ use crate::generate::rules::{AliasMap, Rule, Symbol};

pub(crate) struct IntermediateGrammar<T, U> {
    variables: Vec<Variable>,
    extra_tokens: Vec<T>,
    extra_symbols: Vec<T>,
    expected_conflicts: Vec<Vec<Symbol>>,
    external_tokens: Vec<U>,
    variables_to_inline: Vec<Symbol>,

@ -52,7 +52,7 @@ pub(crate) fn prepare_grammar(
    let syntax_grammar = expand_repeats(syntax_grammar);
    let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
    let lexical_grammar = expand_tokens(lexical_grammar)?;
    let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
    let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
    let inlines = process_inlines(&syntax_grammar);
    Ok((syntax_grammar, lexical_grammar, inlines, simple_aliases))
    Ok((syntax_grammar, lexical_grammar, inlines, default_aliases))
}

@ -127,6 +127,9 @@ impl InlinedProductionMapBuilder {
                        last_inserted_step.associativity = removed_step.associativity;
                    }
                }
                if p.dynamic_precedence.abs() > production.dynamic_precedence.abs() {
                    production.dynamic_precedence = p.dynamic_precedence;
                }
                production
            }),
        );

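Judging from this hunk and the updated expectations in the tests that follow (which now expect `dynamic_precedence: -2`), an inlined production appears to contribute its dynamic precedence to the merged production whenever its absolute value is larger. A one-function sketch of that selection rule, as I read it from the hunk rather than from the crate's actual helper:

```rust
// Keep whichever dynamic precedence has the larger absolute value, preserving its sign.
fn merge_dynamic_precedence(outer: i32, inlined: i32) -> i32 {
    if inlined.abs() > outer.abs() {
        inlined
    } else {
        outer
    }
}

fn main() {
    assert_eq!(merge_dynamic_precedence(0, -2), -2); // the case exercised by the tests below
    assert_eq!(merge_dynamic_precedence(3, -2), 3);  // a larger outer precedence wins
}
```
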
@ -196,7 +199,7 @@ mod tests {
    fn test_basic_inlining() {
        let grammar = SyntaxGrammar {
            expected_conflicts: Vec::new(),
            extra_tokens: Vec::new(),
            extra_symbols: Vec::new(),
            external_tokens: Vec::new(),
            supertype_symbols: Vec::new(),
            word_token: None,

@ -226,7 +229,7 @@ mod tests {
                    ],
                },
                Production {
                    dynamic_precedence: 0,
                    dynamic_precedence: -2,
                    steps: vec![ProductionStep::new(Symbol::terminal(14))],
                },
            ],

@ -258,7 +261,7 @@ mod tests {
                    ],
                },
                Production {
                    dynamic_precedence: 0,
                    dynamic_precedence: -2,
                    steps: vec![
                        ProductionStep::new(Symbol::terminal(10)),
                        ProductionStep::new(Symbol::terminal(14)),

@ -327,7 +330,7 @@ mod tests {
                Symbol::non_terminal(3),
            ],
            expected_conflicts: Vec::new(),
            extra_tokens: Vec::new(),
            extra_symbols: Vec::new(),
            external_tokens: Vec::new(),
            supertype_symbols: Vec::new(),
            word_token: None,

@ -429,7 +432,7 @@ mod tests {
                },
            ],
            expected_conflicts: Vec::new(),
            extra_tokens: Vec::new(),
            extra_symbols: Vec::new(),
            external_tokens: Vec::new(),
            supertype_symbols: Vec::new(),
            word_token: None,

File diff suppressed because it is too large
File diff suppressed because it is too large

@ -1,3 +1,4 @@
use super::grammars::VariableType;
use smallbitvec::SmallBitVec;
use std::collections::HashMap;
use std::iter::FromIterator;

@ -139,6 +140,16 @@ impl Rule {
    }
}

impl Alias {
    pub fn kind(&self) -> VariableType {
        if self.is_named {
            VariableType::Named
        } else {
            VariableType::Anonymous
        }
    }
}

#[cfg(test)]
impl Rule {
    pub fn terminal(index: usize) -> Self {

@ -366,7 +377,7 @@ impl FromIterator<Symbol> for TokenSet {

fn add_metadata<T: FnOnce(&mut MetadataParams)>(input: Rule, f: T) -> Rule {
    match input {
        Rule::Metadata { rule, mut params } => {
        Rule::Metadata { rule, mut params } if !params.is_token => {
            f(&mut params);
            Rule::Metadata { rule, params }
        }

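With the added `if !params.is_token` guard, `add_metadata` no longer merges new parameters into a wrapper that already marks its contents as a token. A reduced model of that behaviour is sketched below; the fallback arm (wrapping non-matching rules in a fresh `Metadata` node) is an assumption made for the sake of a runnable example, not a copy of the crate's remaining match arms:

```rust
// Hypothetical, reduced versions of MetadataParams and Rule for illustration.
#[derive(Clone, Debug, Default, PartialEq)]
struct MetadataParams {
    is_token: bool,
    precedence: i32,
}

#[derive(Clone, Debug, PartialEq)]
enum Rule {
    String(&'static str),
    Metadata { rule: Box<Rule>, params: MetadataParams },
}

fn add_metadata<F: FnOnce(&mut MetadataParams)>(input: Rule, f: F) -> Rule {
    match input {
        // Merge into an existing wrapper only when it is not a token() wrapper.
        Rule::Metadata { rule, mut params } if !params.is_token => {
            f(&mut params);
            Rule::Metadata { rule, params }
        }
        // Assumed fallback: give everything else (including token() wrappers) a new wrapper.
        rule => {
            let mut params = MetadataParams::default();
            f(&mut params);
            Rule::Metadata { rule: Box::new(rule), params }
        }
    }
}

fn main() {
    let token_rule = Rule::Metadata {
        rule: Box::new(Rule::String("if")),
        params: MetadataParams { is_token: true, precedence: 0 },
    };
    // The precedence lands on a new outer wrapper; the token() wrapper stays untouched.
    let with_prec = add_metadata(token_rule.clone(), |p| p.precedence = 1);
    assert!(matches!(
        with_prec,
        Rule::Metadata { ref rule, ref params }
            if params.precedence == 1 && !params.is_token && **rule == token_rule
    ));
}
```
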
@ -24,6 +24,12 @@ pub(crate) enum ParseAction {
    },
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum GotoAction {
    Goto(ParseStateId),
    ShiftExtra,
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ParseTableEntry {
    pub actions: Vec<ParseAction>,

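The new `GotoAction` lets a non-terminal table entry either transition to another parse state or mark the non-terminal as an extra that is shifted in place. A rough sketch of how a table consumer might branch on it; treating `ShiftExtra` as "stay in the same state" is my assumption here, and the map layout is simplified:

```rust
use std::collections::HashMap;

type ParseStateId = usize;

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum GotoAction {
    Goto(ParseStateId),
    ShiftExtra,
}

// Hypothetical lookup keyed by (current state, non-terminal symbol id).
fn next_state(
    gotos: &HashMap<(ParseStateId, u32), GotoAction>,
    state: ParseStateId,
    symbol: u32,
) -> Option<ParseStateId> {
    match gotos.get(&(state, symbol))? {
        GotoAction::Goto(next) => Some(*next),
        GotoAction::ShiftExtra => Some(state), // an extra non-terminal keeps the current state
    }
}

fn main() {
    let mut gotos = HashMap::new();
    gotos.insert((0, 1), GotoAction::Goto(4));
    gotos.insert((0, 2), GotoAction::ShiftExtra);
    assert_eq!(next_state(&gotos, 0, 1), Some(4));
    assert_eq!(next_state(&gotos, 0, 2), Some(0));
    assert_eq!(next_state(&gotos, 0, 9), None);
}
```
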
@ -34,10 +40,11 @@ pub(crate) struct ParseTableEntry {
pub(crate) struct ParseState {
    pub id: ParseStateId,
    pub terminal_entries: HashMap<Symbol, ParseTableEntry>,
    pub nonterminal_entries: HashMap<Symbol, ParseStateId>,
    pub nonterminal_entries: HashMap<Symbol, GotoAction>,
    pub lex_state_id: usize,
    pub external_lex_state_id: usize,
    pub core_id: usize,
    pub is_non_terminal_extra: bool,
}

#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]

@ -70,6 +77,7 @@ pub(crate) struct AdvanceAction {
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct LexState {
    pub accept_action: Option<Symbol>,
    pub eof_action: Option<AdvanceAction>,
    pub advance_actions: Vec<(CharacterSet, AdvanceAction)>,
}

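With `eof_action` on `LexState`, end-of-file gets its own optional transition instead of being folded into the character-driven `advance_actions`. A simplified driver loop illustrating how the two fields might be consulted (stand-in types; the real `CharacterSet` matching is richer):

```rust
// Hypothetical, reduced lex-state types for illustration only.
#[derive(Clone, Copy, Debug, PartialEq)]
struct AdvanceAction {
    state: usize,
    in_main_token: bool,
}

#[derive(Default)]
struct LexState {
    accept_action: Option<u16>,                  // token accepted in this state, if any
    eof_action: Option<AdvanceAction>,           // explicit end-of-file transition
    advance_actions: Vec<(char, AdvanceAction)>, // stand-in for (CharacterSet, AdvanceAction)
}

// Pick the next advance action for the current lookahead, if there is one.
fn step(state: &LexState, lookahead: Option<char>) -> Option<AdvanceAction> {
    match lookahead {
        // At end of input, use the dedicated eof transition rather than a character match.
        None => state.eof_action,
        Some(c) => state
            .advance_actions
            .iter()
            .find(|(expected, _)| *expected == c)
            .map(|(_, action)| *action),
    }
}

fn main() {
    let state = LexState {
        accept_action: None,
        eof_action: Some(AdvanceAction { state: 3, in_main_token: true }),
        advance_actions: vec![('a', AdvanceAction { state: 1, in_main_token: true })],
    };
    assert_eq!(step(&state, Some('a')).map(|a| a.state), Some(1));
    assert_eq!(step(&state, None).map(|a| a.state), Some(3));
}
```
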
@ -103,7 +111,13 @@ impl ParseState {
                _ => None,
            })
        })
        .chain(self.nonterminal_entries.iter().map(|(_, state)| *state))
        .chain(self.nonterminal_entries.iter().filter_map(|(_, action)| {
            if let GotoAction::Goto(state) = action {
                Some(*state)
            } else {
                None
            }
        }))
    }

    pub fn update_referenced_states<F>(&mut self, mut f: F)

@ -121,15 +135,18 @@ impl ParseState {
                }
            }
        }
        for (symbol, other_state) in &self.nonterminal_entries {
            let result = f(*other_state, self);
            if result != *other_state {
                updates.push((*symbol, 0, result));
        for (symbol, action) in &self.nonterminal_entries {
            if let GotoAction::Goto(other_state) = action {
                let result = f(*other_state, self);
                if result != *other_state {
                    updates.push((*symbol, 0, result));
                }
            }
        }
        for (symbol, action_index, new_state) in updates {
            if symbol.is_non_terminal() {
                self.nonterminal_entries.insert(symbol, new_state);
                self.nonterminal_entries
                    .insert(symbol, GotoAction::Goto(new_state));
            } else {
                let entry = self.terminal_entries.get_mut(&symbol).unwrap();
                if let ParseAction::Shift { is_repetition, .. } = entry.actions[action_index] {