Fix computation of following tokens
parent 31bdf5eb97
commit ff41f05a20

5 changed files with 125 additions and 36 deletions
@@ -41,12 +41,11 @@ struct ParseTableBuilder<'a> {
     item_sets_by_state_id: Vec<ParseItemSet<'a>>,
     parse_state_queue: VecDeque<ParseStateQueueEntry>,
     parse_table: ParseTable,
-    following_tokens: Vec<TokenSet>,
     state_ids_to_log: Vec<ParseStateId>,
 }
 
 impl<'a> ParseTableBuilder<'a> {
-    fn build(mut self) -> Result<(ParseTable, Vec<TokenSet>)> {
+    fn build(mut self) -> Result<ParseTable> {
         // Ensure that the empty alias sequence has index 0.
         self.parse_table.alias_sequences.push(Vec::new());
 
@@ -99,7 +98,7 @@ impl<'a> ParseTableBuilder<'a> {
 
         self.remove_precedences();
 
-        Ok((self.parse_table, self.following_tokens))
+        Ok(self.parse_table)
     }
 
     fn add_parse_state(
@@ -108,20 +107,6 @@ impl<'a> ParseTableBuilder<'a> {
         preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
         item_set: ParseItemSet<'a>,
     ) -> ParseStateId {
-        if preceding_symbols.len() > 1 {
-            let left_tokens = self
-                .item_set_builder
-                .last_set(&preceding_symbols[preceding_symbols.len() - 2]);
-            let right_tokens = self
-                .item_set_builder
-                .first_set(&preceding_symbols[preceding_symbols.len() - 1]);
-            for left_token in left_tokens.iter() {
-                if left_token.is_terminal() {
-                    self.following_tokens[left_token.index].insert_all(right_tokens);
-                }
-            }
-        }
-
         let mut hasher = DefaultHasher::new();
         item_set.hash_unfinished_items(&mut hasher);
         let unfinished_item_signature = hasher.finish();
@@ -705,17 +690,50 @@ impl<'a> ParseTableBuilder<'a> {
     }
 }
 
+fn populate_following_tokens(
+    result: &mut Vec<TokenSet>,
+    grammar: &SyntaxGrammar,
+    inlines: &InlinedProductionMap,
+    builder: &ParseItemSetBuilder,
+) {
+    let productions = grammar
+        .variables
+        .iter()
+        .flat_map(|v| &v.productions)
+        .chain(&inlines.productions);
+    for production in productions {
+        for i in 1..production.steps.len() {
+            let left_tokens = builder.last_set(&production.steps[i - 1].symbol);
+            let right_tokens = builder.first_set(&production.steps[i].symbol);
+            for left_token in left_tokens.iter() {
+                if left_token.is_terminal() {
+                    result[left_token.index].insert_all_terminals(right_tokens);
+                }
+            }
+        }
+    }
+}
+
 pub(crate) fn build_parse_table(
     syntax_grammar: &SyntaxGrammar,
     lexical_grammar: &LexicalGrammar,
     inlines: &InlinedProductionMap,
     state_ids_to_log: Vec<usize>,
 ) -> Result<(ParseTable, Vec<TokenSet>)> {
-    ParseTableBuilder {
+    let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
+    let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()];
+    populate_following_tokens(
+        &mut following_tokens,
+        syntax_grammar,
+        inlines,
+        &item_set_builder,
+    );
+
+    let table = ParseTableBuilder {
         syntax_grammar,
         lexical_grammar,
         state_ids_to_log,
-        item_set_builder: ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines),
+        item_set_builder,
         state_ids_by_item_set: HashMap::new(),
         item_sets_by_state_id: Vec::new(),
         parse_state_queue: VecDeque::new(),
@@ -725,7 +743,8 @@ pub(crate) fn build_parse_table(
             alias_sequences: Vec::new(),
             max_aliased_production_length: 0,
         },
-        following_tokens: vec![TokenSet::new(); lexical_grammar.variables.len()],
     }
-    .build()
+    .build()?;
+
+    Ok((table, following_tokens))
 }
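Note: the new populate_following_tokens derives the following-token sets from every production in the grammar (plus the inlined productions), instead of from whichever symbol sequences the table builder happened to walk while building states. A minimal standalone sketch of the recurrence it applies, using plain usize token ids and std HashSet in place of tree-sitter's TokenSet and ParseItemSetBuilder; all symbols in this toy are terminals, so the FIRST and LAST sets of a step are just the step itself:

use std::collections::HashSet;

// For each adjacent pair of steps in each production, every token that can
// end the left step may be immediately followed by every token that can
// start the right step.
fn populate_following_tokens(productions: &[Vec<usize>], n_tokens: usize) -> Vec<HashSet<usize>> {
    let mut result = vec![HashSet::new(); n_tokens];
    for production in productions {
        for i in 1..production.len() {
            let left = production[i - 1];
            let right = production[i];
            result[left].insert(right);
        }
    }
    result
}

fn main() {
    // Two toy productions over tokens 0..3: [0, 1, 2] and [0, 2].
    let productions = vec![vec![0, 1, 2], vec![0, 2]];
    let follow = populate_following_tokens(&productions, 3);
    assert_eq!(follow[0], HashSet::from([1, 2])); // token 0 may be followed by 1 or 2
    println!("{follow:?}");
}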
@@ -48,7 +48,11 @@ pub(crate) struct ParseItemDisplay<'a>(
     pub &'a LexicalGrammar,
 );
 
-pub(crate) struct TokenSetDisplay<'a>(&'a TokenSet, &'a SyntaxGrammar, &'a LexicalGrammar);
+pub(crate) struct TokenSetDisplay<'a>(
+    pub &'a TokenSet,
+    pub &'a SyntaxGrammar,
+    pub &'a LexicalGrammar,
+);
 
 #[allow(dead_code)]
 pub(crate) struct ParseItemSetDisplay<'a>(
@@ -134,30 +138,42 @@ impl TokenSet {
         vec.set(other.index, true);
     }
 
-    pub fn insert_all(&mut self, other: &TokenSet) -> bool {
+    pub fn insert_all_terminals(&mut self, other: &TokenSet) -> bool {
         let mut result = false;
         if other.terminal_bits.len() > self.terminal_bits.len() {
            self.terminal_bits.resize(other.terminal_bits.len(), false);
         }
-        if other.external_bits.len() > self.external_bits.len() {
-            self.external_bits.resize(other.external_bits.len(), false);
-        }
         for (i, element) in other.terminal_bits.iter().enumerate() {
             if element {
                 result |= !self.terminal_bits[i];
                 self.terminal_bits.set(i, element);
             }
         }
         result
     }
 
+    fn insert_all_externals(&mut self, other: &TokenSet) -> bool {
+        let mut result = false;
+        if other.external_bits.len() > self.external_bits.len() {
+            self.external_bits.resize(other.external_bits.len(), false);
+        }
+        for (i, element) in other.external_bits.iter().enumerate() {
+            if element {
+                result |= !self.external_bits[i];
+                self.external_bits.set(i, element);
+            }
+        }
+        result
+    }
+
+    pub fn insert_all(&mut self, other: &TokenSet) -> bool {
+        let mut result = false;
+        if other.eof {
+            result |= !self.eof;
+            self.eof = true;
+        }
+        result |= self.insert_all_terminals(other);
+        result |= self.insert_all_externals(other);
+        result
+    }
 }
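Note: insert_all and the new terminal/external variants all return whether any bit was newly set, which is what lets callers iterate set computations to a fixed point. A minimal sketch of that union-with-change-detection pattern over a plain Vec<bool>; BitSet here is a hypothetical stand-in, not tree-sitter's TokenSet:

struct BitSet {
    bits: Vec<bool>,
}

impl BitSet {
    // Union `other` into `self`, reporting whether anything new was added.
    fn insert_all(&mut self, other: &BitSet) -> bool {
        let mut changed = false;
        if other.bits.len() > self.bits.len() {
            self.bits.resize(other.bits.len(), false);
        }
        for (i, bit) in other.bits.iter().enumerate() {
            if *bit {
                changed |= !self.bits[i]; // true only if this bit was newly set
                self.bits[i] = true;
            }
        }
        changed
    }
}

fn main() {
    let mut a = BitSet { bits: vec![true, false] };
    let b = BitSet { bits: vec![false, true, true] };
    assert!(a.insert_all(&b)); // adds bits 1 and 2
    assert!(!a.insert_all(&b)); // second union adds nothing: a fixed point
}

The change flag is what makes worklist-style algorithms terminate: once a full pass reports no insertions, the sets have converged.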
@@ -1,6 +1,6 @@
-use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSet};
+use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSet, TokenSetDisplay};
 use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
-use crate::generate::rules::Symbol;
+use crate::generate::rules::{Symbol, SymbolType};
 use hashbrown::{HashMap, HashSet};
 use std::fmt;
 
@@ -268,7 +268,7 @@ impl<'a> ParseItemSetBuilder<'a> {
     }
 
     pub fn last_set(&self, symbol: &Symbol) -> &TokenSet {
-        &self.first_sets[symbol]
+        &self.last_sets[symbol]
     }
 
     fn add_item(&self, set: &mut ParseItemSet<'a>, item: ParseItem<'a>, lookaheads: &TokenSet) {
@@ -300,6 +300,40 @@ impl<'a> fmt::Debug for ParseItemSetBuilder<'a> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(f, "ParseItemSetBuilder {{\n")?;
 
+        write!(f, "  first_sets: {{\n")?;
+        for (symbol, first_set) in &self.first_sets {
+            let name = match symbol.kind {
+                SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name,
+                SymbolType::External => &self.syntax_grammar.external_tokens[symbol.index].name,
+                SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name,
+                SymbolType::End => "END",
+            };
+            write!(
+                f,
+                "    first({:?}): {}\n",
+                name,
+                TokenSetDisplay(first_set, &self.syntax_grammar, &self.lexical_grammar)
+            )?;
+        }
+        write!(f, "  }}\n")?;
+
+        write!(f, "  last_sets: {{\n")?;
+        for (symbol, last_set) in &self.last_sets {
+            let name = match symbol.kind {
+                SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name,
+                SymbolType::External => &self.syntax_grammar.external_tokens[symbol.index].name,
+                SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name,
+                SymbolType::End => "END",
+            };
+            write!(
+                f,
+                "    last({:?}): {}\n",
+                name,
+                TokenSetDisplay(last_set, &self.syntax_grammar, &self.lexical_grammar)
+            )?;
+        }
+        write!(f, "  }}\n")?;
+
         write!(f, "  additions: {{\n")?;
         for (i, variable) in self.syntax_grammar.variables.iter().enumerate() {
             write!(f, "    {}: {{\n", variable.name)?;
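Note: the one-line change in last_set is the heart of the fix: it previously indexed first_sets, so "tokens that can end this symbol" were silently computed as "tokens that can begin it". A toy sketch of why the two differ, using lowercase strings as terminals and a map of productions as the grammar; edge_set is a hypothetical helper, not tree-sitter's builder:

use std::collections::{HashMap, HashSet};

// FIRST(X) follows the first step of each production; LAST(X) follows the
// last step. A symbol with no productions is a terminal, and is its own
// first/last token.
fn edge_set<'g>(
    rules: &HashMap<&'g str, Vec<Vec<&'g str>>>,
    start: &'g str,
    last: bool,
) -> HashSet<&'g str> {
    let mut set = HashSet::new();
    let mut stack = vec![start];
    let mut seen: HashSet<&str> = HashSet::new();
    while let Some(symbol) = stack.pop() {
        if !seen.insert(symbol) {
            continue;
        }
        match rules.get(symbol) {
            Some(productions) => {
                for production in productions {
                    let step = if last { production.last() } else { production.first() };
                    if let Some(&next) = step {
                        stack.push(next);
                    }
                }
            }
            None => {
                set.insert(symbol);
            }
        }
    }
    set
}

fn main() {
    // Call -> name lparen rparen: begins with `name` but ends with `rparen`.
    let rules: HashMap<&str, Vec<Vec<&str>>> =
        HashMap::from([("Call", vec![vec!["name", "lparen", "rparen"]])]);
    assert_eq!(edge_set(&rules, "Call", false), HashSet::from(["name"]));
    assert_eq!(edge_set(&rules, "Call", true), HashSet::from(["rparen"]));
}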
@@ -1,5 +1,5 @@
-use crate::generate::build_tables::item::TokenSet;
-use crate::generate::grammars::LexicalGrammar;
+use crate::generate::build_tables::item::{TokenSet, TokenSetDisplay};
+use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
 use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition};
 use hashbrown::HashSet;
 use std::cmp::Ordering;
@@ -16,6 +16,7 @@ struct TokenConflictStatus {
 pub(crate) struct TokenConflictMap<'a> {
     n: usize,
     status_matrix: Vec<TokenConflictStatus>,
+    following_tokens: Vec<TokenSet>,
     starting_chars_by_index: Vec<CharacterSet>,
     following_chars_by_index: Vec<CharacterSet>,
     grammar: &'a LexicalGrammar,
@@ -25,7 +26,7 @@ impl<'a> TokenConflictMap<'a> {
     pub fn new(grammar: &'a LexicalGrammar, following_tokens: Vec<TokenSet>) -> Self {
         let mut cursor = NfaCursor::new(&grammar.nfa, Vec::new());
         let starting_chars = get_starting_chars(&mut cursor, grammar);
-        let following_chars = get_following_chars(&starting_chars, following_tokens);
+        let following_chars = get_following_chars(&starting_chars, &following_tokens);
 
         let n = grammar.variables.len();
         let mut status_matrix = vec![TokenConflictStatus::default(); n * n];
@@ -40,6 +41,7 @@ impl<'a> TokenConflictMap<'a> {
         TokenConflictMap {
             n,
             status_matrix,
+            following_tokens,
             starting_chars_by_index: starting_chars,
             following_chars_by_index: following_chars,
             grammar,
@@ -115,9 +117,27 @@ impl<'a> fmt::Debug for TokenConflictMap<'a> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(f, "TokenConflictMap {{\n")?;
 
+        let syntax_grammar = SyntaxGrammar::default();
+
+        write!(f, "  following_tokens: {{\n")?;
+        for (i, following_tokens) in self.following_tokens.iter().enumerate() {
+            write!(
+                f,
+                "    follow({:?}): {},\n",
+                self.grammar.variables[i].name,
+                TokenSetDisplay(following_tokens, &syntax_grammar, &self.grammar)
+            )?;
+        }
+        write!(f, "  }},\n")?;
+
         write!(f, "  starting_characters: {{\n")?;
         for i in 0..self.n {
-            write!(f, "    {}: {:?},\n", i, self.starting_chars_by_index[i])?;
+            write!(
+                f,
+                "    {:?}: {:?},\n",
+                self.grammar.variables[i].name,
+                self.starting_chars_by_index[i]
+            )?;
         }
         write!(f, "  }},\n")?;
 
@@ -169,10 +189,10 @@ fn get_starting_chars(cursor: &mut NfaCursor, grammar: &LexicalGrammar) -> Vec<C
 
 fn get_following_chars(
     starting_chars: &Vec<CharacterSet>,
-    following_tokens: Vec<TokenSet>,
+    following_tokens: &Vec<TokenSet>,
 ) -> Vec<CharacterSet> {
     following_tokens
-        .into_iter()
+        .iter()
         .map(|following_tokens| {
             let mut chars = CharacterSet::empty();
             for token in following_tokens.iter() {
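Note: TokenConflictMap uses the follow sets to precompute, per token, the characters that may appear immediately after it. A simplified sketch of the get_following_chars pattern, with HashSet<char> standing in for tree-sitter's CharacterSet and usize token ids for TokenSet; the function name mirrors the one above, but this is a toy reimplementation:

use std::collections::HashSet;

// The characters that may follow token i are the union of the starting
// characters of every token that may follow token i.
fn get_following_chars(
    starting_chars: &[HashSet<char>],
    following_tokens: &[HashSet<usize>],
) -> Vec<HashSet<char>> {
    following_tokens
        .iter()
        .map(|tokens| {
            let mut chars = HashSet::new();
            for &token in tokens {
                chars.extend(&starting_chars[token]);
            }
            chars
        })
        .collect()
}

fn main() {
    // Token 0 may be followed by tokens 1 and 2, which start with '+' and '('.
    let starting_chars = vec![
        HashSet::from(['a']),
        HashSet::from(['+']),
        HashSet::from(['(']),
    ];
    let following_tokens = vec![HashSet::from([1, 2]), HashSet::new(), HashSet::new()];
    let following_chars = get_following_chars(&starting_chars, &following_tokens);
    assert_eq!(following_chars[0], HashSet::from(['+', '(']));
}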
@@ -81,7 +81,7 @@ pub(crate) struct ExternalToken {
     pub corresponding_internal_token: Option<Symbol>,
 }
 
-#[derive(Debug)]
+#[derive(Debug, Default)]
 pub(crate) struct SyntaxGrammar {
     pub variables: Vec<SyntaxVariable>,
     pub extra_tokens: Vec<Symbol>,