Rename LookaheadSet -> TokenSet
Also, replace non-standard `with` method with a `FromIterator` implementation.
This commit is contained in:
parent
dd416b0955
commit
001f8c8f55
7 changed files with 165 additions and 150 deletions
|
|
@ -1,4 +1,4 @@
|
|||
use super::item::LookaheadSet;
|
||||
use super::item::TokenSet;
|
||||
use super::token_conflicts::TokenConflictMap;
|
||||
use crate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::nfa::{CharacterSet, NfaCursor, NfaTransition};
|
||||
|
|
@ -11,7 +11,7 @@ pub(crate) fn build_lex_table(
|
|||
parse_table: &mut ParseTable,
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
keywords: &LookaheadSet,
|
||||
keywords: &TokenSet,
|
||||
minimize: bool,
|
||||
) -> (LexTable, LexTable) {
|
||||
let keyword_lex_table;
|
||||
|
|
@ -25,19 +25,23 @@ pub(crate) fn build_lex_table(
|
|||
|
||||
let mut builder = LexTableBuilder::new(lexical_grammar);
|
||||
for state in parse_table.states.iter_mut() {
|
||||
let tokens = LookaheadSet::with(state.terminal_entries.keys().filter_map(|token| {
|
||||
if token.is_terminal() {
|
||||
if keywords.contains(&token) {
|
||||
syntax_grammar.word_token
|
||||
} else {
|
||||
let tokens = state
|
||||
.terminal_entries
|
||||
.keys()
|
||||
.filter_map(|token| {
|
||||
if token.is_terminal() {
|
||||
if keywords.contains(&token) {
|
||||
syntax_grammar.word_token
|
||||
} else {
|
||||
Some(*token)
|
||||
}
|
||||
} else if token.is_eof() {
|
||||
Some(*token)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else if token.is_eof() {
|
||||
Some(*token)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}));
|
||||
})
|
||||
.collect();
|
||||
state.lex_state_id = builder.add_state_for_tokens(&tokens);
|
||||
}
|
||||
|
||||
|
|
@ -75,7 +79,7 @@ impl<'a> LexTableBuilder<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
fn add_state_for_tokens(&mut self, tokens: &LookaheadSet) -> usize {
|
||||
fn add_state_for_tokens(&mut self, tokens: &TokenSet) -> usize {
|
||||
let mut eof_valid = false;
|
||||
let nfa_states = tokens
|
||||
.iter()
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use super::item::{LookaheadSet, ParseItem, ParseItemSet};
|
||||
use super::item::{ParseItem, ParseItemSet, TokenSet};
|
||||
use super::item_set_builder::ParseItemSetBuilder;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType};
|
||||
|
|
@ -38,12 +38,12 @@ struct ParseTableBuilder<'a> {
|
|||
item_sets_by_state_id: Vec<ParseItemSet<'a>>,
|
||||
parse_state_queue: VecDeque<ParseStateQueueEntry>,
|
||||
parse_table: ParseTable,
|
||||
following_tokens: Vec<LookaheadSet>,
|
||||
following_tokens: Vec<TokenSet>,
|
||||
state_ids_to_log: Vec<ParseStateId>,
|
||||
}
|
||||
|
||||
impl<'a> ParseTableBuilder<'a> {
|
||||
fn build(mut self) -> Result<(ParseTable, Vec<LookaheadSet>)> {
|
||||
fn build(mut self) -> Result<(ParseTable, Vec<TokenSet>)> {
|
||||
// Ensure that the empty alias sequence has index 0.
|
||||
self.parse_table.alias_sequences.push(Vec::new());
|
||||
|
||||
|
|
@ -57,7 +57,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
ParseItemSet::with(
|
||||
[(
|
||||
ParseItem::start(),
|
||||
LookaheadSet::with([Symbol::end()].iter().cloned()),
|
||||
[Symbol::end()].iter().cloned().collect(),
|
||||
)]
|
||||
.iter()
|
||||
.cloned(),
|
||||
|
|
@ -174,7 +174,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.or_insert_with(|| ParseItemSet::default())
|
||||
.entries
|
||||
.entry(successor)
|
||||
.or_insert_with(|| LookaheadSet::new())
|
||||
.or_insert_with(|| TokenSet::new())
|
||||
.insert_all(lookaheads);
|
||||
} else {
|
||||
terminal_successors
|
||||
|
|
@ -182,7 +182,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.or_insert_with(|| ParseItemSet::default())
|
||||
.entries
|
||||
.entry(successor)
|
||||
.or_insert_with(|| LookaheadSet::new())
|
||||
.or_insert_with(|| TokenSet::new())
|
||||
.insert_all(lookaheads);
|
||||
}
|
||||
} else {
|
||||
|
|
@ -714,7 +714,7 @@ pub(crate) fn build_parse_table(
|
|||
lexical_grammar: &LexicalGrammar,
|
||||
inlines: &InlinedProductionMap,
|
||||
state_ids_to_log: Vec<usize>,
|
||||
) -> Result<(ParseTable, Vec<LookaheadSet>)> {
|
||||
) -> Result<(ParseTable, Vec<TokenSet>)> {
|
||||
ParseTableBuilder {
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
|
|
@ -729,7 +729,7 @@ pub(crate) fn build_parse_table(
|
|||
alias_sequences: Vec::new(),
|
||||
max_aliased_production_length: 0,
|
||||
},
|
||||
following_tokens: vec![LookaheadSet::new(); lexical_grammar.variables.len()],
|
||||
following_tokens: vec![TokenSet::new(); lexical_grammar.variables.len()],
|
||||
}
|
||||
.build()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ use std::cmp::Ordering;
|
|||
use std::collections::BTreeMap;
|
||||
use std::fmt;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::iter::FromIterator;
|
||||
use std::u32;
|
||||
|
||||
lazy_static! {
|
||||
|
|
@ -24,7 +25,7 @@ lazy_static! {
|
|||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub(crate) struct LookaheadSet {
|
||||
pub(crate) struct TokenSet {
|
||||
terminal_bits: SmallBitVec,
|
||||
external_bits: SmallBitVec,
|
||||
eof: bool,
|
||||
|
|
@ -39,7 +40,7 @@ pub(crate) struct ParseItem<'a> {
|
|||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ParseItemSet<'a> {
|
||||
pub entries: BTreeMap<ParseItem<'a>, LookaheadSet>,
|
||||
pub entries: BTreeMap<ParseItem<'a>, TokenSet>,
|
||||
}
|
||||
|
||||
pub(crate) struct ParseItemDisplay<'a>(
|
||||
|
|
@ -48,7 +49,7 @@ pub(crate) struct ParseItemDisplay<'a>(
|
|||
pub &'a LexicalGrammar,
|
||||
);
|
||||
|
||||
pub(crate) struct LookaheadSetDisplay<'a>(&'a LookaheadSet, &'a SyntaxGrammar, &'a LexicalGrammar);
|
||||
/// Wrapper that implements `fmt::Display` for a `TokenSet`, carrying the syntax and
/// lexical grammars alongside the set.
/// NOTE(review): the grammars are presumably used to resolve symbol names when
/// formatting — confirm in the `Display` impl.
pub(crate) struct TokenSetDisplay<'a>(&'a TokenSet, &'a SyntaxGrammar, &'a LexicalGrammar);
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub(crate) struct ParseItemSetDisplay<'a>(
|
||||
|
|
@ -57,7 +58,7 @@ pub(crate) struct ParseItemSetDisplay<'a>(
|
|||
pub &'a LexicalGrammar,
|
||||
);
|
||||
|
||||
impl LookaheadSet {
|
||||
impl TokenSet {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
terminal_bits: SmallBitVec::new(),
|
||||
|
|
@ -92,17 +93,9 @@ impl LookaheadSet {
|
|||
.chain(if self.eof { Some(Symbol::end()) } else { None })
|
||||
}
|
||||
|
||||
pub fn with(symbols: impl IntoIterator<Item = Symbol>) -> Self {
|
||||
let mut result = Self::new();
|
||||
for symbol in symbols {
|
||||
result.insert(symbol);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub fn contains(&self, symbol: &Symbol) -> bool {
|
||||
match symbol.kind {
|
||||
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a LookaheadSet"),
|
||||
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
|
||||
SymbolType::Terminal => self.terminal_bits.get(symbol.index).unwrap_or(false),
|
||||
SymbolType::External => self.external_bits.get(symbol.index).unwrap_or(false),
|
||||
SymbolType::End => self.eof,
|
||||
|
|
@ -111,7 +104,7 @@ impl LookaheadSet {
|
|||
|
||||
pub fn insert(&mut self, other: Symbol) {
|
||||
let vec = match other.kind {
|
||||
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a LookaheadSet"),
|
||||
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
|
||||
SymbolType::Terminal => &mut self.terminal_bits,
|
||||
SymbolType::External => &mut self.external_bits,
|
||||
SymbolType::End => {
|
||||
|
|
@ -125,7 +118,7 @@ impl LookaheadSet {
|
|||
vec.set(other.index, true);
|
||||
}
|
||||
|
||||
pub fn insert_all(&mut self, other: &LookaheadSet) -> bool {
|
||||
pub fn insert_all(&mut self, other: &TokenSet) -> bool {
|
||||
let mut result = false;
|
||||
if other.terminal_bits.len() > self.terminal_bits.len() {
|
||||
self.terminal_bits.resize(other.terminal_bits.len(), false);
|
||||
|
|
@ -153,6 +146,16 @@ impl LookaheadSet {
|
|||
}
|
||||
}
|
||||
|
||||
/// Allows a `TokenSet` to be built with `.collect()` from any iterator of `Symbol`s.
///
/// Every yielded symbol is passed to `TokenSet::insert`, so the same restrictions
/// apply: `insert` panics when handed a non-terminal symbol.
impl FromIterator<Symbol> for TokenSet {
    fn from_iter<T: IntoIterator<Item = Symbol>>(iter: T) -> Self {
        // Start from an empty set and thread it through the iterator, adding one
        // symbol per step.
        iter.into_iter().fold(Self::new(), |mut set, symbol| {
            set.insert(symbol);
            set
        })
    }
}
|
||||
|
||||
impl<'a> ParseItem<'a> {
|
||||
pub fn start() -> Self {
|
||||
ParseItem {
|
||||
|
|
@ -204,7 +207,7 @@ impl<'a> ParseItem<'a> {
|
|||
}
|
||||
|
||||
impl<'a> ParseItemSet<'a> {
|
||||
pub fn with(elements: impl IntoIterator<Item = (ParseItem<'a>, LookaheadSet)>) -> Self {
|
||||
pub fn with(elements: impl IntoIterator<Item = (ParseItem<'a>, TokenSet)>) -> Self {
|
||||
let mut result = Self::default();
|
||||
for (item, lookaheads) in elements {
|
||||
result.entries.insert(item, lookaheads);
|
||||
|
|
@ -296,7 +299,7 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a> fmt::Display for LookaheadSetDisplay<'a> {
|
||||
impl<'a> fmt::Display for TokenSetDisplay<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
write!(f, "[")?;
|
||||
for (i, symbol) in self.0.iter().enumerate() {
|
||||
|
|
@ -328,7 +331,7 @@ impl<'a> fmt::Display for ParseItemSetDisplay<'a> {
|
|||
f,
|
||||
"{}\t{}",
|
||||
ParseItemDisplay(item, self.1, self.2),
|
||||
LookaheadSetDisplay(lookaheads, self.1, self.2)
|
||||
TokenSetDisplay(lookaheads, self.1, self.2)
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use super::item::{LookaheadSet, ParseItem, ParseItemDisplay, ParseItemSet};
|
||||
use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSet};
|
||||
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
|
||||
use crate::rules::Symbol;
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
|
|
@ -12,15 +12,15 @@ struct TransitiveClosureAddition<'a> {
|
|||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
struct FollowSetInfo {
|
||||
lookaheads: LookaheadSet,
|
||||
lookaheads: TokenSet,
|
||||
propagates_lookaheads: bool,
|
||||
}
|
||||
|
||||
pub(crate) struct ParseItemSetBuilder<'a> {
|
||||
syntax_grammar: &'a SyntaxGrammar,
|
||||
lexical_grammar: &'a LexicalGrammar,
|
||||
first_sets: HashMap<Symbol, LookaheadSet>,
|
||||
last_sets: HashMap<Symbol, LookaheadSet>,
|
||||
first_sets: HashMap<Symbol, TokenSet>,
|
||||
last_sets: HashMap<Symbol, TokenSet>,
|
||||
inlines: &'a InlinedProductionMap,
|
||||
transitive_closure_additions: Vec<Vec<TransitiveClosureAddition<'a>>>,
|
||||
}
|
||||
|
|
@ -54,7 +54,7 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
// terminal itself.
|
||||
for i in 0..lexical_grammar.variables.len() {
|
||||
let symbol = Symbol::terminal(i);
|
||||
let mut set = LookaheadSet::new();
|
||||
let mut set = TokenSet::new();
|
||||
set.insert(symbol);
|
||||
result.first_sets.insert(symbol, set.clone());
|
||||
result.last_sets.insert(symbol, set);
|
||||
|
|
@ -62,7 +62,7 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
|
||||
for i in 0..syntax_grammar.external_tokens.len() {
|
||||
let symbol = Symbol::external(i);
|
||||
let mut set = LookaheadSet::new();
|
||||
let mut set = TokenSet::new();
|
||||
set.insert(symbol);
|
||||
result.first_sets.insert(symbol, set.clone());
|
||||
result.last_sets.insert(symbol, set);
|
||||
|
|
@ -80,10 +80,7 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
for i in 0..syntax_grammar.variables.len() {
|
||||
let symbol = Symbol::non_terminal(i);
|
||||
|
||||
let first_set = &mut result
|
||||
.first_sets
|
||||
.entry(symbol)
|
||||
.or_insert(LookaheadSet::new());
|
||||
let first_set = &mut result.first_sets.entry(symbol).or_insert(TokenSet::new());
|
||||
processed_non_terminals.clear();
|
||||
symbols_to_process.clear();
|
||||
symbols_to_process.push(symbol);
|
||||
|
|
@ -103,10 +100,7 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
}
|
||||
|
||||
// The LAST set is defined in a similar way to the FIRST set.
|
||||
let last_set = &mut result
|
||||
.last_sets
|
||||
.entry(symbol)
|
||||
.or_insert(LookaheadSet::new());
|
||||
let last_set = &mut result.last_sets.entry(symbol).or_insert(TokenSet::new());
|
||||
processed_non_terminals.clear();
|
||||
symbols_to_process.clear();
|
||||
symbols_to_process.push(symbol);
|
||||
|
|
@ -148,7 +142,7 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
// Again, rather than computing these additions recursively, we use an explicit
|
||||
// stack called `entries_to_process`.
|
||||
for i in 0..syntax_grammar.variables.len() {
|
||||
let empty_lookaheads = LookaheadSet::new();
|
||||
let empty_lookaheads = TokenSet::new();
|
||||
let mut entries_to_process = vec![(i, &empty_lookaheads, true)];
|
||||
|
||||
// First, build up a map whose keys are all of the non-terminals that can
|
||||
|
|
@ -160,7 +154,7 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
let existing_info = follow_set_info_by_non_terminal
|
||||
.entry(variable_index)
|
||||
.or_insert_with(|| FollowSetInfo {
|
||||
lookaheads: LookaheadSet::new(),
|
||||
lookaheads: TokenSet::new(),
|
||||
propagates_lookaheads: false,
|
||||
});
|
||||
|
||||
|
|
@ -269,15 +263,15 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
result
|
||||
}
|
||||
|
||||
pub fn first_set(&self, symbol: &Symbol) -> &LookaheadSet {
|
||||
/// Returns the FIRST set stored for `symbol`.
///
/// The lookup indexes the `first_sets` map directly.
/// NOTE(review): map indexing is expected to panic when `symbol` has no entry —
/// confirm against the `Index` impl of the `HashMap` in use.
pub fn first_set(&self, symbol: &Symbol) -> &TokenSet {
|
||||
&self.first_sets[symbol]
|
||||
}
|
||||
|
||||
pub fn last_set(&self, symbol: &Symbol) -> &LookaheadSet {
|
||||
/// Returns the LAST set stored for `symbol`.
///
/// The lookup indexes the `last_sets` map directly.
/// NOTE(review): map indexing is expected to panic when `symbol` has no entry —
/// confirm against the `Index` impl of the `HashMap` in use.
pub fn last_set(&self, symbol: &Symbol) -> &TokenSet {
    // Read from `last_sets`. The previous body indexed `first_sets`, which made this
    // getter return the FIRST set and left the `last_sets` map unread by it.
    &self.last_sets[symbol]
}
|
||||
|
||||
fn add_item(&self, set: &mut ParseItemSet<'a>, item: ParseItem<'a>, lookaheads: &LookaheadSet) {
|
||||
fn add_item(&self, set: &mut ParseItemSet<'a>, item: ParseItem<'a>, lookaheads: &TokenSet) {
|
||||
if let Some(step) = item.step() {
|
||||
if step.symbol.is_non_terminal() {
|
||||
let next_step = item.successor().step();
|
||||
|
|
@ -294,7 +288,7 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
let lookaheads = set
|
||||
.entries
|
||||
.entry(addition.item)
|
||||
.or_insert_with(|| LookaheadSet::new());
|
||||
.or_insert_with(|| TokenSet::new());
|
||||
lookaheads.insert_all(&addition.info.lookaheads);
|
||||
if addition.info.propagates_lookaheads {
|
||||
lookaheads.insert_all(following_tokens);
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use super::item::LookaheadSet;
|
||||
use super::item::TokenSet;
|
||||
use super::token_conflicts::TokenConflictMap;
|
||||
use crate::grammars::{SyntaxGrammar, VariableType};
|
||||
use crate::rules::{AliasMap, Symbol};
|
||||
|
|
@ -10,7 +10,7 @@ pub(crate) fn minimize_parse_table(
|
|||
syntax_grammar: &SyntaxGrammar,
|
||||
simple_aliases: &AliasMap,
|
||||
token_conflict_map: &TokenConflictMap,
|
||||
keywords: &LookaheadSet,
|
||||
keywords: &TokenSet,
|
||||
) {
|
||||
let mut minimizer = Minimizer {
|
||||
parse_table,
|
||||
|
|
@ -28,7 +28,7 @@ struct Minimizer<'a> {
|
|||
parse_table: &'a mut ParseTable,
|
||||
syntax_grammar: &'a SyntaxGrammar,
|
||||
token_conflict_map: &'a TokenConflictMap<'a>,
|
||||
keywords: &'a LookaheadSet,
|
||||
keywords: &'a TokenSet,
|
||||
simple_aliases: &'a AliasMap,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ mod token_conflicts;
|
|||
use self::build_lex_table::build_lex_table;
|
||||
use self::build_parse_table::build_parse_table;
|
||||
use self::coincident_tokens::CoincidentTokenIndex;
|
||||
use self::item::LookaheadSet;
|
||||
use self::item::TokenSet;
|
||||
use self::minimize_parse_table::minimize_parse_table;
|
||||
use self::token_conflicts::TokenConflictMap;
|
||||
use crate::error::Result;
|
||||
|
|
@ -44,11 +44,7 @@ pub(crate) fn build_tables(
|
|||
&coincident_token_index,
|
||||
&token_conflict_map,
|
||||
);
|
||||
mark_fragile_tokens(
|
||||
&mut parse_table,
|
||||
lexical_grammar,
|
||||
&token_conflict_map,
|
||||
);
|
||||
mark_fragile_tokens(&mut parse_table, lexical_grammar, &token_conflict_map);
|
||||
if minimize {
|
||||
minimize_parse_table(
|
||||
&mut parse_table,
|
||||
|
|
@ -85,22 +81,25 @@ fn populate_error_state(
|
|||
|
||||
// First identify the *conflict-free tokens*: tokens that do not overlap with
|
||||
// any other token in any way.
|
||||
let conflict_free_tokens = LookaheadSet::with((0..n).into_iter().filter_map(|i| {
|
||||
let conflicts_with_other_tokens = (0..n).into_iter().any(|j| {
|
||||
j != i
|
||||
&& !coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j))
|
||||
&& token_conflict_map.does_conflict(i, j)
|
||||
});
|
||||
if conflicts_with_other_tokens {
|
||||
None
|
||||
} else {
|
||||
info!(
|
||||
"error recovery - token {} has no conflicts",
|
||||
lexical_grammar.variables[i].name
|
||||
);
|
||||
Some(Symbol::terminal(i))
|
||||
}
|
||||
}));
|
||||
let conflict_free_tokens: TokenSet = (0..n)
|
||||
.into_iter()
|
||||
.filter_map(|i| {
|
||||
let conflicts_with_other_tokens = (0..n).into_iter().any(|j| {
|
||||
j != i
|
||||
&& !coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j))
|
||||
&& token_conflict_map.does_conflict(i, j)
|
||||
});
|
||||
if conflicts_with_other_tokens {
|
||||
None
|
||||
} else {
|
||||
info!(
|
||||
"error recovery - token {} has no conflicts",
|
||||
lexical_grammar.variables[i].name
|
||||
);
|
||||
Some(Symbol::terminal(i))
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let recover_entry = ParseTableEntry {
|
||||
reusable: false,
|
||||
|
|
@ -153,9 +152,9 @@ fn identify_keywords(
|
|||
word_token: Option<Symbol>,
|
||||
token_conflict_map: &TokenConflictMap,
|
||||
coincident_token_index: &CoincidentTokenIndex,
|
||||
) -> LookaheadSet {
|
||||
) -> TokenSet {
|
||||
if word_token.is_none() {
|
||||
return LookaheadSet::new();
|
||||
return TokenSet::new();
|
||||
}
|
||||
|
||||
let word_token = word_token.unwrap();
|
||||
|
|
@ -163,8 +162,11 @@ fn identify_keywords(
|
|||
|
||||
// First find all of the candidate keyword tokens: tokens that start with
|
||||
// letters or underscore and can match the same string as a word token.
|
||||
let keywords = LookaheadSet::with(lexical_grammar.variables.iter().enumerate().filter_map(
|
||||
|(i, variable)| {
|
||||
let keywords: TokenSet = lexical_grammar
|
||||
.variables
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, variable)| {
|
||||
cursor.reset(vec![variable.start_state]);
|
||||
if all_chars_are_alphabetical(&cursor)
|
||||
&& token_conflict_map.does_match_same_string(i, word_token.index)
|
||||
|
|
@ -177,69 +179,75 @@ fn identify_keywords(
|
|||
} else {
|
||||
None
|
||||
}
|
||||
},
|
||||
));
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Exclude keyword candidates that shadow another keyword candidate.
|
||||
let keywords = LookaheadSet::with(keywords.iter().filter(|token| {
|
||||
for other_token in keywords.iter() {
|
||||
if other_token != *token
|
||||
&& token_conflict_map.does_match_same_string(token.index, other_token.index)
|
||||
{
|
||||
info!(
|
||||
"Keywords - exclude {} because it matches the same string as {}",
|
||||
lexical_grammar.variables[token.index].name,
|
||||
lexical_grammar.variables[other_token.index].name
|
||||
);
|
||||
return false;
|
||||
let keywords: TokenSet = keywords
|
||||
.iter()
|
||||
.filter(|token| {
|
||||
for other_token in keywords.iter() {
|
||||
if other_token != *token
|
||||
&& token_conflict_map.does_match_same_string(token.index, other_token.index)
|
||||
{
|
||||
info!(
|
||||
"Keywords - exclude {} because it matches the same string as {}",
|
||||
lexical_grammar.variables[token.index].name,
|
||||
lexical_grammar.variables[other_token.index].name
|
||||
);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
}));
|
||||
true
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Exclude keyword candidates for which substituting the keyword capture
|
||||
// token would introduce new lexical conflicts with other tokens.
|
||||
let keywords = LookaheadSet::with(keywords.iter().filter(|token| {
|
||||
for other_index in 0..lexical_grammar.variables.len() {
|
||||
if keywords.contains(&Symbol::terminal(other_index)) {
|
||||
continue;
|
||||
let keywords = keywords
|
||||
.iter()
|
||||
.filter(|token| {
|
||||
for other_index in 0..lexical_grammar.variables.len() {
|
||||
if keywords.contains(&Symbol::terminal(other_index)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// If the word token was already valid in every state containing
|
||||
// this keyword candidate, then substituting the word token won't
|
||||
// introduce any new lexical conflicts.
|
||||
if coincident_token_index
|
||||
.states_with(*token, Symbol::terminal(other_index))
|
||||
.iter()
|
||||
.all(|state_id| {
|
||||
parse_table.states[*state_id]
|
||||
.terminal_entries
|
||||
.contains_key(&word_token)
|
||||
})
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if !token_conflict_map.has_same_conflict_status(
|
||||
token.index,
|
||||
word_token.index,
|
||||
other_index,
|
||||
) {
|
||||
info!(
|
||||
"Keywords - exclude {} because of conflict with {}",
|
||||
lexical_grammar.variables[token.index].name,
|
||||
lexical_grammar.variables[other_index].name
|
||||
);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// If the word token was already valid in every state containing
|
||||
// this keyword candidate, then substituting the word token won't
|
||||
// introduce any new lexical conflicts.
|
||||
if coincident_token_index
|
||||
.states_with(*token, Symbol::terminal(other_index))
|
||||
.iter()
|
||||
.all(|state_id| {
|
||||
parse_table.states[*state_id]
|
||||
.terminal_entries
|
||||
.contains_key(&word_token)
|
||||
})
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if !token_conflict_map.has_same_conflict_status(
|
||||
token.index,
|
||||
word_token.index,
|
||||
other_index,
|
||||
) {
|
||||
info!(
|
||||
"Keywords - exclude {} because of conflict with {}",
|
||||
lexical_grammar.variables[token.index].name,
|
||||
lexical_grammar.variables[other_index].name
|
||||
);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
"Keywords - include {}",
|
||||
lexical_grammar.variables[token.index].name,
|
||||
);
|
||||
true
|
||||
}));
|
||||
info!(
|
||||
"Keywords - include {}",
|
||||
lexical_grammar.variables[token.index].name,
|
||||
);
|
||||
true
|
||||
})
|
||||
.collect();
|
||||
|
||||
keywords
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use crate::build_tables::item::LookaheadSet;
|
||||
use crate::build_tables::item::TokenSet;
|
||||
use crate::grammars::LexicalGrammar;
|
||||
use crate::nfa::{CharacterSet, NfaCursor, NfaTransition};
|
||||
use hashbrown::HashSet;
|
||||
|
|
@ -22,7 +22,7 @@ pub(crate) struct TokenConflictMap<'a> {
|
|||
}
|
||||
|
||||
impl<'a> TokenConflictMap<'a> {
|
||||
pub fn new(grammar: &'a LexicalGrammar, following_tokens: Vec<LookaheadSet>) -> Self {
|
||||
pub fn new(grammar: &'a LexicalGrammar, following_tokens: Vec<TokenSet>) -> Self {
|
||||
let mut cursor = NfaCursor::new(&grammar.nfa, Vec::new());
|
||||
let starting_chars = get_starting_chars(&mut cursor, grammar);
|
||||
let following_chars = get_following_chars(&starting_chars, following_tokens);
|
||||
|
|
@ -141,7 +141,7 @@ fn get_starting_chars(cursor: &mut NfaCursor, grammar: &LexicalGrammar) -> Vec<C
|
|||
|
||||
fn get_following_chars(
|
||||
starting_chars: &Vec<CharacterSet>,
|
||||
following_tokens: Vec<LookaheadSet>,
|
||||
following_tokens: Vec<TokenSet>,
|
||||
) -> Vec<CharacterSet> {
|
||||
following_tokens
|
||||
.into_iter()
|
||||
|
|
@ -352,9 +352,15 @@ mod tests {
|
|||
let token_map = TokenConflictMap::new(
|
||||
&grammar,
|
||||
vec![
|
||||
LookaheadSet::with([Symbol::terminal(var("identifier"))].iter().cloned()),
|
||||
LookaheadSet::with([Symbol::terminal(var("in"))].iter().cloned()),
|
||||
LookaheadSet::with([Symbol::terminal(var("identifier"))].iter().cloned()),
|
||||
[Symbol::terminal(var("identifier"))]
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect(),
|
||||
[Symbol::terminal(var("in"))].iter().cloned().collect(),
|
||||
[Symbol::terminal(var("identifier"))]
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect(),
|
||||
],
|
||||
);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue