Fix various logic errors in parse table construction
This commit is contained in:
parent
9824ebbbc3
commit
3fbaff5e69
21 changed files with 297 additions and 115 deletions
18
Cargo.lock
generated
18
Cargo.lock
generated
|
|
@ -76,6 +76,11 @@ dependencies = [
|
|||
"constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.25"
|
||||
|
|
@ -212,6 +217,15 @@ dependencies = [
|
|||
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ignore"
|
||||
version = "0.4.4"
|
||||
|
|
@ -463,9 +477,11 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
|
@ -737,6 +753,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
"checksum backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0"
|
||||
"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
|
||||
"checksum blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400"
|
||||
"checksum byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "94f88df23a25417badc922ab0f5716cc1330e87f71ddd9203b3a3ccd9cedf75d"
|
||||
"checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16"
|
||||
"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"
|
||||
"checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e"
|
||||
|
|
@ -753,6 +770,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
|
||||
"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
|
||||
"checksum globset 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4743617a7464bbda3c8aec8558ff2f9429047e025771037df561d383337ff865"
|
||||
"checksum hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "64b7d419d0622ae02fe5da6b9a5e1964b610a65bb37923b976aeebb6dbb8f86e"
|
||||
"checksum ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "36ecfc5ad80f0b1226df948c562e2cddd446096be3f644c95106400eae8a5e01"
|
||||
"checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d"
|
||||
"checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b"
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ lazy_static = "1.2.0"
|
|||
smallbitvec = "2.3.0"
|
||||
clap = "2.32"
|
||||
dirs = "1.0.2"
|
||||
hashbrown = "0.1"
|
||||
ignore = "0.4.4"
|
||||
libloading = "0.5"
|
||||
rusqlite = "0.14.0"
|
||||
|
|
@ -20,3 +21,7 @@ regex-syntax = "0.6.4"
|
|||
[dependencies.serde_json]
|
||||
version = "1.0"
|
||||
features = ["preserve_order"]
|
||||
|
||||
[dependencies.log]
|
||||
version = "0.4.6"
|
||||
features = ["std"]
|
||||
|
|
|
|||
|
|
@ -2,10 +2,9 @@ use super::item::LookaheadSet;
|
|||
use super::token_conflicts::TokenConflictMap;
|
||||
use crate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::nfa::NfaCursor;
|
||||
use crate::rules::Symbol;
|
||||
use crate::tables::{AdvanceAction, LexState, LexTable, ParseTable};
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::collections::{BTreeMap, HashMap, VecDeque};
|
||||
|
||||
pub(crate) fn build_lex_table(
|
||||
parse_table: &mut ParseTable,
|
||||
|
|
@ -16,15 +15,16 @@ pub(crate) fn build_lex_table(
|
|||
let keyword_lex_table;
|
||||
if syntax_grammar.word_token.is_some() {
|
||||
let mut builder = LexTableBuilder::new(lexical_grammar);
|
||||
builder.add_state_for_tokens(keywords.iter());
|
||||
builder.add_state_for_tokens(keywords);
|
||||
keyword_lex_table = builder.table;
|
||||
} else {
|
||||
keyword_lex_table = LexTable::default();
|
||||
}
|
||||
|
||||
let mut builder = LexTableBuilder::new(lexical_grammar);
|
||||
for state in parse_table.states.iter_mut() {
|
||||
let tokens = state.terminal_entries.keys().filter_map(|token| {
|
||||
for (i, state) in parse_table.states.iter_mut().enumerate() {
|
||||
info!("populate lex state for parse state {}", i);
|
||||
let tokens = LookaheadSet::with(state.terminal_entries.keys().filter_map(|token| {
|
||||
if token.is_terminal() {
|
||||
if keywords.contains(&token) {
|
||||
syntax_grammar.word_token
|
||||
|
|
@ -34,11 +34,14 @@ pub(crate) fn build_lex_table(
|
|||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
state.lex_state_id = builder.add_state_for_tokens(tokens);
|
||||
}));
|
||||
state.lex_state_id = builder.add_state_for_tokens(&tokens);
|
||||
}
|
||||
|
||||
(builder.table, keyword_lex_table)
|
||||
let mut table = builder.table;
|
||||
shrink_lex_table(&mut table, parse_table);
|
||||
|
||||
(table, keyword_lex_table)
|
||||
}
|
||||
|
||||
struct LexTableBuilder<'a> {
|
||||
|
|
@ -60,32 +63,49 @@ impl<'a> LexTableBuilder<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
fn add_state_for_tokens(&mut self, tokens: impl Iterator<Item = Symbol>) -> usize {
|
||||
fn add_state_for_tokens(&mut self, tokens: &LookaheadSet) -> usize {
|
||||
let nfa_states = tokens
|
||||
.iter()
|
||||
.map(|token| self.lexical_grammar.variables[token.index].start_state)
|
||||
.collect();
|
||||
let result = self.add_state(nfa_states);
|
||||
while let Some((state_id, nfa_states)) = self.state_queue.pop_front() {
|
||||
let (state_id, is_new) = self.add_state(nfa_states);
|
||||
|
||||
if is_new {
|
||||
info!(
|
||||
"entry point state: {}, tokens: {:?}",
|
||||
state_id,
|
||||
tokens
|
||||
.iter()
|
||||
.map(|t| &self.lexical_grammar.variables[t.index].name)
|
||||
.collect::<Vec<_>>()
|
||||
);
|
||||
}
|
||||
|
||||
while let Some((state_id, nfa_states)) = self.state_queue.pop_back() {
|
||||
self.populate_state(state_id, nfa_states);
|
||||
}
|
||||
result
|
||||
state_id
|
||||
}
|
||||
|
||||
fn add_state(&mut self, nfa_states: Vec<u32>) -> usize {
|
||||
match self.state_ids_by_nfa_state_set.entry(nfa_states) {
|
||||
Entry::Occupied(o) => *o.get(),
|
||||
fn add_state(&mut self, nfa_states: Vec<u32>) -> (usize, bool) {
|
||||
self.cursor.reset(nfa_states);
|
||||
match self
|
||||
.state_ids_by_nfa_state_set
|
||||
.entry(self.cursor.state_ids.clone())
|
||||
{
|
||||
Entry::Occupied(o) => (*o.get(), false),
|
||||
Entry::Vacant(v) => {
|
||||
let state_id = self.table.states.len();
|
||||
self.table.states.push(LexState::default());
|
||||
self.state_queue.push_back((state_id, v.key().clone()));
|
||||
v.insert(state_id);
|
||||
state_id
|
||||
(state_id, true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn populate_state(&mut self, state_id: usize, nfa_states: Vec<u32>) {
|
||||
self.cursor.reset(nfa_states);
|
||||
self.cursor.force_reset(nfa_states);
|
||||
|
||||
let mut completion = None;
|
||||
for (id, prec) in self.cursor.completions() {
|
||||
|
|
@ -102,12 +122,16 @@ impl<'a> LexTableBuilder<'a> {
|
|||
}
|
||||
|
||||
for (chars, advance_precedence, next_states, is_sep) in self.cursor.grouped_successors() {
|
||||
info!(
|
||||
"populate state: {}, characters: {:?}, precedence: {:?}",
|
||||
state_id, chars, advance_precedence
|
||||
);
|
||||
if let Some((_, completed_precedence)) = completion {
|
||||
if advance_precedence < completed_precedence {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let next_state_id = self.add_state(next_states);
|
||||
let (next_state_id, _) = self.add_state(next_states);
|
||||
self.table.states[state_id].advance_actions.push((
|
||||
chars,
|
||||
AdvanceAction {
|
||||
|
|
@ -122,3 +146,59 @@ impl<'a> LexTableBuilder<'a> {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn shrink_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
|
||||
let mut state_replacements = BTreeMap::new();
|
||||
let mut done = false;
|
||||
while !done {
|
||||
done = true;
|
||||
for (i, state_i) in table.states.iter().enumerate() {
|
||||
if state_replacements.contains_key(&i) {
|
||||
continue;
|
||||
}
|
||||
for (j, state_j) in table.states.iter().enumerate() {
|
||||
if state_replacements.contains_key(&j) {
|
||||
continue;
|
||||
}
|
||||
if j == i {
|
||||
break;
|
||||
}
|
||||
if state_i == state_j {
|
||||
info!("replace state {} with state {}", i, j);
|
||||
state_replacements.insert(i, j);
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
for state in table.states.iter_mut() {
|
||||
for advance_action in state.advance_actions.iter_mut() {
|
||||
if let Some(new_state_id) = state_replacements.get(&advance_action.1.state) {
|
||||
advance_action.1.state = *new_state_id;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let final_state_replacements = (0..table.states.len()).into_iter().map(|state_id| {
|
||||
let replacement = state_replacements.get(&state_id).cloned().unwrap_or(state_id);
|
||||
let prior_removed = state_replacements.iter().take_while(|i| *i.0 < replacement).count();
|
||||
replacement - prior_removed
|
||||
}).collect::<Vec<_>>();
|
||||
|
||||
for state in parse_table.states.iter_mut() {
|
||||
state.lex_state_id = final_state_replacements[state.lex_state_id];
|
||||
}
|
||||
|
||||
for state in table.states.iter_mut() {
|
||||
for advance_action in state.advance_actions.iter_mut() {
|
||||
advance_action.1.state = final_state_replacements[advance_action.1.state];
|
||||
}
|
||||
}
|
||||
|
||||
let mut i = 0;
|
||||
table.states.retain(|_| {
|
||||
let result = !state_replacements.contains_key(&i);
|
||||
i += 1;
|
||||
result
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,8 +7,11 @@ use crate::tables::{
|
|||
AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
};
|
||||
use core::ops::Range;
|
||||
use std::collections::hash_map::{DefaultHasher, Entry};
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
use hashbrown::hash_map::Entry;
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::collections::VecDeque;
|
||||
|
||||
use std::fmt::Write;
|
||||
use std::hash::Hasher;
|
||||
|
||||
|
|
@ -43,9 +46,10 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
// Ensure that the empty alias sequence has index 0.
|
||||
self.parse_table.alias_sequences.push(Vec::new());
|
||||
|
||||
// Ensure that the error state has index 0.
|
||||
// Add the error state at index 0.
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
|
||||
|
||||
// Add the starting state at index 1.
|
||||
self.add_parse_state(
|
||||
&Vec::new(),
|
||||
&Vec::new(),
|
||||
|
|
@ -61,6 +65,8 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
|
||||
self.process_part_state_queue()?;
|
||||
self.populate_used_symbols();
|
||||
self.remove_precedences();
|
||||
|
||||
Ok((self.parse_table, self.following_tokens))
|
||||
}
|
||||
|
||||
|
|
@ -112,28 +118,9 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
|
||||
fn process_part_state_queue(&mut self) -> Result<()> {
|
||||
while let Some(entry) = self.parse_state_queue.pop_front() {
|
||||
let debug = false;
|
||||
|
||||
if debug {
|
||||
println!(
|
||||
"ITEM SET {}:\n{}",
|
||||
entry.state_id,
|
||||
self.item_sets_by_state_id[entry.state_id]
|
||||
.display_with(&self.syntax_grammar, &self.lexical_grammar,)
|
||||
);
|
||||
}
|
||||
|
||||
let item_set = self
|
||||
.item_set_builder
|
||||
.transitive_closure(&self.item_sets_by_state_id[entry.state_id]);
|
||||
|
||||
if debug {
|
||||
println!(
|
||||
"TRANSITIVE CLOSURE:\n{}",
|
||||
item_set.display_with(&self.syntax_grammar, &self.lexical_grammar)
|
||||
);
|
||||
}
|
||||
|
||||
self.add_actions(
|
||||
entry.preceding_symbols,
|
||||
entry.preceding_auxiliary_symbols,
|
||||
|
|
@ -527,6 +514,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
|
||||
fn populate_used_symbols(&mut self) {
|
||||
self.parse_table.symbols.push(Symbol::end());
|
||||
let mut terminal_usages = vec![false; self.lexical_grammar.variables.len()];
|
||||
let mut non_terminal_usages = vec![false; self.syntax_grammar.variables.len()];
|
||||
let mut external_usages = vec![false; self.syntax_grammar.external_tokens.len()];
|
||||
|
|
@ -542,20 +530,39 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
non_terminal_usages[symbol.index] = true;
|
||||
}
|
||||
}
|
||||
self.parse_table.symbols.push(Symbol::end());
|
||||
for (i, value) in terminal_usages.into_iter().enumerate() {
|
||||
if value {
|
||||
self.parse_table.symbols.push(Symbol::terminal(i));
|
||||
}
|
||||
}
|
||||
for (i, value) in external_usages.into_iter().enumerate() {
|
||||
if value {
|
||||
self.parse_table.symbols.push(Symbol::external(i));
|
||||
}
|
||||
}
|
||||
for (i, value) in non_terminal_usages.into_iter().enumerate() {
|
||||
if value {
|
||||
self.parse_table.symbols.push(Symbol::non_terminal(i));
|
||||
}
|
||||
}
|
||||
for (i, value) in external_usages.into_iter().enumerate() {
|
||||
if value {
|
||||
self.parse_table.symbols.push(Symbol::external(i));
|
||||
}
|
||||
|
||||
fn remove_precedences(&mut self) {
|
||||
for state in self.parse_table.states.iter_mut() {
|
||||
for (_, entry) in state.terminal_entries.iter_mut() {
|
||||
for action in entry.actions.iter_mut() {
|
||||
match action {
|
||||
ParseAction::Reduce {
|
||||
precedence,
|
||||
associativity,
|
||||
..
|
||||
} => {
|
||||
*precedence = 0;
|
||||
*associativity = None;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,36 +1,44 @@
|
|||
use crate::grammars::LexicalGrammar;
|
||||
use crate::rules::Symbol;
|
||||
use crate::tables::{ParseStateId, ParseTable};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::collections::HashSet;
|
||||
|
||||
pub(crate) struct CoincidentTokenIndex {
|
||||
entries: HashMap<(Symbol, Symbol), HashSet<ParseStateId>>,
|
||||
empty: HashSet<ParseStateId>,
|
||||
entries: Vec<HashSet<ParseStateId>>,
|
||||
n: usize,
|
||||
}
|
||||
|
||||
impl CoincidentTokenIndex {
|
||||
pub fn new(table: &ParseTable) -> Self {
|
||||
let mut entries = HashMap::new();
|
||||
pub fn new(table: &ParseTable, lexical_grammar: &LexicalGrammar) -> Self {
|
||||
let n = lexical_grammar.variables.len();
|
||||
let mut result = Self {
|
||||
n,
|
||||
entries: vec![HashSet::new(); n * n],
|
||||
};
|
||||
for (i, state) in table.states.iter().enumerate() {
|
||||
for symbol in state.terminal_entries.keys() {
|
||||
for other_symbol in state.terminal_entries.keys() {
|
||||
entries
|
||||
.entry((*symbol, *other_symbol))
|
||||
.or_insert(HashSet::new())
|
||||
.insert(i);
|
||||
let index = result.index(*symbol, *other_symbol);
|
||||
result.entries[index].insert(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
Self {
|
||||
entries,
|
||||
empty: HashSet::new(),
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub fn states_with(&self, a: Symbol, b: Symbol) -> &HashSet<ParseStateId> {
|
||||
self.entries.get(&(a, b)).unwrap_or(&self.empty)
|
||||
&self.entries[self.index(a, b)]
|
||||
}
|
||||
|
||||
pub fn contains(&self, a: Symbol, b: Symbol) -> bool {
|
||||
self.entries.contains_key(&(a, b))
|
||||
!self.entries[self.index(a, b)].is_empty()
|
||||
}
|
||||
|
||||
/// Returns the position in `entries` for the unordered pair `(a, b)`.
///
/// The smaller of the two `index` values picks the row and the larger one the
/// column, with rows `self.n` slots wide, so `(a, b)` and `(b, a)` always map
/// to the same slot.
// NOTE(review): only `Symbol::index` is consulted here, so symbols that share
// an index would share a slot, and an index >= `n` would fall outside the
// `n * n` layout -- confirm callers only pass suitable symbols.
fn index(&self, a: Symbol, b: Symbol) -> usize {
    let (row, column) = if a.index < b.index {
        (a.index, b.index)
    } else {
        (b.index, a.index)
    };
    row * self.n + column
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -112,7 +112,9 @@ impl LookaheadSet {
|
|||
return;
|
||||
}
|
||||
};
|
||||
vec.resize(other.index + 1, false);
|
||||
if other.index >= vec.len() {
|
||||
vec.resize(other.index + 1, false);
|
||||
}
|
||||
vec.set(other.index, true);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
use super::item::{LookaheadSet, ParseItem, ParseItemSet};
|
||||
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
|
||||
use crate::rules::Symbol;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
struct TransitiveClosureAddition<'a> {
|
||||
|
|
|
|||
|
|
@ -27,22 +27,14 @@ pub(crate) fn build_tables(
|
|||
let (mut parse_table, following_tokens) =
|
||||
build_parse_table(syntax_grammar, lexical_grammar, inlines)?;
|
||||
let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
|
||||
|
||||
eprintln!("{:?}", token_conflict_map);
|
||||
|
||||
let coincident_token_index = CoincidentTokenIndex::new(&parse_table);
|
||||
let keywords = if let Some(word_token) = syntax_grammar.word_token {
|
||||
identify_keywords(
|
||||
lexical_grammar,
|
||||
&parse_table,
|
||||
word_token,
|
||||
&token_conflict_map,
|
||||
&coincident_token_index,
|
||||
)
|
||||
} else {
|
||||
LookaheadSet::new()
|
||||
};
|
||||
|
||||
let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
|
||||
let keywords = identify_keywords(
|
||||
lexical_grammar,
|
||||
&parse_table,
|
||||
syntax_grammar.word_token,
|
||||
&token_conflict_map,
|
||||
&coincident_token_index,
|
||||
);
|
||||
populate_error_state(
|
||||
&mut parse_table,
|
||||
syntax_grammar,
|
||||
|
|
@ -123,10 +115,15 @@ fn populate_error_state(
|
|||
fn identify_keywords(
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
parse_table: &ParseTable,
|
||||
word_token: Symbol,
|
||||
word_token: Option<Symbol>,
|
||||
token_conflict_map: &TokenConflictMap,
|
||||
coincident_token_index: &CoincidentTokenIndex,
|
||||
) -> LookaheadSet {
|
||||
if word_token.is_none() {
|
||||
return LookaheadSet::new();
|
||||
}
|
||||
|
||||
let word_token = word_token.unwrap();
|
||||
let mut cursor = NfaCursor::new(&lexical_grammar.nfa, Vec::new());
|
||||
|
||||
// First find all of the candidate keyword tokens: tokens that start with
|
||||
|
|
@ -137,6 +134,7 @@ fn identify_keywords(
|
|||
if all_chars_are_alphabetical(&cursor)
|
||||
&& token_conflict_map.does_match_same_string(i, word_token.index)
|
||||
{
|
||||
info!("Keywords - add candidate {}", lexical_grammar.variables[i].name);
|
||||
Some(Symbol::terminal(i))
|
||||
} else {
|
||||
None
|
||||
|
|
@ -150,8 +148,8 @@ fn identify_keywords(
|
|||
if other_token != *token
|
||||
&& token_conflict_map.does_match_same_string(token.index, other_token.index)
|
||||
{
|
||||
eprintln!(
|
||||
"Exclude {} from keywords because it matches the same string as {}",
|
||||
info!(
|
||||
"Keywords - exclude {} because it matches the same string as {}",
|
||||
lexical_grammar.variables[token.index].name,
|
||||
lexical_grammar.variables[other_token.index].name
|
||||
);
|
||||
|
|
@ -189,8 +187,8 @@ fn identify_keywords(
|
|||
word_token.index,
|
||||
other_index,
|
||||
) {
|
||||
eprintln!(
|
||||
"Exclude {} from keywords because of conflict with {}",
|
||||
info!(
|
||||
"Keywords - exclude {} because of conflict with {}",
|
||||
lexical_grammar.variables[token.index].name,
|
||||
lexical_grammar.variables[other_index].name
|
||||
);
|
||||
|
|
@ -198,8 +196,8 @@ fn identify_keywords(
|
|||
}
|
||||
}
|
||||
|
||||
eprintln!(
|
||||
"Include {} in keywords",
|
||||
info!(
|
||||
"Keywords - include {}",
|
||||
lexical_grammar.variables[token.index].name,
|
||||
);
|
||||
true
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ use super::token_conflicts::TokenConflictMap;
|
|||
use crate::grammars::{SyntaxGrammar, VariableType};
|
||||
use crate::rules::{AliasMap, Symbol};
|
||||
use crate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
|
||||
pub(crate) fn shrink_parse_table(
|
||||
parse_table: &mut ParseTable,
|
||||
|
|
@ -240,6 +240,10 @@ fn can_add_entry_to_state(
|
|||
|
||||
fn remove_unused_states(parse_table: &mut ParseTable) {
|
||||
let mut state_usage_map = vec![false; parse_table.states.len()];
|
||||
|
||||
state_usage_map[0] = true;
|
||||
state_usage_map[1] = true;
|
||||
|
||||
for state in &parse_table.states {
|
||||
for referenced_state in state.referenced_states() {
|
||||
state_usage_map[referenced_state] = true;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
use crate::build_tables::item::LookaheadSet;
|
||||
use crate::grammars::LexicalGrammar;
|
||||
use crate::nfa::{CharacterSet, NfaCursor};
|
||||
use std::collections::HashSet;
|
||||
use hashbrown::HashSet;
|
||||
use std::fmt;
|
||||
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use crate::nfa::Nfa;
|
||||
use crate::rules::{Alias, Associativity, Rule, Symbol};
|
||||
use std::collections::HashMap;
|
||||
use hashbrown::HashMap;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum VariableType {
|
||||
|
|
|
|||
29
src/logger.rs
Normal file
29
src/logger.rs
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
use log::{LevelFilter, Log, Metadata, Record};
|
||||
|
||||
struct Logger {
|
||||
pub filter: Option<String>,
|
||||
}
|
||||
|
||||
impl Log for Logger {
|
||||
fn enabled(&self, _: &Metadata) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn log(&self, record: &Record) {
|
||||
eprintln!(
|
||||
"[{}] {}",
|
||||
record
|
||||
.module_path()
|
||||
.unwrap_or_default()
|
||||
.trim_start_matches("rust_tree_sitter_cli::"),
|
||||
record.args()
|
||||
);
|
||||
}
|
||||
|
||||
fn flush(&self) {}
|
||||
}
|
||||
|
||||
pub(crate) fn init() {
|
||||
log::set_boxed_logger(Box::new(Logger { filter: None })).unwrap();
|
||||
log::set_max_level(LevelFilter::Info);
|
||||
}
|
||||
28
src/main.rs
28
src/main.rs
|
|
@ -1,20 +1,23 @@
|
|||
#[macro_use]
|
||||
extern crate serde_derive;
|
||||
#[macro_use]
|
||||
extern crate serde_json;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
#[macro_use]
|
||||
extern crate serde_derive;
|
||||
extern crate hashbrown;
|
||||
extern crate serde_json;
|
||||
|
||||
use std::path::PathBuf;
|
||||
use clap::{App, Arg, SubCommand};
|
||||
use std::env;
|
||||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
use std::process::{Command, Stdio};
|
||||
|
||||
mod build_tables;
|
||||
mod error;
|
||||
mod generate;
|
||||
mod grammars;
|
||||
mod logger;
|
||||
mod nfa;
|
||||
mod parse_grammar;
|
||||
mod prepare_grammar;
|
||||
|
|
@ -27,7 +30,11 @@ fn main() -> error::Result<()> {
|
|||
.version("0.1")
|
||||
.author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
|
||||
.about("Generates and tests parsers")
|
||||
.subcommand(SubCommand::with_name("generate").about("Generate a parser"))
|
||||
.subcommand(
|
||||
SubCommand::with_name("generate")
|
||||
.about("Generate a parser")
|
||||
.arg(Arg::with_name("log").long("log")),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("parse")
|
||||
.about("Parse a file")
|
||||
|
|
@ -42,7 +49,11 @@ fn main() -> error::Result<()> {
|
|||
)
|
||||
.get_matches();
|
||||
|
||||
if let Some(_) = matches.subcommand_matches("generate") {
|
||||
if let Some(matches) = matches.subcommand_matches("generate") {
|
||||
if matches.is_present("log") {
|
||||
logger::init();
|
||||
}
|
||||
|
||||
let mut grammar_path = env::current_dir().expect("Failed to read CWD");
|
||||
grammar_path.push("grammar.js");
|
||||
let grammar_json = load_js_grammar_file(grammar_path);
|
||||
|
|
@ -70,7 +81,8 @@ fn load_js_grammar_file(grammar_path: PathBuf) -> String {
|
|||
"{}\nconsole.log(JSON.stringify(require(\"{}\"), null, 2));\n",
|
||||
js_prelude,
|
||||
grammar_path.to_str().unwrap()
|
||||
).expect("Failed to write to node's stdin");
|
||||
)
|
||||
.expect("Failed to write to node's stdin");
|
||||
drop(node_stdin);
|
||||
let output = node_process
|
||||
.wait_with_output()
|
||||
|
|
|
|||
26
src/nfa.rs
26
src/nfa.rs
|
|
@ -320,6 +320,10 @@ impl<'a> NfaCursor<'a> {
|
|||
self.add_states(&mut states);
|
||||
}
|
||||
|
||||
pub fn force_reset(&mut self, states: Vec<u32>) {
|
||||
self.state_ids = states
|
||||
}
|
||||
|
||||
pub fn successors(&self) -> impl Iterator<Item = (&CharacterSet, i32, u32, bool)> {
|
||||
self.state_ids.iter().filter_map(move |id| {
|
||||
if let NfaState::Advance {
|
||||
|
|
@ -352,16 +356,26 @@ impl<'a> NfaCursor<'a> {
|
|||
result[i].1 = max(result[i].1, prec);
|
||||
result[i].2.push(state);
|
||||
result[i].3 |= is_sep;
|
||||
} else {
|
||||
let intersection = result[i].0.remove_intersection(&mut chars);
|
||||
if !intersection.is_empty() {
|
||||
let mut states = result[i].2.clone();
|
||||
states.push(state);
|
||||
chars = CharacterSet::empty();
|
||||
break;
|
||||
}
|
||||
|
||||
let intersection = result[i].0.remove_intersection(&mut chars);
|
||||
if !intersection.is_empty() {
|
||||
let mut states = result[i].2.clone();
|
||||
let max_prec = max(result[i].1, prec);
|
||||
states.push(state);
|
||||
if result[i].0.is_empty() {
|
||||
result[i].0 = intersection;
|
||||
result[i].1 = max_prec;
|
||||
result[i].2 = states;
|
||||
result[i].3 |= is_sep;
|
||||
} else {
|
||||
result.insert(
|
||||
i,
|
||||
(
|
||||
intersection,
|
||||
max(result[i].1, prec),
|
||||
max_prec,
|
||||
states,
|
||||
result[i].3 || is_sep,
|
||||
),
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_parse_grammar() {
|
||||
let grammar = parse_grammar(&json!({
|
||||
let grammar = parse_grammar(r#"{
|
||||
"name": "my_lang",
|
||||
"rules": {
|
||||
"file": {
|
||||
|
|
@ -148,7 +148,7 @@ mod tests {
|
|||
"value": "foo"
|
||||
}
|
||||
}
|
||||
}).to_string()).unwrap();
|
||||
}"#).unwrap();
|
||||
|
||||
assert_eq!(grammar.name, "my_lang");
|
||||
assert_eq!(grammar.variables, vec![
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
use super::ExtractedSyntaxGrammar;
|
||||
use crate::grammars::{Variable, VariableType};
|
||||
use crate::rules::{Rule, Symbol};
|
||||
use std::collections::HashMap;
|
||||
use hashbrown::HashMap;
|
||||
use std::mem;
|
||||
|
||||
struct Expander {
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
|
|||
use crate::error::{Error, Result};
|
||||
use crate::grammars::{ExternalToken, Variable, VariableType};
|
||||
use crate::rules::{MetadataParams, Rule, Symbol, SymbolType};
|
||||
use std::collections::HashMap;
|
||||
use hashbrown::HashMap;
|
||||
use std::mem;
|
||||
|
||||
pub(super) fn extract_tokens(
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
use crate::grammars::{InlinedProductionMap, Production, ProductionStep, SyntaxGrammar};
|
||||
use std::collections::HashMap;
|
||||
use hashbrown::HashMap;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
struct ProductionStepId {
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
use crate::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType};
|
||||
use crate::nfa::CharacterSet;
|
||||
use crate::rules::{Alias, AliasMap, Symbol, SymbolType};
|
||||
use crate::tables::{LexState, LexTable, ParseAction, ParseTable, ParseTableEntry};
|
||||
use crate::tables::{AdvanceAction, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry};
|
||||
use core::ops::Range;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
use std::fmt::Write;
|
||||
use std::mem::swap;
|
||||
|
||||
|
|
@ -372,17 +372,14 @@ impl Generator {
|
|||
if self.add_character_set_condition(&characters, &ruled_out_characters) {
|
||||
add!(self, ")\n");
|
||||
indent!(self);
|
||||
if action.in_main_token {
|
||||
add_line!(self, "ADVANCE({});", action.state);
|
||||
} else {
|
||||
add_line!(self, "SKIP({});", action.state);
|
||||
}
|
||||
self.add_advance_action(&action);
|
||||
if let CharacterSet::Include(chars) = characters {
|
||||
ruled_out_characters.extend(chars.iter().map(|c| *c as u32));
|
||||
}
|
||||
dedent!(self);
|
||||
} else {
|
||||
self.buffer.truncate(previous_length);
|
||||
self.add_advance_action(&action);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -494,6 +491,14 @@ impl Generator {
|
|||
})
|
||||
}
|
||||
|
||||
fn add_advance_action(&mut self, action: &AdvanceAction) {
|
||||
if action.in_main_token {
|
||||
add_line!(self, "ADVANCE({});", action.state);
|
||||
} else {
|
||||
add_line!(self, "SKIP({});", action.state);
|
||||
}
|
||||
}
|
||||
|
||||
fn add_lex_modes_list(&mut self) {
|
||||
self.get_external_scanner_state_id(HashSet::new());
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use std::collections::HashMap;
|
||||
use hashbrown::HashMap;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub(crate) enum SymbolType {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use crate::nfa::CharacterSet;
|
||||
use crate::rules::{Alias, Associativity, Symbol};
|
||||
use std::collections::HashMap;
|
||||
use hashbrown::HashMap;
|
||||
|
||||
pub(crate) type AliasSequenceId = usize;
|
||||
pub(crate) type ParseStateId = usize;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue