Fix various logic errors in parse table construction

This commit is contained in:
Max Brunsfeld 2019-01-02 16:48:44 -08:00
parent 9824ebbbc3
commit 3fbaff5e69
21 changed files with 297 additions and 115 deletions

18
Cargo.lock generated
View file

@ -76,6 +76,11 @@ dependencies = [
"constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "byteorder"
version = "1.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cc"
version = "1.0.25"
@ -212,6 +217,15 @@ dependencies = [
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "hashbrown"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
"scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ignore"
version = "0.4.4"
@ -463,9 +477,11 @@ version = "0.1.0"
dependencies = [
"clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)",
"dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
"hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
"ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
"rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
@ -737,6 +753,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0"
"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
"checksum blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400"
"checksum byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "94f88df23a25417badc922ab0f5716cc1330e87f71ddd9203b3a3ccd9cedf75d"
"checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16"
"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"
"checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e"
@ -753,6 +770,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
"checksum globset 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4743617a7464bbda3c8aec8558ff2f9429047e025771037df561d383337ff865"
"checksum hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "64b7d419d0622ae02fe5da6b9a5e1964b610a65bb37923b976aeebb6dbb8f86e"
"checksum ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "36ecfc5ad80f0b1226df948c562e2cddd446096be3f644c95106400eae8a5e01"
"checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d"
"checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b"

View file

@ -9,6 +9,7 @@ lazy_static = "1.2.0"
smallbitvec = "2.3.0"
clap = "2.32"
dirs = "1.0.2"
hashbrown = "0.1"
ignore = "0.4.4"
libloading = "0.5"
rusqlite = "0.14.0"
@ -20,3 +21,7 @@ regex-syntax = "0.6.4"
[dependencies.serde_json]
version = "1.0"
features = ["preserve_order"]
[dependencies.log]
version = "0.4.6"
features = ["std"]

View file

@ -2,10 +2,9 @@ use super::item::LookaheadSet;
use super::token_conflicts::TokenConflictMap;
use crate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::nfa::NfaCursor;
use crate::rules::Symbol;
use crate::tables::{AdvanceAction, LexState, LexTable, ParseTable};
use std::collections::hash_map::Entry;
use std::collections::{HashMap, VecDeque};
use std::collections::{BTreeMap, HashMap, VecDeque};
pub(crate) fn build_lex_table(
parse_table: &mut ParseTable,
@ -16,15 +15,16 @@ pub(crate) fn build_lex_table(
let keyword_lex_table;
if syntax_grammar.word_token.is_some() {
let mut builder = LexTableBuilder::new(lexical_grammar);
builder.add_state_for_tokens(keywords.iter());
builder.add_state_for_tokens(keywords);
keyword_lex_table = builder.table;
} else {
keyword_lex_table = LexTable::default();
}
let mut builder = LexTableBuilder::new(lexical_grammar);
for state in parse_table.states.iter_mut() {
let tokens = state.terminal_entries.keys().filter_map(|token| {
for (i, state) in parse_table.states.iter_mut().enumerate() {
info!("populate lex state for parse state {}", i);
let tokens = LookaheadSet::with(state.terminal_entries.keys().filter_map(|token| {
if token.is_terminal() {
if keywords.contains(&token) {
syntax_grammar.word_token
@ -34,11 +34,14 @@ pub(crate) fn build_lex_table(
} else {
None
}
});
state.lex_state_id = builder.add_state_for_tokens(tokens);
}));
state.lex_state_id = builder.add_state_for_tokens(&tokens);
}
(builder.table, keyword_lex_table)
let mut table = builder.table;
shrink_lex_table(&mut table, parse_table);
(table, keyword_lex_table)
}
struct LexTableBuilder<'a> {
@ -60,32 +63,49 @@ impl<'a> LexTableBuilder<'a> {
}
}
fn add_state_for_tokens(&mut self, tokens: impl Iterator<Item = Symbol>) -> usize {
fn add_state_for_tokens(&mut self, tokens: &LookaheadSet) -> usize {
let nfa_states = tokens
.iter()
.map(|token| self.lexical_grammar.variables[token.index].start_state)
.collect();
let result = self.add_state(nfa_states);
while let Some((state_id, nfa_states)) = self.state_queue.pop_front() {
let (state_id, is_new) = self.add_state(nfa_states);
if is_new {
info!(
"entry point state: {}, tokens: {:?}",
state_id,
tokens
.iter()
.map(|t| &self.lexical_grammar.variables[t.index].name)
.collect::<Vec<_>>()
);
}
while let Some((state_id, nfa_states)) = self.state_queue.pop_back() {
self.populate_state(state_id, nfa_states);
}
result
state_id
}
fn add_state(&mut self, nfa_states: Vec<u32>) -> usize {
match self.state_ids_by_nfa_state_set.entry(nfa_states) {
Entry::Occupied(o) => *o.get(),
fn add_state(&mut self, nfa_states: Vec<u32>) -> (usize, bool) {
self.cursor.reset(nfa_states);
match self
.state_ids_by_nfa_state_set
.entry(self.cursor.state_ids.clone())
{
Entry::Occupied(o) => (*o.get(), false),
Entry::Vacant(v) => {
let state_id = self.table.states.len();
self.table.states.push(LexState::default());
self.state_queue.push_back((state_id, v.key().clone()));
v.insert(state_id);
state_id
(state_id, true)
}
}
}
fn populate_state(&mut self, state_id: usize, nfa_states: Vec<u32>) {
self.cursor.reset(nfa_states);
self.cursor.force_reset(nfa_states);
let mut completion = None;
for (id, prec) in self.cursor.completions() {
@ -102,12 +122,16 @@ impl<'a> LexTableBuilder<'a> {
}
for (chars, advance_precedence, next_states, is_sep) in self.cursor.grouped_successors() {
info!(
"populate state: {}, characters: {:?}, precedence: {:?}",
state_id, chars, advance_precedence
);
if let Some((_, completed_precedence)) = completion {
if advance_precedence < completed_precedence {
continue;
}
}
let next_state_id = self.add_state(next_states);
let (next_state_id, _) = self.add_state(next_states);
self.table.states[state_id].advance_actions.push((
chars,
AdvanceAction {
@ -122,3 +146,59 @@ impl<'a> LexTableBuilder<'a> {
}
}
}
/// Deduplicate identical states in the lex table and renumber the
/// survivors, updating every reference from the parse table.
fn shrink_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
    // Maps a removed state id to the (smaller) id of the state that
    // replaces it. A BTreeMap keeps the keys sorted, which the final
    // renumbering pass below relies on (`take_while` over ordered keys).
    let mut state_replacements = BTreeMap::new();
    // Repeat until a full pass finds no new duplicates: merging two
    // states rewrites advance-action targets, which can make further
    // pairs of states compare equal (cascading merges).
    let mut done = false;
    while !done {
        done = true;
        for (i, state_i) in table.states.iter().enumerate() {
            if state_replacements.contains_key(&i) {
                continue;
            }
            for (j, state_j) in table.states.iter().enumerate() {
                if state_replacements.contains_key(&j) {
                    continue;
                }
                // Only compare pairs with j < i, so a state is always
                // replaced by one with a smaller id.
                if j == i {
                    break;
                }
                if state_i == state_j {
                    info!("replace state {} with state {}", i, j);
                    state_replacements.insert(i, j);
                    done = false;
                }
            }
        }
        // Apply the replacements found so far before the next pass, so
        // that states which now point at the same targets become equal.
        for state in table.states.iter_mut() {
            for advance_action in state.advance_actions.iter_mut() {
                if let Some(new_state_id) = state_replacements.get(&advance_action.1.state) {
                    advance_action.1.state = *new_state_id;
                }
            }
        }
    }
    // For each original state id, compute its id after the merged
    // states are removed: follow its replacement (if any), then
    // subtract the count of removed states with smaller ids.
    let final_state_replacements = (0..table.states.len()).into_iter().map(|state_id| {
        let replacement = state_replacements.get(&state_id).cloned().unwrap_or(state_id);
        let prior_removed = state_replacements.iter().take_while(|i| *i.0 < replacement).count();
        replacement - prior_removed
    }).collect::<Vec<_>>();
    // Point every parse state at the renumbered lex state.
    for state in parse_table.states.iter_mut() {
        state.lex_state_id = final_state_replacements[state.lex_state_id];
    }
    // Renumber the lex states' own advance-action targets.
    for state in table.states.iter_mut() {
        for advance_action in state.advance_actions.iter_mut() {
            advance_action.1.state = final_state_replacements[advance_action.1.state];
        }
    }
    // Finally, drop the states that were merged away.
    let mut i = 0;
    table.states.retain(|_| {
        let result = !state_replacements.contains_key(&i);
        i += 1;
        result
    });
}

View file

@ -7,8 +7,11 @@ use crate::tables::{
AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
};
use core::ops::Range;
use std::collections::hash_map::{DefaultHasher, Entry};
use std::collections::{HashMap, HashSet, VecDeque};
use hashbrown::hash_map::Entry;
use hashbrown::{HashMap, HashSet};
use std::collections::hash_map::DefaultHasher;
use std::collections::VecDeque;
use std::fmt::Write;
use std::hash::Hasher;
@ -43,9 +46,10 @@ impl<'a> ParseTableBuilder<'a> {
// Ensure that the empty alias sequence has index 0.
self.parse_table.alias_sequences.push(Vec::new());
// Ensure that the error state has index 0.
// Add the error state at index 0.
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
// Add the starting state at index 1.
self.add_parse_state(
&Vec::new(),
&Vec::new(),
@ -61,6 +65,8 @@ impl<'a> ParseTableBuilder<'a> {
self.process_part_state_queue()?;
self.populate_used_symbols();
self.remove_precedences();
Ok((self.parse_table, self.following_tokens))
}
@ -112,28 +118,9 @@ impl<'a> ParseTableBuilder<'a> {
fn process_part_state_queue(&mut self) -> Result<()> {
while let Some(entry) = self.parse_state_queue.pop_front() {
let debug = false;
if debug {
println!(
"ITEM SET {}:\n{}",
entry.state_id,
self.item_sets_by_state_id[entry.state_id]
.display_with(&self.syntax_grammar, &self.lexical_grammar,)
);
}
let item_set = self
.item_set_builder
.transitive_closure(&self.item_sets_by_state_id[entry.state_id]);
if debug {
println!(
"TRANSITIVE CLOSURE:\n{}",
item_set.display_with(&self.syntax_grammar, &self.lexical_grammar)
);
}
self.add_actions(
entry.preceding_symbols,
entry.preceding_auxiliary_symbols,
@ -527,6 +514,7 @@ impl<'a> ParseTableBuilder<'a> {
}
fn populate_used_symbols(&mut self) {
self.parse_table.symbols.push(Symbol::end());
let mut terminal_usages = vec![false; self.lexical_grammar.variables.len()];
let mut non_terminal_usages = vec![false; self.syntax_grammar.variables.len()];
let mut external_usages = vec![false; self.syntax_grammar.external_tokens.len()];
@ -542,20 +530,39 @@ impl<'a> ParseTableBuilder<'a> {
non_terminal_usages[symbol.index] = true;
}
}
self.parse_table.symbols.push(Symbol::end());
for (i, value) in terminal_usages.into_iter().enumerate() {
if value {
self.parse_table.symbols.push(Symbol::terminal(i));
}
}
for (i, value) in external_usages.into_iter().enumerate() {
if value {
self.parse_table.symbols.push(Symbol::external(i));
}
}
for (i, value) in non_terminal_usages.into_iter().enumerate() {
if value {
self.parse_table.symbols.push(Symbol::non_terminal(i));
}
}
for (i, value) in external_usages.into_iter().enumerate() {
if value {
self.parse_table.symbols.push(Symbol::external(i));
}
fn remove_precedences(&mut self) {
for state in self.parse_table.states.iter_mut() {
for (_, entry) in state.terminal_entries.iter_mut() {
for action in entry.actions.iter_mut() {
match action {
ParseAction::Reduce {
precedence,
associativity,
..
} => {
*precedence = 0;
*associativity = None;
}
_ => {}
}
}
}
}
}

View file

@ -1,36 +1,44 @@
use crate::grammars::LexicalGrammar;
use crate::rules::Symbol;
use crate::tables::{ParseStateId, ParseTable};
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
pub(crate) struct CoincidentTokenIndex {
entries: HashMap<(Symbol, Symbol), HashSet<ParseStateId>>,
empty: HashSet<ParseStateId>,
entries: Vec<HashSet<ParseStateId>>,
n: usize,
}
impl CoincidentTokenIndex {
pub fn new(table: &ParseTable) -> Self {
let mut entries = HashMap::new();
pub fn new(table: &ParseTable, lexical_grammar: &LexicalGrammar) -> Self {
let n = lexical_grammar.variables.len();
let mut result = Self {
n,
entries: vec![HashSet::new(); n * n],
};
for (i, state) in table.states.iter().enumerate() {
for symbol in state.terminal_entries.keys() {
for other_symbol in state.terminal_entries.keys() {
entries
.entry((*symbol, *other_symbol))
.or_insert(HashSet::new())
.insert(i);
let index = result.index(*symbol, *other_symbol);
result.entries[index].insert(i);
}
}
}
Self {
entries,
empty: HashSet::new(),
}
result
}
pub fn states_with(&self, a: Symbol, b: Symbol) -> &HashSet<ParseStateId> {
self.entries.get(&(a, b)).unwrap_or(&self.empty)
&self.entries[self.index(a, b)]
}
pub fn contains(&self, a: Symbol, b: Symbol) -> bool {
self.entries.contains_key(&(a, b))
!self.entries[self.index(a, b)].is_empty()
}
fn index(&self, a: Symbol, b: Symbol) -> usize {
if a.index < b.index {
a.index * self.n + b.index
} else {
b.index * self.n + a.index
}
}
}

View file

@ -112,7 +112,9 @@ impl LookaheadSet {
return;
}
};
vec.resize(other.index + 1, false);
if other.index >= vec.len() {
vec.resize(other.index + 1, false);
}
vec.set(other.index, true);
}

View file

@ -1,7 +1,7 @@
use super::item::{LookaheadSet, ParseItem, ParseItemSet};
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use crate::rules::Symbol;
use std::collections::{HashMap, HashSet};
use hashbrown::{HashMap, HashSet};
#[derive(Clone, Debug, PartialEq, Eq)]
struct TransitiveClosureAddition<'a> {

View file

@ -27,22 +27,14 @@ pub(crate) fn build_tables(
let (mut parse_table, following_tokens) =
build_parse_table(syntax_grammar, lexical_grammar, inlines)?;
let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
eprintln!("{:?}", token_conflict_map);
let coincident_token_index = CoincidentTokenIndex::new(&parse_table);
let keywords = if let Some(word_token) = syntax_grammar.word_token {
identify_keywords(
lexical_grammar,
&parse_table,
word_token,
&token_conflict_map,
&coincident_token_index,
)
} else {
LookaheadSet::new()
};
let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
let keywords = identify_keywords(
lexical_grammar,
&parse_table,
syntax_grammar.word_token,
&token_conflict_map,
&coincident_token_index,
);
populate_error_state(
&mut parse_table,
syntax_grammar,
@ -123,10 +115,15 @@ fn populate_error_state(
fn identify_keywords(
lexical_grammar: &LexicalGrammar,
parse_table: &ParseTable,
word_token: Symbol,
word_token: Option<Symbol>,
token_conflict_map: &TokenConflictMap,
coincident_token_index: &CoincidentTokenIndex,
) -> LookaheadSet {
if word_token.is_none() {
return LookaheadSet::new();
}
let word_token = word_token.unwrap();
let mut cursor = NfaCursor::new(&lexical_grammar.nfa, Vec::new());
// First find all of the candidate keyword tokens: tokens that start with
@ -137,6 +134,7 @@ fn identify_keywords(
if all_chars_are_alphabetical(&cursor)
&& token_conflict_map.does_match_same_string(i, word_token.index)
{
info!("Keywords - add candidate {}", lexical_grammar.variables[i].name);
Some(Symbol::terminal(i))
} else {
None
@ -150,8 +148,8 @@ fn identify_keywords(
if other_token != *token
&& token_conflict_map.does_match_same_string(token.index, other_token.index)
{
eprintln!(
"Exclude {} from keywords because it matches the same string as {}",
info!(
"Keywords - exclude {} because it matches the same string as {}",
lexical_grammar.variables[token.index].name,
lexical_grammar.variables[other_token.index].name
);
@ -189,8 +187,8 @@ fn identify_keywords(
word_token.index,
other_index,
) {
eprintln!(
"Exclude {} from keywords because of conflict with {}",
info!(
"Keywords - exclude {} because of conflict with {}",
lexical_grammar.variables[token.index].name,
lexical_grammar.variables[other_index].name
);
@ -198,8 +196,8 @@ fn identify_keywords(
}
}
eprintln!(
"Include {} in keywords",
info!(
"Keywords - include {}",
lexical_grammar.variables[token.index].name,
);
true

View file

@ -2,7 +2,7 @@ use super::token_conflicts::TokenConflictMap;
use crate::grammars::{SyntaxGrammar, VariableType};
use crate::rules::{AliasMap, Symbol};
use crate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry};
use std::collections::{HashMap, HashSet};
use hashbrown::{HashMap, HashSet};
pub(crate) fn shrink_parse_table(
parse_table: &mut ParseTable,
@ -240,6 +240,10 @@ fn can_add_entry_to_state(
fn remove_unused_states(parse_table: &mut ParseTable) {
let mut state_usage_map = vec![false; parse_table.states.len()];
state_usage_map[0] = true;
state_usage_map[1] = true;
for state in &parse_table.states {
for referenced_state in state.referenced_states() {
state_usage_map[referenced_state] = true;

View file

@ -1,7 +1,7 @@
use crate::build_tables::item::LookaheadSet;
use crate::grammars::LexicalGrammar;
use crate::nfa::{CharacterSet, NfaCursor};
use std::collections::HashSet;
use hashbrown::HashSet;
use std::fmt;
#[derive(Clone, Debug, Default, PartialEq, Eq)]

View file

@ -1,6 +1,6 @@
use crate::nfa::Nfa;
use crate::rules::{Alias, Associativity, Rule, Symbol};
use std::collections::HashMap;
use hashbrown::HashMap;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum VariableType {

29
src/logger.rs Normal file
View file

@ -0,0 +1,29 @@
use log::{LevelFilter, Log, Metadata, Record};
// Minimal logger that writes every record to stderr.
struct Logger {
    // Optional substring for restricting output by module path.
    // NOTE(review): `init` below always constructs this as `None`.
    pub filter: Option<String>,
}
impl Log for Logger {
    // Level-based filtering is delegated to `log::set_max_level`, so
    // every record that reaches us is considered enabled.
    fn enabled(&self, _: &Metadata) -> bool {
        true
    }

    /// Write one record to stderr as `[module] message`, with the
    /// crate-name prefix stripped from the module path for brevity.
    fn log(&self, record: &Record) {
        let module = record
            .module_path()
            .unwrap_or_default()
            .trim_start_matches("rust_tree_sitter_cli::");
        // Honor the optional substring filter. Previously the `filter`
        // field was declared but never consulted, making it dead code.
        if let Some(filter) = &self.filter {
            if !module.contains(filter.as_str()) {
                return;
            }
        }
        eprintln!("[{}] {}", module, record.args());
    }

    // stderr is unbuffered from our side; nothing to flush.
    fn flush(&self) {}
}
/// Install the stderr logger as the process-wide `log` backend and
/// enable records up to `Info` level. Panics if a logger was already
/// installed (the `log` crate allows exactly one global logger).
pub(crate) fn init() {
    let logger = Logger { filter: None };
    log::set_boxed_logger(Box::new(logger)).unwrap();
    log::set_max_level(LevelFilter::Info);
}

View file

@ -1,20 +1,23 @@
#[macro_use]
extern crate serde_derive;
#[macro_use]
extern crate serde_json;
#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate log;
#[macro_use]
extern crate serde_derive;
extern crate hashbrown;
extern crate serde_json;
use std::path::PathBuf;
use clap::{App, Arg, SubCommand};
use std::env;
use std::io::Write;
use std::path::PathBuf;
use std::process::{Command, Stdio};
mod build_tables;
mod error;
mod generate;
mod grammars;
mod logger;
mod nfa;
mod parse_grammar;
mod prepare_grammar;
@ -27,7 +30,11 @@ fn main() -> error::Result<()> {
.version("0.1")
.author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
.about("Generates and tests parsers")
.subcommand(SubCommand::with_name("generate").about("Generate a parser"))
.subcommand(
SubCommand::with_name("generate")
.about("Generate a parser")
.arg(Arg::with_name("log").long("log")),
)
.subcommand(
SubCommand::with_name("parse")
.about("Parse a file")
@ -42,7 +49,11 @@ fn main() -> error::Result<()> {
)
.get_matches();
if let Some(_) = matches.subcommand_matches("generate") {
if let Some(matches) = matches.subcommand_matches("generate") {
if matches.is_present("log") {
logger::init();
}
let mut grammar_path = env::current_dir().expect("Failed to read CWD");
grammar_path.push("grammar.js");
let grammar_json = load_js_grammar_file(grammar_path);
@ -70,7 +81,8 @@ fn load_js_grammar_file(grammar_path: PathBuf) -> String {
"{}\nconsole.log(JSON.stringify(require(\"{}\"), null, 2));\n",
js_prelude,
grammar_path.to_str().unwrap()
).expect("Failed to write to node's stdin");
)
.expect("Failed to write to node's stdin");
drop(node_stdin);
let output = node_process
.wait_with_output()

View file

@ -320,6 +320,10 @@ impl<'a> NfaCursor<'a> {
self.add_states(&mut states);
}
    // Overwrite the cursor's NFA state set directly, bypassing the
    // expansion that `reset` performs via `add_states`. Intended for
    // callers whose `states` vector is already a fully-expanded set —
    // NOTE(review): confirm callers uphold that invariant.
    pub fn force_reset(&mut self, states: Vec<u32>) {
        self.state_ids = states
    }
pub fn successors(&self) -> impl Iterator<Item = (&CharacterSet, i32, u32, bool)> {
self.state_ids.iter().filter_map(move |id| {
if let NfaState::Advance {
@ -352,16 +356,26 @@ impl<'a> NfaCursor<'a> {
result[i].1 = max(result[i].1, prec);
result[i].2.push(state);
result[i].3 |= is_sep;
} else {
let intersection = result[i].0.remove_intersection(&mut chars);
if !intersection.is_empty() {
let mut states = result[i].2.clone();
states.push(state);
chars = CharacterSet::empty();
break;
}
let intersection = result[i].0.remove_intersection(&mut chars);
if !intersection.is_empty() {
let mut states = result[i].2.clone();
let max_prec = max(result[i].1, prec);
states.push(state);
if result[i].0.is_empty() {
result[i].0 = intersection;
result[i].1 = max_prec;
result[i].2 = states;
result[i].3 |= is_sep;
} else {
result.insert(
i,
(
intersection,
max(result[i].1, prec),
max_prec,
states,
result[i].3 || is_sep,
),

View file

@ -133,7 +133,7 @@ mod tests {
#[test]
fn test_parse_grammar() {
let grammar = parse_grammar(&json!({
let grammar = parse_grammar(r#"{
"name": "my_lang",
"rules": {
"file": {
@ -148,7 +148,7 @@ mod tests {
"value": "foo"
}
}
}).to_string()).unwrap();
}"#).unwrap();
assert_eq!(grammar.name, "my_lang");
assert_eq!(grammar.variables, vec![

View file

@ -1,7 +1,7 @@
use super::ExtractedSyntaxGrammar;
use crate::grammars::{Variable, VariableType};
use crate::rules::{Rule, Symbol};
use std::collections::HashMap;
use hashbrown::HashMap;
use std::mem;
struct Expander {

View file

@ -2,7 +2,7 @@ use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
use crate::error::{Error, Result};
use crate::grammars::{ExternalToken, Variable, VariableType};
use crate::rules::{MetadataParams, Rule, Symbol, SymbolType};
use std::collections::HashMap;
use hashbrown::HashMap;
use std::mem;
pub(super) fn extract_tokens(

View file

@ -1,5 +1,5 @@
use crate::grammars::{InlinedProductionMap, Production, ProductionStep, SyntaxGrammar};
use std::collections::HashMap;
use hashbrown::HashMap;
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct ProductionStepId {

View file

@ -1,9 +1,9 @@
use crate::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType};
use crate::nfa::CharacterSet;
use crate::rules::{Alias, AliasMap, Symbol, SymbolType};
use crate::tables::{LexState, LexTable, ParseAction, ParseTable, ParseTableEntry};
use crate::tables::{AdvanceAction, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry};
use core::ops::Range;
use std::collections::{HashMap, HashSet};
use hashbrown::{HashMap, HashSet};
use std::fmt::Write;
use std::mem::swap;
@ -372,17 +372,14 @@ impl Generator {
if self.add_character_set_condition(&characters, &ruled_out_characters) {
add!(self, ")\n");
indent!(self);
if action.in_main_token {
add_line!(self, "ADVANCE({});", action.state);
} else {
add_line!(self, "SKIP({});", action.state);
}
self.add_advance_action(&action);
if let CharacterSet::Include(chars) = characters {
ruled_out_characters.extend(chars.iter().map(|c| *c as u32));
}
dedent!(self);
} else {
self.buffer.truncate(previous_length);
self.add_advance_action(&action);
}
}
@ -494,6 +491,14 @@ impl Generator {
})
}
fn add_advance_action(&mut self, action: &AdvanceAction) {
if action.in_main_token {
add_line!(self, "ADVANCE({});", action.state);
} else {
add_line!(self, "SKIP({});", action.state);
}
}
fn add_lex_modes_list(&mut self) {
self.get_external_scanner_state_id(HashSet::new());

View file

@ -1,4 +1,4 @@
use std::collections::HashMap;
use hashbrown::HashMap;
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) enum SymbolType {

View file

@ -1,6 +1,6 @@
use crate::nfa::CharacterSet;
use crate::rules::{Alias, Associativity, Symbol};
use std::collections::HashMap;
use hashbrown::HashMap;
pub(crate) type AliasSequenceId = usize;
pub(crate) type ParseStateId = usize;