Start work on running test corpus tests

This commit is contained in:
Max Brunsfeld 2019-01-11 13:30:45 -08:00
parent 0d85a1ef53
commit e64f7a64a1
15 changed files with 328 additions and 168 deletions

View file

@@ -1,7 +1,9 @@
use super::item::{ParseItem, ParseItemSet, TokenSet};
use super::item_set_builder::ParseItemSetBuilder;
use crate::error::{Error, Result};
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType};
use crate::generate::grammars::{
InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType,
};
use crate::generate::rules::{Alias, Associativity, Symbol, SymbolType};
use crate::generate::tables::{
AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
@@ -11,6 +13,7 @@ use hashbrown::hash_map::Entry;
use hashbrown::{HashMap, HashSet};
use std::collections::hash_map::DefaultHasher;
use std::collections::VecDeque;
use std::u32;
use std::fmt::Write;
use std::hash::Hasher;
@@ -94,7 +97,6 @@ impl<'a> ParseTableBuilder<'a> {
)?;
}
self.populate_used_symbols();
self.remove_precedences();
Ok((self.parse_table, self.following_tokens))
@@ -313,7 +315,10 @@ impl<'a> ParseTableBuilder<'a> {
.first_set(&step.symbol)
.contains(&conflicting_lookahead)
{
conflicting_items.insert(item);
if item.variable_index != u32::MAX {
conflicting_items.insert(item);
}
let precedence = item.precedence();
if let Some(range) = &mut shift_precedence {
if precedence < range.start {
@@ -327,7 +332,9 @@ impl<'a> ParseTableBuilder<'a> {
}
}
} else if lookaheads.contains(&conflicting_lookahead) {
conflicting_items.insert(item);
if item.variable_index != u32::MAX {
conflicting_items.insert(item);
}
}
}
@@ -610,40 +617,6 @@ impl<'a> ParseTableBuilder<'a> {
}
}
/// Record every symbol that actually appears in some parse-table entry, in the
/// canonical order: used external tokens, the end-of-input marker, used
/// terminals, then used non-terminals. The result is appended to
/// `self.parse_table.symbols`.
fn populate_used_symbols(&mut self) {
    // One usage flag per symbol index, bucketed by symbol kind.
    let mut terminals_used = vec![false; self.lexical_grammar.variables.len()];
    let mut non_terminals_used = vec![false; self.syntax_grammar.variables.len()];
    let mut externals_used = vec![false; self.syntax_grammar.external_tokens.len()];

    // Scan every state's entries and flag the symbols they reference.
    for state in self.parse_table.states.iter() {
        for symbol in state.terminal_entries.keys() {
            match symbol.kind {
                SymbolType::Terminal => terminals_used[symbol.index] = true,
                SymbolType::External => externals_used[symbol.index] = true,
                _ => {}
            }
        }
        for symbol in state.nonterminal_entries.keys() {
            non_terminals_used[symbol.index] = true;
        }
    }

    // Emit the flagged symbols in ascending index order within each group.
    let symbols = &mut self.parse_table.symbols;
    symbols.extend(
        externals_used
            .into_iter()
            .enumerate()
            .filter(|(_, used)| *used)
            .map(|(i, _)| Symbol::external(i)),
    );
    symbols.push(Symbol::end());
    symbols.extend(
        terminals_used
            .into_iter()
            .enumerate()
            .filter(|(_, used)| *used)
            .map(|(i, _)| Symbol::terminal(i)),
    );
    symbols.extend(
        non_terminals_used
            .into_iter()
            .enumerate()
            .filter(|(_, used)| *used)
            .map(|(i, _)| Symbol::non_terminal(i)),
    );
}
fn remove_precedences(&mut self) {
for state in self.parse_table.states.iter_mut() {
for (_, entry) in state.terminal_entries.iter_mut() {
@@ -702,7 +675,7 @@ impl<'a> ParseTableBuilder<'a> {
if variable.kind == VariableType::Named {
variable.name.clone()
} else {
format!("\"{}\"", &variable.name)
format!("'{}'", &variable.name)
}
}
}

View file

@@ -15,7 +15,7 @@ use self::token_conflicts::TokenConflictMap;
use crate::error::Result;
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use crate::generate::nfa::{CharacterSet, NfaCursor};
use crate::generate::rules::{AliasMap, Symbol};
use crate::generate::rules::{AliasMap, Symbol, SymbolType};
use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
pub(crate) fn build_tables(
@@ -45,6 +45,7 @@ pub(crate) fn build_tables(
&token_conflict_map,
&keywords,
);
populate_used_symbols(&mut parse_table, syntax_grammar, lexical_grammar);
mark_fragile_tokens(&mut parse_table, lexical_grammar, &token_conflict_map);
if minimize {
minimize_parse_table(
@@ -151,6 +152,44 @@ fn populate_error_state(
state.terminal_entries.insert(Symbol::end(), recover_entry);
}
/// Append every symbol that occurs in some parse-table entry to
/// `parse_table.symbols`, in the canonical order: used external tokens, the
/// end-of-input marker, used terminals, then used non-terminals.
fn populate_used_symbols(
    parse_table: &mut ParseTable,
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
) {
    let n_terminals = lexical_grammar.variables.len();
    let n_non_terminals = syntax_grammar.variables.len();
    let n_externals = syntax_grammar.external_tokens.len();

    // Usage flag per symbol index, grouped by symbol kind.
    let mut terminal_used = vec![false; n_terminals];
    let mut non_terminal_used = vec![false; n_non_terminals];
    let mut external_used = vec![false; n_externals];

    // Flag each symbol referenced by any state's entries.
    for state in parse_table.states.iter() {
        for symbol in state.terminal_entries.keys() {
            match symbol.kind {
                SymbolType::Terminal => terminal_used[symbol.index] = true,
                SymbolType::External => external_used[symbol.index] = true,
                _ => {}
            }
        }
        for symbol in state.nonterminal_entries.keys() {
            non_terminal_used[symbol.index] = true;
        }
    }

    // Emit used symbols in ascending index order within each group.
    for i in 0..n_externals {
        if external_used[i] {
            parse_table.symbols.push(Symbol::external(i));
        }
    }
    parse_table.symbols.push(Symbol::end());
    for i in 0..n_terminals {
        if terminal_used[i] {
            parse_table.symbols.push(Symbol::terminal(i));
        }
    }
    for i in 0..n_non_terminals {
        if non_terminal_used[i] {
            parse_table.symbols.push(Symbol::non_terminal(i));
        }
    }
}
fn identify_keywords(
lexical_grammar: &LexicalGrammar,
parse_table: &ParseTable,

View file

@@ -3,6 +3,7 @@ use self::parse_grammar::parse_grammar;
use self::prepare_grammar::prepare_grammar;
use self::render::render_c_code;
use crate::error::Result;
use regex::{Regex, RegexBuilder};
use std::fs;
use std::io::Write;
use std::path::PathBuf;
@@ -18,7 +19,14 @@ mod render;
mod rules;
mod tables;
pub fn generate_parser_for_grammar(
// Matches whole lines that contain only a `//` comment. Grammar JSON produced
// by the JS tooling may contain such comments, which strict JSON forbids;
// `generate_parser_for_grammar` uses this to replace them with newlines before
// parsing.
lazy_static! {
static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*")
.multi_line(true)
.build()
.unwrap();
}
pub fn generate_parser_in_directory(
repo_path: &PathBuf,
minimize: bool,
state_ids_to_log: Vec<usize>,
@ -26,33 +34,48 @@ pub fn generate_parser_for_grammar(
) -> Result<()> {
if !properties_only {
let grammar_json = load_js_grammar_file(&repo_path.join("grammar.js"));
let input_grammar = parse_grammar(&grammar_json)?;
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
prepare_grammar(&input_grammar)?;
let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
&syntax_grammar,
&lexical_grammar,
&simple_aliases,
&inlines,
minimize,
state_ids_to_log,
)?;
let c_code = render_c_code(
&input_grammar.name,
parse_table,
main_lex_table,
keyword_lex_table,
keyword_capture_token,
syntax_grammar,
lexical_grammar,
simple_aliases,
);
let c_code =
generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?;
fs::write(repo_path.join("src").join("parser.c"), c_code)?;
}
properties::generate_property_sheets(repo_path)?;
Ok(())
}
/// Test-only entry point: compile a grammar given directly as a JSON string,
/// with minimization enabled and no state logging.
///
/// JS-style `//` line comments are stripped first, since the corpus grammar
/// fixtures may contain them while strict JSON does not allow comments.
#[cfg(test)]
pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<String> {
    // `&String` callers still work via deref coercion; `&str` is the idiomatic
    // parameter type and avoids forcing an owned String on callers.
    let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
    generate_parser_for_grammar_with_opts(&grammar_json, true, Vec::new())
}
/// Compile a grammar (as a JSON string) all the way to C parser source.
///
/// Pipeline: parse the JSON grammar description, normalize it into syntax and
/// lexical grammars, build the parse/lex tables, then render everything as C.
fn generate_parser_for_grammar_with_opts(
    grammar_json: &str,
    minimize: bool,
    state_ids_to_log: Vec<usize>,
) -> Result<String> {
    // Parse and normalize the grammar description.
    let grammar = parse_grammar(grammar_json)?;
    let (syntax, lexical, inline_map, aliases) = prepare_grammar(&grammar)?;

    // Construct the parse table plus the main and keyword lex tables.
    let (parse_table, lex_table, keyword_lex_table, keyword_token) = build_tables(
        &syntax,
        &lexical,
        &aliases,
        &inline_map,
        minimize,
        state_ids_to_log,
    )?;

    // Render the tables into C source code.
    let code = render_c_code(
        &grammar.name,
        parse_table,
        lex_table,
        keyword_lex_table,
        keyword_token,
        syntax,
        lexical,
        aliases,
    );
    Ok(code)
}
fn load_js_grammar_file(grammar_path: &PathBuf) -> String {
let mut node_process = Command::new("node")
.stdin(Stdio::piped())