Start work on running test corpus tests
This commit is contained in:
parent
0d85a1ef53
commit
e64f7a64a1
15 changed files with 328 additions and 168 deletions
|
|
@ -1,7 +1,9 @@
|
|||
use super::item::{ParseItem, ParseItemSet, TokenSet};
|
||||
use super::item_set_builder::ParseItemSetBuilder;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType};
|
||||
use crate::generate::grammars::{
|
||||
InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType,
|
||||
};
|
||||
use crate::generate::rules::{Alias, Associativity, Symbol, SymbolType};
|
||||
use crate::generate::tables::{
|
||||
AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
|
|
@ -11,6 +13,7 @@ use hashbrown::hash_map::Entry;
|
|||
use hashbrown::{HashMap, HashSet};
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::collections::VecDeque;
|
||||
use std::u32;
|
||||
|
||||
use std::fmt::Write;
|
||||
use std::hash::Hasher;
|
||||
|
|
@ -94,7 +97,6 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
)?;
|
||||
}
|
||||
|
||||
self.populate_used_symbols();
|
||||
self.remove_precedences();
|
||||
|
||||
Ok((self.parse_table, self.following_tokens))
|
||||
|
|
@ -313,7 +315,10 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.first_set(&step.symbol)
|
||||
.contains(&conflicting_lookahead)
|
||||
{
|
||||
conflicting_items.insert(item);
|
||||
if item.variable_index != u32::MAX {
|
||||
conflicting_items.insert(item);
|
||||
}
|
||||
|
||||
let precedence = item.precedence();
|
||||
if let Some(range) = &mut shift_precedence {
|
||||
if precedence < range.start {
|
||||
|
|
@ -327,7 +332,9 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
}
|
||||
} else if lookaheads.contains(&conflicting_lookahead) {
|
||||
conflicting_items.insert(item);
|
||||
if item.variable_index != u32::MAX {
|
||||
conflicting_items.insert(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -610,40 +617,6 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
fn populate_used_symbols(&mut self) {
|
||||
let mut terminal_usages = vec![false; self.lexical_grammar.variables.len()];
|
||||
let mut non_terminal_usages = vec![false; self.syntax_grammar.variables.len()];
|
||||
let mut external_usages = vec![false; self.syntax_grammar.external_tokens.len()];
|
||||
for state in &self.parse_table.states {
|
||||
for symbol in state.terminal_entries.keys() {
|
||||
match symbol.kind {
|
||||
SymbolType::Terminal => terminal_usages[symbol.index] = true,
|
||||
SymbolType::External => external_usages[symbol.index] = true,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
for symbol in state.nonterminal_entries.keys() {
|
||||
non_terminal_usages[symbol.index] = true;
|
||||
}
|
||||
}
|
||||
for (i, value) in external_usages.into_iter().enumerate() {
|
||||
if value {
|
||||
self.parse_table.symbols.push(Symbol::external(i));
|
||||
}
|
||||
}
|
||||
self.parse_table.symbols.push(Symbol::end());
|
||||
for (i, value) in terminal_usages.into_iter().enumerate() {
|
||||
if value {
|
||||
self.parse_table.symbols.push(Symbol::terminal(i));
|
||||
}
|
||||
}
|
||||
for (i, value) in non_terminal_usages.into_iter().enumerate() {
|
||||
if value {
|
||||
self.parse_table.symbols.push(Symbol::non_terminal(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn remove_precedences(&mut self) {
|
||||
for state in self.parse_table.states.iter_mut() {
|
||||
for (_, entry) in state.terminal_entries.iter_mut() {
|
||||
|
|
@ -702,7 +675,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
if variable.kind == VariableType::Named {
|
||||
variable.name.clone()
|
||||
} else {
|
||||
format!("\"{}\"", &variable.name)
|
||||
format!("'{}'", &variable.name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ use self::token_conflicts::TokenConflictMap;
|
|||
use crate::error::Result;
|
||||
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::nfa::{CharacterSet, NfaCursor};
|
||||
use crate::generate::rules::{AliasMap, Symbol};
|
||||
use crate::generate::rules::{AliasMap, Symbol, SymbolType};
|
||||
use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
|
||||
|
||||
pub(crate) fn build_tables(
|
||||
|
|
@ -45,6 +45,7 @@ pub(crate) fn build_tables(
|
|||
&token_conflict_map,
|
||||
&keywords,
|
||||
);
|
||||
populate_used_symbols(&mut parse_table, syntax_grammar, lexical_grammar);
|
||||
mark_fragile_tokens(&mut parse_table, lexical_grammar, &token_conflict_map);
|
||||
if minimize {
|
||||
minimize_parse_table(
|
||||
|
|
@ -151,6 +152,44 @@ fn populate_error_state(
|
|||
state.terminal_entries.insert(Symbol::end(), recover_entry);
|
||||
}
|
||||
|
||||
fn populate_used_symbols(
|
||||
parse_table: &mut ParseTable,
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
) {
|
||||
let mut terminal_usages = vec![false; lexical_grammar.variables.len()];
|
||||
let mut non_terminal_usages = vec![false; syntax_grammar.variables.len()];
|
||||
let mut external_usages = vec![false; syntax_grammar.external_tokens.len()];
|
||||
for state in &parse_table.states {
|
||||
for symbol in state.terminal_entries.keys() {
|
||||
match symbol.kind {
|
||||
SymbolType::Terminal => terminal_usages[symbol.index] = true,
|
||||
SymbolType::External => external_usages[symbol.index] = true,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
for symbol in state.nonterminal_entries.keys() {
|
||||
non_terminal_usages[symbol.index] = true;
|
||||
}
|
||||
}
|
||||
for (i, value) in external_usages.into_iter().enumerate() {
|
||||
if value {
|
||||
parse_table.symbols.push(Symbol::external(i));
|
||||
}
|
||||
}
|
||||
parse_table.symbols.push(Symbol::end());
|
||||
for (i, value) in terminal_usages.into_iter().enumerate() {
|
||||
if value {
|
||||
parse_table.symbols.push(Symbol::terminal(i));
|
||||
}
|
||||
}
|
||||
for (i, value) in non_terminal_usages.into_iter().enumerate() {
|
||||
if value {
|
||||
parse_table.symbols.push(Symbol::non_terminal(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn identify_keywords(
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
parse_table: &ParseTable,
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ use self::parse_grammar::parse_grammar;
|
|||
use self::prepare_grammar::prepare_grammar;
|
||||
use self::render::render_c_code;
|
||||
use crate::error::Result;
|
||||
use regex::{Regex, RegexBuilder};
|
||||
use std::fs;
|
||||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
|
|
@ -18,7 +19,14 @@ mod render;
|
|||
mod rules;
|
||||
mod tables;
|
||||
|
||||
pub fn generate_parser_for_grammar(
|
||||
lazy_static! {
    /// Matches a `//` comment that begins a line (optionally preceded by whitespace),
    /// through the end of that line.
    static ref JSON_COMMENT_REGEX: Regex = {
        let mut builder = RegexBuilder::new("^\\s*//.*");
        // Multi-line mode lets `^` anchor at the start of every line, not just the input.
        builder.multi_line(true);
        builder.build().unwrap()
    };
}
|
||||
|
||||
pub fn generate_parser_in_directory(
|
||||
repo_path: &PathBuf,
|
||||
minimize: bool,
|
||||
state_ids_to_log: Vec<usize>,
|
||||
|
|
@ -26,33 +34,48 @@ pub fn generate_parser_for_grammar(
|
|||
) -> Result<()> {
|
||||
if !properties_only {
|
||||
let grammar_json = load_js_grammar_file(&repo_path.join("grammar.js"));
|
||||
let input_grammar = parse_grammar(&grammar_json)?;
|
||||
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
|
||||
prepare_grammar(&input_grammar)?;
|
||||
let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
&simple_aliases,
|
||||
&inlines,
|
||||
minimize,
|
||||
state_ids_to_log,
|
||||
)?;
|
||||
let c_code = render_c_code(
|
||||
&input_grammar.name,
|
||||
parse_table,
|
||||
main_lex_table,
|
||||
keyword_lex_table,
|
||||
keyword_capture_token,
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
simple_aliases,
|
||||
);
|
||||
let c_code =
|
||||
generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?;
|
||||
fs::write(repo_path.join("src").join("parser.c"), c_code)?;
|
||||
}
|
||||
properties::generate_property_sheets(repo_path)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Generates parser C code from grammar JSON text that may contain `//` line comments.
///
/// Every match of `JSON_COMMENT_REGEX` is replaced with a newline, and the result is passed
/// to `generate_parser_for_grammar_with_opts` with minimization enabled and no state ids to
/// log.
///
/// Accepts `&str` so callers are not forced to own a `String`; a `&String` argument still
/// works through deref coercion.
#[cfg(test)]
pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<String> {
    let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
    generate_parser_for_grammar_with_opts(&grammar_json, true, Vec::new())
}
|
||||
|
||||
fn generate_parser_for_grammar_with_opts(
|
||||
grammar_json: &str,
|
||||
minimize: bool,
|
||||
state_ids_to_log: Vec<usize>,
|
||||
) -> Result<String> {
|
||||
let input_grammar = parse_grammar(grammar_json)?;
|
||||
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
|
||||
prepare_grammar(&input_grammar)?;
|
||||
let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
&simple_aliases,
|
||||
&inlines,
|
||||
minimize,
|
||||
state_ids_to_log,
|
||||
)?;
|
||||
Ok(render_c_code(
|
||||
&input_grammar.name,
|
||||
parse_table,
|
||||
main_lex_table,
|
||||
keyword_lex_table,
|
||||
keyword_capture_token,
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
simple_aliases,
|
||||
))
|
||||
}
|
||||
|
||||
fn load_js_grammar_file(grammar_path: &PathBuf) -> String {
|
||||
let mut node_process = Command::new("node")
|
||||
.stdin(Stdio::piped())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue