Move parser generation code into 'generate' module within CLI crate

This commit is contained in:
Max Brunsfeld 2019-01-07 10:23:01 -08:00
parent 4e29fe69df
commit f059557a9d
28 changed files with 187 additions and 171 deletions

View file

@ -1,34 +0,0 @@
use crate::build_tables::build_tables;
use crate::error::Result;
use crate::parse_grammar::parse_grammar;
use crate::prepare_grammar::prepare_grammar;
use crate::render::render_c_code;
/// Generate C parser source code from a grammar's JSON definition.
///
/// `input` is the grammar JSON text; `minimize` controls parse-table
/// minimization; `state_ids_to_log` selects parse states for which build
/// logging is emitted.
pub fn generate_parser_for_grammar(
    input: &str,
    minimize: bool,
    state_ids_to_log: Vec<usize>,
) -> Result<String> {
    // Parse the raw JSON, then split the grammar into its syntactic and
    // lexical parts (plus inlined productions and simple aliases).
    let grammar = parse_grammar(input)?;
    let (syntax, lexical, inline_map, aliases) = prepare_grammar(&grammar)?;

    // Compile the prepared grammars into parse and lex tables.
    let (parse_table, lex_table, keyword_table, keyword_token) = build_tables(
        &syntax,
        &lexical,
        &aliases,
        &inline_map,
        minimize,
        state_ids_to_log,
    )?;

    // Render everything into a single C source string.
    Ok(render_c_code(
        &grammar.name,
        parse_table,
        lex_table,
        keyword_table,
        keyword_token,
        syntax,
        lexical,
        aliases,
    ))
}

View file

@ -1,10 +1,10 @@
use super::coincident_tokens::CoincidentTokenIndex;
use super::item::TokenSet;
use super::token_conflicts::TokenConflictMap;
use crate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::nfa::{CharacterSet, NfaCursor, NfaTransition};
use crate::rules::Symbol;
use crate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable};
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition};
use crate::generate::rules::Symbol;
use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable};
use std::collections::hash_map::Entry;
use std::collections::{BTreeMap, HashMap, VecDeque};

View file

@ -1,9 +1,9 @@
use super::item::{ParseItem, ParseItemSet, TokenSet};
use super::item_set_builder::ParseItemSetBuilder;
use crate::error::{Error, Result};
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType};
use crate::rules::{Alias, Associativity, Symbol, SymbolType};
use crate::tables::{
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType};
use crate::generate::rules::{Alias, Associativity, Symbol, SymbolType};
use crate::generate::tables::{
AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
};
use core::ops::Range;

View file

@ -1,6 +1,6 @@
use crate::grammars::LexicalGrammar;
use crate::rules::Symbol;
use crate::tables::{ParseStateId, ParseTable};
use crate::generate::grammars::LexicalGrammar;
use crate::generate::rules::Symbol;
use crate::generate::tables::{ParseStateId, ParseTable};
use std::fmt;
pub(crate) struct CoincidentTokenIndex<'a> {

View file

@ -1,6 +1,6 @@
use crate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar};
use crate::rules::Associativity;
use crate::rules::{Symbol, SymbolType};
use crate::generate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar};
use crate::generate::rules::Associativity;
use crate::generate::rules::{Symbol, SymbolType};
use smallbitvec::SmallBitVec;
use std::cmp::Ordering;
use std::collections::BTreeMap;

View file

@ -1,6 +1,6 @@
use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSet};
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use crate::rules::Symbol;
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use crate::generate::rules::Symbol;
use hashbrown::{HashMap, HashSet};
use std::fmt;

View file

@ -1,8 +1,8 @@
use super::item::TokenSet;
use super::token_conflicts::TokenConflictMap;
use crate::grammars::{SyntaxGrammar, VariableType};
use crate::rules::{AliasMap, Symbol};
use crate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry};
use crate::generate::grammars::{SyntaxGrammar, VariableType};
use crate::generate::rules::{AliasMap, Symbol};
use crate::generate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry};
use hashbrown::{HashMap, HashSet};
pub(crate) fn minimize_parse_table(

View file

@ -13,10 +13,10 @@ use self::item::TokenSet;
use self::minimize_parse_table::minimize_parse_table;
use self::token_conflicts::TokenConflictMap;
use crate::error::Result;
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use crate::nfa::{CharacterSet, NfaCursor};
use crate::rules::{AliasMap, Symbol};
use crate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use crate::generate::nfa::{CharacterSet, NfaCursor};
use crate::generate::rules::{AliasMap, Symbol};
use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
pub(crate) fn build_tables(
syntax_grammar: &SyntaxGrammar,

View file

@ -1,6 +1,6 @@
use crate::build_tables::item::TokenSet;
use crate::grammars::LexicalGrammar;
use crate::nfa::{CharacterSet, NfaCursor, NfaTransition};
use crate::generate::build_tables::item::TokenSet;
use crate::generate::grammars::LexicalGrammar;
use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition};
use hashbrown::HashSet;
use std::cmp::Ordering;
use std::fmt;
@ -288,9 +288,9 @@ fn variable_ids_for_states<'a>(
#[cfg(test)]
mod tests {
use super::*;
use crate::grammars::{Variable, VariableType};
use crate::prepare_grammar::{expand_tokens, ExtractedLexicalGrammar};
use crate::rules::{Rule, Symbol};
use crate::generate::grammars::{Variable, VariableType};
use crate::generate::prepare_grammar::{expand_tokens, ExtractedLexicalGrammar};
use crate::generate::rules::{Rule, Symbol};
#[test]
fn test_starting_characters() {

View file

@ -1,5 +1,5 @@
use crate::nfa::Nfa;
use crate::rules::{Alias, Associativity, Rule, Symbol};
use super::nfa::Nfa;
use super::rules::{Alias, Associativity, Rule, Symbol};
use hashbrown::HashMap;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]

79
cli/src/generate/mod.rs Normal file
View file

@ -0,0 +1,79 @@
use self::build_tables::build_tables;
use self::parse_grammar::parse_grammar;
use self::prepare_grammar::prepare_grammar;
use self::render::render_c_code;
use crate::error::Result;
use std::io::Write;
use std::path::PathBuf;
use std::process::{Command, Stdio};
mod build_tables;
mod grammars;
mod nfa;
mod parse_grammar;
mod prepare_grammar;
mod render;
mod rules;
mod tables;
/// Generate C parser source code for the grammar file at `grammar_path`.
///
/// The grammar is a JavaScript file; it is evaluated with `node` to produce
/// grammar JSON, which is then parsed, prepared, compiled into parse/lex
/// tables, and rendered as C code.
pub fn generate_parser_for_grammar(
    grammar_path: &PathBuf,
    minimize: bool,
    state_ids_to_log: Vec<usize>,
) -> Result<String> {
    // Evaluate the JS grammar file with node to obtain the grammar JSON.
    let grammar_json = load_js_grammar_file(grammar_path);

    // Parse the JSON, then split the grammar into its syntactic and lexical
    // parts (plus inlined productions and simple aliases).
    let grammar = parse_grammar(&grammar_json)?;
    let (syntax, lexical, inline_map, aliases) = prepare_grammar(&grammar)?;

    // Compile the prepared grammars into parse and lex tables.
    let (parse_table, lex_table, keyword_table, keyword_token) = build_tables(
        &syntax,
        &lexical,
        &aliases,
        &inline_map,
        minimize,
        state_ids_to_log,
    )?;

    // Render everything into a single C source string.
    Ok(render_c_code(
        &grammar.name,
        parse_table,
        lex_table,
        keyword_table,
        keyword_token,
        syntax,
        lexical,
        aliases,
    ))
}
fn load_js_grammar_file(grammar_path: &PathBuf) -> String {
let mut node_process = Command::new("node")
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.expect("Failed to run `node`");
let js_prelude = include_str!("./dsl.js");
let mut node_stdin = node_process
.stdin
.take()
.expect("Failed to open stdin for node");
write!(
node_stdin,
"{}\nconsole.log(JSON.stringify(require(\"{}\"), null, 2));\n",
js_prelude,
grammar_path.to_str().unwrap()
)
.expect("Failed to write to node's stdin");
drop(node_stdin);
let output = node_process
.wait_with_output()
.expect("Failed to read output from node");
match output.status.code() {
None => panic!("Node process was killed"),
Some(0) => {}
Some(code) => panic!(format!("Node process exited with status {}", code)),
}
String::from_utf8(output.stdout).expect("Got invalid UTF8 from node")
}

View file

@ -1,7 +1,7 @@
use serde_json::{Map, Value};
use super::grammars::{InputGrammar, Variable, VariableType};
use super::rules::Rule;
use crate::error::Result;
use crate::grammars::{InputGrammar, Variable, VariableType};
use crate::rules::Rule;
use serde_json::{Map, Value};
#[derive(Deserialize)]
#[serde(tag = "type")]
@ -81,20 +81,20 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
})
}
let extra_tokens = grammar_json.extras
let extra_tokens = grammar_json
.extras
.unwrap_or(Vec::new())
.into_iter()
.map(parse_rule)
.collect();
let external_tokens = grammar_json.externals
let external_tokens = grammar_json
.externals
.unwrap_or(Vec::new())
.into_iter()
.map(parse_rule)
.collect();
let expected_conflicts = grammar_json.conflicts
.unwrap_or(Vec::new());
let variables_to_inline = grammar_json.inline
.unwrap_or(Vec::new());
let expected_conflicts = grammar_json.conflicts.unwrap_or(Vec::new());
let variables_to_inline = grammar_json.inline.unwrap_or(Vec::new());
Ok(InputGrammar {
name: grammar_json.name,
@ -109,7 +109,11 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
fn parse_rule(json: RuleJSON) -> Rule {
match json {
RuleJSON::ALIAS { content, value, named } => Rule::alias(parse_rule(*content), value, named),
RuleJSON::ALIAS {
content,
value,
named,
} => Rule::alias(parse_rule(*content), value, named),
RuleJSON::BLANK => Rule::Blank,
RuleJSON::STRING { value } => Rule::String(value),
RuleJSON::PATTERN { value } => Rule::Pattern(value),
@ -117,11 +121,15 @@ fn parse_rule(json: RuleJSON) -> Rule {
RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()),
RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()),
RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)),
RuleJSON::REPEAT { content } => Rule::choice(vec![Rule::repeat(parse_rule(*content)), Rule::Blank]),
RuleJSON::REPEAT { content } => {
Rule::choice(vec![Rule::repeat(parse_rule(*content)), Rule::Blank])
}
RuleJSON::PREC { value, content } => Rule::prec(value, parse_rule(*content)),
RuleJSON::PREC_LEFT { value, content } => Rule::prec_left(value, parse_rule(*content)),
RuleJSON::PREC_RIGHT { value, content } => Rule::prec_right(value, parse_rule(*content)),
RuleJSON::PREC_DYNAMIC { value, content } => Rule::prec_dynamic(value, parse_rule(*content)),
RuleJSON::PREC_DYNAMIC { value, content } => {
Rule::prec_dynamic(value, parse_rule(*content))
}
RuleJSON::TOKEN { content } => Rule::token(parse_rule(*content)),
RuleJSON::IMMEDIATE_TOKEN { content } => Rule::immediate_token(parse_rule(*content)),
}
@ -133,7 +141,8 @@ mod tests {
#[test]
fn test_parse_grammar() {
let grammar = parse_grammar(r#"{
let grammar = parse_grammar(
r#"{
"name": "my_lang",
"rules": {
"file": {
@ -148,20 +157,25 @@ mod tests {
"value": "foo"
}
}
}"#).unwrap();
}"#,
)
.unwrap();
assert_eq!(grammar.name, "my_lang");
assert_eq!(grammar.variables, vec![
Variable {
name: "file".to_string(),
kind: VariableType::Named,
rule: Rule::repeat(Rule::NamedSymbol("statement".to_string()))
},
Variable {
name: "statement".to_string(),
kind: VariableType::Named,
rule: Rule::String("foo".to_string())
},
]);
assert_eq!(
grammar.variables,
vec![
Variable {
name: "file".to_string(),
kind: VariableType::Named,
rule: Rule::repeat(Rule::NamedSymbol("statement".to_string()))
},
Variable {
name: "statement".to_string(),
kind: VariableType::Named,
rule: Rule::String("foo".to_string())
},
]
);
}
}

View file

@ -1,6 +1,6 @@
use super::ExtractedSyntaxGrammar;
use crate::grammars::{Variable, VariableType};
use crate::rules::{Rule, Symbol};
use crate::generate::grammars::{Variable, VariableType};
use crate::generate::rules::{Rule, Symbol};
use hashbrown::HashMap;
use std::mem;

View file

@ -1,8 +1,8 @@
use super::ExtractedLexicalGrammar;
use crate::error::{Error, Result};
use crate::grammars::{LexicalGrammar, LexicalVariable};
use crate::nfa::{CharacterSet, Nfa, NfaState};
use crate::rules::Rule;
use crate::generate::grammars::{LexicalGrammar, LexicalVariable};
use crate::generate::nfa::{CharacterSet, Nfa, NfaState};
use crate::generate::rules::Rule;
use regex_syntax::ast::{
parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind, RepetitionRange,
};
@ -366,8 +366,8 @@ impl NfaBuilder {
#[cfg(test)]
mod tests {
use super::*;
use crate::grammars::Variable;
use crate::nfa::{NfaCursor, NfaTransition};
use crate::generate::grammars::Variable;
use crate::generate::nfa::{NfaCursor, NfaTransition};
fn simulate_nfa<'a>(grammar: &'a LexicalGrammar, s: &'a str) -> Option<(usize, &'a str)> {
let start_states = grammar.variables.iter().map(|v| v.start_state).collect();

View file

@ -1,5 +1,5 @@
use crate::rules::{Alias, AliasMap, Symbol, SymbolType};
use crate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
#[derive(Clone, Default)]
struct SymbolStatus {
@ -83,8 +83,8 @@ pub(super) fn extract_simple_aliases(
#[cfg(test)]
mod tests {
use super::*;
use crate::grammars::{LexicalVariable, SyntaxVariable, VariableType, Production, ProductionStep};
use crate::nfa::Nfa;
use crate::generate::grammars::{LexicalVariable, SyntaxVariable, VariableType, Production, ProductionStep};
use crate::generate::nfa::Nfa;
#[test]
fn test_extract_simple_aliases() {

View file

@ -1,7 +1,7 @@
use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
use crate::error::{Error, Result};
use crate::grammars::{ExternalToken, Variable, VariableType};
use crate::rules::{MetadataParams, Rule, Symbol, SymbolType};
use crate::generate::grammars::{ExternalToken, Variable, VariableType};
use crate::generate::rules::{MetadataParams, Rule, Symbol, SymbolType};
use hashbrown::HashMap;
use std::mem;
@ -311,7 +311,7 @@ impl SymbolReplacer {
#[cfg(test)]
mod test {
use super::*;
use crate::grammars::VariableType;
use crate::generate::grammars::VariableType;
#[test]
fn test_extraction() {

View file

@ -1,7 +1,7 @@
use super::ExtractedSyntaxGrammar;
use crate::error::Result;
use crate::grammars::{Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable};
use crate::rules::{Alias, Associativity, Rule};
use crate::generate::grammars::{Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable};
use crate::generate::rules::{Alias, Associativity, Rule};
struct RuleFlattener {
production: Production,
@ -163,8 +163,8 @@ pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxG
#[cfg(test)]
mod tests {
use super::*;
use crate::grammars::VariableType;
use crate::rules::Symbol;
use crate::generate::grammars::VariableType;
use crate::generate::rules::Symbol;
#[test]
fn test_flatten_grammar() {

View file

@ -1,7 +1,7 @@
use super::InternedGrammar;
use crate::error::{Error, Result};
use crate::grammars::{InputGrammar, Variable, VariableType};
use crate::rules::{Rule, Symbol};
use crate::generate::grammars::{InputGrammar, Variable, VariableType};
use crate::generate::rules::{Rule, Symbol};
pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar> {
let interner = Interner { grammar };

View file

@ -14,10 +14,10 @@ use self::flatten_grammar::flatten_grammar;
use self::intern_symbols::intern_symbols;
use self::process_inlines::process_inlines;
use crate::error::Result;
use crate::grammars::{
use crate::generate::grammars::{
ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, SyntaxGrammar, Variable,
};
use crate::rules::{AliasMap, Rule, Symbol};
use crate::generate::rules::{AliasMap, Rule, Symbol};
pub(crate) struct IntermediateGrammar<T, U> {
variables: Vec<Variable>,

View file

@ -1,4 +1,4 @@
use crate::grammars::{InlinedProductionMap, Production, ProductionStep, SyntaxGrammar};
use crate::generate::grammars::{InlinedProductionMap, Production, ProductionStep, SyntaxGrammar};
use hashbrown::HashMap;
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
@ -184,8 +184,8 @@ pub(super) fn process_inlines(grammar: &SyntaxGrammar) -> InlinedProductionMap {
#[cfg(test)]
mod tests {
use super::*;
use crate::grammars::{ProductionStep, SyntaxVariable, VariableType};
use crate::rules::{Associativity, Symbol};
use crate::generate::grammars::{ProductionStep, SyntaxVariable, VariableType};
use crate::generate::rules::{Associativity, Symbol};
#[test]
fn test_basic_inlining() {

View file

@ -1,7 +1,7 @@
use crate::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType};
use crate::nfa::CharacterSet;
use crate::rules::{Alias, AliasMap, Symbol, SymbolType};
use crate::tables::{AdvanceAction, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry};
use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType};
use super::nfa::CharacterSet;
use super::rules::{Alias, AliasMap, Symbol, SymbolType};
use super::tables::{AdvanceAction, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry};
use core::ops::Range;
use hashbrown::{HashMap, HashSet};
use std::fmt::Write;

View file

@ -1,5 +1,5 @@
use crate::nfa::CharacterSet;
use crate::rules::{Alias, Associativity, Symbol};
use super::nfa::CharacterSet;
use super::rules::{Alias, Associativity, Symbol};
use hashbrown::HashMap;
pub(crate) type AliasSequenceId = usize;

View file

@ -7,24 +7,14 @@ extern crate serde_derive;
extern crate hashbrown;
extern crate serde_json;
use clap::{App, Arg, SubCommand};
use std::env;
use std::io::Write;
use std::path::PathBuf;
use std::process::{exit, Command, Stdio};
use std::usize;
mod build_tables;
mod error;
mod generate;
mod grammars;
mod logger;
mod nfa;
mod parse_grammar;
mod prepare_grammar;
mod render;
mod rules;
mod tables;
use clap::{App, Arg, SubCommand};
use std::env;
use std::process::exit;
use std::usize;
fn main() {
if let Err(e) = run() {
@ -77,43 +67,10 @@ fn run() -> error::Result<()> {
});
let mut grammar_path = env::current_dir().expect("Failed to read CWD");
grammar_path.push("grammar.js");
let grammar_json = load_js_grammar_file(grammar_path);
let code =
generate::generate_parser_for_grammar(&grammar_json, minimize, state_ids_to_log)?;
generate::generate_parser_for_grammar(&grammar_path, minimize, state_ids_to_log)?;
println!("{}", code);
}
Ok(())
}
fn load_js_grammar_file(grammar_path: PathBuf) -> String {
let mut node_process = Command::new("node")
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.expect("Failed to run `node`");
let js_prelude = include_str!("./js/dsl.js");
let mut node_stdin = node_process
.stdin
.take()
.expect("Failed to open stdin for node");
write!(
node_stdin,
"{}\nconsole.log(JSON.stringify(require(\"{}\"), null, 2));\n",
js_prelude,
grammar_path.to_str().unwrap()
)
.expect("Failed to write to node's stdin");
drop(node_stdin);
let output = node_process
.wait_with_output()
.expect("Failed to read output from node");
match output.status.code() {
None => panic!("Node process was killed"),
Some(0) => {}
Some(code) => panic!(format!("Node process exited with status {}", code)),
}
String::from_utf8(output.stdout).expect("Got invalid UTF8 from node")
}

View file

@ -1,6 +1,6 @@
#!/usr/bin/env bash
src_dir="src/runtime"
src_dir="lib/src"
allocation_functions=(
malloc