Initial commit

This commit is contained in:
Max Brunsfeld 2018-12-05 12:50:12 -08:00
commit a4c4b85a16
20 changed files with 2036 additions and 0 deletions

22
src/build_tables/item.rs Normal file
View file

@ -0,0 +1,22 @@
use crate::grammars::Production;
use std::collections::HashMap;
use bitvec::BitVec;
/// A set of lookahead tokens for a parse item.
#[derive(Debug, PartialEq, Eq)]
pub(super) struct LookaheadSet {
    // Bit set over terminal symbols — presumably one bit per terminal in the
    // lexical grammar; TODO confirm indexing scheme.
    terminal_bits: BitVec,
    // Bit set over external tokens.
    external_bits: BitVec,
    // Whether end-of-file is part of the set.
    eof: bool,
}

/// A position within the grammar: one step of one production of one variable.
/// Stored as plain indices so items are small, copyable, and hashable.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub(super) struct ParseItem {
    variable_index: u32,
    production_index: u32,
    step_index: u32,
}

/// A set of parse items, each paired with the lookaheads under which it applies.
// NOTE(review): build_tables/mod.rs keys a HashMap on ParseItemSet, which
// requires `Hash`; neither this struct nor LookaheadSet implements it, and the
// HashMap field cannot derive it — confirm a manual Hash impl is planned.
#[derive(Debug, PartialEq, Eq)]
pub(super) struct ParseItemSet {
    entries: HashMap<ParseItem, LookaheadSet>
}

34
src/build_tables/mod.rs Normal file
View file

@ -0,0 +1,34 @@
mod item;
use std::collections::{HashMap, VecDeque};
use crate::grammars::{SyntaxGrammar, LexicalGrammar};
use crate::tables::{ParseTable, LexTable, ParseStateId};
use crate::rules::{AliasMap, Symbol};
use crate::error::Result;
use self::item::ParseItemSet;
/// A path of symbols leading from the start state to a parse state.
type SymbolSequence = Vec<Symbol>;

/// Work-queue entry for breadth-first construction of parse states.
struct ParseStateQueueEntry {
    preceding_symbols: SymbolSequence,
    item_set: ParseItemSet,
    state_id: ParseStateId,
}

/// Accumulated state for building a `ParseTable` from the prepared grammars.
struct ParseTableBuilder<'a> {
    syntax_grammar: &'a SyntaxGrammar,
    lexical_grammar: &'a LexicalGrammar,
    simple_aliases: &'a AliasMap,
    // Deduplicates states: identical item sets map to the same state id.
    // NOTE(review): this requires ParseItemSet to implement Hash, which it
    // currently does not (see item.rs) — confirm before first use.
    state_ids_by_item_set: HashMap<ParseItemSet, ParseStateId>,
    item_sets_by_state_id: Vec<&'a ParseItemSet>,
    parse_state_queue: VecDeque<ParseStateQueueEntry>,
    parse_table: ParseTable,
}
/// Builds all generated tables from the prepared grammars.
///
/// The returned tuple is destructured by `generate.rs` as
/// `(parse_table, main_lex_table, keyword_lex_table, keyword_capture_token)`.
/// Not yet implemented in this commit.
pub fn build_tables(
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
    simple_aliases: &AliasMap
) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
    unimplemented!();
}

13
src/error.rs Normal file
View file

@ -0,0 +1,13 @@
/// Errors that can occur while generating a parser.
#[derive(Debug)]
pub enum Error {
    /// The grammar definition itself is invalid (e.g. malformed JSON, or a
    /// hidden start rule).
    GrammarError(String),
    /// A rule refers to a symbol that is not defined anywhere in the grammar.
    SymbolError(String),
}

// A public error type should be printable and usable with `Box<dyn Error>` /
// error-reporting crates; the original enum implemented neither Display nor
// std::error::Error.
impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            Error::GrammarError(message) => write!(f, "Grammar error: {}", message),
            Error::SymbolError(message) => write!(f, "Symbol error: {}", message),
        }
    }
}

impl std::error::Error for Error {}
/// Crate-wide result alias using this module's `Error`.
pub type Result<T> = std::result::Result<T, Error>;

// JSON (de)serialization failures are reported as grammar errors, since the
// grammar file is the JSON input handled by this crate (see parse_grammar).
impl From<serde_json::Error> for Error {
    fn from(error: serde_json::Error) -> Self {
        Error::GrammarError(error.to_string())
    }
}

26
src/generate.rs Normal file
View file

@ -0,0 +1,26 @@
use crate::error::Result;
use crate::parse_grammar::parse_grammar;
use crate::prepare_grammar::prepare_grammar;
use crate::build_tables::build_tables;
use crate::render::render_c_code;
/// End-to-end parser generation: grammar JSON in, C source out.
///
/// Pipeline: parse the JSON grammar, run the preparation passes, build the
/// parse and lex tables, then render everything as C code.
///
/// # Errors
/// Propagates any `Error` from parsing, preparation, or table construction.
pub fn generate_parser_for_grammar(input: String) -> Result<String> {
    let input_grammar = parse_grammar(&input)?;
    let (syntax_grammar, lexical_grammar, simple_aliases) = prepare_grammar(&input_grammar)?;
    let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
        &syntax_grammar,
        &lexical_grammar,
        &simple_aliases
    )?;
    let c_code = render_c_code(
        &input_grammar.name,
        parse_table,
        main_lex_table,
        keyword_lex_table,
        keyword_capture_token,
        syntax_grammar,
        lexical_grammar,
        simple_aliases
    );
    Ok(c_code)
}

98
src/grammars.rs Normal file
View file

@ -0,0 +1,98 @@
use crate::rules::{Associativity, Alias, Rule, Symbol};
/// Classification of a grammar variable.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum VariableType {
    // Name starts with '_' (see intern_symbols::variable_type_for_name).
    Hidden,
    // Generated internally, e.g. the "<name>_repeatN" helpers from expand_repeats.
    Auxiliary,
    // Has no name of its own, e.g. an external token given as a non-symbol rule.
    Anonymous,
    Named
}
// Input grammar

/// A single named rule, exactly as written in the grammar file.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct InputVariable {
    pub name: String,
    pub kind: VariableType,
    pub rule: Rule,
}

/// The whole grammar as parsed from grammar.json, before any preparation
/// passes run. Optional grammar.json sections become empty vectors.
// Debug derive added for consistency with InputVariable and the other grammar
// types, so whole grammars can be printed in diagnostics; every field already
// implements Debug.
#[derive(Debug, PartialEq, Eq)]
pub struct InputGrammar {
    pub name: String,
    pub variables: Vec<InputVariable>,
    pub extra_tokens: Vec<Rule>,
    pub expected_conflicts: Vec<Vec<String>>,
    pub external_tokens: Vec<Rule>,
    pub variables_to_inline: Vec<String>,
    pub word_token: Option<String>,
}
// Extracted lexical grammar

/// A token-level variable produced by extract_tokens.
#[derive(PartialEq, Eq)]
pub struct LexicalVariable {
    name: String,
    kind: VariableType,
    rule: Rule,
    // Presumably marks tokens that came from plain string literals — confirm
    // against extract_tokens once it is implemented.
    is_string: bool,
}

/// The token half of the grammar: all tokens plus separator rules.
pub struct LexicalGrammar {
    variables: Vec<LexicalVariable>,
    separators: Vec<Rule>,
}
// Extracted syntax grammar

/// One symbol occurrence within a production, with the metadata attributes
/// attached to that occurrence.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ProductionStep {
    symbol: Symbol,
    precedence: i32,
    associativity: Option<Associativity>,
    alias: Option<Alias>,
    is_excluded: bool,
}

/// A single flattened right-hand side: a sequence of steps.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Production {
    steps: Vec<ProductionStep>,
    dynamic_precedence: i32,
}

/// A non-terminal variable after token extraction.
// NOTE(review): unlike the other variable types this carries no rule or
// productions — confirm whether a productions field is still to be added.
#[derive(Clone, PartialEq, Eq)]
pub struct SyntaxVariable {
    name: String,
    kind: VariableType,
}

/// An externally-scanned token, linked to the internal token it corresponds to.
#[derive(Clone, PartialEq, Eq)]
pub struct ExternalToken {
    name: String,
    kind: VariableType,
    corresponding_internal_token: Symbol,
}

/// The non-terminal half of the grammar after all preparation passes.
pub struct SyntaxGrammar {
    variables: Vec<SyntaxVariable>,
    extra_tokens: Vec<Symbol>,
    expected_conflicts: Vec<Vec<Symbol>>,
    external_tokens: Vec<ExternalToken>,
    variables_to_inline: Vec<Symbol>,
    // NOTE(review): every earlier stage stores the word token as an Option
    // (InputGrammar: Option<String>, IntermediateGrammar: Option<Symbol>);
    // a bare Symbol here makes word-less grammars unrepresentable — confirm.
    word_token: Symbol,
}
#[cfg(test)]
impl InputVariable {
    /// Test-only shorthand shared by the public constructors below.
    fn build(name: &str, kind: VariableType, rule: Rule) -> Self {
        InputVariable {
            name: name.to_string(),
            kind,
            rule,
        }
    }

    /// A visible, named variable.
    pub fn named(name: &str, rule: Rule) -> Self {
        Self::build(name, VariableType::Named, rule)
    }

    /// An internally-generated auxiliary variable.
    pub fn auxiliary(name: &str, rule: Rule) -> Self {
        Self::build(name, VariableType::Auxiliary, rule)
    }

    /// A hidden (underscore-prefixed) variable.
    pub fn hidden(name: &str, rule: Rule) -> Self {
        Self::build(name, VariableType::Hidden, rule)
    }
}

35
src/main.rs Normal file
View file

@ -0,0 +1,35 @@
use clap::{App, Arg, SubCommand};
#[macro_use] extern crate serde_derive;
#[macro_use] extern crate serde_json;
mod build_tables;
mod error;
mod generate;
mod grammars;
mod parse_grammar;
mod prepare_grammar;
mod render;
mod rules;
mod tables;
/// CLI entry point: declares the `generate`, `parse`, and `test` subcommands.
fn main() {
    // NOTE(review): `matches` is built but never inspected — no subcommand is
    // dispatched in this commit; presumably wired up in a later change.
    let matches = App::new("tree-sitter")
        .version("0.1")
        .author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
        .about("Generates and tests parsers")
        .subcommand(
            SubCommand::with_name("generate")
                .about("Generate a parser")
        ).subcommand(
            SubCommand::with_name("parse")
                .about("Parse a file")
                .arg(Arg::with_name("path").index(1))
        ).subcommand(
            SubCommand::with_name("test")
                .about("Run a parser's tests")
                .arg(Arg::with_name("path").index(1).required(true))
                .arg(Arg::with_name("line").index(2).required(true))
                .arg(Arg::with_name("column").index(3).required(true))
        );
}

153
src/parse_grammar.rs Normal file
View file

@ -0,0 +1,153 @@
use serde_json::{Map, Value};
use crate::error::Result;
use crate::grammars::{InputGrammar, InputVariable, VariableType};
use crate::rules::Rule;
use std::collections::HashMap;
/// Mirror of a rule node in grammar.json, deserialized directly by serde.
/// The JSON object's "type" field selects the variant (internal tagging).
#[derive(Deserialize)]
#[serde(tag = "type")]
// Variant names must match the uppercase "type" strings in the JSON exactly.
#[allow(non_camel_case_types)]
pub enum RuleJSON {
    BLANK,
    STRING {
        value: String,
    },
    PATTERN {
        value: String,
    },
    SYMBOL {
        name: String,
    },
    CHOICE {
        members: Vec<RuleJSON>,
    },
    SEQ {
        members: Vec<RuleJSON>,
    },
    REPEAT {
        content: Box<RuleJSON>,
    },
    PREC_LEFT {
        value: i32,
        content: Box<RuleJSON>,
    },
    PREC_RIGHT {
        value: i32,
        content: Box<RuleJSON>,
    },
    PREC {
        value: i32,
        content: Box<RuleJSON>,
    },
    TOKEN {
        content: Box<RuleJSON>,
    },
    TOKEN_IMMEDIATE {
        content: Box<RuleJSON>,
    },
}
/// Top-level shape of grammar.json. Optional sections may be absent from
/// the file; parse_grammar defaults them to empty collections.
#[derive(Deserialize)]
struct GrammarJSON {
    name: String,
    // Kept as raw JSON values so each rule can be deserialized individually.
    rules: Map<String, Value>,
    conflicts: Option<Vec<Vec<String>>>,
    externals: Option<Vec<RuleJSON>>,
    extras: Option<Vec<RuleJSON>>,
    inline: Option<Vec<String>>,
    word: Option<String>,
}
/// Parses the JSON text of a grammar file into an `InputGrammar`.
///
/// # Errors
/// Returns `Error::GrammarError` (via the `From<serde_json::Error>`
/// conversion) if the input is not valid grammar JSON.
pub fn parse_grammar(input: &str) -> Result<InputGrammar> {
    let grammar_json: GrammarJSON = serde_json::from_str(input)?;

    // Every top-level rule becomes a named variable.
    let mut variables = Vec::with_capacity(grammar_json.rules.len());
    for (name, value) in grammar_json.rules {
        variables.push(InputVariable {
            // `name` is already an owned String; the previous `to_owned()`
            // call cloned it needlessly.
            name,
            kind: VariableType::Named,
            rule: parse_rule(serde_json::from_value(value)?),
        })
    }

    // Optional sections default to empty.
    let extra_tokens = grammar_json.extras
        .unwrap_or_default()
        .into_iter()
        .map(parse_rule)
        .collect();
    let external_tokens = grammar_json.externals
        .unwrap_or_default()
        .into_iter()
        .map(parse_rule)
        .collect();
    let expected_conflicts = grammar_json.conflicts.unwrap_or_default();
    let variables_to_inline = grammar_json.inline.unwrap_or_default();

    Ok(InputGrammar {
        name: grammar_json.name,
        word_token: grammar_json.word,
        variables,
        extra_tokens,
        expected_conflicts,
        external_tokens,
        variables_to_inline,
    })
}
/// Converts one deserialized `RuleJSON` node — and, recursively, all of its
/// children — into the internal `Rule` representation.
fn parse_rule(json: RuleJSON) -> Rule {
    match json {
        RuleJSON::BLANK => Rule::Blank,
        RuleJSON::STRING { value } => Rule::String(value),
        RuleJSON::PATTERN { value } => Rule::Pattern(value),
        RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),
        RuleJSON::CHOICE { members } => {
            let elements = members.into_iter().map(parse_rule).collect();
            Rule::choice(elements)
        }
        RuleJSON::SEQ { members } => {
            let elements = members.into_iter().map(parse_rule).collect();
            Rule::seq(elements)
        }
        RuleJSON::REPEAT { content } => Rule::repeat(parse_rule(*content)),
        RuleJSON::PREC_LEFT { value, content } => Rule::prec_left(value, parse_rule(*content)),
        RuleJSON::PREC_RIGHT { value, content } => Rule::prec_right(value, parse_rule(*content)),
        RuleJSON::PREC { value, content } => Rule::prec(value, parse_rule(*content)),
        RuleJSON::TOKEN { content } => Rule::token(parse_rule(*content)),
        RuleJSON::TOKEN_IMMEDIATE { content } => Rule::immediate_token(parse_rule(*content)),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_grammar() {
        // Build the input with the json! macro so the test reads like an
        // actual grammar.json file.
        let grammar = parse_grammar(&json!({
            "name": "my_lang",
            "rules": {
                "file": {
                    "type": "REPEAT",
                    "content": {
                        "type": "SYMBOL",
                        "name": "statement"
                    }
                },
                "statement": {
                    "type": "STRING",
                    "value": "foo"
                }
            }
        }).to_string()).unwrap();

        assert_eq!(grammar.name, "my_lang");
        // Both top-level rules become Named variables.
        assert_eq!(grammar.variables, vec![
            InputVariable {
                name: "file".to_string(),
                kind: VariableType::Named,
                rule: Rule::repeat(Rule::NamedSymbol("statement".to_string()))
            },
            InputVariable {
                name: "statement".to_string(),
                kind: VariableType::Named,
                rule: Rule::String("foo".to_string())
            },
        ]);
    }
}

View file

@ -0,0 +1,220 @@
use crate::rules::{Rule, Symbol};
use crate::grammars::{InputVariable, VariableType};
use std::collections::HashMap;
use std::mem;
use std::rc::Rc;
use super::ExtractedGrammar;
/// Per-grammar state for the repeat-expansion pass.
struct Expander {
    // Name of the variable currently being expanded (buffer reused across variables).
    variable_name: String,
    // Number of repeats expanded in the current variable; used to number
    // the generated "<name>_repeatN" rules.
    repeat_count_in_variable: usize,
    // Number of variables in the original grammar; auxiliary variables are
    // appended after these, so their symbol indices start here.
    preceding_symbol_count: usize,
    // Auxiliary variables created so far, appended to the grammar at the end.
    auxiliary_variables: Vec<InputVariable>,
    // Maps already-expanded repeat content to its auxiliary symbol, so the
    // same repetition is only expanded once per grammar.
    existing_repeats: HashMap<Rule, Symbol>
}
impl Expander {
    /// Rewrites one variable's rule in place, replacing every `Repeat` with
    /// a reference to an auxiliary variable.
    fn expand_variable(&mut self, variable: &mut InputVariable) {
        // Reset per-variable state; the name seeds "<name>_repeatN".
        self.variable_name.clear();
        self.variable_name.push_str(&variable.name);
        self.repeat_count_in_variable = 0;
        // Move the rule out so it can be rebuilt from a borrow of the original.
        let mut rule = Rule::Blank;
        mem::swap(&mut rule, &mut variable.rule);
        variable.rule = self.expand_rule(&rule);
    }

    /// Returns a copy of `rule` in which every `Repeat` has been replaced by
    /// a symbol referring to an auxiliary variable, creating (and
    /// deduplicating) those variables as needed.
    fn expand_rule(&mut self, rule: &Rule) -> Rule {
        match rule {
            Rule::Choice { elements } =>
                Rule::Choice {
                    elements: elements.iter().map(|element| self.expand_rule(element)).collect()
                },
            Rule::Seq { left, right } =>
                Rule::Seq {
                    left: Rc::new(self.expand_rule(left)),
                    right: Rc::new(self.expand_rule(right)),
                },
            Rule::Repeat(content) => {
                // Expand inner repeats first; if the resulting content was
                // already seen anywhere in the grammar, reuse its auxiliary
                // variable instead of making a new one.
                let inner_rule = self.expand_rule(content);
                if let Some(existing_symbol) = self.existing_repeats.get(&inner_rule) {
                    return Rule::Symbol(*existing_symbol);
                }
                self.repeat_count_in_variable += 1;
                let rule_name = format!("{}_repeat{}", self.variable_name, self.repeat_count_in_variable);
                // The new variable's index comes after all original variables
                // plus any auxiliaries created so far.
                let repeat_symbol = Symbol::non_terminal(self.preceding_symbol_count + self.auxiliary_variables.len());
                let rc_symbol = Rc::new(Rule::Symbol(repeat_symbol));
                self.existing_repeats.insert(inner_rule.clone(), repeat_symbol);
                // The auxiliary rule derives either two copies of itself or a
                // single copy of the repeated content.
                self.auxiliary_variables.push(InputVariable {
                    name: rule_name,
                    kind: VariableType::Auxiliary,
                    rule: Rule::Choice {
                        elements: vec![
                            Rule::Seq {
                                left: rc_symbol.clone(),
                                right: rc_symbol
                            },
                            inner_rule
                        ],
                    },
                });
                Rule::Symbol(repeat_symbol)
            }
            Rule::Metadata { rule, params } => Rule::Metadata {
                rule: Rc::new(self.expand_rule(rule)),
                params: params.clone()
            },
            // Leaf rules contain no repeats; copy them unchanged.
            _ => rule.clone()
        }
    }
}
/// Replaces every `Rule::Repeat` in the grammar with a reference to an
/// auxiliary variable, appending the generated variables to the grammar.
pub(super) fn expand_repeats(mut grammar: ExtractedGrammar) -> ExtractedGrammar {
    let mut expander = Expander {
        variable_name: String::new(),
        repeat_count_in_variable: 0,
        preceding_symbol_count: grammar.variables.len(),
        auxiliary_variables: Vec::new(),
        existing_repeats: HashMap::new(),
    };
    // `iter_mut` already yields `&mut InputVariable`; the previous
    // `for mut variable in …` + `&mut variable` passed a `&mut &mut _`
    // that only worked via deref coercion.
    for variable in grammar.variables.iter_mut() {
        expander.expand_variable(variable);
    }
    grammar.variables.extend(expander.auxiliary_variables);
    grammar
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_basic_repeat_expansion() {
        // Repeats nested inside of sequences and choices are expanded.
        let grammar = expand_repeats(build_grammar(vec![
            InputVariable::named("rule0", Rule::seq(vec![
                Rule::terminal(10),
                Rule::choice(vec![
                    Rule::repeat(Rule::terminal(11)),
                    Rule::repeat(Rule::terminal(12)),
                ]),
                Rule::terminal(13),
            ])),
        ]));

        // One variable precedes the auxiliaries, so they get indices 1 and 2.
        assert_eq!(grammar.variables, vec![
            InputVariable::named("rule0", Rule::seq(vec![
                Rule::terminal(10),
                Rule::choice(vec![
                    Rule::non_terminal(1),
                    Rule::non_terminal(2),
                ]),
                Rule::terminal(13),
            ])),
            InputVariable::auxiliary("rule0_repeat1", Rule::choice(vec![
                Rule::seq(vec![
                    Rule::non_terminal(1),
                    Rule::non_terminal(1),
                ]),
                Rule::terminal(11),
            ])),
            InputVariable::auxiliary("rule0_repeat2", Rule::choice(vec![
                Rule::seq(vec![
                    Rule::non_terminal(2),
                    Rule::non_terminal(2),
                ]),
                Rule::terminal(12),
            ])),
        ]);
    }

    #[test]
    fn test_repeat_deduplication() {
        // Terminal 4 appears inside of a repeat in three different places.
        let grammar = expand_repeats(build_grammar(vec![
            InputVariable::named("rule0", Rule::choice(vec![
                Rule::seq(vec![ Rule::terminal(1), Rule::repeat(Rule::terminal(4)) ]),
                Rule::seq(vec![ Rule::terminal(2), Rule::repeat(Rule::terminal(4)) ]),
            ])),
            InputVariable::named("rule1", Rule::seq(vec![
                Rule::terminal(3),
                Rule::repeat(Rule::terminal(4)),
            ])),
        ]));

        // Only one auxiliary rule is created for repeating terminal 4.
        assert_eq!(grammar.variables, vec![
            InputVariable::named("rule0", Rule::choice(vec![
                Rule::seq(vec![ Rule::terminal(1), Rule::non_terminal(2) ]),
                Rule::seq(vec![ Rule::terminal(2), Rule::non_terminal(2) ]),
            ])),
            InputVariable::named("rule1", Rule::seq(vec![
                Rule::terminal(3),
                Rule::non_terminal(2),
            ])),
            InputVariable::auxiliary("rule0_repeat1", Rule::choice(vec![
                Rule::seq(vec![
                    Rule::non_terminal(2),
                    Rule::non_terminal(2),
                ]),
                Rule::terminal(4),
            ]))
        ]);
    }

    #[test]
    fn test_expansion_of_nested_repeats() {
        let grammar = expand_repeats(build_grammar(vec![
            InputVariable::named("rule0", Rule::seq(vec![
                Rule::terminal(10),
                Rule::repeat(Rule::seq(vec![
                    Rule::terminal(11),
                    Rule::repeat(Rule::terminal(12))
                ])),
            ])),
        ]));

        // The inner repeat is expanded first (index 1), then the outer (index 2).
        assert_eq!(grammar.variables, vec![
            InputVariable::named("rule0", Rule::seq(vec![
                Rule::terminal(10),
                Rule::non_terminal(2),
            ])),
            InputVariable::auxiliary("rule0_repeat1", Rule::choice(vec![
                Rule::seq(vec![
                    Rule::non_terminal(1),
                    Rule::non_terminal(1),
                ]),
                Rule::terminal(12),
            ])),
            InputVariable::auxiliary("rule0_repeat2", Rule::choice(vec![
                Rule::seq(vec![
                    Rule::non_terminal(2),
                    Rule::non_terminal(2),
                ]),
                Rule::seq(vec![
                    Rule::terminal(11),
                    Rule::non_terminal(1),
                ]),
            ])),
        ]);
    }

    /// Wraps `variables` in an otherwise-empty `ExtractedGrammar`.
    fn build_grammar(variables: Vec<InputVariable>) -> ExtractedGrammar {
        ExtractedGrammar {
            variables,
            extra_tokens: Vec::new(),
            external_tokens: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),
            word_token: None,
        }
    }
}

View file

@ -0,0 +1,10 @@
use crate::rules::AliasMap;
use crate::grammars::{LexicalGrammar, SyntaxGrammar};
use super::ExtractedGrammar;
/// Computes the map of symbols that are aliased the same way everywhere they
/// appear; the mutable borrows suggest the aliases are removed from the
/// grammars in the process — confirm once implemented.
/// Not yet implemented in this commit.
pub(super) fn extract_simple_aliases(
    syntax_grammar: &mut SyntaxGrammar,
    lexical_grammar: &mut LexicalGrammar
) -> AliasMap {
    unimplemented!();
}

View file

@ -0,0 +1,7 @@
use crate::error::Result;
use crate::grammars::LexicalGrammar;
use super::{InternedGrammar, ExtractedGrammar};
/// Splits an interned grammar into its syntax-level part (`ExtractedGrammar`)
/// and its token-level part (`LexicalGrammar`).
/// Not yet implemented in this commit.
pub(super) fn extract_tokens(grammar: InternedGrammar) -> Result<(ExtractedGrammar, LexicalGrammar)> {
    unimplemented!();
}

View file

@ -0,0 +1,7 @@
use crate::error::Result;
use crate::grammars::SyntaxGrammar;
use super::ExtractedGrammar;
/// Converts the repeat-expanded grammar into the final `SyntaxGrammar` —
/// presumably flattening each rule into `Production` step lists; confirm
/// once implemented. Not yet implemented in this commit.
pub(super) fn flatten_grammar(grammar: ExtractedGrammar) -> Result<SyntaxGrammar> {
    unimplemented!();
}

View file

@ -0,0 +1,237 @@
use crate::error::{Error, Result};
use crate::rules::{Rule, Symbol};
use crate::grammars::{InputGrammar, InputVariable, VariableType};
use std::rc::Rc;
use super::InternedGrammar;
pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar> {
let interner = Interner { grammar };
if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden {
return Err(Error::GrammarError("Grammar's start rule must be visible".to_string()));
}
let mut variables = Vec::with_capacity(grammar.variables.len());
for variable in grammar.variables.iter() {
variables.push(InputVariable {
name: variable.name.clone(),
kind: variable_type_for_name(&variable.name),
rule: interner.intern_rule(&variable.rule)?,
});
}
let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len());
for external_token in grammar.external_tokens.iter() {
let rule = interner.intern_rule(&external_token)?;
let (name, kind) = if let Rule::NamedSymbol(name) = external_token {
(name.clone(), variable_type_for_name(&name))
} else {
(String::new(), VariableType::Anonymous)
};
external_tokens.push(InputVariable { name, kind, rule });
}
let mut extra_tokens = Vec::with_capacity(grammar.extra_tokens.len());
for extra_token in grammar.extra_tokens.iter() {
extra_tokens.push(interner.intern_rule(extra_token)?);
}
let mut expected_conflicts = Vec::new();
for conflict in grammar.expected_conflicts.iter() {
let mut interned_conflict = Vec::with_capacity(conflict.len());
for name in conflict {
interned_conflict.push(interner
.intern_name(&name)
.ok_or_else(|| symbol_error(name))?
);
}
expected_conflicts.push(interned_conflict);
}
let mut variables_to_inline = Vec::new();
for name in grammar.variables_to_inline.iter() {
if let Some(symbol) = interner.intern_name(&name) {
variables_to_inline.push(symbol);
}
}
let mut word_token = None;
if let Some(name) = grammar.word_token.as_ref() {
word_token = Some(interner
.intern_name(&name)
.ok_or_else(|| symbol_error(&name))?
);
}
Ok(InternedGrammar {
variables,
external_tokens,
extra_tokens,
expected_conflicts,
variables_to_inline,
word_token,
})
}
/// Resolves rule names to `Symbol`s within one input grammar.
struct Interner<'a> {
    grammar: &'a InputGrammar
}

impl<'a> Interner<'a> {
    /// Recursively copies `rule`, replacing each `NamedSymbol` with its
    /// resolved `Symbol`.
    ///
    /// # Errors
    /// `SymbolError` if a named symbol cannot be resolved.
    fn intern_rule(&self, rule: &Rule) -> Result<Rule> {
        match rule {
            Rule::Choice { elements } => {
                let mut result = Vec::with_capacity(elements.len());
                for element in elements {
                    result.push(self.intern_rule(element)?);
                }
                Ok(Rule::Choice { elements: result })
            },
            Rule::Seq { left, right } =>
                Ok(Rule::Seq {
                    left: Rc::new(self.intern_rule(left)?),
                    right: Rc::new(self.intern_rule(right)?),
                }),
            Rule::Repeat(content) =>
                Ok(Rule::Repeat(Rc::new(self.intern_rule(content)?))),
            Rule::Metadata { rule, params } =>
                Ok(Rule::Metadata {
                    rule: Rc::new(self.intern_rule(rule)?),
                    params: params.clone()
                }),
            Rule::NamedSymbol(name) => self
                .intern_name(name)
                .map(Rule::Symbol)
                .ok_or_else(|| symbol_error(name)),
            // Leaf rules (strings, patterns, already-resolved symbols, …)
            // are copied unchanged.
            _ => Ok(rule.clone())
        }
    }

    /// Looks `symbol` up by name. Internal variables take precedence over
    /// external tokens, matching how `intern_symbols` numbers them.
    fn intern_name(&self, symbol: &str) -> Option<Symbol> {
        for (i, variable) in self.grammar.variables.iter().enumerate() {
            if variable.name == symbol {
                return Some(Symbol::non_terminal(i));
            }
        }
        for (i, external_token) in self.grammar.external_tokens.iter().enumerate() {
            if let Rule::NamedSymbol(name) = external_token {
                if name == symbol {
                    return Some(Symbol::external(i));
                }
            }
        }
        None
    }
}
/// Builds the error reported when a rule references an undefined name.
fn symbol_error(name: &str) -> Error {
    Error::SymbolError(format!("Undefined symbol '{}'", name))
}

/// Classifies a variable by its name: a leading underscore marks it hidden.
fn variable_type_for_name(name: &str) -> VariableType {
    // char pattern avoids the substring comparison of `starts_with("_")`.
    if name.starts_with('_') {
        VariableType::Hidden
    } else {
        VariableType::Named
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Renamed from `test_basic_repeat_expansion` — that name was copy-pasted
    // from the expand_repeats tests; this test exercises symbol interning.
    #[test]
    fn test_interning_symbols_in_rules() {
        let grammar = intern_symbols(&build_grammar(vec![
            InputVariable::named("x", Rule::choice(vec![
                Rule::named("y"),
                Rule::named("_z"),
            ])),
            InputVariable::named("y", Rule::named("_z")),
            InputVariable::named("_z", Rule::string("a")),
        ])).unwrap();

        // Names resolve to variable indices; '_z' is classified as hidden.
        assert_eq!(grammar.variables, vec![
            InputVariable::named("x", Rule::choice(vec![
                Rule::non_terminal(1),
                Rule::non_terminal(2),
            ])),
            InputVariable::named("y", Rule::non_terminal(2)),
            InputVariable::hidden("_z", Rule::string("a")),
        ]);
    }

    #[test]
    fn test_interning_external_token_names() {
        // Variable `y` is both an internal and an external token.
        // Variable `z` is just an external token.
        let mut input_grammar = build_grammar(vec![
            InputVariable::named("w", Rule::choice(vec![
                Rule::named("x"),
                Rule::named("y"),
                Rule::named("z"),
            ])),
            InputVariable::named("x", Rule::string("a")),
            InputVariable::named("y", Rule::string("b")),
        ]);
        input_grammar.external_tokens.extend(vec![
            Rule::named("y"),
            Rule::named("z"),
        ]);

        let grammar = intern_symbols(&input_grammar).unwrap();

        // Variable `y` is referred to by its internal index.
        // Variable `z` is referred to by its external index.
        assert_eq!(grammar.variables, vec![
            InputVariable::named("w", Rule::choice(vec![
                Rule::non_terminal(1),
                Rule::non_terminal(2),
                Rule::external(1),
            ])),
            InputVariable::named("x", Rule::string("a")),
            InputVariable::named("y", Rule::string("b")),
        ]);

        // The external token for `y` refers back to its internal index.
        assert_eq!(grammar.external_tokens, vec![
            InputVariable::named("y", Rule::non_terminal(2)),
            InputVariable::named("z", Rule::external(1)),
        ]);
    }

    #[test]
    fn test_grammar_with_undefined_symbols() {
        let result = intern_symbols(&build_grammar(vec![
            InputVariable::named("x", Rule::named("y")),
        ]));
        match result {
            Err(Error::SymbolError(message)) => assert_eq!(message, "Undefined symbol 'y'"),
            _ => panic!("Expected an error but got none"),
        }
    }

    /// Wraps `variables` in an otherwise-empty `InputGrammar`.
    fn build_grammar(variables: Vec<InputVariable>) -> InputGrammar {
        InputGrammar {
            variables,
            name: "the_language".to_string(),
            extra_tokens: Vec::new(),
            external_tokens: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),
            word_token: None,
        }
    }
}

View file

@ -0,0 +1,40 @@
mod intern_symbols;
mod extract_tokens;
mod expand_repeats;
mod flatten_grammar;
mod normalize_rules;
mod extract_simple_aliases;
use crate::rules::{AliasMap, Rule, Symbol};
use crate::grammars::{InputGrammar, SyntaxGrammar, LexicalGrammar, InputVariable, ExternalToken};
use crate::error::Result;
use self::intern_symbols::intern_symbols;
use self::extract_tokens::extract_tokens;
use self::expand_repeats::expand_repeats;
use self::flatten_grammar::flatten_grammar;
use self::normalize_rules::normalize_rules;
use self::extract_simple_aliases::extract_simple_aliases;
/// Shared shape for grammars flowing between preparation passes. The type
/// parameters are the stage-specific representations of extra tokens (`T`)
/// and external tokens (`U`).
pub(self) struct IntermediateGrammar<T, U> {
    variables: Vec<InputVariable>,
    extra_tokens: Vec<T>,
    expected_conflicts: Vec<Vec<Symbol>>,
    external_tokens: Vec<U>,
    variables_to_inline: Vec<Symbol>,
    word_token: Option<Symbol>,
}

/// After intern_symbols: names resolved, tokens not yet extracted.
pub(self) type InternedGrammar = IntermediateGrammar<Rule, InputVariable>;
/// After extract_tokens: token rules moved out into the LexicalGrammar.
pub(self) type ExtractedGrammar = IntermediateGrammar<Symbol, ExternalToken>;
/// Runs the full preparation pipeline over a parsed grammar:
/// intern names → extract tokens → expand repeats → flatten →
/// normalize token rules → extract simple aliases.
///
/// # Errors
/// Propagates any `Error` produced by the individual passes.
pub fn prepare_grammar(
    input_grammar: &InputGrammar
) -> Result<(SyntaxGrammar, LexicalGrammar, AliasMap)> {
    let interned_grammar = intern_symbols(input_grammar)?;
    let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
    let syntax_grammar = expand_repeats(syntax_grammar);
    // Mutable because extract_simple_aliases modifies both grammars in place.
    let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
    let mut lexical_grammar = normalize_rules(lexical_grammar);
    let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &mut lexical_grammar);
    Ok((syntax_grammar, lexical_grammar, simple_aliases))
}

View file

@ -0,0 +1,5 @@
use crate::grammars::LexicalGrammar;
/// Normalizes the token rules of the lexical grammar.
/// Not yet implemented in this commit.
pub(super) fn normalize_rules(grammar: LexicalGrammar) -> LexicalGrammar {
    unimplemented!();
}

16
src/render/mod.rs Normal file
View file

@ -0,0 +1,16 @@
use crate::rules::{Symbol, AliasMap};
use crate::grammars::{SyntaxGrammar, LexicalGrammar};
use crate::tables::{ParseTable, LexTable};
/// Renders the generated tables and grammar metadata as C source code for
/// the language named `name`. Not yet implemented in this commit.
pub fn render_c_code(
    name: &str,
    parse_table: ParseTable,
    main_lex_table: LexTable,
    keyword_lex_table: LexTable,
    keyword_capture_token: Option<Symbol>,
    syntax_grammar: SyntaxGrammar,
    lexical_grammar: LexicalGrammar,
    simple_aliases: AliasMap,
) -> String {
    unimplemented!();
}

205
src/rules.rs Normal file
View file

@ -0,0 +1,205 @@
use std::rc::Rc;
use std::collections::HashMap;
/// Which of the three symbol namespaces a `Symbol` belongs to. Indices are
/// only comparable within the same kind.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum SymbolType {
    External,
    Terminal,
    NonTerminal,
}

/// Operator associativity, attached to rules via `prec_left` / `prec_right`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Associativity {
    Left,
    Right
}

/// A substitute display name for a symbol, and whether the aliased node
/// counts as named.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Alias {
    value: String,
    is_named: bool,
}

/// Symbols that should always be rendered under a particular alias.
pub type AliasMap = HashMap<Symbol, Alias>;

/// All attributes that can be attached to a rule through `Rule::Metadata`.
/// `Default` yields the "no attributes" state that the builders start from.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub struct MetadataParams {
    precedence: Option<i32>,
    dynamic_precedence: i32,
    associativity: Option<Associativity>,
    is_token: bool,
    is_string: bool,
    is_active: bool,
    is_main_token: bool,
    is_excluded: bool,
    alias: Option<Alias>,
}

/// A resolved reference to a grammar entity: a namespace plus an index into
/// the corresponding table.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct Symbol {
    kind: SymbolType,
    index: usize,
}

/// The rule AST shared by all grammar-preparation passes. Child rules are
/// held in `Rc` so subtrees can be shared cheaply.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Rule {
    Blank,
    CharacterSet(Vec<char>),
    String(String),
    Pattern(String),
    // A symbol still identified by name; replaced by `Symbol` during interning.
    NamedSymbol(String),
    Symbol(Symbol),
    Choice {
        elements: Vec<Rule>,
    },
    Metadata {
        params: MetadataParams,
        rule: Rc<Rule>,
    },
    Repeat(Rc<Rule>),
    Seq {
        left: Rc<Rule>,
        right: Rc<Rule>,
    }
}

impl Rule {
    /// Marks `content` as a single token.
    pub fn token(content: Rule) -> Self {
        add_metadata(content, |params| {
            params.is_token = true;
        })
    }

    /// Marks `content` as a token, additionally setting `is_main_token`
    /// (this is what the JSON `TOKEN_IMMEDIATE` form maps to).
    pub fn immediate_token(content: Rule) -> Self {
        add_metadata(content, |params| {
            params.is_token = true;
            params.is_main_token = true;
        })
    }

    /// Assigns a precedence to `content` without an associativity.
    pub fn prec(value: i32, content: Rule) -> Self {
        add_metadata(content, |params| {
            params.precedence = Some(value);
        })
    }

    /// Assigns a precedence and left associativity to `content`.
    pub fn prec_left(value: i32, content: Rule) -> Self {
        add_metadata(content, |params| {
            params.associativity = Some(Associativity::Left);
            params.precedence = Some(value);
        })
    }

    /// Assigns a precedence and right associativity to `content`.
    pub fn prec_right(value: i32, content: Rule) -> Self {
        add_metadata(content, |params| {
            params.associativity = Some(Associativity::Right);
            params.precedence = Some(value);
        })
    }

    pub fn repeat(rule: Rule) -> Self {
        Rule::Repeat(Rc::new(rule))
    }

    /// Builds a `Choice`, flattening nested choices and dropping duplicates.
    pub fn choice(rules: Vec<Rule>) -> Self {
        let mut elements = Vec::with_capacity(rules.len());
        for rule in rules {
            choice_helper(&mut elements, rule);
        }
        Rule::Choice { elements }
    }

    /// Builds a left-nested `Seq` from `rules`, skipping elements that are
    /// blank (including metadata wrapped around a blank). An empty or
    /// all-blank list yields `Rule::Blank`; a single surviving element is
    /// returned unwrapped.
    pub fn seq(rules: Vec<Rule>) -> Self {
        let mut result = Rule::Blank;
        for rule in rules {
            // A bare blank, or metadata wrapping a blank, adds nothing.
            let is_blank = match &rule {
                Rule::Blank => true,
                Rule::Metadata { rule, .. } => **rule == Rule::Blank,
                _ => false,
            };
            if is_blank {
                continue;
            }
            // Fold every remaining element into the sequence. The previous
            // version's Metadata match arm fell through without folding, so
            // non-blank metadata-wrapped rules (e.g. `prec(...)` elements)
            // were silently dropped from the sequence.
            result = if result == Rule::Blank {
                rule
            } else {
                Rule::Seq {
                    left: Rc::new(result),
                    right: Rc::new(rule),
                }
            };
        }
        result
    }

    pub fn terminal(index: usize) -> Self {
        Rule::Symbol(Symbol::terminal(index))
    }

    pub fn non_terminal(index: usize) -> Self {
        Rule::Symbol(Symbol::non_terminal(index))
    }

    pub fn external(index: usize) -> Self {
        Rule::Symbol(Symbol::external(index))
    }

    // Generalized from `&'static str` to `&str`: the string is copied into an
    // owned String either way, so the 'static restriction served no purpose.
    pub fn named(name: &str) -> Self {
        Rule::NamedSymbol(name.to_string())
    }

    pub fn string(value: &str) -> Self {
        Rule::String(value.to_string())
    }
}

impl Symbol {
    pub fn non_terminal(index: usize) -> Self {
        Symbol { kind: SymbolType::NonTerminal, index }
    }

    pub fn terminal(index: usize) -> Self {
        Symbol { kind: SymbolType::Terminal, index }
    }

    pub fn external(index: usize) -> Self {
        Symbol { kind: SymbolType::External, index }
    }
}

impl From<Symbol> for Rule {
    fn from(symbol: Symbol) -> Self {
        Rule::Symbol(symbol)
    }
}

/// Applies `f` to `input`'s metadata params, first wrapping `input` in a
/// fresh `Rule::Metadata` if it is not one already.
// FnOnce is the weakest bound that works: each closure is called exactly once.
fn add_metadata(input: Rule, f: impl FnOnce(&mut MetadataParams)) -> Rule {
    match input {
        Rule::Metadata { rule, mut params } => {
            f(&mut params);
            Rule::Metadata { rule, params }
        },
        _ => {
            let mut params = MetadataParams::default();
            f(&mut params);
            Rule::Metadata { rule: Rc::new(input), params }
        }
    }
}

/// Recursively flattens `rule` into `result`, skipping exact duplicates.
fn choice_helper(result: &mut Vec<Rule>, rule: Rule) {
    match rule {
        Rule::Choice { elements } => {
            for element in elements {
                choice_helper(result, element);
            }
        },
        _ => {
            if !result.contains(&rule) {
                result.push(rule);
            }
        }
    }
}

77
src/tables.rs Normal file
View file

@ -0,0 +1,77 @@
use std::collections::HashMap;
use std::ops::Range;
use crate::rules::{Associativity, Symbol, Alias};
/// Index into `ParseTable::alias_sequences`.
pub type AliasSequenceId = usize;
/// Index into `ParseTable::states`.
pub type ParseStateId = usize;
/// Index into `LexTable::states`.
pub type LexStateId = usize;

// NOTE(review): overlaps with `ParseAction` below and is not referenced in
// any of the files shown — possibly intended for the C rendering stage;
// confirm it is still needed.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParseActionType {
    Error,
    Shift,
    Reduce,
    Accept,
    Recover,
}

/// A single action the parser can take on a lookahead token.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParseAction {
    Accept,
    Error,
    Shift(ParseStateId),
    ShiftExtra,
    Recover,
    Reduce {
        symbol: Symbol,
        child_count: usize,
        precedence: i32,
        dynamic_precedence: i32,
        associativity: Option<Associativity>,
        alias_sequence_id: Option<AliasSequenceId>,
        is_repetition: bool,
    }
}
/// The actions available for one lookahead token in one parse state.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParseTableEntry {
    actions: Vec<ParseAction>,
    // Presumably marks whether a previously-scanned token can be reused in
    // this state — confirm against the table-building code once written.
    reusable: bool,
}

/// One row of the parse table: actions keyed by terminal, plus goto entries
/// keyed by non-terminal.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParseState {
    terminal_entries: HashMap<Symbol, ParseTableEntry>,
    nonterminal_entries: HashMap<Symbol, ParseStateId>
}

/// The complete LR parse table plus the alias sequences it references.
#[derive(Debug, PartialEq, Eq)]
pub struct ParseTable {
    states: Vec<ParseState>,
    alias_sequences: Vec<Vec<Alias>>,
}
/// A transition in the lexer's state machine.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct AdvanceAction {
    state: LexStateId,
    precedence: Range<i32>,
    in_main_token: bool,
}

/// Accepting a token in a lex state, with its precedences.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct AcceptTokenAction {
    symbol: Symbol,
    precedence: i32,
    implicit_precedence: i32,
}

/// One lexer state: transitions keyed by symbol plus an optional accept action.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct LexState {
    advance_actions: HashMap<Symbol, AdvanceAction>,
    accept_action: Option<AcceptTokenAction>,
}

/// The complete lexer state machine.
#[derive(Debug, PartialEq, Eq)]
pub struct LexTable {
    states: Vec<LexState>,
}