From d482894c7d40b9b563262fef49e2ec81f96d346a Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Sat, 8 Dec 2018 23:35:48 -0800
Subject: [PATCH] Implement expand_tokens

---
 src/grammars.rs                               |  12 +-
 src/main.rs                                   |  11 +-
 src/nfa.rs                                    |   3 +-
 src/prepare_grammar/expand_repeats.rs         |  40 +++---
 .../{normalize_rules.rs => expand_tokens.rs}  | 130 +++++++++++++-----
 src/prepare_grammar/extract_simple_aliases.rs |   1 -
 src/prepare_grammar/extract_tokens.rs         |  45 +++---
 src/prepare_grammar/flatten_grammar.rs        |   4 +-
 src/prepare_grammar/intern_symbols.rs         |  26 ++--
 src/prepare_grammar/mod.rs                    |  15 +-
 src/rules.rs                                  |  44 ++----
 11 files changed, 192 insertions(+), 139 deletions(-)
 rename src/prepare_grammar/{normalize_rules.rs => expand_tokens.rs} (61%)

diff --git a/src/grammars.rs b/src/grammars.rs
index 62910637..c5e9aaa1 100644
--- a/src/grammars.rs
+++ b/src/grammars.rs
@@ -1,4 +1,5 @@
 use crate::rules::{Associativity, Alias, Rule, Symbol};
+use crate::nfa::Nfa;
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub(crate) enum VariableType {
@@ -30,10 +31,17 @@ pub(crate) struct InputGrammar {
 
 // Extracted lexical grammar
 
+#[derive(Debug, PartialEq, Eq)]
+pub(crate) struct LexicalVariable {
+    pub name: String,
+    pub kind: VariableType,
+    pub nfa: Nfa,
+}
+
 #[derive(Debug, PartialEq, Eq)]
 pub(crate) struct LexicalGrammar {
-    pub variables: Vec<Variable>,
-    pub separators: Vec<Rule>,
+    pub variables: Vec<LexicalVariable>,
+    pub separators: Vec<Nfa>,
 }
 
 // Extracted syntax grammar
diff --git a/src/main.rs b/src/main.rs
index 4d376929..b83764fc 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -14,7 +14,7 @@ mod render;
 mod rules;
 mod tables;
 
-fn main() {
+fn main() -> error::Result<()> {
     let matches = App::new("tree-sitter")
         .version("0.1")
        .author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
@@ -32,5 +32,12 @@
            .arg(Arg::with_name("path").index(1).required(true))
            .arg(Arg::with_name("line").index(2).required(true))
            .arg(Arg::with_name("column").index(3).required(true))
-        );
+        ).get_matches();
+
+    if let Some(matches) = matches.subcommand_matches("generate") {
+        let code = generate::generate_parser_for_grammar(String::new())?;
+        println!("{}", code);
+    }
+
+    Ok(())
 }
diff --git a/src/nfa.rs b/src/nfa.rs
index 55aa11dc..22cb2a2e 100644
--- a/src/nfa.rs
+++ b/src/nfa.rs
@@ -7,13 +7,14 @@ pub enum CharacterSet {
     Exclude(Vec<char>),
 }
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Eq)]
 pub enum NfaState {
     Advance(CharacterSet, u32),
     Split(u32, u32),
     Accept,
 }
 
+#[derive(PartialEq, Eq)]
 pub struct Nfa {
     pub states: Vec<NfaState>
 }
diff --git a/src/prepare_grammar/expand_repeats.rs b/src/prepare_grammar/expand_repeats.rs
index dcb8f916..85f37c80 100644
--- a/src/prepare_grammar/expand_repeats.rs
+++ b/src/prepare_grammar/expand_repeats.rs
@@ -3,7 +3,7 @@ use crate::grammars::{Variable, VariableType};
 use std::collections::HashMap;
 use std::mem;
 use std::rc::Rc;
-use super::ExtractedGrammar;
+use super::ExtractedSyntaxGrammar;
 
 struct Expander {
     variable_name: String,
@@ -25,16 +25,11 @@ impl Expander {
 
     fn expand_rule(&mut self, rule: &Rule) -> Rule {
         match rule {
-            Rule::Choice { elements } =>
-                Rule::Choice {
-                    elements: elements.iter().map(|element| self.expand_rule(element)).collect()
-                },
+            Rule::Choice(elements) =>
+                Rule::Choice(elements.iter().map(|element| self.expand_rule(element)).collect()),
 
-            Rule::Seq { left, right } =>
-                Rule::Seq {
-                    left: Rc::new(self.expand_rule(left)),
-                    right: Rc::new(self.expand_rule(right)),
-                },
+            Rule::Seq(elements) =>
+                Rule::Seq(elements.iter().map(|element| self.expand_rule(element)).collect()),
 
             Rule::Repeat(content) => {
                 let inner_rule = self.expand_rule(content);
@@ -46,27 +41,24 @@ impl Expander {
                 self.repeat_count_in_variable += 1;
                 let rule_name = format!("{}_repeat{}", self.variable_name, self.repeat_count_in_variable);
                 let repeat_symbol = Symbol::non_terminal(self.preceding_symbol_count + self.auxiliary_variables.len());
-                let rc_symbol = Rc::new(Rule::Symbol(repeat_symbol));
                 self.existing_repeats.insert(inner_rule.clone(), repeat_symbol);
                 self.auxiliary_variables.push(Variable {
                     name: rule_name,
                     kind: VariableType::Auxiliary,
-                    rule: Rule::Choice {
-                        elements: vec![
-                            Rule::Seq {
-                                left: rc_symbol.clone(),
-                                right: rc_symbol
-                            },
-                            inner_rule
-                        ],
-                    },
+                    rule: Rule::Choice(vec![
+                        Rule::Seq(vec![
+                            Rule::Symbol(repeat_symbol),
+                            Rule::Symbol(repeat_symbol),
+                        ]),
+                        inner_rule
+                    ]),
                 });
 
                 Rule::Symbol(repeat_symbol)
             }
 
             Rule::Metadata { rule, params } => Rule::Metadata {
-                rule: Rc::new(self.expand_rule(rule)),
+                rule: Box::new(self.expand_rule(rule)),
                 params: params.clone()
             },
 
@@ -75,7 +67,7 @@
     }
 }
 
-pub(super) fn expand_repeats(mut grammar: ExtractedGrammar) -> ExtractedGrammar {
+pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSyntaxGrammar {
     let mut expander = Expander {
         variable_name: String::new(),
         repeat_count_in_variable: 0,
@@ -207,8 +199,8 @@ mod tests {
         ]);
     }
 
-    fn build_grammar(variables: Vec<Variable>) -> ExtractedGrammar {
-        ExtractedGrammar {
+    fn build_grammar(variables: Vec<Variable>) -> ExtractedSyntaxGrammar {
+        ExtractedSyntaxGrammar {
             variables,
             extra_tokens: Vec::new(),
             external_tokens: Vec::new(),
diff --git a/src/prepare_grammar/normalize_rules.rs b/src/prepare_grammar/expand_tokens.rs
similarity index 61%
rename from src/prepare_grammar/normalize_rules.rs
rename to src/prepare_grammar/expand_tokens.rs
index 67177b4f..9cfa819f 100644
--- a/src/prepare_grammar/normalize_rules.rs
+++ b/src/prepare_grammar/expand_tokens.rs
@@ -1,10 +1,11 @@
 use crate::error::{Error, Result};
 use crate::rules::Rule;
-use crate::grammars::LexicalGrammar;
-use crate::nfa::{Nfa, NfaState, NfaCursor, CharacterSet};
+use crate::grammars::{LexicalGrammar, LexicalVariable};
+use crate::nfa::{Nfa, NfaState, CharacterSet};
+use super::{ExtractedLexicalGrammar};
 use regex_syntax::ast::{parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind};
 
-fn evaluate_perl_class(item: &ClassPerlKind) -> CharacterSet {
+fn expand_perl_character_class(item: &ClassPerlKind) -> CharacterSet {
     match item {
         ClassPerlKind::Digit => CharacterSet::empty()
             .add_range('0', '9'),
@@ -21,7 +22,7 @@
     }
 }
 
-fn evaluate_character_class(item: &ClassSetItem) -> Result<CharacterSet> {
+fn expand_character_class(item: &ClassSetItem) -> Result<CharacterSet> {
     match item {
         ClassSetItem::Empty(_) => Ok(CharacterSet::Include(Vec::new())),
         ClassSetItem::Literal(literal) => Ok(CharacterSet::Include(vec![literal.c])),
@@ -29,7 +30,7 @@
         ClassSetItem::Union(union) => {
             let mut result = CharacterSet::empty();
             for item in &union.items {
-                result = result.add(evaluate_character_class(&item)?);
+                result = result.add(expand_character_class(&item)?);
             }
             Ok(result)
         }
@@ -37,7 +38,7 @@
     }
 }
 
-fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
+fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
     match ast {
         Ast::Empty(_) => Ok(()),
         Ast::Flags(_) => Err(Error::regex("Flags are not supported")),
@@ -53,12 +54,12 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
         Ast::Class(class) => match class {
             Class::Unicode(_) => Err(Error::regex("Unicode character classes are not supported")),
             Class::Perl(class) => {
-                nfa.states.push(NfaState::Advance(evaluate_perl_class(&class.kind), next_state_index));
+                nfa.states.push(NfaState::Advance(expand_perl_character_class(&class.kind), next_state_index));
                 Ok(())
             },
             Class::Bracketed(class) => match &class.kind {
                 ClassSet::Item(item) => {
-                    let character_set = evaluate_character_class(&item)?;
+                    let character_set = expand_character_class(&item)?;
                     nfa.states.push(NfaState::Advance(character_set, next_state_index));
                     Ok(())
                 },
@@ -69,14 +70,14 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
         },
         Ast::Repetition(repetition) => match repetition.op.kind {
             RepetitionKind::ZeroOrOne => {
-                regex_to_nfa(&repetition.ast, nfa, next_state_index)?;
+                expand_regex(&repetition.ast, nfa, next_state_index)?;
                 nfa.prepend(|start_index| NfaState::Split(next_state_index, start_index));
                 Ok(())
             },
             RepetitionKind::OneOrMore => {
                 nfa.states.push(NfaState::Accept); // Placeholder for split
                 let split_index = nfa.start_index();
-                regex_to_nfa(&repetition.ast, nfa, split_index)?;
+                expand_regex(&repetition.ast, nfa, split_index)?;
                 nfa.states[split_index as usize] = NfaState::Split(
                     nfa.start_index(),
                     next_state_index
@@ -86,7 +87,7 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
             RepetitionKind::ZeroOrMore => {
                 nfa.states.push(NfaState::Accept); // Placeholder for split
                 let split_index = nfa.start_index();
-                regex_to_nfa(&repetition.ast, nfa, split_index)?;
+                expand_regex(&repetition.ast, nfa, split_index)?;
                 nfa.states[split_index as usize] = NfaState::Split(
                     nfa.start_index(),
                     next_state_index
@@ -96,11 +97,11 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
             },
             RepetitionKind::Range(_) => unimplemented!(),
         },
-        Ast::Group(group) => regex_to_nfa(&group.ast, nfa, nfa.start_index()),
+        Ast::Group(group) => expand_regex(&group.ast, nfa, nfa.start_index()),
         Ast::Alternation(alternation) => {
             let mut alternative_start_indices = Vec::new();
             for ast in alternation.asts.iter() {
-                regex_to_nfa(&ast, nfa, next_state_index)?;
+                expand_regex(&ast, nfa, next_state_index)?;
                 alternative_start_indices.push(nfa.start_index());
             }
             alternative_start_indices.pop();
@@ -111,7 +112,7 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
         },
         Ast::Concat(concat) => {
            for ast in concat.asts.iter().rev() {
-                regex_to_nfa(&ast, nfa, next_state_index)?;
+                expand_regex(&ast, nfa, next_state_index)?;
                next_state_index = nfa.start_index();
            }
            Ok(())
@@ -119,32 +120,77 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
     }
 }
 
-fn expand_rule(rule: Rule) -> Result<Nfa> {
+fn expand_rule(rule: Rule, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
     match rule {
         Rule::Pattern(s) => {
             let ast = parse::Parser::new().parse(&s).map_err(|e| Error::GrammarError(e.to_string()))?;
-            let mut nfa = Nfa::new();
-            regex_to_nfa(&ast, &mut nfa, 0)?;
-            Ok(nfa)
+            expand_regex(&ast, nfa, next_state_index)?;
+            Ok(())
         },
         Rule::String(s) => {
-            let mut nfa = Nfa::new();
             for c in s.chars().rev() {
                 nfa.prepend(|start_index| NfaState::Advance(CharacterSet::empty().add_char(c), start_index));
             }
-            Ok(nfa)
+            Ok(())
+        },
+        Rule::Choice(elements) => {
+            let mut alternative_start_indices = Vec::new();
+            for element in elements {
+                expand_rule(element, nfa, next_state_index)?;
+                alternative_start_indices.push(nfa.start_index());
+            }
+            alternative_start_indices.pop();
+            for alternative_start_index in alternative_start_indices {
+                nfa.prepend(|start_index| NfaState::Split(start_index, alternative_start_index));
+            }
+            Ok(())
+        },
+        Rule::Seq(elements) => {
+            for element in elements.into_iter().rev() {
+                expand_rule(element, nfa, next_state_index)?;
+                next_state_index = nfa.start_index();
+            }
+            Ok(())
+        },
+        Rule::Repeat(rule) => {
+            nfa.states.push(NfaState::Accept); // Placeholder for split
+            let split_index = nfa.start_index();
+            expand_rule(*rule, nfa, split_index)?;
+            nfa.states[split_index as usize] = NfaState::Split(
+                nfa.start_index(),
+                next_state_index
+            );
+            Ok(())
         },
         _ => Err(Error::grammar("Unexpected rule type")),
     }
 }
 
-pub(super) fn normalize_rules(grammar: LexicalGrammar) -> LexicalGrammar {
-    unimplemented!();
+pub(super) fn expand_tokens(grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
+    let mut variables = Vec::new();
+    for variable in grammar.variables {
+        let mut nfa = Nfa::new();
+        expand_rule(variable.rule, &mut nfa, 0)?;
+        variables.push(LexicalVariable {
+            name: variable.name,
+            kind: variable.kind,
+            nfa,
+        });
+    }
+    let mut separators = Vec::new();
+    for separator in grammar.separators {
+        let mut nfa = Nfa::new();
+        expand_rule(separator, &mut nfa, 0)?;
+        separators.push(nfa);
+    }
+
+    Ok(LexicalGrammar { variables, separators })
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::nfa::NfaCursor;
 
     fn simulate_nfa<'a>(nfa: &'a Nfa, s: &'a str) -> Option<&'a str> {
         let mut result = None;
@@ -164,15 +210,15 @@ mod tests {
     }
 
     #[test]
-    fn test_regex_expansion() {
+    fn test_rule_expansion() {
         struct Row {
-            pattern: &'static str,
+            rule: Rule,
             examples: Vec<(&'static str, Option<&'static str>)>,
         }
 
         let table = [
             Row {
-                pattern: "a|bc",
+                rule: Rule::pattern("a|bc"),
                 examples: vec![
                     ("a12", Some("a")),
                     ("bc12", Some("bc")),
@@ -181,7 +227,7 @@ mod tests {
                 ],
             },
             Row {
-                pattern: "(a|b|c)d(e|f|g)h?",
+                rule: Rule::pattern("(a|b|c)d(e|f|g)h?"),
                 examples: vec![
                     ("ade1", Some("ade")),
                     ("bdf1", Some("bdf")),
@@ -190,14 +236,14 @@ mod tests {
                 ],
             },
             Row {
-                pattern: "a*",
+                rule: Rule::pattern("a*"),
                 examples: vec![
                     ("aaa1", Some("aaa")),
                     ("b", Some("")),
                 ],
             },
             Row {
-                pattern: "a((bc)+|(de)*)f",
+                rule: Rule::pattern("a((bc)+|(de)*)f"),
                 examples: vec![
                     ("af1", Some("af")),
                     ("adedef1", Some("adedef")),
@@ -206,21 +252,41 @@ mod tests {
                 ],
             },
             Row {
-                pattern: "[a-fA-F0-9]+",
+                rule: Rule::pattern("[a-fA-F0-9]+"),
                 examples: vec![
                     ("A1ff0", Some("A1ff")),
                 ],
             },
             Row {
-                pattern: "\\w\\d\\s",
+                rule: Rule::pattern("\\w\\d\\s"),
                 examples: vec![
                     ("_0 ", Some("_0 ")),
                 ],
             },
+            Row {
+                rule: Rule::string("abc"),
+                examples: vec![
+                    ("abcd", Some("abc")),
+                    ("ab", None),
+                ],
+            },
+            Row {
+                rule: Rule::repeat(Rule::seq(vec![
+                    Rule::string("{"),
+                    Rule::pattern("[a-f]+"),
+                    Rule::string("}"),
+                ])),
+                examples: vec![
+                    ("{a}{", Some("{a}")),
+                    ("{a}{d", Some("{a}")),
+                    ("ab", None),
+                ],
+            },
         ];
 
-        for Row { pattern, examples } in table.iter() {
-            let nfa = expand_rule(Rule::pattern(pattern)).unwrap();
+        for Row { rule, examples } in table.iter() {
+            let mut nfa = Nfa::new();
+            expand_rule(rule.clone(), &mut nfa, 0).unwrap();
             for (haystack, needle) in examples.iter() {
                 assert_eq!(simulate_nfa(&nfa, haystack), *needle);
             }
diff --git a/src/prepare_grammar/extract_simple_aliases.rs b/src/prepare_grammar/extract_simple_aliases.rs
index 250246f3..2a175242 100644
--- a/src/prepare_grammar/extract_simple_aliases.rs
+++ b/src/prepare_grammar/extract_simple_aliases.rs
@@ -1,6 +1,5 @@
 use crate::rules::AliasMap;
 use crate::grammars::{LexicalGrammar, SyntaxGrammar};
-use super::ExtractedGrammar;
 
 pub(super) fn extract_simple_aliases(
     syntax_grammar: &mut SyntaxGrammar,
diff --git a/src/prepare_grammar/extract_tokens.rs b/src/prepare_grammar/extract_tokens.rs
index ee90b3c8..7322516f 100644
--- a/src/prepare_grammar/extract_tokens.rs
+++ b/src/prepare_grammar/extract_tokens.rs
@@ -3,12 +3,12 @@
 use std::rc::Rc;
 use std::mem;
 use crate::error::{Error, Result};
 use crate::rules::{Rule, MetadataParams, Symbol, SymbolType};
-use crate::grammars::{Variable, VariableType, LexicalGrammar, ExternalToken};
-use super::{InternedGrammar, ExtractedGrammar};
+use crate::grammars::{Variable, ExternalToken};
+use super::{InternedGrammar, ExtractedSyntaxGrammar, ExtractedLexicalGrammar};
 
 pub(super) fn extract_tokens(
     mut grammar: InternedGrammar
-) -> Result<(ExtractedGrammar, LexicalGrammar)> {
+) -> Result<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> {
     let mut extractor = TokenExtractor {
         current_variable_name: String::new(),
         current_variable_token_count: 0,
@@ -138,7 +138,7 @@ pub(super) fn extract_tokens(
     }
 
     Ok((
-        ExtractedGrammar {
+        ExtractedSyntaxGrammar {
             variables,
             expected_conflicts,
             extra_tokens,
@@ -146,7 +146,7 @@ pub(super) fn extract_tokens(
             external_tokens,
             word_token,
         },
-        LexicalGrammar {
+        ExtractedLexicalGrammar {
             variables: lexical_variables,
             separators,
         }
@@ -198,20 +198,19 @@ impl TokenExtractor {
                 } else {
                     Rule::Metadata {
                         params: params.clone(),
-                        rule: Rc::new(self.extract_tokens_in_rule((&rule).clone()))
+                        rule: Box::new(self.extract_tokens_in_rule((&rule).clone()))
                     }
                 }
            },
            Rule::Repeat(content) => Rule::Repeat(
-                Rc::new(self.extract_tokens_in_rule(content))
+                Box::new(self.extract_tokens_in_rule(content))
+            ),
+            Rule::Seq(elements) => Rule::Seq(
+                elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect()
+            ),
+            Rule::Choice(elements) => Rule::Choice(
+                elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect()
             ),
-            Rule::Seq { left, right } => Rule::Seq {
-                left: Rc::new(self.extract_tokens_in_rule(left)),
-                right: Rc::new(self.extract_tokens_in_rule(right)),
-            },
-            Rule::Choice { elements } => Rule::Choice {
-                elements: elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect()
-            },
             _ => input.clone()
         }
     }
@@ -249,19 +248,18 @@ impl SymbolReplacer {
     fn replace_symbols_in_rule(&mut self, rule: &Rule) -> Rule {
         match rule {
             Rule::Symbol(symbol) => self.replace_symbol(*symbol).into(),
-            Rule::Choice { elements } => Rule::Choice {
-                elements: elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect()
-            },
-            Rule::Seq { left, right } => Rule::Seq {
-                left: Rc::new(self.replace_symbols_in_rule(left)),
-                right: Rc::new(self.replace_symbols_in_rule(right)),
-            },
+            Rule::Choice(elements) => Rule::Choice(
+                elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect()
+            ),
+            Rule::Seq(elements) => Rule::Seq(
+                elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect()
+            ),
             Rule::Repeat(content) => Rule::Repeat(
-                Rc::new(self.replace_symbols_in_rule(content))
+                Box::new(self.replace_symbols_in_rule(content))
             ),
             Rule::Metadata { rule, params } => Rule::Metadata {
                 params: params.clone(),
-                rule: Rc::new(self.replace_symbols_in_rule(rule)),
+                rule: Box::new(self.replace_symbols_in_rule(rule)),
             },
             _ => rule.clone()
         }
@@ -290,6 +288,7 @@
 #[cfg(test)]
 mod test {
     use super::*;
+    use crate::grammars::VariableType;
 
     #[test]
     fn test_extraction() {
diff --git a/src/prepare_grammar/flatten_grammar.rs b/src/prepare_grammar/flatten_grammar.rs
index 36fe76c9..0f09cd14 100644
--- a/src/prepare_grammar/flatten_grammar.rs
+++ b/src/prepare_grammar/flatten_grammar.rs
@@ -1,7 +1,7 @@
 use crate::error::Result;
 use crate::grammars::SyntaxGrammar;
-use super::ExtractedGrammar;
+use super::ExtractedSyntaxGrammar;
 
-pub(super) fn flatten_grammar(grammar: ExtractedGrammar) -> Result<SyntaxGrammar> {
+pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
     unimplemented!();
 }
diff --git a/src/prepare_grammar/intern_symbols.rs b/src/prepare_grammar/intern_symbols.rs
index e4cf7ff1..17132262 100644
--- a/src/prepare_grammar/intern_symbols.rs
+++ b/src/prepare_grammar/intern_symbols.rs
@@ -80,26 +80,26 @@ struct Interner<'a> {
 impl<'a> Interner<'a> {
     fn intern_rule(&self, rule: &Rule) -> Result<Rule> {
         match rule {
-            Rule::Choice { elements } => {
+            Rule::Choice(elements) => {
                 let mut result = Vec::with_capacity(elements.len());
                 for element in elements {
                     result.push(self.intern_rule(element)?);
                 }
-                Ok(Rule::Choice { elements: result })
+                Ok(Rule::Choice(result))
             },
-
-            Rule::Seq { left, right } =>
-                Ok(Rule::Seq {
-                    left: Rc::new(self.intern_rule(left)?),
-                    right: Rc::new(self.intern_rule(right)?),
-                }),
-
-            Rule::Repeat(content) =>
-                Ok(Rule::Repeat(Rc::new(self.intern_rule(content)?))),
-
+            Rule::Seq(elements) => {
+                let mut result = Vec::with_capacity(elements.len());
+                for element in elements {
+                    result.push(self.intern_rule(element)?);
+                }
+                Ok(Rule::Seq(result))
+            },
+            Rule::Repeat(content) => Ok(Rule::Repeat(
+                Box::new(self.intern_rule(content)?)
+            )),
             Rule::Metadata { rule, params } => Ok(Rule::Metadata {
-                rule: Rc::new(self.intern_rule(rule)?),
+                rule: Box::new(self.intern_rule(rule)?),
                 params: params.clone()
             }),
 
diff --git a/src/prepare_grammar/mod.rs b/src/prepare_grammar/mod.rs
index b860807a..e2615479 100644
--- a/src/prepare_grammar/mod.rs
+++ b/src/prepare_grammar/mod.rs
@@ -2,7 +2,7 @@ mod intern_symbols;
 mod extract_tokens;
 mod expand_repeats;
 mod flatten_grammar;
-mod normalize_rules;
+mod expand_tokens;
 mod extract_simple_aliases;
 
 use crate::rules::{AliasMap, Rule, Symbol};
@@ -12,7 +12,7 @@
 use self::intern_symbols::intern_symbols;
 use self::extract_tokens::extract_tokens;
 use self::expand_repeats::expand_repeats;
 use self::flatten_grammar::flatten_grammar;
-use self::normalize_rules::normalize_rules;
+use self::expand_tokens::expand_tokens;
 use self::extract_simple_aliases::extract_simple_aliases;
 
 pub(self) struct IntermediateGrammar {
@@ -25,7 +25,14 @@ pub(self) struct IntermediateGrammar {
 }
 
 pub(self) type InternedGrammar = IntermediateGrammar;
-pub(self) type ExtractedGrammar = IntermediateGrammar;
+
+pub(self) type ExtractedSyntaxGrammar = IntermediateGrammar;
+
+#[derive(Debug, PartialEq, Eq)]
+pub(self) struct ExtractedLexicalGrammar {
+    variables: Vec<Variable>,
+    separators: Vec<Rule>,
+}
 
 pub(crate) fn prepare_grammar(
     input_grammar: &InputGrammar
@@ -34,7 +41,7 @@ pub(crate) fn prepare_grammar(
     let interned_grammar = intern_symbols(input_grammar)?;
     let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
     let syntax_grammar = expand_repeats(syntax_grammar);
     let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
-    let mut lexical_grammar = normalize_rules(lexical_grammar);
+    let mut lexical_grammar = expand_tokens(lexical_grammar)?;
     let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &mut lexical_grammar);
     Ok((syntax_grammar, lexical_grammar, simple_aliases))
 }
diff --git a/src/rules.rs b/src/rules.rs
index b593496a..c6f18cf4 100644
--- a/src/rules.rs
+++ b/src/rules.rs
@@ -49,18 +49,13 @@ pub(crate) enum Rule {
     Pattern(String),
     NamedSymbol(String),
     Symbol(Symbol),
-    Choice {
-        elements: Vec<Rule>,
-    },
+    Choice(Vec<Rule>),
     Metadata {
         params: MetadataParams,
-        rule: Rc<Rule>,
+        rule: Box<Rule>,
     },
-    Repeat(Rc<Rule>),
-    Seq {
-        left: Rc<Rule>,
-        right: Rc<Rule>,
-    }
+    Repeat(Box<Rule>),
+    Seq(Vec<Rule>),
 }
 
 impl Rule {
@@ -98,7 +93,7 @@
     }
 
     pub fn repeat(rule: Rule) -> Self {
-        Rule::Repeat(Rc::new(rule))
+        Rule::Repeat(Box::new(rule))
     }
 
     pub fn choice(rules: Vec<Rule>) -> Self {
@@ -106,32 +101,11 @@
         for rule in rules {
             choice_helper(&mut elements, rule);
         }
-        Rule::Choice { elements }
+        Rule::Choice(elements)
     }
 
     pub fn seq(rules: Vec<Rule>) -> Self {
-        let mut result = Rule::Blank;
-        for rule in rules {
-            match rule {
-                Rule::Blank => continue,
-                Rule::Metadata { rule, params: _ } => {
-                    if *rule == Rule::Blank {
-                        continue;
-                    }
-                },
-                _ => {
-                    if result == Rule::Blank {
-                        result = rule;
-                    } else {
-                        result = Rule::Seq {
-                            left: Rc::new(result),
-                            right: Rc::new(rule),
-                        }
-                    }
-                }
-            }
-        }
-        result
+        Rule::Seq(rules)
     }
 
     pub fn terminal(index: usize) -> Self {
@@ -196,14 +170,14 @@ fn add_metadata(input: Rule, f: T) -> Rule {
         _ => {
             let mut params = MetadataParams::default();
             f(&mut params);
-            Rule::Metadata { rule: Rc::new(input), params }
+            Rule::Metadata { rule: Box::new(input), params }
         }
     }
 }
 
 fn choice_helper(result: &mut Vec<Rule>, rule: Rule) {
     match rule {
-        Rule::Choice {elements} => {
+        Rule::Choice(elements) => {
             for element in elements {
                 choice_helper(result, element);
             }