Implement expand_tokens

2018-12-08 23:35:48 -08:00 · 2018-12-08 23:35:48 -08:00 · d482894c7d
commit d482894c7d
parent ead6ca1738
11 changed files with 192 additions and 139 deletions
--- a/src/grammars.rs
+++ b/src/grammars.rs
@ -1,4 +1,5 @@
 use crate::rules::{Associativity, Alias, Rule, Symbol};
+use crate::nfa::Nfa;

 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub(crate) enum VariableType {
@ -30,10 +31,17 @@ pub(crate) struct InputGrammar {

 // Extracted lexical grammar

+/// A lexical-grammar token whose rule has been expanded into an NFA
+/// (built by `expand_tokens`).
+#[derive(Debug, PartialEq, Eq)]
+pub(crate) struct LexicalVariable {
+    /// Name copied from the extracted variable.
+    pub name: String,
+    /// Variable kind copied from the extracted variable.
+    pub kind: VariableType,
+    /// State machine produced from the variable's rule.
+    pub nfa: Nfa,
+}
+
 #[derive(Debug, PartialEq, Eq)]
+/// The lexical grammar with every token rule and separator rule stored as an NFA.
 pub(crate) struct LexicalGrammar {
-    pub variables: Vec<Variable>,
-    pub separators: Vec<Rule>,
+    /// One entry per token, each carrying its own NFA.
+    pub variables: Vec<LexicalVariable>,
+    /// One NFA per separator rule.
+    pub separators: Vec<Nfa>,
 }

 // Extracted syntax grammar
--- a/src/main.rs
+++ b/src/main.rs
@ -14,7 +14,7 @@ mod render;
 mod rules;
 mod tables;

-fn main() {
+fn main() -> error::Result<()> {
    let matches = App::new("tree-sitter")
        .version("0.1")
        .author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
@ -32,5 +32,12 @@ fn main() {
                .arg(Arg::with_name("path").index(1).required(true))
                .arg(Arg::with_name("line").index(2).required(true))
                .arg(Arg::with_name("column").index(3).required(true))
-        );
+        ).get_matches();
+
+    if let Some(matches) = matches.subcommand_matches("generate") {
+        let code = generate::generate_parser_for_grammar(String::new())?;
+        println!("{}", code);
+    }
+
+    Ok(())
 }
--- a/src/nfa.rs
+++ b/src/nfa.rs
@ -7,13 +7,14 @@ pub enum CharacterSet {
    Exclude(Vec<char>),
 }

-#[derive(Debug)]
+/// One state of an `Nfa`. The `u32` payloads are indices into `Nfa::states`.
+#[derive(Debug, PartialEq, Eq)]
 pub enum NfaState {
+    // Match a character from the set, then continue at the given state index.
    Advance(CharacterSet, u32),
+    // Branch to either of the two state indices (presumably without consuming
+    // input -- confirm against the cursor implementation).
    Split(u32, u32),
+    // Also pushed as a temporary placeholder that is later overwritten with a
+    // `Split` when expanding repetitions.
    Accept,
 }

+/// A state machine stored as a flat list of `NfaState`s.
+// NOTE(review): `Debug` is not derived here, yet `LexicalVariable` derives
+// `Debug` with an `Nfa` field -- this relies on a manual `Debug` impl that is
+// not visible in this diff; confirm it exists.
+#[derive(PartialEq, Eq)]
 pub struct Nfa {
+    // States refer to one another by their index in this vector.
    pub states: Vec<NfaState>
 }
--- a/src/prepare_grammar/expand_repeats.rs
+++ b/src/prepare_grammar/expand_repeats.rs
@ -3,7 +3,7 @@ use crate::grammars::{Variable, VariableType};
 use std::collections::HashMap;
 use std::mem;
 use std::rc::Rc;
-use super::ExtractedGrammar;
+use super::ExtractedSyntaxGrammar;

 struct Expander {
    variable_name: String,
@ -25,16 +25,11 @@ impl Expander {

    fn expand_rule(&mut self, rule: &Rule) -> Rule {
        match rule {
-            Rule::Choice { elements } =>
-                Rule::Choice {
-                    elements: elements.iter().map(|element| self.expand_rule(element)).collect()
-                },
+            Rule::Choice(elements) =>
+                Rule::Choice(elements.iter().map(|element| self.expand_rule(element)).collect()),

-            Rule::Seq { left, right } =>
-                Rule::Seq {
-                    left: Rc::new(self.expand_rule(left)),
-                    right: Rc::new(self.expand_rule(right)),
-                },
+            Rule::Seq(elements) =>
+                Rule::Seq(elements.iter().map(|element| self.expand_rule(element)).collect()),

            Rule::Repeat(content) => {
                let inner_rule = self.expand_rule(content);
@ -46,27 +41,24 @@ impl Expander {
                self.repeat_count_in_variable += 1;
                let rule_name = format!("{}_repeat{}", self.variable_name, self.repeat_count_in_variable);
                let repeat_symbol = Symbol::non_terminal(self.preceding_symbol_count + self.auxiliary_variables.len());
-                let rc_symbol = Rc::new(Rule::Symbol(repeat_symbol));
                self.existing_repeats.insert(inner_rule.clone(), repeat_symbol);
                self.auxiliary_variables.push(Variable {
                    name: rule_name,
                    kind: VariableType::Auxiliary,
-                    rule: Rule::Choice {
-                        elements: vec![
-                            Rule::Seq {
-                                left: rc_symbol.clone(),
-                                right: rc_symbol
-                            },
-                            inner_rule
-                        ],
-                    },
+                    rule: Rule::Choice(vec![
+                        Rule::Seq(vec![
+                            Rule::Symbol(repeat_symbol),
+                            Rule::Symbol(repeat_symbol),
+                        ]),
+                        inner_rule
+                    ]),
                });

                Rule::Symbol(repeat_symbol)
            }

            Rule::Metadata { rule, params } => Rule::Metadata {
-                rule: Rc::new(self.expand_rule(rule)),
+                rule: Box::new(self.expand_rule(rule)),
                params: params.clone()
            },

@ -75,7 +67,7 @@ impl Expander {
    }
 }

-pub(super) fn expand_repeats(mut grammar: ExtractedGrammar) -> ExtractedGrammar {
+pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSyntaxGrammar {
    let mut expander = Expander {
        variable_name: String::new(),
        repeat_count_in_variable: 0,
@ -207,8 +199,8 @@ mod tests {
        ]);
    }

-    fn build_grammar(variables: Vec<Variable>) -> ExtractedGrammar {
-        ExtractedGrammar {
+    fn build_grammar(variables: Vec<Variable>) -> ExtractedSyntaxGrammar {
+        ExtractedSyntaxGrammar {
            variables,
            extra_tokens: Vec::new(),
            external_tokens: Vec::new(),
--- a/src/prepare_grammar/normalize_rules.rs
+++ b/src/prepare_grammar/normalize_rules.rs
@ -1,10 +1,11 @@
 use crate::error::{Error, Result};
 use crate::rules::Rule;
-use crate::grammars::LexicalGrammar;
-use crate::nfa::{Nfa, NfaState, NfaCursor, CharacterSet};
+use crate::grammars::{LexicalGrammar, LexicalVariable};
+use crate::nfa::{Nfa, NfaState, CharacterSet};
+use super::{ExtractedLexicalGrammar};
 use regex_syntax::ast::{parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind};

-fn evaluate_perl_class(item: &ClassPerlKind) -> CharacterSet {
+fn expand_perl_character_class(item: &ClassPerlKind) -> CharacterSet {
    match item {
        ClassPerlKind::Digit => CharacterSet::empty()
            .add_range('0', '9'),
@ -21,7 +22,7 @@ fn evaluate_perl_class(item: &ClassPerlKind) -> CharacterSet {
    }
 }

-fn evaluate_character_class(item: &ClassSetItem) -> Result<CharacterSet> {
+fn expand_character_class(item: &ClassSetItem) -> Result<CharacterSet> {
    match item {
        ClassSetItem::Empty(_) => Ok(CharacterSet::Include(Vec::new())),
        ClassSetItem::Literal(literal) => Ok(CharacterSet::Include(vec![literal.c])),
@ -29,7 +30,7 @@ fn evaluate_character_class(item: &ClassSetItem) -> Result<CharacterSet> {
        ClassSetItem::Union(union) => {
            let mut result = CharacterSet::empty();
            for item in &union.items {
-                result = result.add(evaluate_character_class(&item)?);
+                result = result.add(expand_character_class(&item)?);
            }
            Ok(result)
        }
@ -37,7 +38,7 @@ fn evaluate_character_class(item: &ClassSetItem) -> Result<CharacterSet> {
    }
 }

-fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
+fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
    match ast {
        Ast::Empty(_) => Ok(()),
        Ast::Flags(_) => Err(Error::regex("Flags are not supported")),
@ -53,12 +54,12 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<(
        Ast::Class(class) => match class {
            Class::Unicode(_) => Err(Error::regex("Unicode character classes are not supported")),
            Class::Perl(class) => {
-                nfa.states.push(NfaState::Advance(evaluate_perl_class(&class.kind), next_state_index));
+                nfa.states.push(NfaState::Advance(expand_perl_character_class(&class.kind), next_state_index));
                Ok(())
            },
            Class::Bracketed(class) => match &class.kind {
                ClassSet::Item(item) => {
-                    let character_set = evaluate_character_class(&item)?;
+                    let character_set = expand_character_class(&item)?;
                    nfa.states.push(NfaState::Advance(character_set, next_state_index));
                    Ok(())
                },
@ -69,14 +70,14 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<(
        },
        Ast::Repetition(repetition) => match repetition.op.kind {
            RepetitionKind::ZeroOrOne => {
-                regex_to_nfa(&repetition.ast, nfa, next_state_index)?;
+                expand_regex(&repetition.ast, nfa, next_state_index)?;
                nfa.prepend(|start_index| NfaState::Split(next_state_index, start_index));
                Ok(())
            },
            RepetitionKind::OneOrMore => {
                nfa.states.push(NfaState::Accept); // Placeholder for split
                let split_index = nfa.start_index();
-                regex_to_nfa(&repetition.ast, nfa, split_index)?;
+                expand_regex(&repetition.ast, nfa, split_index)?;
                nfa.states[split_index as usize] = NfaState::Split(
                    nfa.start_index(),
                    next_state_index
@ -86,7 +87,7 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<(
            RepetitionKind::ZeroOrMore => {
                nfa.states.push(NfaState::Accept); // Placeholder for split
                let split_index = nfa.start_index();
-                regex_to_nfa(&repetition.ast, nfa, split_index)?;
+                expand_regex(&repetition.ast, nfa, split_index)?;
                nfa.states[split_index as usize] = NfaState::Split(
                    nfa.start_index(),
                    next_state_index
@ -96,11 +97,11 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<(
            },
            RepetitionKind::Range(_) => unimplemented!(),
        },
-        Ast::Group(group) => regex_to_nfa(&group.ast, nfa, nfa.start_index()),
+        // A group must continue at the caller's target state. `nfa.start_index()` is only
+        // that target when the group happens to follow the state pushed last; for a group
+        // that is a later branch of an alternation it is the previous branch's entry state.
+        Ast::Group(group) => expand_regex(&group.ast, nfa, next_state_index),
        Ast::Alternation(alternation) => {
            let mut alternative_start_indices = Vec::new();
            for ast in alternation.asts.iter() {
-                regex_to_nfa(&ast, nfa, next_state_index)?;
+                expand_regex(&ast, nfa, next_state_index)?;
                alternative_start_indices.push(nfa.start_index());
            }
            alternative_start_indices.pop();
@ -111,7 +112,7 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<(
        },
        Ast::Concat(concat) => {
            for ast in concat.asts.iter().rev() {
-                regex_to_nfa(&ast, nfa, next_state_index)?;
+                expand_regex(&ast, nfa, next_state_index)?;
                next_state_index = nfa.start_index();
            }
            Ok(())
@ -119,32 +120,77 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<(
    }
 }

-fn expand_rule(rule: Rule) -> Result<Nfa> {
+/// Expands a lexical `rule` into states appended to `nfa`.
+///
+/// The generated fragment continues at `next_state_index` once the rule has matched;
+/// callers read the fragment's entry point from `nfa.start_index()` afterwards.
+///
+/// # Errors
+///
+/// Returns an error when a `Pattern` fails to parse or cannot be expanded as a regex,
+/// and `"Unexpected rule type"` for any rule kind other than `Pattern`, `String`,
+/// `Choice`, `Seq` and `Repeat`.
+fn expand_rule(rule: Rule, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
    match rule {
        Rule::Pattern(s) => {
            let ast = parse::Parser::new().parse(&s).map_err(|e| Error::GrammarError(e.to_string()))?;
-            let mut nfa = Nfa::new();
-            regex_to_nfa(&ast, &mut nfa, 0)?;
-            Ok(nfa)
+            expand_regex(&ast, nfa, next_state_index)?;
+            Ok(())
        },
        Rule::String(s) => {
-            let mut nfa = Nfa::new();
            for c in s.chars().rev() {
-                nfa.prepend(|start_index| NfaState::Advance(CharacterSet::empty().add_char(c), start_index));
+                // Chain to the requested continuation rather than to whichever state was
+                // pushed last: inside a `Choice`, the most recent state belongs to the
+                // previous alternative, not to this string's successor.
+                nfa.states.push(NfaState::Advance(CharacterSet::empty().add_char(c), next_state_index));
+                next_state_index = nfa.start_index();
            }
-            Ok(nfa)
+            Ok(())
+        },
+        Rule::Choice(elements) => {
+            // Every alternative continues at the same state; remember where each begins.
+            let mut alternative_start_indices = Vec::new();
+            for element in elements {
+                expand_rule(element, nfa, next_state_index)?;
+                alternative_start_indices.push(nfa.start_index());
+            }
+            // The last alternative is already the NFA's start; each of the others gets a
+            // split placed in front that branches to it.
+            alternative_start_indices.pop();
+            for alternative_start_index in alternative_start_indices {
+                nfa.prepend(|start_index| NfaState::Split(start_index, alternative_start_index));
+            }
+            Ok(())
+        },
+        Rule::Seq(elements) => {
+            // Build back to front so that each element can point at the start of its successor.
+            for element in elements.into_iter().rev() {
+                expand_rule(element, nfa, next_state_index)?;
+                next_state_index = nfa.start_index();
+            }
+            Ok(())
+        },
+        Rule::Repeat(rule) => {
+            // Reserve the loop state first so the content can continue into it.
+            nfa.states.push(NfaState::Accept); // Placeholder for split
+            let split_index = nfa.start_index();
+            expand_rule(*rule, nfa, split_index)?;
+            // After the content: either loop back to its start or move on.
+            nfa.states[split_index as usize] = NfaState::Split(
+                nfa.start_index(),
+                next_state_index
+            );
+            Ok(())
        },
        _ => Err(Error::grammar("Unexpected rule type")),
    }
 }

-pub(super) fn normalize_rules(grammar: LexicalGrammar) -> LexicalGrammar {
-    unimplemented!();
+/// Turns every token rule and every separator rule of the extracted lexical grammar
+/// into its own NFA.
+///
+/// Each NFA starts from `Nfa::new()` and is expanded with `0` as the continuation
+/// state. Variables are processed before separators, and the first expansion error
+/// is returned.
+pub(super) fn expand_tokens(grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
+    let ExtractedLexicalGrammar { variables: token_rules, separators: separator_rules } = grammar;
+
+    let variables = token_rules
+        .into_iter()
+        .map(|token| {
+            let mut nfa = Nfa::new();
+            expand_rule(token.rule, &mut nfa, 0)?;
+            Ok(LexicalVariable { name: token.name, kind: token.kind, nfa })
+        })
+        .collect::<Result<Vec<_>>>()?;
+
+    let separators = separator_rules
+        .into_iter()
+        .map(|rule| {
+            let mut nfa = Nfa::new();
+            expand_rule(rule, &mut nfa, 0)?;
+            Ok(nfa)
+        })
+        .collect::<Result<Vec<_>>>()?;
+
+    Ok(LexicalGrammar { variables, separators })
 }

 #[cfg(test)]
 mod tests {
    use super::*;
+    use crate::nfa::NfaCursor;

    fn simulate_nfa<'a>(nfa: &'a Nfa, s: &'a str) -> Option<&'a str> {
        let mut result = None;
@ -164,15 +210,15 @@ mod tests {
    }

    #[test]
-    fn test_regex_expansion() {
+    fn test_rule_expansion() {
        struct Row {
-            pattern: &'static str,
+            rule: Rule,
            examples: Vec<(&'static str, Option<&'static str>)>,
        }

        let table = [
            Row {
-                pattern: "a|bc",
+                rule: Rule::pattern("a|bc"),
                examples: vec![
                    ("a12", Some("a")),
                    ("bc12", Some("bc")),
@ -181,7 +227,7 @@ mod tests {
                ],
            },
            Row {
-                pattern: "(a|b|c)d(e|f|g)h?",
+                rule: Rule::pattern("(a|b|c)d(e|f|g)h?"),
                examples: vec![
                    ("ade1", Some("ade")),
                    ("bdf1", Some("bdf")),
@ -190,14 +236,14 @@ mod tests {
                ],
            },
            Row {
-                pattern: "a*",
+                rule: Rule::pattern("a*"),
                examples: vec![
                    ("aaa1", Some("aaa")),
                    ("b", Some("")),
                ],
            },
            Row {
-                pattern: "a((bc)+|(de)*)f",
+                rule: Rule::pattern("a((bc)+|(de)*)f"),
                examples: vec![
                    ("af1", Some("af")),
                    ("adedef1", Some("adedef")),
@ -206,21 +252,41 @@ mod tests {
                ],
            },
            Row {
-                pattern: "[a-fA-F0-9]+",
+                rule: Rule::pattern("[a-fA-F0-9]+"),
                examples: vec![
                    ("A1ff0", Some("A1ff")),
                ],
            },
            Row {
-                pattern: "\\w\\d\\s",
+                rule: Rule::pattern("\\w\\d\\s"),
                examples: vec![
                    ("_0  ", Some("_0 ")),
                ],
            },
+            Row {
+                rule: Rule::string("abc"),
+                examples: vec![
+                    ("abcd", Some("abc")),
+                    ("ab", None),
+                ],
+            },
+            Row {
+                rule: Rule::repeat(Rule::seq(vec![
+                    Rule::string("{"),
+                    Rule::pattern("[a-f]+"),
+                    Rule::string("}"),
+                ])),
+                examples: vec![
+                    ("{a}{", Some("{a}")),
+                    ("{a}{d", Some("{a}")),
+                    ("ab", None),
+                ],
+            },
        ];

-        for Row { pattern, examples } in table.iter() {
-            let nfa = expand_rule(Rule::pattern(pattern)).unwrap();
+        for Row { rule, examples } in table.iter() {
+            let mut nfa = Nfa::new();
+            expand_rule(rule.clone(), &mut nfa, 0).unwrap();
            for (haystack, needle) in examples.iter() {
                assert_eq!(simulate_nfa(&nfa, haystack), *needle);
            }
--- a/src/prepare_grammar/extract_simple_aliases.rs
+++ b/src/prepare_grammar/extract_simple_aliases.rs
@ -1,6 +1,5 @@
 use crate::rules::AliasMap;
 use crate::grammars::{LexicalGrammar, SyntaxGrammar};
-use super::ExtractedGrammar;

 pub(super) fn extract_simple_aliases(
    syntax_grammar: &mut SyntaxGrammar,
--- a/src/prepare_grammar/extract_tokens.rs
+++ b/src/prepare_grammar/extract_tokens.rs
@ -3,12 +3,12 @@ use std::rc::Rc;
 use std::mem;
 use crate::error::{Error, Result};
 use crate::rules::{Rule, MetadataParams, Symbol, SymbolType};
-use crate::grammars::{Variable, VariableType, LexicalGrammar, ExternalToken};
-use super::{InternedGrammar, ExtractedGrammar};
+use crate::grammars::{Variable, ExternalToken};
+use super::{InternedGrammar, ExtractedSyntaxGrammar, ExtractedLexicalGrammar};

 pub(super) fn extract_tokens(
    mut grammar: InternedGrammar
-) -> Result<(ExtractedGrammar, LexicalGrammar)> {
+) -> Result<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> {
    let mut extractor = TokenExtractor {
        current_variable_name: String::new(),
        current_variable_token_count: 0,
@ -138,7 +138,7 @@ pub(super) fn extract_tokens(
    }

    Ok((
-        ExtractedGrammar {
+        ExtractedSyntaxGrammar {
            variables,
            expected_conflicts,
            extra_tokens,
@ -146,7 +146,7 @@ pub(super) fn extract_tokens(
            external_tokens,
            word_token,
        },
-        LexicalGrammar {
+        ExtractedLexicalGrammar {
            variables: lexical_variables,
            separators,
        }
@ -198,20 +198,19 @@ impl TokenExtractor {
                } else {
                    Rule::Metadata {
                        params: params.clone(),
-                        rule: Rc::new(self.extract_tokens_in_rule((&rule).clone()))
+                        rule: Box::new(self.extract_tokens_in_rule((&rule).clone()))
                    }
                }
            },
            Rule::Repeat(content) => Rule::Repeat(
-                Rc::new(self.extract_tokens_in_rule(content))
+                Box::new(self.extract_tokens_in_rule(content))
+            ),
+            Rule::Seq(elements) => Rule::Seq(
+                elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect()
+            ),
+            Rule::Choice(elements) => Rule::Choice(
+                elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect()
            ),
-            Rule::Seq { left, right } => Rule::Seq {
-                left: Rc::new(self.extract_tokens_in_rule(left)),
-                right: Rc::new(self.extract_tokens_in_rule(right)),
-            },
-            Rule::Choice { elements } => Rule::Choice {
-                elements: elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect()
-            },
            _ => input.clone()
        }
    }
@ -249,19 +248,18 @@ impl SymbolReplacer {
    fn replace_symbols_in_rule(&mut self, rule: &Rule) -> Rule {
        match rule {
            Rule::Symbol(symbol) => self.replace_symbol(*symbol).into(),
-            Rule::Choice { elements } => Rule::Choice {
-                elements: elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect()
-            },
-            Rule::Seq { left, right } => Rule::Seq {
-                left: Rc::new(self.replace_symbols_in_rule(left)),
-                right: Rc::new(self.replace_symbols_in_rule(right)),
-            },
+            Rule::Choice(elements) => Rule::Choice(
+                elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect()
+            ),
+            Rule::Seq(elements) => Rule::Seq(
+                elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect()
+            ),
            Rule::Repeat(content) => Rule::Repeat(
-                Rc::new(self.replace_symbols_in_rule(content))
+                Box::new(self.replace_symbols_in_rule(content))
            ),
            Rule::Metadata { rule, params } => Rule::Metadata {
                params: params.clone(),
-                rule: Rc::new(self.replace_symbols_in_rule(rule)),
+                rule: Box::new(self.replace_symbols_in_rule(rule)),
            },
            _ => rule.clone()
        }
@ -290,6 +288,7 @@ impl SymbolReplacer {
 #[cfg(test)]
 mod test {
    use super::*;
+    use crate::grammars::VariableType;

    #[test]
    fn test_extraction() {
--- a/src/prepare_grammar/flatten_grammar.rs
+++ b/src/prepare_grammar/flatten_grammar.rs
@ -1,7 +1,7 @@
 use crate::error::Result;
 use crate::grammars::SyntaxGrammar;
-use super::ExtractedGrammar;
+use super::ExtractedSyntaxGrammar;

-pub(super) fn flatten_grammar(grammar: ExtractedGrammar) -> Result<SyntaxGrammar> {
+/// Placeholder: flattening is not implemented yet, so every call panics via
+/// `unimplemented!`.
+pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
    unimplemented!();
 }
--- a/src/prepare_grammar/intern_symbols.rs
+++ b/src/prepare_grammar/intern_symbols.rs
@ -80,26 +80,26 @@ struct Interner<'a> {
 impl<'a> Interner<'a> {
    fn intern_rule(&self, rule: &Rule) -> Result<Rule> {
        match rule {
-            Rule::Choice { elements } => {
+            Rule::Choice(elements) => {
                let mut result = Vec::with_capacity(elements.len());
                for element in elements {
                    result.push(self.intern_rule(element)?);
                }
-                Ok(Rule::Choice { elements: result })
+                Ok(Rule::Choice(result))
            },
-
-            Rule::Seq { left, right } =>
-                Ok(Rule::Seq {
-                    left: Rc::new(self.intern_rule(left)?),
-                    right: Rc::new(self.intern_rule(right)?),
-                }),
-
-            Rule::Repeat(content) =>
-                Ok(Rule::Repeat(Rc::new(self.intern_rule(content)?))),
-
+            Rule::Seq(elements) => {
+                let mut result = Vec::with_capacity(elements.len());
+                for element in elements {
+                    result.push(self.intern_rule(element)?);
+                }
+                Ok(Rule::Seq(result))
+            },
+            Rule::Repeat(content) => Ok(Rule::Repeat(
+                Box::new(self.intern_rule(content)?)
+            )),
            Rule::Metadata { rule, params } =>
                Ok(Rule::Metadata {
-                    rule: Rc::new(self.intern_rule(rule)?),
+                    rule: Box::new(self.intern_rule(rule)?),
                    params: params.clone()
                }),

--- a/src/prepare_grammar/mod.rs
+++ b/src/prepare_grammar/mod.rs
@ -2,7 +2,7 @@ mod intern_symbols;
 mod extract_tokens;
 mod expand_repeats;
 mod flatten_grammar;
-mod normalize_rules;
+mod expand_tokens;
 mod extract_simple_aliases;

 use crate::rules::{AliasMap, Rule, Symbol};
@ -12,7 +12,7 @@ use self::intern_symbols::intern_symbols;
 use self::extract_tokens::extract_tokens;
 use self::expand_repeats::expand_repeats;
 use self::flatten_grammar::flatten_grammar;
-use self::normalize_rules::normalize_rules;
+use self::expand_tokens::expand_tokens;
 use self::extract_simple_aliases::extract_simple_aliases;

 pub(self) struct IntermediateGrammar<T, U> {
@ -25,7 +25,14 @@ pub(self) struct IntermediateGrammar<T, U> {
 }

 pub(self) type InternedGrammar = IntermediateGrammar<Rule, Variable>;
-pub(self) type ExtractedGrammar = IntermediateGrammar<Symbol, ExternalToken>;
+
+pub(self) type ExtractedSyntaxGrammar = IntermediateGrammar<Symbol, ExternalToken>;
+
+/// Lexical half of the `extract_tokens` result: tokens and separators are still
+/// expressed as `Rule`s here, before `expand_tokens` converts them into NFAs.
+#[derive(Debug, PartialEq, Eq)]
+pub(self) struct ExtractedLexicalGrammar {
+    /// Token definitions, each still carrying its `Rule`.
+    variables: Vec<Variable>,
+    /// Separator rules.
+    separators: Vec<Rule>,
+}

 pub(crate) fn prepare_grammar(
    input_grammar: &InputGrammar
@ -34,7 +41,7 @@ pub(crate) fn prepare_grammar(
    let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
    let syntax_grammar = expand_repeats(syntax_grammar);
    let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
-    let mut lexical_grammar = normalize_rules(lexical_grammar);
+    let mut lexical_grammar = expand_tokens(lexical_grammar)?;
    let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &mut lexical_grammar);
    Ok((syntax_grammar, lexical_grammar, simple_aliases))
 }
--- a/src/rules.rs
+++ b/src/rules.rs
@ -49,18 +49,13 @@ pub(crate) enum Rule {
    Pattern(String),
    NamedSymbol(String),
    Symbol(Symbol),
-    Choice {
-        elements: Vec<Rule>,
-    },
+    Choice(Vec<Rule>),
    Metadata {
        params: MetadataParams,
-        rule: Rc<Rule>,
+        rule: Box<Rule>,
    },
-    Repeat(Rc<Rule>),
-    Seq {
-        left: Rc<Rule>,
-        right: Rc<Rule>,
-    }
+    Repeat(Box<Rule>),
+    Seq(Vec<Rule>),
 }

 impl Rule {
@ -98,7 +93,7 @@ impl Rule {
    }

    pub fn repeat(rule: Rule) -> Self {
-        Rule::Repeat(Rc::new(rule))
+        // `Rule::Repeat` holds its content in a `Box`.
+        Rule::Repeat(Box::new(rule))
    }

    pub fn choice(rules: Vec<Rule>) -> Self {
@ -106,32 +101,11 @@ impl Rule {
        for rule in rules {
            choice_helper(&mut elements, rule);
        }
-        Rule::Choice { elements }
+        Rule::Choice(elements)
    }

    pub fn seq(rules: Vec<Rule>) -> Self {
-        let mut result = Rule::Blank;
-        for rule in rules {
-            match rule {
-                Rule::Blank => continue,
-                Rule::Metadata { rule, params: _ } => {
-                    if *rule == Rule::Blank {
-                        continue;
-                    }
-                },
-                _ => {
-                    if result == Rule::Blank {
-                        result = rule;
-                    } else {
-                        result = Rule::Seq {
-                            left: Rc::new(result),
-                            right: Rc::new(rule),
-                        }
-                    }
-                }
-            }
-        }
-        result
+        // Wraps `rules` exactly as given: unlike the removed implementation, `Blank`
+        // elements are kept and a single-element list is not unwrapped.
+        Rule::Seq(rules)
    }

    pub fn terminal(index: usize) -> Self {
@ -196,14 +170,14 @@ fn add_metadata<T: Fn(&mut MetadataParams)>(input: Rule, f: T) -> Rule {
        _ => {
            let mut params = MetadataParams::default();
            f(&mut params);
-            Rule::Metadata { rule: Rc::new(input), params }
+            Rule::Metadata { rule: Box::new(input), params }
        }
    }
 }

 fn choice_helper(result: &mut Vec<Rule>, rule: Rule) {
    match rule {
-        Rule::Choice {elements} => {
+        Rule::Choice(elements) => {
            for element in elements {
                choice_helper(result, element);
            }