From d482894c7d40b9b563262fef49e2ec81f96d346a Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Sat, 8 Dec 2018 23:35:48 -0800
Subject: [PATCH] Implement expand_tokens

---
 src/grammars.rs                               |  12 +-
 src/main.rs                                   |  11 +-
 src/nfa.rs                                    |   3 +-
 src/prepare_grammar/expand_repeats.rs         |  40 +++---
 .../{normalize_rules.rs => expand_tokens.rs}  | 130 +++++++++++++-----
 src/prepare_grammar/extract_simple_aliases.rs |   1 -
 src/prepare_grammar/extract_tokens.rs         |  45 +++---
 src/prepare_grammar/flatten_grammar.rs        |   4 +-
 src/prepare_grammar/intern_symbols.rs         |  26 ++--
 src/prepare_grammar/mod.rs                    |  15 +-
 src/rules.rs                                  |  44 ++----
 11 files changed, 192 insertions(+), 139 deletions(-)
 rename src/prepare_grammar/{normalize_rules.rs => expand_tokens.rs} (61%)

diff --git a/src/grammars.rs b/src/grammars.rs
index 62910637..c5e9aaa1 100644
--- a/src/grammars.rs
+++ b/src/grammars.rs
@@ -1,4 +1,5 @@
 use crate::rules::{Associativity, Alias, Rule, Symbol};
+use crate::nfa::Nfa;
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub(crate) enum VariableType {
@@ -30,10 +31,17 @@ pub(crate) struct InputGrammar {
 
 // Extracted lexical grammar
 
+#[derive(Debug, PartialEq, Eq)]
+pub(crate) struct LexicalVariable {
+    pub name: String,
+    pub kind: VariableType,
+    pub nfa: Nfa,
+}
+
 #[derive(Debug, PartialEq, Eq)]
 pub(crate) struct LexicalGrammar {
-    pub variables: Vec<Variable>,
-    pub separators: Vec<Rule>,
+    pub variables: Vec<LexicalVariable>,
+    pub separators: Vec<Nfa>,
 }
 
 // Extracted syntax grammar
diff --git a/src/main.rs b/src/main.rs
index 4d376929..b83764fc 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -14,7 +14,7 @@ mod render;
 mod rules;
 mod tables;
 
-fn main() {
+fn main() -> error::Result<()> {
     let matches = App::new("tree-sitter")
         .version("0.1")
        .author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
@@ -32,5 +32,12 @@
            .arg(Arg::with_name("path").index(1).required(true))
            .arg(Arg::with_name("line").index(2).required(true))
            .arg(Arg::with_name("column").index(3).required(true))
-        );
+        ).get_matches();
+
+    if let Some(matches) = matches.subcommand_matches("generate") {
+        let code = generate::generate_parser_for_grammar(String::new())?;
+        println!("{}", code);
+    }
+
+    Ok(())
 }
diff --git a/src/nfa.rs b/src/nfa.rs
index 55aa11dc..22cb2a2e 100644
--- a/src/nfa.rs
+++ b/src/nfa.rs
@@ -7,13 +7,14 @@ pub enum CharacterSet {
     Exclude(Vec<char>),
 }
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Eq)]
 pub enum NfaState {
     Advance(CharacterSet, u32),
     Split(u32, u32),
     Accept,
 }
 
+#[derive(PartialEq, Eq)]
 pub struct Nfa {
     pub states: Vec<NfaState>
 }
diff --git a/src/prepare_grammar/expand_repeats.rs b/src/prepare_grammar/expand_repeats.rs
index dcb8f916..85f37c80 100644
--- a/src/prepare_grammar/expand_repeats.rs
+++ b/src/prepare_grammar/expand_repeats.rs
@@ -3,7 +3,7 @@ use crate::grammars::{Variable, VariableType};
 use std::collections::HashMap;
 use std::mem;
 use std::rc::Rc;
-use super::ExtractedGrammar;
+use super::ExtractedSyntaxGrammar;
 
 struct Expander {
     variable_name: String,
@@ -25,16 +25,11 @@ impl Expander {
 
     fn expand_rule(&mut self, rule: &Rule) -> Rule {
         match rule {
-            Rule::Choice { elements } =>
-                Rule::Choice {
-                    elements: elements.iter().map(|element| self.expand_rule(element)).collect()
-                },
+            Rule::Choice(elements) =>
+                Rule::Choice(elements.iter().map(|element| self.expand_rule(element)).collect()),
 
-            Rule::Seq { left, right } =>
-                Rule::Seq {
-                    left: Rc::new(self.expand_rule(left)),
-                    right: Rc::new(self.expand_rule(right)),
-                },
+            Rule::Seq(elements) =>
+                Rule::Seq(elements.iter().map(|element| self.expand_rule(element)).collect()),
 
             Rule::Repeat(content) => {
                 let inner_rule = self.expand_rule(content);
@@ -46,27 +41,24 @@ impl Expander {
                 self.repeat_count_in_variable += 1;
                 let rule_name = format!("{}_repeat{}", self.variable_name, self.repeat_count_in_variable);
                 let repeat_symbol = Symbol::non_terminal(self.preceding_symbol_count + self.auxiliary_variables.len());
-                let rc_symbol = Rc::new(Rule::Symbol(repeat_symbol));
                 self.existing_repeats.insert(inner_rule.clone(), repeat_symbol);
                 self.auxiliary_variables.push(Variable {
                     name: rule_name,
                     kind: VariableType::Auxiliary,
-                    rule: Rule::Choice {
-                        elements: vec![
-                            Rule::Seq {
-                                left: rc_symbol.clone(),
-                                right: rc_symbol
-                            },
-                            inner_rule
-                        ],
-                    },
+                    rule: Rule::Choice(vec![
+                        Rule::Seq(vec![
+                            Rule::Symbol(repeat_symbol),
+                            Rule::Symbol(repeat_symbol),
+                        ]),
+                        inner_rule
+                    ]),
                 });
 
                 Rule::Symbol(repeat_symbol)
             }
 
             Rule::Metadata { rule, params } => Rule::Metadata {
-                rule: Rc::new(self.expand_rule(rule)),
+                rule: Box::new(self.expand_rule(rule)),
                 params: params.clone()
             },
 
@@ -75,7 +67,7 @@
     }
 }
 
-pub(super) fn expand_repeats(mut grammar: ExtractedGrammar) -> ExtractedGrammar {
+pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSyntaxGrammar {
     let mut expander = Expander {
         variable_name: String::new(),
         repeat_count_in_variable: 0,
@@ -207,8 +199,8 @@ mod tests {
         ]);
     }
 
-    fn build_grammar(variables: Vec<Variable>) -> ExtractedGrammar {
-        ExtractedGrammar {
+    fn build_grammar(variables: Vec<Variable>) -> ExtractedSyntaxGrammar {
+        ExtractedSyntaxGrammar {
             variables,
             extra_tokens: Vec::new(),
             external_tokens: Vec::new(),
diff --git a/src/prepare_grammar/normalize_rules.rs b/src/prepare_grammar/expand_tokens.rs
similarity index 61%
rename from src/prepare_grammar/normalize_rules.rs
rename to src/prepare_grammar/expand_tokens.rs
index 67177b4f..9cfa819f 100644
--- a/src/prepare_grammar/normalize_rules.rs
+++ b/src/prepare_grammar/expand_tokens.rs
@@ -1,10 +1,11 @@
 use crate::error::{Error, Result};
 use crate::rules::Rule;
-use crate::grammars::LexicalGrammar;
-use crate::nfa::{Nfa, NfaState, NfaCursor, CharacterSet};
+use crate::grammars::{LexicalGrammar, LexicalVariable};
+use crate::nfa::{Nfa, NfaState, CharacterSet};
+use super::{ExtractedLexicalGrammar};
 use regex_syntax::ast::{parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind};
 
-fn evaluate_perl_class(item: &ClassPerlKind) -> CharacterSet {
+fn expand_perl_character_class(item: &ClassPerlKind) -> CharacterSet {
     match item {
         ClassPerlKind::Digit => CharacterSet::empty()
             .add_range('0', '9'),
@@ -21,7 +22,7 @@
     }
 }
 
-fn evaluate_character_class(item: &ClassSetItem) -> Result<CharacterSet> {
+fn expand_character_class(item: &ClassSetItem) -> Result<CharacterSet> {
     match item {
         ClassSetItem::Empty(_) => Ok(CharacterSet::Include(Vec::new())),
         ClassSetItem::Literal(literal) => Ok(CharacterSet::Include(vec![literal.c])),
@@ -29,7 +30,7 @@
         ClassSetItem::Union(union) => {
             let mut result = CharacterSet::empty();
             for item in &union.items {
-                result = result.add(evaluate_character_class(&item)?);
+                result = result.add(expand_character_class(&item)?);
             }
             Ok(result)
         }
@@ -37,7 +38,7 @@
     }
 }
 
-fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
+fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
     match ast {
         Ast::Empty(_) => Ok(()),
         Ast::Flags(_) => Err(Error::regex("Flags are not supported")),
@@ -53,12 +54,12 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
         Ast::Class(class) => match class {
             Class::Unicode(_) => Err(Error::regex("Unicode character classes are not supported")),
             Class::Perl(class) => {
-                nfa.states.push(NfaState::Advance(evaluate_perl_class(&class.kind), next_state_index));
+                nfa.states.push(NfaState::Advance(expand_perl_character_class(&class.kind), next_state_index));
                 Ok(())
             },
             Class::Bracketed(class) => match &class.kind {
                 ClassSet::Item(item) => {
-                    let character_set = evaluate_character_class(&item)?;
+                    let character_set = expand_character_class(&item)?;
                     nfa.states.push(NfaState::Advance(character_set, next_state_index));
                     Ok(())
                 },
@@ -69,14 +70,14 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
         },
         Ast::Repetition(repetition) => match repetition.op.kind {
             RepetitionKind::ZeroOrOne => {
-                regex_to_nfa(&repetition.ast, nfa, next_state_index)?;
+                expand_regex(&repetition.ast, nfa, next_state_index)?;
                 nfa.prepend(|start_index| NfaState::Split(next_state_index, start_index));
                 Ok(())
             },
             RepetitionKind::OneOrMore => {
                 nfa.states.push(NfaState::Accept); // Placeholder for split
                 let split_index = nfa.start_index();
-                regex_to_nfa(&repetition.ast, nfa, split_index)?;
+                expand_regex(&repetition.ast, nfa, split_index)?;
                 nfa.states[split_index as usize] = NfaState::Split(
                     nfa.start_index(),
                     next_state_index
@@ -86,7 +87,7 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
             RepetitionKind::ZeroOrMore => {
                 nfa.states.push(NfaState::Accept); // Placeholder for split
                 let split_index = nfa.start_index();
-                regex_to_nfa(&repetition.ast, nfa, split_index)?;
+                expand_regex(&repetition.ast, nfa, split_index)?;
                 nfa.states[split_index as usize] = NfaState::Split(
                     nfa.start_index(),
                     next_state_index
@@ -96,11 +97,11 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
             },
             RepetitionKind::Range(_) => unimplemented!(),
         },
-        Ast::Group(group) => regex_to_nfa(&group.ast, nfa, nfa.start_index()),
+        Ast::Group(group) => expand_regex(&group.ast, nfa, nfa.start_index()),
         Ast::Alternation(alternation) => {
             let mut alternative_start_indices = Vec::new();
             for ast in alternation.asts.iter() {
-                regex_to_nfa(&ast, nfa, next_state_index)?;
+                expand_regex(&ast, nfa, next_state_index)?;
                 alternative_start_indices.push(nfa.start_index());
             }
             alternative_start_indices.pop();
@@ -111,7 +112,7 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
         },
         Ast::Concat(concat) => {
            for ast in concat.asts.iter().rev() {
-                regex_to_nfa(&ast, nfa, next_state_index)?;
+                expand_regex(&ast, nfa, next_state_index)?;
                next_state_index = nfa.start_index();
            }
            Ok(())
@@ -119,32 +120,77 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
     }
 }
 
-fn expand_rule(rule: Rule) -> Result<Nfa> {
+fn expand_rule(rule: Rule, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
     match rule {
         Rule::Pattern(s) => {
             let ast = parse::Parser::new().parse(&s).map_err(|e| Error::GrammarError(e.to_string()))?;
-            let mut nfa = Nfa::new();
-            regex_to_nfa(&ast, &mut nfa, 0)?;
-            Ok(nfa)
+            expand_regex(&ast, nfa, next_state_index)?;
+            Ok(())
         },
         Rule::String(s) => {
-            let mut nfa = Nfa::new();
             for c in s.chars().rev() {
                 nfa.prepend(|start_index| NfaState::Advance(CharacterSet::empty().add_char(c), start_index));
             }
-            Ok(nfa)
+            Ok(())
+        },
+        Rule::Choice(elements) => {
+            let mut alternative_start_indices = Vec::new();
+            for element in elements {
+                expand_rule(element, nfa, next_state_index)?;
+                alternative_start_indices.push(nfa.start_index());
+            }
+            alternative_start_indices.pop();
+            for alternative_start_index in alternative_start_indices {
+                nfa.prepend(|start_index| NfaState::Split(start_index, alternative_start_index));
+            }
+            Ok(())
+        },
+        Rule::Seq(elements) => {
+            for element in elements.into_iter().rev() {
+                expand_rule(element, nfa, next_state_index)?;
+                next_state_index = nfa.start_index();
+            }
+            Ok(())
+        },
+        Rule::Repeat(rule) => {
+            nfa.states.push(NfaState::Accept); // Placeholder for split
+            let split_index = nfa.start_index();
+            expand_rule(*rule, nfa, split_index)?;
+            nfa.states[split_index as usize] = NfaState::Split(
+                nfa.start_index(),
+                next_state_index
+            );
+            Ok(())
         },
         _ => Err(Error::grammar("Unexpected rule type")),
     }
 }
 
-pub(super) fn normalize_rules(grammar: LexicalGrammar) -> LexicalGrammar {
-    unimplemented!();
+pub(super) fn expand_tokens(grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
+    let mut variables = Vec::new();
+    for variable in grammar.variables {
+        let mut nfa = Nfa::new();
+        expand_rule(variable.rule, &mut nfa, 0)?;
+        variables.push(LexicalVariable {
+            name: variable.name,
+            kind: variable.kind,
+            nfa,
+        });
+    }
+    let mut separators = Vec::new();
+    for separator in grammar.separators {
+        let mut nfa = Nfa::new();
+        expand_rule(separator, &mut nfa, 0)?;
+        separators.push(nfa);
+    }
+
+    Ok(LexicalGrammar { variables, separators })
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::nfa::NfaCursor;
 
     fn simulate_nfa<'a>(nfa: &'a Nfa, s: &'a str) -> Option<&'a str> {
         let mut result = None;
@@ -164,15 +210,15 @@ mod tests {
     }
 
     #[test]
-    fn test_regex_expansion() {
+    fn test_rule_expansion() {
         struct Row {
-            pattern: &'static str,
+            rule: Rule,
             examples: Vec<(&'static str, Option<&'static str>)>,
         }
 
         let table = [
             Row {
-                pattern: "a|bc",
+                rule: Rule::pattern("a|bc"),
                 examples: vec![
                     ("a12", Some("a")),
                     ("bc12", Some("bc")),
@@ -181,7 +227,7 @@ mod tests {
                 ],
             },
             Row {
-                pattern: "(a|b|c)d(e|f|g)h?",
+                rule: Rule::pattern("(a|b|c)d(e|f|g)h?"),
                 examples: vec![
                     ("ade1", Some("ade")),
                     ("bdf1", Some("bdf")),
@@ -190,14 +236,14 @@ mod tests {
                 ],
             },
             Row {
-                pattern: "a*",
+                rule: Rule::pattern("a*"),
                 examples: vec![
                     ("aaa1", Some("aaa")),
                     ("b", Some("")),
                 ],
             },
             Row {
-                pattern: "a((bc)+|(de)*)f",
+                rule: Rule::pattern("a((bc)+|(de)*)f"),
                 examples: vec![
                     ("af1", Some("af")),
                     ("adedef1", Some("adedef")),
@@ -206,21 +252,41 @@ mod tests {
                 ],
             },
             Row {
-                pattern: "[a-fA-F0-9]+",
+                rule: Rule::pattern("[a-fA-F0-9]+"),
                 examples: vec![
                     ("A1ff0", Some("A1ff")),
                 ],
             },
             Row {
-                pattern: "\\w\\d\\s",
+                rule: Rule::pattern("\\w\\d\\s"),
                 examples: vec![
                     ("_0 ", Some("_0 ")),
                 ],
             },
+            Row {
+                rule: Rule::string("abc"),
+                examples: vec![
+                    ("abcd", Some("abc")),
+                    ("ab", None),
+                ],
+            },
+            Row {
+                rule: Rule::repeat(Rule::seq(vec![
+                    Rule::string("{"),
+                    Rule::pattern("[a-f]+"),
+                    Rule::string("}"),
+                ])),
+                examples: vec![
+                    ("{a}{", Some("{a}")),
+                    ("{a}{d", Some("{a}")),
+                    ("ab", None),
+                ],
+            },
         ];
 
-        for Row { pattern, examples } in table.iter() {
-            let nfa = expand_rule(Rule::pattern(pattern)).unwrap();
+        for Row { rule, examples } in table.iter() {
+            let mut nfa = Nfa::new();
+            expand_rule(rule.clone(), &mut nfa, 0).unwrap();
             for (haystack, needle) in examples.iter() {
                 assert_eq!(simulate_nfa(&nfa, haystack), *needle);
             }
diff --git a/src/prepare_grammar/extract_simple_aliases.rs b/src/prepare_grammar/extract_simple_aliases.rs
index 250246f3..2a175242 100644
--- a/src/prepare_grammar/extract_simple_aliases.rs
+++ b/src/prepare_grammar/extract_simple_aliases.rs
@@ -1,6 +1,5 @@
 use crate::rules::AliasMap;
 use crate::grammars::{LexicalGrammar, SyntaxGrammar};
-use super::ExtractedGrammar;
 
 pub(super) fn extract_simple_aliases(
     syntax_grammar: &mut SyntaxGrammar,
diff --git a/src/prepare_grammar/extract_tokens.rs b/src/prepare_grammar/extract_tokens.rs
index ee90b3c8..7322516f 100644
--- a/src/prepare_grammar/extract_tokens.rs
+++ b/src/prepare_grammar/extract_tokens.rs
@@ -3,12 +3,12 @@
 use std::rc::Rc;
 use std::mem;
 use crate::error::{Error, Result};
 use crate::rules::{Rule, MetadataParams, Symbol, SymbolType};
-use crate::grammars::{Variable, VariableType, LexicalGrammar, ExternalToken};
-use super::{InternedGrammar, ExtractedGrammar};
+use crate::grammars::{Variable, ExternalToken};
+use super::{InternedGrammar, ExtractedSyntaxGrammar, ExtractedLexicalGrammar};
 
 pub(super) fn extract_tokens(
     mut grammar: InternedGrammar
-) -> Result<(ExtractedGrammar, LexicalGrammar)> {
+) -> Result<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> {
     let mut extractor = TokenExtractor {
         current_variable_name: String::new(),
         current_variable_token_count: 0,
@@ -138,7 +138,7 @@ pub(super) fn extract_tokens(
     }
 
     Ok((
-        ExtractedGrammar {
+        ExtractedSyntaxGrammar {
             variables,
             expected_conflicts,
             extra_tokens,
@@ -146,7 +146,7 @@ pub(super) fn extract_tokens(
             external_tokens,
             word_token,
         },
-        LexicalGrammar {
+        ExtractedLexicalGrammar {
             variables: lexical_variables,
             separators,
         }
@@ -198,20 +198,19 @@ impl TokenExtractor {
                 } else {
                     Rule::Metadata {
                         params: params.clone(),
-                        rule: Rc::new(self.extract_tokens_in_rule((&rule).clone()))
+                        rule: Box::new(self.extract_tokens_in_rule((&rule).clone()))
                     }
                 }
            },
            Rule::Repeat(content) => Rule::Repeat(
-                Rc::new(self.extract_tokens_in_rule(content))
+                Box::new(self.extract_tokens_in_rule(content))
+            ),
+            Rule::Seq(elements) => Rule::Seq(
+                elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect()
+            ),
+            Rule::Choice(elements) => Rule::Choice(
+                elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect()
             ),
-            Rule::Seq { left, right } => Rule::Seq {
-                left: Rc::new(self.extract_tokens_in_rule(left)),
-                right: Rc::new(self.extract_tokens_in_rule(right)),
-            },
-            Rule::Choice { elements } => Rule::Choice {
-                elements: elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect()
-            },
             _ => input.clone()
         }
     }
@@ -249,19 +248,18 @@ impl SymbolReplacer {
     fn replace_symbols_in_rule(&mut self, rule: &Rule) -> Rule {
         match rule {
             Rule::Symbol(symbol) => self.replace_symbol(*symbol).into(),
-            Rule::Choice { elements } => Rule::Choice {
-                elements: elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect()
-            },
-            Rule::Seq { left, right } => Rule::Seq {
-                left: Rc::new(self.replace_symbols_in_rule(left)),
-                right: Rc::new(self.replace_symbols_in_rule(right)),
-            },
+            Rule::Choice(elements) => Rule::Choice(
+                elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect()
+            ),
+            Rule::Seq(elements) => Rule::Seq(
+                elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect()
+            ),
             Rule::Repeat(content) => Rule::Repeat(
-                Rc::new(self.replace_symbols_in_rule(content))
+                Box::new(self.replace_symbols_in_rule(content))
             ),
             Rule::Metadata { rule, params } => Rule::Metadata {
                 params: params.clone(),
-                rule: Rc::new(self.replace_symbols_in_rule(rule)),
+                rule: Box::new(self.replace_symbols_in_rule(rule)),
             },
             _ => rule.clone()
         }
@@ -290,6 +288,7 @@
 #[cfg(test)]
 mod test {
     use super::*;
+    use crate::grammars::VariableType;
 
     #[test]
     fn test_extraction() {
diff --git a/src/prepare_grammar/flatten_grammar.rs b/src/prepare_grammar/flatten_grammar.rs
index 36fe76c9..0f09cd14 100644
--- a/src/prepare_grammar/flatten_grammar.rs
+++ b/src/prepare_grammar/flatten_grammar.rs
@@ -1,7 +1,7 @@
 use crate::error::Result;
 use crate::grammars::SyntaxGrammar;
-use super::ExtractedGrammar;
+use super::ExtractedSyntaxGrammar;
 
-pub(super) fn flatten_grammar(grammar: ExtractedGrammar) -> Result<SyntaxGrammar> {
+pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
     unimplemented!();
 }
diff --git a/src/prepare_grammar/intern_symbols.rs b/src/prepare_grammar/intern_symbols.rs
index e4cf7ff1..17132262 100644
--- a/src/prepare_grammar/intern_symbols.rs
+++ b/src/prepare_grammar/intern_symbols.rs
@@ -80,26 +80,26 @@ struct Interner<'a> {
 impl<'a> Interner<'a> {
     fn intern_rule(&self, rule: &Rule) -> Result<Rule> {
         match rule {
-            Rule::Choice { elements } => {
+            Rule::Choice(elements) => {
                 let mut result = Vec::with_capacity(elements.len());
                 for element in elements {
                     result.push(self.intern_rule(element)?);
                 }
-                Ok(Rule::Choice { elements: result })
+                Ok(Rule::Choice(result))
             },
-
-            Rule::Seq { left, right } =>
-                Ok(Rule::Seq {
-                    left: Rc::new(self.intern_rule(left)?),
-                    right: Rc::new(self.intern_rule(right)?),
-                }),
-
-            Rule::Repeat(content) =>
-                Ok(Rule::Repeat(Rc::new(self.intern_rule(content)?))),
-
+            Rule::Seq(elements) => {
+                let mut result = Vec::with_capacity(elements.len());
+                for element in elements {
+                    result.push(self.intern_rule(element)?);
+                }
+                Ok(Rule::Seq(result))
+            },
+            Rule::Repeat(content) => Ok(Rule::Repeat(
+                Box::new(self.intern_rule(content)?)
+            )),
             Rule::Metadata { rule, params } => Ok(Rule::Metadata {
-                rule: Rc::new(self.intern_rule(rule)?),
+                rule: Box::new(self.intern_rule(rule)?),
                 params: params.clone()
             }),
 
diff --git a/src/prepare_grammar/mod.rs b/src/prepare_grammar/mod.rs
index b860807a..e2615479 100644
--- a/src/prepare_grammar/mod.rs
+++ b/src/prepare_grammar/mod.rs
@@ -2,7 +2,7 @@ mod intern_symbols;
 mod extract_tokens;
 mod expand_repeats;
 mod flatten_grammar;
-mod normalize_rules;
+mod expand_tokens;
 mod extract_simple_aliases;
 
 use crate::rules::{AliasMap, Rule, Symbol};
@@ -12,7 +12,7 @@
 use self::intern_symbols::intern_symbols;
 use self::extract_tokens::extract_tokens;
 use self::expand_repeats::expand_repeats;
 use self::flatten_grammar::flatten_grammar;
-use self::normalize_rules::normalize_rules;
+use self::expand_tokens::expand_tokens;
 use self::extract_simple_aliases::extract_simple_aliases;
 
 pub(self) struct IntermediateGrammar {
@@ -25,7 +25,14 @@ pub(self) struct IntermediateGrammar {
 }
 
 pub(self) type InternedGrammar = IntermediateGrammar;
-pub(self) type ExtractedGrammar = IntermediateGrammar;
+
+pub(self) type ExtractedSyntaxGrammar = IntermediateGrammar;
+
+#[derive(Debug, PartialEq, Eq)]
+pub(self) struct ExtractedLexicalGrammar {
+    variables: Vec<Variable>,
+    separators: Vec<Rule>,
+}
 
 pub(crate) fn prepare_grammar(
     input_grammar: &InputGrammar
@@ -34,7 +41,7 @@ pub(crate) fn prepare_grammar(
     let interned_grammar = intern_symbols(input_grammar)?;
     let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
     let syntax_grammar = expand_repeats(syntax_grammar);
     let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
-    let mut lexical_grammar = normalize_rules(lexical_grammar);
+    let mut lexical_grammar = expand_tokens(lexical_grammar)?;
     let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &mut lexical_grammar);
     Ok((syntax_grammar, lexical_grammar, simple_aliases))
 }
diff --git a/src/rules.rs b/src/rules.rs
index b593496a..c6f18cf4 100644
--- a/src/rules.rs
+++ b/src/rules.rs
@@ -49,18 +49,13 @@ pub(crate) enum Rule {
     Pattern(String),
     NamedSymbol(String),
     Symbol(Symbol),
-    Choice {
-        elements: Vec<Rule>,
-    },
+    Choice(Vec<Rule>),
     Metadata {
         params: MetadataParams,
-        rule: Rc<Rule>,
+        rule: Box<Rule>,
     },
-    Repeat(Rc<Rule>),
-    Seq {
-        left: Rc<Rule>,
-        right: Rc<Rule>,
-    }
+    Repeat(Box<Rule>),
+    Seq(Vec<Rule>),
 }
 
 impl Rule {
@@ -98,7 +93,7 @@
     }
 
     pub fn repeat(rule: Rule) -> Self {
-        Rule::Repeat(Rc::new(rule))
+        Rule::Repeat(Box::new(rule))
     }
 
     pub fn choice(rules: Vec<Rule>) -> Self {
@@ -106,32 +101,11 @@
         for rule in rules {
             choice_helper(&mut elements, rule);
         }
-        Rule::Choice { elements }
+        Rule::Choice(elements)
     }
 
     pub fn seq(rules: Vec<Rule>) -> Self {
-        let mut result = Rule::Blank;
-        for rule in rules {
-            match rule {
-                Rule::Blank => continue,
-                Rule::Metadata { rule, params: _ } => {
-                    if *rule == Rule::Blank {
-                        continue;
-                    }
-                },
-                _ => {
-                    if result == Rule::Blank {
-                        result = rule;
-                    } else {
-                        result = Rule::Seq {
-                            left: Rc::new(result),
-                            right: Rc::new(rule),
-                        }
-                    }
-                }
-            }
-        }
-        result
+        Rule::Seq(rules)
     }
 
     pub fn terminal(index: usize) -> Self {
@@ -196,14 +170,14 @@ fn add_metadata(input: Rule, f: T) -> Rule {
         _ => {
             let mut params = MetadataParams::default();
             f(&mut params);
-            Rule::Metadata { rule: Rc::new(input), params }
+            Rule::Metadata { rule: Box::new(input), params }
         }
     }
 }
 
 fn choice_helper(result: &mut Vec<Rule>, rule: Rule) {
     match rule {
-        Rule::Choice {elements} => {
+        Rule::Choice(elements) => {
             for element in elements {
                 choice_helper(result, element);
             }