Implement expand_tokens
This commit is contained in:
parent
ead6ca1738
commit
d482894c7d
11 changed files with 192 additions and 139 deletions
|
|
@ -1,4 +1,5 @@
|
|||
use crate::rules::{Associativity, Alias, Rule, Symbol};
|
||||
use crate::nfa::Nfa;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum VariableType {
|
||||
|
|
@ -30,10 +31,17 @@ pub(crate) struct InputGrammar {
|
|||
|
||||
// Extracted lexical grammar
|
||||
|
||||
/// A variable of the lexical grammar: a name and a variable type paired with
/// the NFA built for it.
#[derive(Debug, PartialEq, Eq)]
|
||||
pub(crate) struct LexicalVariable {
|
||||
/// Name of the variable; `expand_tokens` copies it from the extracted variable.
pub name: String,
|
||||
/// Variable type; `expand_tokens` copies it from the extracted variable.
pub kind: VariableType,
|
||||
/// NFA that `expand_tokens` fills by expanding the variable's rule.
pub nfa: Nfa,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub(crate) struct LexicalGrammar {
|
||||
pub variables: Vec<Variable>,
|
||||
pub separators: Vec<Rule>,
|
||||
pub variables: Vec<LexicalVariable>,
|
||||
pub separators: Vec<Nfa>,
|
||||
}
|
||||
|
||||
// Extracted syntax grammar
|
||||
|
|
|
|||
11
src/main.rs
11
src/main.rs
|
|
@ -14,7 +14,7 @@ mod render;
|
|||
mod rules;
|
||||
mod tables;
|
||||
|
||||
fn main() {
|
||||
fn main() -> error::Result<()> {
|
||||
let matches = App::new("tree-sitter")
|
||||
.version("0.1")
|
||||
.author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
|
||||
|
|
@ -32,5 +32,12 @@ fn main() {
|
|||
.arg(Arg::with_name("path").index(1).required(true))
|
||||
.arg(Arg::with_name("line").index(2).required(true))
|
||||
.arg(Arg::with_name("column").index(3).required(true))
|
||||
);
|
||||
).get_matches();
|
||||
|
||||
if let Some(matches) = matches.subcommand_matches("generate") {
|
||||
let code = generate::generate_parser_for_grammar(String::new())?;
|
||||
println!("{}", code);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,13 +7,14 @@ pub enum CharacterSet {
|
|||
Exclude(Vec<char>),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum NfaState {
|
||||
Advance(CharacterSet, u32),
|
||||
Split(u32, u32),
|
||||
Accept,
|
||||
}
|
||||
|
||||
/// A nondeterministic finite automaton, stored as a flat list of states.
#[derive(PartialEq, Eq)]
|
||||
pub struct Nfa {
|
||||
// The states of the automaton. `NfaState::Advance` and `NfaState::Split`
// carry `u32` values; presumably these are indices into this vector
// (the token-expansion code indexes `states` with them) — confirm.
pub states: Vec<NfaState>
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ use crate::grammars::{Variable, VariableType};
|
|||
use std::collections::HashMap;
|
||||
use std::mem;
|
||||
use std::rc::Rc;
|
||||
use super::ExtractedGrammar;
|
||||
use super::ExtractedSyntaxGrammar;
|
||||
|
||||
struct Expander {
|
||||
variable_name: String,
|
||||
|
|
@ -25,16 +25,11 @@ impl Expander {
|
|||
|
||||
fn expand_rule(&mut self, rule: &Rule) -> Rule {
|
||||
match rule {
|
||||
Rule::Choice { elements } =>
|
||||
Rule::Choice {
|
||||
elements: elements.iter().map(|element| self.expand_rule(element)).collect()
|
||||
},
|
||||
Rule::Choice(elements) =>
|
||||
Rule::Choice(elements.iter().map(|element| self.expand_rule(element)).collect()),
|
||||
|
||||
Rule::Seq { left, right } =>
|
||||
Rule::Seq {
|
||||
left: Rc::new(self.expand_rule(left)),
|
||||
right: Rc::new(self.expand_rule(right)),
|
||||
},
|
||||
Rule::Seq(elements) =>
|
||||
Rule::Seq(elements.iter().map(|element| self.expand_rule(element)).collect()),
|
||||
|
||||
Rule::Repeat(content) => {
|
||||
let inner_rule = self.expand_rule(content);
|
||||
|
|
@ -46,27 +41,24 @@ impl Expander {
|
|||
self.repeat_count_in_variable += 1;
|
||||
let rule_name = format!("{}_repeat{}", self.variable_name, self.repeat_count_in_variable);
|
||||
let repeat_symbol = Symbol::non_terminal(self.preceding_symbol_count + self.auxiliary_variables.len());
|
||||
let rc_symbol = Rc::new(Rule::Symbol(repeat_symbol));
|
||||
self.existing_repeats.insert(inner_rule.clone(), repeat_symbol);
|
||||
self.auxiliary_variables.push(Variable {
|
||||
name: rule_name,
|
||||
kind: VariableType::Auxiliary,
|
||||
rule: Rule::Choice {
|
||||
elements: vec![
|
||||
Rule::Seq {
|
||||
left: rc_symbol.clone(),
|
||||
right: rc_symbol
|
||||
},
|
||||
inner_rule
|
||||
],
|
||||
},
|
||||
rule: Rule::Choice(vec![
|
||||
Rule::Seq(vec![
|
||||
Rule::Symbol(repeat_symbol),
|
||||
Rule::Symbol(repeat_symbol),
|
||||
]),
|
||||
inner_rule
|
||||
]),
|
||||
});
|
||||
|
||||
Rule::Symbol(repeat_symbol)
|
||||
}
|
||||
|
||||
Rule::Metadata { rule, params } => Rule::Metadata {
|
||||
rule: Rc::new(self.expand_rule(rule)),
|
||||
rule: Box::new(self.expand_rule(rule)),
|
||||
params: params.clone()
|
||||
},
|
||||
|
||||
|
|
@ -75,7 +67,7 @@ impl Expander {
|
|||
}
|
||||
}
|
||||
|
||||
pub(super) fn expand_repeats(mut grammar: ExtractedGrammar) -> ExtractedGrammar {
|
||||
pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSyntaxGrammar {
|
||||
let mut expander = Expander {
|
||||
variable_name: String::new(),
|
||||
repeat_count_in_variable: 0,
|
||||
|
|
@ -207,8 +199,8 @@ mod tests {
|
|||
]);
|
||||
}
|
||||
|
||||
fn build_grammar(variables: Vec<Variable>) -> ExtractedGrammar {
|
||||
ExtractedGrammar {
|
||||
fn build_grammar(variables: Vec<Variable>) -> ExtractedSyntaxGrammar {
|
||||
ExtractedSyntaxGrammar {
|
||||
variables,
|
||||
extra_tokens: Vec::new(),
|
||||
external_tokens: Vec::new(),
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
use crate::error::{Error, Result};
|
||||
use crate::rules::Rule;
|
||||
use crate::grammars::LexicalGrammar;
|
||||
use crate::nfa::{Nfa, NfaState, NfaCursor, CharacterSet};
|
||||
use crate::grammars::{LexicalGrammar, LexicalVariable};
|
||||
use crate::nfa::{Nfa, NfaState, CharacterSet};
|
||||
use super::{ExtractedLexicalGrammar};
|
||||
use regex_syntax::ast::{parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind};
|
||||
|
||||
fn evaluate_perl_class(item: &ClassPerlKind) -> CharacterSet {
|
||||
fn expand_perl_character_class(item: &ClassPerlKind) -> CharacterSet {
|
||||
match item {
|
||||
ClassPerlKind::Digit => CharacterSet::empty()
|
||||
.add_range('0', '9'),
|
||||
|
|
@ -21,7 +22,7 @@ fn evaluate_perl_class(item: &ClassPerlKind) -> CharacterSet {
|
|||
}
|
||||
}
|
||||
|
||||
fn evaluate_character_class(item: &ClassSetItem) -> Result<CharacterSet> {
|
||||
fn expand_character_class(item: &ClassSetItem) -> Result<CharacterSet> {
|
||||
match item {
|
||||
ClassSetItem::Empty(_) => Ok(CharacterSet::Include(Vec::new())),
|
||||
ClassSetItem::Literal(literal) => Ok(CharacterSet::Include(vec![literal.c])),
|
||||
|
|
@ -29,7 +30,7 @@ fn evaluate_character_class(item: &ClassSetItem) -> Result<CharacterSet> {
|
|||
ClassSetItem::Union(union) => {
|
||||
let mut result = CharacterSet::empty();
|
||||
for item in &union.items {
|
||||
result = result.add(evaluate_character_class(&item)?);
|
||||
result = result.add(expand_character_class(&item)?);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
|
@ -37,7 +38,7 @@ fn evaluate_character_class(item: &ClassSetItem) -> Result<CharacterSet> {
|
|||
}
|
||||
}
|
||||
|
||||
fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
|
||||
fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
|
||||
match ast {
|
||||
Ast::Empty(_) => Ok(()),
|
||||
Ast::Flags(_) => Err(Error::regex("Flags are not supported")),
|
||||
|
|
@ -53,12 +54,12 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<(
|
|||
Ast::Class(class) => match class {
|
||||
Class::Unicode(_) => Err(Error::regex("Unicode character classes are not supported")),
|
||||
Class::Perl(class) => {
|
||||
nfa.states.push(NfaState::Advance(evaluate_perl_class(&class.kind), next_state_index));
|
||||
nfa.states.push(NfaState::Advance(expand_perl_character_class(&class.kind), next_state_index));
|
||||
Ok(())
|
||||
},
|
||||
Class::Bracketed(class) => match &class.kind {
|
||||
ClassSet::Item(item) => {
|
||||
let character_set = evaluate_character_class(&item)?;
|
||||
let character_set = expand_character_class(&item)?;
|
||||
nfa.states.push(NfaState::Advance(character_set, next_state_index));
|
||||
Ok(())
|
||||
},
|
||||
|
|
@ -69,14 +70,14 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<(
|
|||
},
|
||||
Ast::Repetition(repetition) => match repetition.op.kind {
|
||||
RepetitionKind::ZeroOrOne => {
|
||||
regex_to_nfa(&repetition.ast, nfa, next_state_index)?;
|
||||
expand_regex(&repetition.ast, nfa, next_state_index)?;
|
||||
nfa.prepend(|start_index| NfaState::Split(next_state_index, start_index));
|
||||
Ok(())
|
||||
},
|
||||
RepetitionKind::OneOrMore => {
|
||||
nfa.states.push(NfaState::Accept); // Placeholder for split
|
||||
let split_index = nfa.start_index();
|
||||
regex_to_nfa(&repetition.ast, nfa, split_index)?;
|
||||
expand_regex(&repetition.ast, nfa, split_index)?;
|
||||
nfa.states[split_index as usize] = NfaState::Split(
|
||||
nfa.start_index(),
|
||||
next_state_index
|
||||
|
|
@ -86,7 +87,7 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<(
|
|||
RepetitionKind::ZeroOrMore => {
|
||||
nfa.states.push(NfaState::Accept); // Placeholder for split
|
||||
let split_index = nfa.start_index();
|
||||
regex_to_nfa(&repetition.ast, nfa, split_index)?;
|
||||
expand_regex(&repetition.ast, nfa, split_index)?;
|
||||
nfa.states[split_index as usize] = NfaState::Split(
|
||||
nfa.start_index(),
|
||||
next_state_index
|
||||
|
|
@ -96,11 +97,11 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<(
|
|||
},
|
||||
RepetitionKind::Range(_) => unimplemented!(),
|
||||
},
|
||||
Ast::Group(group) => regex_to_nfa(&group.ast, nfa, nfa.start_index()),
|
||||
Ast::Group(group) => expand_regex(&group.ast, nfa, nfa.start_index()),
|
||||
Ast::Alternation(alternation) => {
|
||||
let mut alternative_start_indices = Vec::new();
|
||||
for ast in alternation.asts.iter() {
|
||||
regex_to_nfa(&ast, nfa, next_state_index)?;
|
||||
expand_regex(&ast, nfa, next_state_index)?;
|
||||
alternative_start_indices.push(nfa.start_index());
|
||||
}
|
||||
alternative_start_indices.pop();
|
||||
|
|
@ -111,7 +112,7 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<(
|
|||
},
|
||||
Ast::Concat(concat) => {
|
||||
for ast in concat.asts.iter().rev() {
|
||||
regex_to_nfa(&ast, nfa, next_state_index)?;
|
||||
expand_regex(&ast, nfa, next_state_index)?;
|
||||
next_state_index = nfa.start_index();
|
||||
}
|
||||
Ok(())
|
||||
|
|
@ -119,32 +120,77 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<(
|
|||
}
|
||||
}
|
||||
|
||||
fn expand_rule(rule: Rule) -> Result<Nfa> {
|
||||
/// Expands `rule` into states added to `nfa`.
///
/// `next_state_index` is the state the expanded rule should continue to once
/// it has matched. Patterns, choices, sequences and repeats are handled;
/// any other rule type yields a grammar error.
///
/// NOTE(review): this span comes from a rendered diff, so lines of the
/// previous version of the function (`let mut nfa = Nfa::new();`,
/// `regex_to_nfa(...)`, `Ok(nfa)`) presumably appear interleaved with the
/// current ones.
fn expand_rule(rule: Rule, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> {
|
||||
match rule {
|
||||
// A regex pattern: parse it, then let `expand_regex` add its states.
// Parse failures are reported as `Error::GrammarError`.
Rule::Pattern(s) => {
|
||||
let ast = parse::Parser::new().parse(&s).map_err(|e| Error::GrammarError(e.to_string()))?;
|
||||
let mut nfa = Nfa::new();
|
||||
regex_to_nfa(&ast, &mut nfa, 0)?;
|
||||
Ok(nfa)
|
||||
expand_regex(&ast, nfa, next_state_index)?;
|
||||
Ok(())
|
||||
},
|
||||
// A literal string: one `Advance` state per character, added in reverse
// order so that the first character's state is added last.
Rule::String(s) => {
|
||||
let mut nfa = Nfa::new();
|
||||
// NOTE(review): this arm never reads `next_state_index`; each `Advance`
// targets the index that `prepend` hands to the closure. Presumably that
// matches the intended successor only when the successor is the state
// added most recently — confirm for strings nested in `Rule::Choice`,
// where every alternative is expanded with the same `next_state_index`.
for c in s.chars().rev() {
|
||||
nfa.prepend(|start_index| NfaState::Advance(CharacterSet::empty().add_char(c), start_index));
|
||||
}
|
||||
Ok(nfa)
|
||||
Ok(())
|
||||
},
|
||||
// Alternatives: expand every element with the same successor and record
// where each one starts.
Rule::Choice(elements) => {
|
||||
let mut alternative_start_indices = Vec::new();
|
||||
for element in elements {
|
||||
expand_rule(element, nfa, next_state_index)?;
|
||||
alternative_start_indices.push(nfa.start_index());
|
||||
}
|
||||
// The last alternative needs no recorded index: each `Split` below pairs
// the current start with one of the earlier alternatives.
alternative_start_indices.pop();
|
||||
for alternative_start_index in alternative_start_indices {
|
||||
nfa.prepend(|start_index| NfaState::Split(start_index, alternative_start_index));
|
||||
}
|
||||
Ok(())
|
||||
},
|
||||
// Sequence: expand the elements back to front, so each element continues
// to the start of the element that follows it.
Rule::Seq(elements) => {
|
||||
for element in elements.into_iter().rev() {
|
||||
expand_rule(element, nfa, next_state_index)?;
|
||||
next_state_index = nfa.start_index();
|
||||
}
|
||||
Ok(())
|
||||
},
|
||||
// Repetition: reserve a slot, expand the content so that it continues to
// that slot, then overwrite the slot with a `Split` between the content's
// start and the successor.
Rule::Repeat(rule) => {
|
||||
nfa.states.push(NfaState::Accept); // Placeholder for split
|
||||
// Presumably `start_index()` is the index of the state just pushed —
// it is used below to index `nfa.states`; confirm.
let split_index = nfa.start_index();
|
||||
expand_rule(*rule, nfa, split_index)?;
|
||||
nfa.states[split_index as usize] = NfaState::Split(
|
||||
nfa.start_index(),
|
||||
next_state_index
|
||||
);
|
||||
Ok(())
|
||||
},
|
||||
// Every other rule type is rejected.
_ => Err(Error::grammar("Unexpected rule type")),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn normalize_rules(grammar: LexicalGrammar) -> LexicalGrammar {
|
||||
unimplemented!();
|
||||
/// Converts an extracted lexical grammar into a `LexicalGrammar` by expanding
/// the rule of every variable and every separator into its own NFA.
///
/// Returns the first error produced by `expand_rule`, if any.
pub(super) fn expand_tokens(grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
|
||||
let mut variables = Vec::new();
|
||||
for variable in grammar.variables {
|
||||
// Each variable gets a fresh NFA. The `0` passed as the next-state index
// presumably refers to a state already present in `Nfa::new()` —
// TODO confirm against `Nfa::new`.
let mut nfa = Nfa::new();
|
||||
expand_rule(variable.rule, &mut nfa, 0)?;
|
||||
variables.push(LexicalVariable {
|
||||
name: variable.name,
|
||||
kind: variable.kind,
|
||||
nfa,
|
||||
});
|
||||
}
|
||||
// Separators are expanded the same way but keep only their NFA.
let mut separators = Vec::new();
|
||||
for separator in grammar.separators {
|
||||
let mut nfa = Nfa::new();
|
||||
expand_rule(separator, &mut nfa, 0)?;
|
||||
separators.push(nfa);
|
||||
}
|
||||
|
||||
Ok(LexicalGrammar { variables, separators })
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::nfa::NfaCursor;
|
||||
|
||||
fn simulate_nfa<'a>(nfa: &'a Nfa, s: &'a str) -> Option<&'a str> {
|
||||
let mut result = None;
|
||||
|
|
@ -164,15 +210,15 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn test_regex_expansion() {
|
||||
fn test_rule_expansion() {
|
||||
struct Row {
|
||||
pattern: &'static str,
|
||||
rule: Rule,
|
||||
examples: Vec<(&'static str, Option<&'static str>)>,
|
||||
}
|
||||
|
||||
let table = [
|
||||
Row {
|
||||
pattern: "a|bc",
|
||||
rule: Rule::pattern("a|bc"),
|
||||
examples: vec![
|
||||
("a12", Some("a")),
|
||||
("bc12", Some("bc")),
|
||||
|
|
@ -181,7 +227,7 @@ mod tests {
|
|||
],
|
||||
},
|
||||
Row {
|
||||
pattern: "(a|b|c)d(e|f|g)h?",
|
||||
rule: Rule::pattern("(a|b|c)d(e|f|g)h?"),
|
||||
examples: vec![
|
||||
("ade1", Some("ade")),
|
||||
("bdf1", Some("bdf")),
|
||||
|
|
@ -190,14 +236,14 @@ mod tests {
|
|||
],
|
||||
},
|
||||
Row {
|
||||
pattern: "a*",
|
||||
rule: Rule::pattern("a*"),
|
||||
examples: vec![
|
||||
("aaa1", Some("aaa")),
|
||||
("b", Some("")),
|
||||
],
|
||||
},
|
||||
Row {
|
||||
pattern: "a((bc)+|(de)*)f",
|
||||
rule: Rule::pattern("a((bc)+|(de)*)f"),
|
||||
examples: vec![
|
||||
("af1", Some("af")),
|
||||
("adedef1", Some("adedef")),
|
||||
|
|
@ -206,21 +252,41 @@ mod tests {
|
|||
],
|
||||
},
|
||||
Row {
|
||||
pattern: "[a-fA-F0-9]+",
|
||||
rule: Rule::pattern("[a-fA-F0-9]+"),
|
||||
examples: vec![
|
||||
("A1ff0", Some("A1ff")),
|
||||
],
|
||||
},
|
||||
Row {
|
||||
pattern: "\\w\\d\\s",
|
||||
rule: Rule::pattern("\\w\\d\\s"),
|
||||
examples: vec![
|
||||
("_0 ", Some("_0 ")),
|
||||
],
|
||||
},
|
||||
Row {
|
||||
rule: Rule::string("abc"),
|
||||
examples: vec![
|
||||
("abcd", Some("abc")),
|
||||
("ab", None),
|
||||
],
|
||||
},
|
||||
Row {
|
||||
rule: Rule::repeat(Rule::seq(vec![
|
||||
Rule::string("{"),
|
||||
Rule::pattern("[a-f]+"),
|
||||
Rule::string("}"),
|
||||
])),
|
||||
examples: vec![
|
||||
("{a}{", Some("{a}")),
|
||||
("{a}{d", Some("{a}")),
|
||||
("ab", None),
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
for Row { pattern, examples } in table.iter() {
|
||||
let nfa = expand_rule(Rule::pattern(pattern)).unwrap();
|
||||
for Row { rule, examples } in table.iter() {
|
||||
let mut nfa = Nfa::new();
|
||||
expand_rule(rule.clone(), &mut nfa, 0).unwrap();
|
||||
for (haystack, needle) in examples.iter() {
|
||||
assert_eq!(simulate_nfa(&nfa, haystack), *needle);
|
||||
}
|
||||
|
|
@ -1,6 +1,5 @@
|
|||
use crate::rules::AliasMap;
|
||||
use crate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use super::ExtractedGrammar;
|
||||
|
||||
pub(super) fn extract_simple_aliases(
|
||||
syntax_grammar: &mut SyntaxGrammar,
|
||||
|
|
|
|||
|
|
@ -3,12 +3,12 @@ use std::rc::Rc;
|
|||
use std::mem;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::rules::{Rule, MetadataParams, Symbol, SymbolType};
|
||||
use crate::grammars::{Variable, VariableType, LexicalGrammar, ExternalToken};
|
||||
use super::{InternedGrammar, ExtractedGrammar};
|
||||
use crate::grammars::{Variable, ExternalToken};
|
||||
use super::{InternedGrammar, ExtractedSyntaxGrammar, ExtractedLexicalGrammar};
|
||||
|
||||
pub(super) fn extract_tokens(
|
||||
mut grammar: InternedGrammar
|
||||
) -> Result<(ExtractedGrammar, LexicalGrammar)> {
|
||||
) -> Result<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> {
|
||||
let mut extractor = TokenExtractor {
|
||||
current_variable_name: String::new(),
|
||||
current_variable_token_count: 0,
|
||||
|
|
@ -138,7 +138,7 @@ pub(super) fn extract_tokens(
|
|||
}
|
||||
|
||||
Ok((
|
||||
ExtractedGrammar {
|
||||
ExtractedSyntaxGrammar {
|
||||
variables,
|
||||
expected_conflicts,
|
||||
extra_tokens,
|
||||
|
|
@ -146,7 +146,7 @@ pub(super) fn extract_tokens(
|
|||
external_tokens,
|
||||
word_token,
|
||||
},
|
||||
LexicalGrammar {
|
||||
ExtractedLexicalGrammar {
|
||||
variables: lexical_variables,
|
||||
separators,
|
||||
}
|
||||
|
|
@ -198,20 +198,19 @@ impl TokenExtractor {
|
|||
} else {
|
||||
Rule::Metadata {
|
||||
params: params.clone(),
|
||||
rule: Rc::new(self.extract_tokens_in_rule((&rule).clone()))
|
||||
rule: Box::new(self.extract_tokens_in_rule((&rule).clone()))
|
||||
}
|
||||
}
|
||||
},
|
||||
Rule::Repeat(content) => Rule::Repeat(
|
||||
Rc::new(self.extract_tokens_in_rule(content))
|
||||
Box::new(self.extract_tokens_in_rule(content))
|
||||
),
|
||||
Rule::Seq(elements) => Rule::Seq(
|
||||
elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect()
|
||||
),
|
||||
Rule::Choice(elements) => Rule::Choice(
|
||||
elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect()
|
||||
),
|
||||
Rule::Seq { left, right } => Rule::Seq {
|
||||
left: Rc::new(self.extract_tokens_in_rule(left)),
|
||||
right: Rc::new(self.extract_tokens_in_rule(right)),
|
||||
},
|
||||
Rule::Choice { elements } => Rule::Choice {
|
||||
elements: elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect()
|
||||
},
|
||||
_ => input.clone()
|
||||
}
|
||||
}
|
||||
|
|
@ -249,19 +248,18 @@ impl SymbolReplacer {
|
|||
fn replace_symbols_in_rule(&mut self, rule: &Rule) -> Rule {
|
||||
match rule {
|
||||
Rule::Symbol(symbol) => self.replace_symbol(*symbol).into(),
|
||||
Rule::Choice { elements } => Rule::Choice {
|
||||
elements: elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect()
|
||||
},
|
||||
Rule::Seq { left, right } => Rule::Seq {
|
||||
left: Rc::new(self.replace_symbols_in_rule(left)),
|
||||
right: Rc::new(self.replace_symbols_in_rule(right)),
|
||||
},
|
||||
Rule::Choice(elements) => Rule::Choice(
|
||||
elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect()
|
||||
),
|
||||
Rule::Seq(elements) => Rule::Seq(
|
||||
elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect()
|
||||
),
|
||||
Rule::Repeat(content) => Rule::Repeat(
|
||||
Rc::new(self.replace_symbols_in_rule(content))
|
||||
Box::new(self.replace_symbols_in_rule(content))
|
||||
),
|
||||
Rule::Metadata { rule, params } => Rule::Metadata {
|
||||
params: params.clone(),
|
||||
rule: Rc::new(self.replace_symbols_in_rule(rule)),
|
||||
rule: Box::new(self.replace_symbols_in_rule(rule)),
|
||||
},
|
||||
_ => rule.clone()
|
||||
}
|
||||
|
|
@ -290,6 +288,7 @@ impl SymbolReplacer {
|
|||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::grammars::VariableType;
|
||||
|
||||
#[test]
|
||||
fn test_extraction() {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
use crate::error::Result;
|
||||
use crate::grammars::SyntaxGrammar;
|
||||
use super::ExtractedGrammar;
|
||||
use super::ExtractedSyntaxGrammar;
|
||||
|
||||
pub(super) fn flatten_grammar(grammar: ExtractedGrammar) -> Result<SyntaxGrammar> {
|
||||
pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
|
||||
unimplemented!();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -80,26 +80,26 @@ struct Interner<'a> {
|
|||
impl<'a> Interner<'a> {
|
||||
fn intern_rule(&self, rule: &Rule) -> Result<Rule> {
|
||||
match rule {
|
||||
Rule::Choice { elements } => {
|
||||
Rule::Choice(elements) => {
|
||||
let mut result = Vec::with_capacity(elements.len());
|
||||
for element in elements {
|
||||
result.push(self.intern_rule(element)?);
|
||||
}
|
||||
Ok(Rule::Choice { elements: result })
|
||||
Ok(Rule::Choice(result))
|
||||
},
|
||||
|
||||
Rule::Seq { left, right } =>
|
||||
Ok(Rule::Seq {
|
||||
left: Rc::new(self.intern_rule(left)?),
|
||||
right: Rc::new(self.intern_rule(right)?),
|
||||
}),
|
||||
|
||||
Rule::Repeat(content) =>
|
||||
Ok(Rule::Repeat(Rc::new(self.intern_rule(content)?))),
|
||||
|
||||
Rule::Seq(elements) => {
|
||||
let mut result = Vec::with_capacity(elements.len());
|
||||
for element in elements {
|
||||
result.push(self.intern_rule(element)?);
|
||||
}
|
||||
Ok(Rule::Seq(result))
|
||||
},
|
||||
Rule::Repeat(content) => Ok(Rule::Repeat(
|
||||
Box::new(self.intern_rule(content)?)
|
||||
)),
|
||||
Rule::Metadata { rule, params } =>
|
||||
Ok(Rule::Metadata {
|
||||
rule: Rc::new(self.intern_rule(rule)?),
|
||||
rule: Box::new(self.intern_rule(rule)?),
|
||||
params: params.clone()
|
||||
}),
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ mod intern_symbols;
|
|||
mod extract_tokens;
|
||||
mod expand_repeats;
|
||||
mod flatten_grammar;
|
||||
mod normalize_rules;
|
||||
mod expand_tokens;
|
||||
mod extract_simple_aliases;
|
||||
|
||||
use crate::rules::{AliasMap, Rule, Symbol};
|
||||
|
|
@ -12,7 +12,7 @@ use self::intern_symbols::intern_symbols;
|
|||
use self::extract_tokens::extract_tokens;
|
||||
use self::expand_repeats::expand_repeats;
|
||||
use self::flatten_grammar::flatten_grammar;
|
||||
use self::normalize_rules::normalize_rules;
|
||||
use self::expand_tokens::expand_tokens;
|
||||
use self::extract_simple_aliases::extract_simple_aliases;
|
||||
|
||||
pub(self) struct IntermediateGrammar<T, U> {
|
||||
|
|
@ -25,7 +25,14 @@ pub(self) struct IntermediateGrammar<T, U> {
|
|||
}
|
||||
|
||||
pub(self) type InternedGrammar = IntermediateGrammar<Rule, Variable>;
|
||||
pub(self) type ExtractedGrammar = IntermediateGrammar<Symbol, ExternalToken>;
|
||||
|
||||
pub(self) type ExtractedSyntaxGrammar = IntermediateGrammar<Symbol, ExternalToken>;
|
||||
|
||||
/// Lexical grammar in the form returned by `extract_tokens`: variables and
/// separators still hold `Rule`s. `expand_tokens` consumes this type and
/// produces the final `LexicalGrammar`.
#[derive(Debug, PartialEq, Eq)]
|
||||
pub(self) struct ExtractedLexicalGrammar {
|
||||
// Lexical variables, each still carrying its rule.
variables: Vec<Variable>,
|
||||
// Separator rules; `expand_tokens` expands each one into an NFA.
separators: Vec<Rule>,
|
||||
}
|
||||
|
||||
pub(crate) fn prepare_grammar(
|
||||
input_grammar: &InputGrammar
|
||||
|
|
@ -34,7 +41,7 @@ pub(crate) fn prepare_grammar(
|
|||
let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
|
||||
let syntax_grammar = expand_repeats(syntax_grammar);
|
||||
let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
|
||||
let mut lexical_grammar = normalize_rules(lexical_grammar);
|
||||
let mut lexical_grammar = expand_tokens(lexical_grammar)?;
|
||||
let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &mut lexical_grammar);
|
||||
Ok((syntax_grammar, lexical_grammar, simple_aliases))
|
||||
}
|
||||
|
|
|
|||
44
src/rules.rs
44
src/rules.rs
|
|
@ -49,18 +49,13 @@ pub(crate) enum Rule {
|
|||
Pattern(String),
|
||||
NamedSymbol(String),
|
||||
Symbol(Symbol),
|
||||
Choice {
|
||||
elements: Vec<Rule>,
|
||||
},
|
||||
Choice(Vec<Rule>),
|
||||
Metadata {
|
||||
params: MetadataParams,
|
||||
rule: Rc<Rule>,
|
||||
rule: Box<Rule>,
|
||||
},
|
||||
Repeat(Rc<Rule>),
|
||||
Seq {
|
||||
left: Rc<Rule>,
|
||||
right: Rc<Rule>,
|
||||
}
|
||||
Repeat(Box<Rule>),
|
||||
Seq(Vec<Rule>),
|
||||
}
|
||||
|
||||
impl Rule {
|
||||
|
|
@ -98,7 +93,7 @@ impl Rule {
|
|||
}
|
||||
|
||||
pub fn repeat(rule: Rule) -> Self {
|
||||
Rule::Repeat(Rc::new(rule))
|
||||
Rule::Repeat(Box::new(rule))
|
||||
}
|
||||
|
||||
pub fn choice(rules: Vec<Rule>) -> Self {
|
||||
|
|
@ -106,32 +101,11 @@ impl Rule {
|
|||
for rule in rules {
|
||||
choice_helper(&mut elements, rule);
|
||||
}
|
||||
Rule::Choice { elements }
|
||||
Rule::Choice(elements)
|
||||
}
|
||||
|
||||
pub fn seq(rules: Vec<Rule>) -> Self {
|
||||
let mut result = Rule::Blank;
|
||||
for rule in rules {
|
||||
match rule {
|
||||
Rule::Blank => continue,
|
||||
Rule::Metadata { rule, params: _ } => {
|
||||
if *rule == Rule::Blank {
|
||||
continue;
|
||||
}
|
||||
},
|
||||
_ => {
|
||||
if result == Rule::Blank {
|
||||
result = rule;
|
||||
} else {
|
||||
result = Rule::Seq {
|
||||
left: Rc::new(result),
|
||||
right: Rc::new(rule),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
result
|
||||
Rule::Seq(rules)
|
||||
}
|
||||
|
||||
pub fn terminal(index: usize) -> Self {
|
||||
|
|
@ -196,14 +170,14 @@ fn add_metadata<T: Fn(&mut MetadataParams)>(input: Rule, f: T) -> Rule {
|
|||
_ => {
|
||||
let mut params = MetadataParams::default();
|
||||
f(&mut params);
|
||||
Rule::Metadata { rule: Rc::new(input), params }
|
||||
Rule::Metadata { rule: Box::new(input), params }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn choice_helper(result: &mut Vec<Rule>, rule: Rule) {
|
||||
match rule {
|
||||
Rule::Choice {elements} => {
|
||||
Rule::Choice(elements) => {
|
||||
for element in elements {
|
||||
choice_helper(result, element);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue