// tree-sitter/cli/src/generate/parse_grammar.rs
// (Rust source; listed as 182 lines, 4.9 KiB in the repository viewer this was copied from)

use super::grammars::{InputGrammar, Variable, VariableType};
use super::rules::Rule;
2018-12-05 12:50:12 -08:00
use crate::error::Result;
use serde_json::{Map, Value};
2018-12-05 12:50:12 -08:00
#[derive(Deserialize)]
#[serde(tag = "type")]
#[allow(non_camel_case_types)]
2018-12-06 22:11:52 -08:00
enum RuleJSON {
2018-12-20 13:35:13 -08:00
ALIAS {
content: Box<RuleJSON>,
named: bool,
value: String,
},
2018-12-05 12:50:12 -08:00
BLANK,
STRING {
value: String,
},
PATTERN {
value: String,
},
SYMBOL {
name: String,
},
CHOICE {
members: Vec<RuleJSON>,
},
SEQ {
members: Vec<RuleJSON>,
},
REPEAT {
content: Box<RuleJSON>,
},
2018-12-20 13:35:13 -08:00
REPEAT1 {
content: Box<RuleJSON>,
},
PREC_DYNAMIC {
value: i32,
content: Box<RuleJSON>,
},
2018-12-05 12:50:12 -08:00
PREC_LEFT {
value: i32,
content: Box<RuleJSON>,
},
PREC_RIGHT {
value: i32,
content: Box<RuleJSON>,
},
PREC {
value: i32,
content: Box<RuleJSON>,
},
TOKEN {
content: Box<RuleJSON>,
},
2018-12-20 13:35:13 -08:00
IMMEDIATE_TOKEN {
2018-12-05 12:50:12 -08:00
content: Box<RuleJSON>,
},
}
/// Top-level shape of the grammar JSON document read by `parse_grammar`.
///
/// Only `name` and `rules` are required; the remaining sections are optional
/// and `parse_grammar` substitutes an empty list when one is missing.
#[derive(Deserialize)]
struct GrammarJSON {
// Becomes `InputGrammar::name`.
name: String,
// Rule bodies are kept as raw JSON values here and deserialized into
// `RuleJSON` one at a time inside `parse_grammar`.
rules: Map<String, Value>,
// Becomes `InputGrammar::expected_conflicts`.
conflicts: Option<Vec<Vec<String>>>,
// Becomes `InputGrammar::external_tokens` after conversion with `parse_rule`.
externals: Option<Vec<RuleJSON>>,
// Becomes `InputGrammar::extra_tokens` after conversion with `parse_rule`.
extras: Option<Vec<RuleJSON>>,
// Becomes `InputGrammar::variables_to_inline`.
inline: Option<Vec<String>>,
// Becomes `InputGrammar::word_token`; stays `None` when absent.
word: Option<String>,
}
2018-12-06 22:11:52 -08:00
pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
2018-12-05 12:50:12 -08:00
let grammar_json: GrammarJSON = serde_json::from_str(&input)?;
let mut variables = Vec::with_capacity(grammar_json.rules.len());
for (name, value) in grammar_json.rules {
2018-12-06 22:11:52 -08:00
variables.push(Variable {
2018-12-05 12:50:12 -08:00
name: name.to_owned(),
kind: VariableType::Named,
rule: parse_rule(serde_json::from_value(value)?),
})
}
let extra_tokens = grammar_json
.extras
2018-12-05 12:50:12 -08:00
.unwrap_or(Vec::new())
.into_iter()
.map(parse_rule)
.collect();
let external_tokens = grammar_json
.externals
2018-12-05 12:50:12 -08:00
.unwrap_or(Vec::new())
.into_iter()
.map(parse_rule)
.collect();
let expected_conflicts = grammar_json.conflicts.unwrap_or(Vec::new());
let variables_to_inline = grammar_json.inline.unwrap_or(Vec::new());
2018-12-05 12:50:12 -08:00
Ok(InputGrammar {
name: grammar_json.name,
word_token: grammar_json.word,
variables,
extra_tokens,
expected_conflicts,
external_tokens,
variables_to_inline,
})
}
/// Converts a deserialized [`RuleJSON`] tree into the generator's [`Rule`].
///
/// The conversion is recursive and infallible: each JSON variant maps onto
/// one `Rule` variant or constructor, with nested rules converted first.
fn parse_rule(json: RuleJSON) -> Rule {
    match json {
        RuleJSON::ALIAS {
            content,
            value,
            named,
        } => Rule::alias(parse_rule(*content), value, named),
        RuleJSON::BLANK => Rule::Blank,
        RuleJSON::STRING { value } => Rule::String(value),
        RuleJSON::PATTERN { value } => Rule::Pattern(value),
        RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),
        RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()),
        RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()),
        // REPEAT1 maps straight onto `Rule::repeat`, while REPEAT adds a blank
        // alternative so that it can also match nothing.
        RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)),
        RuleJSON::REPEAT { content } => {
            Rule::choice(vec![Rule::repeat(parse_rule(*content)), Rule::Blank])
        }
        RuleJSON::PREC { value, content } => Rule::prec(value, parse_rule(*content)),
        RuleJSON::PREC_LEFT { value, content } => Rule::prec_left(value, parse_rule(*content)),
        RuleJSON::PREC_RIGHT { value, content } => Rule::prec_right(value, parse_rule(*content)),
        RuleJSON::PREC_DYNAMIC { value, content } => {
            Rule::prec_dynamic(value, parse_rule(*content))
        }
        RuleJSON::TOKEN { content } => Rule::token(parse_rule(*content)),
        RuleJSON::IMMEDIATE_TOKEN { content } => Rule::immediate_token(parse_rule(*content)),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A minimal two-rule grammar parses into the expected name and variables,
    /// with `REPEAT1` mapping directly onto `Rule::repeat`.
    #[test]
    fn test_parse_grammar() {
        let grammar = parse_grammar(
            r#"{
                "name": "my_lang",
                "rules": {
                    "file": {
                        "type": "REPEAT1",
                        "content": {
                            "type": "SYMBOL",
                            "name": "statement"
                        }
                    },
                    "statement": {
                        "type": "STRING",
                        "value": "foo"
                    }
                }
            }"#,
        )
        .unwrap();

        assert_eq!(grammar.name, "my_lang");
        assert_eq!(
            grammar.variables,
            vec![
                Variable {
                    name: "file".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::repeat(Rule::NamedSymbol("statement".to_string()))
                },
                Variable {
                    name: "statement".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::String("foo".to_string())
                },
            ]
        );
    }
}