Get basic field API working

This commit is contained in:
Max Brunsfeld 2019-02-07 12:29:20 -08:00
parent 7f66d2406f
commit 18a13b457d
27 changed files with 498 additions and 195 deletions

View file

@ -4,9 +4,10 @@ use crate::error::{Error, Result};
use crate::generate::grammars::{
InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType,
};
use crate::generate::rules::{Alias, Associativity, Symbol, SymbolType};
use crate::generate::rules::{Associativity, Symbol, SymbolType};
use crate::generate::tables::{
ChildInfoSequenceId, ChildInfo, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
ChildInfo, ChildInfoSequenceId, ParseAction, ParseState, ParseStateId, ParseTable,
ParseTableEntry,
};
use core::ops::Range;
use hashbrown::hash_map::Entry;
@ -652,14 +653,14 @@ impl<'a> ParseTableBuilder<'a> {
.iter()
.map(|s| ChildInfo {
alias: s.alias.clone(),
child_ref: s.child_ref.clone(),
field_name: s.field_name.clone(),
})
.collect();
while child_info_sequence.last() == Some(&ChildInfo::default()) {
child_info_sequence.pop();
}
if item.production.steps.len() > self.parse_table.max_aliased_production_length {
self.parse_table.max_aliased_production_length = item.production.steps.len()
if item.production.steps.len() > self.parse_table.max_production_length_with_child_info {
self.parse_table.max_production_length_with_child_info = item.production.steps.len()
}
if let Some(index) = self
.parse_table
@ -669,7 +670,9 @@ impl<'a> ParseTableBuilder<'a> {
{
index
} else {
self.parse_table.child_info_sequences.push(child_info_sequence);
self.parse_table
.child_info_sequences
.push(child_info_sequence);
self.parse_table.child_info_sequences.len() - 1
}
}
@ -744,7 +747,7 @@ pub(crate) fn build_parse_table(
states: Vec::new(),
symbols: Vec::new(),
child_info_sequences: Vec::new(),
max_aliased_production_length: 0,
max_production_length_with_child_info: 0,
},
}
.build()?;

View file

@ -20,7 +20,7 @@ lazy_static! {
precedence: 0,
associativity: None,
alias: None,
child_ref: None,
field_name: None,
}],
};
}

View file

@ -34,6 +34,14 @@ function blank() {
};
}
// Builds a FIELD rule node: `rule` becomes the node's content and is
// tagged with the given field `name`.
function field(name, rule) {
  const node = {type: "FIELD", name, content: rule};
  return node;
}
function choice(...elements) {
return {
type: "CHOICE",
@ -363,6 +371,7 @@ global.seq = seq;
global.sym = sym;
global.token = token;
global.grammar = grammar;
global.field = field;
const result = require(process.env.TREE_SITTER_GRAMMAR_PATH);
console.log(JSON.stringify(result, null, 2));

View file

@ -54,7 +54,7 @@ pub(crate) struct ProductionStep {
pub precedence: i32,
pub associativity: Option<Associativity>,
pub alias: Option<Alias>,
pub child_ref: Option<String>,
pub field_name: Option<String>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
@ -100,7 +100,7 @@ impl ProductionStep {
precedence: 0,
associativity: None,
alias: None,
child_ref: None,
field_name: None,
}
}
@ -110,7 +110,7 @@ impl ProductionStep {
precedence,
associativity,
alias: self.alias,
child_ref: self.child_ref,
field_name: self.field_name,
}
}
@ -123,16 +123,16 @@ impl ProductionStep {
value: value.to_string(),
is_named,
}),
child_ref: self.child_ref,
field_name: self.field_name,
}
}
pub(crate) fn with_child_ref(self, name: &str) -> Self {
pub(crate) fn with_field_name(self, name: &str) -> Self {
Self {
symbol: self.symbol,
precedence: self.precedence,
associativity: self.associativity,
alias: self.alias,
child_ref: Some(name.to_string()),
field_name: Some(name.to_string()),
}
}
}

View file

@ -26,8 +26,8 @@ enum RuleJSON {
CHOICE {
members: Vec<RuleJSON>,
},
REF {
value: String,
FIELD {
name: String,
content: Box<RuleJSON>,
},
SEQ {
@ -124,7 +124,7 @@ fn parse_rule(json: RuleJSON) -> Rule {
RuleJSON::PATTERN { value } => Rule::Pattern(value),
RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),
RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()),
RuleJSON::REF { content, value } => Rule::child_ref(value, parse_rule(*content)),
RuleJSON::FIELD { content, name } => Rule::field(name, parse_rule(*content)),
RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()),
RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)),
RuleJSON::REPEAT { content } => {

View file

@ -11,7 +11,7 @@ struct RuleFlattener {
precedence_stack: Vec<i32>,
associativity_stack: Vec<Associativity>,
alias_stack: Vec<Alias>,
child_ref_stack: Vec<String>,
field_name_stack: Vec<String>,
}
impl RuleFlattener {
@ -24,7 +24,7 @@ impl RuleFlattener {
precedence_stack: Vec::new(),
associativity_stack: Vec::new(),
alias_stack: Vec::new(),
child_ref_stack: Vec::new(),
field_name_stack: Vec::new(),
}
}
@ -62,10 +62,10 @@ impl RuleFlattener {
self.alias_stack.push(alias);
}
let mut has_child_ref = false;
if let Some(child_ref) = params.child_ref {
has_child_ref = true;
self.child_ref_stack.push(child_ref);
let mut has_field_name = false;
if let Some(field_name) = params.field_name {
has_field_name = true;
self.field_name_stack.push(field_name);
}
if params.dynamic_precedence.abs() > self.production.dynamic_precedence.abs() {
@ -94,8 +94,8 @@ impl RuleFlattener {
self.alias_stack.pop();
}
if has_child_ref {
self.child_ref_stack.pop();
if has_field_name {
self.field_name_stack.pop();
}
did_push
@ -106,7 +106,7 @@ impl RuleFlattener {
precedence: self.precedence_stack.last().cloned().unwrap_or(0),
associativity: self.associativity_stack.last().cloned(),
alias: self.alias_stack.last().cloned(),
child_ref: self.child_ref_stack.last().cloned(),
field_name: self.field_name_stack.last().cloned(),
});
true
}
@ -370,16 +370,16 @@ mod tests {
}
#[test]
fn test_flatten_grammar_with_child_refs() {
fn test_flatten_grammar_with_field_names() {
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::child_ref("first-thing".to_string(), Rule::terminal(1)),
Rule::field("first-thing".to_string(), Rule::terminal(1)),
Rule::terminal(2),
Rule::choice(vec![
Rule::Blank,
Rule::child_ref("second-thing".to_string(), Rule::terminal(3)),
Rule::field("second-thing".to_string(), Rule::terminal(3)),
]),
]),
})
@ -391,16 +391,16 @@ mod tests {
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(1)).with_child_ref("first-thing"),
ProductionStep::new(Symbol::terminal(1)).with_field_name("first-thing"),
ProductionStep::new(Symbol::terminal(2))
]
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(1)).with_child_ref("first-thing"),
ProductionStep::new(Symbol::terminal(1)).with_field_name("first-thing"),
ProductionStep::new(Symbol::terminal(2)),
ProductionStep::new(Symbol::terminal(3)).with_child_ref("second-thing"),
ProductionStep::new(Symbol::terminal(3)).with_field_name("second-thing"),
]
},
]

View file

@ -6,6 +6,7 @@ use core::ops::Range;
use hashbrown::{HashMap, HashSet};
use std::fmt::Write;
use std::mem::swap;
use tree_sitter::LANGUAGE_VERSION;
macro_rules! add {
($this: tt, $($arg: tt)*) => {{
@ -56,10 +57,12 @@ struct Generator {
alias_ids: HashMap<Alias, String>,
external_scanner_states: Vec<HashSet<usize>>,
alias_map: HashMap<Alias, Option<Symbol>>,
field_names: Vec<String>,
}
impl Generator {
fn generate(mut self) -> String {
self.init();
self.add_includes();
self.add_pragmas();
self.add_stats();
@ -68,7 +71,11 @@ impl Generator {
self.add_symbol_metadata_list();
if self.parse_table.child_info_sequences.len() > 1 {
self.add_alias_sequences();
if !self.field_names.is_empty() {
self.add_field_name_enum();
}
self.add_field_name_names_list();
self.add_child_info_sequences();
}
let mut main_lex_table = LexTable::default();
@ -95,6 +102,49 @@ impl Generator {
self.buffer
}
/// Precomputes the identifier tables used by the rest of the generator:
/// one id per parse-table symbol, one id (and optional matching symbol) per
/// alias, and the sorted, de-duplicated list of field names.
fn init(&mut self) {
// Assign an id to every symbol in the parse table. The set is threaded
// through each call; presumably it records identifiers already handed out
// so that `assign_symbol_id` can avoid duplicates (its body is not shown here).
let mut symbol_identifiers = HashSet::new();
for i in 0..self.parse_table.symbols.len() {
self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
}
// Borrowed field names gathered from every child-info entry; duplicates are
// removed after the scan.
let mut field_names = Vec::new();
for child_info_sequence in &self.parse_table.child_info_sequences {
for entry in child_info_sequence {
if let Some(field_name) = &entry.field_name {
field_names.push(field_name);
}
if let Some(alias) = &entry.alias {
// An alias is compared against symbols of the same visibility kind only.
let alias_kind = if alias.is_named {
VariableType::Named
} else {
VariableType::Anonymous
};
// First existing symbol whose name and kind both equal the alias, if any.
let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
let (name, kind) = self.metadata_for_symbol(*symbol);
name == alias.value && kind == alias_kind
});
// Reuse the matching symbol's id when there is one; otherwise build a
// dedicated alias id from the sanitized alias text.
let alias_id = if let Some(symbol) = matching_symbol {
self.symbol_ids[&symbol].clone()
} else if alias.is_named {
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
} else {
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
};
// `or_insert` keeps whatever was recorded the first time this alias was seen.
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
self.alias_map
.entry(alias.clone())
.or_insert(matching_symbol);
}
}
}
// Sorting first makes equal names adjacent so `dedup` removes every duplicate;
// the surviving borrowed names are then cloned into owned storage.
field_names.sort_unstable();
field_names.dedup();
self.field_names = field_names.into_iter().cloned().collect();
}
fn add_includes(&mut self) {
add_line!(self, "#include <tree_sitter/parser.h>");
add_line!(self, "");
@ -143,39 +193,7 @@ impl Generator {
})
.count();
let mut symbol_identifiers = HashSet::new();
for i in 0..self.parse_table.symbols.len() {
self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
}
for child_info_sequence in &self.parse_table.child_info_sequences {
for entry in child_info_sequence {
if let Some(alias) = &entry.alias {
let alias_kind = if alias.is_named {
VariableType::Named
} else {
VariableType::Anonymous
};
let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
let (name, kind) = self.metadata_for_symbol(*symbol);
name == alias.value && kind == alias_kind
});
let alias_id = if let Some(symbol) = matching_symbol {
self.symbol_ids[&symbol].clone()
} else if alias.is_named {
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
} else {
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
};
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
self.alias_map
.entry(alias.clone())
.or_insert(matching_symbol);
}
}
}
add_line!(self, "#define LANGUAGE_VERSION {}", 9);
add_line!(self, "#define LANGUAGE_VERSION {}", LANGUAGE_VERSION);
add_line!(
self,
"#define STATE_COUNT {}",
@ -197,10 +215,11 @@ impl Generator {
"#define EXTERNAL_TOKEN_COUNT {}",
self.syntax_grammar.external_tokens.len()
);
add_line!(self, "#define FIELD_COUNT {}", self.field_names.len());
add_line!(
self,
"#define MAX_ALIAS_SEQUENCE_LENGTH {}",
self.parse_table.max_aliased_production_length
"#define MAX_CHILD_INFO_PRODUCTION_LENGTH {}",
self.parse_table.max_production_length_with_child_info
);
add_line!(self, "");
}
@ -253,6 +272,34 @@ impl Generator {
add_line!(self, "");
}
/// Writes an anonymous C `enum` with one enumerator per entry of
/// `self.field_names`, numbered from 1 in list order.
fn add_field_name_enum(&mut self) {
    add_line!(self, "enum {{");
    indent!(self);
    // Pair each name with its 1-based position directly instead of
    // offsetting a 0-based index.
    for (value, name) in (1usize..).zip(self.field_names.iter()) {
        add_line!(self, "{} = {},", self.field_id(name), value);
    }
    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
/// Writes the `ts_field_names` C array: slot 0 is `NULL`, and each field's
/// enum identifier indexes the string holding that field's name.
fn add_field_name_names_list(&mut self) {
    add_line!(self, "static const char *ts_field_names[] = {{");
    indent!(self);
    add_line!(self, "[0] = NULL,");
    for name in self.field_names.iter() {
        // Resolve the designator up front so the emitting line stays short.
        let designator = self.field_id(name);
        add_line!(self, "[{}] = \"{}\",", designator, name);
    }
    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
fn add_symbol_metadata_list(&mut self) {
add_line!(
self,
@ -303,14 +350,18 @@ impl Generator {
add_line!(self, "");
}
fn add_alias_sequences(&mut self) {
fn add_child_info_sequences(&mut self) {
add_line!(
self,
"static TSSymbol ts_alias_sequences[{}][MAX_ALIAS_SEQUENCE_LENGTH] = {{",
"static TSSymbol ts_alias_sequences[{}][MAX_CHILD_INFO_PRODUCTION_LENGTH] = {{",
self.parse_table.child_info_sequences.len()
);
indent!(self);
for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate().skip(1) {
for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate() {
if sequence.iter().all(|i| i.alias.is_none()) {
continue;
}
add_line!(self, "[{}] = {{", i);
indent!(self);
for (j, child_info) in sequence.iter().enumerate() {
@ -324,6 +375,31 @@ impl Generator {
dedent!(self);
add_line!(self, "}};");
add_line!(self, "");
add_line!(
self,
"static TSFieldId ts_field_sequences[{}][MAX_CHILD_INFO_PRODUCTION_LENGTH] = {{",
self.parse_table.child_info_sequences.len()
);
indent!(self);
for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate() {
if sequence.iter().all(|i| i.field_name.is_none()) {
continue;
}
add_line!(self, "[{}] = {{", i);
indent!(self);
for (j, child_info) in sequence.iter().enumerate() {
if let Some(field_name) = &child_info.field_name {
add_line!(self, "[{}] = {},", j, self.field_id(&field_name));
}
}
dedent!(self);
add_line!(self, "}},");
}
dedent!(self);
add_line!(self, "}};");
add_line!(self, "");
}
fn add_lex_function(&mut self, name: &str, lex_table: LexTable) {
@ -694,7 +770,11 @@ impl Generator {
add!(self, ", .dynamic_precedence = {}", dynamic_precedence);
}
if child_info_sequence_id != 0 {
add!(self, ", .alias_sequence_id = {}", child_info_sequence_id);
add!(
self,
", .child_info_sequence_id = {}",
child_info_sequence_id
);
}
add!(self, ")");
}
@ -764,11 +844,18 @@ impl Generator {
self,
".alias_sequences = (const TSSymbol *)ts_alias_sequences,"
);
add_line!(self, ".field_count = FIELD_COUNT,");
add_line!(
self,
".field_sequences = (const TSFieldId *)ts_field_sequences,"
);
add_line!(self, ".field_names = ts_field_names,");
}
add_line!(
self,
".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH,"
".max_child_info_production_length = MAX_CHILD_INFO_PRODUCTION_LENGTH,"
);
add_line!(self, ".lex_fn = ts_lex,");
@ -865,6 +952,10 @@ impl Generator {
self.symbol_ids.insert(symbol, id);
}
/// Returns the C identifier used for `field_name` in the generated parser
/// (the enumerator name and the array designator).
///
/// The name is run through `sanitize_identifier`, the same helper used when
/// building alias identifiers, so that a field name containing characters
/// that cannot appear in a C identifier (for example `first-thing`) does not
/// end up verbatim in the emitted source.
///
/// NOTE(review): two distinct field names that sanitize to the same text
/// would yield the same identifier — confirm whether field names are
/// validated upstream.
fn field_id(&self, field_name: &String) -> String {
    format!("field_id_{}", self.sanitize_identifier(field_name))
}
fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) {
match symbol.kind {
SymbolType::End => ("end", VariableType::Hidden),
@ -996,6 +1087,7 @@ pub(crate) fn render_c_code(
alias_ids: HashMap::new(),
external_scanner_states: Vec::new(),
alias_map: HashMap::new(),
field_names: Vec::new(),
}
.generate()
}

View file

@ -32,7 +32,7 @@ pub(crate) struct MetadataParams {
pub is_active: bool,
pub is_main_token: bool,
pub alias: Option<Alias>,
pub child_ref: Option<String>,
pub field_name: Option<String>,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
@ -58,9 +58,9 @@ pub(crate) enum Rule {
}
impl Rule {
pub fn child_ref(name: String, content: Rule) -> Self {
pub fn field(name: String, content: Rule) -> Self {
add_metadata(content, move |params| {
params.child_ref = Some(name);
params.field_name = Some(name);
})
}

View file

@ -42,7 +42,7 @@ pub(crate) struct ParseState {
#[derive(Debug, Default, PartialEq, Eq)]
pub(crate) struct ChildInfo {
pub alias: Option<Alias>,
pub child_ref: Option<String>,
pub field_name: Option<String>,
}
#[derive(Debug, PartialEq, Eq)]
@ -50,7 +50,7 @@ pub(crate) struct ParseTable {
pub states: Vec<ParseState>,
pub symbols: Vec<Symbol>,
pub child_info_sequences: Vec<Vec<ChildInfo>>,
pub max_aliased_production_length: usize,
pub max_production_length_with_child_info: usize,
}
#[derive(Clone, Debug, PartialEq, Eq)]

View file

@ -49,9 +49,8 @@ pub fn parse_file_at_path(
let mut did_visit_children = false;
loop {
let node = cursor.node();
let is_named = node.is_named();
if did_visit_children {
if is_named {
if node.is_named() {
stdout.write(b")")?;
needs_newline = true;
}
@ -64,13 +63,16 @@ pub fn parse_file_at_path(
break;
}
} else {
if is_named {
if node.is_named() {
if needs_newline {
stdout.write(b"\n")?;
}
for _ in 0..indent_level {
stdout.write(b" ")?;
}
if let Some(field_name) = cursor.field_name() {
write!(&mut stdout, "{}: ", field_name)?;
}
let start = node.start_position();
let end = node.end_position();
write!(

View file

@ -1,6 +1,5 @@
mod corpus_test;
mod helpers;
mod node_refs;
mod node_test;
mod parser_test;
mod properties_test;

View file

@ -338,6 +338,65 @@ fn test_node_edit() {
}
}
/// Generates a parser for a small grammar whose first and third children are
/// wrapped in `FIELD` rules, parses a sample input, and checks that looking a
/// child up by field name yields the same node as looking it up by index.
#[test]
fn test_node_field_names() {
    let (parser_name, parser_code) = generate_parser_for_grammar(
        r#"
{
"name": "test_grammar_with_refs",
"extras": [
{"type": "PATTERN", "value": "\\s+"}
],
"rules": {
"rule_a": {
"type": "SEQ",
"members": [
{
"type": "FIELD",
"name": "field_1",
"content": {
"type": "STRING",
"value": "child-1"
}
},
{
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "child-2"
},
{
"type": "BLANK"
}
]
},
{
"type": "FIELD",
"name": "field_2",
"content": {
"type": "STRING",
"value": "child-3"
}
}
]
}
}
}
"#,
    )
    .unwrap();

    let mut parser = Parser::new();
    let language = get_test_language(&parser_name, &parser_code, None);
    parser.set_language(language).unwrap();

    let tree = parser.parse("child-1 child-2 child-3", None).unwrap();
    let root_node = tree.root_node();

    // Guard against a vacuous pass: if both the indexed lookup and the field
    // lookup came back empty, the equality checks below would still succeed.
    let first_child = root_node.child(0);
    let third_child = root_node.child(2);
    assert!(first_child.is_some(), "root node should have a first child");
    assert!(third_child.is_some(), "root node should have a third child");

    assert_eq!(root_node.child_by_field_name("field_1"), first_child);
    assert_eq!(root_node.child_by_field_name("field_2"), third_child);
}
fn get_all_nodes(tree: &Tree) -> Vec<Node> {
let mut result = Vec::new();
let mut visited_children = false;