Merge pull request #367 from tree-sitter/node-type-children

Include info about children w/o fields in node-types.json
This commit is contained in:
Max Brunsfeld 2019-06-20 14:06:27 -07:00 committed by GitHub
commit dfd012d13d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 337 additions and 109 deletions

View file

@ -272,7 +272,9 @@ impl<'a> ParseItemSet<'a> {
}
pub fn core(&self) -> ParseItemSetCore<'a> {
ParseItemSetCore { entries: self.entries.iter().map(|e| e.0).collect() }
ParseItemSetCore {
entries: self.entries.iter().map(|e| e.0).collect(),
}
}
}

View file

@ -2,7 +2,7 @@ use super::nfa::Nfa;
use super::rules::{Alias, Associativity, Rule, Symbol};
use hashbrown::HashMap;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum VariableType {
Hidden,
Auxiliary,
@ -64,6 +64,7 @@ pub(crate) struct Production {
pub dynamic_precedence: i32,
}
#[derive(Default)]
pub(crate) struct InlinedProductionMap {
pub productions: Vec<Production>,
pub production_map: HashMap<(*const Production, u32), Vec<usize>>,

View file

@ -97,7 +97,7 @@ fn generate_parser_for_grammar_with_opts(
let input_grammar = parse_grammar(grammar_json)?;
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
prepare_grammar(&input_grammar)?;
let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar)?;
let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &inlines)?;
let node_types_json = node_types::generate_node_types_json(
&syntax_grammar,
&lexical_grammar,

View file

@ -1,4 +1,7 @@
use super::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
use super::grammars::{
InlinedProductionMap, LexicalGrammar, Production, ProductionStep, SyntaxGrammar,
SyntaxVariable, VariableType,
};
use super::rules::{Alias, AliasMap, Symbol, SymbolType};
use crate::error::{Error, Result};
use hashbrown::HashMap;
@ -23,6 +26,7 @@ pub(crate) struct FieldInfo {
pub(crate) struct VariableInfo {
pub fields: HashMap<String, FieldInfo>,
pub child_types: Vec<ChildType>,
pub child_types_without_fields: Vec<ChildType>,
pub has_multi_step_production: bool,
}
@ -34,6 +38,8 @@ pub(crate) struct NodeInfoJSON {
#[serde(skip_serializing_if = "Option::is_none")]
fields: Option<BTreeMap<String, FieldInfoJSON>>,
#[serde(skip_serializing_if = "Option::is_none")]
children: Option<FieldInfoJSON>,
#[serde(skip_serializing_if = "Option::is_none")]
subtypes: Option<Vec<NodeTypeJSON>>,
}
@ -54,49 +60,56 @@ pub(crate) struct FieldInfoJSON {
pub(crate) fn get_variable_info(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
inlines: &InlinedProductionMap,
) -> Result<Vec<VariableInfo>> {
let mut result = Vec::new();
// Determine which field names and child node types can appear directly
// within each type of node.
let mut steps = Vec::new();
for (i, variable) in syntax_grammar.variables.iter().enumerate() {
let mut info = VariableInfo {
fields: HashMap::new(),
child_types: Vec::new(),
child_types_without_fields: Vec::new(),
has_multi_step_production: false,
};
let is_recursive = variable
.productions
.iter()
.any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i)));
for production in &variable.productions {
if production.steps.len() > 1 {
info.has_multi_step_production = true;
steps.clear();
if get_all_child_steps(variable, inlines, &mut steps) > 1 {
info.has_multi_step_production = true;
}
let is_recursive = steps.iter().any(|s| s.symbol == Symbol::non_terminal(i));
for step in &steps {
let child_type = if let Some(alias) = &step.alias {
ChildType::Aliased(alias.clone())
} else {
ChildType::Normal(step.symbol)
};
if let Some(field_name) = &step.field_name {
let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo {
multiple: false,
required: true,
types: Vec::new(),
});
field_info.multiple |= is_recursive;
if let Err(i) = field_info.types.binary_search(&child_type) {
field_info.types.insert(i, child_type.clone());
}
} else if variable_type_for_child_type(&child_type, syntax_grammar, lexical_grammar)
== VariableType::Named
{
if let Err(i) = info.child_types_without_fields.binary_search(&child_type) {
info.child_types_without_fields
.insert(i, child_type.clone());
}
}
for step in &production.steps {
let child_type = if let Some(alias) = &step.alias {
ChildType::Aliased(alias.clone())
} else {
ChildType::Normal(step.symbol)
};
if let Some(field_name) = &step.field_name {
let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo {
multiple: false,
required: true,
types: Vec::new(),
});
field_info.multiple |= is_recursive;
if let Err(i) = field_info.types.binary_search(&child_type) {
field_info.types.insert(i, child_type.clone());
}
}
if let Err(i) = info.child_types.binary_search(&child_type) {
info.child_types.insert(i, child_type.clone());
}
if let Err(i) = info.child_types.binary_search(&child_type) {
info.child_types.insert(i, child_type.clone());
}
}
@ -107,9 +120,18 @@ pub(crate) fn get_variable_info(
.filter_map(|s| s.field_name.as_ref())
.collect();
for (field_name, field_info) in info.fields.iter_mut() {
if !production_fields.contains(&field_name) {
let mut occurrence_count = 0;
for f in &production_fields {
if *f == field_name {
occurrence_count += 1;
}
}
if occurrence_count == 0 {
field_info.required = false;
}
if occurrence_count > 1 {
field_info.multiple = true;
}
}
}
@ -127,68 +149,80 @@ pub(crate) fn get_variable_info(
let mut variable_info = VariableInfo::default();
mem::swap(&mut variable_info, &mut result[i]);
for production in &variable.productions {
for step in &production.steps {
let child_symbol = step.symbol;
if step.alias.is_none()
&& child_symbol.kind == SymbolType::NonTerminal
&& !syntax_grammar.variables[child_symbol.index]
.kind
.is_visible()
{
let child_variable_info = &result[child_symbol.index];
steps.clear();
get_all_child_steps(variable, inlines, &mut steps);
// If a hidden child can have multiple children, then this
// node can appear to have multiple children.
if child_variable_info.has_multi_step_production {
variable_info.has_multi_step_production = true;
for step in &steps {
let child_symbol = step.symbol;
if step.alias.is_none()
&& child_symbol.kind == SymbolType::NonTerminal
&& !syntax_grammar.variables[child_symbol.index]
.kind
.is_visible()
&& !syntax_grammar.supertype_symbols.contains(&child_symbol)
{
let child_variable_info = &result[child_symbol.index];
// If a hidden child can have multiple children, then this
// node can appear to have multiple children.
if child_variable_info.has_multi_step_production {
variable_info.has_multi_step_production = true;
}
// Inherit fields from this hidden child
for (field_name, child_field_info) in &child_variable_info.fields {
let field_info = variable_info
.fields
.entry(field_name.clone())
.or_insert_with(|| {
done = false;
child_field_info.clone()
});
if child_field_info.multiple && !field_info.multiple {
field_info.multiple = child_field_info.multiple;
done = false;
}
// Inherit fields from this hidden child
for (field_name, child_field_info) in &child_variable_info.fields {
let field_info = variable_info
.fields
.entry(field_name.clone())
.or_insert_with(|| {
done = false;
child_field_info.clone()
});
if child_field_info.multiple && !field_info.multiple {
field_info.multiple = child_field_info.multiple;
if !child_field_info.required && field_info.required {
field_info.required = child_field_info.required;
done = false;
}
for child_type in &child_field_info.types {
if let Err(i) = field_info.types.binary_search(&child_type) {
field_info.types.insert(i, child_type.clone());
done = false;
}
if !child_field_info.required && field_info.required {
field_info.required = child_field_info.required;
}
}
// Inherit child types from this hidden child
for child_type in &child_variable_info.child_types {
if let Err(i) = variable_info.child_types.binary_search(&child_type) {
variable_info.child_types.insert(i, child_type.clone());
done = false;
}
}
// If any field points to this hidden child, inherit child types
// for the field.
if let Some(field_name) = &step.field_name {
let field_info = variable_info.fields.get_mut(field_name).unwrap();
for child_type in &child_variable_info.child_types {
if let Err(i) = field_info.types.binary_search(&child_type) {
field_info.types.insert(i, child_type.clone());
done = false;
}
for child_type in &child_field_info.types {
if let Err(i) = field_info.types.binary_search(&child_type) {
field_info.types.insert(i, child_type.clone());
done = false;
}
}
}
if !syntax_grammar.supertype_symbols.contains(&child_symbol) {
// Inherit child types from this hidden child
for child_type in &child_variable_info.child_types {
if let Err(i) = variable_info.child_types.binary_search(&child_type)
{
variable_info.child_types.insert(i, child_type.clone());
done = false;
}
}
// If any field points to this hidden child, inherit child types
// for the field.
if let Some(field_name) = &step.field_name {
let field_info = variable_info.fields.get_mut(field_name).unwrap();
for child_type in &child_variable_info.child_types {
if let Err(i) = field_info.types.binary_search(&child_type) {
field_info.types.insert(i, child_type.clone());
done = false;
}
}
} else {
// Inherit child types without fields from this hidden child
for child_type in &child_variable_info.child_types_without_fields {
if let Err(i) = variable_info
.child_types_without_fields
.binary_search(&child_type)
{
variable_info
.child_types_without_fields
.insert(i, child_type.clone());
done = false;
}
}
}
@ -217,20 +251,8 @@ pub(crate) fn get_variable_info(
}
}
let child_type_is_visible = |child_type: &ChildType| match child_type {
ChildType::Aliased(_) => true,
ChildType::Normal(symbol) => {
if syntax_grammar.supertype_symbols.contains(&symbol) {
return true;
}
let variable_kind = match symbol.kind {
SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind,
SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind,
SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind,
_ => VariableType::Hidden,
};
variable_kind.is_visible()
}
let child_type_is_visible = |t: &ChildType| {
variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous
};
for supertype_symbol in &syntax_grammar.supertype_symbols {
@ -247,28 +269,121 @@ pub(crate) fn get_variable_info(
// subtypes with a single supertype.
for (_, field_info) in variable_info.fields.iter_mut() {
for supertype_symbol in &syntax_grammar.supertype_symbols {
if sorted_vec_replace(
sorted_vec_replace(
&mut field_info.types,
&result[supertype_symbol.index].child_types,
ChildType::Normal(*supertype_symbol),
) {
break;
}
);
}
field_info.types.retain(child_type_is_visible);
}
for supertype_symbol in &syntax_grammar.supertype_symbols {
sorted_vec_replace(
&mut variable_info.child_types_without_fields,
&result[supertype_symbol.index].child_types,
ChildType::Normal(*supertype_symbol),
);
}
variable_info
.child_types_without_fields
.retain(child_type_is_visible);
result[i] = variable_info;
}
Ok(result)
}
// Summarize information about this variable's possible children by walking
// all of its productions.
fn get_all_child_steps(
variable: &SyntaxVariable,
inlines: &InlinedProductionMap,
output: &mut Vec<ProductionStep>,
) -> usize {
// For each of the given variable's productions, insert all of the reachable steps
// into the output vector, and return the longest possible production length.
return variable
.productions
.iter()
.map(|p| process_production(inlines, p, 0, output))
.max()
.unwrap_or(0);
// For the given production suffix, add all of the remaining steps into the output
// vector and return the longest possible production length.
fn process_production(
inlines: &InlinedProductionMap,
production: &Production,
step_index: usize,
output: &mut Vec<ProductionStep>,
) -> usize {
let mut max_length = production.steps.len();
// Process each of the remaining steps of the production.
for (i, step) in production.steps.iter().enumerate().skip(step_index) {
// If this step is inlined, then process the corresponding suffixes of
// all of the inlined productions instead.
if let Some(inlined_productions) = inlines.inlined_productions(production, i as u32) {
for inlined_production in inlined_productions {
let length = process_production(inlines, inlined_production, i, output);
if length > max_length {
max_length = length;
}
}
break;
}
// Otherwise, insert this step into the output vector unless it is already
// present.
if let Err(i) = output.binary_search(step) {
output.insert(i, step.clone());
}
}
return max_length;
}
}
fn variable_type_for_child_type(
child_type: &ChildType,
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
) -> VariableType {
match child_type {
ChildType::Aliased(alias) => {
if alias.is_named {
VariableType::Named
} else {
VariableType::Anonymous
}
}
ChildType::Normal(symbol) => {
if syntax_grammar.supertype_symbols.contains(&symbol) {
return VariableType::Named;
} else {
match symbol.kind {
SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind,
SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind,
SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind,
_ => VariableType::Hidden,
}
}
}
}
}
fn sorted_vec_replace<T>(left: &mut Vec<T>, right: &Vec<T>, value: T) -> bool
where
T: Eq + Ord,
{
if left.len() == 0 {
return false;
}
let mut i = 0;
for right_elem in right.iter() {
while left[i] < *right_elem {
@ -366,6 +481,7 @@ pub(crate) fn generate_node_types_json(
kind: name.clone(),
named: true,
fields: None,
children: None,
subtypes: None,
});
let mut subtypes = info
@ -376,7 +492,9 @@ pub(crate) fn generate_node_types_json(
subtypes.sort_unstable();
subtypes.dedup();
node_type_json.subtypes = Some(subtypes);
} else if variable.kind.is_visible() {
} else if variable.kind.is_visible()
&& !syntax_grammar.variables_to_inline.contains(&symbol)
{
let node_type_json =
node_types_json
.entry(name.clone())
@ -384,6 +502,7 @@ pub(crate) fn generate_node_types_json(
kind: name.clone(),
named: true,
fields: None,
children: None,
subtypes: None,
});
let mut fields_json = BTreeMap::new();
@ -403,6 +522,20 @@ pub(crate) fn generate_node_types_json(
field_info_json.types.dedup();
}
node_type_json.fields = Some(fields_json);
if info.child_types_without_fields.len() > 0 {
let mut children_types = info
.child_types_without_fields
.iter()
.map(child_type_to_node_type)
.collect::<Vec<_>>();
children_types.sort_unstable();
children_types.dedup();
node_type_json.children = Some(FieldInfoJSON {
multiple: true,
required: false,
types: children_types,
});
}
}
}
@ -414,6 +547,7 @@ pub(crate) fn generate_node_types_json(
kind: variable.name.clone(),
named: true,
fields: None,
children: None,
subtypes: None,
});
} else if variable.kind == VariableType::Anonymous {
@ -421,6 +555,7 @@ pub(crate) fn generate_node_types_json(
kind: variable.name.clone(),
named: false,
fields: None,
children: None,
subtypes: None,
});
}
@ -471,6 +606,7 @@ mod tests {
kind: "v1".to_string(),
named: true,
subtypes: None,
children: None,
fields: Some(
vec![
(
@ -507,6 +643,7 @@ mod tests {
kind: ";".to_string(),
named: false,
subtypes: None,
children: None,
fields: None
}
);
@ -516,6 +653,7 @@ mod tests {
kind: "v2".to_string(),
named: true,
subtypes: None,
children: None,
fields: None
}
);
@ -565,6 +703,7 @@ mod tests {
kind: "_v2".to_string(),
named: true,
fields: None,
children: None,
subtypes: Some(vec![
NodeTypeJSON {
kind: "*".to_string(),
@ -587,6 +726,7 @@ mod tests {
kind: "v1".to_string(),
named: true,
subtypes: None,
children: None,
fields: Some(
vec![(
"f1".to_string(),
@ -606,6 +746,83 @@ mod tests {
);
}
#[test]
fn test_node_types_for_children_without_fields() {
let node_types = get_node_types(InputGrammar {
name: String::new(),
extra_tokens: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
word_token: None,
supertype_symbols: vec![],
variables: vec![
Variable {
name: "v1".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::named("v2"),
Rule::field("f1".to_string(), Rule::named("v3")),
Rule::named("v4"),
]),
},
Variable {
name: "v2".to_string(),
kind: VariableType::Named,
rule: Rule::string("x"),
},
Variable {
name: "v3".to_string(),
kind: VariableType::Named,
rule: Rule::string("x"),
},
Variable {
name: "v4".to_string(),
kind: VariableType::Named,
rule: Rule::string("x"),
},
],
});
assert_eq!(
node_types[0],
NodeInfoJSON {
kind: "v1".to_string(),
named: true,
subtypes: None,
children: Some(FieldInfoJSON {
multiple: true,
required: false,
types: vec![
NodeTypeJSON {
kind: "v2".to_string(),
named: true,
},
NodeTypeJSON {
kind: "v4".to_string(),
named: true,
},
]
}),
fields: Some(
vec![(
"f1".to_string(),
FieldInfoJSON {
multiple: false,
required: true,
types: vec![NodeTypeJSON {
kind: "v3".to_string(),
named: true,
}]
}
),]
.into_iter()
.collect()
)
}
);
}
#[test]
fn test_get_variable_info() {
let variable_info = get_variable_info(
@ -664,6 +881,7 @@ mod tests {
vec![],
),
&build_lexical_grammar(),
&InlinedProductionMap::default(),
)
.unwrap();
@ -732,6 +950,7 @@ mod tests {
vec![],
),
&build_lexical_grammar(),
&InlinedProductionMap::default(),
)
.unwrap();
@ -787,6 +1006,7 @@ mod tests {
vec![Symbol::non_terminal(1)],
),
&build_lexical_grammar(),
&InlinedProductionMap::default(),
)
.unwrap();
@ -808,7 +1028,12 @@ mod tests {
fn get_node_types(grammar: InputGrammar) -> Vec<NodeInfoJSON> {
let (syntax_grammar, lexical_grammar, _, simple_aliases) =
prepare_grammar(&grammar).unwrap();
let variable_info = get_variable_info(&syntax_grammar, &lexical_grammar).unwrap();
let variable_info = get_variable_info(
&syntax_grammar,
&lexical_grammar,
&InlinedProductionMap::default(),
)
.unwrap();
generate_node_types_json(
&syntax_grammar,
&lexical_grammar,