diff --git a/cli/src/generate/build_tables/item.rs b/cli/src/generate/build_tables/item.rs index 60751002..3bea6293 100644 --- a/cli/src/generate/build_tables/item.rs +++ b/cli/src/generate/build_tables/item.rs @@ -272,7 +272,9 @@ impl<'a> ParseItemSet<'a> { } pub fn core(&self) -> ParseItemSetCore<'a> { - ParseItemSetCore { entries: self.entries.iter().map(|e| e.0).collect() } + ParseItemSetCore { + entries: self.entries.iter().map(|e| e.0).collect(), + } } } diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs index f904efa3..df22eaff 100644 --- a/cli/src/generate/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -2,7 +2,7 @@ use super::nfa::Nfa; use super::rules::{Alias, Associativity, Rule, Symbol}; use hashbrown::HashMap; -#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub(crate) enum VariableType { Hidden, Auxiliary, @@ -64,6 +64,7 @@ pub(crate) struct Production { pub dynamic_precedence: i32, } +#[derive(Default)] pub(crate) struct InlinedProductionMap { pub productions: Vec, pub production_map: HashMap<(*const Production, u32), Vec>, diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index b779a004..68bebc1f 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -97,7 +97,7 @@ fn generate_parser_for_grammar_with_opts( let input_grammar = parse_grammar(grammar_json)?; let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = prepare_grammar(&input_grammar)?; - let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar)?; + let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &inlines)?; let node_types_json = node_types::generate_node_types_json( &syntax_grammar, &lexical_grammar, diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index f43ffce2..92b9dd67 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -1,4 +1,7 @@ -use super::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; +use super::grammars::{ + InlinedProductionMap, LexicalGrammar, Production, ProductionStep, SyntaxGrammar, + SyntaxVariable, VariableType, +}; use super::rules::{Alias, AliasMap, Symbol, SymbolType}; use crate::error::{Error, Result}; use hashbrown::HashMap; @@ -23,6 +26,7 @@ pub(crate) struct FieldInfo { pub(crate) struct VariableInfo { pub fields: HashMap, pub child_types: Vec, + pub child_types_without_fields: Vec, pub has_multi_step_production: bool, } @@ -34,6 +38,8 @@ pub(crate) struct NodeInfoJSON { #[serde(skip_serializing_if = "Option::is_none")] fields: Option>, #[serde(skip_serializing_if = "Option::is_none")] + children: Option, + #[serde(skip_serializing_if = "Option::is_none")] subtypes: Option>, } @@ -54,49 +60,56 @@ pub(crate) struct FieldInfoJSON { pub(crate) fn get_variable_info( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, + inlines: &InlinedProductionMap, ) -> Result> { let mut result = Vec::new(); // Determine which field names and child node types can appear directly // within each type of node. + let mut steps = Vec::new(); for (i, variable) in syntax_grammar.variables.iter().enumerate() { let mut info = VariableInfo { fields: HashMap::new(), child_types: Vec::new(), + child_types_without_fields: Vec::new(), has_multi_step_production: false, }; - let is_recursive = variable - .productions - .iter() - .any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i))); - for production in &variable.productions { - if production.steps.len() > 1 { - info.has_multi_step_production = true; + steps.clear(); + if get_all_child_steps(variable, inlines, &mut steps) > 1 { + info.has_multi_step_production = true; + } + + let is_recursive = steps.iter().any(|s| s.symbol == Symbol::non_terminal(i)); + + for step in &steps { + let child_type = if let Some(alias) = &step.alias { + ChildType::Aliased(alias.clone()) + } else { + ChildType::Normal(step.symbol) + }; + + if let Some(field_name) = &step.field_name { + let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo { + multiple: false, + required: true, + types: Vec::new(), + }); + field_info.multiple |= is_recursive; + if let Err(i) = field_info.types.binary_search(&child_type) { + field_info.types.insert(i, child_type.clone()); + } + } else if variable_type_for_child_type(&child_type, syntax_grammar, lexical_grammar) + == VariableType::Named + { + if let Err(i) = info.child_types_without_fields.binary_search(&child_type) { + info.child_types_without_fields + .insert(i, child_type.clone()); + } } - for step in &production.steps { - let child_type = if let Some(alias) = &step.alias { - ChildType::Aliased(alias.clone()) - } else { - ChildType::Normal(step.symbol) - }; - - if let Some(field_name) = &step.field_name { - let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo { - multiple: false, - required: true, - types: Vec::new(), - }); - field_info.multiple |= is_recursive; - if let Err(i) = field_info.types.binary_search(&child_type) { - field_info.types.insert(i, child_type.clone()); - } - } - - if let Err(i) = info.child_types.binary_search(&child_type) { - info.child_types.insert(i, child_type.clone()); - } + if let Err(i) = info.child_types.binary_search(&child_type) { + info.child_types.insert(i, child_type.clone()); } } @@ -107,9 +120,18 @@ pub(crate) fn get_variable_info( .filter_map(|s| s.field_name.as_ref()) .collect(); for (field_name, field_info) in info.fields.iter_mut() { - if !production_fields.contains(&field_name) { + let mut occurrence_count = 0; + for f in &production_fields { + if *f == field_name { + occurrence_count += 1; + } + } + if occurrence_count == 0 { field_info.required = false; } + if occurrence_count > 1 { + field_info.multiple = true; + } } } @@ -127,68 +149,80 @@ pub(crate) fn get_variable_info( let mut variable_info = VariableInfo::default(); mem::swap(&mut variable_info, &mut result[i]); - for production in &variable.productions { - for step in &production.steps { - let child_symbol = step.symbol; - if step.alias.is_none() - && child_symbol.kind == SymbolType::NonTerminal - && !syntax_grammar.variables[child_symbol.index] - .kind - .is_visible() - { - let child_variable_info = &result[child_symbol.index]; + steps.clear(); + get_all_child_steps(variable, inlines, &mut steps); - // If a hidden child can have multiple children, then this - // node can appear to have multiple children. - if child_variable_info.has_multi_step_production { - variable_info.has_multi_step_production = true; + for step in &steps { + let child_symbol = step.symbol; + if step.alias.is_none() + && child_symbol.kind == SymbolType::NonTerminal + && !syntax_grammar.variables[child_symbol.index] + .kind + .is_visible() + && !syntax_grammar.supertype_symbols.contains(&child_symbol) + { + let child_variable_info = &result[child_symbol.index]; + + // If a hidden child can have multiple children, then this + // node can appear to have multiple children. + if child_variable_info.has_multi_step_production { + variable_info.has_multi_step_production = true; + } + + // Inherit fields from this hidden child + for (field_name, child_field_info) in &child_variable_info.fields { + let field_info = variable_info + .fields + .entry(field_name.clone()) + .or_insert_with(|| { + done = false; + child_field_info.clone() + }); + if child_field_info.multiple && !field_info.multiple { + field_info.multiple = child_field_info.multiple; + done = false; } - - // Inherit fields from this hidden child - for (field_name, child_field_info) in &child_variable_info.fields { - let field_info = variable_info - .fields - .entry(field_name.clone()) - .or_insert_with(|| { - done = false; - child_field_info.clone() - }); - if child_field_info.multiple && !field_info.multiple { - field_info.multiple = child_field_info.multiple; + if !child_field_info.required && field_info.required { + field_info.required = child_field_info.required; + done = false; + } + for child_type in &child_field_info.types { + if let Err(i) = field_info.types.binary_search(&child_type) { + field_info.types.insert(i, child_type.clone()); done = false; } - if !child_field_info.required && field_info.required { - field_info.required = child_field_info.required; + } + } + + // Inherit child types from this hidden child + for child_type in &child_variable_info.child_types { + if let Err(i) = variable_info.child_types.binary_search(&child_type) { + variable_info.child_types.insert(i, child_type.clone()); + done = false; + } + } + + // If any field points to this hidden child, inherit child types + // for the field. + if let Some(field_name) = &step.field_name { + let field_info = variable_info.fields.get_mut(field_name).unwrap(); + for child_type in &child_variable_info.child_types { + if let Err(i) = field_info.types.binary_search(&child_type) { + field_info.types.insert(i, child_type.clone()); done = false; } - for child_type in &child_field_info.types { - if let Err(i) = field_info.types.binary_search(&child_type) { - field_info.types.insert(i, child_type.clone()); - done = false; - } - } } - - if !syntax_grammar.supertype_symbols.contains(&child_symbol) { - // Inherit child types from this hidden child - for child_type in &child_variable_info.child_types { - if let Err(i) = variable_info.child_types.binary_search(&child_type) - { - variable_info.child_types.insert(i, child_type.clone()); - done = false; - } - } - - // If any field points to this hidden child, inherit child types - // for the field. - if let Some(field_name) = &step.field_name { - let field_info = variable_info.fields.get_mut(field_name).unwrap(); - for child_type in &child_variable_info.child_types { - if let Err(i) = field_info.types.binary_search(&child_type) { - field_info.types.insert(i, child_type.clone()); - done = false; - } - } + } else { + // Inherit child types without fields from this hidden child + for child_type in &child_variable_info.child_types_without_fields { + if let Err(i) = variable_info + .child_types_without_fields + .binary_search(&child_type) + { + variable_info + .child_types_without_fields + .insert(i, child_type.clone()); + done = false; } } } @@ -217,20 +251,8 @@ pub(crate) fn get_variable_info( } } - let child_type_is_visible = |child_type: &ChildType| match child_type { - ChildType::Aliased(_) => true, - ChildType::Normal(symbol) => { - if syntax_grammar.supertype_symbols.contains(&symbol) { - return true; - } - let variable_kind = match symbol.kind { - SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind, - SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind, - SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind, - _ => VariableType::Hidden, - }; - variable_kind.is_visible() - } + let child_type_is_visible = |t: &ChildType| { + variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous }; for supertype_symbol in &syntax_grammar.supertype_symbols { @@ -247,28 +269,121 @@ pub(crate) fn get_variable_info( // subtypes with a single supertype. for (_, field_info) in variable_info.fields.iter_mut() { for supertype_symbol in &syntax_grammar.supertype_symbols { - if sorted_vec_replace( + sorted_vec_replace( &mut field_info.types, &result[supertype_symbol.index].child_types, ChildType::Normal(*supertype_symbol), - ) { - break; - } + ); } field_info.types.retain(child_type_is_visible); } + for supertype_symbol in &syntax_grammar.supertype_symbols { + sorted_vec_replace( + &mut variable_info.child_types_without_fields, + &result[supertype_symbol.index].child_types, + ChildType::Normal(*supertype_symbol), + ); + } + + variable_info + .child_types_without_fields + .retain(child_type_is_visible); + result[i] = variable_info; } Ok(result) } +// Summarize information about this variable's possible children by walking +// all of its productions. +fn get_all_child_steps( + variable: &SyntaxVariable, + inlines: &InlinedProductionMap, + output: &mut Vec, +) -> usize { + // For each of the given variable's productions, insert all of the reachable steps + // into the output vector, and return the longest possible production length. + return variable + .productions + .iter() + .map(|p| process_production(inlines, p, 0, output)) + .max() + .unwrap_or(0); + + // For the given production suffix, add all of the remaining steps into the output + // vector and return the longest possible production length. + fn process_production( + inlines: &InlinedProductionMap, + production: &Production, + step_index: usize, + output: &mut Vec, + ) -> usize { + let mut max_length = production.steps.len(); + + // Process each of the remaining steps of the production. + for (i, step) in production.steps.iter().enumerate().skip(step_index) { + // If this step is inlined, then process the corresponding suffixes of + // all of the inlined productions instead. + if let Some(inlined_productions) = inlines.inlined_productions(production, i as u32) { + for inlined_production in inlined_productions { + let length = process_production(inlines, inlined_production, i, output); + if length > max_length { + max_length = length; + } + } + break; + } + + // Otherwise, insert this step into the output vector unless it is already + // present. + if let Err(i) = output.binary_search(step) { + output.insert(i, step.clone()); + } + } + + return max_length; + } +} + +fn variable_type_for_child_type( + child_type: &ChildType, + syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, +) -> VariableType { + match child_type { + ChildType::Aliased(alias) => { + if alias.is_named { + VariableType::Named + } else { + VariableType::Anonymous + } + } + ChildType::Normal(symbol) => { + if syntax_grammar.supertype_symbols.contains(&symbol) { + return VariableType::Named; + } else { + match symbol.kind { + SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind, + SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind, + SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind, + _ => VariableType::Hidden, + } + } + } + } +} + fn sorted_vec_replace(left: &mut Vec, right: &Vec, value: T) -> bool where T: Eq + Ord, { + if left.len() == 0 { + return false; + } + let mut i = 0; for right_elem in right.iter() { while left[i] < *right_elem { @@ -366,6 +481,7 @@ pub(crate) fn generate_node_types_json( kind: name.clone(), named: true, fields: None, + children: None, subtypes: None, }); let mut subtypes = info @@ -376,7 +492,9 @@ pub(crate) fn generate_node_types_json( subtypes.sort_unstable(); subtypes.dedup(); node_type_json.subtypes = Some(subtypes); - } else if variable.kind.is_visible() { + } else if variable.kind.is_visible() + && !syntax_grammar.variables_to_inline.contains(&symbol) + { let node_type_json = node_types_json .entry(name.clone()) @@ -384,6 +502,7 @@ pub(crate) fn generate_node_types_json( kind: name.clone(), named: true, fields: None, + children: None, subtypes: None, }); let mut fields_json = BTreeMap::new(); @@ -403,6 +522,20 @@ pub(crate) fn generate_node_types_json( field_info_json.types.dedup(); } node_type_json.fields = Some(fields_json); + if info.child_types_without_fields.len() > 0 { + let mut children_types = info + .child_types_without_fields + .iter() + .map(child_type_to_node_type) + .collect::>(); + children_types.sort_unstable(); + children_types.dedup(); + node_type_json.children = Some(FieldInfoJSON { + multiple: true, + required: false, + types: children_types, + }); + } } } @@ -414,6 +547,7 @@ pub(crate) fn generate_node_types_json( kind: variable.name.clone(), named: true, fields: None, + children: None, subtypes: None, }); } else if variable.kind == VariableType::Anonymous { @@ -421,6 +555,7 @@ pub(crate) fn generate_node_types_json( kind: variable.name.clone(), named: false, fields: None, + children: None, subtypes: None, }); } @@ -471,6 +606,7 @@ mod tests { kind: "v1".to_string(), named: true, subtypes: None, + children: None, fields: Some( vec![ ( @@ -507,6 +643,7 @@ mod tests { kind: ";".to_string(), named: false, subtypes: None, + children: None, fields: None } ); @@ -516,6 +653,7 @@ mod tests { kind: "v2".to_string(), named: true, subtypes: None, + children: None, fields: None } ); @@ -565,6 +703,7 @@ mod tests { kind: "_v2".to_string(), named: true, fields: None, + children: None, subtypes: Some(vec![ NodeTypeJSON { kind: "*".to_string(), @@ -587,6 +726,7 @@ mod tests { kind: "v1".to_string(), named: true, subtypes: None, + children: None, fields: Some( vec![( "f1".to_string(), @@ -606,6 +746,83 @@ mod tests { ); } + #[test] + fn test_node_types_for_children_without_fields() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_tokens: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![ + Variable { + name: "v1".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::named("v2"), + Rule::field("f1".to_string(), Rule::named("v3")), + Rule::named("v4"), + ]), + }, + Variable { + name: "v2".to_string(), + kind: VariableType::Named, + rule: Rule::string("x"), + }, + Variable { + name: "v3".to_string(), + kind: VariableType::Named, + rule: Rule::string("x"), + }, + Variable { + name: "v4".to_string(), + kind: VariableType::Named, + rule: Rule::string("x"), + }, + ], + }); + + assert_eq!( + node_types[0], + NodeInfoJSON { + kind: "v1".to_string(), + named: true, + subtypes: None, + children: Some(FieldInfoJSON { + multiple: true, + required: false, + types: vec![ + NodeTypeJSON { + kind: "v2".to_string(), + named: true, + }, + NodeTypeJSON { + kind: "v4".to_string(), + named: true, + }, + ] + }), + fields: Some( + vec![( + "f1".to_string(), + FieldInfoJSON { + multiple: false, + required: true, + types: vec![NodeTypeJSON { + kind: "v3".to_string(), + named: true, + }] + } + ),] + .into_iter() + .collect() + ) + } + ); + } + #[test] fn test_get_variable_info() { let variable_info = get_variable_info( @@ -664,6 +881,7 @@ mod tests { vec![], ), &build_lexical_grammar(), + &InlinedProductionMap::default(), ) .unwrap(); @@ -732,6 +950,7 @@ mod tests { vec![], ), &build_lexical_grammar(), + &InlinedProductionMap::default(), ) .unwrap(); @@ -787,6 +1006,7 @@ mod tests { vec![Symbol::non_terminal(1)], ), &build_lexical_grammar(), + &InlinedProductionMap::default(), ) .unwrap(); @@ -808,7 +1028,12 @@ mod tests { fn get_node_types(grammar: InputGrammar) -> Vec { let (syntax_grammar, lexical_grammar, _, simple_aliases) = prepare_grammar(&grammar).unwrap(); - let variable_info = get_variable_info(&syntax_grammar, &lexical_grammar).unwrap(); + let variable_info = get_variable_info( + &syntax_grammar, + &lexical_grammar, + &InlinedProductionMap::default(), + ) + .unwrap(); generate_node_types_json( &syntax_grammar, &lexical_grammar,