diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 5446e4af..be293047 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -176,7 +176,7 @@ fn generate_parser_for_grammar_with_opts( next_abi: bool, report_symbol_name: Option<&str>, ) -> Result { - let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &inlines)?; + let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar)?; let node_types_json = node_types::generate_node_types_json( &syntax_grammar, &lexical_grammar, diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 3f9db323..2b88e54f 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -1,7 +1,4 @@ -use super::grammars::{ - InlinedProductionMap, LexicalGrammar, Production, ProductionStep, SyntaxGrammar, - SyntaxVariable, VariableType, -}; +use super::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; use super::rules::{Alias, AliasMap, Symbol, SymbolType}; use crate::error::{Error, Result}; use serde_derive::Serialize; @@ -16,12 +13,11 @@ pub(crate) enum ChildType { #[derive(Clone, Debug, Default, PartialEq, Eq)] pub(crate) struct FieldInfo { - pub required: bool, - pub multiple: bool, + pub quantity: ChildQuantity, pub types: Vec, } -#[derive(Debug, Default, PartialEq, Eq)] +#[derive(Clone, Debug, Default, PartialEq, Eq)] pub(crate) struct VariableInfo { pub fields: HashMap, pub child_types: Vec, @@ -56,226 +52,273 @@ pub(crate) struct FieldInfoJSON { types: Vec, } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct ChildQuantity { + exists: bool, + required: bool, + multiple: bool, +} + +impl Default for ChildQuantity { + fn default() -> Self { + Self::zero() + } +} + +impl ChildQuantity { + fn zero() -> Self { + ChildQuantity { + exists: false, + required: false, + multiple: false, + } + } + + fn one() -> Self { + ChildQuantity { + exists: true, + required: true, + multiple: false, + } + } + + fn append(&mut self, other: ChildQuantity) { + if other.exists { + if self.exists || other.multiple { + self.multiple = true; + } + if other.required { + self.required = true; + } + self.exists = true; + } + } + + fn union(&mut self, other: ChildQuantity) -> bool { + let mut result = false; + if !self.exists && other.exists { + result = true; + self.exists = true; + } + if self.required && !other.required { + result = true; + self.required = false; + } + if !self.multiple && other.multiple { + result = true; + self.multiple = true; + } + result + } +} + +/// Compute a summary of the public-facing structure of each variable in the +/// grammar. Each variable in the grammar corresponds to a distinct public-facing +/// node type. +/// +/// The information collected about each node type `N` is: +/// 1. `child_types` - The types of visible children that can appear within `N`. +/// 2. `fields` - The fields that `N` can have. Data regarding each field: +/// * `types` - The types of visible children the field can contain. +/// * `optional` - Do `N` nodes always have this field? +/// * `multiple` - Can `N` nodes have multiple children for this field? +/// 3. `children_without_fields` - The *other* named children of `N` that are +/// not associated with fields. Data regarding these children: +/// * `types` - The types of named children with no field. +/// * `optional` - Do `N` nodes always have at least one named child with no field? +/// * `multiple` - Can `N` nodes have multiple named child with no field? +/// +/// Each summary must account for some indirect factors: +/// 1. hidden nodes. When a parent node `N` has a hidden child `C`, the visible +/// children of `C` *appear* to be direct children of `N`. +/// 2. aliases. If a parent node type `M` is aliased as some other type `N`, +/// then nodes which *appear* to have type `N` may have internal structure based +/// on `M`. pub(crate) fn get_variable_info( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, - inlines: &InlinedProductionMap, ) -> Result> { - let mut result = Vec::new(); + let child_type_is_visible = |t: &ChildType| { + variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous + }; - // Determine which field names and child node types can appear directly - // within each type of node. - let mut steps = Vec::new(); - for (i, variable) in syntax_grammar.variables.iter().enumerate() { - let mut info = VariableInfo { - fields: HashMap::new(), - child_types: Vec::new(), - children_without_fields: FieldInfo { - multiple: false, - required: true, - types: Vec::new(), - }, - has_multi_step_production: false, - }; + let child_type_is_named = |t: &ChildType| { + variable_type_for_child_type(t, syntax_grammar, lexical_grammar) == VariableType::Named + }; - steps.clear(); - if get_all_child_steps(variable, inlines, &mut steps) > 1 { - info.has_multi_step_production = true; - } + // Each variable's summary can depend on the summaries of other hidden variables, + // and variables can have mutually recursive structure. So we compute the summaries + // iteratively, in a loop that terminates only when more changes are possible. + let mut did_change = true; + let mut all_initialized = false; + let mut result = vec![VariableInfo::default(); syntax_grammar.variables.len()]; + while did_change { + did_change = false; - let is_recursive = steps.iter().any(|s| s.symbol == Symbol::non_terminal(i)); - - for step in &steps { - let child_type = if let Some(alias) = &step.alias { - ChildType::Aliased(alias.clone()) - } else { - ChildType::Normal(step.symbol) - }; - - if let Some(field_name) = &step.field_name { - let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo { - multiple: false, - required: true, - types: Vec::new(), - }); - field_info.multiple |= is_recursive; - if let Err(i) = field_info.types.binary_search(&child_type) { - field_info.types.insert(i, child_type.clone()); - } - } else if variable_type_for_child_type(&child_type, syntax_grammar, lexical_grammar) - == VariableType::Named - { - let children_info = &mut info.children_without_fields; - children_info.multiple |= is_recursive; - if let Err(i) = children_info.types.binary_search(&child_type) { - children_info.types.insert(i, child_type.clone()); - } - } - - if let Err(i) = info.child_types.binary_search(&child_type) { - info.child_types.insert(i, child_type.clone()); - } - } - - for production in &variable.productions { - let production_fields: Vec<&String> = production - .steps - .iter() - .filter_map(|s| s.field_name.as_ref()) - .collect(); - for (field_name, field_info) in info.fields.iter_mut() { - let mut occurrence_count = 0; - for f in &production_fields { - if *f == field_name { - occurrence_count += 1; - } - } - if occurrence_count == 0 { - field_info.required = false; - } - if occurrence_count > 1 { - field_info.multiple = true; - } - } - - let named_children_without_fields_count = production - .steps - .iter() - .filter(|s| { - if s.field_name.is_some() { - false - } else if let Some(alias) = &s.alias { - alias.is_named - } else if s.symbol.is_non_terminal() { - true - } else if s.symbol.is_external() { - syntax_grammar.external_tokens[s.symbol.index].kind == VariableType::Named - } else { - lexical_grammar.variables[s.symbol.index].kind == VariableType::Named - } - }) - .count(); - if named_children_without_fields_count == 0 { - info.children_without_fields.required = false; - } - if named_children_without_fields_count > 1 { - info.children_without_fields.multiple = true; - } - } - - result.push(info); - } - - // Expand each node type's information recursively to inherit the properties of - // hidden children. - let mut done = false; - while !done { - done = true; for (i, variable) in syntax_grammar.variables.iter().enumerate() { - // Move this variable's info out of the vector so it can be modified - // while reading from other entries of the vector. - let mut variable_info = VariableInfo::default(); - mem::swap(&mut variable_info, &mut result[i]); + let mut variable_info = result[i].clone(); - steps.clear(); - get_all_child_steps(variable, inlines, &mut steps); + // Within a variable, consider each production separately. For each + // production, determine which children and fields can occur, and how many + // times they can occur. + for (production_index, production) in variable.productions.iter().enumerate() { + let mut field_quantities = HashMap::new(); + let mut children_without_fields_quantity = ChildQuantity::zero(); + let mut has_uninitialized_invisible_children = false; - for step in &steps { - let child_symbol = step.symbol; - if step.alias.is_none() - && child_symbol.kind == SymbolType::NonTerminal - && !syntax_grammar.variables[child_symbol.index] - .kind - .is_visible() - && !syntax_grammar.supertype_symbols.contains(&child_symbol) - { - let child_variable_info = &result[child_symbol.index]; + if production.steps.len() > 1 { + variable_info.has_multi_step_production = true; + } - // If a hidden child can have multiple children, then this - // node can appear to have multiple children. - if child_variable_info.has_multi_step_production { - variable_info.has_multi_step_production = true; - } - - // Inherit fields from this hidden child - for (field_name, child_field_info) in &child_variable_info.fields { - let field_info = variable_info - .fields - .entry(field_name.clone()) - .or_insert_with(|| { - done = false; - child_field_info.clone() - }); - if child_field_info.multiple && !field_info.multiple { - field_info.multiple = child_field_info.multiple; - done = false; - } - if !child_field_info.required && field_info.required { - field_info.required = child_field_info.required; - done = false; - } - for child_type in &child_field_info.types { - if let Err(i) = field_info.types.binary_search(&child_type) { - field_info.types.insert(i, child_type.clone()); - done = false; - } - } - } - - // Inherit child types from this hidden child - for child_type in &child_variable_info.child_types { - if let Err(i) = variable_info.child_types.binary_search(&child_type) { - variable_info.child_types.insert(i, child_type.clone()); - done = false; - } - } - - // If any field points to this hidden child, inherit child types - // for the field. - if let Some(field_name) = &step.field_name { - let field_info = variable_info.fields.get_mut(field_name).unwrap(); - for child_type in &child_variable_info.child_types { - if let Err(i) = field_info.types.binary_search(&child_type) { - field_info.types.insert(i, child_type.clone()); - done = false; - } - } + for step in &production.steps { + let child_symbol = step.symbol; + let child_type = if let Some(alias) = &step.alias { + ChildType::Aliased(alias.clone()) } else { - // Inherit child types without fields from this hidden child - // Inherit info about children w/o fields from this hidden child - let grandchildren_info = &child_variable_info.children_without_fields; - if grandchildren_info.multiple - && !variable_info.children_without_fields.multiple - { - variable_info.children_without_fields.multiple = true; - done = false; + ChildType::Normal(child_symbol) + }; + + // Record all of the types of direct children. + did_change |= sorted_vec_insert(&mut variable_info.child_types, &child_type); + + // Record all of the field names that occur. + if let Some(field_name) = &step.field_name { + // Record how many times each field occurs in this production. + field_quantities + .entry(field_name) + .or_insert(ChildQuantity::zero()) + .append(ChildQuantity::one()); + + // Record the types of children for this field. + let field_info = + variable_info.fields.entry(field_name.clone()).or_insert({ + let mut info = FieldInfo { + types: Vec::new(), + quantity: ChildQuantity::one(), + }; + + // If this field did *not* occur in an earlier production, + // then it is not required. + if production_index > 0 { + info.quantity.required = false; + } + info + }); + did_change |= sorted_vec_insert(&mut field_info.types, &child_type); + } + // Record named children without fields. + else if child_type_is_named(&child_type) { + // Record how many named children without fields occur in this production. + children_without_fields_quantity.append(ChildQuantity::one()); + + // Record the types of all of the named children without fields. + let children_info = &mut variable_info.children_without_fields; + if children_info.types.is_empty() { + children_info.quantity = ChildQuantity::one(); } - // if !grandchildren_info.required - // && variable_info.children_without_fields.required - // { - // variable_info.children_without_fields.required = false; - // done = false; - // } - for child_type in &grandchildren_info.types { - if let Err(i) = variable_info - .children_without_fields - .types - .binary_search(&child_type) - { - variable_info - .children_without_fields - .types - .insert(i, child_type.clone()); - done = false; + did_change |= sorted_vec_insert(&mut children_info.types, &child_type); + } + + // Inherit information from any hidden children. + if child_symbol.is_non_terminal() + && !syntax_grammar.supertype_symbols.contains(&child_symbol) + && step.alias.is_none() + && (!child_type_is_visible(&child_type) + || syntax_grammar.variables_to_inline.contains(&child_symbol)) + { + let child_variable_info = &result[child_symbol.index]; + + // If a hidden child can have multiple children, then this + // node can appear to have multiple children. + if child_variable_info.has_multi_step_production { + variable_info.has_multi_step_production = true; + } + + // Inherit fields from this hidden child + for (field_name, child_field_info) in &child_variable_info.fields { + field_quantities + .entry(field_name) + .or_insert(ChildQuantity::zero()) + .append(child_field_info.quantity); + let field_info = variable_info + .fields + .entry(field_name.clone()) + .or_insert(FieldInfo { + types: Vec::new(), + quantity: ChildQuantity::one(), + }); + for child_type in &child_field_info.types { + sorted_vec_insert(&mut field_info.types, &child_type); } } + + // Inherit child types from this hidden child + for child_type in &child_variable_info.child_types { + did_change |= + sorted_vec_insert(&mut variable_info.child_types, child_type); + } + + // If any field points to this hidden child, inherit child types + // for the field. + if let Some(field_name) = &step.field_name { + let field_info = variable_info.fields.get_mut(field_name).unwrap(); + for child_type in &child_variable_info.child_types { + did_change |= sorted_vec_insert(&mut field_info.types, &child_type); + } + } + // Inherit info about children without fields from this hidden child. + else { + let grandchildren_info = &child_variable_info.children_without_fields; + if !grandchildren_info.types.is_empty() { + children_without_fields_quantity + .append(grandchildren_info.quantity); + + if variable_info.children_without_fields.types.is_empty() { + variable_info.children_without_fields.quantity = + ChildQuantity::one(); + } + + for child_type in &grandchildren_info.types { + did_change |= sorted_vec_insert( + &mut variable_info.children_without_fields.types, + &child_type, + ); + } + } + } + } + + // Note whether or not this production contains children whose summaries + // have not yet been computed. + if child_symbol.index >= i && !all_initialized { + has_uninitialized_invisible_children = true; + } + } + + // If this production's children all have had their summaries initialized, + // then expand the quantity information with all of the possibilities introduced + // by this production. + if !has_uninitialized_invisible_children { + did_change |= variable_info + .children_without_fields + .quantity + .union(children_without_fields_quantity); + + for (field_name, info) in variable_info.fields.iter_mut() { + did_change |= info.quantity.union( + field_quantities + .get(field_name) + .cloned() + .unwrap_or(ChildQuantity::zero()), + ); } } } - // Move this variable's info back into the vector. result[i] = variable_info; } + + all_initialized = true; } for supertype_symbol in &syntax_grammar.supertype_symbols { @@ -295,16 +338,15 @@ pub(crate) fn get_variable_info( } } - let child_type_is_visible = |t: &ChildType| { - variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous - }; - for supertype_symbol in &syntax_grammar.supertype_symbols { result[supertype_symbol.index] .child_types .retain(child_type_is_visible); } + // Update all of the node type lists to account for node visiblity: + // * Wherever possible, repalce lists of subtypes with their supertypes. + // * Remove other hidden node types. for i in 0..result.len() { let mut variable_info = VariableInfo::default(); mem::swap(&mut variable_info, &mut result[i]); @@ -341,57 +383,6 @@ pub(crate) fn get_variable_info( Ok(result) } -// Summarize information about this variable's possible children by walking -// all of its productions. -fn get_all_child_steps( - variable: &SyntaxVariable, - inlines: &InlinedProductionMap, - output: &mut Vec, -) -> usize { - // For each of the given variable's productions, insert all of the reachable steps - // into the output vector, and return the longest possible production length. - return variable - .productions - .iter() - .map(|p| process_production(inlines, p, 0, output)) - .max() - .unwrap_or(0); - - // For the given production suffix, add all of the remaining steps into the output - // vector and return the longest possible production length. - fn process_production( - inlines: &InlinedProductionMap, - production: &Production, - step_index: usize, - output: &mut Vec, - ) -> usize { - let mut max_length = production.steps.len(); - - // Process each of the remaining steps of the production. - for (i, step) in production.steps.iter().enumerate().skip(step_index) { - // If this step is inlined, then process the corresponding suffixes of - // all of the inlined productions instead. - if let Some(inlined_productions) = inlines.inlined_productions(production, i as u32) { - for inlined_production in inlined_productions { - let length = process_production(inlines, inlined_production, i, output); - if length > max_length { - max_length = length; - } - } - break; - } - - // Otherwise, insert this step into the output vector unless it is already - // present. - if let Err(i) = output.binary_search(step) { - output.insert(i, step.clone()); - } - } - - return max_length; - } -} - fn variable_type_for_child_type( child_type: &ChildType, syntax_grammar: &SyntaxGrammar, @@ -420,6 +411,18 @@ fn variable_type_for_child_type( } } +fn sorted_vec_insert(vec: &mut Vec, value: &T) -> bool +where + T: Clone + Eq + Ord, +{ + if let Err(i) = vec.binary_search(&value) { + vec.insert(i, value.clone()); + true + } else { + false + } +} + fn sorted_vec_replace(left: &mut Vec, right: &Vec, value: T) -> bool where T: Eq + Ord, @@ -593,8 +596,8 @@ pub(crate) fn generate_node_types_json( types: Vec::new(), }); - field_info_json.multiple |= field_info.multiple; - field_info_json.required &= field_info.required; + field_info_json.multiple |= field_info.quantity.multiple; + field_info_json.required &= field_info.quantity.required; field_info_json .types .extend(field_info.types.iter().map(child_type_to_node_type)); @@ -612,8 +615,8 @@ pub(crate) fn generate_node_types_json( children_types.sort_unstable(); children_types.dedup(); node_type_json.children = Some(FieldInfoJSON { - multiple: info.children_without_fields.multiple, - required: info.children_without_fields.required, + multiple: info.children_without_fields.quantity.multiple, + required: info.children_without_fields.quantity.required, types: children_types, }); } @@ -1029,6 +1032,74 @@ mod tests { ); } + #[test] + fn test_node_types_with_multiple_valued_fields() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_tokens: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![ + Variable { + name: "a".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::choice(vec![ + Rule::Blank, + Rule::repeat(Rule::field("f1".to_string(), Rule::named("b"))), + ]), + Rule::repeat(Rule::named("c")), + ]), + }, + Variable { + name: "b".to_string(), + kind: VariableType::Named, + rule: Rule::string("b"), + }, + Variable { + name: "c".to_string(), + kind: VariableType::Named, + rule: Rule::string("c"), + }, + ], + }); + + assert_eq!( + node_types[0], + NodeInfoJSON { + kind: "a".to_string(), + named: true, + subtypes: None, + children: Some(FieldInfoJSON { + multiple: true, + required: true, + types: vec![NodeTypeJSON { + kind: "c".to_string(), + named: true, + },] + }), + fields: Some( + vec![( + "f1".to_string(), + FieldInfoJSON { + multiple: true, + required: false, + types: vec![NodeTypeJSON { + kind: "b".to_string(), + named: true, + }] + } + )] + .into_iter() + .collect() + ), + } + ); + } + #[test] fn test_get_variable_info() { let variable_info = get_variable_info( @@ -1087,7 +1158,6 @@ mod tests { vec![], ), &build_lexical_grammar(), - &InlinedProductionMap::default(), ) .unwrap(); @@ -1096,8 +1166,11 @@ mod tests { vec![( "field1".to_string(), FieldInfo { - required: true, - multiple: false, + quantity: ChildQuantity { + exists: true, + required: true, + multiple: false, + }, types: vec![ChildType::Normal(Symbol::terminal(1))], } )] @@ -1110,8 +1183,11 @@ mod tests { vec![( "field2".to_string(), FieldInfo { - required: false, - multiple: false, + quantity: ChildQuantity { + exists: true, + required: false, + multiple: false, + }, types: vec![ ChildType::Normal(Symbol::terminal(2)), ChildType::Normal(Symbol::terminal(3)), @@ -1131,14 +1207,20 @@ mod tests { SyntaxVariable { name: "rule0".to_string(), kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::non_terminal(1)), - ProductionStep::new(Symbol::terminal(1)), - ], - }], + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::terminal(1)), + ], + }, + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::non_terminal(1))], + }, + ], }, // Hidden node with fields SyntaxVariable { @@ -1147,7 +1229,7 @@ mod tests { productions: vec![Production { dynamic_precedence: 0, steps: vec![ - ProductionStep::new(Symbol::terminal(2)), + ProductionStep::new(Symbol::terminal(2)).with_alias(".", false), ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"), ], }], @@ -1156,7 +1238,6 @@ mod tests { vec![], ), &build_lexical_grammar(), - &InlinedProductionMap::default(), ) .unwrap(); @@ -1165,14 +1246,33 @@ mod tests { vec![( "field1".to_string(), FieldInfo { - required: true, - multiple: false, + quantity: ChildQuantity { + exists: true, + required: true, + multiple: false, + }, types: vec![ChildType::Normal(Symbol::terminal(3))], } )] .into_iter() .collect::>() ); + + // + assert_eq!( + variable_info[0].children_without_fields, + FieldInfo { + quantity: ChildQuantity { + exists: true, + required: false, + multiple: true, + }, + types: vec![ + ChildType::Normal(Symbol::terminal(0)), + ChildType::Normal(Symbol::terminal(1)), + ], + } + ); } #[test] @@ -1212,7 +1312,6 @@ mod tests { vec![Symbol::non_terminal(1)], ), &build_lexical_grammar(), - &InlinedProductionMap::default(), ) .unwrap(); @@ -1221,8 +1320,11 @@ mod tests { vec![( "field1".to_string(), FieldInfo { - required: true, - multiple: false, + quantity: ChildQuantity { + exists: true, + required: true, + multiple: false, + }, types: vec![ChildType::Normal(Symbol::non_terminal(1))], } )] @@ -1234,12 +1336,7 @@ mod tests { fn get_node_types(grammar: InputGrammar) -> Vec { let (syntax_grammar, lexical_grammar, _, simple_aliases) = prepare_grammar(&grammar).unwrap(); - let variable_info = get_variable_info( - &syntax_grammar, - &lexical_grammar, - &InlinedProductionMap::default(), - ) - .unwrap(); + let variable_info = get_variable_info(&syntax_grammar, &lexical_grammar).unwrap(); generate_node_types_json( &syntax_grammar, &lexical_grammar,