From eb96dd6ddb828e56225d25b39c2ab851636a1e56 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 27 Mar 2019 16:17:02 -0700 Subject: [PATCH] node types: Preserve all supertypes in field type lists --- .../build_tables/build_parse_table.rs | 496 +---------------- cli/src/generate/build_tables/mod.rs | 11 +- cli/src/generate/mod.rs | 14 +- cli/src/generate/node_types.rs | 505 +++++++++++++++++- cli/src/generate/tables.rs | 21 - 5 files changed, 526 insertions(+), 521 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 53f59221..04ab3aae 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -4,10 +4,11 @@ use crate::error::{Error, Result}; use crate::generate::grammars::{ InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType, }; +use crate::generate::node_types::VariableInfo; use crate::generate::rules::{Associativity, Symbol, SymbolType}; use crate::generate::tables::{ - ChildType, FieldInfo, FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, - ParseTableEntry, ProductionInfo, ProductionInfoId, VariableInfo, + FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, + ProductionInfo, ProductionInfoId, }; use core::ops::Range; use hashbrown::hash_map::Entry; @@ -16,7 +17,7 @@ use std::collections::hash_map::DefaultHasher; use std::collections::{BTreeMap, VecDeque}; use std::fmt::Write; use std::hash::Hasher; -use std::{mem, u32}; +use std::u32; #[derive(Clone)] struct AuxiliarySymbolInfo { @@ -37,6 +38,7 @@ struct ParseTableBuilder<'a> { item_set_builder: ParseItemSetBuilder<'a>, syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, + variable_info: &'a Vec, state_ids_by_item_set: HashMap, ParseStateId>, item_sets_by_state_id: Vec>, parse_state_queue: VecDeque, @@ -670,7 +672,7 @@ impl<'a> ParseTableBuilder<'a> { .kind .is_visible() { - let info = &self.parse_table.variable_info[step.symbol.index]; + let info = &self.variable_info[step.symbol.index]; for (field_name, _) in &info.fields { production_info .field_map @@ -748,261 +750,11 @@ fn populate_following_tokens( } } -pub(crate) fn get_variable_info( - syntax_grammar: &SyntaxGrammar, - lexical_grammar: &LexicalGrammar, -) -> Result> { - let mut result = Vec::new(); - - // Determine which field names and child node types can appear directly - // within each type of node. - for (i, variable) in syntax_grammar.variables.iter().enumerate() { - let mut info = VariableInfo { - fields: HashMap::new(), - child_types: Vec::new(), - has_multi_step_production: false, - }; - let is_recursive = variable - .productions - .iter() - .any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i))); - - for production in &variable.productions { - if production.steps.len() > 1 { - info.has_multi_step_production = true; - } - - for step in &production.steps { - let child_type = if let Some(alias) = &step.alias { - ChildType::Aliased(alias.clone()) - } else { - ChildType::Normal(step.symbol) - }; - - if let Some(field_name) = &step.field_name { - let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo { - multiple: false, - required: true, - types: Vec::new(), - }); - field_info.multiple |= is_recursive; - if let Err(i) = field_info.types.binary_search(&child_type) { - field_info.types.insert(i, child_type.clone()); - } - } - - if let Err(i) = info.child_types.binary_search(&child_type) { - info.child_types.insert(i, child_type.clone()); - } - } - } - - for production in &variable.productions { - let production_fields: Vec<&String> = production - .steps - .iter() - .filter_map(|s| s.field_name.as_ref()) - .collect(); - for (field_name, field_info) in info.fields.iter_mut() { - if !production_fields.contains(&field_name) { - field_info.required = false; - } - } - } - - result.push(info); - } - - // Expand each node type's information recursively to inherit the properties of - // hidden children. - let mut done = false; - while !done { - done = true; - for (i, variable) in syntax_grammar.variables.iter().enumerate() { - // Move this variable's info out of the vector so it can be modified - // while reading from other entries of the vector. - let mut variable_info = VariableInfo::default(); - mem::swap(&mut variable_info, &mut result[i]); - - for production in &variable.productions { - for step in &production.steps { - let child_symbol = step.symbol; - if step.alias.is_none() - && child_symbol.kind == SymbolType::NonTerminal - && !syntax_grammar.variables[child_symbol.index] - .kind - .is_visible() - { - let child_variable_info = &result[child_symbol.index]; - - if child_variable_info.has_multi_step_production { - variable_info.has_multi_step_production = true; - } - - // Inherit fields from this hidden child - for (field_name, child_field_info) in &child_variable_info.fields { - let field_info = variable_info - .fields - .entry(field_name.clone()) - .or_insert_with(|| { - done = false; - child_field_info.clone() - }); - if child_field_info.multiple && !field_info.multiple { - field_info.multiple = child_field_info.multiple; - done = false; - } - if !child_field_info.required && field_info.required { - field_info.required = child_field_info.required; - done = false; - } - for child_type in &child_field_info.types { - if let Err(i) = field_info.types.binary_search(&child_type) { - field_info.types.insert(i, child_type.clone()); - done = false; - } - } - } - - // Inherit child types from this hidden child - for child_type in &child_variable_info.child_types { - if let Err(i) = variable_info.child_types.binary_search(&child_type) { - variable_info.child_types.insert(i, child_type.clone()); - done = false; - } - } - - // If any field points to this hidden child, inherit child types - // for the field. - if let Some(field_name) = &step.field_name { - let field_info = variable_info.fields.get_mut(field_name).unwrap(); - for child_type in &child_variable_info.child_types { - if let Err(i) = field_info.types.binary_search(&child_type) { - field_info.types.insert(i, child_type.clone()); - done = false; - } - } - } - } - } - } - - // Move this variable's info back into the vector. - result[i] = variable_info; - } - } - - for supertype_symbol in &syntax_grammar.supertype_symbols { - let variable = &syntax_grammar.variables[supertype_symbol.index]; - if variable.kind != VariableType::Hidden { - return Err(Error::grammar(&format!( - "Supertype symbols must be hidden, but `{}` is not", - variable.name - ))); - } - - if result[supertype_symbol.index].has_multi_step_production { - return Err(Error::grammar(&format!( - "Supertype symbols must always have a single visible child, but `{}` can have multiple", - variable.name - ))); - } - } - - let child_type_is_visible = |child_type: &ChildType| match child_type { - ChildType::Aliased(_) => true, - ChildType::Normal(symbol) => { - let variable_kind = match symbol.kind { - SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind, - SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind, - SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind, - _ => VariableType::Hidden, - }; - variable_kind.is_visible() - } - }; - - for supertype_symbol in &syntax_grammar.supertype_symbols { - result[supertype_symbol.index] - .child_types - .retain(child_type_is_visible); - } - - for i in 0..result.len() { - let mut variable_info = VariableInfo::default(); - mem::swap(&mut variable_info, &mut result[i]); - - // For each field, make the `types` list more concise by replacing sets of - // subtypes with a single supertype. - for (_, field_info) in variable_info.fields.iter_mut() { - for supertype_symbol in &syntax_grammar.supertype_symbols { - if sorted_vec_replace( - &mut field_info.types, - &result[supertype_symbol.index].child_types, - ChildType::Normal(*supertype_symbol), - ) { - break; - } - } - - field_info.types.retain(|t| { - if let ChildType::Normal(symbol) = t { - if syntax_grammar.supertype_symbols.contains(&symbol) { - return true; - } - } - child_type_is_visible(t) - }); - } - - result[i] = variable_info; - } - - Ok(result) -} - -fn sorted_vec_replace(left: &mut Vec, right: &Vec, value: T) -> bool -where - T: Eq + Ord, -{ - let mut i = 0; - for right_elem in right.iter() { - while left[i] < *right_elem { - i += 1; - if i == left.len() { - return false; - } - } - if left[i] != *right_elem { - return false; - } - } - - i = 0; - left.retain(|left_elem| { - if i == right.len() { - return true; - } - while right[i] < *left_elem { - i += 1; - if i == right.len() { - return true; - } - } - right[i] != *left_elem - }); - - if let Err(i) = left.binary_search(&value) { - left.insert(i, value); - } - - true -} - pub(crate) fn build_parse_table( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, inlines: &InlinedProductionMap, + variable_info: &Vec, state_ids_to_log: Vec, ) -> Result<(ParseTable, Vec)> { let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines); @@ -1014,13 +766,12 @@ pub(crate) fn build_parse_table( &item_set_builder, ); - let variable_info = get_variable_info(syntax_grammar, lexical_grammar)?; - let table = ParseTableBuilder { syntax_grammar, lexical_grammar, state_ids_to_log, item_set_builder, + variable_info, state_ids_by_item_set: HashMap::new(), item_sets_by_state_id: Vec::new(), parse_state_queue: VecDeque::new(), @@ -1029,240 +780,9 @@ pub(crate) fn build_parse_table( symbols: Vec::new(), production_infos: Vec::new(), max_aliased_production_length: 0, - variable_info, }, } .build()?; Ok((table, following_tokens)) } - -#[cfg(test)] -mod tests { - use super::*; - use crate::generate::grammars::{ - LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType, - }; - - #[test] - fn test_get_variable_info() { - let variable_info = get_variable_info( - &build_syntax_grammar( - vec![ - // Required field `field1` has only one node type. - SyntaxVariable { - name: "rule0".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::non_terminal(1)) - .with_field_name("field1"), - ], - }], - }, - // Hidden node - SyntaxVariable { - name: "_rule1".to_string(), - kind: VariableType::Hidden, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(1))], - }], - }, - // Optional field `field2` can have two possible node types. - SyntaxVariable { - name: "rule2".to_string(), - kind: VariableType::Named, - productions: vec![ - Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(0))], - }, - Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::terminal(2)) - .with_field_name("field2"), - ], - }, - Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::terminal(3)) - .with_field_name("field2"), - ], - }, - ], - }, - ], - vec![], - ), - &build_lexical_grammar(), - ) - .unwrap(); - - assert_eq!( - variable_info[0].fields, - vec![( - "field1".to_string(), - FieldInfo { - required: true, - multiple: false, - types: vec![ChildType::Normal(Symbol::terminal(1))], - } - )] - .into_iter() - .collect::>() - ); - - assert_eq!( - variable_info[2].fields, - vec![( - "field2".to_string(), - FieldInfo { - required: false, - multiple: false, - types: vec![ - ChildType::Normal(Symbol::terminal(2)), - ChildType::Normal(Symbol::terminal(3)), - ], - } - )] - .into_iter() - .collect::>() - ); - } - - #[test] - fn test_get_variable_info_with_inherited_fields() { - let variable_info = get_variable_info( - &build_syntax_grammar( - vec![ - SyntaxVariable { - name: "rule0".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::non_terminal(1)), - ProductionStep::new(Symbol::terminal(1)), - ], - }], - }, - // Hidden node with fields - SyntaxVariable { - name: "_rule1".to_string(), - kind: VariableType::Hidden, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(2)), - ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"), - ], - }], - }, - ], - vec![], - ), - &build_lexical_grammar(), - ) - .unwrap(); - - assert_eq!( - variable_info[0].fields, - vec![( - "field1".to_string(), - FieldInfo { - required: true, - multiple: false, - types: vec![ChildType::Normal(Symbol::terminal(3))], - } - )] - .into_iter() - .collect::>() - ); - } - - #[test] - fn test_get_variable_info_with_supertypes() { - let variable_info = get_variable_info( - &build_syntax_grammar( - vec![ - SyntaxVariable { - name: "rule0".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::non_terminal(1)) - .with_field_name("field1"), - ProductionStep::new(Symbol::terminal(1)), - ], - }], - }, - SyntaxVariable { - name: "_rule1".to_string(), - kind: VariableType::Hidden, - productions: vec![ - Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(2))], - }, - Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(3))], - }, - ], - }, - ], - // _rule1 is a supertype - vec![Symbol::non_terminal(1)], - ), - &build_lexical_grammar(), - ) - .unwrap(); - - assert_eq!( - variable_info[0].fields, - vec![( - "field1".to_string(), - FieldInfo { - required: true, - multiple: false, - types: vec![ChildType::Normal(Symbol::non_terminal(1))], - } - )] - .into_iter() - .collect::>() - ); - } - - fn build_syntax_grammar( - variables: Vec, - supertype_symbols: Vec, - ) -> SyntaxGrammar { - let mut syntax_grammar = SyntaxGrammar::default(); - syntax_grammar.variables = variables; - syntax_grammar.supertype_symbols = supertype_symbols; - syntax_grammar - } - - fn build_lexical_grammar() -> LexicalGrammar { - let mut lexical_grammar = LexicalGrammar::default(); - for i in 0..10 { - lexical_grammar.variables.push(LexicalVariable { - name: format!("token_{}", i), - kind: VariableType::Named, - implicit_precedence: 0, - start_state: 0, - }); - } - lexical_grammar - } -} diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index 4b357f47..de28cda3 100644 --- a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -15,6 +15,7 @@ use self::token_conflicts::TokenConflictMap; use crate::error::Result; use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; use crate::generate::nfa::{CharacterSet, NfaCursor}; +use crate::generate::node_types::VariableInfo; use crate::generate::rules::{AliasMap, Symbol, SymbolType}; use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry}; use log::info; @@ -23,12 +24,18 @@ pub(crate) fn build_tables( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, simple_aliases: &AliasMap, + variable_info: &Vec, inlines: &InlinedProductionMap, minimize: bool, state_ids_to_log: Vec, ) -> Result<(ParseTable, LexTable, LexTable, Option)> { - let (mut parse_table, following_tokens) = - build_parse_table(syntax_grammar, lexical_grammar, inlines, state_ids_to_log)?; + let (mut parse_table, following_tokens) = build_parse_table( + syntax_grammar, + lexical_grammar, + inlines, + variable_info, + state_ids_to_log, + )?; let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens); let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar); let keywords = identify_keywords( diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index b5c4c0e4..7ad15051 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -99,21 +99,23 @@ fn generate_parser_for_grammar_with_opts( let input_grammar = parse_grammar(grammar_json)?; let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = prepare_grammar(&input_grammar)?; + let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar)?; + let node_types_json = node_types::generate_node_types_json( + &syntax_grammar, + &lexical_grammar, + &simple_aliases, + &variable_info, + ); let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables( &syntax_grammar, &lexical_grammar, &simple_aliases, + &variable_info, &inlines, minimize, state_ids_to_log, )?; let name = input_grammar.name; - let node_types_json = node_types::generate_node_types_json( - &syntax_grammar, - &lexical_grammar, - &simple_aliases, - &parse_table.variable_info, - ); let c_code = render_c_code( &name, parse_table, diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 7e87b3b0..f43ffce2 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -1,8 +1,30 @@ use super::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; -use super::rules::{AliasMap, Symbol, SymbolType}; -use super::tables::{ChildType, VariableInfo}; +use super::rules::{Alias, AliasMap, Symbol, SymbolType}; +use crate::error::{Error, Result}; +use hashbrown::HashMap; use serde_derive::Serialize; use std::collections::BTreeMap; +use std::mem; + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub(crate) enum ChildType { + Normal(Symbol), + Aliased(Alias), +} + +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub(crate) struct FieldInfo { + pub required: bool, + pub multiple: bool, + pub types: Vec, +} + +#[derive(Debug, Default, PartialEq, Eq)] +pub(crate) struct VariableInfo { + pub fields: HashMap, + pub child_types: Vec, + pub has_multi_step_production: bool, +} #[derive(Debug, Serialize, PartialEq, Eq, Default)] pub(crate) struct NodeInfoJSON { @@ -29,6 +51,258 @@ pub(crate) struct FieldInfoJSON { types: Vec, } +pub(crate) fn get_variable_info( + syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, +) -> Result> { + let mut result = Vec::new(); + + // Determine which field names and child node types can appear directly + // within each type of node. + for (i, variable) in syntax_grammar.variables.iter().enumerate() { + let mut info = VariableInfo { + fields: HashMap::new(), + child_types: Vec::new(), + has_multi_step_production: false, + }; + let is_recursive = variable + .productions + .iter() + .any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i))); + + for production in &variable.productions { + if production.steps.len() > 1 { + info.has_multi_step_production = true; + } + + for step in &production.steps { + let child_type = if let Some(alias) = &step.alias { + ChildType::Aliased(alias.clone()) + } else { + ChildType::Normal(step.symbol) + }; + + if let Some(field_name) = &step.field_name { + let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo { + multiple: false, + required: true, + types: Vec::new(), + }); + field_info.multiple |= is_recursive; + if let Err(i) = field_info.types.binary_search(&child_type) { + field_info.types.insert(i, child_type.clone()); + } + } + + if let Err(i) = info.child_types.binary_search(&child_type) { + info.child_types.insert(i, child_type.clone()); + } + } + } + + for production in &variable.productions { + let production_fields: Vec<&String> = production + .steps + .iter() + .filter_map(|s| s.field_name.as_ref()) + .collect(); + for (field_name, field_info) in info.fields.iter_mut() { + if !production_fields.contains(&field_name) { + field_info.required = false; + } + } + } + + result.push(info); + } + + // Expand each node type's information recursively to inherit the properties of + // hidden children. + let mut done = false; + while !done { + done = true; + for (i, variable) in syntax_grammar.variables.iter().enumerate() { + // Move this variable's info out of the vector so it can be modified + // while reading from other entries of the vector. + let mut variable_info = VariableInfo::default(); + mem::swap(&mut variable_info, &mut result[i]); + + for production in &variable.productions { + for step in &production.steps { + let child_symbol = step.symbol; + if step.alias.is_none() + && child_symbol.kind == SymbolType::NonTerminal + && !syntax_grammar.variables[child_symbol.index] + .kind + .is_visible() + { + let child_variable_info = &result[child_symbol.index]; + + // If a hidden child can have multiple children, then this + // node can appear to have multiple children. + if child_variable_info.has_multi_step_production { + variable_info.has_multi_step_production = true; + } + + // Inherit fields from this hidden child + for (field_name, child_field_info) in &child_variable_info.fields { + let field_info = variable_info + .fields + .entry(field_name.clone()) + .or_insert_with(|| { + done = false; + child_field_info.clone() + }); + if child_field_info.multiple && !field_info.multiple { + field_info.multiple = child_field_info.multiple; + done = false; + } + if !child_field_info.required && field_info.required { + field_info.required = child_field_info.required; + done = false; + } + for child_type in &child_field_info.types { + if let Err(i) = field_info.types.binary_search(&child_type) { + field_info.types.insert(i, child_type.clone()); + done = false; + } + } + } + + if !syntax_grammar.supertype_symbols.contains(&child_symbol) { + // Inherit child types from this hidden child + for child_type in &child_variable_info.child_types { + if let Err(i) = variable_info.child_types.binary_search(&child_type) + { + variable_info.child_types.insert(i, child_type.clone()); + done = false; + } + } + + // If any field points to this hidden child, inherit child types + // for the field. + if let Some(field_name) = &step.field_name { + let field_info = variable_info.fields.get_mut(field_name).unwrap(); + for child_type in &child_variable_info.child_types { + if let Err(i) = field_info.types.binary_search(&child_type) { + field_info.types.insert(i, child_type.clone()); + done = false; + } + } + } + } + } + } + } + + // Move this variable's info back into the vector. + result[i] = variable_info; + } + } + + for supertype_symbol in &syntax_grammar.supertype_symbols { + let variable = &syntax_grammar.variables[supertype_symbol.index]; + if variable.kind != VariableType::Hidden { + return Err(Error::grammar(&format!( + "Supertype symbols must be hidden, but `{}` is not", + variable.name + ))); + } + + if result[supertype_symbol.index].has_multi_step_production { + return Err(Error::grammar(&format!( + "Supertype symbols must always have a single visible child, but `{}` can have multiple", + variable.name + ))); + } + } + + let child_type_is_visible = |child_type: &ChildType| match child_type { + ChildType::Aliased(_) => true, + ChildType::Normal(symbol) => { + if syntax_grammar.supertype_symbols.contains(&symbol) { + return true; + } + let variable_kind = match symbol.kind { + SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind, + SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind, + SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind, + _ => VariableType::Hidden, + }; + variable_kind.is_visible() + } + }; + + for supertype_symbol in &syntax_grammar.supertype_symbols { + result[supertype_symbol.index] + .child_types + .retain(child_type_is_visible); + } + + for i in 0..result.len() { + let mut variable_info = VariableInfo::default(); + mem::swap(&mut variable_info, &mut result[i]); + + // For each field, make the `types` list more concise by replacing sets of + // subtypes with a single supertype. + for (_, field_info) in variable_info.fields.iter_mut() { + for supertype_symbol in &syntax_grammar.supertype_symbols { + if sorted_vec_replace( + &mut field_info.types, + &result[supertype_symbol.index].child_types, + ChildType::Normal(*supertype_symbol), + ) { + break; + } + } + + field_info.types.retain(child_type_is_visible); + } + + result[i] = variable_info; + } + + Ok(result) +} + +fn sorted_vec_replace(left: &mut Vec, right: &Vec, value: T) -> bool +where + T: Eq + Ord, +{ + let mut i = 0; + for right_elem in right.iter() { + while left[i] < *right_elem { + i += 1; + if i == left.len() { + return false; + } + } + if left[i] != *right_elem { + return false; + } + } + + i = 0; + left.retain(|left_elem| { + if i == right.len() { + return true; + } + while right[i] < *left_elem { + i += 1; + if i == right.len() { + return true; + } + } + right[i] != *left_elem + }); + + if let Err(i) = left.binary_search(&value) { + left.insert(i, value); + } + + true +} + pub(crate) fn generate_node_types_json( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, @@ -158,8 +432,9 @@ pub(crate) fn generate_node_types_json( #[cfg(test)] mod tests { use super::*; - use crate::generate::build_tables::build_parse_table::get_variable_info; - use crate::generate::grammars::{InputGrammar, Variable, VariableType}; + use crate::generate::grammars::{ + InputGrammar, LexicalVariable, Production, ProductionStep, SyntaxVariable, Variable, + }; use crate::generate::prepare_grammar::prepare_grammar; use crate::generate::rules::Rule; @@ -331,6 +606,205 @@ mod tests { ); } + #[test] + fn test_get_variable_info() { + let variable_info = get_variable_info( + &build_syntax_grammar( + vec![ + // Required field `field1` has only one node type. + SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)) + .with_field_name("field1"), + ], + }], + }, + // Hidden node + SyntaxVariable { + name: "_rule1".to_string(), + kind: VariableType::Hidden, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(1))], + }], + }, + // Optional field `field2` can have two possible node types. + SyntaxVariable { + name: "rule2".to_string(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(0))], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(2)) + .with_field_name("field2"), + ], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(3)) + .with_field_name("field2"), + ], + }, + ], + }, + ], + vec![], + ), + &build_lexical_grammar(), + ) + .unwrap(); + + assert_eq!( + variable_info[0].fields, + vec![( + "field1".to_string(), + FieldInfo { + required: true, + multiple: false, + types: vec![ChildType::Normal(Symbol::terminal(1))], + } + )] + .into_iter() + .collect::>() + ); + + assert_eq!( + variable_info[2].fields, + vec![( + "field2".to_string(), + FieldInfo { + required: false, + multiple: false, + types: vec![ + ChildType::Normal(Symbol::terminal(2)), + ChildType::Normal(Symbol::terminal(3)), + ], + } + )] + .into_iter() + .collect::>() + ); + } + + #[test] + fn test_get_variable_info_with_inherited_fields() { + let variable_info = get_variable_info( + &build_syntax_grammar( + vec![ + SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::terminal(1)), + ], + }], + }, + // Hidden node with fields + SyntaxVariable { + name: "_rule1".to_string(), + kind: VariableType::Hidden, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(2)), + ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"), + ], + }], + }, + ], + vec![], + ), + &build_lexical_grammar(), + ) + .unwrap(); + + assert_eq!( + variable_info[0].fields, + vec![( + "field1".to_string(), + FieldInfo { + required: true, + multiple: false, + types: vec![ChildType::Normal(Symbol::terminal(3))], + } + )] + .into_iter() + .collect::>() + ); + } + + #[test] + fn test_get_variable_info_with_supertypes() { + let variable_info = get_variable_info( + &build_syntax_grammar( + vec![ + SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)) + .with_field_name("field1"), + ProductionStep::new(Symbol::terminal(1)), + ], + }], + }, + SyntaxVariable { + name: "_rule1".to_string(), + kind: VariableType::Hidden, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(2))], + }, + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(3))], + }, + ], + }, + ], + // _rule1 is a supertype + vec![Symbol::non_terminal(1)], + ), + &build_lexical_grammar(), + ) + .unwrap(); + + assert_eq!( + variable_info[0].fields, + vec![( + "field1".to_string(), + FieldInfo { + required: true, + multiple: false, + types: vec![ChildType::Normal(Symbol::non_terminal(1))], + } + )] + .into_iter() + .collect::>() + ); + } + fn get_node_types(grammar: InputGrammar) -> Vec { let (syntax_grammar, lexical_grammar, _, simple_aliases) = prepare_grammar(&grammar).unwrap(); @@ -342,4 +816,27 @@ mod tests { &variable_info, ) } + + fn build_syntax_grammar( + variables: Vec, + supertype_symbols: Vec, + ) -> SyntaxGrammar { + let mut syntax_grammar = SyntaxGrammar::default(); + syntax_grammar.variables = variables; + syntax_grammar.supertype_symbols = supertype_symbols; + syntax_grammar + } + + fn build_lexical_grammar() -> LexicalGrammar { + let mut lexical_grammar = LexicalGrammar::default(); + for i in 0..10 { + lexical_grammar.variables.push(LexicalVariable { + name: format!("token_{}", i), + kind: VariableType::Named, + implicit_precedence: 0, + start_state: 0, + }); + } + lexical_grammar + } } diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index 929ba83e..1ee5dde8 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -52,31 +52,10 @@ pub(crate) struct ProductionInfo { pub field_map: BTreeMap>, } -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub(crate) enum ChildType { - Normal(Symbol), - Aliased(Alias), -} - -#[derive(Clone, Debug, Default, PartialEq, Eq)] -pub(crate) struct FieldInfo { - pub required: bool, - pub multiple: bool, - pub types: Vec, -} - -#[derive(Debug, Default, PartialEq, Eq)] -pub(crate) struct VariableInfo { - pub fields: HashMap, - pub child_types: Vec, - pub has_multi_step_production: bool, -} - #[derive(Debug, PartialEq, Eq)] pub(crate) struct ParseTable { pub states: Vec, pub symbols: Vec, - pub variable_info: Vec, pub production_infos: Vec, pub max_aliased_production_length: usize, }