From b79bd8693b2e95abbab112faa4271e3dc2db9785 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 8 Mar 2019 06:20:07 -0500 Subject: [PATCH] Start work on handling node supertypes --- .../build_tables/build_parse_table.rs | 389 +++++++++++++----- cli/src/generate/dsl.js | 245 ++++++----- cli/src/generate/grammars.rs | 2 + cli/src/generate/mod.rs | 116 +++--- cli/src/generate/parse_grammar.rs | 3 + .../prepare_grammar/expand_repeats.rs | 1 + .../prepare_grammar/extract_simple_aliases.rs | 1 + .../prepare_grammar/extract_tokens.rs | 8 + .../prepare_grammar/flatten_grammar.rs | 1 + .../prepare_grammar/intern_symbols.rs | 13 + cli/src/generate/prepare_grammar/mod.rs | 1 + .../prepare_grammar/process_inlines.rs | 3 + cli/src/generate/tables.rs | 10 +- 13 files changed, 513 insertions(+), 280 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 417d5d3a..9f9413a3 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -47,7 +47,9 @@ struct ParseTableBuilder<'a> { impl<'a> ParseTableBuilder<'a> { fn build(mut self) -> Result { // Ensure that the empty alias sequence has index 0. - self.parse_table.production_infos.push(ProductionInfo::default()); + self.parse_table + .production_infos + .push(ProductionInfo::default()); // Add the error state at index 0. self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); @@ -749,7 +751,7 @@ fn populate_following_tokens( pub(crate) fn get_variable_info( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, -) -> Vec { +) -> Result> { let mut result = Vec::new(); // Determine which field names and child node types can appear directly @@ -757,7 +759,9 @@ pub(crate) fn get_variable_info( for (i, variable) in syntax_grammar.variables.iter().enumerate() { let mut info = VariableInfo { fields: HashMap::new(), - child_types: HashSet::new(), + subclasses: Vec::new(), + child_types: Vec::new(), + has_multi_step_production: false, }; let is_recursive = variable .productions @@ -765,6 +769,10 @@ pub(crate) fn get_variable_info( .any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i))); for production in &variable.productions { + if production.steps.len() > 1 { + info.has_multi_step_production = true; + } + for step in &production.steps { let child_type = if let Some(alias) = &step.alias { ChildType::Aliased(alias.clone()) @@ -776,13 +784,17 @@ pub(crate) fn get_variable_info( let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo { multiple: false, required: true, - types: HashSet::new(), + types: Vec::new(), }); field_info.multiple |= is_recursive; - field_info.types.insert(child_type.clone()); + if let Err(i) = field_info.types.binary_search(&child_type) { + field_info.types.insert(i, child_type.clone()); + } } - info.child_types.insert(child_type); + if let Err(i) = info.child_types.binary_search(&child_type) { + info.child_types.insert(i, child_type.clone()); + } } } @@ -810,23 +822,25 @@ pub(crate) fn get_variable_info( for (i, variable) in syntax_grammar.variables.iter().enumerate() { // Move this variable's info out of the vector so it can be modified // while reading from other entries of the vector. - let mut variable_info = VariableInfo { - fields: HashMap::new(), - child_types: HashSet::new(), - }; + let mut variable_info = VariableInfo::default(); mem::swap(&mut variable_info, &mut result[i]); for production in &variable.productions { for step in &production.steps { - if step.symbol.kind == SymbolType::NonTerminal - && !syntax_grammar.variables[step.symbol.index] + let child_symbol = step.symbol; + if child_symbol.kind == SymbolType::NonTerminal + && !syntax_grammar.variables[child_symbol.index] .kind .is_visible() { - let production_info = &result[step.symbol.index]; + let child_variable_info = &result[child_symbol.index]; + + if child_variable_info.has_multi_step_production { + variable_info.has_multi_step_production = true; + } // Inherit fields from this hidden child - for (field_name, child_field_info) in &production_info.fields { + for (field_name, child_field_info) in &child_variable_info.fields { let field_info = variable_info .fields .entry(field_name.clone()) @@ -843,15 +857,17 @@ pub(crate) fn get_variable_info( done = false; } for child_type in &child_field_info.types { - if field_info.types.insert(child_type.clone()) { + if let Err(i) = field_info.types.binary_search(&child_type) { + field_info.types.insert(i, child_type.clone()); done = false; } } } // Inherit child types from this hidden child - for child_type in &production_info.child_types { - if variable_info.child_types.insert(child_type.clone()) { + for child_type in &child_variable_info.child_types { + if let Err(i) = variable_info.child_types.binary_search(&child_type) { + variable_info.child_types.insert(i, child_type.clone()); done = false; } } @@ -860,8 +876,9 @@ pub(crate) fn get_variable_info( // for the field. if let Some(field_name) = &step.field_name { let field_info = variable_info.fields.get_mut(field_name).unwrap(); - for child_type in &production_info.child_types { - if field_info.types.insert(child_type.clone()) { + for child_type in &child_variable_info.child_types { + if let Err(i) = field_info.types.binary_search(&child_type) { + field_info.types.insert(i, child_type.clone()); done = false; } } @@ -875,27 +892,111 @@ pub(crate) fn get_variable_info( } } + for supertype_symbol in &syntax_grammar.supertype_symbols { + let variable = &syntax_grammar.variables[supertype_symbol.index]; + if variable.kind != VariableType::Hidden { + return Err(Error::grammar(&format!( + "Supertype symbols must be hidden, but `{}` is not", + variable.name + ))); + } + + if result[supertype_symbol.index].has_multi_step_production { + return Err(Error::grammar(&format!( + "Supertype symbols must always have a single visible child, but `{}` can have multiple", + variable.name + ))); + } + } + let child_type_is_visible = |child_type: &ChildType| match child_type { ChildType::Aliased(_) => true, ChildType::Normal(symbol) => { - let step_kind = match symbol.kind { + let variable_kind = match symbol.kind { SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind, SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind, SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind, _ => VariableType::Hidden, }; - step_kind.is_visible() + variable_kind.is_visible() } }; - for variable_info in result.iter_mut() { - variable_info.child_types.retain(&child_type_is_visible); + for supertype_symbol in &syntax_grammar.supertype_symbols { + result[supertype_symbol.index] + .child_types + .retain(child_type_is_visible); + } + + for i in 0..result.len() { + let mut variable_info = VariableInfo::default(); + mem::swap(&mut variable_info, &mut result[i]); + + // For each field, make the `types` list more concise by replacing sets of + // subtypes with a single supertype. for (_, field_info) in variable_info.fields.iter_mut() { - field_info.types.retain(&child_type_is_visible); + for supertype_symbol in &syntax_grammar.supertype_symbols { + if sorted_vec_replace( + &mut field_info.types, + &result[supertype_symbol.index].child_types, + ChildType::Normal(*supertype_symbol), + ) { + break; + } + } + + field_info.types.retain(|t| { + if let ChildType::Normal(symbol) = t { + if syntax_grammar.supertype_symbols.contains(&symbol) { + return true; + } + } + child_type_is_visible(t) + }); + } + + result[i] = variable_info; + } + + Ok(result) +} + +fn sorted_vec_replace(left: &mut Vec, right: &Vec, value: T) -> bool +where + T: Eq + Ord, +{ + let mut i = 0; + for right_elem in right.iter() { + while left[i] < *right_elem { + i += 1; + if i == left.len() { + return false; + } + } + if left[i] != *right_elem { + return false; } } - result + i = 0; + left.retain(|left_elem| { + if i == right.len() { + return true; + } + while right[i] < *left_elem { + i += 1; + if i == right.len() { + return true; + } + } + right[i] != *left_elem + }); + + if let Err(i) = left.binary_search(&value) { + left.insert(i, value); + } + + true } pub(crate) fn build_parse_table( @@ -913,6 +1014,8 @@ pub(crate) fn build_parse_table( &item_set_builder, ); + let variable_info = get_variable_info(syntax_grammar, lexical_grammar)?; + let table = ParseTableBuilder { syntax_grammar, lexical_grammar, @@ -926,7 +1029,7 @@ pub(crate) fn build_parse_table( symbols: Vec::new(), production_infos: Vec::new(), max_aliased_production_length: 0, - variable_info: get_variable_info(syntax_grammar, lexical_grammar), + variable_info, }, } .build()?; @@ -944,56 +1047,63 @@ mod tests { #[test] fn test_get_variable_info() { let variable_info = get_variable_info( - &build_syntax_grammar(vec![ - // Required field `field1` has only one node type. - SyntaxVariable { - name: "rule0".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::non_terminal(1)).with_field_name("field1"), + &build_syntax_grammar( + vec![ + // Required field `field1` has only one node type. + SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)) + .with_field_name("field1"), + ], + }], + }, + // Hidden node + SyntaxVariable { + name: "_rule1".to_string(), + kind: VariableType::Hidden, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(1))], + }], + }, + // Optional field `field2` can have two possible node types. + SyntaxVariable { + name: "rule2".to_string(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(0))], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(2)) + .with_field_name("field2"), + ], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(3)) + .with_field_name("field2"), + ], + }, ], - }], - }, - // Hidden node - SyntaxVariable { - name: "_rule1".to_string(), - kind: VariableType::Hidden, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(1))], - }], - }, - // Optional field `field2` can have two possible node types. - SyntaxVariable { - name: "rule2".to_string(), - kind: VariableType::Named, - productions: vec![ - Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(0))], - }, - Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::terminal(2)).with_field_name("field2"), - ], - }, - Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::terminal(3)).with_field_name("field2"), - ], - }, - ], - }, - ]), + }, + ], + vec![], + ), &build_lexical_grammar(), - ); + ) + .unwrap(); assert_eq!( variable_info[0].fields, @@ -1002,9 +1112,7 @@ mod tests { FieldInfo { required: true, multiple: false, - types: vec![ChildType::Normal(Symbol::terminal(1))] - .into_iter() - .collect::>(), + types: vec![ChildType::Normal(Symbol::terminal(1))], } )] .into_iter() @@ -1021,9 +1129,7 @@ mod tests { types: vec![ ChildType::Normal(Symbol::terminal(2)), ChildType::Normal(Symbol::terminal(3)), - ] - .into_iter() - .collect::>(), + ], } )] .into_iter() @@ -1034,34 +1140,38 @@ mod tests { #[test] fn test_get_variable_info_with_inherited_fields() { let variable_info = get_variable_info( - &build_syntax_grammar(vec![ - SyntaxVariable { - name: "rule0".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::non_terminal(1)), - ProductionStep::new(Symbol::terminal(1)), - ], - }], - }, - // Hidden node with fields - SyntaxVariable { - name: "_rule1".to_string(), - kind: VariableType::Hidden, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(2)), - ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"), - ], - }], - }, - ]), + &build_syntax_grammar( + vec![ + SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::terminal(1)), + ], + }], + }, + // Hidden node with fields + SyntaxVariable { + name: "_rule1".to_string(), + kind: VariableType::Hidden, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(2)), + ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"), + ], + }], + }, + ], + vec![], + ), &build_lexical_grammar(), - ); + ) + .unwrap(); assert_eq!( variable_info[0].fields, @@ -1070,9 +1180,7 @@ mod tests { FieldInfo { required: true, multiple: false, - types: vec![ChildType::Normal(Symbol::terminal(3))] - .into_iter() - .collect::>(), + types: vec![ChildType::Normal(Symbol::terminal(3))], } )] .into_iter() @@ -1080,9 +1188,68 @@ mod tests { ); } - fn build_syntax_grammar(variables: Vec) -> SyntaxGrammar { + #[test] + fn test_get_variable_info_with_supertypes() { + let variable_info = get_variable_info( + &build_syntax_grammar( + vec![ + SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)) + .with_field_name("field1"), + ProductionStep::new(Symbol::terminal(1)), + ], + }], + }, + SyntaxVariable { + name: "_rule1".to_string(), + kind: VariableType::Hidden, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(2))], + }, + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(3))], + }, + ], + }, + ], + // _rule1 is a supertype + vec![Symbol::non_terminal(1)], + ), + &build_lexical_grammar(), + ) + .unwrap(); + + assert_eq!( + variable_info[0].fields, + vec![( + "field1".to_string(), + FieldInfo { + required: true, + multiple: false, + types: vec![ChildType::Normal(Symbol::non_terminal(1))], + } + )] + .into_iter() + .collect::>() + ); + } + + fn build_syntax_grammar( + variables: Vec, + supertype_symbols: Vec, + ) -> SyntaxGrammar { let mut syntax_grammar = SyntaxGrammar::default(); syntax_grammar.variables = variables; + syntax_grammar.supertype_symbols = supertype_symbols; syntax_grammar } diff --git a/cli/src/generate/dsl.js b/cli/src/generate/dsl.js index cf124258..651e6713 100644 --- a/cli/src/generate/dsl.js +++ b/cli/src/generate/dsl.js @@ -212,137 +212,154 @@ function RuleBuilder(ruleMap) { } function grammar(baseGrammar, options) { - if (!options) { - options = baseGrammar; - baseGrammar = { - name: null, - rules: {}, - extras: [normalize(/\s/)], - conflicts: [], - externals: [], - inline: [] - }; + if (!options) { + options = baseGrammar; + baseGrammar = { + name: null, + rules: {}, + extras: [normalize(/\s/)], + conflicts: [], + externals: [], + inline: [], + supertypes: [] + }; + } + + let externals = baseGrammar.externals; + if (options.externals) { + if (typeof options.externals !== "function") { + throw new Error("Grammar's 'externals' property must be a function."); } - let externals = baseGrammar.externals; - if (options.externals) { - if (typeof options.externals !== "function") { - throw new Error("Grammar's 'externals' property must be a function."); + const externalsRuleBuilder = RuleBuilder(null) + const externalRules = options.externals.call(externalsRuleBuilder, externalsRuleBuilder, baseGrammar.externals); + + if (!Array.isArray(externalRules)) { + throw new Error("Grammar's 'externals' property must return an array of rules."); + } + + externals = externalRules.map(normalize); + } + + const ruleMap = {}; + for (const key in options.rules) { + ruleMap[key] = true; + } + for (const key in baseGrammar.rules) { + ruleMap[key] = true; + } + for (const external of externals) { + if (typeof external.name === 'string') { + ruleMap[external.name] = true; + } + } + + const ruleBuilder = RuleBuilder(ruleMap); + + const name = options.name; + if (typeof name !== "string") { + throw new Error("Grammar's 'name' property must be a string."); + } + + if (!/^[a-zA-Z_]\w*$/.test(name)) { + throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters."); + } + + let rules = Object.assign({}, baseGrammar.rules); + if (options.rules) { + if (typeof options.rules !== "object") { + throw new Error("Grammar's 'rules' property must be an object."); + } + + for (const ruleName in options.rules) { + const ruleFn = options.rules[ruleName]; + if (typeof ruleFn !== "function") { + throw new Error("Grammar rules must all be functions. '" + ruleName + "' rule is not."); } + rules[ruleName] = normalize(ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName])); + } + } - const externalsRuleBuilder = RuleBuilder(null) - const externalRules = options.externals.call(externalsRuleBuilder, externalsRuleBuilder, baseGrammar.externals); - - if (!Array.isArray(externalRules)) { - throw new Error("Grammar's 'externals' property must return an array of rules."); - } - - externals = externalRules.map(normalize); + let extras = baseGrammar.extras.slice(); + if (options.extras) { + if (typeof options.extras !== "function") { + throw new Error("Grammar's 'extras' property must be a function."); } - const ruleMap = {}; - for (const key in options.rules) { - ruleMap[key] = true; + extras = options.extras + .call(ruleBuilder, ruleBuilder, baseGrammar.extras) + .map(normalize); + } + + let word = baseGrammar.word; + if (options.word) { + word = options.word.call(ruleBuilder, ruleBuilder).name; + if (typeof word != 'string') { + throw new Error("Grammar's 'word' property must be a named rule."); } - for (const key in baseGrammar.rules) { - ruleMap[key] = true; - } - for (const external of externals) { - if (typeof external.name === 'string') { - ruleMap[external.name] = true; - } + } + + let conflicts = baseGrammar.conflicts; + if (options.conflicts) { + if (typeof options.conflicts !== "function") { + throw new Error("Grammar's 'conflicts' property must be a function."); } - const ruleBuilder = RuleBuilder(ruleMap); + const baseConflictRules = baseGrammar.conflicts.map(conflict => conflict.map(sym)); + const conflictRules = options.conflicts.call(ruleBuilder, ruleBuilder, baseConflictRules); - const name = options.name; - if (typeof name !== "string") { - throw new Error("Grammar's 'name' property must be a string."); + if (!Array.isArray(conflictRules)) { + throw new Error("Grammar's conflicts must be an array of arrays of rules."); } - if (!/^[a-zA-Z_]\w*$/.test(name)) { - throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters."); - } - - let rules = Object.assign({}, baseGrammar.rules); - if (options.rules) { - if (typeof options.rules !== "object") { - throw new Error("Grammar's 'rules' property must be an object."); - } - - for (const ruleName in options.rules) { - const ruleFn = options.rules[ruleName]; - if (typeof ruleFn !== "function") { - throw new Error("Grammar rules must all be functions. '" + ruleName + "' rule is not."); - } - rules[ruleName] = normalize(ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName])); - } - } - - let extras = baseGrammar.extras.slice(); - if (options.extras) { - if (typeof options.extras !== "function") { - throw new Error("Grammar's 'extras' property must be a function."); - } - - extras = options.extras - .call(ruleBuilder, ruleBuilder, baseGrammar.extras) - .map(normalize); - } - - let word = baseGrammar.word; - if (options.word) { - word = options.word.call(ruleBuilder, ruleBuilder).name; - if (typeof word != 'string') { - throw new Error("Grammar's 'word' property must be a named rule."); - } - } - - let conflicts = baseGrammar.conflicts; - if (options.conflicts) { - if (typeof options.conflicts !== "function") { - throw new Error("Grammar's 'conflicts' property must be a function."); - } - - const baseConflictRules = baseGrammar.conflicts.map(conflict => conflict.map(sym)); - const conflictRules = options.conflicts.call(ruleBuilder, ruleBuilder, baseConflictRules); - - if (!Array.isArray(conflictRules)) { + conflicts = conflictRules.map(conflictSet => { + if (!Array.isArray(conflictSet)) { throw new Error("Grammar's conflicts must be an array of arrays of rules."); } - conflicts = conflictRules.map(conflictSet => { - if (!Array.isArray(conflictSet)) { - throw new Error("Grammar's conflicts must be an array of arrays of rules."); - } - - return conflictSet.map(symbol => normalize(symbol).name); - }); - } - - let inline = baseGrammar.inline; - if (options.inline) { - if (typeof options.inline !== "function") { - throw new Error("Grammar's 'inline' property must be a function."); - } - - const baseInlineRules = baseGrammar.inline.map(sym); - const inlineRules = options.inline.call(ruleBuilder, ruleBuilder, baseInlineRules); - - if (!Array.isArray(inlineRules)) { - throw new Error("Grammar's inline must be an array of rules."); - } - - inline = inlineRules.map(symbol => symbol.name); - } - - if (Object.keys(rules).length == 0) { - throw new Error("Grammar must have at least one rule."); - } - - return {name, word, rules, extras, conflicts, externals, inline}; + return conflictSet.map(symbol => normalize(symbol).name); + }); } + let inline = baseGrammar.inline; + if (options.inline) { + if (typeof options.inline !== "function") { + throw new Error("Grammar's 'inline' property must be a function."); + } + + const baseInlineRules = baseGrammar.inline.map(sym); + const inlineRules = options.inline.call(ruleBuilder, ruleBuilder, baseInlineRules); + + if (!Array.isArray(inlineRules)) { + throw new Error("Grammar's inline must be an array of rules."); + } + + inline = inlineRules.map(symbol => symbol.name); + } + + let supertypes = baseGrammar.supertypes; + if (options.supertypes) { + if (typeof options.supertypes !== "function") { + throw new Error("Grammar's 'supertypes' property must be a function."); + } + + const baseSupertypeRules = baseGrammar.supertypes.map(sym); + const supertypeRules = options.supertypes.call(ruleBuilder, ruleBuilder, baseSupertypeRules); + + if (!Array.isArray(supertypeRules)) { + throw new Error("Grammar's supertypes must be an array of rules."); + } + + supertypes = supertypeRules.map(symbol => symbol.name); + } + + if (Object.keys(rules).length == 0) { + throw new Error("Grammar must have at least one rule."); + } + + return {name, word, rules, extras, conflicts, externals, inline, supertypes}; +} + function checkArguments(ruleCount, caller, callerName, suffix = '') { if (ruleCount > 1) { const error = new Error([ diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs index 6cc1a5f7..f904efa3 100644 --- a/cli/src/generate/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -27,6 +27,7 @@ pub(crate) struct InputGrammar { pub expected_conflicts: Vec>, pub external_tokens: Vec, pub variables_to_inline: Vec, + pub supertype_symbols: Vec, pub word_token: Option, } @@ -88,6 +89,7 @@ pub(crate) struct SyntaxGrammar { pub extra_tokens: Vec, pub expected_conflicts: Vec>, pub external_tokens: Vec, + pub supertype_symbols: Vec, pub variables_to_inline: Vec, pub word_token: Option, } diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 1b43df3b..4d6a1057 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -184,17 +184,23 @@ fn ensure_file>(path: &PathBuf, f: impl Fn() -> T) -> Result<()> } } +#[derive(Debug, Serialize, PartialEq, Eq, Default)] +struct NodeInfoJSON { + fields: Option>, + subtypes: Option>, +} + #[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)] -struct FieldTypeJSON { +struct NodeTypeJSON { kind: String, named: bool, } -#[derive(Debug, Serialize)] +#[derive(Debug, Serialize, PartialEq, Eq)] struct FieldInfoJSON { multiple: bool, required: bool, - types: Vec, + types: Vec, } fn generate_field_info_json( @@ -203,7 +209,7 @@ fn generate_field_info_json( simple_aliases: &AliasMap, variable_info: &Vec, ) -> String { - let mut map = BTreeMap::new(); + let mut node_types_json = BTreeMap::new(); for (i, info) in variable_info.iter().enumerate() { let variable = &syntax_grammar.variables[i]; if !variable.kind.is_visible() || info.fields.is_empty() { @@ -214,60 +220,68 @@ fn generate_field_info_json( .get(&Symbol::non_terminal(i)) .map_or(&variable.name, |alias| &alias.value); - let fields = map.entry(name.clone()).or_insert_with(|| BTreeMap::new()); - for (field, field_info) in info.fields.iter() { - let field_info_json = fields.entry(field.clone()).or_insert(FieldInfoJSON { - multiple: false, - required: true, - types: Vec::new(), - }); + let node_type_json = node_types_json + .entry(name.clone()) + .or_insert_with(|| NodeInfoJSON::default()); - field_info_json.multiple |= field_info.multiple; - field_info_json.required &= field_info.required; - field_info_json.types.extend(field_info.types.iter().map( - |child_type| match child_type { - ChildType::Aliased(alias) => FieldTypeJSON { - kind: alias.value.clone(), - named: alias.is_named, - }, - ChildType::Normal(symbol) => { - if let Some(alias) = simple_aliases.get(&symbol) { - FieldTypeJSON { - kind: alias.value.clone(), - named: alias.is_named, - } - } else { - match symbol.kind { - SymbolType::NonTerminal => { - let variable = &syntax_grammar.variables[symbol.index]; - FieldTypeJSON { - kind: variable.name.clone(), - named: variable.kind == VariableType::Named, - } + if info.fields.len() > 0 { + let mut fields_json = BTreeMap::new(); + for (field, field_info) in info.fields.iter() { + let field_info_json = fields_json.entry(field.clone()).or_insert(FieldInfoJSON { + multiple: false, + required: true, + types: Vec::new(), + }); + + field_info_json.multiple |= field_info.multiple; + field_info_json.required &= field_info.required; + field_info_json.types.extend(field_info.types.iter().map( + |child_type| match child_type { + ChildType::Aliased(alias) => NodeTypeJSON { + kind: alias.value.clone(), + named: alias.is_named, + }, + ChildType::Normal(symbol) => { + if let Some(alias) = simple_aliases.get(&symbol) { + NodeTypeJSON { + kind: alias.value.clone(), + named: alias.is_named, } - SymbolType::Terminal => { - let variable = &lexical_grammar.variables[symbol.index]; - FieldTypeJSON { - kind: variable.name.clone(), - named: variable.kind == VariableType::Named, + } else { + match symbol.kind { + SymbolType::NonTerminal => { + let variable = &syntax_grammar.variables[symbol.index]; + NodeTypeJSON { + kind: variable.name.clone(), + named: variable.kind == VariableType::Named, + } } - } - SymbolType::External => { - let variable = &syntax_grammar.external_tokens[symbol.index]; - FieldTypeJSON { - kind: variable.name.clone(), - named: variable.kind == VariableType::Named, + SymbolType::Terminal => { + let variable = &lexical_grammar.variables[symbol.index]; + NodeTypeJSON { + kind: variable.name.clone(), + named: variable.kind == VariableType::Named, + } } + SymbolType::External => { + let variable = &syntax_grammar.external_tokens[symbol.index]; + NodeTypeJSON { + kind: variable.name.clone(), + named: variable.kind == VariableType::Named, + } + } + _ => panic!("Unexpected symbol type"), } - _ => panic!("Unexpected symbol type"), } } - } - }, - )); - field_info_json.types.sort_unstable(); - field_info_json.types.dedup(); + }, + )); + field_info_json.types.sort_unstable(); + field_info_json.types.dedup(); + } + node_type_json.fields = Some(fields_json); } + } - serde_json::to_string_pretty(&map).unwrap() + serde_json::to_string_pretty(&node_types_json).unwrap() } diff --git a/cli/src/generate/parse_grammar.rs b/cli/src/generate/parse_grammar.rs index 5b244c87..ce4b881a 100644 --- a/cli/src/generate/parse_grammar.rs +++ b/cli/src/generate/parse_grammar.rs @@ -71,6 +71,7 @@ struct GrammarJSON { externals: Option>, extras: Option>, inline: Option>, + supertypes: Option>, word: Option, } @@ -100,6 +101,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result { .collect(); let expected_conflicts = grammar_json.conflicts.unwrap_or(Vec::new()); let variables_to_inline = grammar_json.inline.unwrap_or(Vec::new()); + let supertype_symbols = grammar_json.supertypes.unwrap_or(Vec::new()); Ok(InputGrammar { name: grammar_json.name, @@ -108,6 +110,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result { extra_tokens, expected_conflicts, external_tokens, + supertype_symbols, variables_to_inline, }) } diff --git a/cli/src/generate/prepare_grammar/expand_repeats.rs b/cli/src/generate/prepare_grammar/expand_repeats.rs index b290799b..ec0deb75 100644 --- a/cli/src/generate/prepare_grammar/expand_repeats.rs +++ b/cli/src/generate/prepare_grammar/expand_repeats.rs @@ -235,6 +235,7 @@ mod tests { external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), + supertype_symbols: Vec::new(), word_token: None, } } diff --git a/cli/src/generate/prepare_grammar/extract_simple_aliases.rs b/cli/src/generate/prepare_grammar/extract_simple_aliases.rs index 79ea5e67..9a0b7fbb 100644 --- a/cli/src/generate/prepare_grammar/extract_simple_aliases.rs +++ b/cli/src/generate/prepare_grammar/extract_simple_aliases.rs @@ -149,6 +149,7 @@ mod tests { extra_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), + supertype_symbols: Vec::new(), external_tokens: Vec::new(), word_token: None, }; diff --git a/cli/src/generate/prepare_grammar/extract_tokens.rs b/cli/src/generate/prepare_grammar/extract_tokens.rs index 88afb50f..d1264b6e 100644 --- a/cli/src/generate/prepare_grammar/extract_tokens.rs +++ b/cli/src/generate/prepare_grammar/extract_tokens.rs @@ -77,6 +77,12 @@ pub(super) fn extract_tokens( }) .collect(); + let supertype_symbols = grammar + .supertype_symbols + .into_iter() + .map(|symbol| symbol_replacer.replace_symbol(symbol)) + .collect(); + let variables_to_inline = grammar .variables_to_inline .into_iter() @@ -154,6 +160,7 @@ pub(super) fn extract_tokens( expected_conflicts, extra_tokens, variables_to_inline, + supertype_symbols, external_tokens, word_token, }, @@ -519,6 +526,7 @@ mod test { external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), + supertype_symbols: Vec::new(), word_token: None, } } diff --git a/cli/src/generate/prepare_grammar/flatten_grammar.rs b/cli/src/generate/prepare_grammar/flatten_grammar.rs index 1c050a6b..af93a82d 100644 --- a/cli/src/generate/prepare_grammar/flatten_grammar.rs +++ b/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -203,6 +203,7 @@ unless they are used only as the grammar's start rule. expected_conflicts: grammar.expected_conflicts, variables_to_inline: grammar.variables_to_inline, external_tokens: grammar.external_tokens, + supertype_symbols: grammar.supertype_symbols, word_token: grammar.word_token, variables, }) diff --git a/cli/src/generate/prepare_grammar/intern_symbols.rs b/cli/src/generate/prepare_grammar/intern_symbols.rs index d742864c..54abdf83 100644 --- a/cli/src/generate/prepare_grammar/intern_symbols.rs +++ b/cli/src/generate/prepare_grammar/intern_symbols.rs @@ -35,6 +35,15 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result extra_tokens.push(interner.intern_rule(extra_token)?); } + let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len()); + for supertype_symbol_name in grammar.supertype_symbols.iter() { + supertype_symbols.push( + interner + .intern_name(supertype_symbol_name) + .ok_or_else(|| Error::undefined_symbol(supertype_symbol_name))?, + ); + } + let mut expected_conflicts = Vec::new(); for conflict in grammar.expected_conflicts.iter() { let mut interned_conflict = Vec::with_capacity(conflict.len()); @@ -64,12 +73,15 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result ); } + eprintln!("supertype_symbols: {:?}", supertype_symbols); + Ok(InternedGrammar { variables, external_tokens, extra_tokens, expected_conflicts, variables_to_inline, + supertype_symbols, word_token, }) } @@ -230,6 +242,7 @@ mod tests { external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), + supertype_symbols: Vec::new(), word_token: None, } } diff --git a/cli/src/generate/prepare_grammar/mod.rs b/cli/src/generate/prepare_grammar/mod.rs index 41f668f4..a574aefb 100644 --- a/cli/src/generate/prepare_grammar/mod.rs +++ b/cli/src/generate/prepare_grammar/mod.rs @@ -25,6 +25,7 @@ pub(crate) struct IntermediateGrammar { expected_conflicts: Vec>, external_tokens: Vec, variables_to_inline: Vec, + supertype_symbols: Vec, word_token: Option, } diff --git a/cli/src/generate/prepare_grammar/process_inlines.rs b/cli/src/generate/prepare_grammar/process_inlines.rs index e067cd9e..e7c1b3e8 100644 --- a/cli/src/generate/prepare_grammar/process_inlines.rs +++ b/cli/src/generate/prepare_grammar/process_inlines.rs @@ -198,6 +198,7 @@ mod tests { expected_conflicts: Vec::new(), extra_tokens: Vec::new(), external_tokens: Vec::new(), + supertype_symbols: Vec::new(), word_token: None, variables_to_inline: vec![Symbol::non_terminal(1)], variables: vec![ @@ -328,6 +329,7 @@ mod tests { expected_conflicts: Vec::new(), extra_tokens: Vec::new(), external_tokens: Vec::new(), + supertype_symbols: Vec::new(), word_token: None, }; let inline_map = process_inlines(&grammar); @@ -429,6 +431,7 @@ mod tests { expected_conflicts: Vec::new(), extra_tokens: Vec::new(), external_tokens: Vec::new(), + supertype_symbols: Vec::new(), word_token: None, }; diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index e358f4fa..7ea5acdd 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -1,6 +1,6 @@ use super::nfa::CharacterSet; use super::rules::{Alias, Associativity, Symbol}; -use hashbrown::{HashMap, HashSet}; +use hashbrown::HashMap; use std::collections::BTreeMap; pub(crate) type ProductionInfoId = usize; @@ -52,7 +52,7 @@ pub(crate) struct ProductionInfo { pub field_map: BTreeMap>, } -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub(crate) enum ChildType { Normal(Symbol), Aliased(Alias), @@ -62,13 +62,15 @@ pub(crate) enum ChildType { pub(crate) struct FieldInfo { pub required: bool, pub multiple: bool, - pub types: HashSet, + pub types: Vec, } #[derive(Debug, Default, PartialEq, Eq)] pub(crate) struct VariableInfo { pub fields: HashMap, - pub child_types: HashSet, + pub subclasses: Vec, + pub child_types: Vec, + pub has_multi_step_production: bool, } #[derive(Debug, PartialEq, Eq)]