diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 9fb1fe8d..bc5a836f 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -146,7 +146,7 @@ impl ChildQuantity { pub(crate) fn get_variable_info( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, - simple_aliases: &AliasMap, + default_aliases: &AliasMap, ) -> Result> { let child_type_is_visible = |t: &ChildType| { variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous @@ -185,7 +185,7 @@ pub(crate) fn get_variable_info( let child_symbol = step.symbol; let child_type = if let Some(alias) = &step.alias { ChildType::Aliased(alias.clone()) - } else if let Some(alias) = simple_aliases.get(&step.symbol) { + } else if let Some(alias) = default_aliases.get(&step.symbol) { ChildType::Aliased(alias.clone()) } else { ChildType::Normal(child_symbol) @@ -358,7 +358,7 @@ pub(crate) fn get_variable_info( pub(crate) fn generate_node_types_json( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, - simple_aliases: &AliasMap, + default_aliases: &AliasMap, variable_info: &Vec, ) -> Vec { let mut node_types_json = BTreeMap::new(); @@ -369,7 +369,7 @@ pub(crate) fn generate_node_types_json( named: alias.is_named, }, ChildType::Normal(symbol) => { - if let Some(alias) = simple_aliases.get(&symbol) { + if let Some(alias) = default_aliases.get(&symbol) { NodeTypeJSON { kind: alias.value.clone(), named: alias.is_named, @@ -417,7 +417,7 @@ pub(crate) fn generate_node_types_json( }; let mut aliases_by_symbol = HashMap::new(); - for (symbol, alias) in simple_aliases { + for (symbol, alias) in default_aliases { aliases_by_symbol.insert(*symbol, { let mut aliases = HashSet::new(); aliases.insert(Some(alias.clone())); @@ -425,7 +425,7 @@ pub(crate) fn generate_node_types_json( }); } for extra_symbol in &syntax_grammar.extra_symbols { - if !simple_aliases.contains_key(extra_symbol) { + if !default_aliases.contains_key(extra_symbol) { aliases_by_symbol .entry(*extra_symbol) .or_insert(HashSet::new()) @@ -435,12 +435,15 @@ pub(crate) fn generate_node_types_json( for variable in &syntax_grammar.variables { for production in &variable.productions { for step in &production.steps { - if !simple_aliases.contains_key(&step.symbol) { - aliases_by_symbol - .entry(step.symbol) - .or_insert(HashSet::new()) - .insert(step.alias.clone()); - } + aliases_by_symbol + .entry(step.symbol) + .or_insert(HashSet::new()) + .insert( + step.alias + .as_ref() + .or_else(|| default_aliases.get(&step.symbol)) + .cloned(), + ); } } } @@ -1808,14 +1811,14 @@ mod tests { } fn get_node_types(grammar: InputGrammar) -> Vec { - let (syntax_grammar, lexical_grammar, _, simple_aliases) = + let (syntax_grammar, lexical_grammar, _, default_aliases) = prepare_grammar(&grammar).unwrap(); let variable_info = - get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases).unwrap(); + get_variable_info(&syntax_grammar, &lexical_grammar, &default_aliases).unwrap(); generate_node_types_json( &syntax_grammar, &lexical_grammar, - &simple_aliases, + &default_aliases, &variable_info, ) } diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 04f9e47b..58d99cc4 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -143,49 +143,6 @@ impl Generator { self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers); } - let mut field_names = Vec::new(); - for production_info in &self.parse_table.production_infos { - for field_name in production_info.field_map.keys() { - field_names.push(field_name); - } - - for alias in &production_info.alias_sequence { - if let Some(alias) = &alias { - let alias_kind = alias.kind(); - let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { - let (name, kind) = self.metadata_for_symbol(*symbol); - name == alias.value && kind == alias_kind - }); - let alias_id = if let Some(symbol) = matching_symbol { - self.symbol_ids[&symbol].clone() - } else if alias.is_named { - format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) - } else { - format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) - }; - self.alias_ids.entry(alias.clone()).or_insert(alias_id); - } - } - } - - self.unique_aliases = self - .alias_ids - .keys() - .filter(|alias| { - self.parse_table - .symbols - .iter() - .cloned() - .find(|symbol| { - let (name, kind) = self.metadata_for_symbol(*symbol); - name == alias.value && kind == alias.kind() - }) - .is_none() - }) - .cloned() - .collect(); - self.unique_aliases.sort_unstable(); - self.symbol_map = self .parse_table .symbols @@ -230,13 +187,51 @@ impl Generator { }) .collect(); - field_names.sort_unstable(); - field_names.dedup(); - self.field_names = field_names.into_iter().cloned().collect(); + for production_info in &self.parse_table.production_infos { + // Build a list of all field names + for field_name in production_info.field_map.keys() { + if let Err(i) = self.field_names.binary_search(&field_name) { + self.field_names.insert(i, field_name.clone()); + } + } - // If we are opting in to the new unstable language ABI, then use the concept of - // "small parse states". Otherwise, use the same representation for all parse - // states. + for alias in &production_info.alias_sequence { + // Generate a mapping from aliases to C identifiers. + if let Some(alias) = &alias { + let existing_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { + if let Some(default_alias) = self.default_aliases.get(symbol) { + default_alias == alias + } else { + let (name, kind) = self.metadata_for_symbol(*symbol); + name == alias.value && kind == alias.kind() + } + }); + + // Some aliases match an existing symbol in the grammar. + let alias_id; + if let Some(existing_symbol) = existing_symbol { + alias_id = self.symbol_ids[&self.symbol_map[&existing_symbol]].clone(); + } + // Other aliases don't match any existing symbol, and need their own identifiers. + else { + if let Err(i) = self.unique_aliases.binary_search(alias) { + self.unique_aliases.insert(i, alias.clone()); + } + + alias_id = if alias.is_named { + format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) + } else { + format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) + }; + } + + self.alias_ids.entry(alias.clone()).or_insert(alias_id); + } + } + } + + // Determine which states should use the "small state" representation, and which should + // use the normal array representation. let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2); self.large_state_count = self .parse_table