Fix parser-generation bugs introduced in #782

This commit is contained in:
Max Brunsfeld 2020-11-02 13:43:28 -08:00
parent cced66cb3b
commit 3497f34dd7
2 changed files with 62 additions and 64 deletions

View file

@ -146,7 +146,7 @@ impl ChildQuantity {
pub(crate) fn get_variable_info(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
simple_aliases: &AliasMap,
default_aliases: &AliasMap,
) -> Result<Vec<VariableInfo>> {
let child_type_is_visible = |t: &ChildType| {
variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous
@ -185,7 +185,7 @@ pub(crate) fn get_variable_info(
let child_symbol = step.symbol;
let child_type = if let Some(alias) = &step.alias {
ChildType::Aliased(alias.clone())
} else if let Some(alias) = simple_aliases.get(&step.symbol) {
} else if let Some(alias) = default_aliases.get(&step.symbol) {
ChildType::Aliased(alias.clone())
} else {
ChildType::Normal(child_symbol)
@ -358,7 +358,7 @@ pub(crate) fn get_variable_info(
pub(crate) fn generate_node_types_json(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
simple_aliases: &AliasMap,
default_aliases: &AliasMap,
variable_info: &Vec<VariableInfo>,
) -> Vec<NodeInfoJSON> {
let mut node_types_json = BTreeMap::new();
@ -369,7 +369,7 @@ pub(crate) fn generate_node_types_json(
named: alias.is_named,
},
ChildType::Normal(symbol) => {
if let Some(alias) = simple_aliases.get(&symbol) {
if let Some(alias) = default_aliases.get(&symbol) {
NodeTypeJSON {
kind: alias.value.clone(),
named: alias.is_named,
@ -417,7 +417,7 @@ pub(crate) fn generate_node_types_json(
};
let mut aliases_by_symbol = HashMap::new();
for (symbol, alias) in simple_aliases {
for (symbol, alias) in default_aliases {
aliases_by_symbol.insert(*symbol, {
let mut aliases = HashSet::new();
aliases.insert(Some(alias.clone()));
@ -425,7 +425,7 @@ pub(crate) fn generate_node_types_json(
});
}
for extra_symbol in &syntax_grammar.extra_symbols {
if !simple_aliases.contains_key(extra_symbol) {
if !default_aliases.contains_key(extra_symbol) {
aliases_by_symbol
.entry(*extra_symbol)
.or_insert(HashSet::new())
@ -435,12 +435,15 @@ pub(crate) fn generate_node_types_json(
for variable in &syntax_grammar.variables {
for production in &variable.productions {
for step in &production.steps {
if !simple_aliases.contains_key(&step.symbol) {
aliases_by_symbol
.entry(step.symbol)
.or_insert(HashSet::new())
.insert(step.alias.clone());
}
aliases_by_symbol
.entry(step.symbol)
.or_insert(HashSet::new())
.insert(
step.alias
.as_ref()
.or_else(|| default_aliases.get(&step.symbol))
.cloned(),
);
}
}
}
@ -1808,14 +1811,14 @@ mod tests {
}
// Test helper: runs `prepare_grammar` on the given input grammar (unwrapping the result),
// computes the variable info via `get_variable_info` (also unwrapped), and returns the
// output of `generate_node_types_json` for that grammar.
//
// NOTE(review): this span appears to be a diff hunk with the +/- markers stripped -- each
// `simple_aliases` line is immediately followed by an otherwise identical `default_aliases`
// line, presumably the old and new versions of the same statement. Confirm against the
// actual file before treating this as compilable code.
fn get_node_types(grammar: InputGrammar) -> Vec<NodeInfoJSON> {
let (syntax_grammar, lexical_grammar, _, simple_aliases) =
let (syntax_grammar, lexical_grammar, _, default_aliases) =
prepare_grammar(&grammar).unwrap();
let variable_info =
get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases).unwrap();
get_variable_info(&syntax_grammar, &lexical_grammar, &default_aliases).unwrap();
generate_node_types_json(
&syntax_grammar,
&lexical_grammar,
&simple_aliases,
&default_aliases,
&variable_info,
)
}

View file

@ -143,49 +143,6 @@ impl Generator {
self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
}
let mut field_names = Vec::new();
for production_info in &self.parse_table.production_infos {
for field_name in production_info.field_map.keys() {
field_names.push(field_name);
}
for alias in &production_info.alias_sequence {
if let Some(alias) = &alias {
let alias_kind = alias.kind();
let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
let (name, kind) = self.metadata_for_symbol(*symbol);
name == alias.value && kind == alias_kind
});
let alias_id = if let Some(symbol) = matching_symbol {
self.symbol_ids[&symbol].clone()
} else if alias.is_named {
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
} else {
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
};
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
}
}
}
self.unique_aliases = self
.alias_ids
.keys()
.filter(|alias| {
self.parse_table
.symbols
.iter()
.cloned()
.find(|symbol| {
let (name, kind) = self.metadata_for_symbol(*symbol);
name == alias.value && kind == alias.kind()
})
.is_none()
})
.cloned()
.collect();
self.unique_aliases.sort_unstable();
self.symbol_map = self
.parse_table
.symbols
@ -230,13 +187,51 @@ impl Generator {
})
.collect();
field_names.sort_unstable();
field_names.dedup();
self.field_names = field_names.into_iter().cloned().collect();
for production_info in &self.parse_table.production_infos {
// Build a list of all field names
for field_name in production_info.field_map.keys() {
if let Err(i) = self.field_names.binary_search(&field_name) {
self.field_names.insert(i, field_name.clone());
}
}
// If we are opting in to the new unstable language ABI, then use the concept of
// "small parse states". Otherwise, use the same representation for all parse
// states.
for alias in &production_info.alias_sequence {
// Generate a mapping from aliases to C identifiers.
if let Some(alias) = &alias {
let existing_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
if let Some(default_alias) = self.default_aliases.get(symbol) {
default_alias == alias
} else {
let (name, kind) = self.metadata_for_symbol(*symbol);
name == alias.value && kind == alias.kind()
}
});
// Some aliases match an existing symbol in the grammar.
let alias_id;
if let Some(existing_symbol) = existing_symbol {
alias_id = self.symbol_ids[&self.symbol_map[&existing_symbol]].clone();
}
// Other aliases don't match any existing symbol, and need their own identifiers.
else {
if let Err(i) = self.unique_aliases.binary_search(alias) {
self.unique_aliases.insert(i, alias.clone());
}
alias_id = if alias.is_named {
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
} else {
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
};
}
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
}
}
}
// Determine which states should use the "small state" representation, and which should
// use the normal array representation.
let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2);
self.large_state_count = self
.parse_table