Merge remote-tracking branch 'origin/master' into query-testy

This commit is contained in:
Patrick Thomson 2020-11-10 16:16:53 -05:00
commit c9c886d971
19 changed files with 721 additions and 459 deletions

View file

@ -146,7 +146,7 @@ impl ChildQuantity {
pub(crate) fn get_variable_info(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
simple_aliases: &AliasMap,
default_aliases: &AliasMap,
) -> Result<Vec<VariableInfo>> {
let child_type_is_visible = |t: &ChildType| {
variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous
@ -185,7 +185,7 @@ pub(crate) fn get_variable_info(
let child_symbol = step.symbol;
let child_type = if let Some(alias) = &step.alias {
ChildType::Aliased(alias.clone())
} else if let Some(alias) = simple_aliases.get(&step.symbol) {
} else if let Some(alias) = default_aliases.get(&step.symbol) {
ChildType::Aliased(alias.clone())
} else {
ChildType::Normal(child_symbol)
@ -358,7 +358,7 @@ pub(crate) fn get_variable_info(
pub(crate) fn generate_node_types_json(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
simple_aliases: &AliasMap,
default_aliases: &AliasMap,
variable_info: &Vec<VariableInfo>,
) -> Vec<NodeInfoJSON> {
let mut node_types_json = BTreeMap::new();
@ -369,7 +369,7 @@ pub(crate) fn generate_node_types_json(
named: alias.is_named,
},
ChildType::Normal(symbol) => {
if let Some(alias) = simple_aliases.get(&symbol) {
if let Some(alias) = default_aliases.get(&symbol) {
NodeTypeJSON {
kind: alias.value.clone(),
named: alias.is_named,
@ -417,22 +417,33 @@ pub(crate) fn generate_node_types_json(
};
let mut aliases_by_symbol = HashMap::new();
for (symbol, alias) in simple_aliases {
for (symbol, alias) in default_aliases {
aliases_by_symbol.insert(*symbol, {
let mut aliases = HashSet::new();
aliases.insert(Some(alias.clone()));
aliases
});
}
for extra_symbol in &syntax_grammar.extra_symbols {
if !default_aliases.contains_key(extra_symbol) {
aliases_by_symbol
.entry(*extra_symbol)
.or_insert(HashSet::new())
.insert(None);
}
}
for variable in &syntax_grammar.variables {
for production in &variable.productions {
for step in &production.steps {
if !simple_aliases.contains_key(&step.symbol) {
aliases_by_symbol
.entry(step.symbol)
.or_insert(HashSet::new())
.insert(step.alias.clone());
}
aliases_by_symbol
.entry(step.symbol)
.or_insert(HashSet::new())
.insert(
step.alias
.as_ref()
.or_else(|| default_aliases.get(&step.symbol))
.cloned(),
);
}
}
}
@ -722,9 +733,18 @@ mod tests {
kind: VariableType::Named,
rule: Rule::string("x"),
},
// This rule is not reachable from the start symbol
// so it won't be present in the node_types
Variable {
name: "v3".to_string(),
kind: VariableType::Named,
rule: Rule::string("y"),
},
],
});
assert_eq!(node_types.len(), 3);
assert_eq!(
node_types[0],
NodeInfoJSON {
@ -784,6 +804,112 @@ mod tests {
);
}
#[test]
fn test_node_types_simple_extras() {
    // Expected entry for a node type with no fields, children or subtypes.
    let leaf = |kind: &str, named: bool| NodeInfoJSON {
        kind: kind.to_string(),
        named,
        subtypes: None,
        children: None,
        fields: None,
    };
    // Expected description of a required, non-repeated field with one type.
    let single = |kind: &str, named: bool| FieldInfoJSON {
        multiple: false,
        required: true,
        types: vec![NodeTypeJSON {
            kind: kind.to_string(),
            named,
        }],
    };

    let grammar = InputGrammar {
        name: String::new(),
        extra_symbols: vec![Rule::named("v3")],
        external_tokens: Vec::new(),
        expected_conflicts: Vec::new(),
        variables_to_inline: Vec::new(),
        word_token: None,
        supertype_symbols: vec![],
        variables: vec![
            Variable {
                name: "v1".to_string(),
                kind: VariableType::Named,
                rule: Rule::seq(vec![
                    Rule::field("f1".to_string(), Rule::named("v2")),
                    Rule::field("f2".to_string(), Rule::string(";")),
                ]),
            },
            Variable {
                name: "v2".to_string(),
                kind: VariableType::Named,
                rule: Rule::string("x"),
            },
            // `v3` cannot be reached from the start symbol, but it is listed
            // in `extra_symbols`, so it is expected in the node types.
            Variable {
                name: "v3".to_string(),
                kind: VariableType::Named,
                rule: Rule::string("y"),
            },
        ],
    };
    let node_types = get_node_types(grammar);

    assert_eq!(node_types.len(), 4);
    assert_eq!(
        node_types[0],
        NodeInfoJSON {
            kind: "v1".to_string(),
            named: true,
            subtypes: None,
            children: None,
            fields: Some(
                vec![
                    ("f1".to_string(), single("v2", true)),
                    ("f2".to_string(), single(";", false)),
                ]
                .into_iter()
                .collect()
            ),
        }
    );
    assert_eq!(node_types[1], leaf(";", false));
    assert_eq!(node_types[2], leaf("v2", true));
    assert_eq!(node_types[3], leaf("v3", true));
}
#[test]
fn test_node_types_with_supertypes() {
let node_types = get_node_types(InputGrammar {
@ -1685,14 +1811,14 @@ mod tests {
}
fn get_node_types(grammar: InputGrammar) -> Vec<NodeInfoJSON> {
let (syntax_grammar, lexical_grammar, _, simple_aliases) =
let (syntax_grammar, lexical_grammar, _, default_aliases) =
prepare_grammar(&grammar).unwrap();
let variable_info =
get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases).unwrap();
get_variable_info(&syntax_grammar, &lexical_grammar, &default_aliases).unwrap();
generate_node_types_json(
&syntax_grammar,
&lexical_grammar,
&simple_aliases,
&default_aliases,
&variable_info,
)
}

View file

@ -0,0 +1,293 @@
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
// Alias usage collected for a single grammar symbol by `extract_default_aliases`.
#[derive(Clone, Default)]
struct SymbolStatus {
// Each distinct alias applied to the symbol, paired with the number of
// production steps that apply it. After a default alias has been chosen,
// this list is reduced to at most that one entry.
aliases: Vec<(Alias, usize)>,
// Set when at least one production step uses the symbol with no alias.
appears_unaliased: bool,
}
// Update the grammar by finding symbols that are always aliased, and for each such symbol,
// promoting one of its aliases to a "default alias", which is applied globally instead
// of in a context-specific way.
//
// This has two benefits:
// * It reduces the overhead of storing production-specific alias info in the parse table.
// * Within an `ERROR` node, no context-specific aliases will be applied. This transformation
//   ensures that the children of an `ERROR` node have symbols that are consistent with the
//   way that they would appear in a valid syntax tree.
//
// Arguments:
// * `syntax_grammar` - scanned for aliased production steps; steps whose alias becomes
//   redundant have that alias cleared in place.
// * `lexical_grammar` - only used for the number of terminal symbols.
//
// Returns a map from each symbol that received a default alias to that alias.
//
// Panics if a production step refers to a symbol of kind `SymbolType::End`.
//
// NOTE(review): only production steps are examined; `syntax_grammar.extra_symbols` is not
// consulted here. Confirm whether an extra that is aliased everywhere else should still
// be given a default alias.
pub(super) fn extract_default_aliases(
    syntax_grammar: &mut SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
) -> AliasMap {
    let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
    let mut non_terminal_status_list =
        vec![SymbolStatus::default(); syntax_grammar.variables.len()];
    let mut external_status_list =
        vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];

    // For each grammar symbol, find all of the aliases under which the symbol appears,
    // and determine whether or not the symbol ever appears *unaliased*.
    for variable in syntax_grammar.variables.iter() {
        for production in variable.productions.iter() {
            for step in production.steps.iter() {
                let status = match step.symbol.kind {
                    SymbolType::External => &mut external_status_list[step.symbol.index],
                    SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
                    SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
                    SymbolType::End => panic!("Unexpected end token"),
                };

                // Default aliases don't work for inlined variables.
                if syntax_grammar.variables_to_inline.contains(&step.symbol) {
                    continue;
                }

                if let Some(alias) = &step.alias {
                    if let Some(count_for_alias) = status
                        .aliases
                        .iter_mut()
                        .find_map(|(a, count)| if a == alias { Some(count) } else { None })
                    {
                        *count_for_alias += 1;
                    } else {
                        status.aliases.push((alias.clone(), 1));
                    }
                } else {
                    status.appears_unaliased = true;
                }
            }
        }
    }

    let symbols_with_statuses = terminal_status_list
        .iter_mut()
        .enumerate()
        .map(|(i, status)| (Symbol::terminal(i), status))
        .chain(
            non_terminal_status_list
                .iter_mut()
                .enumerate()
                .map(|(i, status)| (Symbol::non_terminal(i), status)),
        )
        .chain(
            external_status_list
                .iter_mut()
                .enumerate()
                .map(|(i, status)| (Symbol::external(i), status)),
        );

    // For each symbol that always appears aliased, find the alias that occurs most often,
    // and designate that alias as the symbol's "default alias". Store all of these
    // default aliases in a map that will be returned.
    let mut result = AliasMap::new();
    for (symbol, status) in symbols_with_statuses {
        if status.appears_unaliased {
            status.aliases.clear();
        } else if let Some(default_entry) = status
            .aliases
            .iter()
            .enumerate()
            // Highest count wins; among equal counts, the alias recorded first wins.
            .max_by_key(|(i, (_, count))| (*count, std::cmp::Reverse(*i)))
            .map(|(_, entry)| entry.clone())
        {
            result.insert(symbol, default_entry.0.clone());
            // From here on, the status list holds only the chosen default alias.
            status.aliases.clear();
            status.aliases.push(default_entry);
        }
    }

    // Wherever a symbol is aliased as its default alias, remove the usage of the alias,
    // because it will now be redundant.
    let mut alias_positions_to_clear = Vec::new();
    for variable in syntax_grammar.variables.iter_mut() {
        alias_positions_to_clear.clear();

        for (i, production) in variable.productions.iter().enumerate() {
            for (j, step) in production.steps.iter().enumerate() {
                let status = match step.symbol.kind {
                    SymbolType::External => &external_status_list[step.symbol.index],
                    SymbolType::NonTerminal => &non_terminal_status_list[step.symbol.index],
                    SymbolType::Terminal => &terminal_status_list[step.symbol.index],
                    SymbolType::End => panic!("Unexpected end token"),
                };

                // If this step is aliased as the symbol's default alias, then remove that alias.
                let default_alias = status.aliases.first().map(|(alias, _)| alias);
                if step.alias.is_some() && step.alias.as_ref() == default_alias {
                    // Keep the alias if another production of this variable carries the same
                    // alias at the same step index on a symbol whose default alias is
                    // different (or absent).
                    let other_productions_must_use_this_alias_at_this_index = variable
                        .productions
                        .iter()
                        .enumerate()
                        .any(|(other_i, other_production)| {
                            other_i != i
                                && other_production.steps.get(j).map_or(false, |other_step| {
                                    other_step.alias == step.alias
                                        && result.get(&other_step.symbol) != step.alias.as_ref()
                                })
                        });

                    if !other_productions_must_use_this_alias_at_this_index {
                        alias_positions_to_clear.push((i, j));
                    }
                }
            }
        }

        // Apply the removals only after the scan, so that the checks above always see
        // the variable's original aliases.
        for (production_index, step_index) in &alias_positions_to_clear {
            variable.productions[*production_index].steps[*step_index].alias = None;
        }
    }

    result
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generate::grammars::{
        LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
    };
    use crate::generate::nfa::Nfa;

    // A step that refers to terminal `index` with no alias.
    fn plain(index: usize) -> ProductionStep {
        ProductionStep::new(Symbol::terminal(index))
    }

    // A step that refers to terminal `index` under the named alias `alias`.
    fn aliased(index: usize, alias: &str) -> ProductionStep {
        plain(index).with_alias(alias, true)
    }

    // A named syntax variable with a single production made of `steps`.
    fn variable(name: &str, steps: Vec<ProductionStep>) -> SyntaxVariable {
        SyntaxVariable {
            name: name.to_owned(),
            kind: VariableType::Named,
            productions: vec![Production {
                dynamic_precedence: 0,
                steps,
            }],
        }
    }

    // The named alias value that `aliased` attaches to a step.
    fn named_alias(value: &str) -> Alias {
        Alias {
            value: value.to_string(),
            is_named: true,
        }
    }

    #[test]
    fn test_extract_default_aliases() {
        let mut syntax_grammar = SyntaxGrammar {
            variables: vec![
                variable(
                    "v1",
                    vec![
                        aliased(0, "a1"),
                        aliased(1, "a2"),
                        aliased(2, "a3"),
                        aliased(3, "a4"),
                    ],
                ),
                variable(
                    "v2",
                    vec![
                        // Token 0 is always aliased as "a1".
                        aliased(0, "a1"),
                        // Token 1 is aliased within rule `v1` above, but not here.
                        plain(1),
                        // Token 2 is aliased differently here than in `v1`. The alias from
                        // `v1` should be promoted to the default alias, because `v1` appears
                        // first in the grammar.
                        aliased(2, "a5"),
                        // Token 3 is also aliased differently here than in `v1`. In this case,
                        // this alias should be promoted to the default alias, because it is
                        // used a greater number of times (twice).
                        aliased(3, "a6"),
                        aliased(3, "a6"),
                    ],
                ),
            ],
            extra_symbols: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),
            supertype_symbols: Vec::new(),
            external_tokens: Vec::new(),
            word_token: None,
        };

        // Four anonymous tokens named "t0" through "t3".
        let lexical_grammar = LexicalGrammar {
            nfa: Nfa::new(),
            variables: (0..4)
                .map(|i| LexicalVariable {
                    name: format!("t{}", i),
                    kind: VariableType::Anonymous,
                    implicit_precedence: 0,
                    start_state: 0,
                })
                .collect(),
        };

        let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
        assert_eq!(default_aliases.len(), 3);

        assert_eq!(
            default_aliases.get(&Symbol::terminal(0)),
            Some(&named_alias("a1"))
        );
        assert_eq!(
            default_aliases.get(&Symbol::terminal(2)),
            Some(&named_alias("a3"))
        );
        assert_eq!(
            default_aliases.get(&Symbol::terminal(3)),
            Some(&named_alias("a6"))
        );
        assert_eq!(default_aliases.get(&Symbol::terminal(1)), None);

        // Steps that used their symbol's default alias lose it; all other aliases stay.
        assert_eq!(
            syntax_grammar.variables,
            vec![
                variable(
                    "v1",
                    vec![plain(0), aliased(1, "a2"), plain(2), aliased(3, "a4")],
                ),
                variable(
                    "v2",
                    vec![plain(0), plain(1), aliased(2, "a5"), plain(3), plain(3)],
                ),
            ]
        );
    }
}

View file

@ -1,223 +0,0 @@
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
// Alias usage collected for a single grammar symbol by `extract_simple_aliases`.
#[derive(Clone, Default)]
struct SymbolStatus {
// The alias seen on the symbol's steps so far; reset to `None` once the
// symbol is found to be conflicting.
alias: Option<Alias>,
// Set when the symbol appears without an alias, or with two different aliases.
conflicting: bool,
}
// Finds the symbols that carry one and the same alias on every production step that
// uses them, strips the alias from those steps, and returns the symbol-to-alias map.
//
// Panics if a production step refers to a symbol of kind `SymbolType::End`.
pub(super) fn extract_simple_aliases(
    syntax_grammar: &mut SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
) -> AliasMap {
    let mut terminals = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
    let mut non_terminals = vec![SymbolStatus::default(); syntax_grammar.variables.len()];
    let mut externals = vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];

    // Pass 1: determine which symbols are *always* aliased to a single name.
    let all_steps = syntax_grammar
        .variables
        .iter()
        .flat_map(|variable| variable.productions.iter())
        .flat_map(|production| production.steps.iter());
    for step in all_steps {
        let entry = match step.symbol.kind {
            SymbolType::External => &mut externals[step.symbol.index],
            SymbolType::NonTerminal => &mut non_terminals[step.symbol.index],
            SymbolType::Terminal => &mut terminals[step.symbol.index],
            SymbolType::End => panic!("Unexpected end token"),
        };

        // Once a symbol is known to conflict, later steps cannot change that.
        if entry.conflicting {
            continue;
        }

        if step.alias.is_some() && entry.alias.is_none() {
            // First aliased use of this symbol: remember the alias.
            entry.alias = step.alias.clone();
        } else if step.alias.is_none() || entry.alias != step.alias {
            // Unaliased use, or a second and different alias.
            entry.alias = None;
            entry.conflicting = true;
        }
    }

    // Pass 2: remove the aliases for those symbols.
    let all_steps_mut = syntax_grammar
        .variables
        .iter_mut()
        .flat_map(|variable| variable.productions.iter_mut())
        .flat_map(|production| production.steps.iter_mut());
    for step in all_steps_mut {
        let entry = match step.symbol.kind {
            SymbolType::External => &externals[step.symbol.index],
            SymbolType::NonTerminal => &non_terminals[step.symbol.index],
            SymbolType::Terminal => &terminals[step.symbol.index],
            SymbolType::End => panic!("Unexpected end token"),
        };
        if entry.alias.is_some() {
            step.alias = None;
        }
    }

    // Pass 3: populate a map of the symbols to their aliases.
    let terminal_entries = terminals
        .into_iter()
        .enumerate()
        .map(|(i, entry)| (Symbol::terminal(i), entry.alias));
    let non_terminal_entries = non_terminals
        .into_iter()
        .enumerate()
        .map(|(i, entry)| (Symbol::non_terminal(i), entry.alias));
    let external_entries = externals
        .into_iter()
        .enumerate()
        .map(|(i, entry)| (Symbol::external(i), entry.alias));

    let mut result = AliasMap::new();
    for (symbol, alias) in terminal_entries
        .chain(non_terminal_entries)
        .chain(external_entries)
    {
        if let Some(alias) = alias {
            result.insert(symbol, alias);
        }
    }
    result
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generate::grammars::{
        LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
    };
    use crate::generate::nfa::Nfa;

    // A step that refers to terminal `index` with no alias.
    fn plain(index: usize) -> ProductionStep {
        ProductionStep::new(Symbol::terminal(index))
    }

    // A step that refers to terminal `index` under the named alias `alias`.
    fn aliased(index: usize, alias: &str) -> ProductionStep {
        plain(index).with_alias(alias, true)
    }

    // A named syntax variable with a single production made of `steps`.
    fn variable(name: &str, steps: Vec<ProductionStep>) -> SyntaxVariable {
        SyntaxVariable {
            name: name.to_owned(),
            kind: VariableType::Named,
            productions: vec![Production {
                dynamic_precedence: 0,
                steps,
            }],
        }
    }

    #[test]
    fn test_extract_simple_aliases() {
        let mut syntax_grammar = SyntaxGrammar {
            variables: vec![
                variable(
                    "v1",
                    vec![aliased(0, "a1"), aliased(1, "a2"), aliased(2, "a3")],
                ),
                variable(
                    "v2",
                    vec![
                        // Token 0 is always aliased as "a1".
                        aliased(0, "a1"),
                        // Token 1 is aliased above, but not here.
                        plain(1),
                        // Token 2 is aliased differently than above.
                        aliased(2, "a4"),
                    ],
                ),
            ],
            extra_symbols: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),
            supertype_symbols: Vec::new(),
            external_tokens: Vec::new(),
            word_token: None,
        };

        // Three anonymous tokens named "t1" through "t3".
        let lexical_grammar = LexicalGrammar {
            nfa: Nfa::new(),
            variables: (0..3)
                .map(|i| LexicalVariable {
                    name: format!("t{}", i + 1),
                    kind: VariableType::Anonymous,
                    implicit_precedence: 0,
                    start_state: 0,
                })
                .collect(),
        };

        let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
        assert_eq!(simple_aliases.len(), 1);
        assert_eq!(
            simple_aliases[&Symbol::terminal(0)],
            Alias {
                value: "a1".to_string(),
                is_named: true,
            }
        );

        assert_eq!(
            syntax_grammar.variables,
            vec![
                // 'Simple' alias removed from token 0; other aliases unchanged.
                variable("v1", vec![plain(0), aliased(1, "a2"), aliased(2, "a3")]),
                variable("v2", vec![plain(0), plain(1), aliased(2, "a4")]),
            ]
        );
    }
}

View file

@ -1,6 +1,6 @@
mod expand_repeats;
mod expand_tokens;
mod extract_simple_aliases;
mod extract_default_aliases;
mod extract_tokens;
mod flatten_grammar;
mod intern_symbols;
@ -8,7 +8,7 @@ mod process_inlines;
use self::expand_repeats::expand_repeats;
pub(crate) use self::expand_tokens::expand_tokens;
use self::extract_simple_aliases::extract_simple_aliases;
use self::extract_default_aliases::extract_default_aliases;
use self::extract_tokens::extract_tokens;
use self::flatten_grammar::flatten_grammar;
use self::intern_symbols::intern_symbols;
@ -52,7 +52,7 @@ pub(crate) fn prepare_grammar(
let syntax_grammar = expand_repeats(syntax_grammar);
let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
let lexical_grammar = expand_tokens(lexical_grammar)?;
let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
let inlines = process_inlines(&syntax_grammar);
Ok((syntax_grammar, lexical_grammar, inlines, simple_aliases))
Ok((syntax_grammar, lexical_grammar, inlines, default_aliases))
}

View file

@ -65,7 +65,7 @@ struct Generator {
keyword_capture_token: Option<Symbol>,
syntax_grammar: SyntaxGrammar,
lexical_grammar: LexicalGrammar,
simple_aliases: AliasMap,
default_aliases: AliasMap,
symbol_order: HashMap<Symbol, usize>,
symbol_ids: HashMap<Symbol, String>,
alias_ids: HashMap<Alias, String>,
@ -143,49 +143,6 @@ impl Generator {
self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
}
let mut field_names = Vec::new();
for production_info in &self.parse_table.production_infos {
for field_name in production_info.field_map.keys() {
field_names.push(field_name);
}
for alias in &production_info.alias_sequence {
if let Some(alias) = &alias {
let alias_kind = alias.kind();
let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
let (name, kind) = self.metadata_for_symbol(*symbol);
name == alias.value && kind == alias_kind
});
let alias_id = if let Some(symbol) = matching_symbol {
self.symbol_ids[&symbol].clone()
} else if alias.is_named {
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
} else {
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
};
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
}
}
}
self.unique_aliases = self
.alias_ids
.keys()
.filter(|alias| {
self.parse_table
.symbols
.iter()
.cloned()
.find(|symbol| {
let (name, kind) = self.metadata_for_symbol(*symbol);
name == alias.value && kind == alias.kind()
})
.is_none()
})
.cloned()
.collect();
self.unique_aliases.sort_unstable();
self.symbol_map = self
.parse_table
.symbols
@ -198,10 +155,10 @@ impl Generator {
// public-facing symbol. If one of the symbols is not aliased, choose that one
// to be the public-facing symbol. Otherwise, pick the symbol with the lowest
// numeric value.
if let Some(alias) = self.simple_aliases.get(symbol) {
if let Some(alias) = self.default_aliases.get(symbol) {
let kind = alias.kind();
for other_symbol in &self.parse_table.symbols {
if let Some(other_alias) = self.simple_aliases.get(other_symbol) {
if let Some(other_alias) = self.default_aliases.get(other_symbol) {
if other_symbol < mapping && other_alias == alias {
mapping = other_symbol;
}
@ -230,13 +187,51 @@ impl Generator {
})
.collect();
field_names.sort_unstable();
field_names.dedup();
self.field_names = field_names.into_iter().cloned().collect();
for production_info in &self.parse_table.production_infos {
// Build a list of all field names
for field_name in production_info.field_map.keys() {
if let Err(i) = self.field_names.binary_search(&field_name) {
self.field_names.insert(i, field_name.clone());
}
}
// If we are opting in to the new unstable language ABI, then use the concept of
// "small parse states". Otherwise, use the same representation for all parse
// states.
for alias in &production_info.alias_sequence {
// Generate a mapping from aliases to C identifiers.
if let Some(alias) = &alias {
let existing_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
if let Some(default_alias) = self.default_aliases.get(symbol) {
default_alias == alias
} else {
let (name, kind) = self.metadata_for_symbol(*symbol);
name == alias.value && kind == alias.kind()
}
});
// Some aliases match an existing symbol in the grammar.
let alias_id;
if let Some(existing_symbol) = existing_symbol {
alias_id = self.symbol_ids[&self.symbol_map[&existing_symbol]].clone();
}
// Other aliases don't match any existing symbol, and need their own identifiers.
else {
if let Err(i) = self.unique_aliases.binary_search(alias) {
self.unique_aliases.insert(i, alias.clone());
}
alias_id = if alias.is_named {
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
} else {
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
};
}
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
}
}
}
// Determine which states should use the "small state" representation, and which should
// use the normal array representation.
let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2);
self.large_state_count = self
.parse_table
@ -361,7 +356,7 @@ impl Generator {
indent!(self);
for symbol in self.parse_table.symbols.iter() {
let name = self.sanitize_string(
self.simple_aliases
self.default_aliases
.get(symbol)
.map(|alias| alias.value.as_str())
.unwrap_or(self.metadata_for_symbol(*symbol).0),
@ -444,7 +439,7 @@ impl Generator {
for symbol in &self.parse_table.symbols {
add_line!(self, "[{}] = {{", self.symbol_ids[&symbol]);
indent!(self);
if let Some(Alias { is_named, .. }) = self.simple_aliases.get(symbol) {
if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) {
add_line!(self, ".visible = true,");
add_line!(self, ".named = {},", is_named);
} else {
@ -519,19 +514,22 @@ impl Generator {
}
fn add_non_terminal_alias_map(&mut self) {
let mut aliases_by_symbol = HashMap::new();
let mut alias_ids_by_symbol = HashMap::new();
for variable in &self.syntax_grammar.variables {
for production in &variable.productions {
for step in &production.steps {
if let Some(alias) = &step.alias {
if step.symbol.is_non_terminal()
&& !self.simple_aliases.contains_key(&step.symbol)
&& Some(alias) != self.default_aliases.get(&step.symbol)
{
if self.symbol_ids.contains_key(&step.symbol) {
let alias_ids =
aliases_by_symbol.entry(step.symbol).or_insert(Vec::new());
if let Err(i) = alias_ids.binary_search(&alias) {
alias_ids.insert(i, alias);
if let Some(alias_id) = self.alias_ids.get(&alias) {
let alias_ids = alias_ids_by_symbol
.entry(step.symbol)
.or_insert(Vec::new());
if let Err(i) = alias_ids.binary_search(&alias_id) {
alias_ids.insert(i, alias_id);
}
}
}
}
@ -540,19 +538,19 @@ impl Generator {
}
}
let mut aliases_by_symbol = aliases_by_symbol.iter().collect::<Vec<_>>();
aliases_by_symbol.sort_unstable_by_key(|e| e.0);
let mut alias_ids_by_symbol = alias_ids_by_symbol.iter().collect::<Vec<_>>();
alias_ids_by_symbol.sort_unstable_by_key(|e| e.0);
add_line!(self, "static uint16_t ts_non_terminal_alias_map[] = {{");
indent!(self);
for (symbol, aliases) in aliases_by_symbol {
for (symbol, alias_ids) in alias_ids_by_symbol {
let symbol_id = &self.symbol_ids[symbol];
let public_symbol_id = &self.symbol_ids[&self.symbol_map[&symbol]];
add_line!(self, "{}, {},", symbol_id, 1 + aliases.len());
add_line!(self, "{}, {},", symbol_id, 1 + alias_ids.len());
indent!(self);
add_line!(self, "{},", public_symbol_id);
for alias in aliases {
add_line!(self, "{},", &self.alias_ids[&alias]);
for alias_id in alias_ids {
add_line!(self, "{},", alias_id);
}
dedent!(self);
}
@ -1545,7 +1543,7 @@ impl Generator {
/// for keyword capture, if any.
/// * `syntax_grammar` - The syntax grammar extracted from the language's grammar
/// * `lexical_grammar` - The lexical grammar extracted from the language's grammar
/// * `simple_aliases` - A map describing the global rename rules that should apply.
/// * `default_aliases` - A map describing the global rename rules that should apply.
/// the keys are symbols that are *always* aliased in the same way, and the values
/// are the aliases that are applied to those symbols.
/// * `next_abi` - A boolean indicating whether to opt into the new, unstable parse
@ -1558,7 +1556,7 @@ pub(crate) fn render_c_code(
keyword_capture_token: Option<Symbol>,
syntax_grammar: SyntaxGrammar,
lexical_grammar: LexicalGrammar,
simple_aliases: AliasMap,
default_aliases: AliasMap,
next_abi: bool,
) -> String {
Generator {
@ -1572,7 +1570,7 @@ pub(crate) fn render_c_code(
keyword_capture_token,
syntax_grammar,
lexical_grammar,
simple_aliases,
default_aliases,
symbol_ids: HashMap::new(),
symbol_order: HashMap::new(),
alias_ids: HashMap::new(),

View file

@ -367,6 +367,30 @@ fn test_query_errors_on_impossible_patterns() {
});
}
#[test]
fn test_query_verifies_possible_patterns_with_aliased_parent_nodes() {
    allocations::record(|| {
        let ruby = get_language("ruby");

        // An `identifier` child is accepted for this parent, so the query must compile.
        Query::new(ruby, "(destructured_parameter (identifier))").unwrap();

        // A `string` child is rejected; the error is reported at the opening
        // parenthesis of the child pattern, which is byte 24 of the query.
        let source = "(destructured_parameter (string))";
        let column = 24;
        assert_eq!(
            Query::new(ruby, source),
            Err(QueryError {
                kind: QueryErrorKind::Structure,
                row: 0,
                offset: column,
                column,
                // The second line of the message is a caret placed under the
                // reported column, so derive its padding from `column`.
                message: format!("{}\n{}^", source, " ".repeat(column)),
            })
        );
    });
}
#[test]
fn test_query_matches_with_simple_pattern() {
allocations::record(|| {
@ -1451,6 +1475,7 @@ fn test_query_matches_with_anonymous_tokens() {
r#"
";" @punctuation
"&&" @operator
"\"" @quote
"#,
)
.unwrap();
@ -1458,9 +1483,11 @@ fn test_query_matches_with_anonymous_tokens() {
assert_query_matches(
language,
&query,
"foo(a && b);",
r#"foo(a && "b");"#,
&[
(1, vec![("operator", "&&")]),
(2, vec![("quote", "\"")]),
(2, vec![("quote", "\"")]),
(0, vec![("punctuation", ";")]),
],
);
@ -1808,6 +1835,33 @@ fn test_query_matches_with_no_captures() {
});
}
#[test]
fn test_query_matches_with_repeated_fields() {
    allocations::record(|| {
        let language = get_language("c");
        let pattern = "(field_declaration declarator: (field_identifier) @field)";
        let query = Query::new(language, pattern).unwrap();

        // One declaration with three declarators; each one should produce its own match.
        let source = "\nstruct S {\nint a, b, c;\n}\n";

        assert_query_matches(
            language,
            &query,
            source,
            &[
                (0, vec![("field", "a")]),
                (0, vec![("field", "b")]),
                (0, vec![("field", "c")]),
            ],
        );
    });
}
#[test]
fn test_query_captures_basic() {
allocations::record(|| {