Merge remote-tracking branch 'origin/master' into query-testy
This commit is contained in:
commit
c9c886d971
19 changed files with 721 additions and 459 deletions
|
|
@ -146,7 +146,7 @@ impl ChildQuantity {
|
|||
pub(crate) fn get_variable_info(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
simple_aliases: &AliasMap,
|
||||
default_aliases: &AliasMap,
|
||||
) -> Result<Vec<VariableInfo>> {
|
||||
let child_type_is_visible = |t: &ChildType| {
|
||||
variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous
|
||||
|
|
@ -185,7 +185,7 @@ pub(crate) fn get_variable_info(
|
|||
let child_symbol = step.symbol;
|
||||
let child_type = if let Some(alias) = &step.alias {
|
||||
ChildType::Aliased(alias.clone())
|
||||
} else if let Some(alias) = simple_aliases.get(&step.symbol) {
|
||||
} else if let Some(alias) = default_aliases.get(&step.symbol) {
|
||||
ChildType::Aliased(alias.clone())
|
||||
} else {
|
||||
ChildType::Normal(child_symbol)
|
||||
|
|
@ -358,7 +358,7 @@ pub(crate) fn get_variable_info(
|
|||
pub(crate) fn generate_node_types_json(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
simple_aliases: &AliasMap,
|
||||
default_aliases: &AliasMap,
|
||||
variable_info: &Vec<VariableInfo>,
|
||||
) -> Vec<NodeInfoJSON> {
|
||||
let mut node_types_json = BTreeMap::new();
|
||||
|
|
@ -369,7 +369,7 @@ pub(crate) fn generate_node_types_json(
|
|||
named: alias.is_named,
|
||||
},
|
||||
ChildType::Normal(symbol) => {
|
||||
if let Some(alias) = simple_aliases.get(&symbol) {
|
||||
if let Some(alias) = default_aliases.get(&symbol) {
|
||||
NodeTypeJSON {
|
||||
kind: alias.value.clone(),
|
||||
named: alias.is_named,
|
||||
|
|
@ -417,22 +417,33 @@ pub(crate) fn generate_node_types_json(
|
|||
};
|
||||
|
||||
let mut aliases_by_symbol = HashMap::new();
|
||||
for (symbol, alias) in simple_aliases {
|
||||
for (symbol, alias) in default_aliases {
|
||||
aliases_by_symbol.insert(*symbol, {
|
||||
let mut aliases = HashSet::new();
|
||||
aliases.insert(Some(alias.clone()));
|
||||
aliases
|
||||
});
|
||||
}
|
||||
for extra_symbol in &syntax_grammar.extra_symbols {
|
||||
if !default_aliases.contains_key(extra_symbol) {
|
||||
aliases_by_symbol
|
||||
.entry(*extra_symbol)
|
||||
.or_insert(HashSet::new())
|
||||
.insert(None);
|
||||
}
|
||||
}
|
||||
for variable in &syntax_grammar.variables {
|
||||
for production in &variable.productions {
|
||||
for step in &production.steps {
|
||||
if !simple_aliases.contains_key(&step.symbol) {
|
||||
aliases_by_symbol
|
||||
.entry(step.symbol)
|
||||
.or_insert(HashSet::new())
|
||||
.insert(step.alias.clone());
|
||||
}
|
||||
aliases_by_symbol
|
||||
.entry(step.symbol)
|
||||
.or_insert(HashSet::new())
|
||||
.insert(
|
||||
step.alias
|
||||
.as_ref()
|
||||
.or_else(|| default_aliases.get(&step.symbol))
|
||||
.cloned(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -722,9 +733,18 @@ mod tests {
|
|||
kind: VariableType::Named,
|
||||
rule: Rule::string("x"),
|
||||
},
|
||||
// This rule is not reachable from the start symbol
|
||||
// so it won't be present in the node_types
|
||||
Variable {
|
||||
name: "v3".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::string("y"),
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
assert_eq!(node_types.len(), 3);
|
||||
|
||||
assert_eq!(
|
||||
node_types[0],
|
||||
NodeInfoJSON {
|
||||
|
|
@ -784,6 +804,112 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
fn test_node_types_simple_extras() {
    // Shorthand for a named grammar variable with the given rule.
    let named_variable = |name: &str, rule: Rule| Variable {
        name: name.to_string(),
        kind: VariableType::Named,
        rule,
    };
    // Expected info for a required, non-repeated field holding exactly one node type.
    let single_type_field = |kind: &str, named: bool| FieldInfoJSON {
        multiple: false,
        required: true,
        types: vec![NodeTypeJSON {
            kind: kind.to_string(),
            named,
        }],
    };
    // Expected info for a node type without subtypes, children, or fields.
    let leaf = |kind: &str, named: bool| NodeInfoJSON {
        kind: kind.to_string(),
        named,
        subtypes: None,
        children: None,
        fields: None,
    };

    let grammar = InputGrammar {
        name: String::new(),
        extra_symbols: vec![Rule::named("v3")],
        external_tokens: Vec::new(),
        expected_conflicts: Vec::new(),
        variables_to_inline: Vec::new(),
        word_token: None,
        supertype_symbols: vec![],
        variables: vec![
            named_variable(
                "v1",
                Rule::seq(vec![
                    Rule::field("f1".to_string(), Rule::named("v2")),
                    Rule::field("f2".to_string(), Rule::string(";")),
                ]),
            ),
            named_variable("v2", Rule::string("x")),
            // `v3` cannot be reached from the start symbol, but it is listed in
            // `extra_symbols`, so it is still expected to show up in the node types.
            named_variable("v3", Rule::string("y")),
        ],
    };
    let node_types = get_node_types(grammar);

    assert_eq!(node_types.len(), 4);

    assert_eq!(
        node_types[0],
        NodeInfoJSON {
            kind: "v1".to_string(),
            named: true,
            subtypes: None,
            children: None,
            fields: Some(
                vec![
                    ("f1".to_string(), single_type_field("v2", true)),
                    ("f2".to_string(), single_type_field(";", false)),
                ]
                .into_iter()
                .collect()
            )
        }
    );
    assert_eq!(node_types[1], leaf(";", false));
    assert_eq!(node_types[2], leaf("v2", true));
    assert_eq!(node_types[3], leaf("v3", true));
}
|
||||
|
||||
#[test]
|
||||
fn test_node_types_with_supertypes() {
|
||||
let node_types = get_node_types(InputGrammar {
|
||||
|
|
@ -1685,14 +1811,14 @@ mod tests {
|
|||
}
|
||||
|
||||
fn get_node_types(grammar: InputGrammar) -> Vec<NodeInfoJSON> {
|
||||
let (syntax_grammar, lexical_grammar, _, simple_aliases) =
|
||||
let (syntax_grammar, lexical_grammar, _, default_aliases) =
|
||||
prepare_grammar(&grammar).unwrap();
|
||||
let variable_info =
|
||||
get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases).unwrap();
|
||||
get_variable_info(&syntax_grammar, &lexical_grammar, &default_aliases).unwrap();
|
||||
generate_node_types_json(
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
&simple_aliases,
|
||||
&default_aliases,
|
||||
&variable_info,
|
||||
)
|
||||
}
|
||||
|
|
|
|||
293
cli/src/generate/prepare_grammar/extract_default_aliases.rs
Normal file
293
cli/src/generate/prepare_grammar/extract_default_aliases.rs
Normal file
|
|
@ -0,0 +1,293 @@
|
|||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
|
||||
|
||||
/// Alias usage recorded for a single grammar symbol while scanning production steps.
#[derive(Clone, Default)]
|
||||
struct SymbolStatus {
|
||||
/// Each distinct alias seen on a step for this symbol, paired with the number of
/// steps that used it. Later reduced to at most the single chosen default alias.
aliases: Vec<(Alias, usize)>,
|
||||
/// Set to `true` once the symbol is found in a step that carries no alias.
appears_unaliased: bool,
|
||||
}
|
||||
|
||||
// Update the grammar by finding symbols that always are aliased, and for each such symbol,
|
||||
// promoting one of its aliases to a "default alias", which is applied globally instead
|
||||
// of in a context-specific way.
|
||||
//
|
||||
// This has two benefits:
|
||||
// * It reduces the overhead of storing production-specific alias info in the parse table.
|
||||
// * Within an `ERROR` node, no context-specific aliases will be applied. This transformation
|
||||
// ensures that the children of an `ERROR` node have symbols that are consistent with the
|
||||
// way that they would appear in a valid syntax tree.
|
||||
pub(super) fn extract_default_aliases(
|
||||
syntax_grammar: &mut SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
) -> AliasMap {
|
||||
let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
|
||||
let mut non_terminal_status_list =
|
||||
vec![SymbolStatus::default(); syntax_grammar.variables.len()];
|
||||
let mut external_status_list =
|
||||
vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];
|
||||
|
||||
// For each grammar symbol, find all of the aliases under which the symbol appears,
|
||||
// and determine whether or not the symbol ever appears *unaliased*.
|
||||
for variable in syntax_grammar.variables.iter() {
|
||||
for production in variable.productions.iter() {
|
||||
for step in production.steps.iter() {
|
||||
let mut status = match step.symbol.kind {
|
||||
SymbolType::External => &mut external_status_list[step.symbol.index],
|
||||
SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
|
||||
SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
|
||||
SymbolType::End => panic!("Unexpected end token"),
|
||||
};
|
||||
|
||||
// Default aliases don't work for inlined variables.
|
||||
if syntax_grammar.variables_to_inline.contains(&step.symbol) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(alias) = &step.alias {
|
||||
if let Some(count_for_alias) = status
|
||||
.aliases
|
||||
.iter_mut()
|
||||
.find_map(|(a, count)| if a == alias { Some(count) } else { None })
|
||||
{
|
||||
*count_for_alias += 1;
|
||||
} else {
|
||||
status.aliases.push((alias.clone(), 1));
|
||||
}
|
||||
} else {
|
||||
status.appears_unaliased = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let symbols_with_statuses = (terminal_status_list
|
||||
.iter_mut()
|
||||
.enumerate()
|
||||
.map(|(i, status)| (Symbol::terminal(i), status)))
|
||||
.chain(
|
||||
non_terminal_status_list
|
||||
.iter_mut()
|
||||
.enumerate()
|
||||
.map(|(i, status)| (Symbol::non_terminal(i), status)),
|
||||
)
|
||||
.chain(
|
||||
external_status_list
|
||||
.iter_mut()
|
||||
.enumerate()
|
||||
.map(|(i, status)| (Symbol::external(i), status)),
|
||||
);
|
||||
|
||||
// For each symbol that always appears aliased, find the alias the occurs most often,
|
||||
// and designate that alias as the symbol's "default alias". Store all of these
|
||||
// default aliases in a map that will be returned.
|
||||
let mut result = AliasMap::new();
|
||||
for (symbol, status) in symbols_with_statuses {
|
||||
if status.appears_unaliased {
|
||||
status.aliases.clear();
|
||||
} else {
|
||||
if let Some(default_entry) = status
|
||||
.aliases
|
||||
.iter()
|
||||
.enumerate()
|
||||
.max_by_key(|(i, (_, count))| (count, -(*i as i64)))
|
||||
.map(|(_, entry)| entry.clone())
|
||||
{
|
||||
status.aliases.clear();
|
||||
status.aliases.push(default_entry.clone());
|
||||
result.insert(symbol, default_entry.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wherever a symbol is aliased as its default alias, remove the usage of the alias,
|
||||
// because it will now be redundant.
|
||||
let mut alias_positions_to_clear = Vec::new();
|
||||
for variable in syntax_grammar.variables.iter_mut() {
|
||||
alias_positions_to_clear.clear();
|
||||
|
||||
for (i, production) in variable.productions.iter().enumerate() {
|
||||
for (j, step) in production.steps.iter().enumerate() {
|
||||
let status = match step.symbol.kind {
|
||||
SymbolType::External => &mut external_status_list[step.symbol.index],
|
||||
SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
|
||||
SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
|
||||
SymbolType::End => panic!("Unexpected end token"),
|
||||
};
|
||||
|
||||
// If this step is aliased as the symbol's default alias, then remove that alias.
|
||||
if step.alias.is_some()
|
||||
&& step.alias.as_ref() == status.aliases.get(0).map(|t| &t.0)
|
||||
{
|
||||
let mut other_productions_must_use_this_alias_at_this_index = false;
|
||||
for (other_i, other_production) in variable.productions.iter().enumerate() {
|
||||
if other_i != i
|
||||
&& other_production.steps.len() > j
|
||||
&& other_production.steps[j].alias == step.alias
|
||||
&& result.get(&other_production.steps[j].symbol) != step.alias.as_ref()
|
||||
{
|
||||
other_productions_must_use_this_alias_at_this_index = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if !other_productions_must_use_this_alias_at_this_index {
|
||||
alias_positions_to_clear.push((i, j));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (production_index, step_index) in &alias_positions_to_clear {
|
||||
variable.productions[*production_index].steps[*step_index].alias = None;
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generate::grammars::{
        LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
    };
    use crate::generate::nfa::Nfa;

    // Checks which aliases get promoted to default aliases and which step-level
    // aliases are stripped from the grammar as a result.
    #[test]
    fn test_extract_default_aliases() {
        // A step for terminal `index` with no alias.
        let plain = |index: usize| ProductionStep::new(Symbol::terminal(index));
        // A step for terminal `index` carrying a named alias.
        let aliased = |index: usize, alias: &str| {
            ProductionStep::new(Symbol::terminal(index)).with_alias(alias, true)
        };
        // A named syntax variable with a single production made of `steps`.
        let variable = |name: &str, steps: Vec<ProductionStep>| SyntaxVariable {
            name: name.to_owned(),
            kind: VariableType::Named,
            productions: vec![Production {
                dynamic_precedence: 0,
                steps,
            }],
        };
        // An anonymous token of the lexical grammar.
        let token = |name: &str| LexicalVariable {
            name: name.to_string(),
            kind: VariableType::Anonymous,
            implicit_precedence: 0,
            start_state: 0,
        };
        // The named alias expected in the resulting map.
        let named_alias = |value: &str| Alias {
            value: value.to_string(),
            is_named: true,
        };

        let mut syntax_grammar = SyntaxGrammar {
            variables: vec![
                variable(
                    "v1",
                    vec![
                        aliased(0, "a1"),
                        aliased(1, "a2"),
                        aliased(2, "a3"),
                        aliased(3, "a4"),
                    ],
                ),
                variable(
                    "v2",
                    vec![
                        // Token 0 is always aliased as "a1".
                        aliased(0, "a1"),
                        // Token 1 is aliased within rule `v1` above, but not here.
                        plain(1),
                        // Token 2 is aliased differently here than in `v1`. The alias from
                        // `v1` should be promoted to the default alias, because `v1` appears
                        // first in the grammar.
                        aliased(2, "a5"),
                        // Token 3 is also aliased differently here than in `v1`. In this case,
                        // this alias should be promoted to the default alias, because it is
                        // used a greater number of times (twice).
                        aliased(3, "a6"),
                        aliased(3, "a6"),
                    ],
                ),
            ],
            extra_symbols: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),
            supertype_symbols: Vec::new(),
            external_tokens: Vec::new(),
            word_token: None,
        };

        let lexical_grammar = LexicalGrammar {
            nfa: Nfa::new(),
            variables: vec![token("t0"), token("t1"), token("t2"), token("t3")],
        };

        let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
        assert_eq!(default_aliases.len(), 3);

        assert_eq!(
            default_aliases.get(&Symbol::terminal(0)),
            Some(&named_alias("a1"))
        );
        assert_eq!(
            default_aliases.get(&Symbol::terminal(2)),
            Some(&named_alias("a3"))
        );
        assert_eq!(
            default_aliases.get(&Symbol::terminal(3)),
            Some(&named_alias("a6"))
        );
        assert_eq!(default_aliases.get(&Symbol::terminal(1)), None);

        // Steps that used their symbol's default alias lose it; every other alias stays.
        assert_eq!(
            syntax_grammar.variables,
            vec![
                variable(
                    "v1",
                    vec![plain(0), aliased(1, "a2"), plain(2), aliased(3, "a4")],
                ),
                variable(
                    "v2",
                    vec![plain(0), plain(1), aliased(2, "a5"), plain(3), plain(3)],
                ),
            ]
        );
    }
}
|
||||
|
|
@ -1,223 +0,0 @@
|
|||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
struct SymbolStatus {
|
||||
alias: Option<Alias>,
|
||||
conflicting: bool,
|
||||
}
|
||||
|
||||
pub(super) fn extract_simple_aliases(
|
||||
syntax_grammar: &mut SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
) -> AliasMap {
|
||||
// Determine which symbols in the grammars are *always* aliased to a single name.
|
||||
let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
|
||||
let mut non_terminal_status_list =
|
||||
vec![SymbolStatus::default(); syntax_grammar.variables.len()];
|
||||
let mut external_status_list =
|
||||
vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];
|
||||
for variable in syntax_grammar.variables.iter() {
|
||||
for production in variable.productions.iter() {
|
||||
for step in production.steps.iter() {
|
||||
let mut status = match step.symbol {
|
||||
Symbol {
|
||||
kind: SymbolType::External,
|
||||
index,
|
||||
} => &mut external_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::NonTerminal,
|
||||
index,
|
||||
} => &mut non_terminal_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::Terminal,
|
||||
index,
|
||||
} => &mut terminal_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::End,
|
||||
..
|
||||
} => panic!("Unexpected end token"),
|
||||
};
|
||||
|
||||
if step.alias.is_none() {
|
||||
status.alias = None;
|
||||
status.conflicting = true;
|
||||
}
|
||||
|
||||
if !status.conflicting {
|
||||
if status.alias.is_none() {
|
||||
status.alias = step.alias.clone();
|
||||
} else if status.alias != step.alias {
|
||||
status.alias = None;
|
||||
status.conflicting = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove the aliases for those symbols.
|
||||
for variable in syntax_grammar.variables.iter_mut() {
|
||||
for production in variable.productions.iter_mut() {
|
||||
for step in production.steps.iter_mut() {
|
||||
let status = match step.symbol {
|
||||
Symbol {
|
||||
kind: SymbolType::External,
|
||||
index,
|
||||
} => &external_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::NonTerminal,
|
||||
index,
|
||||
} => &non_terminal_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::Terminal,
|
||||
index,
|
||||
} => &terminal_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::End,
|
||||
..
|
||||
} => panic!("Unexpected end token"),
|
||||
};
|
||||
|
||||
if status.alias.is_some() {
|
||||
step.alias = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Populate a map of the symbols to their aliases.
|
||||
let mut result = AliasMap::new();
|
||||
for (i, status) in terminal_status_list.into_iter().enumerate() {
|
||||
if let Some(alias) = status.alias {
|
||||
result.insert(Symbol::terminal(i), alias);
|
||||
}
|
||||
}
|
||||
for (i, status) in non_terminal_status_list.into_iter().enumerate() {
|
||||
if let Some(alias) = status.alias {
|
||||
result.insert(Symbol::non_terminal(i), alias);
|
||||
}
|
||||
}
|
||||
for (i, status) in external_status_list.into_iter().enumerate() {
|
||||
if let Some(alias) = status.alias {
|
||||
result.insert(Symbol::external(i), alias);
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::generate::grammars::{
|
||||
LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
|
||||
};
|
||||
use crate::generate::nfa::Nfa;
|
||||
|
||||
#[test]
|
||||
fn test_extract_simple_aliases() {
|
||||
let mut syntax_grammar = SyntaxGrammar {
|
||||
variables: vec![
|
||||
SyntaxVariable {
|
||||
name: "v1".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
|
||||
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
|
||||
],
|
||||
}],
|
||||
},
|
||||
SyntaxVariable {
|
||||
name: "v2".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
// Token 0 is always aliased as "a1".
|
||||
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
|
||||
// Token 1 is aliased above, but not here.
|
||||
ProductionStep::new(Symbol::terminal(1)),
|
||||
// Token 2 is aliased differently than above.
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
|
||||
],
|
||||
}],
|
||||
},
|
||||
],
|
||||
extra_symbols: Vec::new(),
|
||||
expected_conflicts: Vec::new(),
|
||||
variables_to_inline: Vec::new(),
|
||||
supertype_symbols: Vec::new(),
|
||||
external_tokens: Vec::new(),
|
||||
word_token: None,
|
||||
};
|
||||
|
||||
let lexical_grammar = LexicalGrammar {
|
||||
nfa: Nfa::new(),
|
||||
variables: vec![
|
||||
LexicalVariable {
|
||||
name: "t1".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
LexicalVariable {
|
||||
name: "t2".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
LexicalVariable {
|
||||
name: "t3".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
|
||||
assert_eq!(simple_aliases.len(), 1);
|
||||
assert_eq!(
|
||||
simple_aliases[&Symbol::terminal(0)],
|
||||
Alias {
|
||||
value: "a1".to_string(),
|
||||
is_named: true,
|
||||
}
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
syntax_grammar.variables,
|
||||
vec![
|
||||
SyntaxVariable {
|
||||
name: "v1".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
// 'Simple' alias removed
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
// Other aliases unchanged
|
||||
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
|
||||
],
|
||||
},],
|
||||
},
|
||||
SyntaxVariable {
|
||||
name: "v2".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::terminal(1)),
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
|
||||
],
|
||||
},],
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
mod expand_repeats;
|
||||
mod expand_tokens;
|
||||
mod extract_simple_aliases;
|
||||
mod extract_default_aliases;
|
||||
mod extract_tokens;
|
||||
mod flatten_grammar;
|
||||
mod intern_symbols;
|
||||
|
|
@ -8,7 +8,7 @@ mod process_inlines;
|
|||
|
||||
use self::expand_repeats::expand_repeats;
|
||||
pub(crate) use self::expand_tokens::expand_tokens;
|
||||
use self::extract_simple_aliases::extract_simple_aliases;
|
||||
use self::extract_default_aliases::extract_default_aliases;
|
||||
use self::extract_tokens::extract_tokens;
|
||||
use self::flatten_grammar::flatten_grammar;
|
||||
use self::intern_symbols::intern_symbols;
|
||||
|
|
@ -52,7 +52,7 @@ pub(crate) fn prepare_grammar(
|
|||
let syntax_grammar = expand_repeats(syntax_grammar);
|
||||
let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
|
||||
let lexical_grammar = expand_tokens(lexical_grammar)?;
|
||||
let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
|
||||
let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
|
||||
let inlines = process_inlines(&syntax_grammar);
|
||||
Ok((syntax_grammar, lexical_grammar, inlines, simple_aliases))
|
||||
Ok((syntax_grammar, lexical_grammar, inlines, default_aliases))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ struct Generator {
|
|||
keyword_capture_token: Option<Symbol>,
|
||||
syntax_grammar: SyntaxGrammar,
|
||||
lexical_grammar: LexicalGrammar,
|
||||
simple_aliases: AliasMap,
|
||||
default_aliases: AliasMap,
|
||||
symbol_order: HashMap<Symbol, usize>,
|
||||
symbol_ids: HashMap<Symbol, String>,
|
||||
alias_ids: HashMap<Alias, String>,
|
||||
|
|
@ -143,49 +143,6 @@ impl Generator {
|
|||
self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
|
||||
}
|
||||
|
||||
let mut field_names = Vec::new();
|
||||
for production_info in &self.parse_table.production_infos {
|
||||
for field_name in production_info.field_map.keys() {
|
||||
field_names.push(field_name);
|
||||
}
|
||||
|
||||
for alias in &production_info.alias_sequence {
|
||||
if let Some(alias) = &alias {
|
||||
let alias_kind = alias.kind();
|
||||
let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
|
||||
let (name, kind) = self.metadata_for_symbol(*symbol);
|
||||
name == alias.value && kind == alias_kind
|
||||
});
|
||||
let alias_id = if let Some(symbol) = matching_symbol {
|
||||
self.symbol_ids[&symbol].clone()
|
||||
} else if alias.is_named {
|
||||
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
|
||||
} else {
|
||||
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
|
||||
};
|
||||
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.unique_aliases = self
|
||||
.alias_ids
|
||||
.keys()
|
||||
.filter(|alias| {
|
||||
self.parse_table
|
||||
.symbols
|
||||
.iter()
|
||||
.cloned()
|
||||
.find(|symbol| {
|
||||
let (name, kind) = self.metadata_for_symbol(*symbol);
|
||||
name == alias.value && kind == alias.kind()
|
||||
})
|
||||
.is_none()
|
||||
})
|
||||
.cloned()
|
||||
.collect();
|
||||
self.unique_aliases.sort_unstable();
|
||||
|
||||
self.symbol_map = self
|
||||
.parse_table
|
||||
.symbols
|
||||
|
|
@ -198,10 +155,10 @@ impl Generator {
|
|||
// public-facing symbol. If one of the symbols is not aliased, choose that one
|
||||
// to be the public-facing symbol. Otherwise, pick the symbol with the lowest
|
||||
// numeric value.
|
||||
if let Some(alias) = self.simple_aliases.get(symbol) {
|
||||
if let Some(alias) = self.default_aliases.get(symbol) {
|
||||
let kind = alias.kind();
|
||||
for other_symbol in &self.parse_table.symbols {
|
||||
if let Some(other_alias) = self.simple_aliases.get(other_symbol) {
|
||||
if let Some(other_alias) = self.default_aliases.get(other_symbol) {
|
||||
if other_symbol < mapping && other_alias == alias {
|
||||
mapping = other_symbol;
|
||||
}
|
||||
|
|
@ -230,13 +187,51 @@ impl Generator {
|
|||
})
|
||||
.collect();
|
||||
|
||||
field_names.sort_unstable();
|
||||
field_names.dedup();
|
||||
self.field_names = field_names.into_iter().cloned().collect();
|
||||
for production_info in &self.parse_table.production_infos {
|
||||
// Build a list of all field names
|
||||
for field_name in production_info.field_map.keys() {
|
||||
if let Err(i) = self.field_names.binary_search(&field_name) {
|
||||
self.field_names.insert(i, field_name.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// If we are opting in to the new unstable language ABI, then use the concept of
|
||||
// "small parse states". Otherwise, use the same representation for all parse
|
||||
// states.
|
||||
for alias in &production_info.alias_sequence {
|
||||
// Generate a mapping from aliases to C identifiers.
|
||||
if let Some(alias) = &alias {
|
||||
let existing_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
|
||||
if let Some(default_alias) = self.default_aliases.get(symbol) {
|
||||
default_alias == alias
|
||||
} else {
|
||||
let (name, kind) = self.metadata_for_symbol(*symbol);
|
||||
name == alias.value && kind == alias.kind()
|
||||
}
|
||||
});
|
||||
|
||||
// Some aliases match an existing symbol in the grammar.
|
||||
let alias_id;
|
||||
if let Some(existing_symbol) = existing_symbol {
|
||||
alias_id = self.symbol_ids[&self.symbol_map[&existing_symbol]].clone();
|
||||
}
|
||||
// Other aliases don't match any existing symbol, and need their own identifiers.
|
||||
else {
|
||||
if let Err(i) = self.unique_aliases.binary_search(alias) {
|
||||
self.unique_aliases.insert(i, alias.clone());
|
||||
}
|
||||
|
||||
alias_id = if alias.is_named {
|
||||
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
|
||||
} else {
|
||||
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
|
||||
};
|
||||
}
|
||||
|
||||
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Determine which states should use the "small state" representation, and which should
|
||||
// use the normal array representation.
|
||||
let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2);
|
||||
self.large_state_count = self
|
||||
.parse_table
|
||||
|
|
@ -361,7 +356,7 @@ impl Generator {
|
|||
indent!(self);
|
||||
for symbol in self.parse_table.symbols.iter() {
|
||||
let name = self.sanitize_string(
|
||||
self.simple_aliases
|
||||
self.default_aliases
|
||||
.get(symbol)
|
||||
.map(|alias| alias.value.as_str())
|
||||
.unwrap_or(self.metadata_for_symbol(*symbol).0),
|
||||
|
|
@ -444,7 +439,7 @@ impl Generator {
|
|||
for symbol in &self.parse_table.symbols {
|
||||
add_line!(self, "[{}] = {{", self.symbol_ids[&symbol]);
|
||||
indent!(self);
|
||||
if let Some(Alias { is_named, .. }) = self.simple_aliases.get(symbol) {
|
||||
if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) {
|
||||
add_line!(self, ".visible = true,");
|
||||
add_line!(self, ".named = {},", is_named);
|
||||
} else {
|
||||
|
|
@ -519,19 +514,22 @@ impl Generator {
|
|||
}
|
||||
|
||||
fn add_non_terminal_alias_map(&mut self) {
|
||||
let mut aliases_by_symbol = HashMap::new();
|
||||
let mut alias_ids_by_symbol = HashMap::new();
|
||||
for variable in &self.syntax_grammar.variables {
|
||||
for production in &variable.productions {
|
||||
for step in &production.steps {
|
||||
if let Some(alias) = &step.alias {
|
||||
if step.symbol.is_non_terminal()
|
||||
&& !self.simple_aliases.contains_key(&step.symbol)
|
||||
&& Some(alias) != self.default_aliases.get(&step.symbol)
|
||||
{
|
||||
if self.symbol_ids.contains_key(&step.symbol) {
|
||||
let alias_ids =
|
||||
aliases_by_symbol.entry(step.symbol).or_insert(Vec::new());
|
||||
if let Err(i) = alias_ids.binary_search(&alias) {
|
||||
alias_ids.insert(i, alias);
|
||||
if let Some(alias_id) = self.alias_ids.get(&alias) {
|
||||
let alias_ids = alias_ids_by_symbol
|
||||
.entry(step.symbol)
|
||||
.or_insert(Vec::new());
|
||||
if let Err(i) = alias_ids.binary_search(&alias_id) {
|
||||
alias_ids.insert(i, alias_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -540,19 +538,19 @@ impl Generator {
|
|||
}
|
||||
}
|
||||
|
||||
let mut aliases_by_symbol = aliases_by_symbol.iter().collect::<Vec<_>>();
|
||||
aliases_by_symbol.sort_unstable_by_key(|e| e.0);
|
||||
let mut alias_ids_by_symbol = alias_ids_by_symbol.iter().collect::<Vec<_>>();
|
||||
alias_ids_by_symbol.sort_unstable_by_key(|e| e.0);
|
||||
|
||||
add_line!(self, "static uint16_t ts_non_terminal_alias_map[] = {{");
|
||||
indent!(self);
|
||||
for (symbol, aliases) in aliases_by_symbol {
|
||||
for (symbol, alias_ids) in alias_ids_by_symbol {
|
||||
let symbol_id = &self.symbol_ids[symbol];
|
||||
let public_symbol_id = &self.symbol_ids[&self.symbol_map[&symbol]];
|
||||
add_line!(self, "{}, {},", symbol_id, 1 + aliases.len());
|
||||
add_line!(self, "{}, {},", symbol_id, 1 + alias_ids.len());
|
||||
indent!(self);
|
||||
add_line!(self, "{},", public_symbol_id);
|
||||
for alias in aliases {
|
||||
add_line!(self, "{},", &self.alias_ids[&alias]);
|
||||
for alias_id in alias_ids {
|
||||
add_line!(self, "{},", alias_id);
|
||||
}
|
||||
dedent!(self);
|
||||
}
|
||||
|
|
@ -1545,7 +1543,7 @@ impl Generator {
|
|||
/// for keyword capture, if any.
|
||||
/// * `syntax_grammar` - The syntax grammar extracted from the language's grammar
|
||||
/// * `lexical_grammar` - The lexical grammar extracted from the language's grammar
|
||||
/// * `simple_aliases` - A map describing the global rename rules that should apply.
|
||||
/// * `default_aliases` - A map describing the global rename rules that should apply.
|
||||
/// the keys are symbols that are *always* aliased in the same way, and the values
|
||||
/// are the aliases that are applied to those symbols.
|
||||
/// * `next_abi` - A boolean indicating whether to opt into the new, unstable parse
|
||||
|
|
@ -1558,7 +1556,7 @@ pub(crate) fn render_c_code(
|
|||
keyword_capture_token: Option<Symbol>,
|
||||
syntax_grammar: SyntaxGrammar,
|
||||
lexical_grammar: LexicalGrammar,
|
||||
simple_aliases: AliasMap,
|
||||
default_aliases: AliasMap,
|
||||
next_abi: bool,
|
||||
) -> String {
|
||||
Generator {
|
||||
|
|
@ -1572,7 +1570,7 @@ pub(crate) fn render_c_code(
|
|||
keyword_capture_token,
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
simple_aliases,
|
||||
default_aliases,
|
||||
symbol_ids: HashMap::new(),
|
||||
symbol_order: HashMap::new(),
|
||||
alias_ids: HashMap::new(),
|
||||
|
|
|
|||
|
|
@ -367,6 +367,30 @@ fn test_query_errors_on_impossible_patterns() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_verifies_possible_patterns_with_aliased_parent_nodes() {
|
||||
allocations::record(|| {
|
||||
let ruby = get_language("ruby");
|
||||
|
||||
Query::new(ruby, "(destructured_parameter (identifier))").unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Query::new(ruby, "(destructured_parameter (string))",),
|
||||
Err(QueryError {
|
||||
kind: QueryErrorKind::Structure,
|
||||
row: 0,
|
||||
offset: 24,
|
||||
column: 24,
|
||||
message: [
|
||||
"(destructured_parameter (string))", //
|
||||
" ^",
|
||||
]
|
||||
.join("\n")
|
||||
})
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_matches_with_simple_pattern() {
|
||||
allocations::record(|| {
|
||||
|
|
@ -1451,6 +1475,7 @@ fn test_query_matches_with_anonymous_tokens() {
|
|||
r#"
|
||||
";" @punctuation
|
||||
"&&" @operator
|
||||
"\"" @quote
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
|
@ -1458,9 +1483,11 @@ fn test_query_matches_with_anonymous_tokens() {
|
|||
assert_query_matches(
|
||||
language,
|
||||
&query,
|
||||
"foo(a && b);",
|
||||
r#"foo(a && "b");"#,
|
||||
&[
|
||||
(1, vec![("operator", "&&")]),
|
||||
(2, vec![("quote", "\"")]),
|
||||
(2, vec![("quote", "\"")]),
|
||||
(0, vec![("punctuation", ";")]),
|
||||
],
|
||||
);
|
||||
|
|
@ -1808,6 +1835,33 @@ fn test_query_matches_with_no_captures() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_matches_with_repeated_fields() {
|
||||
allocations::record(|| {
|
||||
let language = get_language("c");
|
||||
let query = Query::new(
|
||||
language,
|
||||
"(field_declaration declarator: (field_identifier) @field)",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_query_matches(
|
||||
language,
|
||||
&query,
|
||||
"
|
||||
struct S {
|
||||
int a, b, c;
|
||||
}
|
||||
",
|
||||
&[
|
||||
(0, vec![("field", "a")]),
|
||||
(0, vec![("field", "b")]),
|
||||
(0, vec![("field", "c")]),
|
||||
],
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_captures_basic() {
|
||||
allocations::record(|| {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue