Merge remote-tracking branch 'origin/master' into query-testy
This commit is contained in:
commit
c9c886d971
19 changed files with 721 additions and 459 deletions
4
Cargo.lock
generated
4
Cargo.lock
generated
|
|
@ -824,7 +824,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tree-sitter"
|
||||
version = "0.17.0"
|
||||
version = "0.17.1"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"regex",
|
||||
|
|
@ -832,7 +832,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tree-sitter-cli"
|
||||
version = "0.17.1"
|
||||
version = "0.17.3"
|
||||
dependencies = [
|
||||
"ansi_term",
|
||||
"atty",
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
name = "tree-sitter-cli"
|
||||
description = "CLI tool for developing, testing, and using Tree-sitter parsers"
|
||||
version = "0.17.1"
|
||||
version = "0.17.3"
|
||||
authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
|
||||
edition = "2018"
|
||||
license = "MIT"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "tree-sitter-cli",
|
||||
"version": "0.17.1",
|
||||
"version": "0.17.3",
|
||||
"author": "Max Brunsfeld",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
|
|
|||
|
|
@ -146,7 +146,7 @@ impl ChildQuantity {
|
|||
pub(crate) fn get_variable_info(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
simple_aliases: &AliasMap,
|
||||
default_aliases: &AliasMap,
|
||||
) -> Result<Vec<VariableInfo>> {
|
||||
let child_type_is_visible = |t: &ChildType| {
|
||||
variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous
|
||||
|
|
@ -185,7 +185,7 @@ pub(crate) fn get_variable_info(
|
|||
let child_symbol = step.symbol;
|
||||
let child_type = if let Some(alias) = &step.alias {
|
||||
ChildType::Aliased(alias.clone())
|
||||
} else if let Some(alias) = simple_aliases.get(&step.symbol) {
|
||||
} else if let Some(alias) = default_aliases.get(&step.symbol) {
|
||||
ChildType::Aliased(alias.clone())
|
||||
} else {
|
||||
ChildType::Normal(child_symbol)
|
||||
|
|
@ -358,7 +358,7 @@ pub(crate) fn get_variable_info(
|
|||
pub(crate) fn generate_node_types_json(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
simple_aliases: &AliasMap,
|
||||
default_aliases: &AliasMap,
|
||||
variable_info: &Vec<VariableInfo>,
|
||||
) -> Vec<NodeInfoJSON> {
|
||||
let mut node_types_json = BTreeMap::new();
|
||||
|
|
@ -369,7 +369,7 @@ pub(crate) fn generate_node_types_json(
|
|||
named: alias.is_named,
|
||||
},
|
||||
ChildType::Normal(symbol) => {
|
||||
if let Some(alias) = simple_aliases.get(&symbol) {
|
||||
if let Some(alias) = default_aliases.get(&symbol) {
|
||||
NodeTypeJSON {
|
||||
kind: alias.value.clone(),
|
||||
named: alias.is_named,
|
||||
|
|
@ -417,22 +417,33 @@ pub(crate) fn generate_node_types_json(
|
|||
};
|
||||
|
||||
let mut aliases_by_symbol = HashMap::new();
|
||||
for (symbol, alias) in simple_aliases {
|
||||
for (symbol, alias) in default_aliases {
|
||||
aliases_by_symbol.insert(*symbol, {
|
||||
let mut aliases = HashSet::new();
|
||||
aliases.insert(Some(alias.clone()));
|
||||
aliases
|
||||
});
|
||||
}
|
||||
for extra_symbol in &syntax_grammar.extra_symbols {
|
||||
if !default_aliases.contains_key(extra_symbol) {
|
||||
aliases_by_symbol
|
||||
.entry(*extra_symbol)
|
||||
.or_insert(HashSet::new())
|
||||
.insert(None);
|
||||
}
|
||||
}
|
||||
for variable in &syntax_grammar.variables {
|
||||
for production in &variable.productions {
|
||||
for step in &production.steps {
|
||||
if !simple_aliases.contains_key(&step.symbol) {
|
||||
aliases_by_symbol
|
||||
.entry(step.symbol)
|
||||
.or_insert(HashSet::new())
|
||||
.insert(step.alias.clone());
|
||||
}
|
||||
aliases_by_symbol
|
||||
.entry(step.symbol)
|
||||
.or_insert(HashSet::new())
|
||||
.insert(
|
||||
step.alias
|
||||
.as_ref()
|
||||
.or_else(|| default_aliases.get(&step.symbol))
|
||||
.cloned(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -722,9 +733,18 @@ mod tests {
|
|||
kind: VariableType::Named,
|
||||
rule: Rule::string("x"),
|
||||
},
|
||||
// This rule is not reachable from the start symbol
|
||||
// so it won't be present in the node_types
|
||||
Variable {
|
||||
name: "v3".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::string("y"),
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
assert_eq!(node_types.len(), 3);
|
||||
|
||||
assert_eq!(
|
||||
node_types[0],
|
||||
NodeInfoJSON {
|
||||
|
|
@ -784,6 +804,112 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
fn test_node_types_simple_extras() {
    // Expected entry for a node type without fields, children or subtypes.
    let leaf = |kind: &str, named: bool| NodeInfoJSON {
        kind: kind.to_string(),
        named,
        subtypes: None,
        children: None,
        fields: None,
    };

    // Expected description of a field that is required and holds exactly one node.
    let required_single = |kind: &str, named: bool| FieldInfoJSON {
        multiple: false,
        required: true,
        types: vec![NodeTypeJSON {
            kind: kind.to_string(),
            named,
        }],
    };

    let grammar = InputGrammar {
        name: String::new(),
        extra_symbols: vec![Rule::named("v3")],
        external_tokens: Vec::new(),
        expected_conflicts: Vec::new(),
        variables_to_inline: Vec::new(),
        word_token: None,
        supertype_symbols: Vec::new(),
        variables: vec![
            Variable {
                name: "v1".to_string(),
                kind: VariableType::Named,
                rule: Rule::seq(vec![
                    Rule::field("f1".to_string(), Rule::named("v2")),
                    Rule::field("f2".to_string(), Rule::string(";")),
                ]),
            },
            Variable {
                name: "v2".to_string(),
                kind: VariableType::Named,
                rule: Rule::string("x"),
            },
            // `v3` cannot be reached from the start symbol, but it is listed in
            // `extra_symbols`, so it is still expected to show up in the node types.
            Variable {
                name: "v3".to_string(),
                kind: VariableType::Named,
                rule: Rule::string("y"),
            },
        ],
    };
    let node_types = get_node_types(grammar);

    assert_eq!(node_types.len(), 4);

    assert_eq!(
        node_types[0],
        NodeInfoJSON {
            kind: "v1".to_string(),
            named: true,
            subtypes: None,
            children: None,
            fields: Some(
                vec![
                    ("f1".to_string(), required_single("v2", true)),
                    ("f2".to_string(), required_single(";", false)),
                ]
                .into_iter()
                .collect()
            ),
        }
    );
    assert_eq!(node_types[1], leaf(";", false));
    assert_eq!(node_types[2], leaf("v2", true));
    assert_eq!(node_types[3], leaf("v3", true));
}
|
||||
|
||||
#[test]
|
||||
fn test_node_types_with_supertypes() {
|
||||
let node_types = get_node_types(InputGrammar {
|
||||
|
|
@ -1685,14 +1811,14 @@ mod tests {
|
|||
}
|
||||
|
||||
fn get_node_types(grammar: InputGrammar) -> Vec<NodeInfoJSON> {
|
||||
let (syntax_grammar, lexical_grammar, _, simple_aliases) =
|
||||
let (syntax_grammar, lexical_grammar, _, default_aliases) =
|
||||
prepare_grammar(&grammar).unwrap();
|
||||
let variable_info =
|
||||
get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases).unwrap();
|
||||
get_variable_info(&syntax_grammar, &lexical_grammar, &default_aliases).unwrap();
|
||||
generate_node_types_json(
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
&simple_aliases,
|
||||
&default_aliases,
|
||||
&variable_info,
|
||||
)
|
||||
}
|
||||
|
|
|
|||
293
cli/src/generate/prepare_grammar/extract_default_aliases.rs
Normal file
293
cli/src/generate/prepare_grammar/extract_default_aliases.rs
Normal file
|
|
@ -0,0 +1,293 @@
|
|||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
struct SymbolStatus {
|
||||
aliases: Vec<(Alias, usize)>,
|
||||
appears_unaliased: bool,
|
||||
}
|
||||
|
||||
// Update the grammar by finding symbols that always are aliased, and for each such symbol,
|
||||
// promoting one of its aliases to a "default alias", which is applied globally instead
|
||||
// of in a context-specific way.
|
||||
//
|
||||
// This has two benefits:
|
||||
// * It reduces the overhead of storing production-specific alias info in the parse table.
|
||||
// * Within an `ERROR` node, no context-specific aliases will be applied. This transformation
|
||||
// ensures that the children of an `ERROR` node have symbols that are consistent with the
|
||||
// way that they would appear in a valid syntax tree.
|
||||
pub(super) fn extract_default_aliases(
|
||||
syntax_grammar: &mut SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
) -> AliasMap {
|
||||
let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
|
||||
let mut non_terminal_status_list =
|
||||
vec![SymbolStatus::default(); syntax_grammar.variables.len()];
|
||||
let mut external_status_list =
|
||||
vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];
|
||||
|
||||
// For each grammar symbol, find all of the aliases under which the symbol appears,
|
||||
// and determine whether or not the symbol ever appears *unaliased*.
|
||||
for variable in syntax_grammar.variables.iter() {
|
||||
for production in variable.productions.iter() {
|
||||
for step in production.steps.iter() {
|
||||
let mut status = match step.symbol.kind {
|
||||
SymbolType::External => &mut external_status_list[step.symbol.index],
|
||||
SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
|
||||
SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
|
||||
SymbolType::End => panic!("Unexpected end token"),
|
||||
};
|
||||
|
||||
// Default aliases don't work for inlined variables.
|
||||
if syntax_grammar.variables_to_inline.contains(&step.symbol) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(alias) = &step.alias {
|
||||
if let Some(count_for_alias) = status
|
||||
.aliases
|
||||
.iter_mut()
|
||||
.find_map(|(a, count)| if a == alias { Some(count) } else { None })
|
||||
{
|
||||
*count_for_alias += 1;
|
||||
} else {
|
||||
status.aliases.push((alias.clone(), 1));
|
||||
}
|
||||
} else {
|
||||
status.appears_unaliased = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let symbols_with_statuses = (terminal_status_list
|
||||
.iter_mut()
|
||||
.enumerate()
|
||||
.map(|(i, status)| (Symbol::terminal(i), status)))
|
||||
.chain(
|
||||
non_terminal_status_list
|
||||
.iter_mut()
|
||||
.enumerate()
|
||||
.map(|(i, status)| (Symbol::non_terminal(i), status)),
|
||||
)
|
||||
.chain(
|
||||
external_status_list
|
||||
.iter_mut()
|
||||
.enumerate()
|
||||
.map(|(i, status)| (Symbol::external(i), status)),
|
||||
);
|
||||
|
||||
// For each symbol that always appears aliased, find the alias the occurs most often,
|
||||
// and designate that alias as the symbol's "default alias". Store all of these
|
||||
// default aliases in a map that will be returned.
|
||||
let mut result = AliasMap::new();
|
||||
for (symbol, status) in symbols_with_statuses {
|
||||
if status.appears_unaliased {
|
||||
status.aliases.clear();
|
||||
} else {
|
||||
if let Some(default_entry) = status
|
||||
.aliases
|
||||
.iter()
|
||||
.enumerate()
|
||||
.max_by_key(|(i, (_, count))| (count, -(*i as i64)))
|
||||
.map(|(_, entry)| entry.clone())
|
||||
{
|
||||
status.aliases.clear();
|
||||
status.aliases.push(default_entry.clone());
|
||||
result.insert(symbol, default_entry.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wherever a symbol is aliased as its default alias, remove the usage of the alias,
|
||||
// because it will now be redundant.
|
||||
let mut alias_positions_to_clear = Vec::new();
|
||||
for variable in syntax_grammar.variables.iter_mut() {
|
||||
alias_positions_to_clear.clear();
|
||||
|
||||
for (i, production) in variable.productions.iter().enumerate() {
|
||||
for (j, step) in production.steps.iter().enumerate() {
|
||||
let status = match step.symbol.kind {
|
||||
SymbolType::External => &mut external_status_list[step.symbol.index],
|
||||
SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
|
||||
SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
|
||||
SymbolType::End => panic!("Unexpected end token"),
|
||||
};
|
||||
|
||||
// If this step is aliased as the symbol's default alias, then remove that alias.
|
||||
if step.alias.is_some()
|
||||
&& step.alias.as_ref() == status.aliases.get(0).map(|t| &t.0)
|
||||
{
|
||||
let mut other_productions_must_use_this_alias_at_this_index = false;
|
||||
for (other_i, other_production) in variable.productions.iter().enumerate() {
|
||||
if other_i != i
|
||||
&& other_production.steps.len() > j
|
||||
&& other_production.steps[j].alias == step.alias
|
||||
&& result.get(&other_production.steps[j].symbol) != step.alias.as_ref()
|
||||
{
|
||||
other_productions_must_use_this_alias_at_this_index = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if !other_productions_must_use_this_alias_at_this_index {
|
||||
alias_positions_to_clear.push((i, j));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (production_index, step_index) in &alias_positions_to_clear {
|
||||
variable.productions[*production_index].steps[*step_index].alias = None;
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generate::grammars::{
        LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
    };
    use crate::generate::nfa::Nfa;

    /// Checks which aliases are promoted to default aliases and which per-step aliases
    /// are removed from the grammar as a result.
    #[test]
    fn test_extract_default_aliases() {
        let mut syntax_grammar = SyntaxGrammar {
            variables: vec![
                SyntaxVariable {
                    name: "v1".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
                            ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
                            ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
                            ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true),
                        ],
                    }],
                },
                SyntaxVariable {
                    name: "v2".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            // Token 0 is always aliased as "a1".
                            ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
                            // Token 1 is aliased within rule `v1` above, but not here.
                            ProductionStep::new(Symbol::terminal(1)),
                            // Token 2 is aliased differently here than in `v1`. The alias from
                            // `v1` should be promoted to the default alias, because `v1` appears
                            // first in the grammar.
                            ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true),
                            // Token 3 is also aliased differently here than in `v1`. In this case,
                            // this alias should be promoted to the default alias, because it is
                            // used a greater number of times (twice).
                            ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true),
                            ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true),
                        ],
                    }],
                },
            ],
            extra_symbols: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),
            supertype_symbols: Vec::new(),
            external_tokens: Vec::new(),
            word_token: None,
        };

        // Four anonymous tokens that differ only by name.
        let lexical_grammar = LexicalGrammar {
            nfa: Nfa::new(),
            variables: ["t0", "t1", "t2", "t3"]
                .iter()
                .map(|&name| LexicalVariable {
                    name: name.to_string(),
                    kind: VariableType::Anonymous,
                    implicit_precedence: 0,
                    start_state: 0,
                })
                .collect(),
        };

        let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
        assert_eq!(default_aliases.len(), 3);

        assert_eq!(
            default_aliases.get(&Symbol::terminal(0)),
            Some(&Alias {
                value: "a1".to_string(),
                is_named: true,
            })
        );
        assert_eq!(
            default_aliases.get(&Symbol::terminal(2)),
            Some(&Alias {
                value: "a3".to_string(),
                is_named: true,
            })
        );
        assert_eq!(
            default_aliases.get(&Symbol::terminal(3)),
            Some(&Alias {
                value: "a6".to_string(),
                is_named: true,
            })
        );
        assert_eq!(default_aliases.get(&Symbol::terminal(1)), None);

        // Steps that used their symbol's default alias lose it; all other aliases remain.
        assert_eq!(
            syntax_grammar.variables,
            vec![
                SyntaxVariable {
                    name: "v1".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(0)),
                            ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
                            ProductionStep::new(Symbol::terminal(2)),
                            ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true),
                        ],
                    }],
                },
                SyntaxVariable {
                    name: "v2".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(0)),
                            ProductionStep::new(Symbol::terminal(1)),
                            ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true),
                            ProductionStep::new(Symbol::terminal(3)),
                            ProductionStep::new(Symbol::terminal(3)),
                        ],
                    }],
                },
            ]
        );
    }
}
|
||||
|
|
@ -1,223 +0,0 @@
|
|||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
struct SymbolStatus {
|
||||
alias: Option<Alias>,
|
||||
conflicting: bool,
|
||||
}
|
||||
|
||||
pub(super) fn extract_simple_aliases(
|
||||
syntax_grammar: &mut SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
) -> AliasMap {
|
||||
// Determine which symbols in the grammars are *always* aliased to a single name.
|
||||
let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
|
||||
let mut non_terminal_status_list =
|
||||
vec![SymbolStatus::default(); syntax_grammar.variables.len()];
|
||||
let mut external_status_list =
|
||||
vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];
|
||||
for variable in syntax_grammar.variables.iter() {
|
||||
for production in variable.productions.iter() {
|
||||
for step in production.steps.iter() {
|
||||
let mut status = match step.symbol {
|
||||
Symbol {
|
||||
kind: SymbolType::External,
|
||||
index,
|
||||
} => &mut external_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::NonTerminal,
|
||||
index,
|
||||
} => &mut non_terminal_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::Terminal,
|
||||
index,
|
||||
} => &mut terminal_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::End,
|
||||
..
|
||||
} => panic!("Unexpected end token"),
|
||||
};
|
||||
|
||||
if step.alias.is_none() {
|
||||
status.alias = None;
|
||||
status.conflicting = true;
|
||||
}
|
||||
|
||||
if !status.conflicting {
|
||||
if status.alias.is_none() {
|
||||
status.alias = step.alias.clone();
|
||||
} else if status.alias != step.alias {
|
||||
status.alias = None;
|
||||
status.conflicting = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove the aliases for those symbols.
|
||||
for variable in syntax_grammar.variables.iter_mut() {
|
||||
for production in variable.productions.iter_mut() {
|
||||
for step in production.steps.iter_mut() {
|
||||
let status = match step.symbol {
|
||||
Symbol {
|
||||
kind: SymbolType::External,
|
||||
index,
|
||||
} => &external_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::NonTerminal,
|
||||
index,
|
||||
} => &non_terminal_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::Terminal,
|
||||
index,
|
||||
} => &terminal_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::End,
|
||||
..
|
||||
} => panic!("Unexpected end token"),
|
||||
};
|
||||
|
||||
if status.alias.is_some() {
|
||||
step.alias = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Populate a map of the symbols to their aliases.
|
||||
let mut result = AliasMap::new();
|
||||
for (i, status) in terminal_status_list.into_iter().enumerate() {
|
||||
if let Some(alias) = status.alias {
|
||||
result.insert(Symbol::terminal(i), alias);
|
||||
}
|
||||
}
|
||||
for (i, status) in non_terminal_status_list.into_iter().enumerate() {
|
||||
if let Some(alias) = status.alias {
|
||||
result.insert(Symbol::non_terminal(i), alias);
|
||||
}
|
||||
}
|
||||
for (i, status) in external_status_list.into_iter().enumerate() {
|
||||
if let Some(alias) = status.alias {
|
||||
result.insert(Symbol::external(i), alias);
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::generate::grammars::{
|
||||
LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
|
||||
};
|
||||
use crate::generate::nfa::Nfa;
|
||||
|
||||
#[test]
|
||||
fn test_extract_simple_aliases() {
|
||||
let mut syntax_grammar = SyntaxGrammar {
|
||||
variables: vec![
|
||||
SyntaxVariable {
|
||||
name: "v1".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
|
||||
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
|
||||
],
|
||||
}],
|
||||
},
|
||||
SyntaxVariable {
|
||||
name: "v2".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
// Token 0 is always aliased as "a1".
|
||||
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
|
||||
// Token 1 is aliased above, but not here.
|
||||
ProductionStep::new(Symbol::terminal(1)),
|
||||
// Token 2 is aliased differently than above.
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
|
||||
],
|
||||
}],
|
||||
},
|
||||
],
|
||||
extra_symbols: Vec::new(),
|
||||
expected_conflicts: Vec::new(),
|
||||
variables_to_inline: Vec::new(),
|
||||
supertype_symbols: Vec::new(),
|
||||
external_tokens: Vec::new(),
|
||||
word_token: None,
|
||||
};
|
||||
|
||||
let lexical_grammar = LexicalGrammar {
|
||||
nfa: Nfa::new(),
|
||||
variables: vec![
|
||||
LexicalVariable {
|
||||
name: "t1".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
LexicalVariable {
|
||||
name: "t2".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
LexicalVariable {
|
||||
name: "t3".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
|
||||
assert_eq!(simple_aliases.len(), 1);
|
||||
assert_eq!(
|
||||
simple_aliases[&Symbol::terminal(0)],
|
||||
Alias {
|
||||
value: "a1".to_string(),
|
||||
is_named: true,
|
||||
}
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
syntax_grammar.variables,
|
||||
vec![
|
||||
SyntaxVariable {
|
||||
name: "v1".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
// 'Simple' alias removed
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
// Other aliases unchanged
|
||||
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
|
||||
],
|
||||
},],
|
||||
},
|
||||
SyntaxVariable {
|
||||
name: "v2".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::terminal(1)),
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
|
||||
],
|
||||
},],
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
mod expand_repeats;
|
||||
mod expand_tokens;
|
||||
mod extract_simple_aliases;
|
||||
mod extract_default_aliases;
|
||||
mod extract_tokens;
|
||||
mod flatten_grammar;
|
||||
mod intern_symbols;
|
||||
|
|
@ -8,7 +8,7 @@ mod process_inlines;
|
|||
|
||||
use self::expand_repeats::expand_repeats;
|
||||
pub(crate) use self::expand_tokens::expand_tokens;
|
||||
use self::extract_simple_aliases::extract_simple_aliases;
|
||||
use self::extract_default_aliases::extract_default_aliases;
|
||||
use self::extract_tokens::extract_tokens;
|
||||
use self::flatten_grammar::flatten_grammar;
|
||||
use self::intern_symbols::intern_symbols;
|
||||
|
|
@ -52,7 +52,7 @@ pub(crate) fn prepare_grammar(
|
|||
let syntax_grammar = expand_repeats(syntax_grammar);
|
||||
let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
|
||||
let lexical_grammar = expand_tokens(lexical_grammar)?;
|
||||
let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
|
||||
let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
|
||||
let inlines = process_inlines(&syntax_grammar);
|
||||
Ok((syntax_grammar, lexical_grammar, inlines, simple_aliases))
|
||||
Ok((syntax_grammar, lexical_grammar, inlines, default_aliases))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ struct Generator {
|
|||
keyword_capture_token: Option<Symbol>,
|
||||
syntax_grammar: SyntaxGrammar,
|
||||
lexical_grammar: LexicalGrammar,
|
||||
simple_aliases: AliasMap,
|
||||
default_aliases: AliasMap,
|
||||
symbol_order: HashMap<Symbol, usize>,
|
||||
symbol_ids: HashMap<Symbol, String>,
|
||||
alias_ids: HashMap<Alias, String>,
|
||||
|
|
@ -143,49 +143,6 @@ impl Generator {
|
|||
self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
|
||||
}
|
||||
|
||||
let mut field_names = Vec::new();
|
||||
for production_info in &self.parse_table.production_infos {
|
||||
for field_name in production_info.field_map.keys() {
|
||||
field_names.push(field_name);
|
||||
}
|
||||
|
||||
for alias in &production_info.alias_sequence {
|
||||
if let Some(alias) = &alias {
|
||||
let alias_kind = alias.kind();
|
||||
let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
|
||||
let (name, kind) = self.metadata_for_symbol(*symbol);
|
||||
name == alias.value && kind == alias_kind
|
||||
});
|
||||
let alias_id = if let Some(symbol) = matching_symbol {
|
||||
self.symbol_ids[&symbol].clone()
|
||||
} else if alias.is_named {
|
||||
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
|
||||
} else {
|
||||
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
|
||||
};
|
||||
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.unique_aliases = self
|
||||
.alias_ids
|
||||
.keys()
|
||||
.filter(|alias| {
|
||||
self.parse_table
|
||||
.symbols
|
||||
.iter()
|
||||
.cloned()
|
||||
.find(|symbol| {
|
||||
let (name, kind) = self.metadata_for_symbol(*symbol);
|
||||
name == alias.value && kind == alias.kind()
|
||||
})
|
||||
.is_none()
|
||||
})
|
||||
.cloned()
|
||||
.collect();
|
||||
self.unique_aliases.sort_unstable();
|
||||
|
||||
self.symbol_map = self
|
||||
.parse_table
|
||||
.symbols
|
||||
|
|
@ -198,10 +155,10 @@ impl Generator {
|
|||
// public-facing symbol. If one of the symbols is not aliased, choose that one
|
||||
// to be the public-facing symbol. Otherwise, pick the symbol with the lowest
|
||||
// numeric value.
|
||||
if let Some(alias) = self.simple_aliases.get(symbol) {
|
||||
if let Some(alias) = self.default_aliases.get(symbol) {
|
||||
let kind = alias.kind();
|
||||
for other_symbol in &self.parse_table.symbols {
|
||||
if let Some(other_alias) = self.simple_aliases.get(other_symbol) {
|
||||
if let Some(other_alias) = self.default_aliases.get(other_symbol) {
|
||||
if other_symbol < mapping && other_alias == alias {
|
||||
mapping = other_symbol;
|
||||
}
|
||||
|
|
@ -230,13 +187,51 @@ impl Generator {
|
|||
})
|
||||
.collect();
|
||||
|
||||
field_names.sort_unstable();
|
||||
field_names.dedup();
|
||||
self.field_names = field_names.into_iter().cloned().collect();
|
||||
for production_info in &self.parse_table.production_infos {
|
||||
// Build a list of all field names
|
||||
for field_name in production_info.field_map.keys() {
|
||||
if let Err(i) = self.field_names.binary_search(&field_name) {
|
||||
self.field_names.insert(i, field_name.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// If we are opting in to the new unstable language ABI, then use the concept of
|
||||
// "small parse states". Otherwise, use the same representation for all parse
|
||||
// states.
|
||||
for alias in &production_info.alias_sequence {
|
||||
// Generate a mapping from aliases to C identifiers.
|
||||
if let Some(alias) = &alias {
|
||||
let existing_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
|
||||
if let Some(default_alias) = self.default_aliases.get(symbol) {
|
||||
default_alias == alias
|
||||
} else {
|
||||
let (name, kind) = self.metadata_for_symbol(*symbol);
|
||||
name == alias.value && kind == alias.kind()
|
||||
}
|
||||
});
|
||||
|
||||
// Some aliases match an existing symbol in the grammar.
|
||||
let alias_id;
|
||||
if let Some(existing_symbol) = existing_symbol {
|
||||
alias_id = self.symbol_ids[&self.symbol_map[&existing_symbol]].clone();
|
||||
}
|
||||
// Other aliases don't match any existing symbol, and need their own identifiers.
|
||||
else {
|
||||
if let Err(i) = self.unique_aliases.binary_search(alias) {
|
||||
self.unique_aliases.insert(i, alias.clone());
|
||||
}
|
||||
|
||||
alias_id = if alias.is_named {
|
||||
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
|
||||
} else {
|
||||
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
|
||||
};
|
||||
}
|
||||
|
||||
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Determine which states should use the "small state" representation, and which should
|
||||
// use the normal array representation.
|
||||
let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2);
|
||||
self.large_state_count = self
|
||||
.parse_table
|
||||
|
|
@ -361,7 +356,7 @@ impl Generator {
|
|||
indent!(self);
|
||||
for symbol in self.parse_table.symbols.iter() {
|
||||
let name = self.sanitize_string(
|
||||
self.simple_aliases
|
||||
self.default_aliases
|
||||
.get(symbol)
|
||||
.map(|alias| alias.value.as_str())
|
||||
.unwrap_or(self.metadata_for_symbol(*symbol).0),
|
||||
|
|
@ -444,7 +439,7 @@ impl Generator {
|
|||
for symbol in &self.parse_table.symbols {
|
||||
add_line!(self, "[{}] = {{", self.symbol_ids[&symbol]);
|
||||
indent!(self);
|
||||
if let Some(Alias { is_named, .. }) = self.simple_aliases.get(symbol) {
|
||||
if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) {
|
||||
add_line!(self, ".visible = true,");
|
||||
add_line!(self, ".named = {},", is_named);
|
||||
} else {
|
||||
|
|
@ -519,19 +514,22 @@ impl Generator {
|
|||
}
|
||||
|
||||
fn add_non_terminal_alias_map(&mut self) {
|
||||
let mut aliases_by_symbol = HashMap::new();
|
||||
let mut alias_ids_by_symbol = HashMap::new();
|
||||
for variable in &self.syntax_grammar.variables {
|
||||
for production in &variable.productions {
|
||||
for step in &production.steps {
|
||||
if let Some(alias) = &step.alias {
|
||||
if step.symbol.is_non_terminal()
|
||||
&& !self.simple_aliases.contains_key(&step.symbol)
|
||||
&& Some(alias) != self.default_aliases.get(&step.symbol)
|
||||
{
|
||||
if self.symbol_ids.contains_key(&step.symbol) {
|
||||
let alias_ids =
|
||||
aliases_by_symbol.entry(step.symbol).or_insert(Vec::new());
|
||||
if let Err(i) = alias_ids.binary_search(&alias) {
|
||||
alias_ids.insert(i, alias);
|
||||
if let Some(alias_id) = self.alias_ids.get(&alias) {
|
||||
let alias_ids = alias_ids_by_symbol
|
||||
.entry(step.symbol)
|
||||
.or_insert(Vec::new());
|
||||
if let Err(i) = alias_ids.binary_search(&alias_id) {
|
||||
alias_ids.insert(i, alias_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -540,19 +538,19 @@ impl Generator {
|
|||
}
|
||||
}
|
||||
|
||||
let mut aliases_by_symbol = aliases_by_symbol.iter().collect::<Vec<_>>();
|
||||
aliases_by_symbol.sort_unstable_by_key(|e| e.0);
|
||||
let mut alias_ids_by_symbol = alias_ids_by_symbol.iter().collect::<Vec<_>>();
|
||||
alias_ids_by_symbol.sort_unstable_by_key(|e| e.0);
|
||||
|
||||
add_line!(self, "static uint16_t ts_non_terminal_alias_map[] = {{");
|
||||
indent!(self);
|
||||
for (symbol, aliases) in aliases_by_symbol {
|
||||
for (symbol, alias_ids) in alias_ids_by_symbol {
|
||||
let symbol_id = &self.symbol_ids[symbol];
|
||||
let public_symbol_id = &self.symbol_ids[&self.symbol_map[&symbol]];
|
||||
add_line!(self, "{}, {},", symbol_id, 1 + aliases.len());
|
||||
add_line!(self, "{}, {},", symbol_id, 1 + alias_ids.len());
|
||||
indent!(self);
|
||||
add_line!(self, "{},", public_symbol_id);
|
||||
for alias in aliases {
|
||||
add_line!(self, "{},", &self.alias_ids[&alias]);
|
||||
for alias_id in alias_ids {
|
||||
add_line!(self, "{},", alias_id);
|
||||
}
|
||||
dedent!(self);
|
||||
}
|
||||
|
|
@ -1545,7 +1543,7 @@ impl Generator {
|
|||
/// for keyword capture, if any.
|
||||
/// * `syntax_grammar` - The syntax grammar extracted from the language's grammar
|
||||
/// * `lexical_grammar` - The lexical grammar extracted from the language's grammar
|
||||
/// * `simple_aliases` - A map describing the global rename rules that should apply.
|
||||
/// * `default_aliases` - A map describing the global rename rules that should apply.
|
||||
/// the keys are symbols that are *always* aliased in the same way, and the values
|
||||
/// are the aliases that are applied to those symbols.
|
||||
/// * `next_abi` - A boolean indicating whether to opt into the new, unstable parse
|
||||
|
|
@ -1558,7 +1556,7 @@ pub(crate) fn render_c_code(
|
|||
keyword_capture_token: Option<Symbol>,
|
||||
syntax_grammar: SyntaxGrammar,
|
||||
lexical_grammar: LexicalGrammar,
|
||||
simple_aliases: AliasMap,
|
||||
default_aliases: AliasMap,
|
||||
next_abi: bool,
|
||||
) -> String {
|
||||
Generator {
|
||||
|
|
@ -1572,7 +1570,7 @@ pub(crate) fn render_c_code(
|
|||
keyword_capture_token,
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
simple_aliases,
|
||||
default_aliases,
|
||||
symbol_ids: HashMap::new(),
|
||||
symbol_order: HashMap::new(),
|
||||
alias_ids: HashMap::new(),
|
||||
|
|
|
|||
|
|
@ -367,6 +367,30 @@ fn test_query_errors_on_impossible_patterns() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_verifies_possible_patterns_with_aliased_parent_nodes() {
|
||||
allocations::record(|| {
|
||||
let ruby = get_language("ruby");
|
||||
|
||||
Query::new(ruby, "(destructured_parameter (identifier))").unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Query::new(ruby, "(destructured_parameter (string))",),
|
||||
Err(QueryError {
|
||||
kind: QueryErrorKind::Structure,
|
||||
row: 0,
|
||||
offset: 24,
|
||||
column: 24,
|
||||
message: [
|
||||
"(destructured_parameter (string))", //
|
||||
" ^",
|
||||
]
|
||||
.join("\n")
|
||||
})
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_matches_with_simple_pattern() {
|
||||
allocations::record(|| {
|
||||
|
|
@ -1451,6 +1475,7 @@ fn test_query_matches_with_anonymous_tokens() {
|
|||
r#"
|
||||
";" @punctuation
|
||||
"&&" @operator
|
||||
"\"" @quote
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
|
@ -1458,9 +1483,11 @@ fn test_query_matches_with_anonymous_tokens() {
|
|||
assert_query_matches(
|
||||
language,
|
||||
&query,
|
||||
"foo(a && b);",
|
||||
r#"foo(a && "b");"#,
|
||||
&[
|
||||
(1, vec![("operator", "&&")]),
|
||||
(2, vec![("quote", "\"")]),
|
||||
(2, vec![("quote", "\"")]),
|
||||
(0, vec![("punctuation", ";")]),
|
||||
],
|
||||
);
|
||||
|
|
@ -1808,6 +1835,33 @@ fn test_query_matches_with_no_captures() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_matches_with_repeated_fields() {
|
||||
allocations::record(|| {
|
||||
let language = get_language("c");
|
||||
let query = Query::new(
|
||||
language,
|
||||
"(field_declaration declarator: (field_identifier) @field)",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_query_matches(
|
||||
language,
|
||||
&query,
|
||||
"
|
||||
struct S {
|
||||
int a, b, c;
|
||||
}
|
||||
",
|
||||
&[
|
||||
(0, vec![("field", "a")]),
|
||||
(0, vec![("field", "b")]),
|
||||
(0, vec![("field", "c")]),
|
||||
],
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_captures_basic() {
|
||||
allocations::record(|| {
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ Parsers for these languages are fairly complete:
|
|||
* [Elm](https://github.com/razzeee/tree-sitter-elm)
|
||||
* [Eno](https://github.com/eno-lang/tree-sitter-eno)
|
||||
* [ERB / EJS](https://github.com/tree-sitter/tree-sitter-embedded-template)
|
||||
- [Fennel](https://github.com/travonted/tree-sitter-fennel)
|
||||
* [Go](https://github.com/tree-sitter/tree-sitter-go)
|
||||
* [HTML](https://github.com/tree-sitter/tree-sitter-html)
|
||||
* [Java](https://github.com/tree-sitter/tree-sitter-java)
|
||||
|
|
@ -49,6 +50,7 @@ Parsers for these languages are fairly complete:
|
|||
* [TOML](https://github.com/ikatyang/tree-sitter-toml)
|
||||
* [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript)
|
||||
* [Verilog](https://github.com/tree-sitter/tree-sitter-verilog)
|
||||
* [VHDL](https://github.com/alemuller/tree-sitter-vhdl)
|
||||
* [Vue](https://github.com/ikatyang/tree-sitter-vue)
|
||||
* [YAML](https://github.com/ikatyang/tree-sitter-yaml)
|
||||
* [WASM](https://github.com/wasm-lsp/tree-sitter-wasm)
|
||||
|
|
|
|||
|
|
@ -210,6 +210,7 @@ The following is a complete list of built-in functions you can use in your `gram
|
|||
* **Right Associativity : `prec.right([number], rule)`** - This function is like `prec.left`, but it instructs Tree-sitter to prefer matching a rule that ends *later*.
|
||||
* **Dynamic Precedence : `prec.dynamic(number, rule)`** - This function is similar to `prec`, but the given numerical precedence is applied at *runtime* instead of at parser generation time. This is only necessary when handling a conflict dynamically using the `conflicts` field in the grammar, and when there is a genuine *ambiguity*: multiple rules correctly match a given piece of code. In that event, Tree-sitter compares the total dynamic precedence associated with each rule, and selects the one with the highest total. This is similar to [dynamic precedence directives][bison-dprec] in Bison grammars.
|
||||
* **Tokens : `token(rule)`** - This function marks the given rule as producing only a single token. Tree-sitter's default is to treat each String or RegExp literal in the grammar as a separate token. Each token is matched separately by the lexer and returned as its own leaf node in the tree. The `token` function allows you to express a complex rule using the functions described above (rather than as a single regular expression) but still have Tree-sitter treat it as a single token.
|
||||
* **Immediate Tokens : `token.immediate(rule)`** - Usually, whitespace (and any other extras, such as comments) is optional before each token. This function means that the token will only match if there is no whitespace.
|
||||
* **Aliases : `alias(rule, name)`** - This function causes the given rule to *appear* with an alternative name in the syntax tree. If `name` is a *symbol*, as in `alias($.foo, $.bar)`, then the aliased rule will *appear* as a [named node][named-vs-anonymous-nodes-section] called `bar`. And if `name` is a *string literal*, as in `alias($.foo, 'bar')`, then the aliased rule will appear as an [anonymous node][named-vs-anonymous-nodes-section], as if the rule had been written as the simple string.
|
||||
* **Field Names : `field(name, rule)`** - This function assigns a *field name* to the child node(s) matched by the given rule. In the resulting syntax tree, you can then use that field name to access specific children.
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
name = "tree-sitter"
|
||||
description = "Rust bindings to the Tree-sitter parsing library"
|
||||
version = "0.17.0"
|
||||
version = "0.17.1"
|
||||
authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
|
||||
license = "MIT"
|
||||
readme = "binding_rust/README.md"
|
||||
|
|
|
|||
34
lib/binding_web/tree-sitter-web.d.ts
vendored
34
lib/binding_web/tree-sitter-web.d.ts
vendored
|
|
@ -37,7 +37,7 @@ declare module 'web-tree-sitter' {
|
|||
|
||||
export type Logger = (
|
||||
message: string,
|
||||
params: {[param: string]: string},
|
||||
params: { [param: string]: string },
|
||||
type: "parse" | "lex"
|
||||
) => void;
|
||||
|
||||
|
|
@ -48,9 +48,9 @@ declare module 'web-tree-sitter' {
|
|||
) => string | null;
|
||||
|
||||
export interface SyntaxNode {
|
||||
id: number;
|
||||
tree: Tree;
|
||||
type: string;
|
||||
isNamed: boolean;
|
||||
text: string;
|
||||
startPosition: Point;
|
||||
endPosition: Point;
|
||||
|
|
@ -74,6 +74,7 @@ declare module 'web-tree-sitter' {
|
|||
hasError(): boolean;
|
||||
equals(other: SyntaxNode): boolean;
|
||||
isMissing(): boolean;
|
||||
isNamed(): boolean;
|
||||
toString(): string;
|
||||
child(index: number): SyntaxNode | null;
|
||||
namedChild(index: number): SyntaxNode | null;
|
||||
|
|
@ -131,8 +132,33 @@ declare module 'web-tree-sitter' {
|
|||
readonly version: number;
|
||||
readonly fieldCount: number;
|
||||
|
||||
fieldNameForId(fieldId: number): string | null
|
||||
fieldIdForName(fieldName: string): number | null
|
||||
fieldNameForId(fieldId: number): string | null;
|
||||
fieldIdForName(fieldName: string): number | null;
|
||||
query(source: string): Query;
|
||||
}
|
||||
|
||||
interface QueryCapture {
|
||||
name: string;
|
||||
node: SyntaxNode;
|
||||
}
|
||||
|
||||
interface QueryMatch {
|
||||
pattern: number;
|
||||
captures: QueryCapture[];
|
||||
}
|
||||
|
||||
interface PredicateResult {
|
||||
operator: string;
|
||||
operands: { name: string; type: string }[];
|
||||
}
|
||||
|
||||
class Query {
|
||||
captureNames: string[];
|
||||
|
||||
delete(): void;
|
||||
matches(node: SyntaxNode, startPosition?: Point, endPosition?: Point): QueryMatch[];
|
||||
captures(node: SyntaxNode, startPosition?: Point, endPosition?: Point): QueryCapture[];
|
||||
predicatesForPattern(patternIndex: number): PredicateResult[];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -17,24 +17,28 @@ void *ts_record_realloc(void *, size_t);
|
|||
void ts_record_free(void *);
|
||||
bool ts_toggle_allocation_recording(bool);
|
||||
|
||||
static inline void *ts_malloc(size_t size) {
|
||||
return ts_record_malloc(size);
|
||||
}
|
||||
|
||||
static inline void *ts_calloc(size_t count, size_t size) {
|
||||
return ts_record_calloc(count, size);
|
||||
}
|
||||
|
||||
static inline void *ts_realloc(void *buffer, size_t size) {
|
||||
return ts_record_realloc(buffer, size);
|
||||
}
|
||||
|
||||
static inline void ts_free(void *buffer) {
|
||||
ts_record_free(buffer);
|
||||
}
|
||||
#define ts_malloc ts_record_malloc
|
||||
#define ts_calloc ts_record_calloc
|
||||
#define ts_realloc ts_record_realloc
|
||||
#define ts_free ts_record_free
|
||||
|
||||
#else
|
||||
|
||||
// Allow clients to override allocation functions
|
||||
|
||||
#ifndef ts_malloc
|
||||
#define ts_malloc ts_malloc_default
|
||||
#endif
|
||||
#ifndef ts_calloc
|
||||
#define ts_calloc ts_calloc_default
|
||||
#endif
|
||||
#ifndef ts_realloc
|
||||
#define ts_realloc ts_realloc_default
|
||||
#endif
|
||||
#ifndef ts_free
|
||||
#define ts_free ts_free_default
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
static inline bool ts_toggle_allocation_recording(bool value) {
|
||||
|
|
@ -42,7 +46,8 @@ static inline bool ts_toggle_allocation_recording(bool value) {
|
|||
return false;
|
||||
}
|
||||
|
||||
static inline void *ts_malloc(size_t size) {
|
||||
|
||||
static inline void *ts_malloc_default(size_t size) {
|
||||
void *result = malloc(size);
|
||||
if (size > 0 && !result) {
|
||||
fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size);
|
||||
|
|
@ -51,7 +56,7 @@ static inline void *ts_malloc(size_t size) {
|
|||
return result;
|
||||
}
|
||||
|
||||
static inline void *ts_calloc(size_t count, size_t size) {
|
||||
static inline void *ts_calloc_default(size_t count, size_t size) {
|
||||
void *result = calloc(count, size);
|
||||
if (count > 0 && !result) {
|
||||
fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size);
|
||||
|
|
@ -60,7 +65,7 @@ static inline void *ts_calloc(size_t count, size_t size) {
|
|||
return result;
|
||||
}
|
||||
|
||||
static inline void *ts_realloc(void *buffer, size_t size) {
|
||||
static inline void *ts_realloc_default(void *buffer, size_t size) {
|
||||
void *result = realloc(buffer, size);
|
||||
if (size > 0 && !result) {
|
||||
fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size);
|
||||
|
|
@ -69,7 +74,7 @@ static inline void *ts_realloc(void *buffer, size_t size) {
|
|||
return result;
|
||||
}
|
||||
|
||||
static inline void ts_free(void *buffer) {
|
||||
static inline void ts_free_default(void *buffer) {
|
||||
free(buffer);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -52,14 +52,24 @@ extern "C" {
|
|||
(self)->size += (count))
|
||||
|
||||
#define array_push_all(self, other) \
|
||||
array_splice((self), (self)->size, 0, (other)->size, (other)->contents)
|
||||
array_extend((self), (other)->size, (other)->contents)
|
||||
|
||||
// Append `count` elements to the end of the array, reading their values from the
|
||||
// `contents` pointer.
|
||||
#define array_extend(self, count, contents) \
|
||||
array__splice( \
|
||||
(VoidArray *)(self), array__elem_size(self), (self)->size, \
|
||||
0, count, contents \
|
||||
)
|
||||
|
||||
// Remove `old_count` elements from the array starting at the given `index`. At
|
||||
// the same index, insert `new_count` new elements, reading their values from the
|
||||
// `new_contents` pointer.
|
||||
#define array_splice(self, index, old_count, new_count, new_contents) \
|
||||
array__splice((VoidArray *)(self), array__elem_size(self), index, old_count, \
|
||||
new_count, new_contents)
|
||||
#define array_splice(self, index, old_count, new_count, new_contents) \
|
||||
array__splice( \
|
||||
(VoidArray *)(self), array__elem_size(self), index, \
|
||||
old_count, new_count, new_contents \
|
||||
)
|
||||
|
||||
// Insert one `element` into the array at the given `index`.
|
||||
#define array_insert(self, index, element) \
|
||||
|
|
|
|||
178
lib/src/query.c
178
lib/src/query.c
|
|
@ -214,6 +214,7 @@ struct TSQuery {
|
|||
Array(TSQueryPredicateStep) predicate_steps;
|
||||
Array(QueryPattern) patterns;
|
||||
Array(StepOffset) step_offsets;
|
||||
Array(char) string_buffer;
|
||||
const TSLanguage *language;
|
||||
uint16_t wildcard_root_pattern_count;
|
||||
TSSymbol *symbol_map;
|
||||
|
|
@ -439,67 +440,6 @@ static uint16_t symbol_table_insert_name(
|
|||
return self->slices.size - 1;
|
||||
}
|
||||
|
||||
static uint16_t symbol_table_insert_name_with_escapes(
|
||||
SymbolTable *self,
|
||||
const char *escaped_name,
|
||||
uint32_t escaped_length
|
||||
) {
|
||||
Slice slice = {
|
||||
.offset = self->characters.size,
|
||||
.length = 0,
|
||||
};
|
||||
array_grow_by(&self->characters, escaped_length + 1);
|
||||
|
||||
// Copy the contents of the literal into the characters buffer, processing escape
|
||||
// sequences like \n and \". This needs to be done before checking if the literal
|
||||
// is already present, in order to do the string comparison.
|
||||
bool is_escaped = false;
|
||||
for (unsigned i = 0; i < escaped_length; i++) {
|
||||
const char *src = &escaped_name[i];
|
||||
char *dest = &self->characters.contents[slice.offset + slice.length];
|
||||
if (is_escaped) {
|
||||
switch (*src) {
|
||||
case 'n':
|
||||
*dest = '\n';
|
||||
break;
|
||||
case 'r':
|
||||
*dest = '\r';
|
||||
break;
|
||||
case 't':
|
||||
*dest = '\t';
|
||||
break;
|
||||
case '0':
|
||||
*dest = '\0';
|
||||
break;
|
||||
default:
|
||||
*dest = *src;
|
||||
break;
|
||||
}
|
||||
is_escaped = false;
|
||||
slice.length++;
|
||||
} else {
|
||||
if (*src == '\\') {
|
||||
is_escaped = true;
|
||||
} else {
|
||||
*dest = *src;
|
||||
slice.length++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the string is already present, remove the redundant content from the characters
|
||||
// buffer and return the existing id.
|
||||
int id = symbol_table_id_for_name(self, &self->characters.contents[slice.offset], slice.length);
|
||||
if (id >= 0) {
|
||||
self->characters.size -= (escaped_length + 1);
|
||||
return id;
|
||||
}
|
||||
|
||||
self->characters.contents[slice.offset + slice.length] = 0;
|
||||
array_push(&self->slices, slice);
|
||||
return self->slices.size - 1;
|
||||
}
|
||||
|
||||
/************
|
||||
* QueryStep
|
||||
************/
|
||||
|
|
@ -1393,6 +1333,59 @@ static void ts_query__finalize_steps(TSQuery *self) {
|
|||
}
|
||||
}
|
||||
|
||||
static TSQueryError ts_query__parse_string_literal(
|
||||
TSQuery *self,
|
||||
Stream *stream
|
||||
) {
|
||||
const char *string_start = stream->input;
|
||||
if (stream->next != '"') return TSQueryErrorSyntax;
|
||||
stream_advance(stream);
|
||||
const char *prev_position = stream->input;
|
||||
|
||||
bool is_escaped = false;
|
||||
array_clear(&self->string_buffer);
|
||||
for (;;) {
|
||||
if (is_escaped) {
|
||||
is_escaped = false;
|
||||
switch (stream->next) {
|
||||
case 'n':
|
||||
array_push(&self->string_buffer, '\n');
|
||||
break;
|
||||
case 'r':
|
||||
array_push(&self->string_buffer, '\r');
|
||||
break;
|
||||
case 't':
|
||||
array_push(&self->string_buffer, '\t');
|
||||
break;
|
||||
case '0':
|
||||
array_push(&self->string_buffer, '\0');
|
||||
break;
|
||||
default:
|
||||
array_extend(&self->string_buffer, stream->next_size, stream->input);
|
||||
break;
|
||||
}
|
||||
prev_position = stream->input + stream->next_size;
|
||||
} else {
|
||||
if (stream->next == '\\') {
|
||||
array_extend(&self->string_buffer, (stream->input - prev_position), prev_position);
|
||||
prev_position = stream->input + 1;
|
||||
is_escaped = true;
|
||||
} else if (stream->next == '"') {
|
||||
array_extend(&self->string_buffer, (stream->input - prev_position), prev_position);
|
||||
stream_advance(stream);
|
||||
return TSQueryErrorNone;
|
||||
} else if (stream->next == '\n') {
|
||||
stream_reset(stream, string_start);
|
||||
return TSQueryErrorSyntax;
|
||||
}
|
||||
}
|
||||
if (!stream_advance(stream)) {
|
||||
stream_reset(stream, string_start);
|
||||
return TSQueryErrorSyntax;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Parse a single predicate associated with a pattern, adding it to the
|
||||
// query's internal `predicate_steps` array. Predicates are arbitrary
|
||||
// S-expressions associated with a pattern which are meant to be handled at
|
||||
|
|
@ -1458,44 +1451,17 @@ static TSQueryError ts_query__parse_predicate(
|
|||
|
||||
// Parse a string literal
|
||||
else if (stream->next == '"') {
|
||||
stream_advance(stream);
|
||||
|
||||
// Parse the string content
|
||||
bool is_escaped = false;
|
||||
const char *string_content = stream->input;
|
||||
for (;;) {
|
||||
if (is_escaped) {
|
||||
is_escaped = false;
|
||||
} else {
|
||||
if (stream->next == '\\') {
|
||||
is_escaped = true;
|
||||
} else if (stream->next == '"') {
|
||||
break;
|
||||
} else if (stream->next == '\n') {
|
||||
stream_reset(stream, string_content - 1);
|
||||
return TSQueryErrorSyntax;
|
||||
}
|
||||
}
|
||||
if (!stream_advance(stream)) {
|
||||
stream_reset(stream, string_content - 1);
|
||||
return TSQueryErrorSyntax;
|
||||
}
|
||||
}
|
||||
uint32_t length = stream->input - string_content;
|
||||
|
||||
// Add a step for the node
|
||||
uint16_t id = symbol_table_insert_name_with_escapes(
|
||||
TSQueryError e = ts_query__parse_string_literal(self, stream);
|
||||
if (e) return e;
|
||||
uint16_t id = symbol_table_insert_name(
|
||||
&self->predicate_values,
|
||||
string_content,
|
||||
length
|
||||
self->string_buffer.contents,
|
||||
self->string_buffer.size
|
||||
);
|
||||
array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
|
||||
.type = TSQueryPredicateStepTypeString,
|
||||
.value_id = id,
|
||||
}));
|
||||
|
||||
if (stream->next != '"') return TSQueryErrorSyntax;
|
||||
stream_advance(stream);
|
||||
}
|
||||
|
||||
// Parse a bare symbol
|
||||
|
|
@ -1761,33 +1727,22 @@ static TSQueryError ts_query__parse_pattern(
|
|||
|
||||
// Parse a double-quoted anonymous leaf node expression
|
||||
else if (stream->next == '"') {
|
||||
stream_advance(stream);
|
||||
|
||||
// Parse the string content
|
||||
const char *string_content = stream->input;
|
||||
while (stream->next != '"') {
|
||||
if (!stream_advance(stream)) {
|
||||
stream_reset(stream, string_content - 1);
|
||||
return TSQueryErrorSyntax;
|
||||
}
|
||||
}
|
||||
uint32_t length = stream->input - string_content;
|
||||
const char *string_start = stream->input;
|
||||
TSQueryError e = ts_query__parse_string_literal(self, stream);
|
||||
if (e) return e;
|
||||
|
||||
// Add a step for the node
|
||||
TSSymbol symbol = ts_language_symbol_for_name(
|
||||
self->language,
|
||||
string_content,
|
||||
length,
|
||||
self->string_buffer.contents,
|
||||
self->string_buffer.size,
|
||||
false
|
||||
);
|
||||
if (!symbol) {
|
||||
stream_reset(stream, string_content);
|
||||
stream_reset(stream, string_start + 1);
|
||||
return TSQueryErrorNodeType;
|
||||
}
|
||||
array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
|
||||
|
||||
if (stream->next != '"') return TSQueryErrorSyntax;
|
||||
stream_advance(stream);
|
||||
}
|
||||
|
||||
// Parse a field-prefixed pattern
|
||||
|
|
@ -1977,6 +1932,7 @@ TSQuery *ts_query_new(
|
|||
.predicate_steps = array_new(),
|
||||
.patterns = array_new(),
|
||||
.step_offsets = array_new(),
|
||||
.string_buffer = array_new(),
|
||||
.symbol_map = symbol_map,
|
||||
.wildcard_root_pattern_count = 0,
|
||||
.language = language,
|
||||
|
|
@ -2056,6 +2012,7 @@ TSQuery *ts_query_new(
|
|||
}
|
||||
|
||||
ts_query__finalize_steps(self);
|
||||
array_delete(&self->string_buffer);
|
||||
return self;
|
||||
}
|
||||
|
||||
|
|
@ -2066,6 +2023,7 @@ void ts_query_delete(TSQuery *self) {
|
|||
array_delete(&self->predicate_steps);
|
||||
array_delete(&self->patterns);
|
||||
array_delete(&self->step_offsets);
|
||||
array_delete(&self->string_buffer);
|
||||
symbol_table_delete(&self->captures);
|
||||
symbol_table_delete(&self->predicate_values);
|
||||
ts_free(self->symbol_map);
|
||||
|
|
|
|||
|
|
@ -330,7 +330,7 @@ void ts_tree_cursor_current_status(
|
|||
}
|
||||
}
|
||||
|
||||
#undef subtree_metadata
|
||||
#undef subtree_symbol
|
||||
|
||||
if (!ts_subtree_extra(*entry->subtree)) {
|
||||
const TSFieldMapEntry *field_map, *field_map_end;
|
||||
|
|
@ -345,7 +345,6 @@ void ts_tree_cursor_current_status(
|
|||
for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
|
||||
if (!i->inherited && i->child_index == entry->structural_child_index) {
|
||||
*field_id = i->field_id;
|
||||
*can_have_later_siblings_with_this_field = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -354,9 +353,14 @@ void ts_tree_cursor_current_status(
|
|||
// Determine if the current node can have later siblings with the same field name.
|
||||
if (*field_id) {
|
||||
for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
|
||||
if (i->field_id == *field_id && i->child_index > entry->structural_child_index) {
|
||||
*can_have_later_siblings_with_this_field = true;
|
||||
break;
|
||||
if (i->field_id == *field_id) {
|
||||
if (
|
||||
i->child_index > entry->structural_child_index ||
|
||||
(i->child_index == entry->structural_child_index && *has_later_named_siblings)
|
||||
) {
|
||||
*can_have_later_siblings_with_this_field = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
8
test/fixtures/error_corpus/readme.md
vendored
Normal file
8
test/fixtures/error_corpus/readme.md
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
The Error Corpus
|
||||
================
|
||||
|
||||
This directory contains corpus tests that exercise error recovery in a variety of languages.
|
||||
|
||||
These corpus tests provide a simple way of asserting that error recoveries are "reasonable" in a variety of situations. But they are also somewhat *overspecified*. It isn't critical that error recovery behaves *exactly* as these tests specify, just that most of the syntax tree is preserved despite the error.
|
||||
|
||||
Sometimes these tests can start failing when changes are pushed to the parser repositories like `tree-sitter-ruby`, `tree-sitter-javascript`, etc. Usually, we just need to tweak the expected syntax tree.
|
||||
2
test/fixtures/error_corpus/ruby_errors.txt
vendored
2
test/fixtures/error_corpus/ruby_errors.txt
vendored
|
|
@ -14,6 +14,6 @@ c
|
|||
method: (identifier)
|
||||
(ERROR (heredoc_beginning))
|
||||
arguments: (argument_list
|
||||
(heredoc_body (heredoc_end))
|
||||
(heredoc_body (heredoc_content) (heredoc_end))
|
||||
(identifier)
|
||||
(MISSING ")"))))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue