Merge remote-tracking branch 'origin/master' into query-testy

This commit is contained in:
Patrick Thomson 2020-11-10 16:16:53 -05:00
commit c9c886d971
19 changed files with 721 additions and 459 deletions

4
Cargo.lock generated
View file

@ -824,7 +824,7 @@ dependencies = [
[[package]]
name = "tree-sitter"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"cc",
"regex",
@ -832,7 +832,7 @@ dependencies = [
[[package]]
name = "tree-sitter-cli"
version = "0.17.1"
version = "0.17.3"
dependencies = [
"ansi_term",
"atty",

View file

@ -1,7 +1,7 @@
[package]
name = "tree-sitter-cli"
description = "CLI tool for developing, testing, and using Tree-sitter parsers"
version = "0.17.1"
version = "0.17.3"
authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
edition = "2018"
license = "MIT"

View file

@ -1,6 +1,6 @@
{
"name": "tree-sitter-cli",
"version": "0.17.1",
"version": "0.17.3",
"author": "Max Brunsfeld",
"license": "MIT",
"repository": {

View file

@ -146,7 +146,7 @@ impl ChildQuantity {
pub(crate) fn get_variable_info(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
simple_aliases: &AliasMap,
default_aliases: &AliasMap,
) -> Result<Vec<VariableInfo>> {
let child_type_is_visible = |t: &ChildType| {
variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous
@ -185,7 +185,7 @@ pub(crate) fn get_variable_info(
let child_symbol = step.symbol;
let child_type = if let Some(alias) = &step.alias {
ChildType::Aliased(alias.clone())
} else if let Some(alias) = simple_aliases.get(&step.symbol) {
} else if let Some(alias) = default_aliases.get(&step.symbol) {
ChildType::Aliased(alias.clone())
} else {
ChildType::Normal(child_symbol)
@ -358,7 +358,7 @@ pub(crate) fn get_variable_info(
pub(crate) fn generate_node_types_json(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
simple_aliases: &AliasMap,
default_aliases: &AliasMap,
variable_info: &Vec<VariableInfo>,
) -> Vec<NodeInfoJSON> {
let mut node_types_json = BTreeMap::new();
@ -369,7 +369,7 @@ pub(crate) fn generate_node_types_json(
named: alias.is_named,
},
ChildType::Normal(symbol) => {
if let Some(alias) = simple_aliases.get(&symbol) {
if let Some(alias) = default_aliases.get(&symbol) {
NodeTypeJSON {
kind: alias.value.clone(),
named: alias.is_named,
@ -417,22 +417,33 @@ pub(crate) fn generate_node_types_json(
};
let mut aliases_by_symbol = HashMap::new();
for (symbol, alias) in simple_aliases {
for (symbol, alias) in default_aliases {
aliases_by_symbol.insert(*symbol, {
let mut aliases = HashSet::new();
aliases.insert(Some(alias.clone()));
aliases
});
}
for extra_symbol in &syntax_grammar.extra_symbols {
if !default_aliases.contains_key(extra_symbol) {
aliases_by_symbol
.entry(*extra_symbol)
.or_insert(HashSet::new())
.insert(None);
}
}
for variable in &syntax_grammar.variables {
for production in &variable.productions {
for step in &production.steps {
if !simple_aliases.contains_key(&step.symbol) {
aliases_by_symbol
.entry(step.symbol)
.or_insert(HashSet::new())
.insert(step.alias.clone());
}
aliases_by_symbol
.entry(step.symbol)
.or_insert(HashSet::new())
.insert(
step.alias
.as_ref()
.or_else(|| default_aliases.get(&step.symbol))
.cloned(),
);
}
}
}
@ -722,9 +733,18 @@ mod tests {
kind: VariableType::Named,
rule: Rule::string("x"),
},
// This rule is not reachable from the start symbol
// so it won't be present in the node_types
Variable {
name: "v3".to_string(),
kind: VariableType::Named,
rule: Rule::string("y"),
},
],
});
assert_eq!(node_types.len(), 3);
assert_eq!(
node_types[0],
NodeInfoJSON {
@ -784,6 +804,112 @@ mod tests {
);
}
#[test]
fn test_node_types_simple_extras() {
let node_types = get_node_types(InputGrammar {
name: String::new(),
extra_symbols: vec![Rule::named("v3")],
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
word_token: None,
supertype_symbols: vec![],
variables: vec![
Variable {
name: "v1".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::field("f1".to_string(), Rule::named("v2")),
Rule::field("f2".to_string(), Rule::string(";")),
]),
},
Variable {
name: "v2".to_string(),
kind: VariableType::Named,
rule: Rule::string("x"),
},
// This rule is not reachable from the start symbol, but
// it is reachable from the 'extra_symbols' so it
// should be present in the node_types
Variable {
name: "v3".to_string(),
kind: VariableType::Named,
rule: Rule::string("y"),
},
],
});
assert_eq!(node_types.len(), 4);
assert_eq!(
node_types[0],
NodeInfoJSON {
kind: "v1".to_string(),
named: true,
subtypes: None,
children: None,
fields: Some(
vec![
(
"f1".to_string(),
FieldInfoJSON {
multiple: false,
required: true,
types: vec![NodeTypeJSON {
kind: "v2".to_string(),
named: true,
}]
}
),
(
"f2".to_string(),
FieldInfoJSON {
multiple: false,
required: true,
types: vec![NodeTypeJSON {
kind: ";".to_string(),
named: false,
}]
}
),
]
.into_iter()
.collect()
)
}
);
assert_eq!(
node_types[1],
NodeInfoJSON {
kind: ";".to_string(),
named: false,
subtypes: None,
children: None,
fields: None
}
);
assert_eq!(
node_types[2],
NodeInfoJSON {
kind: "v2".to_string(),
named: true,
subtypes: None,
children: None,
fields: None
}
);
assert_eq!(
node_types[3],
NodeInfoJSON {
kind: "v3".to_string(),
named: true,
subtypes: None,
children: None,
fields: None
}
);
}
#[test]
fn test_node_types_with_supertypes() {
let node_types = get_node_types(InputGrammar {
@ -1685,14 +1811,14 @@ mod tests {
}
fn get_node_types(grammar: InputGrammar) -> Vec<NodeInfoJSON> {
let (syntax_grammar, lexical_grammar, _, simple_aliases) =
let (syntax_grammar, lexical_grammar, _, default_aliases) =
prepare_grammar(&grammar).unwrap();
let variable_info =
get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases).unwrap();
get_variable_info(&syntax_grammar, &lexical_grammar, &default_aliases).unwrap();
generate_node_types_json(
&syntax_grammar,
&lexical_grammar,
&simple_aliases,
&default_aliases,
&variable_info,
)
}

View file

@ -0,0 +1,293 @@
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
// Per-symbol bookkeeping used by `extract_default_aliases` while scanning the grammar.
#[derive(Clone, Default)]
struct SymbolStatus {
// Every distinct alias applied to this symbol, paired with the number of production
// steps that use it. After the default alias has been chosen, this list is reduced to
// just that entry (or emptied when the symbol has no default alias).
aliases: Vec<(Alias, usize)>,
// Set once the symbol is seen in a production step without an alias. Steps whose
// symbol is listed in `variables_to_inline` are not counted.
appears_unaliased: bool,
}
// Update the grammar by finding symbols that always are aliased, and for each such symbol,
// promoting one of its aliases to a "default alias", which is applied globally instead
// of in a context-specific way.
//
// This has two benefits:
// * It reduces the overhead of storing production-specific alias info in the parse table.
// * Within an `ERROR` node, no context-specific aliases will be applied. This transformation
//   ensures that the children of an `ERROR` node have symbols that are consistent with the
//   way that they would appear in a valid syntax tree.
//
// Returns a map from each symbol that received a default alias to that alias. As a side
// effect, production steps in `syntax_grammar` that used the default alias explicitly have
// their alias cleared, since it is now implied by the returned map.
//
// Panics if any production step refers to the `End` symbol.
pub(super) fn extract_default_aliases(
    syntax_grammar: &mut SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
) -> AliasMap {
    let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
    let mut non_terminal_status_list =
        vec![SymbolStatus::default(); syntax_grammar.variables.len()];
    let mut external_status_list =
        vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];

    // For each grammar symbol, find all of the aliases under which the symbol appears,
    // and determine whether or not the symbol ever appears *unaliased*.
    for variable in syntax_grammar.variables.iter() {
        for production in variable.productions.iter() {
            for step in production.steps.iter() {
                let status = match step.symbol.kind {
                    SymbolType::External => &mut external_status_list[step.symbol.index],
                    SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
                    SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
                    SymbolType::End => panic!("Unexpected end token"),
                };

                // Default aliases don't work for inlined variables.
                if syntax_grammar.variables_to_inline.contains(&step.symbol) {
                    continue;
                }

                if let Some(alias) = &step.alias {
                    if let Some(count_for_alias) = status
                        .aliases
                        .iter_mut()
                        .find_map(|(a, count)| if a == alias { Some(count) } else { None })
                    {
                        *count_for_alias += 1;
                    } else {
                        status.aliases.push((alias.clone(), 1));
                    }
                } else {
                    status.appears_unaliased = true;
                }
            }
        }
    }

    let symbols_with_statuses = terminal_status_list
        .iter_mut()
        .enumerate()
        .map(|(i, status)| (Symbol::terminal(i), status))
        .chain(
            non_terminal_status_list
                .iter_mut()
                .enumerate()
                .map(|(i, status)| (Symbol::non_terminal(i), status)),
        )
        .chain(
            external_status_list
                .iter_mut()
                .enumerate()
                .map(|(i, status)| (Symbol::external(i), status)),
        );

    // For each symbol that always appears aliased, find the alias that occurs most often,
    // and designate that alias as the symbol's "default alias". Store all of these
    // default aliases in a map that will be returned.
    let mut result = AliasMap::new();
    for (symbol, status) in symbols_with_statuses {
        if status.appears_unaliased {
            status.aliases.clear();
        } else if let Some(default_index) = status
            .aliases
            .iter()
            .enumerate()
            // Highest count wins; among equal counts, the alias that was seen first
            // (lowest index) wins.
            .max_by_key(|(i, (_, count))| (*count, std::cmp::Reverse(*i)))
            .map(|(i, _)| i)
        {
            // Keep only the chosen entry, so that the pass below can recognize the
            // default alias as the sole remaining element of `status.aliases`.
            let default_entry = status.aliases.swap_remove(default_index);
            status.aliases.clear();
            result.insert(symbol, default_entry.0.clone());
            status.aliases.push(default_entry);
        }
    }

    // Wherever a symbol is aliased as its default alias, remove the usage of the alias,
    // because it will now be redundant.
    let mut alias_positions_to_clear = Vec::new();
    for variable in syntax_grammar.variables.iter_mut() {
        alias_positions_to_clear.clear();

        for (i, production) in variable.productions.iter().enumerate() {
            for (j, step) in production.steps.iter().enumerate() {
                let status = match step.symbol.kind {
                    SymbolType::External => &external_status_list[step.symbol.index],
                    SymbolType::NonTerminal => &non_terminal_status_list[step.symbol.index],
                    SymbolType::Terminal => &terminal_status_list[step.symbol.index],
                    SymbolType::End => panic!("Unexpected end token"),
                };

                // Only steps that are aliased as the symbol's default alias are candidates.
                if step.alias.is_none()
                    || step.alias.as_ref() != status.aliases.first().map(|entry| &entry.0)
                {
                    continue;
                }

                // The alias is kept if another production of this variable uses the same
                // alias at the same step index for a symbol whose default alias differs.
                let other_productions_must_use_this_alias_at_this_index = variable
                    .productions
                    .iter()
                    .enumerate()
                    .any(|(other_i, other_production)| {
                        other_i != i
                            && other_production.steps.len() > j
                            && other_production.steps[j].alias == step.alias
                            && result.get(&other_production.steps[j].symbol)
                                != step.alias.as_ref()
                    });

                if !other_productions_must_use_this_alias_at_this_index {
                    alias_positions_to_clear.push((i, j));
                }
            }
        }

        // The positions were collected first because the productions were only borrowed
        // immutably during the scan above.
        for (production_index, step_index) in &alias_positions_to_clear {
            variable.productions[*production_index].steps[*step_index].alias = None;
        }
    }

    result
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generate::grammars::{
        LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
    };
    use crate::generate::nfa::Nfa;

    // Checks both halves of `extract_default_aliases`: the returned default-alias map, and
    // the removal of now-redundant aliases from the grammar's production steps.
    #[test]
    fn test_extract_default_aliases() {
        let mut syntax_grammar = SyntaxGrammar {
            variables: vec![
                SyntaxVariable {
                    name: "v1".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
                            ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
                            ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
                            ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true),
                        ],
                    }],
                },
                SyntaxVariable {
                    name: "v2".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            // Token 0 is always aliased as "a1".
                            ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
                            // Token 1 is aliased within rule `v1` above, but not here.
                            ProductionStep::new(Symbol::terminal(1)),
                            // Token 2 is aliased differently here than in `v1`. The alias from
                            // `v1` should be promoted to the default alias, because `v1` appears
                            // first in the grammar.
                            ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true),
                            // Token 3 is also aliased differently here than in `v1`. In this case,
                            // this alias should be promoted to the default alias, because it is
                            // used a greater number of times (twice).
                            ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true),
                            ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true),
                        ],
                    }],
                },
            ],
            extra_symbols: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),
            supertype_symbols: Vec::new(),
            external_tokens: Vec::new(),
            word_token: None,
        };

        // Four anonymous tokens named "t0" through "t3"; only their count matters here.
        let lexical_grammar = LexicalGrammar {
            nfa: Nfa::new(),
            variables: (0..4)
                .map(|i| LexicalVariable {
                    name: format!("t{}", i),
                    kind: VariableType::Anonymous,
                    implicit_precedence: 0,
                    start_state: 0,
                })
                .collect(),
        };

        let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
        assert_eq!(default_aliases.len(), 3);

        assert_eq!(
            default_aliases.get(&Symbol::terminal(0)),
            Some(&Alias {
                value: "a1".to_string(),
                is_named: true,
            })
        );
        assert_eq!(
            default_aliases.get(&Symbol::terminal(2)),
            Some(&Alias {
                value: "a3".to_string(),
                is_named: true,
            })
        );
        assert_eq!(
            default_aliases.get(&Symbol::terminal(3)),
            Some(&Alias {
                value: "a6".to_string(),
                is_named: true,
            })
        );
        assert_eq!(default_aliases.get(&Symbol::terminal(1)), None);

        // Aliases equal to a symbol's default alias are stripped; all others are kept.
        assert_eq!(
            syntax_grammar.variables,
            vec![
                SyntaxVariable {
                    name: "v1".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(0)),
                            ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
                            ProductionStep::new(Symbol::terminal(2)),
                            ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true),
                        ],
                    },],
                },
                SyntaxVariable {
                    name: "v2".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(0)),
                            ProductionStep::new(Symbol::terminal(1)),
                            ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true),
                            ProductionStep::new(Symbol::terminal(3)),
                            ProductionStep::new(Symbol::terminal(3)),
                        ],
                    },],
                },
            ]
        );
    }
}

View file

@ -1,223 +0,0 @@
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
// Per-symbol bookkeeping used by `extract_simple_aliases` while scanning the grammar.
#[derive(Clone, Default)]
struct SymbolStatus {
// The one alias under which the symbol has appeared so far; reset to `None` as soon
// as the symbol is found unaliased or under a second, different alias.
alias: Option<Alias>,
// Set once the symbol is known not to have a single consistent alias. After that,
// later occurrences no longer update `alias`.
conflicting: bool,
}
// Finds the symbols that are aliased to one and the same name at every place they occur
// in the syntax grammar, strips those aliases from the production steps, and returns a
// map from each such symbol to its alias.
//
// Panics if any production step refers to the `End` symbol.
pub(super) fn extract_simple_aliases(
    syntax_grammar: &mut SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
) -> AliasMap {
    let mut terminal_statuses = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
    let mut non_terminal_statuses = vec![SymbolStatus::default(); syntax_grammar.variables.len()];
    let mut external_statuses =
        vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];

    // First pass: work out which symbols are *always* aliased to a single name.
    let all_steps = syntax_grammar
        .variables
        .iter()
        .flat_map(|variable| variable.productions.iter())
        .flat_map(|production| production.steps.iter());
    for step in all_steps {
        let status = match step.symbol.kind {
            SymbolType::External => &mut external_statuses[step.symbol.index],
            SymbolType::NonTerminal => &mut non_terminal_statuses[step.symbol.index],
            SymbolType::Terminal => &mut terminal_statuses[step.symbol.index],
            SymbolType::End => panic!("Unexpected end token"),
        };

        match &step.alias {
            // An unaliased occurrence rules the symbol out for good.
            None => {
                status.alias = None;
                status.conflicting = true;
            }
            // Already ruled out; nothing more to learn from this occurrence.
            Some(_) if status.conflicting => {}
            Some(step_alias) => {
                if status.alias.is_none() {
                    status.alias = Some(step_alias.clone());
                } else if status.alias.as_ref() != Some(step_alias) {
                    status.alias = None;
                    status.conflicting = true;
                }
            }
        }
    }

    // Second pass: drop the per-step alias wherever the symbol has a simple alias.
    let all_steps_mut = syntax_grammar
        .variables
        .iter_mut()
        .flat_map(|variable| variable.productions.iter_mut())
        .flat_map(|production| production.steps.iter_mut());
    for step in all_steps_mut {
        let status = match step.symbol.kind {
            SymbolType::External => &external_statuses[step.symbol.index],
            SymbolType::NonTerminal => &non_terminal_statuses[step.symbol.index],
            SymbolType::Terminal => &terminal_statuses[step.symbol.index],
            SymbolType::End => panic!("Unexpected end token"),
        };
        if status.alias.is_some() {
            step.alias = None;
        }
    }

    // Finally, collect every surviving alias into the map keyed by its symbol.
    let terminals = terminal_statuses
        .into_iter()
        .enumerate()
        .map(|(i, status)| (Symbol::terminal(i), status));
    let non_terminals = non_terminal_statuses
        .into_iter()
        .enumerate()
        .map(|(i, status)| (Symbol::non_terminal(i), status));
    let externals = external_statuses
        .into_iter()
        .enumerate()
        .map(|(i, status)| (Symbol::external(i), status));

    let mut result = AliasMap::new();
    for (symbol, status) in terminals.chain(non_terminals).chain(externals) {
        if let Some(alias) = status.alias {
            result.insert(symbol, alias);
        }
    }
    result
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generate::grammars::{
        LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
    };
    use crate::generate::nfa::Nfa;

    // Builds a named syntax variable that has a single production with the given steps.
    fn single_production_variable(name: &str, steps: Vec<ProductionStep>) -> SyntaxVariable {
        SyntaxVariable {
            name: name.to_owned(),
            kind: VariableType::Named,
            productions: vec![Production {
                dynamic_precedence: 0,
                steps,
            }],
        }
    }

    // Builds an anonymous lexical variable with the given name.
    fn anonymous_token(name: &str) -> LexicalVariable {
        LexicalVariable {
            name: name.to_string(),
            kind: VariableType::Anonymous,
            implicit_precedence: 0,
            start_state: 0,
        }
    }

    #[test]
    fn test_extract_simple_aliases() {
        let mut syntax_grammar = SyntaxGrammar {
            variables: vec![
                single_production_variable(
                    "v1",
                    vec![
                        ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
                        ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
                        ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
                    ],
                ),
                single_production_variable(
                    "v2",
                    vec![
                        // Token 0 is always aliased as "a1".
                        ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
                        // Token 1 is aliased above, but not here.
                        ProductionStep::new(Symbol::terminal(1)),
                        // Token 2 is aliased differently than above.
                        ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
                    ],
                ),
            ],
            extra_symbols: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),
            supertype_symbols: Vec::new(),
            external_tokens: Vec::new(),
            word_token: None,
        };

        let lexical_grammar = LexicalGrammar {
            nfa: Nfa::new(),
            variables: vec![
                anonymous_token("t1"),
                anonymous_token("t2"),
                anonymous_token("t3"),
            ],
        };

        let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);

        // Only token 0 is aliased consistently everywhere it appears.
        assert_eq!(simple_aliases.len(), 1);
        assert_eq!(
            simple_aliases[&Symbol::terminal(0)],
            Alias {
                value: "a1".to_string(),
                is_named: true,
            }
        );

        let expected_variables = vec![
            single_production_variable(
                "v1",
                vec![
                    // 'Simple' alias removed
                    ProductionStep::new(Symbol::terminal(0)),
                    // Other aliases unchanged
                    ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
                    ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
                ],
            ),
            single_production_variable(
                "v2",
                vec![
                    ProductionStep::new(Symbol::terminal(0)),
                    ProductionStep::new(Symbol::terminal(1)),
                    ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
                ],
            ),
        ];
        assert_eq!(syntax_grammar.variables, expected_variables);
    }
}

View file

@ -1,6 +1,6 @@
mod expand_repeats;
mod expand_tokens;
mod extract_simple_aliases;
mod extract_default_aliases;
mod extract_tokens;
mod flatten_grammar;
mod intern_symbols;
@ -8,7 +8,7 @@ mod process_inlines;
use self::expand_repeats::expand_repeats;
pub(crate) use self::expand_tokens::expand_tokens;
use self::extract_simple_aliases::extract_simple_aliases;
use self::extract_default_aliases::extract_default_aliases;
use self::extract_tokens::extract_tokens;
use self::flatten_grammar::flatten_grammar;
use self::intern_symbols::intern_symbols;
@ -52,7 +52,7 @@ pub(crate) fn prepare_grammar(
let syntax_grammar = expand_repeats(syntax_grammar);
let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
let lexical_grammar = expand_tokens(lexical_grammar)?;
let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
let inlines = process_inlines(&syntax_grammar);
Ok((syntax_grammar, lexical_grammar, inlines, simple_aliases))
Ok((syntax_grammar, lexical_grammar, inlines, default_aliases))
}

View file

@ -65,7 +65,7 @@ struct Generator {
keyword_capture_token: Option<Symbol>,
syntax_grammar: SyntaxGrammar,
lexical_grammar: LexicalGrammar,
simple_aliases: AliasMap,
default_aliases: AliasMap,
symbol_order: HashMap<Symbol, usize>,
symbol_ids: HashMap<Symbol, String>,
alias_ids: HashMap<Alias, String>,
@ -143,49 +143,6 @@ impl Generator {
self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
}
let mut field_names = Vec::new();
for production_info in &self.parse_table.production_infos {
for field_name in production_info.field_map.keys() {
field_names.push(field_name);
}
for alias in &production_info.alias_sequence {
if let Some(alias) = &alias {
let alias_kind = alias.kind();
let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
let (name, kind) = self.metadata_for_symbol(*symbol);
name == alias.value && kind == alias_kind
});
let alias_id = if let Some(symbol) = matching_symbol {
self.symbol_ids[&symbol].clone()
} else if alias.is_named {
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
} else {
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
};
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
}
}
}
self.unique_aliases = self
.alias_ids
.keys()
.filter(|alias| {
self.parse_table
.symbols
.iter()
.cloned()
.find(|symbol| {
let (name, kind) = self.metadata_for_symbol(*symbol);
name == alias.value && kind == alias.kind()
})
.is_none()
})
.cloned()
.collect();
self.unique_aliases.sort_unstable();
self.symbol_map = self
.parse_table
.symbols
@ -198,10 +155,10 @@ impl Generator {
// public-facing symbol. If one of the symbols is not aliased, choose that one
// to be the public-facing symbol. Otherwise, pick the symbol with the lowest
// numeric value.
if let Some(alias) = self.simple_aliases.get(symbol) {
if let Some(alias) = self.default_aliases.get(symbol) {
let kind = alias.kind();
for other_symbol in &self.parse_table.symbols {
if let Some(other_alias) = self.simple_aliases.get(other_symbol) {
if let Some(other_alias) = self.default_aliases.get(other_symbol) {
if other_symbol < mapping && other_alias == alias {
mapping = other_symbol;
}
@ -230,13 +187,51 @@ impl Generator {
})
.collect();
field_names.sort_unstable();
field_names.dedup();
self.field_names = field_names.into_iter().cloned().collect();
for production_info in &self.parse_table.production_infos {
// Build a list of all field names
for field_name in production_info.field_map.keys() {
if let Err(i) = self.field_names.binary_search(&field_name) {
self.field_names.insert(i, field_name.clone());
}
}
// If we are opting in to the new unstable language ABI, then use the concept of
// "small parse states". Otherwise, use the same representation for all parse
// states.
for alias in &production_info.alias_sequence {
// Generate a mapping from aliases to C identifiers.
if let Some(alias) = &alias {
let existing_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
if let Some(default_alias) = self.default_aliases.get(symbol) {
default_alias == alias
} else {
let (name, kind) = self.metadata_for_symbol(*symbol);
name == alias.value && kind == alias.kind()
}
});
// Some aliases match an existing symbol in the grammar.
let alias_id;
if let Some(existing_symbol) = existing_symbol {
alias_id = self.symbol_ids[&self.symbol_map[&existing_symbol]].clone();
}
// Other aliases don't match any existing symbol, and need their own identifiers.
else {
if let Err(i) = self.unique_aliases.binary_search(alias) {
self.unique_aliases.insert(i, alias.clone());
}
alias_id = if alias.is_named {
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
} else {
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
};
}
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
}
}
}
// Determine which states should use the "small state" representation, and which should
// use the normal array representation.
let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2);
self.large_state_count = self
.parse_table
@ -361,7 +356,7 @@ impl Generator {
indent!(self);
for symbol in self.parse_table.symbols.iter() {
let name = self.sanitize_string(
self.simple_aliases
self.default_aliases
.get(symbol)
.map(|alias| alias.value.as_str())
.unwrap_or(self.metadata_for_symbol(*symbol).0),
@ -444,7 +439,7 @@ impl Generator {
for symbol in &self.parse_table.symbols {
add_line!(self, "[{}] = {{", self.symbol_ids[&symbol]);
indent!(self);
if let Some(Alias { is_named, .. }) = self.simple_aliases.get(symbol) {
if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) {
add_line!(self, ".visible = true,");
add_line!(self, ".named = {},", is_named);
} else {
@ -519,19 +514,22 @@ impl Generator {
}
fn add_non_terminal_alias_map(&mut self) {
let mut aliases_by_symbol = HashMap::new();
let mut alias_ids_by_symbol = HashMap::new();
for variable in &self.syntax_grammar.variables {
for production in &variable.productions {
for step in &production.steps {
if let Some(alias) = &step.alias {
if step.symbol.is_non_terminal()
&& !self.simple_aliases.contains_key(&step.symbol)
&& Some(alias) != self.default_aliases.get(&step.symbol)
{
if self.symbol_ids.contains_key(&step.symbol) {
let alias_ids =
aliases_by_symbol.entry(step.symbol).or_insert(Vec::new());
if let Err(i) = alias_ids.binary_search(&alias) {
alias_ids.insert(i, alias);
if let Some(alias_id) = self.alias_ids.get(&alias) {
let alias_ids = alias_ids_by_symbol
.entry(step.symbol)
.or_insert(Vec::new());
if let Err(i) = alias_ids.binary_search(&alias_id) {
alias_ids.insert(i, alias_id);
}
}
}
}
@ -540,19 +538,19 @@ impl Generator {
}
}
let mut aliases_by_symbol = aliases_by_symbol.iter().collect::<Vec<_>>();
aliases_by_symbol.sort_unstable_by_key(|e| e.0);
let mut alias_ids_by_symbol = alias_ids_by_symbol.iter().collect::<Vec<_>>();
alias_ids_by_symbol.sort_unstable_by_key(|e| e.0);
add_line!(self, "static uint16_t ts_non_terminal_alias_map[] = {{");
indent!(self);
for (symbol, aliases) in aliases_by_symbol {
for (symbol, alias_ids) in alias_ids_by_symbol {
let symbol_id = &self.symbol_ids[symbol];
let public_symbol_id = &self.symbol_ids[&self.symbol_map[&symbol]];
add_line!(self, "{}, {},", symbol_id, 1 + aliases.len());
add_line!(self, "{}, {},", symbol_id, 1 + alias_ids.len());
indent!(self);
add_line!(self, "{},", public_symbol_id);
for alias in aliases {
add_line!(self, "{},", &self.alias_ids[&alias]);
for alias_id in alias_ids {
add_line!(self, "{},", alias_id);
}
dedent!(self);
}
@ -1545,7 +1543,7 @@ impl Generator {
/// for keyword capture, if any.
/// * `syntax_grammar` - The syntax grammar extracted from the language's grammar
/// * `lexical_grammar` - The lexical grammar extracted from the language's grammar
/// * `simple_aliases` - A map describing the global rename rules that should apply.
/// * `default_aliases` - A map describing the global rename rules that should apply.
/// The keys are symbols that never appear unaliased, and the values
/// are the default aliases that are applied to those symbols.
/// * `next_abi` - A boolean indicating whether to opt into the new, unstable parse
@ -1558,7 +1556,7 @@ pub(crate) fn render_c_code(
keyword_capture_token: Option<Symbol>,
syntax_grammar: SyntaxGrammar,
lexical_grammar: LexicalGrammar,
simple_aliases: AliasMap,
default_aliases: AliasMap,
next_abi: bool,
) -> String {
Generator {
@ -1572,7 +1570,7 @@ pub(crate) fn render_c_code(
keyword_capture_token,
syntax_grammar,
lexical_grammar,
simple_aliases,
default_aliases,
symbol_ids: HashMap::new(),
symbol_order: HashMap::new(),
alias_ids: HashMap::new(),

View file

@ -367,6 +367,30 @@ fn test_query_errors_on_impossible_patterns() {
});
}
#[test]
fn test_query_verifies_possible_patterns_with_aliased_parent_nodes() {
allocations::record(|| {
let ruby = get_language("ruby");
Query::new(ruby, "(destructured_parameter (identifier))").unwrap();
assert_eq!(
Query::new(ruby, "(destructured_parameter (string))",),
Err(QueryError {
kind: QueryErrorKind::Structure,
row: 0,
offset: 24,
column: 24,
message: [
"(destructured_parameter (string))", //
" ^",
]
.join("\n")
})
);
});
}
#[test]
fn test_query_matches_with_simple_pattern() {
allocations::record(|| {
@ -1451,6 +1475,7 @@ fn test_query_matches_with_anonymous_tokens() {
r#"
";" @punctuation
"&&" @operator
"\"" @quote
"#,
)
.unwrap();
@ -1458,9 +1483,11 @@ fn test_query_matches_with_anonymous_tokens() {
assert_query_matches(
language,
&query,
"foo(a && b);",
r#"foo(a && "b");"#,
&[
(1, vec![("operator", "&&")]),
(2, vec![("quote", "\"")]),
(2, vec![("quote", "\"")]),
(0, vec![("punctuation", ";")]),
],
);
@ -1808,6 +1835,33 @@ fn test_query_matches_with_no_captures() {
});
}
#[test]
fn test_query_matches_with_repeated_fields() {
allocations::record(|| {
let language = get_language("c");
let query = Query::new(
language,
"(field_declaration declarator: (field_identifier) @field)",
)
.unwrap();
assert_query_matches(
language,
&query,
"
struct S {
int a, b, c;
}
",
&[
(0, vec![("field", "a")]),
(0, vec![("field", "b")]),
(0, vec![("field", "c")]),
],
);
});
}
#[test]
fn test_query_captures_basic() {
allocations::record(|| {

View file

@ -34,6 +34,7 @@ Parsers for these languages are fairly complete:
* [Elm](https://github.com/razzeee/tree-sitter-elm)
* [Eno](https://github.com/eno-lang/tree-sitter-eno)
* [ERB / EJS](https://github.com/tree-sitter/tree-sitter-embedded-template)
* [Fennel](https://github.com/travonted/tree-sitter-fennel)
* [Go](https://github.com/tree-sitter/tree-sitter-go)
* [HTML](https://github.com/tree-sitter/tree-sitter-html)
* [Java](https://github.com/tree-sitter/tree-sitter-java)
@ -49,6 +50,7 @@ Parsers for these languages are fairly complete:
* [TOML](https://github.com/ikatyang/tree-sitter-toml)
* [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript)
* [Verilog](https://github.com/tree-sitter/tree-sitter-verilog)
* [VHDL](https://github.com/alemuller/tree-sitter-vhdl)
* [Vue](https://github.com/ikatyang/tree-sitter-vue)
* [YAML](https://github.com/ikatyang/tree-sitter-yaml)
* [WASM](https://github.com/wasm-lsp/tree-sitter-wasm)

View file

@ -210,6 +210,7 @@ The following is a complete list of built-in functions you can use in your `gram
* **Right Associativity : `prec.right([number], rule)`** - This function is like `prec.left`, but it instructs Tree-sitter to prefer matching a rule that ends *later*.
* **Dynamic Precedence : `prec.dynamic(number, rule)`** - This function is similar to `prec`, but the given numerical precedence is applied at *runtime* instead of at parser generation time. This is only necessary when handling a conflict dynamically using the `conflicts` field in the grammar, and when there is a genuine *ambiguity*: multiple rules correctly match a given piece of code. In that event, Tree-sitter compares the total dynamic precedence associated with each rule, and selects the one with the highest total. This is similar to [dynamic precedence directives][bison-dprec] in Bison grammars.
* **Tokens : `token(rule)`** - This function marks the given rule as producing only a single token. Tree-sitter's default is to treat each String or RegExp literal in the grammar as a separate token. Each token is matched separately by the lexer and returned as its own leaf node in the tree. The `token` function allows you to express a complex rule using the functions described above (rather than as a single regular expression) but still have Tree-sitter treat it as a single token.
* **Immediate Tokens : `token.immediate(rule)`** - Usually, whitespace (and any other extras, such as comments) is optional before each token. This function means that the token will only match if there is no whitespace before it.
* **Aliases : `alias(rule, name)`** - This function causes the given rule to *appear* with an alternative name in the syntax tree. If `name` is a *symbol*, as in `alias($.foo, $.bar)`, then the aliased rule will *appear* as a [named node][named-vs-anonymous-nodes-section] called `bar`. And if `name` is a *string literal*, as in `alias($.foo, 'bar')`, then the aliased rule will appear as an [anonymous node][named-vs-anonymous-nodes-section], as if the rule had been written as the simple string.
* **Field Names : `field(name, rule)`** - This function assigns a *field name* to the child node(s) matched by the given rule. In the resulting syntax tree, you can then use that field name to access specific children.

View file

@ -1,7 +1,7 @@
[package]
name = "tree-sitter"
description = "Rust bindings to the Tree-sitter parsing library"
version = "0.17.0"
version = "0.17.1"
authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
license = "MIT"
readme = "binding_rust/README.md"

View file

@ -37,7 +37,7 @@ declare module 'web-tree-sitter' {
export type Logger = (
message: string,
params: {[param: string]: string},
params: { [param: string]: string },
type: "parse" | "lex"
) => void;
@ -48,9 +48,9 @@ declare module 'web-tree-sitter' {
) => string | null;
export interface SyntaxNode {
id: number;
tree: Tree;
type: string;
isNamed: boolean;
text: string;
startPosition: Point;
endPosition: Point;
@ -74,6 +74,7 @@ declare module 'web-tree-sitter' {
hasError(): boolean;
equals(other: SyntaxNode): boolean;
isMissing(): boolean;
isNamed(): boolean;
toString(): string;
child(index: number): SyntaxNode | null;
namedChild(index: number): SyntaxNode | null;
@ -131,8 +132,33 @@ declare module 'web-tree-sitter' {
readonly version: number;
readonly fieldCount: number;
fieldNameForId(fieldId: number): string | null
fieldIdForName(fieldName: string): number | null
fieldNameForId(fieldId: number): string | null;
fieldIdForName(fieldName: string): number | null;
query(source: string): Query;
}
/** A capture's `name` together with the `SyntaxNode` it refers to. */
interface QueryCapture {
name: string;
node: SyntaxNode;
}
/**
 * Element type of the array returned by `Query.matches`: a `pattern` number
 * plus the list of captures belonging to that match.
 *
 * NOTE(review): `pattern` is presumably the index of the matching pattern
 * within the query source — confirm against the implementation.
 */
interface QueryMatch {
pattern: number;
captures: QueryCapture[];
}
/**
 * Element type of the array returned by `Query.predicatesForPattern`: an
 * `operator` string and its operands, each described by a `name` and a `type`.
 *
 * NOTE(review): the set of values `type` can take is not visible from this
 * declaration — confirm against the implementation.
 */
interface PredicateResult {
operator: string;
operands: { name: string; type: string }[];
}
/**
 * A query object, as returned by the `query(source)` method declared earlier
 * in this module.
 */
class Query {
/** Names of the captures declared by this query. */
captureNames: string[];
/** NOTE(review): presumably releases the query's underlying resources — confirm. */
delete(): void;
/**
 * Returns the matches found for `node`.
 * NOTE(review): the optional points presumably restrict the searched range — confirm.
 */
matches(node: SyntaxNode, startPosition?: Point, endPosition?: Point): QueryMatch[];
/**
 * Returns the individual captures found for `node`; takes the same optional
 * start/end points as `matches`.
 */
captures(node: SyntaxNode, startPosition?: Point, endPosition?: Point): QueryCapture[];
/** Returns the predicates associated with the pattern numbered `patternIndex`. */
predicatesForPattern(patternIndex: number): PredicateResult[];
}
}

View file

@ -17,24 +17,28 @@ void *ts_record_realloc(void *, size_t);
void ts_record_free(void *);
bool ts_toggle_allocation_recording(bool);
static inline void *ts_malloc(size_t size) {
return ts_record_malloc(size);
}
static inline void *ts_calloc(size_t count, size_t size) {
return ts_record_calloc(count, size);
}
static inline void *ts_realloc(void *buffer, size_t size) {
return ts_record_realloc(buffer, size);
}
static inline void ts_free(void *buffer) {
ts_record_free(buffer);
}
#define ts_malloc ts_record_malloc
#define ts_calloc ts_record_calloc
#define ts_realloc ts_record_realloc
#define ts_free ts_record_free
#else
// Allow clients to override allocation functions
#ifndef ts_malloc
#define ts_malloc ts_malloc_default
#endif
#ifndef ts_calloc
#define ts_calloc ts_calloc_default
#endif
#ifndef ts_realloc
#define ts_realloc ts_realloc_default
#endif
#ifndef ts_free
#define ts_free ts_free_default
#endif
#include <stdlib.h>
static inline bool ts_toggle_allocation_recording(bool value) {
@ -42,7 +46,8 @@ static inline bool ts_toggle_allocation_recording(bool value) {
return false;
}
static inline void *ts_malloc(size_t size) {
static inline void *ts_malloc_default(size_t size) {
void *result = malloc(size);
if (size > 0 && !result) {
fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size);
@ -51,7 +56,7 @@ static inline void *ts_malloc(size_t size) {
return result;
}
static inline void *ts_calloc(size_t count, size_t size) {
static inline void *ts_calloc_default(size_t count, size_t size) {
void *result = calloc(count, size);
if (count > 0 && !result) {
fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size);
@ -60,7 +65,7 @@ static inline void *ts_calloc(size_t count, size_t size) {
return result;
}
static inline void *ts_realloc(void *buffer, size_t size) {
static inline void *ts_realloc_default(void *buffer, size_t size) {
void *result = realloc(buffer, size);
if (size > 0 && !result) {
fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size);
@ -69,7 +74,7 @@ static inline void *ts_realloc(void *buffer, size_t size) {
return result;
}
static inline void ts_free(void *buffer) {
static inline void ts_free_default(void *buffer) {
free(buffer);
}

View file

@ -52,14 +52,24 @@ extern "C" {
(self)->size += (count))
#define array_push_all(self, other) \
array_splice((self), (self)->size, 0, (other)->size, (other)->contents)
array_extend((self), (other)->size, (other)->contents)
// Append `count` elements to the end of the array, reading their values from the
// `contents` pointer.
#define array_extend(self, count, contents) \
array__splice( \
(VoidArray *)(self), array__elem_size(self), (self)->size, \
0, count, contents \
)
// Remove `old_count` elements from the array starting at the given `index`. At
// the same index, insert `new_count` new elements, reading their values from the
// `new_contents` pointer.
#define array_splice(self, index, old_count, new_count, new_contents) \
array__splice((VoidArray *)(self), array__elem_size(self), index, old_count, \
new_count, new_contents)
#define array_splice(self, index, old_count, new_count, new_contents) \
array__splice( \
(VoidArray *)(self), array__elem_size(self), index, \
old_count, new_count, new_contents \
)
// Insert one `element` into the array at the given `index`.
#define array_insert(self, index, element) \

View file

@ -214,6 +214,7 @@ struct TSQuery {
Array(TSQueryPredicateStep) predicate_steps;
Array(QueryPattern) patterns;
Array(StepOffset) step_offsets;
Array(char) string_buffer;
const TSLanguage *language;
uint16_t wildcard_root_pattern_count;
TSSymbol *symbol_map;
@ -439,67 +440,6 @@ static uint16_t symbol_table_insert_name(
return self->slices.size - 1;
}
static uint16_t symbol_table_insert_name_with_escapes(
SymbolTable *self,
const char *escaped_name,
uint32_t escaped_length
) {
Slice slice = {
.offset = self->characters.size,
.length = 0,
};
array_grow_by(&self->characters, escaped_length + 1);
// Copy the contents of the literal into the characters buffer, processing escape
// sequences like \n and \". This needs to be done before checking if the literal
// is already present, in order to do the string comparison.
bool is_escaped = false;
for (unsigned i = 0; i < escaped_length; i++) {
const char *src = &escaped_name[i];
char *dest = &self->characters.contents[slice.offset + slice.length];
if (is_escaped) {
switch (*src) {
case 'n':
*dest = '\n';
break;
case 'r':
*dest = '\r';
break;
case 't':
*dest = '\t';
break;
case '0':
*dest = '\0';
break;
default:
*dest = *src;
break;
}
is_escaped = false;
slice.length++;
} else {
if (*src == '\\') {
is_escaped = true;
} else {
*dest = *src;
slice.length++;
}
}
}
// If the string is already present, remove the redundant content from the characters
// buffer and return the existing id.
int id = symbol_table_id_for_name(self, &self->characters.contents[slice.offset], slice.length);
if (id >= 0) {
self->characters.size -= (escaped_length + 1);
return id;
}
self->characters.contents[slice.offset + slice.length] = 0;
array_push(&self->slices, slice);
return self->slices.size - 1;
}
/************
* QueryStep
************/
@ -1393,6 +1333,59 @@ static void ts_query__finalize_steps(TSQuery *self) {
}
}
// Parse a double-quoted string literal starting at the stream's current
// position, writing its unescaped content into `self->string_buffer`.
//
// Once the opening quote has been seen, the buffer is cleared and then filled
// with the literal's content. No terminating NUL is appended here, so callers
// should use `string_buffer.size` as the length. The escapes `\n`, `\r`, `\t`
// and `\0` are translated to the corresponding characters; any other escaped
// character is copied as-is (which is how `\"` and `\\` are written).
//
// Returns `TSQueryErrorNone` with the stream positioned just past the closing
// quote. Returns `TSQueryErrorSyntax` if the stream is not positioned at a `"`,
// or if an unescaped newline or a failed `stream_advance` is encountered before
// the closing quote; in the latter two cases the stream is reset to the opening
// quote.
static TSQueryError ts_query__parse_string_literal(
TSQuery *self,
Stream *stream
) {
// Remember where the literal begins so the stream can be rewound on error.
const char *string_start = stream->input;
if (stream->next != '"') return TSQueryErrorSyntax;
stream_advance(stream);
// Start of the run of literal bytes that have not yet been copied into the
// buffer. Ordinary characters are not copied one by one; the pending run is
// flushed when a backslash or the closing quote is reached.
const char *prev_position = stream->input;
bool is_escaped = false;
array_clear(&self->string_buffer);
for (;;) {
if (is_escaped) {
// The previous character was a backslash: interpret this one.
is_escaped = false;
switch (stream->next) {
case 'n':
array_push(&self->string_buffer, '\n');
break;
case 'r':
array_push(&self->string_buffer, '\r');
break;
case 't':
array_push(&self->string_buffer, '\t');
break;
case '0':
array_push(&self->string_buffer, '\0');
break;
default:
// Any other escaped character stands for itself: copy all of its bytes.
array_extend(&self->string_buffer, stream->next_size, stream->input);
break;
}
// The escaped character has been handled, so the next pending run starts
// right after it.
prev_position = stream->input + stream->next_size;
} else {
if (stream->next == '\\') {
// Flush the literal run that precedes the backslash, then skip the
// backslash itself (one byte) so it is never copied.
array_extend(&self->string_buffer, (stream->input - prev_position), prev_position);
prev_position = stream->input + 1;
is_escaped = true;
} else if (stream->next == '"') {
// Closing quote: flush the final literal run and consume the quote.
array_extend(&self->string_buffer, (stream->input - prev_position), prev_position);
stream_advance(stream);
return TSQueryErrorNone;
} else if (stream->next == '\n') {
// An unescaped newline is not allowed inside a string: rewind and fail.
stream_reset(stream, string_start);
return TSQueryErrorSyntax;
}
}
// NOTE(review): a false return from stream_advance presumably means the end
// of the input was reached, i.e. the string is unterminated — confirm.
if (!stream_advance(stream)) {
stream_reset(stream, string_start);
return TSQueryErrorSyntax;
}
}
}
// Parse a single predicate associated with a pattern, adding it to the
// query's internal `predicate_steps` array. Predicates are arbitrary
// S-expressions associated with a pattern which are meant to be handled at
@ -1458,44 +1451,17 @@ static TSQueryError ts_query__parse_predicate(
// Parse a string literal
else if (stream->next == '"') {
stream_advance(stream);
// Parse the string content
bool is_escaped = false;
const char *string_content = stream->input;
for (;;) {
if (is_escaped) {
is_escaped = false;
} else {
if (stream->next == '\\') {
is_escaped = true;
} else if (stream->next == '"') {
break;
} else if (stream->next == '\n') {
stream_reset(stream, string_content - 1);
return TSQueryErrorSyntax;
}
}
if (!stream_advance(stream)) {
stream_reset(stream, string_content - 1);
return TSQueryErrorSyntax;
}
}
uint32_t length = stream->input - string_content;
// Add a step for the node
uint16_t id = symbol_table_insert_name_with_escapes(
TSQueryError e = ts_query__parse_string_literal(self, stream);
if (e) return e;
uint16_t id = symbol_table_insert_name(
&self->predicate_values,
string_content,
length
self->string_buffer.contents,
self->string_buffer.size
);
array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
.type = TSQueryPredicateStepTypeString,
.value_id = id,
}));
if (stream->next != '"') return TSQueryErrorSyntax;
stream_advance(stream);
}
// Parse a bare symbol
@ -1761,33 +1727,22 @@ static TSQueryError ts_query__parse_pattern(
// Parse a double-quoted anonymous leaf node expression
else if (stream->next == '"') {
stream_advance(stream);
// Parse the string content
const char *string_content = stream->input;
while (stream->next != '"') {
if (!stream_advance(stream)) {
stream_reset(stream, string_content - 1);
return TSQueryErrorSyntax;
}
}
uint32_t length = stream->input - string_content;
const char *string_start = stream->input;
TSQueryError e = ts_query__parse_string_literal(self, stream);
if (e) return e;
// Add a step for the node
TSSymbol symbol = ts_language_symbol_for_name(
self->language,
string_content,
length,
self->string_buffer.contents,
self->string_buffer.size,
false
);
if (!symbol) {
stream_reset(stream, string_content);
stream_reset(stream, string_start + 1);
return TSQueryErrorNodeType;
}
array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
if (stream->next != '"') return TSQueryErrorSyntax;
stream_advance(stream);
}
// Parse a field-prefixed pattern
@ -1977,6 +1932,7 @@ TSQuery *ts_query_new(
.predicate_steps = array_new(),
.patterns = array_new(),
.step_offsets = array_new(),
.string_buffer = array_new(),
.symbol_map = symbol_map,
.wildcard_root_pattern_count = 0,
.language = language,
@ -2056,6 +2012,7 @@ TSQuery *ts_query_new(
}
ts_query__finalize_steps(self);
array_delete(&self->string_buffer);
return self;
}
@ -2066,6 +2023,7 @@ void ts_query_delete(TSQuery *self) {
array_delete(&self->predicate_steps);
array_delete(&self->patterns);
array_delete(&self->step_offsets);
array_delete(&self->string_buffer);
symbol_table_delete(&self->captures);
symbol_table_delete(&self->predicate_values);
ts_free(self->symbol_map);

View file

@ -330,7 +330,7 @@ void ts_tree_cursor_current_status(
}
}
#undef subtree_metadata
#undef subtree_symbol
if (!ts_subtree_extra(*entry->subtree)) {
const TSFieldMapEntry *field_map, *field_map_end;
@ -345,7 +345,6 @@ void ts_tree_cursor_current_status(
for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
if (!i->inherited && i->child_index == entry->structural_child_index) {
*field_id = i->field_id;
*can_have_later_siblings_with_this_field = false;
break;
}
}
@ -354,9 +353,14 @@ void ts_tree_cursor_current_status(
// Determine if the current node can have later siblings with the same field name.
if (*field_id) {
for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
if (i->field_id == *field_id && i->child_index > entry->structural_child_index) {
*can_have_later_siblings_with_this_field = true;
break;
if (i->field_id == *field_id) {
if (
i->child_index > entry->structural_child_index ||
(i->child_index == entry->structural_child_index && *has_later_named_siblings)
) {
*can_have_later_siblings_with_this_field = true;
break;
}
}
}
}

8
test/fixtures/error_corpus/readme.md vendored Normal file
View file

@ -0,0 +1,8 @@
The Error Corpus
================
This directory contains corpus tests that exercise error recovery in a variety of languages.
These corpus tests provide a simple way of asserting that error recoveries are "reasonable" in a variety of situations. But they are also somewhat *overspecified*. It isn't critical that error recovery behaves *exactly* as these tests specify, just that most of the syntax tree is preserved despite the error.
Sometimes these tests can start failing when changes are pushed to the parser repositories like `tree-sitter-ruby`, `tree-sitter-javascript`, etc. Usually, we just need to tweak the expected syntax tree.

View file

@ -14,6 +14,6 @@ c
method: (identifier)
(ERROR (heredoc_beginning))
arguments: (argument_list
(heredoc_body (heredoc_end))
(heredoc_body (heredoc_content) (heredoc_end))
(identifier)
(MISSING ")"))))