From 02196f8ae91fa8ff76fed5b7c8ba0c6fc689c673 Mon Sep 17 00:00:00 2001 From: Kenneth Skovhus Date: Sun, 17 May 2020 20:33:53 +0200 Subject: [PATCH 01/22] Correct SyntaxNode isNamed type --- lib/binding_web/tree-sitter-web.d.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/binding_web/tree-sitter-web.d.ts b/lib/binding_web/tree-sitter-web.d.ts index 7ddae952..1d6f3761 100644 --- a/lib/binding_web/tree-sitter-web.d.ts +++ b/lib/binding_web/tree-sitter-web.d.ts @@ -50,7 +50,6 @@ declare module 'web-tree-sitter' { export interface SyntaxNode { tree: Tree; type: string; - isNamed: boolean; text: string; startPosition: Point; endPosition: Point; @@ -74,6 +73,7 @@ declare module 'web-tree-sitter' { hasError(): boolean; equals(other: SyntaxNode): boolean; isMissing(): boolean; + isNamed(): boolean; toString(): string; child(index: number): SyntaxNode | null; namedChild(index: number): SyntaxNode | null; From 45eab0ab24fdc391ffd69da99c09c0a2b2e2fbc3 Mon Sep 17 00:00:00 2001 From: TravonteD Date: Tue, 1 Sep 2020 22:34:45 -0400 Subject: [PATCH 02/22] add link to Fennel parser --- docs/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/index.md b/docs/index.md index 8551d1eb..03c1a60c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -34,6 +34,7 @@ Parsers for these languages are fairly complete: * [Elm](https://github.com/razzeee/tree-sitter-elm) * [Eno](https://github.com/eno-lang/tree-sitter-eno) * [ERB / EJS](https://github.com/tree-sitter/tree-sitter-embedded-template) +* [Fennel](https://github.com/travonted/tree-sitter-fennel) * [Go](https://github.com/tree-sitter/tree-sitter-go) * [HTML](https://github.com/tree-sitter/tree-sitter-html) * [Java](https://github.com/tree-sitter/tree-sitter-java) From 43a3f1bbe05387cb692588bd1bd7ebbe6047856a Mon Sep 17 00:00:00 2001 From: Elijah Mooring <45398751+Vehmloewff@users.noreply.github.com> Date: Sat, 19 Sep 2020 11:19:26 -0500 Subject: [PATCH 03/22] Update section-3-creating-parsers.md --- 
docs/section-3-creating-parsers.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index 694f8dae..4d0befcf 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -210,6 +210,7 @@ The following is a complete list of built-in functions you can use in your `gram * **Right Associativity : `prec.right([number], rule)`** - This function is like `prec.left`, but it instructs Tree-sitter to prefer matching a rule that ends *later*. * **Dynamic Precedence : `prec.dynamic(number, rule)`** - This function is similar to `prec`, but the given numerical precedence is applied at *runtime* instead of at parser generation time. This is only necessary when handling a conflict dynamically using the `conflicts` field in the grammar, and when there is a genuine *ambiguity*: multiple rules correctly match a given piece of code. In that event, Tree-sitter compares the total dynamic precedence associated with each rule, and selects the one with the highest total. This is similar to [dynamic precedence directives][bison-dprec] in Bison grammars. * **Tokens : `token(rule)`** - This function marks the given rule as producing only a single token. Tree-sitter's default is to treat each String or RegExp literal in the grammar as a separate token. Each token is matched separately by the lexer and returned as its own leaf node in the tree. The `token` function allows you to express a complex rule using the functions described above (rather than as a single regular expression) but still have Tree-sitter treat it as a single token. +* **Immediate Tokens : `token.immediate(rule)`** - Usually, whitespace (and any other extras, such as comments) is optional before each token. With this function, the token will only match if there is no whitespace directly before it. * **Aliases : `alias(rule, name)`** - This function causes the given rule to *appear* with an alternative name in the syntax tree. 
If `name` is a *symbol*, as in `alias($.foo, $.bar)`, then the aliased rule will *appear* as a [named node][named-vs-anonymous-nodes-section] called `bar`. And if `name` is a *string literal*, as in `alias($.foo, 'bar')`, then the aliased rule will appear as an [anonymous node][named-vs-anonymous-nodes-section], as if the rule had been written as the simple string. * **Field Names : `field(name, rule)`** - This function assigns a *field name* to the child node(s) matched by the given rule. In the resulting syntax tree, you can then use that field name to access specific children. From a2d760e42694b9077e61bc0d5f48dfd5a4325baf Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 27 Oct 2020 15:46:09 -0700 Subject: [PATCH 04/22] Ensure nodes are aliased consistently within syntax error nodes Co-Authored-By: Rick Winfrey --- .../extract_default_aliases.rs | 293 ++++++++++++++++++ .../prepare_grammar/extract_simple_aliases.rs | 223 ------------- cli/src/generate/prepare_grammar/mod.rs | 8 +- cli/src/generate/render.rs | 18 +- cli/src/tests/query_test.rs | 24 ++ 5 files changed, 330 insertions(+), 236 deletions(-) create mode 100644 cli/src/generate/prepare_grammar/extract_default_aliases.rs delete mode 100644 cli/src/generate/prepare_grammar/extract_simple_aliases.rs diff --git a/cli/src/generate/prepare_grammar/extract_default_aliases.rs b/cli/src/generate/prepare_grammar/extract_default_aliases.rs new file mode 100644 index 00000000..3e08e3ad --- /dev/null +++ b/cli/src/generate/prepare_grammar/extract_default_aliases.rs @@ -0,0 +1,293 @@ +use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; +use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType}; + +#[derive(Clone, Default)] +struct SymbolStatus { + aliases: Vec<(Alias, usize)>, + appears_unaliased: bool, +} + +// Update the grammar by finding symbols that always are aliased, and for each such symbol, +// promoting one of its aliases to a "default alias", which is applied globally instead +// 
of in a context-specific way. +// +// This has two benefits: +// * It reduces the overhead of storing production-specific alias info in the parse table. +// * Within an `ERROR` node, no context-specific aliases will be applied. This transformation +// ensures that the children of an `ERROR` node have symbols that are consistent with the +// way that they would appear in a valid syntax tree. +pub(super) fn extract_default_aliases( + syntax_grammar: &mut SyntaxGrammar, + lexical_grammar: &LexicalGrammar, +) -> AliasMap { + let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()]; + let mut non_terminal_status_list = + vec![SymbolStatus::default(); syntax_grammar.variables.len()]; + let mut external_status_list = + vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()]; + + // For each grammar symbol, find all of the aliases under which the symbol appears, + // and determine whether or not the symbol ever appears *unaliased*. + for variable in syntax_grammar.variables.iter() { + for production in variable.productions.iter() { + for step in production.steps.iter() { + let mut status = match step.symbol.kind { + SymbolType::External => &mut external_status_list[step.symbol.index], + SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index], + SymbolType::Terminal => &mut terminal_status_list[step.symbol.index], + SymbolType::End => panic!("Unexpected end token"), + }; + + // Default aliases don't work for inlined variables. 
+ if syntax_grammar.variables_to_inline.contains(&step.symbol) { + continue; + } + + if let Some(alias) = &step.alias { + if let Some(count_for_alias) = status + .aliases + .iter_mut() + .find_map(|(a, count)| if a == alias { Some(count) } else { None }) + { + *count_for_alias += 1; + } else { + status.aliases.push((alias.clone(), 1)); + } + } else { + status.appears_unaliased = true; + } + } + } + } + + let symbols_with_statuses = (terminal_status_list + .iter_mut() + .enumerate() + .map(|(i, status)| (Symbol::terminal(i), status))) + .chain( + non_terminal_status_list + .iter_mut() + .enumerate() + .map(|(i, status)| (Symbol::non_terminal(i), status)), + ) + .chain( + external_status_list + .iter_mut() + .enumerate() + .map(|(i, status)| (Symbol::external(i), status)), + ); + + // For each symbol that always appears aliased, find the alias that occurs most often, + // and designate that alias as the symbol's "default alias". Store all of these + // default aliases in a map that will be returned. + let mut result = AliasMap::new(); + for (symbol, status) in symbols_with_statuses { + if status.appears_unaliased { + status.aliases.clear(); + } else { + if let Some(default_entry) = status + .aliases + .iter() + .enumerate() + .max_by_key(|(i, (_, count))| (count, -(*i as i64))) + .map(|(_, entry)| entry.clone()) + { + status.aliases.clear(); + status.aliases.push(default_entry.clone()); + result.insert(symbol, default_entry.0); + } + } + } + + // Wherever a symbol is aliased as its default alias, remove the usage of the alias, + // because it will now be redundant. 
+ let mut alias_positions_to_clear = Vec::new(); + for variable in syntax_grammar.variables.iter_mut() { + alias_positions_to_clear.clear(); + + for (i, production) in variable.productions.iter().enumerate() { + for (j, step) in production.steps.iter().enumerate() { + let status = match step.symbol.kind { + SymbolType::External => &mut external_status_list[step.symbol.index], + SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index], + SymbolType::Terminal => &mut terminal_status_list[step.symbol.index], + SymbolType::End => panic!("Unexpected end token"), + }; + + // If this step is aliased as the symbol's default alias, then remove that alias. + if step.alias.is_some() + && step.alias.as_ref() == status.aliases.get(0).map(|t| &t.0) + { + let mut other_productions_must_use_this_alias_at_this_index = false; + for (other_i, other_production) in variable.productions.iter().enumerate() { + if other_i != i + && other_production.steps.len() > j + && other_production.steps[j].alias == step.alias + && result.get(&other_production.steps[j].symbol) != step.alias.as_ref() + { + other_productions_must_use_this_alias_at_this_index = true; + break; + } + } + + if !other_productions_must_use_this_alias_at_this_index { + alias_positions_to_clear.push((i, j)); + } + } + } + } + + for (production_index, step_index) in &alias_positions_to_clear { + variable.productions[*production_index].steps[*step_index].alias = None; + } + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::generate::grammars::{ + LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType, + }; + use crate::generate::nfa::Nfa; + + #[test] + fn test_extract_simple_aliases() { + let mut syntax_grammar = SyntaxGrammar { + variables: vec![ + SyntaxVariable { + name: "v1".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), + 
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), + ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), + ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true), + ], + }], + }, + SyntaxVariable { + name: "v2".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + // Token 0 is always aliased as "a1". + ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), + // Token 1 is aliased within rule `v1` above, but not here. + ProductionStep::new(Symbol::terminal(1)), + // Token 2 is aliased differently here than in `v1`. The alias from + // `v1` should be promoted to the default alias, because `v1` appears + // first in the grammar. + ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true), + // Token 3 is also aliased differently here than in `v1`. In this case, + // this alias should be promoted to the default alias, because it is + // used a greater number of times (twice). + ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true), + ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true), + ], + }], + }, + ], + extra_symbols: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + supertype_symbols: Vec::new(), + external_tokens: Vec::new(), + word_token: None, + }; + + let lexical_grammar = LexicalGrammar { + nfa: Nfa::new(), + variables: vec![ + LexicalVariable { + name: "t0".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + LexicalVariable { + name: "t1".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + LexicalVariable { + name: "t2".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + LexicalVariable { + name: "t3".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + ], + }; + + let default_aliases = extract_default_aliases(&mut 
syntax_grammar, &lexical_grammar); + assert_eq!(default_aliases.len(), 3); + + assert_eq!( + default_aliases.get(&Symbol::terminal(0)), + Some(&Alias { + value: "a1".to_string(), + is_named: true, + }) + ); + assert_eq!( + default_aliases.get(&Symbol::terminal(2)), + Some(&Alias { + value: "a3".to_string(), + is_named: true, + }) + ); + assert_eq!( + default_aliases.get(&Symbol::terminal(3)), + Some(&Alias { + value: "a6".to_string(), + is_named: true, + }) + ); + assert_eq!(default_aliases.get(&Symbol::terminal(1)), None); + + assert_eq!( + syntax_grammar.variables, + vec![ + SyntaxVariable { + name: "v1".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), + ProductionStep::new(Symbol::terminal(2)), + ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true), + ], + },], + }, + SyntaxVariable { + name: "v2".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(1)), + ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true), + ProductionStep::new(Symbol::terminal(3)), + ProductionStep::new(Symbol::terminal(3)), + ], + },], + }, + ] + ); + } +} diff --git a/cli/src/generate/prepare_grammar/extract_simple_aliases.rs b/cli/src/generate/prepare_grammar/extract_simple_aliases.rs deleted file mode 100644 index 6da009d5..00000000 --- a/cli/src/generate/prepare_grammar/extract_simple_aliases.rs +++ /dev/null @@ -1,223 +0,0 @@ -use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; -use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType}; - -#[derive(Clone, Default)] -struct SymbolStatus { - alias: Option, - conflicting: bool, -} - -pub(super) fn extract_simple_aliases( - syntax_grammar: &mut SyntaxGrammar, - lexical_grammar: 
&LexicalGrammar, -) -> AliasMap { - // Determine which symbols in the grammars are *always* aliased to a single name. - let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()]; - let mut non_terminal_status_list = - vec![SymbolStatus::default(); syntax_grammar.variables.len()]; - let mut external_status_list = - vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()]; - for variable in syntax_grammar.variables.iter() { - for production in variable.productions.iter() { - for step in production.steps.iter() { - let mut status = match step.symbol { - Symbol { - kind: SymbolType::External, - index, - } => &mut external_status_list[index], - Symbol { - kind: SymbolType::NonTerminal, - index, - } => &mut non_terminal_status_list[index], - Symbol { - kind: SymbolType::Terminal, - index, - } => &mut terminal_status_list[index], - Symbol { - kind: SymbolType::End, - .. - } => panic!("Unexpected end token"), - }; - - if step.alias.is_none() { - status.alias = None; - status.conflicting = true; - } - - if !status.conflicting { - if status.alias.is_none() { - status.alias = step.alias.clone(); - } else if status.alias != step.alias { - status.alias = None; - status.conflicting = true; - } - } - } - } - } - - // Remove the aliases for those symbols. - for variable in syntax_grammar.variables.iter_mut() { - for production in variable.productions.iter_mut() { - for step in production.steps.iter_mut() { - let status = match step.symbol { - Symbol { - kind: SymbolType::External, - index, - } => &external_status_list[index], - Symbol { - kind: SymbolType::NonTerminal, - index, - } => &non_terminal_status_list[index], - Symbol { - kind: SymbolType::Terminal, - index, - } => &terminal_status_list[index], - Symbol { - kind: SymbolType::End, - .. - } => panic!("Unexpected end token"), - }; - - if status.alias.is_some() { - step.alias = None; - } - } - } - } - - // Populate a map of the symbols to their aliases. 
- let mut result = AliasMap::new(); - for (i, status) in terminal_status_list.into_iter().enumerate() { - if let Some(alias) = status.alias { - result.insert(Symbol::terminal(i), alias); - } - } - for (i, status) in non_terminal_status_list.into_iter().enumerate() { - if let Some(alias) = status.alias { - result.insert(Symbol::non_terminal(i), alias); - } - } - for (i, status) in external_status_list.into_iter().enumerate() { - if let Some(alias) = status.alias { - result.insert(Symbol::external(i), alias); - } - } - result -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::generate::grammars::{ - LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType, - }; - use crate::generate::nfa::Nfa; - - #[test] - fn test_extract_simple_aliases() { - let mut syntax_grammar = SyntaxGrammar { - variables: vec![ - SyntaxVariable { - name: "v1".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), - ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), - ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), - ], - }], - }, - SyntaxVariable { - name: "v2".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - // Token 0 is always aliased as "a1". - ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), - // Token 1 is aliased above, but not here. - ProductionStep::new(Symbol::terminal(1)), - // Token 2 is aliased differently than above. 
- ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true), - ], - }], - }, - ], - extra_symbols: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), - supertype_symbols: Vec::new(), - external_tokens: Vec::new(), - word_token: None, - }; - - let lexical_grammar = LexicalGrammar { - nfa: Nfa::new(), - variables: vec![ - LexicalVariable { - name: "t1".to_string(), - kind: VariableType::Anonymous, - implicit_precedence: 0, - start_state: 0, - }, - LexicalVariable { - name: "t2".to_string(), - kind: VariableType::Anonymous, - implicit_precedence: 0, - start_state: 0, - }, - LexicalVariable { - name: "t3".to_string(), - kind: VariableType::Anonymous, - implicit_precedence: 0, - start_state: 0, - }, - ], - }; - - let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar); - assert_eq!(simple_aliases.len(), 1); - assert_eq!( - simple_aliases[&Symbol::terminal(0)], - Alias { - value: "a1".to_string(), - is_named: true, - } - ); - - assert_eq!( - syntax_grammar.variables, - vec![ - SyntaxVariable { - name: "v1".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - // 'Simple' alias removed - ProductionStep::new(Symbol::terminal(0)), - // Other aliases unchanged - ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), - ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), - ], - },], - }, - SyntaxVariable { - name: "v2".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::terminal(1)), - ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true), - ], - },], - }, - ] - ); - } -} diff --git a/cli/src/generate/prepare_grammar/mod.rs b/cli/src/generate/prepare_grammar/mod.rs index 029483d3..8b094c56 100644 --- a/cli/src/generate/prepare_grammar/mod.rs +++ b/cli/src/generate/prepare_grammar/mod.rs 
@@ -1,6 +1,6 @@ mod expand_repeats; mod expand_tokens; -mod extract_simple_aliases; +mod extract_default_aliases; mod extract_tokens; mod flatten_grammar; mod intern_symbols; @@ -8,7 +8,7 @@ mod process_inlines; use self::expand_repeats::expand_repeats; pub(crate) use self::expand_tokens::expand_tokens; -use self::extract_simple_aliases::extract_simple_aliases; +use self::extract_default_aliases::extract_default_aliases; use self::extract_tokens::extract_tokens; use self::flatten_grammar::flatten_grammar; use self::intern_symbols::intern_symbols; @@ -52,7 +52,7 @@ pub(crate) fn prepare_grammar( let syntax_grammar = expand_repeats(syntax_grammar); let mut syntax_grammar = flatten_grammar(syntax_grammar)?; let lexical_grammar = expand_tokens(lexical_grammar)?; - let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar); + let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar); let inlines = process_inlines(&syntax_grammar); - Ok((syntax_grammar, lexical_grammar, inlines, simple_aliases)) + Ok((syntax_grammar, lexical_grammar, inlines, default_aliases)) } diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index f7f788d0..e1e75ee1 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -65,7 +65,7 @@ struct Generator { keyword_capture_token: Option, syntax_grammar: SyntaxGrammar, lexical_grammar: LexicalGrammar, - simple_aliases: AliasMap, + default_aliases: AliasMap, symbol_order: HashMap, symbol_ids: HashMap, alias_ids: HashMap, @@ -198,10 +198,10 @@ impl Generator { // public-facing symbol. If one of the symbols is not aliased, choose that one // to be the public-facing symbol. Otherwise, pick the symbol with the lowest // numeric value. 
- if let Some(alias) = self.simple_aliases.get(symbol) { + if let Some(alias) = self.default_aliases.get(symbol) { let kind = alias.kind(); for other_symbol in &self.parse_table.symbols { - if let Some(other_alias) = self.simple_aliases.get(other_symbol) { + if let Some(other_alias) = self.default_aliases.get(other_symbol) { if other_symbol < mapping && other_alias == alias { mapping = other_symbol; } @@ -361,7 +361,7 @@ impl Generator { indent!(self); for symbol in self.parse_table.symbols.iter() { let name = self.sanitize_string( - self.simple_aliases + self.default_aliases .get(symbol) .map(|alias| alias.value.as_str()) .unwrap_or(self.metadata_for_symbol(*symbol).0), @@ -444,7 +444,7 @@ impl Generator { for symbol in &self.parse_table.symbols { add_line!(self, "[{}] = {{", self.symbol_ids[&symbol]); indent!(self); - if let Some(Alias { is_named, .. }) = self.simple_aliases.get(symbol) { + if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) { add_line!(self, ".visible = true,"); add_line!(self, ".named = {},", is_named); } else { @@ -525,7 +525,7 @@ impl Generator { for step in &production.steps { if let Some(alias) = &step.alias { if step.symbol.is_non_terminal() - && !self.simple_aliases.contains_key(&step.symbol) + && Some(alias) != self.default_aliases.get(&step.symbol) { if self.symbol_ids.contains_key(&step.symbol) { let alias_ids = @@ -1545,7 +1545,7 @@ impl Generator { /// for keyword capture, if any. /// * `syntax_grammar` - The syntax grammar extracted from the language's grammar /// * `lexical_grammar` - The lexical grammar extracted from the language's grammar -/// * `simple_aliases` - A map describing the global rename rules that should apply. +/// * `default_aliases` - A map describing the global rename rules that should apply. /// the keys are symbols that are *always* aliased in the same way, and the values /// are the aliases that are applied to those symbols. 
/// * `next_abi` - A boolean indicating whether to opt into the new, unstable parse @@ -1558,7 +1558,7 @@ pub(crate) fn render_c_code( keyword_capture_token: Option, syntax_grammar: SyntaxGrammar, lexical_grammar: LexicalGrammar, - simple_aliases: AliasMap, + default_aliases: AliasMap, next_abi: bool, ) -> String { Generator { @@ -1572,7 +1572,7 @@ pub(crate) fn render_c_code( keyword_capture_token, syntax_grammar, lexical_grammar, - simple_aliases, + default_aliases, symbol_ids: HashMap::new(), symbol_order: HashMap::new(), alias_ids: HashMap::new(), diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 1f7ddaff..067bb6f9 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -367,6 +367,30 @@ fn test_query_errors_on_impossible_patterns() { }); } +#[test] +fn test_query_verifies_possible_patterns_with_aliased_parent_nodes() { + allocations::record(|| { + let ruby = get_language("ruby"); + + Query::new(ruby, "(destructured_parameter (identifier))").unwrap(); + + assert_eq!( + Query::new(ruby, "(destructured_parameter (string))",), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 24, + column: 24, + message: [ + "(destructured_parameter (string))", // + " ^", + ] + .join("\n") + }) + ); + }); +} + #[test] fn test_query_matches_with_simple_pattern() { allocations::record(|| { From 071f4e40f19b23e83e7b16ca9272d5a3ca1806df Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 28 Oct 2020 12:34:11 -0700 Subject: [PATCH 05/22] Fix generate error when there are aliases in unused rules --- cli/src/generate/render.rs | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index e1e75ee1..04f9e47b 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -519,7 +519,7 @@ impl Generator { } fn add_non_terminal_alias_map(&mut self) { - let mut aliases_by_symbol = HashMap::new(); + let mut 
alias_ids_by_symbol = HashMap::new(); for variable in &self.syntax_grammar.variables { for production in &variable.productions { for step in &production.steps { @@ -528,10 +528,13 @@ impl Generator { && Some(alias) != self.default_aliases.get(&step.symbol) { if self.symbol_ids.contains_key(&step.symbol) { - let alias_ids = - aliases_by_symbol.entry(step.symbol).or_insert(Vec::new()); - if let Err(i) = alias_ids.binary_search(&alias) { - alias_ids.insert(i, alias); + if let Some(alias_id) = self.alias_ids.get(&alias) { + let alias_ids = alias_ids_by_symbol + .entry(step.symbol) + .or_insert(Vec::new()); + if let Err(i) = alias_ids.binary_search(&alias_id) { + alias_ids.insert(i, alias_id); + } } } } @@ -540,19 +543,19 @@ impl Generator { } } - let mut aliases_by_symbol = aliases_by_symbol.iter().collect::>(); - aliases_by_symbol.sort_unstable_by_key(|e| e.0); + let mut alias_ids_by_symbol = alias_ids_by_symbol.iter().collect::>(); + alias_ids_by_symbol.sort_unstable_by_key(|e| e.0); add_line!(self, "static uint16_t ts_non_terminal_alias_map[] = {{"); indent!(self); - for (symbol, aliases) in aliases_by_symbol { + for (symbol, alias_ids) in alias_ids_by_symbol { let symbol_id = &self.symbol_ids[symbol]; let public_symbol_id = &self.symbol_ids[&self.symbol_map[&symbol]]; - add_line!(self, "{}, {},", symbol_id, 1 + aliases.len()); + add_line!(self, "{}, {},", symbol_id, 1 + alias_ids.len()); indent!(self); add_line!(self, "{},", public_symbol_id); - for alias in aliases { - add_line!(self, "{},", &self.alias_ids[&alias]); + for alias_id in alias_ids { + add_line!(self, "{},", alias_id); } dedent!(self); } From c2c63baf5bdba4768bed97427f5ac94f54df70df Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 28 Oct 2020 13:55:06 -0700 Subject: [PATCH 06/22] query: Fix escape sequence parsing in anonymous node patterns Fixes #776 Fixes #760 --- cli/src/tests/query_test.rs | 5 +- lib/src/array.h | 18 +++- lib/src/query.c | 178 ++++++++++++++---------------------- 3 files 
changed, 86 insertions(+), 115 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 067bb6f9..6bf6cbb0 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1475,6 +1475,7 @@ fn test_query_matches_with_anonymous_tokens() { r#" ";" @punctuation "&&" @operator + "\"" @quote "#, ) .unwrap(); @@ -1482,9 +1483,11 @@ fn test_query_matches_with_anonymous_tokens() { assert_query_matches( language, &query, - "foo(a && b);", + r#"foo(a && "b");"#, &[ (1, vec![("operator", "&&")]), + (2, vec![("quote", "\"")]), + (2, vec![("quote", "\"")]), (0, vec![("punctuation", ";")]), ], ); diff --git a/lib/src/array.h b/lib/src/array.h index 13117194..5ff5580a 100644 --- a/lib/src/array.h +++ b/lib/src/array.h @@ -52,14 +52,24 @@ extern "C" { (self)->size += (count)) #define array_push_all(self, other) \ - array_splice((self), (self)->size, 0, (other)->size, (other)->contents) + array_extend((self), (other)->size, (other)->contents) + +// Append `count` elements to the end of the array, reading their values from the +// `contents` pointer. +#define array_extend(self, count, contents) \ + array__splice( \ + (VoidArray *)(self), array__elem_size(self), (self)->size, \ + 0, count, contents \ + ) // Remove `old_count` elements from the array starting at the given `index`. At // the same index, insert `new_count` new elements, reading their values from the // `new_contents` pointer. -#define array_splice(self, index, old_count, new_count, new_contents) \ - array__splice((VoidArray *)(self), array__elem_size(self), index, old_count, \ - new_count, new_contents) +#define array_splice(self, index, old_count, new_count, new_contents) \ + array__splice( \ + (VoidArray *)(self), array__elem_size(self), index, \ + old_count, new_count, new_contents \ + ) // Insert one `element` into the array at the given `index`. 
#define array_insert(self, index, element) \ diff --git a/lib/src/query.c b/lib/src/query.c index ae476c2a..bf0598ce 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -214,6 +214,7 @@ struct TSQuery { Array(TSQueryPredicateStep) predicate_steps; Array(QueryPattern) patterns; Array(StepOffset) step_offsets; + Array(char) string_buffer; const TSLanguage *language; uint16_t wildcard_root_pattern_count; TSSymbol *symbol_map; @@ -439,67 +440,6 @@ static uint16_t symbol_table_insert_name( return self->slices.size - 1; } -static uint16_t symbol_table_insert_name_with_escapes( - SymbolTable *self, - const char *escaped_name, - uint32_t escaped_length -) { - Slice slice = { - .offset = self->characters.size, - .length = 0, - }; - array_grow_by(&self->characters, escaped_length + 1); - - // Copy the contents of the literal into the characters buffer, processing escape - // sequences like \n and \". This needs to be done before checking if the literal - // is already present, in order to do the string comparison. - bool is_escaped = false; - for (unsigned i = 0; i < escaped_length; i++) { - const char *src = &escaped_name[i]; - char *dest = &self->characters.contents[slice.offset + slice.length]; - if (is_escaped) { - switch (*src) { - case 'n': - *dest = '\n'; - break; - case 'r': - *dest = '\r'; - break; - case 't': - *dest = '\t'; - break; - case '0': - *dest = '\0'; - break; - default: - *dest = *src; - break; - } - is_escaped = false; - slice.length++; - } else { - if (*src == '\\') { - is_escaped = true; - } else { - *dest = *src; - slice.length++; - } - } - } - - // If the string is already present, remove the redundant content from the characters - // buffer and return the existing id. 
- int id = symbol_table_id_for_name(self, &self->characters.contents[slice.offset], slice.length); - if (id >= 0) { - self->characters.size -= (escaped_length + 1); - return id; - } - - self->characters.contents[slice.offset + slice.length] = 0; - array_push(&self->slices, slice); - return self->slices.size - 1; -} - /************ * QueryStep ************/ @@ -1393,6 +1333,59 @@ static void ts_query__finalize_steps(TSQuery *self) { } } +static TSQueryError ts_query__parse_string_literal( + TSQuery *self, + Stream *stream +) { + const char *string_start = stream->input; + if (stream->next != '"') return TSQueryErrorSyntax; + stream_advance(stream); + const char *prev_position = stream->input; + + bool is_escaped = false; + array_clear(&self->string_buffer); + for (;;) { + if (is_escaped) { + is_escaped = false; + switch (stream->next) { + case 'n': + array_push(&self->string_buffer, '\n'); + break; + case 'r': + array_push(&self->string_buffer, '\r'); + break; + case 't': + array_push(&self->string_buffer, '\t'); + break; + case '0': + array_push(&self->string_buffer, '\0'); + break; + default: + array_extend(&self->string_buffer, stream->next_size, stream->input); + break; + } + prev_position = stream->input + stream->next_size; + } else { + if (stream->next == '\\') { + array_extend(&self->string_buffer, (stream->input - prev_position), prev_position); + prev_position = stream->input + 1; + is_escaped = true; + } else if (stream->next == '"') { + array_extend(&self->string_buffer, (stream->input - prev_position), prev_position); + stream_advance(stream); + return TSQueryErrorNone; + } else if (stream->next == '\n') { + stream_reset(stream, string_start); + return TSQueryErrorSyntax; + } + } + if (!stream_advance(stream)) { + stream_reset(stream, string_start); + return TSQueryErrorSyntax; + } + } +} + // Parse a single predicate associated with a pattern, adding it to the // query's internal `predicate_steps` array. 
Predicates are arbitrary // S-expressions associated with a pattern which are meant to be handled at @@ -1458,44 +1451,17 @@ static TSQueryError ts_query__parse_predicate( // Parse a string literal else if (stream->next == '"') { - stream_advance(stream); - - // Parse the string content - bool is_escaped = false; - const char *string_content = stream->input; - for (;;) { - if (is_escaped) { - is_escaped = false; - } else { - if (stream->next == '\\') { - is_escaped = true; - } else if (stream->next == '"') { - break; - } else if (stream->next == '\n') { - stream_reset(stream, string_content - 1); - return TSQueryErrorSyntax; - } - } - if (!stream_advance(stream)) { - stream_reset(stream, string_content - 1); - return TSQueryErrorSyntax; - } - } - uint32_t length = stream->input - string_content; - - // Add a step for the node - uint16_t id = symbol_table_insert_name_with_escapes( + TSQueryError e = ts_query__parse_string_literal(self, stream); + if (e) return e; + uint16_t id = symbol_table_insert_name( &self->predicate_values, - string_content, - length + self->string_buffer.contents, + self->string_buffer.size ); array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, .value_id = id, })); - - if (stream->next != '"') return TSQueryErrorSyntax; - stream_advance(stream); } // Parse a bare symbol @@ -1761,33 +1727,22 @@ static TSQueryError ts_query__parse_pattern( // Parse a double-quoted anonymous leaf node expression else if (stream->next == '"') { - stream_advance(stream); - - // Parse the string content - const char *string_content = stream->input; - while (stream->next != '"') { - if (!stream_advance(stream)) { - stream_reset(stream, string_content - 1); - return TSQueryErrorSyntax; - } - } - uint32_t length = stream->input - string_content; + const char *string_start = stream->input; + TSQueryError e = ts_query__parse_string_literal(self, stream); + if (e) return e; // Add a step for the node TSSymbol symbol = 
ts_language_symbol_for_name( self->language, - string_content, - length, + self->string_buffer.contents, + self->string_buffer.size, false ); if (!symbol) { - stream_reset(stream, string_content); + stream_reset(stream, string_start + 1); return TSQueryErrorNodeType; } array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); - - if (stream->next != '"') return TSQueryErrorSyntax; - stream_advance(stream); } // Parse a field-prefixed pattern @@ -1977,6 +1932,7 @@ TSQuery *ts_query_new( .predicate_steps = array_new(), .patterns = array_new(), .step_offsets = array_new(), + .string_buffer = array_new(), .symbol_map = symbol_map, .wildcard_root_pattern_count = 0, .language = language, @@ -2056,6 +2012,7 @@ TSQuery *ts_query_new( } ts_query__finalize_steps(self); + array_delete(&self->string_buffer); return self; } @@ -2066,6 +2023,7 @@ void ts_query_delete(TSQuery *self) { array_delete(&self->predicate_steps); array_delete(&self->patterns); array_delete(&self->step_offsets); + array_delete(&self->string_buffer); symbol_table_delete(&self->captures); symbol_table_delete(&self->predicate_values); ts_free(self->symbol_map); From 199273419562c66d7c1225213a55a186394422c2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 28 Oct 2020 14:12:56 -0700 Subject: [PATCH 07/22] 0.17.2 --- Cargo.lock | 2 +- cli/Cargo.toml | 2 +- cli/npm/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ea918eb6..d052511d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -832,7 +832,7 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.17.1" +version = "0.17.2" dependencies = [ "ansi_term", "atty", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 21a8fa0f..a81ce16c 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-cli" description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.17.1" +version = "0.17.2" authors = ["Max 
Brunsfeld "] edition = "2018" license = "MIT" diff --git a/cli/npm/package.json b/cli/npm/package.json index 42f75c98..f327698c 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.17.1", + "version": "0.17.2", "author": "Max Brunsfeld", "license": "MIT", "repository": { From 3859e52198468c6328cb7508f747f51b4aef13be Mon Sep 17 00:00:00 2001 From: ikrima Date: Fri, 16 Oct 2020 12:42:26 -0700 Subject: [PATCH 08/22] add custom allocation override --- lib/src/alloc.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/lib/src/alloc.h b/lib/src/alloc.h index 0e0927a9..52e5ad3d 100644 --- a/lib/src/alloc.h +++ b/lib/src/alloc.h @@ -42,7 +42,20 @@ static inline bool ts_toggle_allocation_recording(bool value) { return false; } -static inline void *ts_malloc(size_t size) { +#ifndef ts_malloc +#define ts_malloc(_sz) ts_malloc_dflt(_sz) +#endif +#ifndef ts_calloc +#define ts_calloc(_cnt,_sz) ts_calloc_dflt(_cnt,_sz) +#endif +#ifndef ts_realloc +#define ts_realloc(_ptr,_sz) ts_realloc_dflt(_ptr,_sz) +#endif +#ifndef ts_free +#define ts_free(_ptr) ts_free_dflt(_ptr) +#endif + +static inline void *ts_malloc_dflt(size_t size) { void *result = malloc(size); if (size > 0 && !result) { fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size); @@ -51,7 +64,7 @@ static inline void *ts_malloc(size_t size) { return result; } -static inline void *ts_calloc(size_t count, size_t size) { +static inline void *ts_calloc_dflt(size_t count, size_t size) { void *result = calloc(count, size); if (count > 0 && !result) { fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size); @@ -60,7 +73,7 @@ static inline void *ts_calloc(size_t count, size_t size) { return result; } -static inline void *ts_realloc(void *buffer, size_t size) { +static inline void *ts_realloc_dflt(void *buffer, size_t size) { void *result = realloc(buffer, size); if (size > 0 && !result) { 
fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size); @@ -69,7 +82,7 @@ static inline void *ts_realloc(void *buffer, size_t size) { return result; } -static inline void ts_free(void *buffer) { +static inline void ts_free_dflt(void *buffer) { free(buffer); } From 336517fdc92fbc8ffcba199e3a4cd55e1c516181 Mon Sep 17 00:00:00 2001 From: ikrima Date: Fri, 16 Oct 2020 15:18:54 -0700 Subject: [PATCH 09/22] address CR comments - replace _dflt with _default - allow override in TREE_SITTER_TEST path --- lib/src/alloc.h | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/lib/src/alloc.h b/lib/src/alloc.h index 52e5ad3d..c6a3331b 100644 --- a/lib/src/alloc.h +++ b/lib/src/alloc.h @@ -9,6 +9,21 @@ extern "C" { #include #include +// Allow clients to override allocation functions + +#ifndef ts_malloc +#define ts_malloc(size) ts_malloc_default(size) +#endif +#ifndef ts_calloc +#define ts_calloc(count,size) ts_calloc_default(count,size) +#endif +#ifndef ts_realloc +#define ts_realloc(buffer,size) ts_realloc_default(buffer,size) +#endif +#ifndef ts_free +#define ts_free(buffer) ts_free_default(buffer) +#endif + #if defined(TREE_SITTER_TEST) void *ts_record_malloc(size_t); @@ -17,19 +32,19 @@ void *ts_record_realloc(void *, size_t); void ts_record_free(void *); bool ts_toggle_allocation_recording(bool); -static inline void *ts_malloc(size_t size) { +static inline void *ts_malloc_default(size_t size) { return ts_record_malloc(size); } -static inline void *ts_calloc(size_t count, size_t size) { +static inline void *ts_calloc_default(size_t count, size_t size) { return ts_record_calloc(count, size); } -static inline void *ts_realloc(void *buffer, size_t size) { +static inline void *ts_realloc_default(void *buffer, size_t size) { return ts_record_realloc(buffer, size); } -static inline void ts_free(void *buffer) { +static inline void ts_free_default(void *buffer) { ts_record_free(buffer); } @@ -42,20 +57,8 @@ 
static inline bool ts_toggle_allocation_recording(bool value) { return false; } -#ifndef ts_malloc -#define ts_malloc(_sz) ts_malloc_dflt(_sz) -#endif -#ifndef ts_calloc -#define ts_calloc(_cnt,_sz) ts_calloc_dflt(_cnt,_sz) -#endif -#ifndef ts_realloc -#define ts_realloc(_ptr,_sz) ts_realloc_dflt(_ptr,_sz) -#endif -#ifndef ts_free -#define ts_free(_ptr) ts_free_dflt(_ptr) -#endif -static inline void *ts_malloc_dflt(size_t size) { +static inline void *ts_malloc_default(size_t size) { void *result = malloc(size); if (size > 0 && !result) { fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size); @@ -64,7 +67,7 @@ static inline void *ts_malloc_dflt(size_t size) { return result; } -static inline void *ts_calloc_dflt(size_t count, size_t size) { +static inline void *ts_calloc_default(size_t count, size_t size) { void *result = calloc(count, size); if (count > 0 && !result) { fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size); @@ -73,7 +76,7 @@ static inline void *ts_calloc_dflt(size_t count, size_t size) { return result; } -static inline void *ts_realloc_dflt(void *buffer, size_t size) { +static inline void *ts_realloc_default(void *buffer, size_t size) { void *result = realloc(buffer, size); if (size > 0 && !result) { fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size); @@ -82,7 +85,7 @@ static inline void *ts_realloc_dflt(void *buffer, size_t size) { return result; } -static inline void ts_free_dflt(void *buffer) { +static inline void ts_free_default(void *buffer) { free(buffer); } From 23530ca599758a4d1d4c1393238b74830256e2db Mon Sep 17 00:00:00 2001 From: ikrima Date: Thu, 29 Oct 2020 09:23:58 -0700 Subject: [PATCH 10/22] CR fixes: don't allow override of allocfn during testing --- lib/src/alloc.h | 48 ++++++++++++++++++------------------------------ 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/lib/src/alloc.h b/lib/src/alloc.h index c6a3331b..cbedb71b 100644 --- a/lib/src/alloc.h +++ 
b/lib/src/alloc.h @@ -9,21 +9,6 @@ extern "C" { #include #include -// Allow clients to override allocation functions - -#ifndef ts_malloc -#define ts_malloc(size) ts_malloc_default(size) -#endif -#ifndef ts_calloc -#define ts_calloc(count,size) ts_calloc_default(count,size) -#endif -#ifndef ts_realloc -#define ts_realloc(buffer,size) ts_realloc_default(buffer,size) -#endif -#ifndef ts_free -#define ts_free(buffer) ts_free_default(buffer) -#endif - #if defined(TREE_SITTER_TEST) void *ts_record_malloc(size_t); @@ -32,24 +17,27 @@ void *ts_record_realloc(void *, size_t); void ts_record_free(void *); bool ts_toggle_allocation_recording(bool); -static inline void *ts_malloc_default(size_t size) { - return ts_record_malloc(size); -} - -static inline void *ts_calloc_default(size_t count, size_t size) { - return ts_record_calloc(count, size); -} - -static inline void *ts_realloc_default(void *buffer, size_t size) { - return ts_record_realloc(buffer, size); -} - -static inline void ts_free_default(void *buffer) { - ts_record_free(buffer); -} +#define ts_malloc ts_record_malloc +#define ts_calloc ts_record_calloc +#define ts_realloc ts_record_realloc +#define ts_free ts_record_free #else +// Allow clients to override allocation functions +#ifndef ts_malloc +#define ts_malloc ts_malloc_default +#endif +#ifndef ts_calloc +#define ts_calloc ts_calloc_default +#endif +#ifndef ts_realloc +#define ts_realloc ts_realloc_default +#endif +#ifndef ts_free +#define ts_free ts_free_default +#endif + #include static inline bool ts_toggle_allocation_recording(bool value) { From a99676282f1f18c8187bb02171ad1f261ea3c9ca Mon Sep 17 00:00:00 2001 From: ikrima Date: Thu, 29 Oct 2020 09:36:44 -0700 Subject: [PATCH 11/22] noop: touch file to retrigger github checks --- lib/src/alloc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/src/alloc.h b/lib/src/alloc.h index cbedb71b..6e22a0ab 100644 --- a/lib/src/alloc.h +++ b/lib/src/alloc.h @@ -25,6 +25,7 @@ bool 
ts_toggle_allocation_recording(bool); #else // Allow clients to override allocation functions + #ifndef ts_malloc #define ts_malloc ts_malloc_default #endif From f07dda692e3a6f4f2229c3a064fa19b8be7bc225 Mon Sep 17 00:00:00 2001 From: Arthur Baars Date: Thu, 29 Oct 2020 18:05:24 +0100 Subject: [PATCH 12/22] Ensure "extras" symbols are included in the node-types.json file The symbols marked as "extras" are the start symbols of secondary languages. These should be included in the aliases map just as done for start symbol of the main language to ensure their node type and field information is included in the node-types.json file. --- cli/src/generate/node_types.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 7962c7f3..a575d197 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -424,6 +424,14 @@ pub(crate) fn generate_node_types_json( aliases }); } + for extra_symbol in &syntax_grammar.extra_symbols { + if !simple_aliases.contains_key(extra_symbol) { + aliases_by_symbol + .entry(*extra_symbol) + .or_insert(HashSet::new()) + .insert(None); + } + } for variable in &syntax_grammar.variables { for production in &variable.productions { for step in &production.steps { From bcd48e3b9402326c4a34caed68cf9193edb91f37 Mon Sep 17 00:00:00 2001 From: "Alexandre A. 
Muller" Date: Thu, 29 Oct 2020 19:08:55 +0000 Subject: [PATCH 13/22] add link to VHDL parser --- docs/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/index.md b/docs/index.md index 03c1a60c..d9410cc2 100644 --- a/docs/index.md +++ b/docs/index.md @@ -50,6 +50,7 @@ Parsers for these languages are fairly complete: * [TOML](https://github.com/ikatyang/tree-sitter-toml) * [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript) * [Verilog](https://github.com/tree-sitter/tree-sitter-verilog) +* [VHDL](https://github.com/alemuller/tree-sitter-vhdl) * [Vue](https://github.com/ikatyang/tree-sitter-vue) * [YAML](https://github.com/ikatyang/tree-sitter-yaml) * [WASM](https://github.com/wasm-lsp/tree-sitter-wasm) From d62e7f7d75f0417c0e1c35a9548031d16b31328e Mon Sep 17 00:00:00 2001 From: Arthur Baars Date: Thu, 29 Oct 2020 19:02:30 +0100 Subject: [PATCH 14/22] Add test case with extra_symbols --- cli/src/generate/node_types.rs | 115 +++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index a575d197..9fb1fe8d 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -730,9 +730,18 @@ mod tests { kind: VariableType::Named, rule: Rule::string("x"), }, + // This rule is not reachable from the start symbol + // so it won't be present in the node_types + Variable { + name: "v3".to_string(), + kind: VariableType::Named, + rule: Rule::string("y"), + }, ], }); + assert_eq!(node_types.len(), 3); + assert_eq!( node_types[0], NodeInfoJSON { @@ -792,6 +801,112 @@ mod tests { ); } + #[test] + fn test_node_types_simple_extras() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_symbols: vec![Rule::named("v3")], + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![ + Variable { + name: "v1".to_string(), + 
kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::field("f1".to_string(), Rule::named("v2")), + Rule::field("f2".to_string(), Rule::string(";")), + ]), + }, + Variable { + name: "v2".to_string(), + kind: VariableType::Named, + rule: Rule::string("x"), + }, + // This rule is not reachable from the start symbol, but + // it is reachable from the 'extra_symbols' so it + // should be present in the node_types + Variable { + name: "v3".to_string(), + kind: VariableType::Named, + rule: Rule::string("y"), + }, + ], + }); + + assert_eq!(node_types.len(), 4); + + assert_eq!( + node_types[0], + NodeInfoJSON { + kind: "v1".to_string(), + named: true, + subtypes: None, + children: None, + fields: Some( + vec![ + ( + "f1".to_string(), + FieldInfoJSON { + multiple: false, + required: true, + types: vec![NodeTypeJSON { + kind: "v2".to_string(), + named: true, + }] + } + ), + ( + "f2".to_string(), + FieldInfoJSON { + multiple: false, + required: true, + types: vec![NodeTypeJSON { + kind: ";".to_string(), + named: false, + }] + } + ), + ] + .into_iter() + .collect() + ) + } + ); + assert_eq!( + node_types[1], + NodeInfoJSON { + kind: ";".to_string(), + named: false, + subtypes: None, + children: None, + fields: None + } + ); + assert_eq!( + node_types[2], + NodeInfoJSON { + kind: "v2".to_string(), + named: true, + subtypes: None, + children: None, + fields: None + } + ); + assert_eq!( + node_types[3], + NodeInfoJSON { + kind: "v3".to_string(), + named: true, + subtypes: None, + children: None, + fields: None + } + ); + } + #[test] fn test_node_types_with_supertypes() { let node_types = get_node_types(InputGrammar { From 505695040d9443e17d53bab4e39b498a8405e468 Mon Sep 17 00:00:00 2001 From: Stafford Brunk Date: Fri, 30 Oct 2020 06:57:04 -0600 Subject: [PATCH 15/22] Update TS definitions to support the Query API --- lib/binding_web/tree-sitter-web.d.ts | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/binding_web/tree-sitter-web.d.ts 
b/lib/binding_web/tree-sitter-web.d.ts index 7ddae952..6958a9bf 100644 --- a/lib/binding_web/tree-sitter-web.d.ts +++ b/lib/binding_web/tree-sitter-web.d.ts @@ -37,7 +37,7 @@ declare module 'web-tree-sitter' { export type Logger = ( message: string, - params: {[param: string]: string}, + params: { [param: string]: string }, type: "parse" | "lex" ) => void; @@ -131,8 +131,15 @@ declare module 'web-tree-sitter' { readonly version: number; readonly fieldCount: number; - fieldNameForId(fieldId: number): string | null - fieldIdForName(fieldName: string): number | null + fieldNameForId(fieldId: number): string | null; + fieldIdForName(fieldName: string): number | null; + query(source: string): Query; + } + + class Query { + delete(): void; + matches(node: SyntaxNode, startPosition?: Point, endPosition?: Point); + captures(node: SyntaxNode, startPosition?: Point, endPosition?: Point); } } From a7a6139e70ea182ebf09aef8413d9916e20d9afc Mon Sep 17 00:00:00 2001 From: Stafford Brunk Date: Fri, 30 Oct 2020 10:20:12 -0600 Subject: [PATCH 16/22] Add additional Query API typedefs based on Elm Language Server's overrides --- lib/binding_web/tree-sitter-web.d.ts | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/lib/binding_web/tree-sitter-web.d.ts b/lib/binding_web/tree-sitter-web.d.ts index 6958a9bf..ae76e803 100644 --- a/lib/binding_web/tree-sitter-web.d.ts +++ b/lib/binding_web/tree-sitter-web.d.ts @@ -48,6 +48,7 @@ declare module 'web-tree-sitter' { ) => string | null; export interface SyntaxNode { + id: number; tree: Tree; type: string; isNamed: boolean; @@ -136,10 +137,23 @@ declare module 'web-tree-sitter' { query(source: string): Query; } + interface QueryResult { + pattern: number; + captures: { name: string; node: SyntaxNode }[]; + } + + interface PredicateResult { + operator: string; + operands: { name: string; type: string }[]; + } + class Query { + captureNames: string[]; + delete(): void; - matches(node: SyntaxNode, startPosition?: 
Point, endPosition?: Point); - captures(node: SyntaxNode, startPosition?: Point, endPosition?: Point); + matches(node: SyntaxNode, startPosition?: Point, endPosition?: Point): QueryResult[]; + captures(node: SyntaxNode, startPosition?: Point, endPosition?: Point): QueryResult[]; + predicatesForPattern(patternIndex: number): PredicateResult[]; } } From 3497f34dd78b960ef30f2aa18b3d03fc517a1a84 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 2 Nov 2020 13:43:28 -0800 Subject: [PATCH 17/22] Fix parser-generation bugs introduced in #782 --- cli/src/generate/node_types.rs | 33 ++++++------ cli/src/generate/render.rs | 93 ++++++++++++++++------------------ 2 files changed, 62 insertions(+), 64 deletions(-) diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 9fb1fe8d..bc5a836f 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -146,7 +146,7 @@ impl ChildQuantity { pub(crate) fn get_variable_info( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, - simple_aliases: &AliasMap, + default_aliases: &AliasMap, ) -> Result> { let child_type_is_visible = |t: &ChildType| { variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous @@ -185,7 +185,7 @@ pub(crate) fn get_variable_info( let child_symbol = step.symbol; let child_type = if let Some(alias) = &step.alias { ChildType::Aliased(alias.clone()) - } else if let Some(alias) = simple_aliases.get(&step.symbol) { + } else if let Some(alias) = default_aliases.get(&step.symbol) { ChildType::Aliased(alias.clone()) } else { ChildType::Normal(child_symbol) @@ -358,7 +358,7 @@ pub(crate) fn get_variable_info( pub(crate) fn generate_node_types_json( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, - simple_aliases: &AliasMap, + default_aliases: &AliasMap, variable_info: &Vec, ) -> Vec { let mut node_types_json = BTreeMap::new(); @@ -369,7 +369,7 @@ pub(crate) fn generate_node_types_json( named: 
alias.is_named, }, ChildType::Normal(symbol) => { - if let Some(alias) = simple_aliases.get(&symbol) { + if let Some(alias) = default_aliases.get(&symbol) { NodeTypeJSON { kind: alias.value.clone(), named: alias.is_named, @@ -417,7 +417,7 @@ pub(crate) fn generate_node_types_json( }; let mut aliases_by_symbol = HashMap::new(); - for (symbol, alias) in simple_aliases { + for (symbol, alias) in default_aliases { aliases_by_symbol.insert(*symbol, { let mut aliases = HashSet::new(); aliases.insert(Some(alias.clone())); @@ -425,7 +425,7 @@ pub(crate) fn generate_node_types_json( }); } for extra_symbol in &syntax_grammar.extra_symbols { - if !simple_aliases.contains_key(extra_symbol) { + if !default_aliases.contains_key(extra_symbol) { aliases_by_symbol .entry(*extra_symbol) .or_insert(HashSet::new()) @@ -435,12 +435,15 @@ pub(crate) fn generate_node_types_json( for variable in &syntax_grammar.variables { for production in &variable.productions { for step in &production.steps { - if !simple_aliases.contains_key(&step.symbol) { - aliases_by_symbol - .entry(step.symbol) - .or_insert(HashSet::new()) - .insert(step.alias.clone()); - } + aliases_by_symbol + .entry(step.symbol) + .or_insert(HashSet::new()) + .insert( + step.alias + .as_ref() + .or_else(|| default_aliases.get(&step.symbol)) + .cloned(), + ); } } } @@ -1808,14 +1811,14 @@ mod tests { } fn get_node_types(grammar: InputGrammar) -> Vec { - let (syntax_grammar, lexical_grammar, _, simple_aliases) = + let (syntax_grammar, lexical_grammar, _, default_aliases) = prepare_grammar(&grammar).unwrap(); let variable_info = - get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases).unwrap(); + get_variable_info(&syntax_grammar, &lexical_grammar, &default_aliases).unwrap(); generate_node_types_json( &syntax_grammar, &lexical_grammar, - &simple_aliases, + &default_aliases, &variable_info, ) } diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 04f9e47b..58d99cc4 100644 --- 
a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -143,49 +143,6 @@ impl Generator { self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers); } - let mut field_names = Vec::new(); - for production_info in &self.parse_table.production_infos { - for field_name in production_info.field_map.keys() { - field_names.push(field_name); - } - - for alias in &production_info.alias_sequence { - if let Some(alias) = &alias { - let alias_kind = alias.kind(); - let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { - let (name, kind) = self.metadata_for_symbol(*symbol); - name == alias.value && kind == alias_kind - }); - let alias_id = if let Some(symbol) = matching_symbol { - self.symbol_ids[&symbol].clone() - } else if alias.is_named { - format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) - } else { - format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) - }; - self.alias_ids.entry(alias.clone()).or_insert(alias_id); - } - } - } - - self.unique_aliases = self - .alias_ids - .keys() - .filter(|alias| { - self.parse_table - .symbols - .iter() - .cloned() - .find(|symbol| { - let (name, kind) = self.metadata_for_symbol(*symbol); - name == alias.value && kind == alias.kind() - }) - .is_none() - }) - .cloned() - .collect(); - self.unique_aliases.sort_unstable(); - self.symbol_map = self .parse_table .symbols @@ -230,13 +187,51 @@ impl Generator { }) .collect(); - field_names.sort_unstable(); - field_names.dedup(); - self.field_names = field_names.into_iter().cloned().collect(); + for production_info in &self.parse_table.production_infos { + // Build a list of all field names + for field_name in production_info.field_map.keys() { + if let Err(i) = self.field_names.binary_search(&field_name) { + self.field_names.insert(i, field_name.clone()); + } + } - // If we are opting in to the new unstable language ABI, then use the concept of - // "small parse states". 
Otherwise, use the same representation for all parse - // states. + for alias in &production_info.alias_sequence { + // Generate a mapping from aliases to C identifiers. + if let Some(alias) = &alias { + let existing_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { + if let Some(default_alias) = self.default_aliases.get(symbol) { + default_alias == alias + } else { + let (name, kind) = self.metadata_for_symbol(*symbol); + name == alias.value && kind == alias.kind() + } + }); + + // Some aliases match an existing symbol in the grammar. + let alias_id; + if let Some(existing_symbol) = existing_symbol { + alias_id = self.symbol_ids[&self.symbol_map[&existing_symbol]].clone(); + } + // Other aliases don't match any existing symbol, and need their own identifiers. + else { + if let Err(i) = self.unique_aliases.binary_search(alias) { + self.unique_aliases.insert(i, alias.clone()); + } + + alias_id = if alias.is_named { + format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) + } else { + format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) + }; + } + + self.alias_ids.entry(alias.clone()).or_insert(alias_id); + } + } + } + + // Determine which states should use the "small state" representation, and which should + // use the normal array representation. 
let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2); self.large_state_count = self .parse_table From 99cd283e39d8dfb766fb365262fd08a419dd20a2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 2 Nov 2020 14:07:39 -0800 Subject: [PATCH 18/22] query: Fix detection of repeated field names Fixes #790 --- cli/src/tests/query_test.rs | 27 +++++++++++++++++++++++++++ lib/src/tree_cursor.c | 14 +++++++++----- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 6bf6cbb0..082686ac 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1835,6 +1835,33 @@ fn test_query_matches_with_no_captures() { }); } +#[test] +fn test_query_matches_with_repeated_fields() { + allocations::record(|| { + let language = get_language("c"); + let query = Query::new( + language, + "(field_declaration declarator: (field_identifier) @field)", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + struct S { + int a, b, c; + } + ", + &[ + (0, vec![("field", "a")]), + (0, vec![("field", "b")]), + (0, vec![("field", "c")]), + ], + ); + }); +} + #[test] fn test_query_captures_basic() { allocations::record(|| { diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index 8af44a34..98b86605 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -330,7 +330,7 @@ void ts_tree_cursor_current_status( } } - #undef subtree_metadata + #undef subtree_symbol if (!ts_subtree_extra(*entry->subtree)) { const TSFieldMapEntry *field_map, *field_map_end; @@ -345,7 +345,6 @@ void ts_tree_cursor_current_status( for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { if (!i->inherited && i->child_index == entry->structural_child_index) { *field_id = i->field_id; - *can_have_later_siblings_with_this_field = false; break; } } @@ -354,9 +353,14 @@ void ts_tree_cursor_current_status( // Determine if the current node can have later siblings with the 
same field name. if (*field_id) { for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { - if (i->field_id == *field_id && i->child_index > entry->structural_child_index) { - *can_have_later_siblings_with_this_field = true; - break; + if (i->field_id == *field_id) { + if ( + i->child_index > entry->structural_child_index || + (i->child_index == entry->structural_child_index && *has_later_named_siblings) + ) { + *can_have_later_siblings_with_this_field = true; + break; + } } } } From c439a676cf169e88234f768ca0f69d42e5bd68c5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 2 Nov 2020 14:53:01 -0800 Subject: [PATCH 19/22] 0.17.3 --- Cargo.lock | 2 +- cli/Cargo.toml | 2 +- cli/npm/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d052511d..2c243973 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -832,7 +832,7 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.17.2" +version = "0.17.3" dependencies = [ "ansi_term", "atty", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index a81ce16c..48dbbff7 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-cli" description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.17.2" +version = "0.17.3" authors = ["Max Brunsfeld "] edition = "2018" license = "MIT" diff --git a/cli/npm/package.json b/cli/npm/package.json index f327698c..4c6dfe90 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.17.2", + "version": "0.17.3", "author": "Max Brunsfeld", "license": "MIT", "repository": { From 281e75d74d78b0cbb6441bf497fdef0988ab49e4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 2 Nov 2020 20:53:08 -0800 Subject: [PATCH 20/22] rust binding: 0.17.1 --- Cargo.lock | 2 +- lib/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 
2c243973..cd411095 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -824,7 +824,7 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.17.0" +version = "0.17.1" dependencies = [ "cc", "regex", diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 2d132788..8f88966f 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.17.0" +version = "0.17.1" authors = ["Max Brunsfeld "] license = "MIT" readme = "binding_rust/README.md" From 2f897b4d7333cb18dc1b62408cd38dad839f4789 Mon Sep 17 00:00:00 2001 From: Stafford Brunk Date: Tue, 3 Nov 2020 08:20:20 -0700 Subject: [PATCH 21/22] Change QueryResult to be QueryCapture and QueryMatch matches/captures return 2 different types of object so this change corrects the return types --- lib/binding_web/tree-sitter-web.d.ts | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/binding_web/tree-sitter-web.d.ts b/lib/binding_web/tree-sitter-web.d.ts index ae76e803..80084c11 100644 --- a/lib/binding_web/tree-sitter-web.d.ts +++ b/lib/binding_web/tree-sitter-web.d.ts @@ -137,9 +137,14 @@ declare module 'web-tree-sitter' { query(source: string): Query; } - interface QueryResult { + interface QueryCapture { + name: string; + node: SyntaxNode; + } + + interface QueryMatch { pattern: number; - captures: { name: string; node: SyntaxNode }[]; + captures: QueryCapture[]; } interface PredicateResult { @@ -151,8 +156,8 @@ declare module 'web-tree-sitter' { captureNames: string[]; delete(): void; - matches(node: SyntaxNode, startPosition?: Point, endPosition?: Point): QueryResult[]; - captures(node: SyntaxNode, startPosition?: Point, endPosition?: Point): QueryResult[]; + matches(node: SyntaxNode, startPosition?: Point, endPosition?: Point): QueryMatch[]; + captures(node: SyntaxNode, startPosition?: Point, endPosition?: Point): QueryCapture[]; predicatesForPattern(patternIndex: number): 
PredicateResult[]; } } From 4e86b76e8c0fc8218eff7dcaa2029cf778f260a7 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 3 Nov 2020 10:28:17 -0800 Subject: [PATCH 22/22] Update ruby error recovery test to reflect grammar tweaks --- test/fixtures/error_corpus/readme.md | 8 ++++++++ test/fixtures/error_corpus/ruby_errors.txt | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 test/fixtures/error_corpus/readme.md diff --git a/test/fixtures/error_corpus/readme.md b/test/fixtures/error_corpus/readme.md new file mode 100644 index 00000000..d8b5da09 --- /dev/null +++ b/test/fixtures/error_corpus/readme.md @@ -0,0 +1,8 @@ +The Error Corpus +================ + +This directory contains corpus tests that exercise error recovery in a variety of languages. + +These corpus tests provide a simple way of asserting that error recoveries are "reasonable" in a variety of situations. But they are also somewhat *overspecified*. It isn't critical that error recovery behaves *exactly* as these tests specify, just that most of the syntax tree is preserved despite the error. + +Sometimes these tests can start failing when changes are pushed to the parser repositories like `tree-sitter-ruby`, `tree-sitter-javascript`, etc. Usually, we just need to tweak the expected syntax tree. diff --git a/test/fixtures/error_corpus/ruby_errors.txt b/test/fixtures/error_corpus/ruby_errors.txt index 9c35781c..49dc2b32 100644 --- a/test/fixtures/error_corpus/ruby_errors.txt +++ b/test/fixtures/error_corpus/ruby_errors.txt @@ -14,6 +14,6 @@ c method: (identifier) (ERROR (heredoc_beginning)) arguments: (argument_list - (heredoc_body (heredoc_end)) + (heredoc_body (heredoc_content) (heredoc_end)) (identifier) (MISSING ")"))))