diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 0d078411..5c98c959 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -82,18 +82,29 @@ fn test_query_errors_on_invalid_syntax() { 1, [ "((identifier) ()", // - " ^", + " ^", ] .join("\n") )) ); assert_eq!( - Query::new(language, r#"((identifier) @x (eq? @x a"#), + Query::new(language, r#"((identifier) (#a)"#), Err(QueryError::Syntax( 1, [ - r#"((identifier) @x (eq? @x a"#, - r#" ^"#, + "((identifier) (#a)", // + " ^", + ] + .join("\n") + )) + ); + assert_eq!( + Query::new(language, r#"((identifier) @x (#eq? @x a"#), + Err(QueryError::Syntax( + 1, + [ + r#"((identifier) @x (#eq? @x a"#, + r#" ^"#, ] .join("\n") )) @@ -136,18 +147,23 @@ fn test_query_errors_on_invalid_conditions() { assert_eq!( Query::new(language, "((identifier) @id (@id))"), - Err(QueryError::Predicate( - "Expected predicate to start with a function name. Got @id.".to_string() + Err(QueryError::Syntax( + 1, + [ + "((identifier) @id (@id))", // + " ^" + ] + .join("\n") )) ); assert_eq!( - Query::new(language, "((identifier) @id (eq? @id))"), + Query::new(language, "((identifier) @id (#eq? @id))"), Err(QueryError::Predicate( - "Wrong number of arguments to eq? predicate. Expected 2, got 1.".to_string() + "Wrong number of arguments to #eq? predicate. Expected 2, got 1.".to_string() )) ); assert_eq!( - Query::new(language, "((identifier) @id (eq? @id @ok))"), + Query::new(language, "((identifier) @id (#eq? @id @ok))"), Err(QueryError::Capture(1, "ok".to_string())) ); }); @@ -365,8 +381,8 @@ fn test_query_matches_with_named_wildcard() { let query = Query::new( language, " - (return_statement (*) @the-return-value) - (binary_expression operator: * @the-operator) + (return_statement (_) @the-return-value) + (binary_expression operator: _ @the-operator) ", ) .unwrap(); @@ -397,7 +413,7 @@ fn test_query_matches_with_wildcard_at_the_root() { let query = Query::new( language, " - (* + (_ (comment) @doc . 
(function_declaration @@ -416,10 +432,10 @@ fn test_query_matches_with_wildcard_at_the_root() { let query = Query::new( language, " - (* (string) @a) - (* (number) @b) - (* (true) @c) - (* (false) @d) + (_ (string) @a) + (_ (number) @b) + (_ (true) @c) + (_ (false) @d) ", ) .unwrap(); @@ -461,7 +477,7 @@ fn test_query_matches_with_immediate_siblings() { .) (list . - (*) @first-element) + (_) @first-element) ", ) .unwrap(); @@ -490,17 +506,19 @@ fn test_query_matches_with_repeated_leaf_nodes() { let query = Query::new( language, " - (* + ( (comment)+ @doc . (class_declaration - name: (identifier) @name)) + name: (identifier) @name) + ) - (* + ( (comment)+ @doc . (function_declaration - name: (identifier) @name)) + name: (identifier) @name) + ) ", ) .unwrap(); @@ -602,7 +620,7 @@ fn test_query_matches_with_non_terminal_repetitions_within_root() { let query = Query::new( language, r#" - (* + (_ (expression_statement (identifier) @id)+) "#, @@ -657,18 +675,19 @@ fn test_query_matches_with_nested_repetitions() { } #[test] -fn test_query_matches_with_leading_optional_repeated_leaf_nodes() { +fn test_query_matches_with_leading_zero_or_more_repeated_leaf_nodes() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( language, " - (* - (comment)+? @doc + ( + (comment)* @doc . 
(function_declaration - name: (identifier) @name)) + name: (identifier) @name) + ) ", ) .unwrap(); @@ -792,7 +811,7 @@ fn test_query_matches_with_repeated_internal_nodes() { let query = Query::new( language, " - (* + (_ (method_definition (decorator (identifier) @deco)+ name: (property_identifier) @name)) @@ -1083,12 +1102,12 @@ fn test_query_captures_basic() { language, r#" (pair - key: * @method.def + key: _ @method.def (function name: (identifier) @method.alias)) (variable_declarator - name: * @function.def + name: _ @function.def value: (function name: (identifier) @function.alias)) @@ -1158,13 +1177,13 @@ fn test_query_captures_with_text_conditions() { language, r#" ((identifier) @constant - (match? @constant "^[A-Z]{2,}$")) + (#match? @constant "^[A-Z]{2,}$")) ((identifier) @constructor - (match? @constructor "^[A-Z]")) + (#match? @constructor "^[A-Z]")) ((identifier) @function.builtin - (eq? @function.builtin "require")) + (#eq? @function.builtin "require")) (identifier) @variable "#, @@ -1207,13 +1226,13 @@ fn test_query_captures_with_predicates() { language, r#" ((call_expression (identifier) @foo) - (set! name something) - (set! cool) - (something! @foo omg)) + (#set! name something) + (#set! cool) + (#something! @foo omg)) ((property_identifier) @bar - (is? cool) - (is-not? name something))"#, + (#is? cool) + (#is-not? name something))"#, ) .unwrap(); @@ -1259,13 +1278,13 @@ fn test_query_captures_with_quoted_predicate_args() { language, r#" ((call_expression (identifier) @foo) - (set! one "\"something\ngreat\"")) + (#set! one "\"something\ngreat\"")) ((identifier) - (set! two "\\s(\r?\n)*$")) + (#set! two "\\s(\r?\n)*$")) ((function_declaration) - (set! three "\"something\ngreat\"")) + (#set! 
three "\"something\ngreat\"")) "#, ) .unwrap(); @@ -1336,7 +1355,7 @@ fn test_query_captures_with_many_nested_results_without_fields() { language, r#" (pair - key: * @method-def + key: _ @method-def (arrow_function)) ":" @colon @@ -1403,7 +1422,7 @@ fn test_query_captures_with_many_nested_results_with_fields() { consequence: (member_expression object: (identifier) @right) alternative: (null)) - (eq? @left @right)) + (#eq? @left @right)) "#, ) .unwrap(); @@ -1596,7 +1615,7 @@ fn test_query_captures_with_matches_removed() { r#" (binary_expression left: (identifier) @left - operator: * @op + operator: _ @op right: (identifier) @right) "#, ) @@ -1689,7 +1708,7 @@ fn test_query_start_byte_for_pattern() { .trim_start(); let patterns_3 = " - ((identifier) @b (match? @b i)) + ((identifier) @b (#match? @b i)) (function_declaration name: (identifier) @c) (method_definition name: (identifier) @d) " @@ -1719,13 +1738,13 @@ fn test_query_capture_names() { r#" (if_statement condition: (binary_expression - left: * @left-operand + left: _ @left-operand operator: "||" - right: * @right-operand) + right: _ @right-operand) consequence: (statement_block) @body) (while_statement - condition:* @loop-condition) + condition: _ @loop-condition) "#, ) .unwrap(); diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index 41907a3c..fad8ebd8 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -6,46 +6,59 @@ use tree_sitter_tags::c_lib as c; use tree_sitter_tags::{Error, TagKind, TagsConfiguration, TagsContext}; const PYTHON_TAG_QUERY: &'static str = r#" -((function_definition - name: (identifier) @name - body: (block . (expression_statement (string) @doc))) @function - (strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")) +( + (function_definition + name: (identifier) @name + body: (block . (expression_statement (string) @doc))) @function + (#strip! 
@doc "(^['\"\\s]*)|(['\"\\s]*$)") +) + (function_definition name: (identifier) @name) @function -((class_definition - name: (identifier) @name - body: (block . (expression_statement (string) @doc))) @class - (strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")) + +( + (class_definition + name: (identifier) @name + body: (block + . (expression_statement (string) @doc))) @class + (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") +) + (class_definition name: (identifier) @name) @class + (call function: (identifier) @name) @call "#; const JS_TAG_QUERY: &'static str = r#" -((* - (comment)+ @doc . +( + (comment)* @doc . (class_declaration - name: (identifier) @name) @class) - (select-adjacent! @doc @class) - (strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")) + name: (identifier) @name) @class + (#select-adjacent! @doc @class) + (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") +) -((* - (comment)+ @doc . +( + (comment)* @doc . (method_definition - name: (property_identifier) @name) @method) - (select-adjacent! @doc @method) - (strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")) + name: (property_identifier) @name) @method + (#select-adjacent! @doc @method) + (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") +) -((* - (comment)+ @doc . +( + (comment)* @doc . (function_declaration - name: (identifier) @name) @function) - (select-adjacent! @doc @function) - (strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")) + name: (identifier) @name) @function + (#select-adjacent! @doc @function) + (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") +) -(call_expression function: (identifier) @name) @call - "#; +(call_expression + function: (identifier) @name) @call +"#; const RUBY_TAG_QUERY: &'static str = r#" (method @@ -55,7 +68,7 @@ const RUBY_TAG_QUERY: &'static str = r#" method: (identifier) @name) @call ((identifier) @name @call - (is-not? local)) + (#is-not? 
local)) "#; #[test] diff --git a/docs/section-2-using-parsers.md b/docs/section-2-using-parsers.md index 406e8364..839cacf3 100644 --- a/docs/section-2-using-parsers.md +++ b/docs/section-2-using-parsers.md @@ -24,19 +24,22 @@ script/build-lib Alternatively, you can use the library in a larger project by adding one source file to the project. This source file needs two directories to be in the include path when compiled: **source file:** -* `tree-sitter/lib/src/lib.c` + +- `tree-sitter/lib/src/lib.c` **include directories:** -* `tree-sitter/lib/src` -* `tree-sitter/lib/include` + +- `tree-sitter/lib/src` +- `tree-sitter/lib/include` ### The Basic Objects There are four main types of objects involved when using Tree-sitter: languages, parsers, syntax trees, and syntax nodes. In C, these are called `TSLanguage`, `TSParser`, `TSTree`, and `TSNode`. -* A `TSLanguage` is an opaque object that defines how to parse a particular programming language. The code for each `TSLanguage` is generated by Tree-sitter. Many languages are already available in separate git repositories within the the [Tree-sitter GitHub organization](https://github.com/tree-sitter). See [the next page](./creating-parsers) for how to create new languages. -* A `TSParser` is a stateful object that can be assigned a `TSLanguage` and used to produce a `TSTree` based on some source code. -* A `TSTree` represents the syntax tree of an entire source code file. It contains `TSNode` instances that indicate the structure of the source code. It can also be edited and used to produce a new `TSTree` in the event that the source code changes. -* A `TSNode` represents a single node in the syntax tree. It tracks its start and end positions in the source code, as well as its relation to other nodes like its parent, siblings and children. + +- A `TSLanguage` is an opaque object that defines how to parse a particular programming language. The code for each `TSLanguage` is generated by Tree-sitter. 
Many languages are already available in separate git repositories within the [Tree-sitter GitHub organization](https://github.com/tree-sitter). See [the next page](./creating-parsers) for how to create new languages. +- A `TSParser` is a stateful object that can be assigned a `TSLanguage` and used to produce a `TSTree` based on some source code. +- A `TSTree` represents the syntax tree of an entire source code file. It contains `TSNode` instances that indicate the structure of the source code. It can also be edited and used to produce a new `TSTree` in the event that the source code changes. +- A `TSNode` represents a single node in the syntax tree. It tracks its start and end positions in the source code, as well as its relation to other nodes like its parent, siblings and children. ### An Example Program @@ -128,7 +131,7 @@ TSTree *ts_parser_parse_string( ); ``` -You may want to parse source code that's stored in a custom data structure, like a [piece table](https://en.wikipedia.org/wiki/Piece_table) or a [rope](https://en.wikipedia.org/wiki/Rope_(data_structure)). In this case, you can use the more general `ts_parser_parse` function: +You may want to parse source code that's stored in a custom data structure, like a [piece table](https://en.wikipedia.org/wiki/Piece_table) or a [rope](https://en.wikipedia.org/wiki/Rope_(data_structure)). In this case, you can use the more general `ts_parser_parse` function: ```c TSTree *ts_parser_parse( @@ -155,7 +158,7 @@ typedef struct { ### Syntax Nodes -Tree-sitter provides a [DOM](https://en.wikipedia.org/wiki/Document_Object_Model)-style interface for inspecting syntax trees. A syntax node's *type* is a string that indicates which grammar rule the node represents. +Tree-sitter provides a [DOM](https://en.wikipedia.org/wiki/Document_Object_Model)-style interface for inspecting syntax trees. A syntax node's _type_ is a string that indicates which grammar rule the node represents. 
```c const char *ts_node_type(TSNode); @@ -178,7 +181,7 @@ TSPoint ts_node_end_point(TSNode); ### Retrieving Nodes -Every tree has a *root node*: +Every tree has a _root node_: ```c TSNode ts_tree_root_node(const TSTree *); @@ -199,7 +202,7 @@ TSNode ts_node_prev_sibling(TSNode); TSNode ts_node_parent(TSNode); ``` -These methods may all return a *null node* to indicate, for example, that a node does not *have* a next sibling. You can check if a node is null: +These methods may all return a _null node_ to indicate, for example, that a node does not _have_ a next sibling. You can check if a node is null: ```c bool ts_node_is_null(TSNode); @@ -207,21 +210,15 @@ bool ts_node_is_null(TSNode); ### Named vs Anonymous Nodes -Tree-sitter produces [*concrete* syntax trees](https://en.wikipedia.org/wiki/Parse_tree) - trees that contain nodes for every individual token in the source code, including things like commas and parentheses. This is important for use-cases that deal with individual tokens, like [syntax highlighting](https://en.wikipedia.org/wiki/Syntax_highlighting). But some types of code analysis are easier to perform using an [*abstract* syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) - a tree in which the less important details have been removed. Tree-sitter's trees support these use cases by making a distinction between *named* and *anonymous* nodes. +Tree-sitter produces [_concrete_ syntax trees](https://en.wikipedia.org/wiki/Parse_tree) - trees that contain nodes for every individual token in the source code, including things like commas and parentheses. This is important for use-cases that deal with individual tokens, like [syntax highlighting](https://en.wikipedia.org/wiki/Syntax_highlighting). But some types of code analysis are easier to perform using an [_abstract_ syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) - a tree in which the less important details have been removed. 
Tree-sitter's trees support these use cases by making a distinction between _named_ and _anonymous_ nodes. Consider a grammar rule like this: ```js -if_statement: $ => seq( - 'if', - '(', - $._expression, - ')', - $._statement, -) +if_statement: ($) => seq("if", "(", $._expression, ")", $._statement); ``` -A syntax node representing an `if_statement` in this language would have 5 children: the condition expression, the body statement, as well as the `if`, `(`, and `)` tokens. The expression and the statement would be marked as *named* nodes, because they have been given explicit names in the grammar. But the `if`, `(`, and `)` nodes would *not* be named nodes, because they are represented in the grammar as simple strings. +A syntax node representing an `if_statement` in this language would have 5 children: the condition expression, the body statement, as well as the `if`, `(`, and `)` tokens. The expression and the statement would be marked as _named_ nodes, because they have been given explicit names in the grammar. But the `if`, `(`, and `)` nodes would _not_ be named nodes, because they are represented in the grammar as simple strings. You can check whether any given node is named: @@ -242,7 +239,7 @@ If you use this group of methods, the syntax tree functions much like an abstrac ### Node Field Names -To make syntax nodes easier to analyze, many grammars assign unique *field names* to particular child nodes. The next page [explains](./creating-parsers#using-fields) how to do this on your own grammars. If a syntax node has fields, you can access its children using their field name: +To make syntax nodes easier to analyze, many grammars assign unique _field names_ to particular child nodes. The next page [explains](./creating-parsers#using-fields) how to do this on your own grammars. 
If a syntax node has fields, you can access its children using their field name: ```c TSNode ts_node_child_by_field_name( @@ -270,7 +267,7 @@ TSNode ts_node_child_by_field_id(TSNode, TSFieldId); ### Editing -In applications like text editors, you often need to re-parse a file after its source code has changed. Tree-sitter is designed to support this use case efficiently. There are two steps required. First, you must *edit* the syntax tree, which adjusts the ranges of its nodes so that they stay in sync with the code. +In applications like text editors, you often need to re-parse a file after its source code has changed. Tree-sitter is designed to support this use case efficiently. There are two steps required. First, you must _edit_ the syntax tree, which adjusts the ranges of its nodes so that they stay in sync with the code. ```c typedef struct { @@ -293,13 +290,13 @@ When you edit a syntax tree, the positions of its nodes will change. If you have void ts_node_edit(TSNode *, const TSInputEdit *); ``` -This `ts_node_edit` function is *only* needed in the case where you have retrieved `TSNode` instances *before* editing the tree, and then *after* editing the tree, you want to continue to use those specific node instances. Often, you'll just want to re-fetch nodes from the edited tree, in which case `ts_node_edit` is not needed. +This `ts_node_edit` function is _only_ needed in the case where you have retrieved `TSNode` instances _before_ editing the tree, and then _after_ editing the tree, you want to continue to use those specific node instances. Often, you'll just want to re-fetch nodes from the edited tree, in which case `ts_node_edit` is not needed. ### Multi-language Documents Sometimes, different parts of a file may be written in different languages. 
For example, templating languages like [EJS](http://ejs.co) and [ERB](https://ruby-doc.org/stdlib-2.5.1/libdoc/erb/rdoc/ERB.html) allow you to generate HTML by writing a mixture of HTML and another language like JavaScript or Ruby. -Tree-sitter handles these types of documents by allowing you to create a syntax tree based on the text in certain *ranges* of a file. +Tree-sitter handles these types of documents by allowing you to create a syntax tree based on the text in certain _ranges_ of a file. ```c typedef struct { @@ -409,13 +406,13 @@ Tree-sitter supports multi-threaded use cases by making syntax trees very cheap TSTree *ts_tree_copy(const TSTree *); ``` -Internally, copying a syntax tree just entails incrementing an atomic reference count. Conceptually, it provides you a new tree which you can freely query, edit, reparse, or delete on a new thread while continuing to use the original tree on a different thread. Note that individual `TSTree` instances are *not* thread safe; you must copy a tree if you want to use it on multiple threads simultaneously. +Internally, copying a syntax tree just entails incrementing an atomic reference count. Conceptually, it provides you a new tree which you can freely query, edit, reparse, or delete on a new thread while continuing to use the original tree on a different thread. Note that individual `TSTree` instances are _not_ thread safe; you must copy a tree if you want to use it on multiple threads simultaneously. ## Other Tree Operations ### Walking Trees with Tree Cursors -You can access every node in a syntax tree using the `TSNode` APIs [described above](#retrieving-nodes), but if you need to access a large number of nodes, the fastest way to do so is with a *tree cursor*. A cursor is a stateful object that allows you to walk a syntax tree with maximum efficiency. 
+You can access every node in a syntax tree using the `TSNode` APIs [described above](#retrieving-nodes), but if you need to access a large number of nodes, the fastest way to do so is with a _tree cursor_. A cursor is a stateful object that allows you to walk a syntax tree with maximum efficiency. You can initialize a cursor from any node: @@ -441,19 +438,19 @@ const char *ts_tree_cursor_current_field_name(const TSTreeCursor *); TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *); ``` -### Pattern Matching with Queries +## Pattern Matching with Queries Many code analysis tasks involve searching for patterns in syntax trees. Tree-sitter provides a small declarative language for expressing these patterns and searching for matches. The language is similar to the format of Tree-sitter's [unit test system](./creating-parsers#command-test). -#### Basics +### Query Syntax -A *query* consists of one or more *patterns*, where each pattern is an [S-expression](https://en.wikipedia.org/wiki/S-expression) that matches a certain set of nodes in a syntax tree. The expression to match a given node consists of a pair of parentheses containing two things: the node's type, and optionally, a series of other S-expressions that match the node's children. For example, this pattern would match any `binary_expression` node whose children are both `number_literal` nodes: +A _query_ consists of one or more _patterns_, where each pattern is an [S-expression](https://en.wikipedia.org/wiki/S-expression) that matches a certain set of nodes in a syntax tree. The expression to match a given node consists of a pair of parentheses containing two things: the node's type, and optionally, a series of other S-expressions that match the node's children. For example, this pattern would match any `binary_expression` node whose children are both `number_literal` nodes: ``` (binary_expression (number_literal) (number_literal)) ``` -Children can also be omitted. 
For example, this would match any `binary_expression` where at least *one* of child is a `string_literal` node: +Children can also be omitted. For example, this would match any `binary_expression` where at least _one_ child is a `string_literal` node: ``` (binary_expression (string_literal)) @@ -481,13 +478,13 @@ The parenthesized syntax for writing nodes only applies to [named nodes](#named- #### Capturing Nodes -When matching patterns, you may want to process specific nodes within the pattern. Captures allow you to associate names with specific nodes in a pattern, so that you can later refer to those nodes by those names. Capture names are written *after* the nodes that they refer to, and start with an `@` character. +When matching patterns, you may want to process specific nodes within the pattern. Captures allow you to associate names with specific nodes in a pattern, so that you can later refer to those nodes by those names. Capture names are written _after_ the nodes that they refer to, and start with an `@` character. -For example, this pattern would match any assignment of a `function` to an `identifier`, and it would associate the name `function-definition` with the identifier: +For example, this pattern would match any assignment of a `function` to an `identifier`, and it would associate the name `the-function-name` with the identifier: ``` (assignment_expression - left: (identifier) @function-definition + left: (identifier) @the-function-name right: (function)) ``` @@ -501,29 +498,79 @@ And this pattern would match all method definitions, associating the name `the-m name: (property_identifier) @the-method-name))) ``` +#### Quantification Operators + +You can match a repeating sequence of sibling nodes using the postfix `+` and `*` _repetition_ operators, which work analogously to the `+` and `*` operators [in regular expressions](https://en.wikipedia.org/wiki/Regular_expression#Basic_concepts). 
The `+` operator matches _one or more_ repetitions of a pattern, and the `*` operator matches _zero or more_. + +For example, this pattern would match a sequence of one or more comments: + +``` +(comment)+ +``` + +This pattern would match a class declaration, capturing all of the decorators if any were present: + +``` +(class_declaration + (decorator)* @the-decorator + name: (identifier) @the-name) +``` + +You can also mark a node as optional using the `?` operator. For example, this pattern would match all function calls, capturing a string argument if one was present: + +``` +(call_expression + function: (identifier) @the-function + arguments: (arguments (string)? @the-string-arg)) +``` + +#### Grouping Sibling Nodes + +You can also use parentheses for grouping a sequence of _sibling_ nodes. For example, this pattern would match a comment followed by a function declaration: + +``` +( + (comment) + (function_declaration) +) +``` + +Any of the quantification operators mentioned above (`+`, `*`, and `?`) can also be applied to groups. For example, this pattern would match a comma-separated series of numbers: + +``` +( + (number) + ("," (number))* +) +``` + #### Predicates -You can also specify other conditions that should restrict the nodes that match a given pattern. You do this by enclosing the pattern in an additional pair of parentheses, and specifying one or more *predicate* S-expressions after your main pattern. Predicate S-expressions must start with a predicate name, and contain either `@`-prefixed capture names or strings. +You can also specify arbitrary metadata and conditions associated with a pattern by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions start with a _predicate name_ beginning with a `#` character. After that, they can contain an arbitrary number of `@`-prefixed capture names or strings. 
For example, this pattern would match identifier whose names is written in `SCREAMING_SNAKE_CASE`: ``` -((identifier) @constant - (match? @constant "^[A-Z][A-Z_]+")) +( + (identifier) @constant + (#match? @constant "^[A-Z][A-Z_]+") +) ``` And this pattern would match key-value pairs where the `value` is an identifier with the same name as the key: ``` -((pair - key: (property_identifier) @key-name - value: (identifier) @value-name) - (eq? @key-name @value-name)) +( + (pair + key: (property_identifier) @key-name + value: (identifier) @value-name) + (#eq? @key-name @value-name) +) ``` -*Note* - Predicates are not handled directly by the Tree-sitter C library. They are just exposed in a structured form so that higher-level code can perform the filtering. However, higher-level bindings to Tree-sitter like [the Rust crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) implement a few common predicates like `eq?` and `match?`. +_Note_ - Predicates are not handled directly by the Tree-sitter C library. They are just exposed in a structured form so that higher-level code can perform the filtering. However, higher-level bindings to Tree-sitter like [the Rust crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) implement a few common predicates like `#eq?` and `#match?`. -#### The Query API +### The Query API Create a query by specifying a string containing one or more patterns: @@ -583,7 +630,7 @@ This function will return `false` when there are no more matches. Otherwise, it ## Static Node Types -In languages with static typing, it can be helpful for syntax trees to provide specific type information about individual syntax nodes. Tree-sitter makes this information available via a generated file called `node-types.json`. 
This *node types* file provides structured data about every possible syntax node in a grammar. +In languages with static typing, it can be helpful for syntax trees to provide specific type information about individual syntax nodes. Tree-sitter makes this information available via a generated file called `node-types.json`. This _node types_ file provides structured data about every possible syntax node in a grammar. You can use this data to generate type declarations in statically-typed programming languages. For example, GitHub's [Semantic](https://github.com/github/semantic) uses these node types files to [generate Haskell data types](https://github.com/github/semantic/tree/master/semantic-ast) for every possible syntax node, which allows for code analysis algorithms to be structurally verified by the Haskell type system. @@ -593,9 +640,8 @@ The node types file contains an array of objects, each of which describes a part Every object in this array has these two entries: -* `"type"` - A string that indicates which grammar rule the node represents. This corresponds to the `ts_node_type` function described [above](#syntax-nodes). -* `"named"` - A boolean that indicates whether this kind of node corresponds to a rule name in the grammar or just a string literal. See [above](#named-vs-anonymous-nodes) for more info. - +- `"type"` - A string that indicates which grammar rule the node represents. This corresponds to the `ts_node_type` function described [above](#syntax-nodes). +- `"named"` - A boolean that indicates whether this kind of node corresponds to a rule name in the grammar or just a string literal. See [above](#named-vs-anonymous-nodes) for more info. Examples: @@ -614,16 +660,16 @@ Together, these two fields constitute a unique identifier for a node type; no tw #### Internal Nodes -Many syntax nodes can have *children*. The node type object describes the possible children that a node can have using the following entries: +Many syntax nodes can have _children_. 
The node type object describes the possible children that a node can have using the following entries: -* `"fields"` - An object that describes the possible [fields](#node-field-names) that the node can have. The keys of this object are field names, and the values are *child type* objects, described below. -* `"children"` - Another *child type* object that describes all of the node's possible *named* children *without* fields. +- `"fields"` - An object that describes the possible [fields](#node-field-names) that the node can have. The keys of this object are field names, and the values are _child type_ objects, described below. +- `"children"` - Another _child type_ object that describes all of the node's possible _named_ children _without_ fields. -A *child type* object describes a set of child nodes using the following entries: +A _child type_ object describes a set of child nodes using the following entries: -* `"required"` - A boolean indicating whether there is always *at least one* node in this set. -* `"multiple"` - A boolean indicating whether there can be *multiple* nodes in this set. -* `"types"`- An array of objects that represent the possible types of nodes in this set. Each object has two keys: `"type"` and `"named"`, whose meanings are described above. +- `"required"` - A boolean indicating whether there is always _at least one_ node in this set. +- `"multiple"` - A boolean indicating whether there can be _multiple_ nodes in this set. +- `"types"`- An array of objects that represent the possible types of nodes in this set. Each object has two keys: `"type"` and `"named"`, whose meanings are described above. 
Example with fields: @@ -635,31 +681,25 @@ Example with fields: "body": { "multiple": false, "required": true, - "types": [ - {"type": "statement_block", "named": true} - ] + "types": [{ "type": "statement_block", "named": true }] }, "decorator": { "multiple": true, "required": false, - "types": [ - {"type": "decorator", "named": true} - ] + "types": [{ "type": "decorator", "named": true }] }, "name": { "multiple": false, "required": true, "types": [ - {"type": "computed_property_name", "named": true}, - {"type": "property_identifier", "named": true}, + { "type": "computed_property_name", "named": true }, + { "type": "property_identifier", "named": true } ] }, "parameters": { "multiple": false, "required": true, - "types": [ - {"type": "formal_parameters", "named": true} - ] + "types": [{ "type": "formal_parameters", "named": true }] } } } @@ -676,8 +716,8 @@ Example with children: "multiple": true, "required": false, "types": [ - {"type": "_expression", "named": true}, - {"type": "spread_element", "named": true} + { "type": "_expression", "named": true }, + { "type": "spread_element", "named": true } ] } } @@ -685,11 +725,11 @@ Example with children: #### Supertype Nodes -In Tree-sitter grammars, there are usually certain rules that represent abstract *categories* of syntax nodes (e.g. "expression", "type", "declaration"). In the `grammar.js` file, these are often written as [hidden rules](./creating-parsers#hiding-rules) whose definition is a simple [`choice`](./creating-parsers#the-grammar-dsl) where each member is just a single symbol. +In Tree-sitter grammars, there are usually certain rules that represent abstract _categories_ of syntax nodes (e.g. "expression", "type", "declaration"). In the `grammar.js` file, these are often written as [hidden rules](./creating-parsers#hiding-rules) whose definition is a simple [`choice`](./creating-parsers#the-grammar-dsl) where each member is just a single symbol. 
-Normally, hidden rules are not mentioned in the node types file, since they don't appear in the syntax tree. But if you add a hidden rule to the grammar's [`supertypes` list](./creating-parsers#the-grammar-dsl), then it *will* show up in the node types file, with the following special entry: +Normally, hidden rules are not mentioned in the node types file, since they don't appear in the syntax tree. But if you add a hidden rule to the grammar's [`supertypes` list](./creating-parsers#the-grammar-dsl), then it _will_ show up in the node types file, with the following special entry: -* `"subtypes"` - An array of objects that specify the *types* of nodes that this 'supertype' node can wrap. +- `"subtypes"` - An array of objects that specify the _types_ of nodes that this 'supertype' node can wrap. Example: @@ -698,11 +738,11 @@ Example: "type": "_declaration", "named": true, "subtypes": [ - {"type": "class_declaration", "named": true}, - {"type": "function_declaration", "named": true}, - {"type": "generator_function_declaration", "named": true}, - {"type": "lexical_declaration", "named": true}, - {"type": "variable_declaration", "named": true} + { "type": "class_declaration", "named": true }, + { "type": "function_declaration", "named": true }, + { "type": "generator_function_declaration", "named": true }, + { "type": "lexical_declaration", "named": true }, + { "type": "variable_declaration", "named": true } ] } ``` @@ -719,17 +759,13 @@ Example: "declaration": { "multiple": false, "required": false, - "types": [ - {"type": "_declaration", "named": true} - ] + "types": [{ "type": "_declaration", "named": true }] }, "source": { "multiple": false, "required": false, - "types": [ - {"type": "string", "named": true} - ] - }, + "types": [{ "type": "string", "named": true }] + } } } ``` diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index a13d9168..c0aba32f 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1271,13 +1271,13 @@ impl Query 
{ "eq?" | "not-eq?" => { if p.len() != 3 { return Err(QueryError::Predicate(format!( - "Wrong number of arguments to eq? predicate. Expected 2, got {}.", + "Wrong number of arguments to #eq? predicate. Expected 2, got {}.", p.len() - 1 ))); } if p[1].type_ != type_capture { return Err(QueryError::Predicate(format!( - "First argument to eq? predicate must be a capture name. Got literal \"{}\".", + "First argument to #eq? predicate must be a capture name. Got literal \"{}\".", string_values[p[1].value_id as usize], ))); } @@ -1301,19 +1301,19 @@ impl Query { "match?" => { if p.len() != 3 { return Err(QueryError::Predicate(format!( - "Wrong number of arguments to match? predicate. Expected 2, got {}.", + "Wrong number of arguments to #match? predicate. Expected 2, got {}.", p.len() - 1 ))); } if p[1].type_ != type_capture { return Err(QueryError::Predicate(format!( - "First argument to match? predicate must be a capture name. Got literal \"{}\".", + "First argument to #match? predicate must be a capture name. Got literal \"{}\".", string_values[p[1].value_id as usize], ))); } if p[2].type_ == type_capture { return Err(QueryError::Predicate(format!( - "Second argument to match? predicate must be a literal. Got capture @{}.", + "Second argument to #match? predicate must be a literal. Got capture @{}.", result.capture_names[p[2].value_id as usize], ))); } diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index f52f61d5..feedb37f 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -748,10 +748,10 @@ class Language { isPositive = false; case 'eq?': if (steps.length !== 3) throw new Error( - `Wrong number of arguments to \`eq?\` predicate. Expected 2, got ${steps.length - 1}` + `Wrong number of arguments to \`#eq?\` predicate. Expected 2, got ${steps.length - 1}` ); if (steps[1].type !== 'capture') throw new Error( - `First argument of \`eq?\` predicate must be a capture. 
Got "${steps[1].value}"` + `First argument of \`#eq?\` predicate must be a capture. Got "${steps[1].value}"` ); if (steps[2].type === 'capture') { const captureName1 = steps[1].name; @@ -780,13 +780,13 @@ class Language { case 'match?': if (steps.length !== 3) throw new Error( - `Wrong number of arguments to \`match?\` predicate. Expected 2, got ${steps.length - 1}.` + `Wrong number of arguments to \`#match?\` predicate. Expected 2, got ${steps.length - 1}.` ); if (steps[1].type !== 'capture') throw new Error( - `First argument of \`match?\` predicate must be a capture. Got "${steps[1].value}".` + `First argument of \`#match?\` predicate must be a capture. Got "${steps[1].value}".` ); if (steps[2].type !== 'string') throw new Error( - `Second argument of \`match?\` predicate must be a string. Got @${steps[2].value}.` + `Second argument of \`#match?\` predicate must be a string. Got @${steps[2].value}.` ); const captureName = steps[1].name; const regex = new RegExp(steps[2].value); @@ -800,10 +800,10 @@ class Language { case 'set!': if (steps.length < 2 || steps.length > 3) throw new Error( - `Wrong number of arguments to \`set!\` predicate. Expected 1 or 2. Got ${steps.length - 1}.` + `Wrong number of arguments to \`#set!\` predicate. Expected 1 or 2. Got ${steps.length - 1}.` ); if (steps.some(s => s.type !== 'string')) throw new Error( - `Arguments to \`set!\` predicate must be a strings.".` + `Arguments to \`#set!\` predicate must be a strings.".` ); if (!setProperties[i]) setProperties[i] = {}; setProperties[i][steps[1].value] = steps[2] ? steps[2].value : null; @@ -812,10 +812,10 @@ class Language { case 'is?': case 'is-not?': if (steps.length < 2 || steps.length > 3) throw new Error( - `Wrong number of arguments to \`${operator}\` predicate. Expected 1 or 2. Got ${steps.length - 1}.` + `Wrong number of arguments to \`#${operator}\` predicate. Expected 1 or 2. 
Got ${steps.length - 1}.` ); if (steps.some(s => s.type !== 'string')) throw new Error( - `Arguments to \`${operator}\` predicate must be a strings.".` + `Arguments to \`#${operator}\` predicate must be a strings.".` ); const properties = operator === 'is?' ? assertedProperties : refutedProperties; if (!properties[i]) properties[i] = {}; @@ -823,7 +823,7 @@ class Language { break; default: - throw new Error(`Unknown query predicate \`${steps[0].value}\``); + throw new Error(`Unknown query predicate \`#${steps[0].value}\``); } steps.length = 0; diff --git a/lib/binding_web/test/query-test.js b/lib/binding_web/test/query-test.js index b5a37ed9..8683214a 100644 --- a/lib/binding_web/test/query-test.js +++ b/lib/binding_web/test/query-test.js @@ -1,12 +1,10 @@ -const {assert} = require('chai'); +const { assert } = require("chai"); let Parser, JavaScript; describe("Query", () => { let parser, tree, query; - before(async () => - ({Parser, JavaScript} = await require('./helper')) - ); + before(async () => ({ Parser, JavaScript } = await require("./helper"))); beforeEach(() => { parser = new Parser().setLanguage(JavaScript); @@ -18,81 +16,75 @@ describe("Query", () => { if (query) query.delete(); }); - describe('construction', () => { - it('throws an error on invalid patterns', () => { + describe("construction", () => { + it("throws an error on invalid patterns", () => { assert.throws(() => { - JavaScript.query("(function_declaration wat)") - }, "Bad syntax at offset 22: \'wat)\'..."); + JavaScript.query("(function_declaration wat)"); + }, "Bad syntax at offset 22: 'wat)'..."); assert.throws(() => { - JavaScript.query("(non_existent)") + JavaScript.query("(non_existent)"); }, "Bad node name 'non_existent'"); assert.throws(() => { - JavaScript.query("(a)") + JavaScript.query("(a)"); }, "Bad node name 'a'"); assert.throws(() => { - JavaScript.query("(function_declaration non_existent:(identifier))") + JavaScript.query("(function_declaration non_existent:(identifier))"); }, 
"Bad field name 'non_existent'"); }); - it('throws an error on invalid predicates', () => { + it("throws an error on invalid predicates", () => { assert.throws(() => { - JavaScript.query("((identifier) @abc (eq? @ab hi))") + JavaScript.query("((identifier) @abc (#eq? @ab hi))"); }, "Bad capture name @ab"); assert.throws(() => { - JavaScript.query("((identifier) @abc (eq? @ab hi))") + JavaScript.query("((identifier) @abc (#eq? @ab hi))"); }, "Bad capture name @ab"); assert.throws(() => { - JavaScript.query("((identifier) @abc (eq?))") - }, "Wrong number of arguments to `eq?` predicate. Expected 2, got 0"); + JavaScript.query("((identifier) @abc (#eq?))"); + }, "Wrong number of arguments to `#eq?` predicate. Expected 2, got 0"); assert.throws(() => { - JavaScript.query("((identifier) @a (eq? @a @a @a))") - }, "Wrong number of arguments to `eq?` predicate. Expected 2, got 3"); + JavaScript.query("((identifier) @a (eq? @a @a @a))"); + }, "Wrong number of arguments to `#eq?` predicate. Expected 2, got 3"); assert.throws(() => { - JavaScript.query("((identifier) @a (something-else? @a))") - }, "Unknown query predicate `something-else?`"); + JavaScript.query("((identifier) @a (#something-else? 
@a))"); + }, "Unknown query predicate `#something-else?`"); }); }); - describe('.matches', () => { - it('returns all of the matches for the given query', () => { + describe(".matches", () => { + it("returns all of the matches for the given query", () => { tree = parser.parse("function one() { two(); function three() {} }"); query = JavaScript.query(` - (function_declaration name:(identifier) @fn-def) - (call_expression function:(identifier) @fn-ref) + (function_declaration name: (identifier) @fn-def) + (call_expression function: (identifier) @fn-ref) `); const matches = query.matches(tree.rootNode); - assert.deepEqual( - formatMatches(matches), - [ - {pattern: 0, captures: [{name: 'fn-def', text: 'one'}]}, - {pattern: 1, captures: [{name: 'fn-ref', text: 'two'}]}, - {pattern: 0, captures: [{name: 'fn-def', text: 'three'}]}, - ] - ); + assert.deepEqual(formatMatches(matches), [ + { pattern: 0, captures: [{ name: "fn-def", text: "one" }] }, + { pattern: 1, captures: [{ name: "fn-ref", text: "two" }] }, + { pattern: 0, captures: [{ name: "fn-def", text: "three" }] }, + ]); }); - it('can search in a specified ranges', () => { + it("can search in a specified ranges", () => { tree = parser.parse("[a, b,\nc, d,\ne, f,\ng, h]"); - query = JavaScript.query('(identifier) @element'); + query = JavaScript.query("(identifier) @element"); const matches = query.matches( tree.rootNode, - {row: 1, column: 1}, - {row: 3, column: 1} - ); - assert.deepEqual( - formatMatches(matches), - [ - {pattern: 0, captures: [{name: 'element', text: 'd'}]}, - {pattern: 0, captures: [{name: 'element', text: 'e'}]}, - {pattern: 0, captures: [{name: 'element', text: 'f'}]}, - {pattern: 0, captures: [{name: 'element', text: 'g'}]}, - ] + { row: 1, column: 1 }, + { row: 3, column: 1 } ); + assert.deepEqual(formatMatches(matches), [ + { pattern: 0, captures: [{ name: "element", text: "d" }] }, + { pattern: 0, captures: [{ name: "element", text: "e" }] }, + { pattern: 0, captures: [{ name: "element", 
text: "f" }] }, + { pattern: 0, captures: [{ name: "element", text: "g" }] }, + ]); }); }); - describe('.captures', () => { - it('returns all of the captures for the given query, in order', () => { + describe(".captures", () => { + it("returns all of the captures for the given query, in order", () => { tree = parser.parse(` a({ bc: function de() { @@ -105,12 +97,12 @@ describe("Query", () => { `); query = JavaScript.query(` (pair - key: * @method.def + key: _ @method.def (function name: (identifier) @method.alias)) (variable_declarator - name: * @function.def + name: _ @function.def value: (function name: (identifier) @function.alias)) @@ -119,26 +111,23 @@ describe("Query", () => { `); const captures = query.captures(tree.rootNode); - assert.deepEqual( - formatCaptures(captures), - [ - {name: "method.def", text: "bc"}, - {name: "delimiter", text: ":"}, - {name: "method.alias", text: "de"}, - {name: "function.def", text: "fg"}, - {name: "operator", text: "="}, - {name: "function.alias", text: "hi"}, - {name: "method.def", text: "jk"}, - {name: "delimiter", text: ":"}, - {name: "method.alias", text: "lm"}, - {name: "function.def", text: "no"}, - {name: "operator", text: "="}, - {name: "function.alias", text: "pq"}, - ] - ); + assert.deepEqual(formatCaptures(captures), [ + { name: "method.def", text: "bc" }, + { name: "delimiter", text: ":" }, + { name: "method.alias", text: "de" }, + { name: "function.def", text: "fg" }, + { name: "operator", text: "=" }, + { name: "function.alias", text: "hi" }, + { name: "method.def", text: "jk" }, + { name: "delimiter", text: ":" }, + { name: "method.alias", text: "lm" }, + { name: "function.def", text: "no" }, + { name: "operator", text: "=" }, + { name: "function.alias", text: "pq" }, + ]); }); - it('handles conditions that compare the text of capture to literal strings', () => { + it("handles conditions that compare the text of capture to literal strings", () => { tree = parser.parse(` const ab = require('./ab'); new Cd(EF); 
@@ -148,32 +137,29 @@ describe("Query", () => { (identifier) @variable ((identifier) @function.builtin - (eq? @function.builtin "require")) + (#eq? @function.builtin "require")) ((identifier) @constructor - (match? @constructor "^[A-Z]")) + (#match? @constructor "^[A-Z]")) ((identifier) @constant - (match? @constant "^[A-Z]{2,}$")) + (#match? @constant "^[A-Z]{2,}$")) `); const captures = query.captures(tree.rootNode); - assert.deepEqual( - formatCaptures(captures), - [ - {name: "variable", text: "ab"}, - {name: "variable", text: "require"}, - {name: "function.builtin", text: "require"}, - {name: "variable", text: "Cd"}, - {name: "constructor", text: "Cd"}, - {name: "variable", text: "EF"}, - {name: "constructor", text: "EF"}, - {name: "constant", text: "EF"}, - ] - ); + assert.deepEqual(formatCaptures(captures), [ + { name: "variable", text: "ab" }, + { name: "variable", text: "require" }, + { name: "function.builtin", text: "require" }, + { name: "variable", text: "Cd" }, + { name: "constructor", text: "Cd" }, + { name: "variable", text: "EF" }, + { name: "constructor", text: "EF" }, + { name: "constant", text: "EF" }, + ]); }); - it('handles conditions that compare the text of capture to each other', () => { + it("handles conditions that compare the text of capture to each other", () => { tree = parser.parse(` ab = abc + 1; def = de + 1; @@ -181,56 +167,60 @@ describe("Query", () => { `); query = JavaScript.query(` - ((assignment_expression + ( + (assignment_expression left: (identifier) @id1 right: (binary_expression left: (identifier) @id2)) - (eq? @id1 @id2)) - `); - - const captures = query.captures(tree.rootNode); - assert.deepEqual( - formatCaptures(captures), - [ - {name: "id1", text: "ghi"}, - {name: "id2", text: "ghi"}, - ] - ); - }); - - it('handles patterns with properties', () => { - tree = parser.parse(`a(b.c);`); - query = JavaScript.query(` - ((call_expression (identifier) @func) - (set! foo) - (set! 
bar baz)) - - ((property_identifier) @prop - (is? foo) - (is-not? bar baz)) + (#eq? @id1 @id2) + ) `); const captures = query.captures(tree.rootNode); assert.deepEqual(formatCaptures(captures), [ - {name: 'func', text: 'a', setProperties: {foo: null, bar: 'baz'}}, - {name: 'prop', text: 'c', assertedProperties: {foo: null}, refutedProperties: {bar: 'baz'}}, + { name: "id1", text: "ghi" }, + { name: "id2", text: "ghi" }, + ]); + }); + + it("handles patterns with properties", () => { + tree = parser.parse(`a(b.c);`); + query = JavaScript.query(` + ((call_expression (identifier) @func) + (#set! foo) + (#set! bar baz)) + + ((property_identifier) @prop + (#is? foo) + (#is-not? bar baz)) + `); + + const captures = query.captures(tree.rootNode); + assert.deepEqual(formatCaptures(captures), [ + { name: "func", text: "a", setProperties: { foo: null, bar: "baz" } }, + { + name: "prop", + text: "c", + assertedProperties: { foo: null }, + refutedProperties: { bar: "baz" }, + }, ]); }); }); }); function formatMatches(matches) { - return matches.map(({pattern, captures}) => ({ + return matches.map(({ pattern, captures }) => ({ pattern, - captures: formatCaptures(captures) - })) + captures: formatCaptures(captures), + })); } function formatCaptures(captures) { - return captures.map(c => { + return captures.map((c) => { const node = c.node; delete c.node; c.text = node.text; return c; - }) + }); } diff --git a/lib/src/query.c b/lib/src/query.c index 801b98e2..5c06ed0f 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -27,7 +27,7 @@ typedef struct { * represented as a sequence of these steps. Fields: * * - `symbol` - The grammar symbol to match. A zero value represents the - * wildcard symbol, '*'. + * wildcard symbol, '_'. * - `field` - The field name to match. A zero value means that a field name * was not specified. 
* - `capture_id` - An integer representing the name of the capture associated @@ -567,12 +567,22 @@ static TSQueryError ts_query__parse_predicate( TSQuery *self, Stream *stream ) { - if (stream->next == ')') return PARENT_DONE; - if (stream->next != '(') return TSQueryErrorSyntax; - stream_advance(stream); + if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; + const char *predicate_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = stream->input - predicate_name; + uint16_t id = symbol_table_insert_name( + &self->predicate_values, + predicate_name, + length + ); + array_back(&self->predicates_by_pattern)->length++; + array_push(&self->predicate_steps, ((TSQueryPredicateStep) { + .type = TSQueryPredicateStepTypeString, + .value_id = id, + })); stream_skip_whitespace(stream); - unsigned step_count = 0; for (;;) { if (stream->next == ')') { stream_advance(stream); @@ -677,7 +687,6 @@ static TSQueryError ts_query__parse_predicate( return TSQueryErrorSyntax; } - step_count++; stream_skip_whitespace(stream); } @@ -703,35 +712,16 @@ static TSQueryError ts_query__parse_pattern( return PARENT_DONE; } - // Parse a parenthesized node expression + // An open parenthesis can be the start of three possible constructs: + // * A grouped sequence + // * A predicate + // * A named node else if (stream->next == '(') { stream_advance(stream); stream_skip_whitespace(stream); - // At the top-level, a nested list represents one root pattern followed by - // zero-or-more predicates. - if (stream->next == '(' && depth == 0) { - TSQueryError e = ts_query__parse_pattern(self, stream, 0, capture_count, is_immediate); - if (e) return e; - - // Parse the predicates. 
- stream_skip_whitespace(stream); - for (;;) { - TSQueryError e = ts_query__parse_predicate(self, stream); - if (e == PARENT_DONE) { - stream_advance(stream); - stream_skip_whitespace(stream); - return 0; - } else if (e) { - return e; - } - } - } - - // When nested inside of a larger pattern, a nested list just represents - // multiple sibling nodes which are grouped, possibly so that a postfix - // operator can be applied to the group. - else if (depth > 0 && (stream->next == '(' || stream->next == '"' )) { + // If this parenthesis is followed by a node, then it represents a grouped sequence. + if (stream->next == '(' || stream->next == '"') { bool child_is_immediate = false; for (;;) { if (stream->next == '.') { @@ -755,11 +745,26 @@ static TSQueryError ts_query__parse_pattern( child_is_immediate = false; } - } else { + } + + // A pound character indicates the start of a predicate. + else if (stream->next == '#') { + stream_advance(stream); + return ts_query__parse_predicate(self, stream); + } + + // Otherwise, this parenthesis is the start of a named node. + else { TSSymbol symbol; // Parse the wildcard symbol - if (stream->next == '*') { + if ( + stream->next == '_' || + + // TODO - remove. + // For temporary backward compatibility, handle '*' as a wildcard. + stream->next == '*' + ) { symbol = depth > 0 ? NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL; stream_advance(stream); } @@ -769,6 +774,14 @@ static TSQueryError ts_query__parse_pattern( const char *node_name = stream->input; stream_scan_identifier(stream); uint32_t length = stream->input - node_name; + + // TODO - remove. + // For temporary backward compatibility, handle predicates without the leading '#' sign. + if (length > 0 && (node_name[length - 1] == '!' 
|| node_name[length - 1] == '?')) { + stream_reset(stream, node_name); + return ts_query__parse_predicate(self, stream); + } + symbol = ts_language_symbol_for_name( self->language, node_name, @@ -819,6 +832,21 @@ static TSQueryError ts_query__parse_pattern( } } + // Parse a wildcard pattern + else if ( + stream->next == '_' || + + // TODO remove. + // For temporary backward compatibility, handle '*' as a wildcard. + stream->next == '*' + ) { + stream_advance(stream); + stream_skip_whitespace(stream); + + // Add a step that matches any kind of node + array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate)); + } + // Parse a double-quoted anonymous leaf node expression else if (stream->next == '"') { stream_advance(stream); @@ -890,15 +918,6 @@ static TSQueryError ts_query__parse_pattern( self->steps.contents[step_index].field = field_id; } - // Parse a wildcard pattern - else if (stream->next == '*') { - stream_advance(stream); - stream_skip_whitespace(stream); - - // Add a step that matches any kind of node - array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate)); - } - else { return TSQueryErrorSyntax; } @@ -911,18 +930,29 @@ static TSQueryError ts_query__parse_pattern( if (stream->next == '+') { stream_advance(stream); + stream_skip_whitespace(stream); QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); repeat_step.alternative_index = starting_step_index; repeat_step.is_placeholder = true; repeat_step.alternative_is_immediate = true; array_push(&self->steps, repeat_step); - stream_skip_whitespace(stream); } else if (stream->next == '?') { stream_advance(stream); - step->alternative_index = self->steps.size; stream_skip_whitespace(stream); + step->alternative_index = self->steps.size; + } + + else if (stream->next == '*') { + stream_advance(stream); + stream_skip_whitespace(stream); + QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); + repeat_step.alternative_index = 
starting_step_index; + repeat_step.is_placeholder = true; + repeat_step.alternative_is_immediate = true; + array_push(&self->steps, repeat_step); + step->alternative_index = self->steps.size; } // Parse an '@'-prefixed capture pattern