Merge pull request #2532 from amaanq/quantified-predicate-fix
feat!: properly handle predicates used on quantified captures
This commit is contained in:
commit
293f0d1ca3
6 changed files with 341 additions and 87 deletions
|
|
@ -4574,6 +4574,89 @@ fn test_capture_quantifiers() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_quantified_captures() {
|
||||
struct Row {
|
||||
description: &'static str,
|
||||
language: Language,
|
||||
code: &'static str,
|
||||
pattern: &'static str,
|
||||
captures: &'static [(&'static str, &'static str)],
|
||||
}
|
||||
|
||||
// #[rustfmt::skip]
|
||||
let rows = &[
|
||||
Row {
|
||||
description: "doc comments where all must match the prefix",
|
||||
language: get_language("c"),
|
||||
code: indoc! {"
|
||||
/// foo
|
||||
/// bar
|
||||
/// baz
|
||||
|
||||
void main() {}
|
||||
|
||||
/// qux
|
||||
/// quux
|
||||
// quuz
|
||||
"},
|
||||
pattern: r#"
|
||||
((comment)+ @comment.documentation
|
||||
(#match? @comment.documentation "^///"))
|
||||
"#,
|
||||
captures: &[
|
||||
("comment.documentation", "/// foo"),
|
||||
("comment.documentation", "/// bar"),
|
||||
("comment.documentation", "/// baz"),
|
||||
],
|
||||
},
|
||||
Row {
|
||||
description: "doc comments where one must match the prefix",
|
||||
language: get_language("c"),
|
||||
code: indoc! {"
|
||||
/// foo
|
||||
/// bar
|
||||
/// baz
|
||||
|
||||
void main() {}
|
||||
|
||||
/// qux
|
||||
/// quux
|
||||
// quuz
|
||||
"},
|
||||
pattern: r#"
|
||||
((comment)+ @comment.documentation
|
||||
(#any-match? @comment.documentation "^///"))
|
||||
"#,
|
||||
captures: &[
|
||||
("comment.documentation", "/// foo"),
|
||||
("comment.documentation", "/// bar"),
|
||||
("comment.documentation", "/// baz"),
|
||||
("comment.documentation", "/// qux"),
|
||||
("comment.documentation", "/// quux"),
|
||||
("comment.documentation", "// quuz"),
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
allocations::record(|| {
|
||||
for row in rows {
|
||||
eprintln!(" quantified query example: {:?}", row.description);
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(row.language).unwrap();
|
||||
let tree = parser.parse(row.code, None).unwrap();
|
||||
|
||||
let query = Query::new(row.language, row.pattern).unwrap();
|
||||
|
||||
let mut cursor = QueryCursor::new();
|
||||
let matches = cursor.captures(&query, tree.root_node(), row.code.as_bytes());
|
||||
|
||||
assert_eq!(collect_captures(matches, &query, row.code), row.captures);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_max_start_depth() {
|
||||
struct Row {
|
||||
|
|
|
|||
|
|
@ -21,21 +21,21 @@ Alternatively, you can incorporate the library in a larger project's build syste
|
|||
|
||||
**source file:**
|
||||
|
||||
* `tree-sitter/lib/src/lib.c`
|
||||
- `tree-sitter/lib/src/lib.c`
|
||||
|
||||
**include directories:**
|
||||
|
||||
* `tree-sitter/lib/src`
|
||||
* `tree-sitter/lib/include`
|
||||
- `tree-sitter/lib/src`
|
||||
- `tree-sitter/lib/include`
|
||||
|
||||
### The Basic Objects
|
||||
|
||||
There are four main types of objects involved when using Tree-sitter: languages, parsers, syntax trees, and syntax nodes. In C, these are called `TSLanguage`, `TSParser`, `TSTree`, and `TSNode`.
|
||||
|
||||
* A `TSLanguage` is an opaque object that defines how to parse a particular programming language. The code for each `TSLanguage` is generated by Tree-sitter. Many languages are already available in separate git repositories within the [Tree-sitter GitHub organization](https://github.com/tree-sitter). See [the next page](./creating-parsers) for how to create new languages.
|
||||
* A `TSParser` is a stateful object that can be assigned a `TSLanguage` and used to produce a `TSTree` based on some source code.
|
||||
* A `TSTree` represents the syntax tree of an entire source code file. It contains `TSNode` instances that indicate the structure of the source code. It can also be edited and used to produce a new `TSTree` in the event that the source code changes.
|
||||
* A `TSNode` represents a single node in the syntax tree. It tracks its start and end positions in the source code, as well as its relation to other nodes like its parent, siblings and children.
|
||||
- A `TSLanguage` is an opaque object that defines how to parse a particular programming language. The code for each `TSLanguage` is generated by Tree-sitter. Many languages are already available in separate git repositories within the [Tree-sitter GitHub organization](https://github.com/tree-sitter). See [the next page](./creating-parsers) for how to create new languages.
|
||||
- A `TSParser` is a stateful object that can be assigned a `TSLanguage` and used to produce a `TSTree` based on some source code.
|
||||
- A `TSTree` represents the syntax tree of an entire source code file. It contains `TSNode` instances that indicate the structure of the source code. It can also be edited and used to produce a new `TSTree` in the event that the source code changes.
|
||||
- A `TSNode` represents a single node in the syntax tree. It tracks its start and end positions in the source code, as well as its relation to other nodes like its parent, siblings and children.
|
||||
|
||||
### An Example Program
|
||||
|
||||
|
|
@ -629,18 +629,36 @@ The restrictions placed on a pattern by an anchor operator ignore anonymous node
|
|||
|
||||
#### Predicates
|
||||
|
||||
You can also specify arbitrary metadata and conditions associated with a pattern by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions start with a _predicate name_ beginning with a `#` character. After that, they can contain an arbitrary number of `@`-prefixed capture names or strings.
|
||||
You can also specify arbitrary metadata and conditions associated with a pattern
|
||||
by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions
|
||||
start with a _predicate name_ beginning with a `#` character. After that, they can
|
||||
contain an arbitrary number of `@`-prefixed capture names or strings.
|
||||
|
||||
For example, this pattern would match identifier whose names is written in `SCREAMING_SNAKE_CASE`:
|
||||
Tree-sitter's CLI supports the following predicates by default:
|
||||
|
||||
##### eq?, not-eq?, any-eq?, any-not-eq?
|
||||
|
||||
This family of predicates allows you to match against a single capture or string
|
||||
value.
|
||||
|
||||
The first argument must be a capture, but the second can be either a capture to
|
||||
compare the two captures' text, or a string to compare the first capture's text
|
||||
against.
|
||||
|
||||
The base predicate is "#eq?", but its complement "#not-eq?" can be used to _not_
|
||||
match a value.
|
||||
|
||||
Consider the following example targeting C:
|
||||
|
||||
```scheme
|
||||
(
|
||||
(identifier) @constant
|
||||
(#match? @constant "^[A-Z][A-Z_]+")
|
||||
)
|
||||
((identifier) @variable.builtin
|
||||
(#eq? @variable.builtin "self"))
|
||||
```
|
||||
|
||||
And this pattern would match key-value pairs where the `value` is an identifier with the same name as the key:
|
||||
This pattern would match any identifier that is `self`.
|
||||
|
||||
And this pattern would match key-value pairs where the `value` is an identifier
|
||||
with the same name as the key:
|
||||
|
||||
```scheme
|
||||
(
|
||||
|
|
@ -651,7 +669,87 @@ And this pattern would match key-value pairs where the `value` is an identifier
|
|||
)
|
||||
```
|
||||
|
||||
_Note_ - Predicates are not handled directly by the Tree-sitter C library. They are just exposed in a structured form so that higher-level code can perform the filtering. However, higher-level bindings to Tree-sitter like [the Rust crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) implement a few common predicates like `#eq?` and `#match?`.
|
||||
The prefix "any-" is meant for use with quantified captures. Here's
|
||||
an example finding a segment of empty comments:
|
||||
|
||||
```scheme
|
||||
((comment)+ @comment.empty
|
||||
(#any-eq? @comment.empty "//"))
|
||||
```
|
||||
|
||||
Note that "#any-eq?" will match a quantified capture if
|
||||
_any_ of the nodes match the predicate, while by default a quantified capture
|
||||
will only match if _all_ the nodes match the predicate.
|
||||
|
||||
##### match?, not-match?, any-match?, any-not-match?
|
||||
|
||||
These predicates are similar to the eq? predicates, but they use regular expressions
|
||||
to match against the capture's text.
|
||||
|
||||
The first argument must be a capture, and the second must be a string containing
|
||||
a regular expression.
|
||||
|
||||
For example, this pattern would match identifiers whose names are written in `SCREAMING_SNAKE_CASE`:
|
||||
|
||||
```scheme
|
||||
((identifier) @constant
|
||||
(#match? @constant "^[A-Z][A-Z_]+"))
|
||||
```
|
||||
|
||||
Here's an example finding potential documentation comments in C:
|
||||
|
||||
```scheme
|
||||
((comment)+ @comment.documentation
|
||||
(#match? @comment.documentation "^///\s+.*"))
|
||||
```
|
||||
|
||||
Here's another example finding Cgo comments to potentially inject with C:
|
||||
|
||||
```scheme
|
||||
((comment)+ @injection.content
|
||||
.
|
||||
(import_declaration
|
||||
(import_spec path: (interpreted_string_literal) @_import_c))
|
||||
(#eq? @_import_c "\"C\"")
|
||||
(#match? @injection.content "^//"))
|
||||
```
|
||||
|
||||
##### any-of?, not-any-of?
|
||||
|
||||
The "any-of?" predicate allows you to match a capture against multiple strings,
|
||||
and will match if the capture's text is equal to any of the strings.
|
||||
|
||||
Consider this example that targets JavaScript:
|
||||
|
||||
```scheme
|
||||
((identifier) @variable.builtin
|
||||
(#any-of? @variable.builtin
|
||||
"arguments"
|
||||
"module"
|
||||
"console"
|
||||
"window"
|
||||
"document"))
|
||||
```
|
||||
|
||||
This will match any of the builtin variables in JavaScript.
|
||||
|
||||
_Note_ — Predicates are not handled directly by the Tree-sitter C library.
|
||||
They are just exposed in a structured form so that higher-level code can perform
|
||||
the filtering. However, higher-level bindings to Tree-sitter like
|
||||
[the Rust crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust)
|
||||
or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web)
|
||||
do implement a few common predicates like the `#eq?`, `#match?`, and `#any-of?`
|
||||
predicates explained above.
|
||||
|
||||
To recap the predicates that Tree-sitter's bindings support:
|
||||
|
||||
- `#eq?` checks for a direct match against a capture or string
|
||||
- `#match?` checks for a match against a regular expression
|
||||
- `#any-of?` checks for a match against a list of strings
|
||||
- Adding `not-` to the beginning of any of these predicates will negate the match
|
||||
- By default, a quantified capture will only match if _all_ of the nodes match the predicate
|
||||
- Adding `any-` before the `eq` or `match` predicates will instead match if any of the nodes match the predicate
|
||||
|
||||
|
||||
### The Query API
|
||||
|
||||
|
|
@ -723,8 +821,8 @@ The node types file contains an array of objects, each of which describes a part
|
|||
|
||||
Every object in this array has these two entries:
|
||||
|
||||
* `"type"` - A string that indicates which grammar rule the node represents. This corresponds to the `ts_node_type` function described [above](#syntax-nodes).
|
||||
* `"named"` - A boolean that indicates whether this kind of node corresponds to a rule name in the grammar or just a string literal. See [above](#named-vs-anonymous-nodes) for more info.
|
||||
- `"type"` - A string that indicates which grammar rule the node represents. This corresponds to the `ts_node_type` function described [above](#syntax-nodes).
|
||||
- `"named"` - A boolean that indicates whether this kind of node corresponds to a rule name in the grammar or just a string literal. See [above](#named-vs-anonymous-nodes) for more info.
|
||||
|
||||
Examples:
|
||||
|
||||
|
|
@ -745,14 +843,14 @@ Together, these two fields constitute a unique identifier for a node type; no tw
|
|||
|
||||
Many syntax nodes can have _children_. The node type object describes the possible children that a node can have using the following entries:
|
||||
|
||||
* `"fields"` - An object that describes the possible [fields](#node-field-names) that the node can have. The keys of this object are field names, and the values are _child type_ objects, described below.
|
||||
* `"children"` - Another _child type_ object that describes all of the node's possible _named_ children _without_ fields.
|
||||
- `"fields"` - An object that describes the possible [fields](#node-field-names) that the node can have. The keys of this object are field names, and the values are _child type_ objects, described below.
|
||||
- `"children"` - Another _child type_ object that describes all of the node's possible _named_ children _without_ fields.
|
||||
|
||||
A _child type_ object describes a set of child nodes using the following entries:
|
||||
|
||||
* `"required"` - A boolean indicating whether there is always _at least one_ node in this set.
|
||||
* `"multiple"` - A boolean indicating whether there can be _multiple_ nodes in this set.
|
||||
* `"types"`- An array of objects that represent the possible types of nodes in this set. Each object has two keys: `"type"` and `"named"`, whose meanings are described above.
|
||||
- `"required"` - A boolean indicating whether there is always _at least one_ node in this set.
|
||||
- `"multiple"` - A boolean indicating whether there can be _multiple_ nodes in this set.
|
||||
- `"types"` - An array of objects that represent the possible types of nodes in this set. Each object has two keys: `"type"` and `"named"`, whose meanings are described above.
|
||||
|
||||
Example with fields:
|
||||
|
||||
|
|
@ -812,7 +910,7 @@ In Tree-sitter grammars, there are usually certain rules that represent abstract
|
|||
|
||||
Normally, hidden rules are not mentioned in the node types file, since they don't appear in the syntax tree. But if you add a hidden rule to the grammar's [`supertypes` list](./creating-parsers#the-grammar-dsl), then it _will_ show up in the node types file, with the following special entry:
|
||||
|
||||
* `"subtypes"` - An array of objects that specify the _types_ of nodes that this 'supertype' node can wrap.
|
||||
- `"subtypes"` - An array of objects that specify the _types_ of nodes that this 'supertype' node can wrap.
|
||||
|
||||
Example:
|
||||
|
||||
|
|
|
|||
|
|
@ -118,7 +118,7 @@ pub struct Query {
|
|||
ptr: NonNull<ffi::TSQuery>,
|
||||
capture_names: Vec<String>,
|
||||
capture_quantifiers: Vec<Vec<CaptureQuantifier>>,
|
||||
text_predicates: Vec<Box<[TextPredicate]>>,
|
||||
text_predicates: Vec<Box<[TextPredicateCapture]>>,
|
||||
property_settings: Vec<Box<[QueryProperty]>>,
|
||||
property_predicates: Vec<Box<[(QueryProperty, bool)]>>,
|
||||
general_predicates: Vec<Box<[QueryPredicate]>>,
|
||||
|
|
@ -250,11 +250,16 @@ pub enum QueryErrorKind {
|
|||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum TextPredicate {
|
||||
CaptureEqString(u32, String, bool),
|
||||
CaptureEqCapture(u32, u32, bool),
|
||||
CaptureMatchString(u32, regex::bytes::Regex, bool),
|
||||
CaptureAnyString(u32, Vec<String>, bool),
|
||||
/// The first item is the capture index
|
||||
/// The next is capture specific, depending on what item is expected
|
||||
/// The first bool is if the capture is positive
|
||||
/// The last item is a bool signifying whether or not it's meant to match
|
||||
/// any or all captures
|
||||
enum TextPredicateCapture {
|
||||
EqString(u32, String, bool, bool),
|
||||
EqCapture(u32, u32, bool, bool),
|
||||
MatchString(u32, regex::bytes::Regex, bool, bool),
|
||||
AnyString(u32, Vec<String>, bool),
|
||||
}
|
||||
|
||||
// TODO: Remove this struct at some point. If `core::str::lossy::Utf8Lossy`
|
||||
|
|
@ -1733,7 +1738,7 @@ impl Query {
|
|||
// Build a predicate for each of the known predicate function names.
|
||||
let operator_name = &string_values[p[0].value_id as usize];
|
||||
match operator_name.as_str() {
|
||||
"eq?" | "not-eq?" => {
|
||||
"eq?" | "not-eq?" | "any-eq?" | "any-not-eq?" => {
|
||||
if p.len() != 3 {
|
||||
return Err(predicate_error(
|
||||
row,
|
||||
|
|
@ -1750,23 +1755,30 @@ impl Query {
|
|||
)));
|
||||
}
|
||||
|
||||
let is_positive = operator_name == "eq?";
|
||||
let is_positive = operator_name == "eq?" || operator_name == "any-eq?";
|
||||
let match_all = match operator_name.as_str() {
|
||||
"eq?" | "not-eq?" => true,
|
||||
"any-eq?" | "any-not-eq?" => false,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
text_predicates.push(if p[2].type_ == type_capture {
|
||||
TextPredicate::CaptureEqCapture(
|
||||
TextPredicateCapture::EqCapture(
|
||||
p[1].value_id,
|
||||
p[2].value_id,
|
||||
is_positive,
|
||||
match_all,
|
||||
)
|
||||
} else {
|
||||
TextPredicate::CaptureEqString(
|
||||
TextPredicateCapture::EqString(
|
||||
p[1].value_id,
|
||||
string_values[p[2].value_id as usize].clone(),
|
||||
is_positive,
|
||||
match_all,
|
||||
)
|
||||
});
|
||||
}
|
||||
|
||||
"match?" | "not-match?" => {
|
||||
"match?" | "not-match?" | "any-match?" | "any-not-match?" => {
|
||||
if p.len() != 3 {
|
||||
return Err(predicate_error(row, format!(
|
||||
"Wrong number of arguments to #match? predicate. Expected 2, got {}.",
|
||||
|
|
@ -1786,20 +1798,27 @@ impl Query {
|
|||
)));
|
||||
}
|
||||
|
||||
let is_positive = operator_name == "match?";
|
||||
let is_positive =
|
||||
operator_name == "match?" || operator_name == "any-match?";
|
||||
let match_all = match operator_name.as_str() {
|
||||
"match?" | "not-match?" => true,
|
||||
"any-match?" | "any-not-match?" => false,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let regex = &string_values[p[2].value_id as usize];
|
||||
text_predicates.push(TextPredicate::CaptureMatchString(
|
||||
text_predicates.push(TextPredicateCapture::MatchString(
|
||||
p[1].value_id,
|
||||
regex::bytes::Regex::new(regex).map_err(|_| {
|
||||
predicate_error(row, format!("Invalid regex '{}'", regex))
|
||||
})?,
|
||||
is_positive,
|
||||
match_all,
|
||||
));
|
||||
}
|
||||
|
||||
"set!" => property_settings.push(Self::parse_property(
|
||||
row,
|
||||
&operator_name,
|
||||
operator_name,
|
||||
&result.capture_names,
|
||||
&string_values,
|
||||
&p[1..],
|
||||
|
|
@ -1808,7 +1827,7 @@ impl Query {
|
|||
"is?" | "is-not?" => property_predicates.push((
|
||||
Self::parse_property(
|
||||
row,
|
||||
&operator_name,
|
||||
operator_name,
|
||||
&result.capture_names,
|
||||
&string_values,
|
||||
&p[1..],
|
||||
|
|
@ -1841,7 +1860,7 @@ impl Query {
|
|||
}
|
||||
values.push(string_values[arg.value_id as usize].clone());
|
||||
}
|
||||
text_predicates.push(TextPredicate::CaptureAnyString(
|
||||
text_predicates.push(TextPredicateCapture::AnyString(
|
||||
p[1].value_id,
|
||||
values,
|
||||
is_positive,
|
||||
|
|
@ -2203,7 +2222,7 @@ impl<'tree> QueryMatch<'_, 'tree> {
|
|||
) -> impl Iterator<Item = Node<'tree>> + '_ {
|
||||
self.captures
|
||||
.iter()
|
||||
.filter_map(move |capture| (capture.index == capture_ix).then(|| capture.node))
|
||||
.filter_map(move |capture| (capture.index == capture_ix).then_some(capture.node))
|
||||
}
|
||||
|
||||
fn new(m: ffi::TSQueryMatch, cursor: *mut ffi::TSQueryCursor) -> Self {
|
||||
|
|
@ -2266,52 +2285,61 @@ impl<'tree> QueryMatch<'_, 'tree> {
|
|||
query.text_predicates[self.pattern_index]
|
||||
.iter()
|
||||
.all(|predicate| match predicate {
|
||||
TextPredicate::CaptureEqCapture(i, j, is_positive) => {
|
||||
let node1 = self.nodes_for_capture_index(*i).next();
|
||||
let node2 = self.nodes_for_capture_index(*j).next();
|
||||
match (node1, node2) {
|
||||
(Some(node1), Some(node2)) => {
|
||||
let mut text1 = text_provider.text(node1);
|
||||
let mut text2 = text_provider.text(node2);
|
||||
let text1 = node_text1.get_text(&mut text1);
|
||||
let text2 = node_text2.get_text(&mut text2);
|
||||
(text1 == text2) == *is_positive
|
||||
TextPredicateCapture::EqCapture(i, j, is_positive, match_all_nodes) => {
|
||||
let mut nodes_1 = self.nodes_for_capture_index(*i);
|
||||
let mut nodes_2 = self.nodes_for_capture_index(*j);
|
||||
while let (Some(node1), Some(node2)) = (nodes_1.next(), nodes_2.next()) {
|
||||
let mut text1 = text_provider.text(node1);
|
||||
let mut text2 = text_provider.text(node2);
|
||||
let text1 = node_text1.get_text(&mut text1);
|
||||
let text2 = node_text2.get_text(&mut text2);
|
||||
if (text1 == text2) != *is_positive && *match_all_nodes {
|
||||
return false;
|
||||
}
|
||||
if (text1 == text2) == *is_positive && !*match_all_nodes {
|
||||
return true;
|
||||
}
|
||||
_ => true,
|
||||
}
|
||||
nodes_1.next().is_none() && nodes_2.next().is_none()
|
||||
}
|
||||
TextPredicate::CaptureEqString(i, s, is_positive) => {
|
||||
let node = self.nodes_for_capture_index(*i).next();
|
||||
match node {
|
||||
Some(node) => {
|
||||
let mut text = text_provider.text(node);
|
||||
let text = node_text1.get_text(&mut text);
|
||||
(text == s.as_bytes()) == *is_positive
|
||||
TextPredicateCapture::EqString(i, s, is_positive, match_all_nodes) => {
|
||||
let nodes = self.nodes_for_capture_index(*i);
|
||||
for node in nodes {
|
||||
let mut text = text_provider.text(node);
|
||||
let text = node_text1.get_text(&mut text);
|
||||
if (text == s.as_bytes()) != *is_positive && *match_all_nodes {
|
||||
return false;
|
||||
}
|
||||
if (text == s.as_bytes()) == *is_positive && !*match_all_nodes {
|
||||
return true;
|
||||
}
|
||||
None => true,
|
||||
}
|
||||
true
|
||||
}
|
||||
TextPredicate::CaptureMatchString(i, r, is_positive) => {
|
||||
let node = self.nodes_for_capture_index(*i).next();
|
||||
match node {
|
||||
Some(node) => {
|
||||
let mut text = text_provider.text(node);
|
||||
let text = node_text1.get_text(&mut text);
|
||||
r.is_match(text) == *is_positive
|
||||
TextPredicateCapture::MatchString(i, r, is_positive, match_all_nodes) => {
|
||||
let nodes = self.nodes_for_capture_index(*i);
|
||||
for node in nodes {
|
||||
let mut text = text_provider.text(node);
|
||||
let text = node_text1.get_text(&mut text);
|
||||
if (r.is_match(text)) != *is_positive && *match_all_nodes {
|
||||
return false;
|
||||
}
|
||||
if (r.is_match(text)) == *is_positive && !*match_all_nodes {
|
||||
return true;
|
||||
}
|
||||
None => true,
|
||||
}
|
||||
true
|
||||
}
|
||||
TextPredicate::CaptureAnyString(i, v, is_positive) => {
|
||||
let node = self.nodes_for_capture_index(*i).next();
|
||||
match node {
|
||||
Some(node) => {
|
||||
let mut text = text_provider.text(node);
|
||||
let text = node_text1.get_text(&mut text);
|
||||
v.iter().any(|s| text == s.as_bytes()) == *is_positive
|
||||
TextPredicateCapture::AnyString(i, v, is_positive) => {
|
||||
let nodes = self.nodes_for_capture_index(*i);
|
||||
for node in nodes {
|
||||
let mut text = text_provider.text(node);
|
||||
let text = node_text1.get_text(&mut text);
|
||||
if (v.iter().any(|s| text == s.as_bytes())) != *is_positive {
|
||||
return false;
|
||||
}
|
||||
None => true,
|
||||
}
|
||||
true
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -841,7 +841,13 @@ class Language {
|
|||
}
|
||||
const operator = steps[0].value;
|
||||
let isPositive = true;
|
||||
let matchAll = true;
|
||||
switch (operator) {
|
||||
case 'any-not-eq?':
|
||||
isPositive = false;
|
||||
matchAll = false;
|
||||
case 'any-eq?':
|
||||
matchAll = false;
|
||||
case 'not-eq?':
|
||||
isPositive = false;
|
||||
case 'eq?':
|
||||
|
|
@ -855,28 +861,36 @@ class Language {
|
|||
const captureName1 = steps[1].name;
|
||||
const captureName2 = steps[2].name;
|
||||
textPredicates[i].push(function(captures) {
|
||||
let node1, node2
|
||||
let nodes_1 = [];
|
||||
let nodes_2 = [];
|
||||
for (const c of captures) {
|
||||
if (c.name === captureName1) node1 = c.node;
|
||||
if (c.name === captureName2) node2 = c.node;
|
||||
if (c.name === captureName1) nodes_1.push(c.node);
|
||||
if (c.name === captureName2) nodes_2.push(c.node);
|
||||
}
|
||||
if(node1 === undefined || node2 === undefined) return true;
|
||||
return (node1.text === node2.text) === isPositive;
|
||||
return matchAll
|
||||
? nodes_1.every(n1 => nodes_2.some(n2 => n1.text === n2.text)) === isPositive
|
||||
: nodes_1.some(n1 => nodes_2.some(n2 => n1.text === n2.text)) === isPositive;
|
||||
});
|
||||
} else {
|
||||
const captureName = steps[1].name;
|
||||
const stringValue = steps[2].value;
|
||||
textPredicates[i].push(function(captures) {
|
||||
let nodes = [];
|
||||
for (const c of captures) {
|
||||
if (c.name === captureName) {
|
||||
return (c.node.text === stringValue) === isPositive;
|
||||
};
|
||||
if (c.name === captureName) nodes.push(c.node);
|
||||
}
|
||||
return true;
|
||||
return matchAll
|
||||
? nodes.every(n => n.text === stringValue) === isPositive
|
||||
: nodes.some(n => n.text === stringValue) === isPositive;
|
||||
});
|
||||
}
|
||||
break;
|
||||
|
||||
case 'not-any-match?':
|
||||
isPositive = false;
|
||||
matchAll = false;
|
||||
case 'any-match?':
|
||||
matchAll = false;
|
||||
case 'not-match?':
|
||||
isPositive = false;
|
||||
case 'match?':
|
||||
|
|
@ -892,10 +906,14 @@ class Language {
|
|||
const captureName = steps[1].name;
|
||||
const regex = new RegExp(steps[2].value);
|
||||
textPredicates[i].push(function(captures) {
|
||||
const nodes = [];
|
||||
for (const c of captures) {
|
||||
if (c.name === captureName) return regex.test(c.node.text) === isPositive;
|
||||
if (c.name === captureName) nodes.push(c.node.text);
|
||||
}
|
||||
return true;
|
||||
if (nodes.length === 0) return !isPositive;
|
||||
return matchAll
|
||||
? nodes.every(text => regex.test(text)) === isPositive
|
||||
: nodes.some(text => regex.test(text)) === isPositive;
|
||||
});
|
||||
break;
|
||||
|
||||
|
|
@ -923,6 +941,32 @@ class Language {
|
|||
properties[i][steps[1].value] = steps[2] ? steps[2].value : null;
|
||||
break;
|
||||
|
||||
case 'not-any-of?':
|
||||
isPositive = false;
|
||||
case 'any-of?':
|
||||
if (steps.length < 2) throw new Error(
|
||||
`Wrong number of arguments to \`#${operator}\` predicate. Expected at least 1. Got ${steps.length - 1}.`
|
||||
);
|
||||
if (steps[1].type !== 'capture') throw new Error(
|
||||
`First argument of \`#${operator}\` predicate must be a capture. Got "${steps[1].value}".`
|
||||
);
|
||||
for (let i = 2; i < steps.length; i++) {
|
||||
if (steps[i].type !== 'string') throw new Error(
|
||||
`Arguments to \`#${operator}\` predicate must be a strings.".`
|
||||
);
|
||||
}
|
||||
captureName = steps[1].name;
|
||||
const values = steps.slice(2).map(s => s.value);
|
||||
textPredicates[i].push(function(captures) {
|
||||
const nodes = [];
|
||||
for (const c of captures) {
|
||||
if (c.name === captureName) nodes.push(c.node.text);
|
||||
}
|
||||
if (nodes.length === 0) return !isPositive;
|
||||
return nodes.every(text => values.includes(text)) === isPositive;
|
||||
});
|
||||
break;
|
||||
|
||||
default:
|
||||
predicates[i].push({operator, operands: steps.slice(1)});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
"__Znwm",
|
||||
"___cxa_atexit",
|
||||
"_abort",
|
||||
"_isalpha",
|
||||
"_iswalnum",
|
||||
"_iswalpha",
|
||||
"_iswdigit",
|
||||
|
|
|
|||
|
|
@ -139,7 +139,7 @@ describe("Parser", () => {
|
|||
'redirect: (file_redirect descriptor: (file_descriptor) destination: (word)) ' +
|
||||
'redirect: (file_redirect destination: (word)) ' +
|
||||
'(heredoc_body ' +
|
||||
'(expansion (variable_name))) (heredoc_end))))'
|
||||
'(expansion (variable_name)) (heredoc_content)) (heredoc_end))))'
|
||||
);
|
||||
}).timeout(5000);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue