Merge pull request #612 from tree-sitter/optionals-in-queries

Tree-queries - add optional node syntax, improve handling of repeated nodes
This commit is contained in:
Max Brunsfeld 2020-05-11 12:28:42 -07:00 committed by GitHub
commit e2271ac46c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 758 additions and 418 deletions

1
.gitignore vendored
View file

@ -15,6 +15,7 @@ docs/assets/js/tree-sitter.js
/target
*.rs.bk
*.a
*.dylib
*.o
*.obj
*.exp

View file

@ -2,8 +2,8 @@ use super::helpers::allocations;
use super::helpers::fixtures::get_language;
use std::fmt::Write;
use tree_sitter::{
Node, Parser, Query, QueryCapture, QueryCursor, QueryError, QueryMatch, QueryPredicate,
QueryPredicateArg, QueryProperty,
Language, Node, Parser, Query, QueryCapture, QueryCursor, QueryError, QueryMatch,
QueryPredicate, QueryPredicateArg, QueryProperty,
};
#[test]
@ -163,19 +163,13 @@ fn test_query_matches_with_simple_pattern() {
)
.unwrap();
let source = "function one() { two(); function three() {} }";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_matches(matches, &query, source),
assert_query_matches(
language,
&query,
"function one() { two(); function three() {} }",
&[
(0, vec![("fn-name", "one")]),
(0, vec![("fn-name", "three")])
(0, vec![("fn-name", "three")]),
],
);
});
@ -195,7 +189,10 @@ fn test_query_matches_with_multiple_on_same_root() {
)
.unwrap();
let source = "
assert_query_matches(
language,
&query,
"
class Person {
// the constructor
constructor(name) { this.name = name; }
@ -203,30 +200,21 @@ fn test_query_matches_with_multiple_on_same_root() {
// the getter
getFullName() { return this.name; }
}
";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_matches(matches, &query, source),
",
&[
(
0,
vec![
("the-class-name", "Person"),
("the-method-name", "constructor")
]
("the-method-name", "constructor"),
],
),
(
0,
vec![
("the-class-name", "Person"),
("the-method-name", "getFullName")
]
("the-method-name", "getFullName"),
],
),
],
);
@ -246,20 +234,14 @@ fn test_query_matches_with_multiple_patterns_different_roots() {
)
.unwrap();
let source = "
assert_query_matches(
language,
&query,
"
function f1() {
f2(f3());
}
";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_matches(matches, &query, source),
",
&[
(0, vec![("fn-def", "f1")]),
(1, vec![("fn-ref", "f2")]),
@ -287,21 +269,15 @@ fn test_query_matches_with_multiple_patterns_same_root() {
)
.unwrap();
let source = "
assert_query_matches(
language,
&query,
"
a = {
b: () => { return c; },
d: function() { return d; }
};
";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_matches(matches, &query, source),
",
&[
(1, vec![("method-def", "b")]),
(0, vec![("method-def", "d")]),
@ -325,20 +301,14 @@ fn test_query_matches_with_nesting_and_no_fields() {
)
.unwrap();
let source = "
assert_query_matches(
language,
&query,
"
[[a]];
[[c, d], [e, f, g, h]];
[[h], [i]];
";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_matches(matches, &query, source),
",
&[
(0, vec![("x1", "c"), ("x2", "d")]),
(0, vec![("x1", "e"), ("x2", "f")]),
@ -358,17 +328,11 @@ fn test_query_matches_with_many() {
let language = get_language("javascript");
let query = Query::new(language, "(array (identifier) @element)").unwrap();
let source = "[hello];\n".repeat(50);
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), to_callback(&source));
assert_eq!(
collect_matches(matches, &query, source.as_str()),
vec![(0, vec![("element", "hello")]); 50],
assert_query_matches(
language,
&query,
&"[hello];\n".repeat(50),
&vec![(0, vec![("element", "hello")]); 50],
);
});
}
@ -385,20 +349,11 @@ fn test_query_matches_capturing_error_nodes() {
)
.unwrap();
let source = "function a(b,, c, d :e:) {}";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_matches(matches, &query, source),
&[(
0,
vec![("the-error", ":e:"), ("the-error-identifier", "e"),]
),]
assert_query_matches(
language,
&query,
"function a(b,, c, d :e:) {}",
&[(0, vec![("the-error", ":e:"), ("the-error-identifier", "e")])],
);
});
}
@ -439,10 +394,6 @@ fn test_query_matches_with_named_wildcard() {
fn test_query_matches_with_wildcard_at_the_root() {
allocations::record(|| {
let language = get_language("javascript");
let mut cursor = QueryCursor::new();
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let query = Query::new(
language,
"
@ -455,13 +406,11 @@ fn test_query_matches_with_wildcard_at_the_root() {
)
.unwrap();
let source = "/* one */ var x; /* two */ function y() {} /* three */ class Z {}";
let tree = parser.parse(source, None).unwrap();
let matches = cursor.matches(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_matches(matches, &query, source),
&[(0, vec![("doc", "/* two */"), ("name", "y")]),]
assert_query_matches(
language,
&query,
"/* one */ var x; /* two */ function y() {} /* three */ class Z {}",
&[(0, vec![("doc", "/* two */"), ("name", "y")])],
);
let query = Query::new(
@ -475,17 +424,15 @@ fn test_query_matches_with_wildcard_at_the_root() {
)
.unwrap();
let source = "['hi', x(true), {y: false}]";
let tree = parser.parse(source, None).unwrap();
let matches = cursor.matches(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_matches(matches, &query, source),
assert_query_matches(
language,
&query,
"['hi', x(true), {y: false}]",
&[
(0, vec![("a", "'hi'")]),
(2, vec![("c", "true")]),
(3, vec![("d", "false")]),
]
],
);
});
}
@ -519,16 +466,10 @@ fn test_query_matches_with_immediate_siblings() {
)
.unwrap();
let source = "import a.b.c.d; return [w, [1, y], z]";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_matches(matches, &query, source),
assert_query_matches(
language,
&query,
"import a.b.c.d; return [w, [1, y], z]",
&[
(0, vec![("parent", "a"), ("child", "b")]),
(0, vec![("parent", "b"), ("child", "c")]),
@ -536,7 +477,7 @@ fn test_query_matches_with_immediate_siblings() {
(0, vec![("parent", "c"), ("child", "d")]),
(2, vec![("first-element", "w")]),
(2, vec![("first-element", "1")]),
]
],
);
});
}
@ -564,7 +505,10 @@ fn test_query_matches_with_repeated_leaf_nodes() {
)
.unwrap();
let source = "
assert_query_matches(
language,
&query,
"
// one
// two
a();
@ -582,16 +526,7 @@ fn test_query_matches_with_repeated_leaf_nodes() {
// eight
function d() {}
}
";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_matches(matches, &query, source),
",
&[
(
0,
@ -599,11 +534,253 @@ fn test_query_matches_with_repeated_leaf_nodes() {
("doc", "// four"),
("doc", "// five"),
("doc", "// six"),
("name", "B")
]
("name", "B"),
],
),
(1, vec![("doc", "// eight"), ("name", "d")]),
]
],
);
});
}
#[test]
fn test_query_matches_with_optional_nodes_inside_of_repetitions() {
allocations::record(|| {
let language = get_language("javascript");
let query = Query::new(language, r#"(array (","? (number) @num)+)"#).unwrap();
assert_query_matches(
language,
&query,
r#"
var a = [1, 2, 3, 4]
"#,
&[(
0,
vec![("num", "1"), ("num", "2"), ("num", "3"), ("num", "4")],
)],
);
});
}
#[test]
fn test_query_matches_with_top_level_repetitions() {
allocations::record(|| {
let language = get_language("javascript");
let query = Query::new(
language,
r#"
(comment)+ @doc
"#,
)
.unwrap();
assert_query_matches(
language,
&query,
r#"
// a
// b
// c
d()
// e
"#,
&[
(0, vec![("doc", "// a"), ("doc", "// b"), ("doc", "// c")]),
(0, vec![("doc", "// e")]),
],
);
});
}
#[test]
fn test_query_matches_with_non_terminal_repetitions_within_root() {
allocations::record(|| {
let language = get_language("javascript");
let query = Query::new(
language,
r#"
(*
(expression_statement
(identifier) @id)+)
"#,
)
.unwrap();
assert_query_matches(
language,
&query,
r#"
a;
b;
c;
"#,
&[(0, vec![("id", "a"), ("id", "b"), ("id", "c")])],
);
});
}
#[test]
fn test_query_matches_with_nested_repetitions() {
allocations::record(|| {
let language = get_language("javascript");
let query = Query::new(
language,
r#"
(variable_declaration
(","? (variable_declarator name: (identifier) @x))+)+
"#,
)
.unwrap();
assert_query_matches(
language,
&query,
r#"
var a = b, c, d
var e, f
// more
var g
"#,
&[
(
0,
vec![("x", "a"), ("x", "c"), ("x", "d"), ("x", "e"), ("x", "f")],
),
(0, vec![("x", "g")]),
],
);
});
}
#[test]
fn test_query_matches_with_leading_optional_repeated_leaf_nodes() {
allocations::record(|| {
let language = get_language("javascript");
let query = Query::new(
language,
"
(*
(comment)+? @doc
.
(function_declaration
name: (identifier) @name))
",
)
.unwrap();
assert_query_matches(
language,
&query,
"
function a() {
// one
var b;
function c() {}
// two
// three
var d;
// four
// five
function e() {
}
}
// six
",
&[
(0, vec![("name", "a")]),
(0, vec![("name", "c")]),
(
0,
vec![("doc", "// four"), ("doc", "// five"), ("name", "e")],
),
],
);
});
}
#[test]
fn test_query_matches_with_trailing_optional_nodes() {
allocations::record(|| {
let language = get_language("javascript");
let query = Query::new(
language,
"
(class_declaration
name: (identifier) @class
(class_heritage
(identifier) @superclass)?)
",
)
.unwrap();
assert_query_matches(language, &query, "class A {}", &[(0, vec![("class", "A")])]);
assert_query_matches(
language,
&query,
"
class A {}
class B extends C {}
class D extends (E.F) {}
",
&[
(0, vec![("class", "A")]),
(0, vec![("class", "B"), ("superclass", "C")]),
(0, vec![("class", "D")]),
],
);
});
}
#[test]
fn test_query_matches_with_nested_optional_nodes() {
allocations::record(|| {
let language = get_language("javascript");
// A function call, optionally containing a function call, which optionally contains a number
let query = Query::new(
language,
"
(call_expression
function: (identifier) @outer-fn
arguments: (arguments
(call_expression
function: (identifier) @inner-fn
arguments: (arguments
(number)? @num))?))
",
)
.unwrap();
assert_query_matches(
language,
&query,
r#"
a(b, c(), d(null, 1, 2))
e()
f(g())
"#,
&[
(0, vec![("outer-fn", "a"), ("inner-fn", "c")]),
(0, vec![("outer-fn", "c")]),
(0, vec![("outer-fn", "a"), ("inner-fn", "d"), ("num", "1")]),
(0, vec![("outer-fn", "a"), ("inner-fn", "d"), ("num", "2")]),
(0, vec![("outer-fn", "d")]),
(0, vec![("outer-fn", "e")]),
(0, vec![("outer-fn", "f"), ("inner-fn", "g")]),
(0, vec![("outer-fn", "g")]),
],
);
});
}
@ -612,10 +789,6 @@ fn test_query_matches_with_repeated_leaf_nodes() {
fn test_query_matches_with_repeated_internal_nodes() {
allocations::record(|| {
let language = get_language("javascript");
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let mut cursor = QueryCursor::new();
let query = Query::new(
language,
"
@ -626,18 +799,18 @@ fn test_query_matches_with_repeated_internal_nodes() {
",
)
.unwrap();
let source = "
assert_query_matches(
language,
&query,
"
class A {
@c
@d
e() {}
}
";
let tree = parser.parse(source, None).unwrap();
let matches = cursor.matches(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_matches(matches, &query, source),
&[(0, vec![("deco", "c"), ("deco", "d"), ("name", "e")]),]
",
&[(0, vec![("deco", "c"), ("deco", "d"), ("name", "e")])],
);
})
}
@ -651,20 +824,16 @@ fn test_query_matches_in_language_with_simple_aliases() {
// tag names, script tag names, and style tag names. All of
// these tokens are aliased to `tag_name`.
let query = Query::new(language, "(tag_name) @tag").unwrap();
let source = "
assert_query_matches(
language,
&query,
"
<div>
<script>hi</script>
<style>hi</style>
</div>";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), to_callback(&source));
assert_eq!(
collect_matches(matches, &query, source),
</div>
",
&[
(0, vec![("tag", "div")]),
(0, vec![("tag", "script")]),
@ -680,6 +849,8 @@ fn test_query_matches_in_language_with_simple_aliases() {
#[test]
fn test_query_matches_with_different_tokens_with_the_same_string_value() {
allocations::record(|| {
// In Rust, there are two '<' tokens: one for the binary operator,
// and one with higher precedence for generics.
let language = get_language("rust");
let query = Query::new(
language,
@ -690,24 +861,16 @@ fn test_query_matches_with_different_tokens_with_the_same_string_value() {
)
.unwrap();
// In Rust, there are two '<' tokens: one for the binary operator,
// and one with higher precedence for generics.
let source = "const A: B<C> = d < e || f > g;";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_matches(matches, &query, source),
assert_query_matches(
language,
&query,
"const A: B<C> = d < e || f > g;",
&[
(0, vec![("less", "<")]),
(1, vec![("greater", ">")]),
(0, vec![("less", "<")]),
(1, vec![("greater", ">")]),
]
],
);
});
}
@ -757,20 +920,14 @@ fn test_query_matches_with_anonymous_tokens() {
)
.unwrap();
let source = "foo(a && b);";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_matches(matches, &query, source),
assert_query_matches(
language,
&query,
"foo(a && b);",
&[
(1, vec![("operator", "&&")]),
(0, vec![("punctuation", ";")]),
]
],
);
});
}
@ -1663,6 +1820,20 @@ fn test_query_disable_pattern() {
});
}
fn assert_query_matches(
language: Language,
query: &Query,
source: &str,
expected: &[(usize, Vec<(&str, &str)>)],
) {
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), to_callback(source));
assert_eq!(collect_matches(matches, &query, source), expected);
}
fn collect_matches<'a>(
matches: impl Iterator<Item = QueryMatch<'a>>,
query: &'a Query,

View file

@ -35,21 +35,21 @@ typedef struct {
* captured in this pattern.
* - `depth` - The depth where this node occurs in the pattern. The root node
* of the pattern has depth zero.
* - `repeat_step_index` - If this step is part of a repetition, the index of
* the beginning of the repetition. A `NONE` value means this step is not
* part of a repetition.
* - `alternative_index` - The index of a different query step that serves as
* an alternative to this step.
*/
typedef struct {
TSSymbol symbol;
TSFieldId field;
uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT];
uint16_t repeat_step_index;
uint16_t depth: 11;
uint16_t alternative_index;
uint8_t depth;
bool contains_captures: 1;
bool is_pattern_start: 1;
bool is_immediate: 1;
bool is_last: 1;
bool is_repeated: 1;
bool is_placeholder: 1;
bool alternative_is_immediate: 1;
} QueryStep;
/*
@ -88,7 +88,25 @@ typedef struct {
* QueryState - The state of an in-progress match of a particular pattern
* in a query. While executing, a `TSQueryCursor` must keep track of a number
* of possible in-progress matches. Each of those possible matches is
* represented as one of these states.
* represented as one of these states. Fields:
* - `id` - A numeric id that is exposed to the public API. This allows the
* caller to remove a given match, preventing any more of its captures
* from being returned.
* - `start_depth` - The depth in the tree where the first step of the state's
* pattern was matched.
* - `pattern_index` - The pattern that the state is matching.
* - `consumed_capture_count` - The number of captures from this match that
* have already been returned.
* - `capture_list_id` - A numeric id that can be used to retrieve the state's
* list of captures from the `CaptureListPool`.
* - `seeking_immediate_match` - A flag that indicates that the state's next
* step must be matched by the very next sibling. This is used when
* processing repetitions.
* - `has_in_progress_alternatives` - A flag that indicates that there is are
* other states that have the same captures as this state, but are at
* different steps in their pattern. This means that in order to obey the
* 'longest-match' rule, this state should not be returned as a match until
* it is clear that there can be no longer match.
*/
typedef struct {
uint32_t id;
@ -96,10 +114,9 @@ typedef struct {
uint16_t pattern_index;
uint16_t step_index;
uint16_t consumed_capture_count;
uint16_t repeat_match_count;
uint16_t step_index_on_failure;
uint8_t capture_list_id;
bool seeking_non_match;
bool seeking_immediate_match: 1;
bool has_in_progress_alternatives: 1;
} QueryState;
typedef Array(TSQueryCapture) CaptureList;
@ -417,12 +434,13 @@ static QueryStep query_step__new(
.depth = depth,
.field = 0,
.capture_ids = {NONE, NONE, NONE, NONE},
.alternative_index = NONE,
.contains_captures = false,
.is_repeated = false,
.is_last = false,
.is_pattern_start = false,
.is_placeholder = false,
.is_immediate = is_immediate,
.repeat_step_index = NONE,
.alternative_is_immediate = false,
};
}
@ -511,13 +529,14 @@ static inline bool ts_query__pattern_map_search(
static inline void ts_query__pattern_map_insert(
TSQuery *self,
TSSymbol symbol,
uint32_t start_step_index
uint32_t start_step_index,
uint32_t pattern_index
) {
uint32_t index;
ts_query__pattern_map_search(self, symbol, &index);
array_insert(&self->pattern_map, index, ((PatternEntry) {
.step_index = start_step_index,
.pattern_index = self->pattern_map.size,
.pattern_index = pattern_index,
}));
}
@ -689,7 +708,7 @@ static TSQueryError ts_query__parse_pattern(
stream_advance(stream);
stream_skip_whitespace(stream);
// Parse a nested list, which represents a pattern followed by
// At the top-level, a nested list represents one root pattern followed by
// zero-or-more predicates.
if (stream->next == '(' && depth == 0) {
TSQueryError e = ts_query__parse_pattern(self, stream, 0, capture_count, is_immediate);
@ -709,65 +728,94 @@ static TSQueryError ts_query__parse_pattern(
}
}
TSSymbol symbol;
// When nested inside of a larger pattern, a nested list just represents
// multiple sibling nodes which are grouped, possibly so that a postfix
// operator can be applied to the group.
else if (depth > 0 && (stream->next == '(' || stream->next == '"' )) {
bool child_is_immediate = false;
for (;;) {
if (stream->next == '.') {
child_is_immediate = true;
stream_advance(stream);
stream_skip_whitespace(stream);
}
TSQueryError e = ts_query__parse_pattern(
self,
stream,
depth,
capture_count,
child_is_immediate
);
if (e == PARENT_DONE) {
stream_advance(stream);
break;
} else if (e) {
return e;
}
// Parse the wildcard symbol
if (stream->next == '*') {
symbol = depth > 0 ? NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL;
stream_advance(stream);
}
// Parse a normal node name
else if (stream_is_ident_start(stream)) {
const char *node_name = stream->input;
stream_scan_identifier(stream);
uint32_t length = stream->input - node_name;
symbol = ts_language_symbol_for_name(
self->language,
node_name,
length,
true
);
if (!symbol) {
stream_reset(stream, node_name);
return TSQueryErrorNodeType;
child_is_immediate = false;
}
} else {
return TSQueryErrorSyntax;
}
TSSymbol symbol;
// Add a step for the node.
array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
// Parse the child patterns
stream_skip_whitespace(stream);
bool child_is_immediate = false;
uint16_t child_start_step_index = self->steps.size;
for (;;) {
if (stream->next == '.') {
child_is_immediate = true;
// Parse the wildcard symbol
if (stream->next == '*') {
symbol = depth > 0 ? NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL;
stream_advance(stream);
stream_skip_whitespace(stream);
}
TSQueryError e = ts_query__parse_pattern(
self,
stream,
depth + 1,
capture_count,
child_is_immediate
);
if (e == PARENT_DONE) {
if (child_is_immediate) {
self->steps.contents[child_start_step_index].is_last = true;
// Parse a normal node name
else if (stream_is_ident_start(stream)) {
const char *node_name = stream->input;
stream_scan_identifier(stream);
uint32_t length = stream->input - node_name;
symbol = ts_language_symbol_for_name(
self->language,
node_name,
length,
true
);
if (!symbol) {
stream_reset(stream, node_name);
return TSQueryErrorNodeType;
}
stream_advance(stream);
break;
} else if (e) {
return e;
} else {
return TSQueryErrorSyntax;
}
child_is_immediate = false;
// Add a step for the node.
array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
// Parse the child patterns
stream_skip_whitespace(stream);
bool child_is_immediate = false;
uint16_t child_start_step_index = self->steps.size;
for (;;) {
if (stream->next == '.') {
child_is_immediate = true;
stream_advance(stream);
stream_skip_whitespace(stream);
}
TSQueryError e = ts_query__parse_pattern(
self,
stream,
depth + 1,
capture_count,
child_is_immediate
);
if (e == PARENT_DONE) {
if (child_is_immediate) {
self->steps.contents[child_start_step_index].is_last = true;
}
stream_advance(stream);
break;
} else if (e) {
return e;
}
child_is_immediate = false;
}
}
}
@ -863,8 +911,17 @@ static TSQueryError ts_query__parse_pattern(
if (stream->next == '+') {
stream_advance(stream);
step->is_repeated = true;
array_back(&self->steps)->repeat_step_index = starting_step_index;
QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
repeat_step.alternative_index = starting_step_index;
repeat_step.is_placeholder = true;
repeat_step.alternative_is_immediate = true;
array_push(&self->steps, repeat_step);
stream_skip_whitespace(stream);
}
else if (stream->next == '?') {
stream_advance(stream);
step->alternative_index = self->steps.size;
stream_skip_whitespace(stream);
}
@ -950,6 +1007,7 @@ TSQuery *ts_query_new(
Stream stream = stream_new(source, source_len);
stream_skip_whitespace(&stream);
while (stream.input < stream.end) {
uint32_t pattern_index = self->predicates_by_pattern.size;
uint32_t start_step_index = self->steps.size;
uint32_t capture_count = 0;
array_push(&self->start_bytes_by_pattern, stream.input - source);
@ -980,14 +1038,18 @@ TSQuery *ts_query_new(
}
// Maintain a map that can look up patterns for a given root symbol.
self->steps.contents[start_step_index].is_pattern_start = true;
ts_query__pattern_map_insert(
self,
self->steps.contents[start_step_index].symbol,
start_step_index
);
if (self->steps.contents[start_step_index].symbol == WILDCARD_SYMBOL) {
self->wildcard_root_pattern_count++;
for (;;) {
QueryStep *step = &self->steps.contents[start_step_index];
step->is_pattern_start = true;
ts_query__pattern_map_insert(self, step->symbol, start_step_index, pattern_index);
if (step->symbol == WILDCARD_SYMBOL) {
self->wildcard_root_pattern_count++;
}
if (step->alternative_index != NONE) {
start_step_index = step->alternative_index;
} else {
break;
}
}
}
@ -1191,23 +1253,85 @@ static bool ts_query_cursor__first_in_progress_capture(
return result;
}
static bool ts_query__cursor_add_state(
// Determine which node is first in a depth-first traversal
int ts_query_cursor__compare_nodes(TSNode left, TSNode right) {
if (left.id != right.id) {
uint32_t left_start = ts_node_start_byte(left);
uint32_t right_start = ts_node_start_byte(right);
if (left_start < right_start) return -1;
if (left_start > right_start) return 1;
uint32_t left_node_count = ts_node_end_byte(left);
uint32_t right_node_count = ts_node_end_byte(right);
if (left_node_count > right_node_count) return -1;
if (left_node_count < right_node_count) return 1;
}
return 0;
}
// Determine if either state contains a superset of the other state's captures.
void ts_query_cursor__compare_captures(
TSQueryCursor *self,
QueryState *left_state,
QueryState *right_state,
bool *left_contains_right,
bool *right_contains_left
) {
CaptureList *left_captures = capture_list_pool_get(
&self->capture_list_pool,
left_state->capture_list_id
);
CaptureList *right_captures = capture_list_pool_get(
&self->capture_list_pool,
right_state->capture_list_id
);
*left_contains_right = true;
*right_contains_left = true;
unsigned i = 0, j = 0;
for (;;) {
if (i < left_captures->size) {
if (j < right_captures->size) {
TSQueryCapture *left = &left_captures->contents[i];
TSQueryCapture *right = &right_captures->contents[j];
if (left->node.id == right->node.id && left->index == right->index) {
i++;
j++;
} else {
switch (ts_query_cursor__compare_nodes(left->node, right->node)) {
case -1:
*right_contains_left = false;
i++;
break;
case 1:
*left_contains_right = false;
j++;
break;
default:
*right_contains_left = false;
*left_contains_right = false;
i++;
j++;
break;
}
}
} else {
*right_contains_left = false;
break;
}
} else {
if (j < right_captures->size) {
*left_contains_right = false;
}
break;
}
}
}
static bool ts_query_cursor__add_state(
TSQueryCursor *self,
const PatternEntry *pattern
) {
QueryStep *step = &self->query->steps.contents[pattern->step_index];
// If this pattern begins with a repetition, then avoid creating
// new states after already matching the repetition one or more times.
// The query should only one match for the repetition - the one that
// started the earliest.
if (step->is_repeated) {
for (unsigned i = 0; i < self->states.size; i++) {
QueryState *state = &self->states.contents[i];
if (state->step_index == pattern->step_index) return true;
}
}
uint32_t list_id = capture_list_pool_acquire(&self->capture_list_pool);
// If there are no capture lists left in the pool, then terminate whichever
@ -1244,21 +1368,23 @@ static bool ts_query__cursor_add_state(
.pattern_index = pattern->pattern_index,
.start_depth = self->depth - step->depth,
.consumed_capture_count = 0,
.repeat_match_count = 0,
.step_index_on_failure = NONE,
.seeking_non_match = false,
.seeking_immediate_match = false,
}));
return true;
}
// Duplicate the given state and insert the newly-created state immediately after
// the given state in the `states` array.
static QueryState *ts_query__cursor_copy_state(
TSQueryCursor *self,
const QueryState *state
) {
uint32_t new_list_id = capture_list_pool_acquire(&self->capture_list_pool);
if (new_list_id == NONE) return NULL;
array_push(&self->states, *state);
QueryState *new_state = array_back(&self->states);
uint32_t index = (state - self->states.contents) + 1;
QueryState copy = *state;
array_insert(&self->states, index, copy);
QueryState *new_state = &self->states.contents[index];
new_state->capture_list_id = new_list_id;
CaptureList *old_captures = capture_list_pool_get(
&self->capture_list_pool,
@ -1281,56 +1407,62 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
if (self->ascending) {
LOG("leave node. type:%s\n", ts_node_type(ts_tree_cursor_current_node(&self->cursor)));
// When leaving a node, remove any unfinished states whose next step
// needed to match something within that node.
uint32_t deleted_count = 0;
for (unsigned i = 0, n = self->states.size; i < n; i++) {
QueryState *state = &self->states.contents[i];
QueryStep *step = &self->query->steps.contents[state->step_index];
if ((uint32_t)state->start_depth + (uint32_t)step->depth > self->depth) {
LOG(
" failed to match. pattern:%u, step:%u\n",
state->pattern_index,
state->step_index
);
capture_list_pool_release(
&self->capture_list_pool,
state->capture_list_id
);
deleted_count++;
} else if (deleted_count > 0) {
self->states.contents[i - deleted_count] = *state;
}
}
self->states.size -= deleted_count;
// Leave this node by stepping to its next sibling or to its parent.
bool did_move = true;
if (ts_tree_cursor_goto_next_sibling(&self->cursor)) {
self->ascending = false;
} else if (ts_tree_cursor_goto_parent(&self->cursor)) {
self->depth--;
} else {
did_move = false;
}
// After leaving a node, remove any states that cannot make further progress.
uint32_t deleted_count = 0;
for (unsigned i = 0, n = self->states.size; i < n; i++) {
QueryState *state = &self->states.contents[i];
QueryStep *step = &self->query->steps.contents[state->step_index];
// If a state completed its pattern inside of this node, but was deferred from finishing
// in order to search for longer matches, mark it as finished.
if (step->depth == PATTERN_DONE_MARKER) {
if (state->start_depth > self->depth || !did_move) {
LOG(" finish pattern %u\n", state->pattern_index);
state->id = self->next_state_id++;
array_push(&self->finished_states, *state);
deleted_count++;
continue;
}
}
// If a state needed to match something within this node, then remove that state
// as it has failed to match.
else if ((uint32_t)state->start_depth + (uint32_t)step->depth > self->depth) {
LOG(
" failed to match. pattern:%u, step:%u\n",
state->pattern_index,
state->step_index
);
capture_list_pool_release(
&self->capture_list_pool,
state->capture_list_id
);
deleted_count++;
continue;
}
if (deleted_count > 0) {
self->states.contents[i - deleted_count] = *state;
}
}
self->states.size -= deleted_count;
if (!did_move) {
return self->finished_states.size > 0;
}
} else {
bool has_later_siblings;
bool can_have_later_siblings_with_this_field;
TSFieldId field_id = ts_tree_cursor_current_status(
&self->cursor,
&has_later_siblings,
&can_have_later_siblings_with_this_field
);
// If this node is before the selected range, then avoid descending into it.
TSNode node = ts_tree_cursor_current_node(&self->cursor);
TSSymbol symbol = ts_node_symbol(node);
bool is_named = ts_node_is_named(node);
if (symbol != ts_builtin_sym_error && self->query->symbol_map) {
symbol = self->query->symbol_map[symbol];
}
// If this node is before the selected range, then avoid descending
// into it.
if (
ts_node_end_byte(node) <= self->start_byte ||
point_lte(ts_node_end_point(node), self->start_point)
@ -1347,18 +1479,26 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
point_lte(self->end_point, ts_node_start_point(node))
) return false;
// Get the properties of the current node.
TSSymbol symbol = ts_node_symbol(node);
bool is_named = ts_node_is_named(node);
if (symbol != ts_builtin_sym_error && self->query->symbol_map) {
symbol = self->query->symbol_map[symbol];
}
bool can_have_later_siblings;
bool can_have_later_siblings_with_this_field;
TSFieldId field_id = ts_tree_cursor_current_status(
&self->cursor,
&can_have_later_siblings,
&can_have_later_siblings_with_this_field
);
LOG(
"enter node. "
"type:%s, field:%s, row:%u state_count:%u, "
"finished_state_count:%u, has_later_siblings:%d, "
"can_have_later_siblings_with_this_field:%d\n",
"enter node. type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n",
ts_node_type(node),
ts_language_field_name_for_id(self->query->language, field_id),
ts_node_start_point(node).row,
self->states.size,
self->finished_states.size,
has_later_siblings,
can_have_later_siblings_with_this_field
self->finished_states.size
);
// Add new states for any patterns whose root node is a wildcard.
@ -1369,7 +1509,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
// If this node matches the first step of the pattern, then add a new
// state at the start of this pattern.
if (step->field && field_id != step->field) continue;
if (!ts_query__cursor_add_state(self, pattern)) break;
if (!ts_query_cursor__add_state(self, pattern)) break;
}
// Add new states for any patterns whose root node matches this node.
@ -1381,7 +1521,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
// If this node matches the first step of the pattern, then add a new
// state at the start of this pattern.
if (step->field && field_id != step->field) continue;
if (!ts_query__cursor_add_state(self, pattern)) break;
if (!ts_query_cursor__add_state(self, pattern)) break;
// Advance to the next pattern whose root node matches this node.
i++;
@ -1392,9 +1532,10 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
}
// Update all of the in-progress states with current node.
for (unsigned i = 0, n = self->states.size; i < n; i++) {
for (unsigned i = 0; i < self->states.size; i++) {
QueryState *state = &self->states.contents[i];
QueryStep *step = &self->query->steps.contents[state->step_index];
state->has_in_progress_alternatives = false;
// Check that the node matches all of the criteria for the next
// step of the pattern.
@ -1407,11 +1548,11 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
step->symbol == symbol ||
step->symbol == WILDCARD_SYMBOL ||
(step->symbol == NAMED_WILDCARD_SYMBOL && is_named);
bool later_sibling_can_match = has_later_siblings;
if (step->is_immediate && is_named) {
bool later_sibling_can_match = can_have_later_siblings;
if ((step->is_immediate && is_named) || state->seeking_immediate_match) {
later_sibling_can_match = false;
}
if (step->is_last && has_later_siblings) {
if (step->is_last && can_have_later_siblings) {
node_does_match = false;
}
if (step->field) {
@ -1424,25 +1565,8 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
}
}
// Remove states immediately if it is ever clear that they cannot match.
if (!node_does_match) {
// If this QueryState has processed a repeating sequence, and that repeating
// sequence has ended, move on to the *next* step of this state's pattern.
if (
state->step_index_on_failure != NONE &&
(!later_sibling_can_match || step->is_repeated)
) {
LOG(
" finish repetition state. pattern:%u, step:%u\n",
state->pattern_index,
state->step_index
);
state->step_index = state->step_index_on_failure;
state->step_index_on_failure = NONE;
state->repeat_match_count = 0;
i--;
continue;
}
if (!later_sibling_can_match) {
LOG(
" discard state. pattern:%u, step:%u\n",
@ -1455,114 +1579,158 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
);
array_erase(&self->states, i);
i--;
n--;
}
state->seeking_non_match = false;
continue;
}
// The `seeking_non_match` flag indicates that a previous QueryState
// has already begun processing this repeating sequence, so that *this*
// QueryState should not begin matching until a separate repeating sequence
// is found.
if (state->seeking_non_match) continue;
// Some patterns can match their root node in multiple ways,
// capturing different children. If this pattern step could match
// later children within the same parent, then this query state
// cannot simply be updated in place. It must be split into two
// states: one that matches this node, and one which skips over
// this node, to preserve the possibility of matching later
// siblings.
QueryState *next_state = state;
// Some patterns can match their root node in multiple ways, capturing different
// children. If this pattern step could match later children within the same
// parent, then this query state cannot simply be updated in place. It must be
// split into two states: one that matches this node, and one which skips over
// this node, to preserve the possibility of matching later siblings.
if (
!step->is_pattern_start &&
step->contains_captures &&
later_sibling_can_match &&
state->repeat_match_count == 0
!step->is_pattern_start &&
step->contains_captures
) {
QueryState *copy = ts_query__cursor_copy_state(self, state);
// The QueryState that matched this node has begun matching a repeating
// sequence. The QueryState that *skipped* this node should not start
// matching later elements of the same repeating sequence.
if (step->is_repeated) {
state->seeking_non_match = true;
}
if (copy) {
if (ts_query__cursor_copy_state(self, state)) {
LOG(
" split state. pattern:%u, step:%u\n",
copy->pattern_index,
copy->step_index
" split state for capture. pattern:%u, step:%u\n",
state->pattern_index,
state->step_index
);
next_state = copy;
} else {
LOG(" cannot split state.\n");
i++;
}
}
// If the current node is captured in this pattern, add it to the
// capture list.
// If the current node is captured in this pattern, add it to the capture list.
CaptureList *capture_list = capture_list_pool_get(
&self->capture_list_pool,
state->capture_list_id
);
for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) {
uint16_t capture_id = step->capture_ids[j];
if (step->capture_ids[j] == NONE) break;
CaptureList *capture_list = capture_list_pool_get(
&self->capture_list_pool,
next_state->capture_list_id
);
array_push(capture_list, ((TSQueryCapture) {
node,
capture_id
}));
array_push(capture_list, ((TSQueryCapture) { node, capture_id }));
LOG(
" capture node. pattern:%u, capture_id:%u, capture_count:%u\n",
next_state->pattern_index,
state->pattern_index,
capture_id,
capture_list->size
);
}
// If this is the end of a repetition, then jump back to the beginning
// of that repetition.
if (step->repeat_step_index != NONE) {
next_state->step_index_on_failure = next_state->step_index + 1;
next_state->step_index = step->repeat_step_index;
next_state->repeat_match_count++;
LOG(
" continue repeat. pattern:%u, match_count:%u\n",
next_state->pattern_index,
next_state->repeat_match_count
);
} else {
next_state->step_index++;
LOG(
" advance state. pattern:%u, step:%u\n",
next_state->pattern_index,
next_state->step_index
);
// Advance this state to the next step of its pattern.
state->step_index++;
state->seeking_immediate_match = false;
LOG(
" advance state. pattern:%u, step:%u\n",
state->pattern_index,
state->step_index
);
QueryStep *next_step = step + 1;
// If the pattern is now done, then remove it from the list of
// in-progress states, and add it to the list of finished states.
if (next_step->depth == PATTERN_DONE_MARKER) {
LOG(" finish pattern %u\n", next_state->pattern_index);
next_state->id = self->next_state_id++;
array_push(&self->finished_states, *next_state);
if (next_state == state) {
array_erase(&self->states, i);
i--;
n--;
} else {
self->states.size--;
// If this state's next step has an 'alternative' step (the step is either optional,
// or is the end of a repetition), then copy the state in order to pursue both
// alternatives. The alternative step itself may have an alternative, so this is
// an interative process.
unsigned start_index = state - self->states.contents;
unsigned end_index = start_index + 1;
for (unsigned j = start_index; j < end_index; j++) {
QueryState *state = &self->states.contents[j];
QueryStep *next_step = &self->query->steps.contents[state->step_index];
if (next_step->alternative_index != NONE) {
QueryState *copy = ts_query__cursor_copy_state(self, state);
if (next_step->is_placeholder) {
state->step_index++;
j--;
}
if (copy) {
i++;
end_index++;
copy->step_index = next_step->alternative_index;
if (next_step->alternative_is_immediate) {
copy->seeking_immediate_match = true;
}
LOG(
" split state for branch. pattern:%u, step:%u, step:%u\n",
copy->pattern_index,
state->step_index,
copy->step_index
);
}
}
}
}
for (unsigned i = 0; i < self->states.size; i++) {
QueryState *state = &self->states.contents[i];
bool did_remove = false;
// Enfore the longest-match criteria. When a query pattern contains optional or
// repeated nodes, this is necesssary to avoid multiple redundant states, where
// one state has a strict subset of another state's captures.
for (unsigned j = i + 1; j < self->states.size; j++) {
QueryState *other_state = &self->states.contents[j];
if (
state->pattern_index == other_state->pattern_index &&
state->start_depth == other_state->start_depth
) {
bool left_contains_right, right_contains_left;
ts_query_cursor__compare_captures(
self,
state,
other_state,
&left_contains_right,
&right_contains_left
);
if (left_contains_right) {
if (state->step_index == other_state->step_index) {
LOG(
" drop shorter state. pattern: %u, step_index: %u\n",
state->pattern_index,
state->step_index
);
capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id);
array_erase(&self->states, j);
j--;
continue;
}
other_state->has_in_progress_alternatives = true;
}
if (right_contains_left) {
if (state->step_index == other_state->step_index) {
LOG(
" drop shorter state. pattern: %u, step_index: %u\n",
state->pattern_index,
state->step_index
);
capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
array_erase(&self->states, i);
did_remove = true;
break;
}
state->has_in_progress_alternatives = true;
}
}
}
// If there the state is at the end of its pattern, remove it from the list
// of in-progress states and add it to the list of finished states.
if (!did_remove) {
QueryStep *next_step = &self->query->steps.contents[state->step_index];
if (next_step->depth == PATTERN_DONE_MARKER) {
if (state->has_in_progress_alternatives) {
LOG(" defer finishing pattern %u\n", state->pattern_index);
} else {
LOG(" finish pattern %u\n", state->pattern_index);
state->id = self->next_state_id++;
array_push(&self->finished_states, *state);
array_erase(&self->states, state - self->states.contents);
i--;
}
}
}
}
// Continue descending if possible.
if (ts_tree_cursor_goto_first_child(&self->cursor)) {