From 570b83e2b23ed95ae38d191ba86f9c4b8b03c71f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 19 Feb 2020 11:47:52 -0800 Subject: [PATCH] query: Add immediate child operator --- cli/src/tests/query_test.rs | 51 ++++++++++++++++++++++++ lib/src/query.c | 79 ++++++++++++++++++++++++++++--------- 2 files changed, 112 insertions(+), 18 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 278c0875..1be86bae 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -434,6 +434,57 @@ fn test_query_matches_with_named_wildcard() { }); } +#[test] +fn test_query_with_immediate_siblings() { + allocations::record(|| { + let language = get_language("python"); + + // The immediate child operator '.' can be used in three similar ways: + // 1. Before the first child node in a pattern, it means that there cannot be any + // named siblings before that child node. + // 2. After the last child node in a pattern, it means that there cannot be any named + // sibling after that child node. + // 2. Between two child nodes in a pattern, it specifies that there cannot be any + // named siblings between those two child snodes. + let query = Query::new( + language, + " + (dotted_name + (identifier) @parent + . + (identifier) @child) + (dotted_name + (identifier) @last-child + .) + (list + . + (*) @first-element) + ", + ) + .unwrap(); + + let source = "import a.b.c.d; return [w, [1, y], z]"; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + + assert_eq!( + collect_matches(matches, &query, source), + &[ + (0, vec![("parent", "a"), ("child", "b")]), + (0, vec![("parent", "b"), ("child", "c")]), + (1, vec![("last-child", "d")]), + (0, vec![("parent", "c"), ("child", "d")]), + (2, vec![("first-element", "w")]), + (2, vec![("first-element", "1")]), + ] + ); + }); +} + #[test] fn test_query_matches_in_language_with_simple_aliases() { allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index cc1c72f4..dd26b0f4 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -40,8 +40,10 @@ typedef struct { TSSymbol symbol; TSFieldId field; uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; - uint16_t depth: 15; + uint16_t depth: 13; bool contains_captures: 1; + bool is_immediate: 1; + bool is_last: 1; } QueryStep; /* @@ -333,13 +335,18 @@ static uint16_t symbol_table_insert_name( * QueryStep ************/ -static QueryStep query_step__new(TSSymbol symbol, uint16_t depth) { +static QueryStep query_step__new( + TSSymbol symbol, + uint16_t depth, + bool is_immediate +) { return (QueryStep) { .symbol = symbol, .depth = depth, .field = 0, .capture_ids = {NONE, NONE, NONE, NONE}, .contains_captures = false, + .is_immediate = is_immediate, }; } @@ -576,7 +583,8 @@ static TSQueryError ts_query__parse_pattern( TSQuery *self, Stream *stream, uint32_t depth, - uint32_t *capture_count + uint32_t *capture_count, + bool is_immediate ) { uint16_t starting_step_index = self->steps.size; @@ -595,7 +603,7 @@ static TSQueryError ts_query__parse_pattern( // Parse a nested list, which represents a pattern followed by // zero-or-more predicates. if (stream->next == '(' && depth == 0) { - TSQueryError e = ts_query__parse_pattern(self, stream, 0, capture_count); + TSQueryError e = ts_query__parse_pattern(self, stream, 0, capture_count, is_immediate); if (e) return e; // Parse the predicates. @@ -640,18 +648,37 @@ static TSQueryError ts_query__parse_pattern( } // Add a step for the node. - array_push(&self->steps, query_step__new(symbol, depth)); + array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); // Parse the child patterns stream_skip_whitespace(stream); + bool child_is_immediate = false; + uint16_t child_start_step_index = self->steps.size; for (;;) { - TSQueryError e = ts_query__parse_pattern(self, stream, depth + 1, capture_count); + if (stream->next == '.') { + child_is_immediate = true; + stream_advance(stream); + stream_skip_whitespace(stream); + } + + TSQueryError e = ts_query__parse_pattern( + self, + stream, + depth + 1, + capture_count, + child_is_immediate + ); if (e == PARENT_DONE) { + if (child_is_immediate) { + self->steps.contents[child_start_step_index].is_last = true; + } stream_advance(stream); break; } else if (e) { return e; } + + child_is_immediate = false; } } @@ -680,7 +707,7 @@ static TSQueryError ts_query__parse_pattern( stream_reset(stream, string_content); return TSQueryErrorNodeType; } - array_push(&self->steps, query_step__new(symbol, depth)); + array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); if (stream->next != '"') return TSQueryErrorSyntax; stream_advance(stream); @@ -703,7 +730,13 @@ static TSQueryError ts_query__parse_pattern( // Parse the pattern uint32_t step_index = self->steps.size; - TSQueryError e = ts_query__parse_pattern(self, stream, depth, capture_count); + TSQueryError e = ts_query__parse_pattern( + self, + stream, + depth, + capture_count, + is_immediate + ); if (e == PARENT_DONE) return TSQueryErrorSyntax; if (e) return e; @@ -726,7 +759,7 @@ static TSQueryError ts_query__parse_pattern( stream_skip_whitespace(stream); // Add a step that matches any kind of node - array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth)); + array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate)); } else { @@ -821,8 +854,8 @@ TSQuery *ts_query_new( .offset = self->predicate_steps.size, .length = 0, })); - *error_type = ts_query__parse_pattern(self, &stream, 0, &capture_count); - array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER)); + *error_type = ts_query__parse_pattern(self, &stream, 0, &capture_count, false); + array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false)); // If any pattern could not be parsed, then report the error information // and terminate. @@ -1155,15 +1188,16 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { return self->finished_states.size > 0; } } else { - bool can_have_later_siblings; + bool has_later_siblings; bool can_have_later_siblings_with_this_field; TSFieldId field_id = ts_tree_cursor_current_status( &self->cursor, - &can_have_later_siblings, + &has_later_siblings, &can_have_later_siblings_with_this_field ); TSNode node = ts_tree_cursor_current_node(&self->cursor); TSSymbol symbol = ts_node_symbol(node); + bool is_named = ts_node_is_named(node); if (symbol != ts_builtin_sym_error && self->query->symbol_map) { symbol = self->query->symbol_map[symbol]; } @@ -1187,13 +1221,16 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { ) return false; LOG( - "enter node. type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u, can_have_later_siblings:%d, can_have_later_siblings_with_this_field:%d\n", + "enter node. " + "type:%s, field:%s, row:%u state_count:%u, " + "finished_state_count:%u, has_later_siblings:%d, " + "can_have_later_siblings_with_this_field:%d\n", ts_node_type(node), ts_language_field_name_for_id(self->query->language, field_id), ts_node_start_point(node).row, self->states.size, self->finished_states.size, - can_have_later_siblings, + has_later_siblings, can_have_later_siblings_with_this_field ); @@ -1233,7 +1270,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { QueryStep *step = &self->query->steps.contents[state->step_index]; // Check that the node matches all of the criteria for the next - // step of the pattern.if ( + // step of the pattern. if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue; // Determine if this node matches this step of the pattern, and also @@ -1242,8 +1279,14 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { bool node_does_match = step->symbol == symbol || step->symbol == WILDCARD_SYMBOL || - (step->symbol == NAMED_WILDCARD_SYMBOL && ts_node_is_named(node)); - bool later_sibling_can_match = can_have_later_siblings; + (step->symbol == NAMED_WILDCARD_SYMBOL && is_named); + bool later_sibling_can_match = has_later_siblings; + if (step->is_immediate && is_named) { + later_sibling_can_match = false; + } + if (step->is_last && has_later_siblings) { + node_does_match = false; + } if (step->field) { if (step->field == field_id) { if (!can_have_later_siblings_with_this_field) {