Merge pull request #971 from tree-sitter/fix-query-analsyis-with-extras

Fix handling of extra nodes in query analysis
This commit is contained in:
Max Brunsfeld 2021-03-08 09:54:02 -08:00 committed by GitHub
commit 0b4794106d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 83 additions and 5 deletions

View file

@ -118,6 +118,18 @@ fn test_query_errors_on_invalid_syntax() {
]
.join("\n")
);
// tree-sitter/tree-sitter/issues/968
assert_eq!(
Query::new(get_language("c"), r#"(parameter_list [ ")" @foo)"#)
.unwrap_err()
.message,
[
r#"(parameter_list [ ")" @foo)"#,
r#" ^"#
]
.join("\n")
);
});
}
@ -642,6 +654,49 @@ fn test_query_matches_capturing_error_nodes() {
});
}
#[test]
// Regression test for query analysis/matching when "extra" nodes (nodes the
// grammar allows anywhere, e.g. Ruby comments and heredoc bodies) occur among
// a pattern's children. Added by the commit fixing extra-node handling in
// query analysis (tree-sitter/tree-sitter#971).
fn test_query_matches_with_extra_children() {
allocations::record(|| {
let language = get_language("ruby");
// Two patterns:
//   0: a comment that is a direct child of the top-level `program` node
//   1: a heredoc body appearing inside an argument list
let query = Query::new(
language,
"
(program(comment) @top_level_comment)
(argument_list (heredoc_body) @heredoc_in_args)
",
)
.unwrap();
// Expected matches (per the (pattern_index, captures) convention used by
// this suite's assert_query_matches helper):
//   - only the first comment matches pattern 0; "# not-top-level" sits
//     inside the call arguments, not directly under `program`
//   - only the IN_ARGS heredoc matches pattern 1; the NOT_IN_ARGS heredoc
//     is attached outside an argument_list
assert_query_matches(
language,
&query,
"
# top-level
puts(
# not-top-level
<<-IN_ARGS, bar.baz
HELLO
IN_ARGS
)
puts <<-NOT_IN_ARGS
NO
NOT_IN_ARGS
",
&[
(0, vec![("top_level_comment", "# top-level")]),
(
1,
vec![(
"heredoc_in_args",
"\n HELLO\n IN_ARGS",
)],
),
],
);
});
}
#[test]
fn test_query_matches_with_named_wildcard() {
allocations::record(|| {
@ -3077,6 +3132,20 @@ fn test_query_step_is_definite() {
"#,
results_by_substring: &[("name:", true)],
},
Row {
description: "top-level non-terminal extra nodes",
language: get_language("ruby"),
pattern: r#"
(heredoc_body
(interpolation)
(heredoc_end) @end)
"#,
results_by_substring: &[
("(heredoc_body", false),
("(interpolation)", false),
("(heredoc_end)", true),
],
},
];
allocations::record(|| {

View file

@ -784,8 +784,10 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
state_predecessor_map_add(&predecessor_map, next_state, state);
}
}
} else if (lookahead_iterator.next_state != 0 && lookahead_iterator.next_state != state) {
state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state);
} else if (lookahead_iterator.next_state != 0) {
if (lookahead_iterator.next_state != state) {
state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state);
}
const TSSymbol *aliases, *aliases_end;
ts_language_aliases_for_symbol(
self->language,
@ -959,6 +961,10 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
}
#endif
// If no further progress can be made within the current recursion depth limit, then
// bump the depth limit by one, and continue to process the states that exceeded the
// limit. But only allow this if progress has been made since the last time the depth
// limit was increased.
if (states.size == 0) {
if (deeper_states.size > 0 && final_step_indices.size > prev_final_step_count) {
#ifdef DEBUG_ANALYZE_QUERY
@ -1019,12 +1025,12 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
TSStateId next_parse_state;
if (lookahead_iterator.action_count) {
const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1];
if (action->type == TSParseActionTypeShift && !action->shift.extra) {
next_parse_state = action->shift.state;
if (action->type == TSParseActionTypeShift) {
next_parse_state = action->shift.extra ? parse_state : action->shift.state;
} else {
continue;
}
} else if (lookahead_iterator.next_state != 0 && lookahead_iterator.next_state != parse_state) {
} else if (lookahead_iterator.next_state != 0) {
next_parse_state = lookahead_iterator.next_state;
} else {
continue;
@ -1127,6 +1133,8 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
next_step->depth <= parent_depth + 1
) break;
}
} else if (next_parse_state == parse_state) {
continue;
}
for (;;) {
@ -1535,6 +1543,7 @@ static TSQueryError ts_query__parse_pattern(
stream_advance(stream);
break;
} else if (e) {
if (e == PARENT_DONE) e = TSQueryErrorSyntax;
array_delete(&branch_step_indices);
return e;
}