Add guard to prevent infinite loops in error recovery

Max Brunsfeld 2018-11-08 11:29:21 -08:00
parent 0f887429ae
commit 361fd6ecea
3 changed files with 40 additions and 2 deletions

@@ -341,7 +341,22 @@ static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId pa
   self->lexer.data.mark_end(&self->lexer.data);
 }
-if (!error_mode || self->lexer.token_end_position.bytes > current_position.bytes) {
+// Zero-length external tokens are generally allowed, but they're not
+// allowed right after a syntax error. This is for two reasons:
+// 1. After a syntax error, the lexer is looking for any possible token,
+//    as opposed to the specific set of tokens that are valid in some
+//    parse state. In this situation, it's very easy for an external
+//    scanner to produce unwanted zero-length tokens.
+// 2. The parser sometimes inserts *missing* tokens to recover from
+//    errors. These tokens are also zero-length. If we allow more
+//    zero-length tokens to be created after missing tokens, it
+//    can lead to infinite loops. Forbidding zero-length tokens
+//    right at the point of error recovery is a conservative strategy
+//    for preventing this kind of infinite loop.
+if (
+  self->lexer.token_end_position.bytes > current_position.bytes ||
+  (!error_mode && ts_stack_has_advanced_since_error(self->stack, version))
+) {
   found_external_token = true;
   break;
 }
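
The net effect of this hunk: an external token is kept when it consumed at least one byte, while a zero-length external token is kept only if the parser is not in error-recovery mode and the stack has made real progress since the last syntax error. A minimal standalone sketch of that decision follows; LexGuard and should_accept_external_token are hypothetical names standing in for the inlined condition above, not tree-sitter API.

#include <stdbool.h>
#include <stdint.h>

// Hypothetical snapshot of the values the real check reads from the parser,
// its lexer, and its stack.
typedef struct {
  uint32_t token_end_bytes;  // self->lexer.token_end_position.bytes
  uint32_t current_bytes;    // current_position.bytes
  bool error_mode;           // lexing in error-recovery mode
  bool stack_advanced;       // ts_stack_has_advanced_since_error(self->stack, version)
} LexGuard;

// Mirrors the guard added above: accept a token that consumed input, or a
// zero-length token only outside error mode and after the stack has advanced.
static bool should_accept_external_token(const LexGuard *g) {
  if (g->token_end_bytes > g->current_bytes) return true;
  return !g->error_mode && g->stack_advanced;
}
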
@@ -470,7 +485,7 @@ static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId pa
   LOG(
     "lexed_lookahead sym:%s, size:%u",
     SYM_NAME(ts_subtree_symbol(result)),
-    ts_subtree_size(result).bytes
+    ts_subtree_total_size(result).bytes
   );
   return result;
 }

@@ -580,6 +580,27 @@ int ts_stack_dynamic_precedence(Stack *self, StackVersion version) {
   return array_get(&self->heads, version)->node->dynamic_precedence;
 }
+bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) {
+  const StackHead *head = array_get(&self->heads, version);
+  const StackNode *node = head->node;
+  if (node->error_cost == 0) return true;
+  while (node) {
+    if (node->link_count > 0) {
+      Subtree subtree = node->links[0].subtree;
+      if (subtree.ptr) {
+        if (ts_subtree_total_bytes(subtree) > 0) {
+          return true;
+        } else if (node->node_count > head->node_count_at_last_error) {
+          node = node->links[0].node;
+          continue;
+        }
+      }
+    }
+    break;
+  }
+  return false;
+}
 void ts_stack_remove_version(Stack *self, StackVersion version) {
   stack_head_delete(array_get(&self->heads, version), &self->node_pool, self->subtree_pool);
   array_erase(&self->heads, version);
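
In plain terms, ts_stack_has_advanced_since_error returns true right away when the head has no error cost (there is no error to recover from), and otherwise walks backward along each node's first link, succeeding on the first subtree that consumed any bytes and giving up once it reaches the node count recorded at the last error. A simplified model of that walk is sketched below; HistoryNode and has_advanced are illustrative names, not tree-sitter API, and the null-pointer and link-count checks of the real function are omitted.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// Hypothetical, flattened view of the stack-node chain: each entry records the
// bytes consumed by its first link's subtree and the node count at that point.
typedef struct HistoryNode {
  uint32_t subtree_total_bytes;  // ts_subtree_total_bytes(node->links[0].subtree)
  uint32_t node_count;           // node->node_count
  struct HistoryNode *previous;  // node->links[0].node
} HistoryNode;

// Same shape as the loop above: succeed on the first non-empty subtree, keep
// stepping past zero-length subtrees pushed after the error, and stop otherwise.
static bool has_advanced(const HistoryNode *node, uint32_t node_count_at_last_error) {
  while (node != NULL) {
    if (node->subtree_total_bytes > 0) return true;
    if (node->node_count <= node_count_at_last_error) break;
    node = node->previous;
  }
  return false;
}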

@@ -79,6 +79,8 @@ unsigned ts_stack_node_count_since_error(const Stack *, StackVersion);
 int ts_stack_dynamic_precedence(Stack *, StackVersion);
+bool ts_stack_has_advanced_since_error(const Stack *, StackVersion);
 // Compute a summary of all the parse states near the top of the given
 // version of the stack and store the summary for later retrieval.
 void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth);