From 6b8e5bd1f96ab63f17873ef9f7a72569a421810f Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Tue, 20 Nov 2018 12:03:12 -0800
Subject: [PATCH] Add code to handle failure case with unknown repro steps

---
 src/runtime/parser.c | 59 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 52 insertions(+), 7 deletions(-)

diff --git a/src/runtime/parser.c b/src/runtime/parser.c
index 556a11ec..d64e8b6e 100644
--- a/src/runtime/parser.c
+++ b/src/runtime/parser.c
@@ -899,8 +899,11 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self,
 
 static void ts_parser__handle_error(TSParser *self, StackVersion version,
                                     TSSymbol lookahead_symbol) {
-  // Perform any reductions that could have happened in this state, regardless of the lookahead.
   uint32_t previous_version_count = ts_stack_version_count(self->stack);
+
+  // Perform any reductions that can happen in this state, regardless of the lookahead. After
+  // skipping one or more invalid tokens, the parser might find a token that would have allowed
+  // a reduction to take place.
   ts_parser__do_all_potential_reductions(self, version, 0);
   uint32_t version_count = ts_stack_version_count(self->stack);
   Length position = ts_stack_position(self->stack, version);
@@ -1073,6 +1076,18 @@ static void ts_parser__recover(TSParser *self, StackVersion version, Subtree loo
   unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version);
   unsigned current_error_cost = ts_stack_error_cost(self->stack, version);
 
+  // When the parser is in the error state, there are two strategies for recovering with a
+  // given lookahead token:
+  // 1. Find a previous state on the stack in which that lookahead token would be valid. Then,
+  //    create a new stack version that is in that state again. This entails popping all of the
+  //    subtrees that have been pushed onto the stack since that previous state, and wrapping
+  //    them in an ERROR node.
+  // 2. Wrap the lookahead token in an ERROR node, push that ERROR node onto the stack, and
+  //    move on to the next lookahead token, remaining in the error state.
+  //
+  // First, try the strategy 1. Upon entering the error state, the parser recorded a summary
+  // of the previous parse states and their depths. Look at each state in the summary, to see
+  // if the current lookahead token would be valid in that state.
   if (summary && !ts_subtree_is_error(lookahead)) {
     for (unsigned i = 0; i < summary->size; i++) {
       StackSummaryEntry entry = summary->contents[i];
@@ -1082,6 +1097,7 @@ static void ts_parser__recover(TSParser *self, StackVersion version, Subtree loo
       unsigned depth = entry.depth;
       if (node_count_since_error > 0) depth++;
 
+      // Do not recover in ways that create redundant stack versions.
       bool would_merge = false;
       for (unsigned j = 0; j < previous_version_count; j++) {
         if (
@@ -1092,9 +1108,9 @@ static void ts_parser__recover(TSParser *self, StackVersion version, Subtree loo
           break;
         }
       }
-
       if (would_merge) continue;
 
+      // Do not recover if the result would clearly be worse than some existing stack version.
       unsigned new_cost =
         current_error_cost +
         entry.depth * ERROR_COST_PER_SKIPPED_TREE +
@@ -1102,6 +1118,8 @@ static void ts_parser__recover(TSParser *self, StackVersion version, Subtree loo
         (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE;
       if (ts_parser__better_version_exists(self, version, false, new_cost)) break;
 
+      // If the current lookahead token is valid in some previous state, recover to that state.
+      // Then stop looking for further recoveries.
       if (ts_language_has_actions(self->language, entry.state, ts_subtree_symbol(lookahead))) {
         if (ts_parser__recover_to_state(self, version, depth, entry.state)) {
           did_recover = true;
@@ -1113,18 +1131,27 @@ static void ts_parser__recover(TSParser *self, StackVersion version, Subtree loo
     }
   }
 
+  // In the process of attemping to recover, some stack versions may have been created
+  // and subsequently halted. Remove those versions.
   for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) {
     if (!ts_stack_is_active(self->stack, i)) {
       ts_stack_remove_version(self->stack, i--);
     }
   }
 
+  // If strategy 1 succeeded, a new stack version will have been created which is able to handle
+  // the current lookahead token. Now, in addition, try strategy 2 described above: skip the
+  // current lookahead token by wrapping it in an ERROR node.
+
+  // Don't pursue this additional strategy if there are already too many stack versions.
   if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
     ts_stack_halt(self->stack, version);
     ts_subtree_release(&self->tree_pool, lookahead);
     return;
   }
 
+  // If the parser is still in the error state at the end of the file, just wrap everything
+  // in an ERROR node and terminate.
   if (ts_subtree_is_eof(lookahead)) {
     LOG("recover_eof");
     SubtreeArray children = array_new();
@@ -1134,17 +1161,19 @@ static void ts_parser__recover(TSParser *self, StackVersion version, Subtree loo
     return;
   }
 
+  // Do not recover if the result would clearly be worse than some existing stack version.
   unsigned new_cost =
     current_error_cost + ERROR_COST_PER_SKIPPED_TREE +
     ts_subtree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR +
     ts_subtree_total_size(lookahead).extent.row * ERROR_COST_PER_SKIPPED_LINE;
-
   if (ts_parser__better_version_exists(self, version, false, new_cost)) {
     ts_stack_halt(self->stack, version);
     ts_subtree_release(&self->tree_pool, lookahead);
     return;
   }
 
+  // If the current lookahead token is an extra token, mark it as extra. This means it won't
+  // be counted in error cost calculations.
   unsigned n;
   const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n);
   if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].params.extra) {
@@ -1153,6 +1182,7 @@ static void ts_parser__recover(TSParser *self, StackVersion version, Subtree loo
     lookahead = ts_subtree_from_mut(mutable_lookahead);
   }
 
+  // Wrap the lookahead token in an ERROR.
   LOG("skip_token symbol:%s", TREE_NAME(lookahead));
   SubtreeArray children = array_new();
   array_reserve(&children, 1);
@@ -1165,10 +1195,25 @@ static void ts_parser__recover(TSParser *self, StackVersion version, Subtree loo
     self->language
   );
 
+  // If other tokens have already been skipped, so there is already an ERROR at the top of the
+  // stack, then pop that ERROR off the stack and wrap the two ERRORs together into one larger
+  // ERROR.
   if (node_count_since_error > 0) {
     StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1);
-    assert(pop.size == 1);
-    assert(pop.contents[0].subtrees.size == 1);
+
+    // TODO: Figure out how to make this condition occur.
+    // See https://github.com/atom/atom/issues/18450#issuecomment-439579778
+    // If multiple stack versions have merged at this point, just pick one of the errors
+    // arbitrarily and discard the rest.
+    if (pop.size > 1) {
+      for (unsigned i = 1; i < pop.size; i++) {
+        ts_subtree_array_delete(&self->tree_pool, &pop.contents[i].subtrees);
+      }
+      while (ts_stack_version_count(self->stack) > pop.contents[0].version + 1) {
+        ts_stack_remove_version(self->stack, pop.contents[0].version + 1);
+      }
+    }
+
     ts_stack_renumber_version(self->stack, pop.contents[0].version, version);
     array_push(&pop.contents[0].subtrees, ts_subtree_from_mut(error_repeat));
     error_repeat = ts_subtree_new_node(
@@ -1180,8 +1225,8 @@ static void ts_parser__recover(TSParser *self, StackVersion version, Subtree loo
     );
   }
 
+  // Push the new ERROR onto the stack.
   ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE);
-
   if (ts_subtree_has_external_tokens(lookahead)) {
     ts_stack_set_last_external_token(
       self->stack, version, ts_subtree_last_external_token(lookahead)
@@ -1232,7 +1277,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_
           TSStateId next_state;
           if (action.params.extra) {
 
-            // TODO remove when TREE_SITTER_LANGUAGE_VERSION 9 is out.
+            // TODO: remove when TREE_SITTER_LANGUAGE_VERSION 9 is out.
             if (state == ERROR_STATE) continue;
 
             next_state = state;