Merge pull request #14 from maxbrunsfeld/more-incremental-fixes

Fix more errors in incremental subtree reuse
2015-12-10 21:30:48 -08:00 · 2015-12-10 21:30:48 -08:00 · 8747e2d3b9
commit 8747e2d3b9
parent dc28a10bf6 da2dfd703a
30 changed files with 41066 additions and 31483 deletions
--- a/include/tree_sitter/parser.h
+++ b/include/tree_sitter/parser.h
@ -34,7 +34,7 @@ typedef struct TSLexer {
  void (*start_token_fn)(struct TSLexer *);
  bool (*advance_fn)(struct TSLexer *, TSStateId);
  TSTree *(*accept_fn)(struct TSLexer *, TSSymbol, TSSymbolMetadata,
-                       const char *);
+                       const char *, bool fragile);

  const char *chunk;
  size_t chunk_start;
@ -107,9 +107,13 @@ struct TSLanguage {
    GO_TO_STATE(state_index);              \
  }

+// Expands to a `return` statement that hands `symbol` (with its metadata and
+// name) to the lexer's accept_fn, passing `true` for accept_fn's trailing
+// `fragile` argument. ACCEPT_TOKEN is the same call with `false`.
+// Expects `lexer`, `ts_symbol_metadata` and `ts_symbol_names` to be in scope
+// at the expansion site.
+#define ACCEPT_FRAGILE_TOKEN(symbol)                                 \
+  return lexer->accept_fn(lexer, symbol, ts_symbol_metadata[symbol], \
+                          ts_symbol_names[symbol], true);
+
 #define ACCEPT_TOKEN(symbol)                                         \
  return lexer->accept_fn(lexer, symbol, ts_symbol_metadata[symbol], \
-                          ts_symbol_names[symbol]);
+                          ts_symbol_names[symbol], false);

 #define LEX_ERROR()                      \
  if (error_mode) {                      \
--- a/script/util/run_tests.sh
+++ b/script/util/run_tests.sh
@ -27,7 +27,7 @@ function run_tests {
  local cmd="out/Debug/${target}"
  shift

-  while getopts "df:ghpv" option; do
+  while getopts "df:s:ghpv" option; do
    case ${option} in
      h)
        usage
@ -48,6 +48,9 @@ function run_tests {
      v)
        args+=("--reporter=spec")
        ;;
+      s)
+        export TREE_SITTER_SEED=${OPTARG}
+        ;;
    esac
  done

--- a/spec/compiler/build_tables/item_set_closure_spec.cc
+++ b/spec/compiler/build_tables/item_set_closure_spec.cc
@ -14,23 +14,23 @@ describe("item_set_closure", []() {
    SyntaxGrammar grammar{{
      SyntaxVariable("rule0", VariableTypeNamed, {
        Production({
-          {Symbol(1), 0, AssociativityNone, 100},
-          {Symbol(11, true), 0, AssociativityNone, 101},
+          {Symbol(1), 0, AssociativityNone},
+          {Symbol(11, true), 0, AssociativityNone},
        }),
      }),
      SyntaxVariable("rule1", VariableTypeNamed, {
        Production({
-          {Symbol(12, true), 0, AssociativityNone, 102},
-          {Symbol(13, true), 0, AssociativityNone, 103},
+          {Symbol(12, true), 0, AssociativityNone},
+          {Symbol(13, true), 0, AssociativityNone},
        }),
        Production({
-          {Symbol(2), 0, AssociativityNone, 104},
+          {Symbol(2), 0, AssociativityNone},
        })
      }),
      SyntaxVariable("rule2", VariableTypeNamed, {
        Production({
-          {Symbol(14, true), 0, AssociativityNone, 105},
-          {Symbol(15, true), 0, AssociativityNone, 106},
+          {Symbol(14, true), 0, AssociativityNone},
+          {Symbol(15, true), 0, AssociativityNone},
        })
      }),
    }, {}, {}};
@ -70,14 +70,14 @@ describe("item_set_closure", []() {
    SyntaxGrammar grammar{{
      SyntaxVariable("rule0", VariableTypeNamed, {
        Production({
-          {Symbol(1), 0, AssociativityNone, 100},
-          {Symbol(11, true), 0, AssociativityNone, 101},
+          {Symbol(1), 0, AssociativityNone},
+          {Symbol(11, true), 0, AssociativityNone},
        }),
      }),
      SyntaxVariable("rule1", VariableTypeNamed, {
        Production({
-          {Symbol(12, true), 0, AssociativityNone, 102},
-          {Symbol(13, true), 0, AssociativityNone, 103},
+          {Symbol(12, true), 0, AssociativityNone},
+          {Symbol(13, true), 0, AssociativityNone},
        }),
        Production({})
      }),
--- a/spec/compiler/build_tables/lex_conflict_manager_spec.cc
+++ b/spec/compiler/build_tables/lex_conflict_manager_spec.cc
@ -38,6 +38,11 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() {
        update = conflict_manager.resolve(LexAction::Accept(sym1, 2, false), LexAction::Accept(sym2, 1, false));
        AssertThat(update, IsTrue());
      });
+
+      it("adds the discarded token to the 'fragile tokens' set", [&]() {
+        update = conflict_manager.resolve(LexAction::Accept(sym2, 1, false), LexAction::Accept(sym1, 2, false));
+        AssertThat(conflict_manager.fragile_tokens, Contains(sym2));
+      });
    });

    describe("when one token is string-based and the other is regexp-based", [&]() {
--- a/spec/compiler/build_tables/parse_item_spec.cc
+++ b/spec/compiler/build_tables/parse_item_spec.cc
@ -12,15 +12,15 @@ describe("ParseItem::completion_status()", [&]() {
  SyntaxGrammar grammar{{
    SyntaxVariable("rule_0", VariableTypeNamed, {
      Production({
-        {Symbol(11, true), 0, AssociativityNone, 101},
-        {Symbol(12, true), 0, AssociativityNone, 102},
-        {Symbol(13), 0, AssociativityNone, 103},
-        {Symbol(14, true), 4, AssociativityLeft, 104},
+        {Symbol(11, true), 0, AssociativityNone},
+        {Symbol(12, true), 0, AssociativityNone},
+        {Symbol(13), 0, AssociativityNone},
+        {Symbol(14, true), 4, AssociativityLeft},
      }),
      Production({
-        {Symbol(15, true), 0, AssociativityNone, 101},
-        {Symbol(16, true), 0, AssociativityNone, 102},
-        {Symbol(17, true), 5, AssociativityRight, 104},
+        {Symbol(15, true), 0, AssociativityNone},
+        {Symbol(16, true), 0, AssociativityNone},
+        {Symbol(17, true), 5, AssociativityRight},
      }),
      Production({}),
    }),
@ -57,26 +57,26 @@ describe("ParseItemSet::transitions())", [&]() {
  SyntaxGrammar grammar{{
    SyntaxVariable("rule_0", VariableTypeNamed, {
      Production({
-        {Symbol(11, true), 0, AssociativityNone, 101},
-        {Symbol(12, true), 0, AssociativityNone, 102},
-        {Symbol(13), 5, AssociativityNone, 103},
-        {Symbol(14, true), 0, AssociativityNone, 104},
+        {Symbol(11, true), 0, AssociativityNone},
+        {Symbol(12, true), 0, AssociativityNone},
+        {Symbol(13), 5, AssociativityNone},
+        {Symbol(14, true), 0, AssociativityNone},
      }),
      Production({
-        {Symbol(11, true), 0, AssociativityNone, 105},
-        {Symbol(12, true), 0, AssociativityNone, 106},
-        {Symbol(15), 6, AssociativityNone, 107},
+        {Symbol(11, true), 0, AssociativityNone},
+        {Symbol(12, true), 0, AssociativityNone},
+        {Symbol(15), 6, AssociativityNone},
      })
    }),
    SyntaxVariable("rule_1", VariableTypeNamed, {
      Production({
-        {Symbol(15), 7, AssociativityNone, 109},
-        {Symbol(16, true), 0, AssociativityNone, 110},
+        {Symbol(15), 7, AssociativityNone},
+        {Symbol(16, true), 0, AssociativityNone},
      })
    }),
    SyntaxVariable("rule_2", VariableTypeNamed, {
      Production({
-        {Symbol(18, true), 0, AssociativityNone, 111},
+        {Symbol(18, true), 0, AssociativityNone},
      })
    })
  }, {}, {}};
--- a/spec/compiler/prepare_grammar/flatten_grammar_spec.cc
+++ b/spec/compiler/prepare_grammar/flatten_grammar_spec.cc
@ -87,14 +87,6 @@ describe("flatten_grammar", []() {
    });
  };

-  auto get_rule_id_sequences = [&](vector<Production> productions) {
-    return collect(productions, [](Production p) {
-      return collect(p, [](ProductionStep e) {
-        return e.rule_id;
-      });
-    });
-  };
-
  it("preserves the names and types of the grammar's variables", [&]() {
    AssertThat(grammar.variables[0].name, Equals("variable0"));
    AssertThat(grammar.variables[1].name, Equals("variable1"));
@ -159,21 +151,6 @@ describe("flatten_grammar", []() {
        { none, AssociativityLeft, AssociativityLeft, none, none }
      })));
  });
-
-  it("associates each unique remaining subsequence of symbols and precedences with a rule_id", [&]() {
-    // Variable 0: only the last symbol is the same for both productions.
-    auto variable0_step_ids = get_rule_id_sequences(grammar.variables[0].productions);
-    AssertThat(variable0_step_ids[0][0], !Equals(variable0_step_ids[1][0]));
-    AssertThat(variable0_step_ids[0][1], !Equals(variable0_step_ids[1][1]));
-    AssertThat(variable0_step_ids[0][2],  Equals(variable0_step_ids[1][2]));
-
-    // Variable 1: the last *two* symbols are the same for both productions.
-    auto variable1_step_ids = get_rule_id_sequences(grammar.variables[1].productions);
-    AssertThat(variable1_step_ids[0][0], !Equals(variable1_step_ids[1][0]));
-    AssertThat(variable1_step_ids[0][1], !Equals(variable1_step_ids[1][1]));
-    AssertThat(variable1_step_ids[0][4],  Equals(variable1_step_ids[1][3]));
-    AssertThat(variable1_step_ids[0][5],  Equals(variable1_step_ids[1][4]));
-  });
 });

 END_TEST
--- a/spec/fixtures/parsers/arithmetic.c
+++ b/spec/fixtures/parsers/arithmetic.c
@ -479,12 +479,12 @@ static const TSParseAction *ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
        [sym_comment] = ACTIONS(SHIFT_EXTRA()),
    },
    [17] = {
-        [anon_sym_PLUS] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
-        [anon_sym_DASH] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
-        [anon_sym_STAR] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
-        [anon_sym_SLASH] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
-        [anon_sym_CARET] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
-        [anon_sym_RPAREN] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
+        [anon_sym_PLUS] = ACTIONS(REDUCE(sym_exponent, 3)),
+        [anon_sym_DASH] = ACTIONS(REDUCE(sym_exponent, 3)),
+        [anon_sym_STAR] = ACTIONS(REDUCE(sym_exponent, 3)),
+        [anon_sym_SLASH] = ACTIONS(REDUCE(sym_exponent, 3)),
+        [anon_sym_CARET] = ACTIONS(REDUCE(sym_exponent, 3)),
+        [anon_sym_RPAREN] = ACTIONS(REDUCE(sym_exponent, 3)),
        [sym_comment] = ACTIONS(SHIFT_EXTRA()),
    },
    [18] = {
@ -598,12 +598,12 @@ static const TSParseAction *ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
        [sym_comment] = ACTIONS(SHIFT_EXTRA()),
    },
    [28] = {
-        [ts_builtin_sym_end] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
-        [anon_sym_PLUS] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
-        [anon_sym_DASH] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
-        [anon_sym_STAR] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
-        [anon_sym_SLASH] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
-        [anon_sym_CARET] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
+        [ts_builtin_sym_end] = ACTIONS(REDUCE(sym_exponent, 3)),
+        [anon_sym_PLUS] = ACTIONS(REDUCE(sym_exponent, 3)),
+        [anon_sym_DASH] = ACTIONS(REDUCE(sym_exponent, 3)),
+        [anon_sym_STAR] = ACTIONS(REDUCE(sym_exponent, 3)),
+        [anon_sym_SLASH] = ACTIONS(REDUCE(sym_exponent, 3)),
+        [anon_sym_CARET] = ACTIONS(REDUCE(sym_exponent, 3)),
        [sym_comment] = ACTIONS(SHIFT_EXTRA()),
    },
    [29] = {
--- a/spec/fixtures/parsers/c.c
+++ b/spec/fixtures/parsers/c.c
--- a/spec/fixtures/parsers/cpp.c
+++ b/spec/fixtures/parsers/cpp.c
--- a/spec/fixtures/parsers/golang.c
+++ b/spec/fixtures/parsers/golang.c
--- a/spec/fixtures/parsers/javascript.c
+++ b/spec/fixtures/parsers/javascript.c
--- a/spec/runtime/language_specs.cc
+++ b/spec/runtime/language_specs.cc
@ -1,5 +1,7 @@
 #include "runtime/runtime_spec_helper.h"
 #include <functional>
+#include <set>
+#include <utility>
 #include "runtime/length.h"
 #include "runtime/helpers/read_test_entries.h"
 #include "runtime/helpers/spy_input.h"
@ -13,44 +15,6 @@ extern "C" const TSLanguage *ts_language_golang();
 extern "C" const TSLanguage *ts_language_c();
 extern "C" const TSLanguage *ts_language_cpp();

-void expect_the_correct_tree(TSNode node, TSDocument *doc, string tree_string) {
-  const char *node_string = ts_node_string(node, doc);
-  AssertThat(node_string, Equals(tree_string));
-  free((void *)node_string);
-}
-
-void expect_a_consistent_tree(TSNode node, TSDocument *doc) {
-  size_t child_count = ts_node_child_count(node);
-  size_t start = ts_node_start_char(node);
-  size_t end = ts_node_end_char(node);
-  TSPoint start_point = ts_node_start_point(node);
-  TSPoint end_point = ts_node_end_point(node);
-
-  bool has_changes = ts_node_has_changes(node);
-  bool some_child_has_changes = false;
-
-  for (size_t i = 0; i < child_count; i++) {
-    TSNode child = ts_node_child(node, i);
-    size_t child_start = ts_node_start_char(child);
-    size_t child_end = ts_node_end_char(child);
-    TSPoint child_start_point = ts_node_start_point(child);
-    TSPoint child_end_point = ts_node_end_point(child);
-
-    AssertThat(child_start, IsGreaterThan(start) || Equals(start));
-    AssertThat(child_end, IsLessThan(end) || Equals(end));
-    AssertThat(child_start_point, IsGreaterThan(start_point) || Equals(start_point));
-    AssertThat(child_end_point, IsLessThan(end_point) || Equals(end_point));
-
-    if (ts_node_has_changes(child))
-      some_child_has_changes = true;
-  }
-
-  if (child_count > 0)
-    AssertThat(has_changes, Equals(some_child_has_changes));
-}
-
-START_TEST
-
 map<string, const TSLanguage *> languages({
  {"json", ts_language_json()},
  {"arithmetic", ts_language_arithmetic()},
@ -60,22 +24,102 @@ map<string, const TSLanguage *> languages({
  {"cpp", ts_language_cpp()},
 });

+// Asserts that the string produced by ts_node_string() for `node` equals
+// `tree_string`.
+//
+// The buffer returned by ts_node_string() is released with free(), so it is
+// copied into a std::string and freed *before* the assertion runs: a failing
+// AssertThat may leave this function early (snowhouse reports failures by
+// throwing), which previously skipped the free() and leaked the buffer.
+void expect_the_correct_tree(TSNode node, TSDocument *doc, string tree_string) {
+  const char *node_string = ts_node_string(node, doc);
+  string actual_string(node_string);
+  free((void *)node_string);
+  AssertThat(actual_string, Equals(tree_string));
+}
+
+// Recursively checks structural invariants of the subtree rooted at `node`:
+//   - the node does not start after it ends (character offsets and points);
+//   - every child lies inside the node's own range;
+//   - every child starts at or after the end of the child before it;
+//   - a node with children reports changes exactly when some child does.
+void expect_a_consistent_tree(TSNode node, TSDocument *doc) {
+  size_t child_count = ts_node_child_count(node);
+  size_t start_char = ts_node_start_char(node);
+  size_t end_char = ts_node_end_char(node);
+  TSPoint start_point = ts_node_start_point(node);
+  TSPoint end_point = ts_node_end_point(node);
+  bool has_changes = ts_node_has_changes(node);
+  bool some_child_has_changes = false;
+
+  AssertThat(start_char, !IsGreaterThan(end_char));
+  AssertThat(start_point, !IsGreaterThan(end_point));
+
+  size_t last_child_end_char = 0;
+  TSPoint last_child_end_point = {0, 0};
+
+  for (size_t i = 0; i < child_count; i++) {
+    TSNode child = ts_node_child(node, i);
+    size_t child_start_char = ts_node_start_char(child);
+    size_t child_end_char = ts_node_end_char(child);
+    TSPoint child_start_point = ts_node_start_point(child);
+    TSPoint child_end_point = ts_node_end_point(child);
+
+    // Siblings must not overlap: each child starts where the previous one
+    // ended, or later.
+    if (i > 0) {
+      AssertThat(child_start_char, !IsLessThan(last_child_end_char));
+      AssertThat(child_start_point, !IsLessThan(last_child_end_point));
+    }
+
+    // Record this child's end on every iteration, including the first, so
+    // that child 1 is compared against child 0 instead of the zero
+    // initializers above.
+    last_child_end_char = child_end_char;
+    last_child_end_point = child_end_point;
+
+    // Children must be contained in their parent.
+    AssertThat(child_start_char, !IsLessThan(start_char));
+    AssertThat(child_end_char, !IsGreaterThan(end_char));
+    AssertThat(child_start_point, !IsLessThan(start_point));
+    AssertThat(child_end_point, !IsGreaterThan(end_point));
+
+    expect_a_consistent_tree(child, doc);
+
+    if (ts_node_has_changes(child))
+      some_child_has_changes = true;
+  }
+
+  if (child_count > 0)
+    AssertThat(has_changes, Equals(some_child_has_changes));
+}
+
+// Builds a string of 0 to 11 characters (length is random() % 12). Each
+// character is min plus random() % (max - min), so `max` itself is never
+// produced. Consumes one random() call for the length and one per character.
+string random_string(char min, char max) {
+  size_t length = random() % 12;
+  string text;
+  text.reserve(length);
+  while (text.size() < length)
+    text += static_cast<char>(min + (random() % (max - min)));
+  return text;
+}
+
+// Returns a one-character string holding a randomly chosen character of
+// `characters` (index is random() % characters.size()).
+string random_char(string characters) {
+  return string(1, characters[random() % characters.size()]);
+}
+
+// Concatenates `count` random pieces. Each piece is a single character from
+// "!(){}[]<>+-=" when random() % 10 < 6, and a random lowercase word
+// otherwise. Once the first word has been emitted, every later word is
+// preceded by a space (the flag is never cleared by a punctuation piece).
+string random_words(size_t count) {
+  string text;
+  bool word_emitted = false;
+  for (size_t piece = 0; piece < count; piece++) {
+    bool use_punctuation = random() % 10 < 6;
+    if (use_punctuation) {
+      text += random_char("!(){}[]<>+-=");
+      continue;
+    }
+
+    if (word_emitted)
+      text += " ";
+    text += random_string('a', 'z');
+    word_emitted = true;
+  }
+  return text;
+}
+
+START_TEST
+
 describe("Languages", [&]() {
-  TSDocument *doc;
-
-  before_each([&]() {
-    doc = ts_document_make();
-  });
-
-  after_each([&]() {
-    ts_document_free(doc);
-  });
-
  for (const auto &pair : languages) {
    describe(("The " + pair.first + " parser").c_str(), [&]() {
+      TSDocument *doc;
+
      before_each([&]() {
+        doc = ts_document_make();
        ts_document_set_language(doc, pair.second);
-        // ts_document_set_debugger(doc, log_debugger_make(false));
+        // ts_document_set_debugger(doc, log_debugger_make(true));
+      });
+
+      after_each([&]() {
+        ts_document_free(doc);
      });

      for (auto &entry : test_entries_for_language(pair.first)) {
@ -97,49 +141,57 @@ describe("Languages", [&]() {
          ts_document_parse(doc);
        });

-        srand(2);
+        std::set<std::pair<size_t, size_t>> deletions;
+        std::set<std::pair<size_t, string>> insertions;

-        for (int i = 0; i < 5; i++) {
-          size_t edit_position = rand() % entry.input.size();
-          size_t deletion_amount = rand() % (entry.input.size() - edit_position);
-          string pos_string = to_string(edit_position);
+        for (size_t i = 0; i < 50; i++) {
+          size_t edit_position = random() % entry.input.size();
+          size_t deletion_size = random() % (entry.input.size() - edit_position);
+          string inserted_text = random_words(random() % 4 + 1);

-          it_handles_edit_sequence("repairing an inserted error at " + pos_string, [&]() {
-            ts_document_edit(doc, input->replace(edit_position, 0, "%^&*"));
-            ts_document_parse(doc);
+          if (insertions.insert({edit_position, inserted_text}).second) {
+            string description = "\"" + inserted_text + "\" at " + to_string(edit_position);

-            ts_document_edit(doc, input->undo());
-            ts_document_parse(doc);
-          });
+            it_handles_edit_sequence("repairing an insertion of " + description, [&]() {
+              ts_document_edit(doc, input->replace(edit_position, 0, inserted_text));
+              ts_document_parse(doc);

-          it_handles_edit_sequence("creating and repairing an inserted error at " + pos_string, [&]() {
-            ts_document_parse(doc);
+              ts_document_edit(doc, input->undo());
+              ts_document_parse(doc);
+            });

-            ts_document_edit(doc, input->replace(edit_position, 0, "%^&*"));
+            it_handles_edit_sequence("performing and repairing an insertion of " + description, [&]() {
+              ts_document_parse(doc);

-            ts_document_parse(doc);
+              ts_document_edit(doc, input->replace(edit_position, 0, inserted_text));
+              ts_document_parse(doc);

-            ts_document_edit(doc, input->undo());
-            ts_document_parse(doc);
-          });
+              ts_document_edit(doc, input->undo());
+              ts_document_parse(doc);
+            });
+          }

-          it_handles_edit_sequence("repairing an errant deletion at " + pos_string, [&]() {
-            ts_document_parse(doc);
+          // Register the two deletion scenarios only the first time this
+          // (position, size) pair is generated, so test names stay unique.
+          if (deletions.insert({edit_position, deletion_size}).second) {
+            string description = to_string(edit_position) + "-" + to_string(edit_position + deletion_size);

-            ts_document_edit(doc, input->replace(edit_position, deletion_amount, ""));
-            ts_document_parse(doc);
+            it_handles_edit_sequence("repairing a deletion of " + description, [&]() {
+              ts_document_edit(doc, input->replace(edit_position, deletion_size, ""));
+              ts_document_parse(doc);

-            ts_document_edit(doc, input->undo());
-            ts_document_parse(doc);
-          });
+              ts_document_edit(doc, input->undo());
+              ts_document_parse(doc);
+            });

-          it_handles_edit_sequence("creating and repairing an errant deletion at " + pos_string, [&]() {
-            ts_document_edit(doc, input->replace(edit_position, deletion_amount, ""));
-            ts_document_parse(doc);
+            it_handles_edit_sequence("performing and repairing a deletion of " + description, [&]() {
+              ts_document_parse(doc);

-            ts_document_edit(doc, input->undo());
-            ts_document_parse(doc);
-          });
+              ts_document_edit(doc, input->replace(edit_position, deletion_size, ""));
+              ts_document_parse(doc);
+
+              ts_document_edit(doc, input->undo());
+              ts_document_parse(doc);
+            });
+          }
        }
      }
    });
--- a/spec/runtime/runtime_specs.cc
+++ b/spec/runtime/runtime_specs.cc
@ -1,5 +1,16 @@
 #include "runtime/runtime_spec_helper.h"

 int main(int argc, char *argv[]) {
+  // Take the PRNG seed from the TREE_SITTER_SEED environment variable when
+  // it is set, otherwise from the current time; print it before seeding.
+  const char *seed_env = getenv("TREE_SITTER_SEED");
+  int seed = seed_env ? atoi(seed_env) : time(nullptr);
+
+  printf("Random seed: %d\n", seed);
+  srandom(seed);
+
  return bandit::run(argc, argv);
 }
--- a/src/compiler/build_tables/build_lex_table.cc
+++ b/src/compiler/build_tables/build_lex_table.cc
@ -32,7 +32,7 @@ using rules::Symbol;

 class LexTableBuilder {
  const LexicalGrammar lex_grammar;
-  const LexConflictManager conflict_manager;
+  LexConflictManager conflict_manager;
  ParseTable *parse_table;
  unordered_map<const LexItemSet, LexStateId, LexItemSet::Hash> lex_state_ids;
  LexTable lex_table;
@ -59,6 +59,8 @@ class LexTableBuilder {
      build_lex_item_set(parse_table->all_symbols(), true);
    populate_lex_state(error_item_set, LexTable::ERROR_STATE_ID);

+    mark_fragile_tokens();
+
    return lex_table;
  }

@ -153,6 +155,17 @@ class LexTableBuilder {
      if (item.is_token_start())
        lex_table.state(state_id).is_token_start = true;
  }
+
+  // Re-tags the default action of each lex state: an Accept action whose
+  // symbol is in the conflict manager's fragile-token set becomes
+  // AcceptFragile. All other actions are left as they are.
+  void mark_fragile_tokens() {
+    for (LexState &state : lex_table.states) {
+      auto &default_action = state.default_action;
+      if (default_action.type != LexActionTypeAccept)
+        continue;
+      if (has_fragile_token(default_action.symbol))
+        default_action.type = LexActionTypeAcceptFragile;
+    }
+  }
+
+  // True when `symbol` is a member of conflict_manager.fragile_tokens.
+  bool has_fragile_token(const Symbol &symbol) {
+    return conflict_manager.fragile_tokens.count(symbol) > 0;
+  }
 };

 LexTable build_lex_table(ParseTable *table, const LexicalGrammar &grammar) {
--- a/src/compiler/build_tables/build_parse_table.cc
+++ b/src/compiler/build_tables/build_parse_table.cc
@ -37,6 +37,7 @@ class ParseTableBuilder {
  ParseTable parse_table;
  std::set<string> conflicts;
  ParseItemSet null_item_set;
+  std::set<const Production *> fragile_productions;

 public:
  ParseTableBuilder(const SyntaxGrammar &grammar,
@ -48,7 +49,7 @@ class ParseTableBuilder {
  pair<ParseTable, const GrammarError *> build() {
    Symbol start_symbol = Symbol(0, grammar.variables.empty());
    Production start_production({
-      ProductionStep(start_symbol, 0, rules::AssociativityNone, -2),
+      ProductionStep(start_symbol, 0, rules::AssociativityNone),
    });

    add_parse_state(ParseItemSet({
@ -78,6 +79,9 @@ class ParseTableBuilder {
      add_reduce_extra_actions(state);
    }

+    mark_fragile_reductions();
+    remove_duplicate_states();
+
    parse_table.symbols.insert({ rules::ERROR(), {} });

    return { parse_table, nullptr };
@ -153,6 +157,65 @@ class ParseTableBuilder {
    }
  }

+  // Walks every action of every parse state. Each Reduce action whose
+  // production is in `fragile_productions` becomes ReduceFragile, and the
+  // production pointer of every visited Reduce action is cleared afterwards.
+  // NOTE(review): clearing the pointer presumably lets otherwise-identical
+  // actions compare equal in remove_duplicate_states() — confirm against
+  // ParseAction's operator==.
+  void mark_fragile_reductions() {
+    for (ParseState &state : parse_table.states) {
+      for (auto &entry : state.actions) {
+        for (ParseAction &action : entry.second) {
+          if (action.type != ParseActionTypeReduce)
+            continue;
+
+          if (has_fragile_production(action.production))
+            action.type = ParseActionTypeReduceFragile;
+          action.production = NULL;
+        }
+      }
+    }
+  }
+
+  // Merges parse states whose `actions` compare equal. Each pass finds the
+  // duplicates, rewrites every Shift action to point at the surviving state
+  // (with indices adjusted for the states about to be erased), then erases
+  // the duplicates. Passes repeat until one finds no duplicate — presumably
+  // because retargeting shifts can make further states equal.
+  void remove_duplicate_states() {
+    bool done = false;
+    while (!done) {
+      done = true;
+      // Maps the index of a duplicate state to the lowest earlier index whose
+      // actions are equal (the inner loop stops at the first match).
+      map<ParseStateId, ParseStateId> replacements;
+      for (size_t i = 0, size = parse_table.states.size(); i < size; i++) {
+        for (size_t j = 0; j < i; j++) {
+          if (parse_table.states[i].actions == parse_table.states[j].actions) {
+            replacements.insert({ i, j });
+            done = false;
+            break;
+          }
+        }
+      }
+
+      for (ParseState &state : parse_table.states) {
+        for (auto &entry : state.actions) {
+          for (ParseAction &action : entry.second) {
+            if (action.type == ParseActionTypeShift) {
+              // Redirect a shift into a duplicate state to its replacement.
+              ParseStateId state_index = action.state_index;
+              auto replacement = replacements.find(action.state_index);
+              if (replacement != replacements.end()) {
+                state_index = replacement->second;
+              }
+
+              // Count the states with a smaller index that will be erased;
+              // the target shifts down by that many positions. The early
+              // break relies on `replacements` iterating its keys in
+              // ascending order.
+              size_t prior_removed = 0;
+              for (const auto &replacement : replacements) {
+                if (replacement.first >= state_index)
+                  break;
+                prior_removed++;
+              }
+
+              state_index -= prior_removed;
+              action.state_index = state_index;
+            }
+          }
+        }
+      }
+
+      // Erase from the highest index down so the remaining indices in
+      // `replacements` stay valid while erasing.
+      for (auto replacement = replacements.rbegin(); replacement != replacements.rend(); ++replacement) {
+        parse_table.states.erase(parse_table.states.begin() + replacement->first);
+      }
+    }
+  }
+
  ParseAction *add_action(ParseStateId state_id, Symbol lookahead,
                          const ParseAction &new_action,
                          const ParseItemSet &item_set) {
@ -174,18 +237,23 @@ class ParseTableBuilder {
      case ConflictTypeResolved: {
        if (resolution.first) {
          if (old_action.type == ParseActionTypeReduce)
-            parse_table.fragile_productions.insert(old_action.production);
+            fragile_productions.insert(old_action.production);
          return &parse_table.set_action(state_id, lookahead, new_action);
        } else {
          if (new_action.type == ParseActionTypeReduce)
-            parse_table.fragile_productions.insert(new_action.production);
+            fragile_productions.insert(new_action.production);
          break;
        }
      }

      case ConflictTypeUnresolved: {
-        if (handle_unresolved_conflict(item_set, lookahead))
+        if (handle_unresolved_conflict(item_set, lookahead)) {
+          if (old_action.type == ParseActionTypeReduce)
+            fragile_productions.insert(old_action.production);
+          if (new_action.type == ParseActionTypeReduce)
+            fragile_productions.insert(new_action.production);
          return &parse_table.add_action(state_id, lookahead, new_action);
+        }
        break;
      }
    }
@ -315,6 +383,11 @@ class ParseTableBuilder {
      return grammar.variables[symbol.index].name;
    }
  }
+
+  // True when `production` was recorded in `fragile_productions`.
+  // Uses the set's own lookup rather than a linear std::find over the set,
+  // which also drops the dependency on <algorithm>.
+  bool has_fragile_production(const Production *production) {
+    return fragile_productions.count(production) > 0;
+  }
 };

 pair<ParseTable, const GrammarError *> build_parse_table(
--- a/src/compiler/build_tables/conflict_type.h
+++ b/src/compiler/build_tables/conflict_type.h
@ -0,0 +1,16 @@
+#ifndef COMPILER_BUILD_TABLES_CONFLICT_TYPE_H_
+#define COMPILER_BUILD_TABLES_CONFLICT_TYPE_H_
+
+namespace tree_sitter {
+namespace build_tables {
+
+// Classification of a conflict between two competing table actions.
+// NOTE(review): the precise meaning of each value is decided by the code
+// that produces it; the per-value notes are assumptions based on the names
+// and should be confirmed against the conflict managers.
+enum ConflictType {
+  // presumably: the two actions do not conflict
+  ConflictTypeNone,
+  // presumably: the conflict was settled automatically in favor of one action
+  ConflictTypeResolved,
+  // presumably: the conflict could not be settled automatically
+  ConflictTypeUnresolved
+};
+
+}  // namespace build_tables
+}  // namespace tree_sitter
+
+#endif  // COMPILER_BUILD_TABLES_CONFLICT_TYPE_H_
--- a/src/compiler/build_tables/lex_conflict_manager.cc
+++ b/src/compiler/build_tables/lex_conflict_manager.cc
@ -10,7 +10,7 @@ LexConflictManager::LexConflictManager(const LexicalGrammar &grammar)
    : grammar(grammar) {}

 bool LexConflictManager::resolve(const LexAction &new_action,
-                                 const LexAction &old_action) const {
+                                 const LexAction &old_action) {
  if (new_action.type < old_action.type)
    return !resolve(old_action, new_action);

@ -24,16 +24,27 @@ bool LexConflictManager::resolve(const LexAction &new_action,
      switch (new_action.type) {
        case LexActionTypeAccept: {
          int new_precedence = new_action.precedence_range.min;
+
+          bool result;
          if (new_precedence > old_precedence)
-            return true;
+            result = true;
          else if (new_precedence < old_precedence)
-            return false;
+            result = false;
          else if (new_action.is_string && !old_action.is_string)
-            return true;
+            result = true;
          else if (old_action.is_string && !new_action.is_string)
-            return false;
+            result = false;
+          else if (new_action.symbol.index < old_action.symbol.index)
+            result = true;
          else
-            return new_action.symbol.index < old_action.symbol.index;
+            result = false;
+
+          if (result)
+            fragile_tokens.insert(old_action.symbol);
+          else
+            fragile_tokens.insert(new_action.symbol);
+
+          return result;
        }

        case LexActionTypeAdvance:
--- a/src/compiler/build_tables/lex_conflict_manager.h
+++ b/src/compiler/build_tables/lex_conflict_manager.h
@ -1,8 +1,10 @@
 #ifndef COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_
 #define COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_

+#include <set>
 #include "tree_sitter/compiler.h"
 #include "compiler/lexical_grammar.h"
+#include "compiler/rules/symbol.h"

 namespace tree_sitter {

@ -15,7 +17,9 @@ class LexConflictManager {

 public:
  explicit LexConflictManager(const LexicalGrammar &);
-  bool resolve(const LexAction &, const LexAction &) const;
+  bool resolve(const LexAction &, const LexAction &);
+
+  std::set<rules::Symbol> fragile_tokens;
 };

 }  // namespace build_tables
--- a/src/compiler/build_tables/parse_item.cc
+++ b/src/compiler/build_tables/parse_item.cc
@ -22,7 +22,7 @@ ParseItem::ParseItem(const Symbol &lhs, const Production &production,
 bool ParseItem::operator==(const ParseItem &other) const {
  return ((variable_index == other.variable_index) &&
          (step_index == other.step_index) &&
-          (remaining_rule_id() == other.remaining_rule_id()));
+          (production == other.production));
 }

 bool ParseItem::operator<(const ParseItem &other) const {
@ -34,7 +34,7 @@ bool ParseItem::operator<(const ParseItem &other) const {
    return true;
  if (step_index > other.step_index)
    return false;
-  return remaining_rule_id() < other.remaining_rule_id();
+  return production < other.production;
 }

 Symbol ParseItem::lhs() const {
@ -72,21 +72,10 @@ rules::Associativity ParseItem::associativity() const {
    return production->at(step_index).associativity;
 }

-pair<int, int> ParseItem::remaining_rule_id() const {
-  if (production->empty())
-    return { -2, -1 };
-  else if (completion_status().is_done)
-    return { production->back().associativity, production->back().precedence };
-  else
-    return { -1, production->at(step_index).rule_id };
-}
-
 size_t ParseItem::Hash::operator()(const ParseItem &item) const {
  size_t result = hash<int>()(item.variable_index);
  result ^= hash<unsigned int>()(item.step_index);
-  result ^= hash<size_t>()(item.production->size());
-  pair<int, int> id = item.remaining_rule_id();
-  result ^= hash<int>()(id.first) ^ hash<int>()(id.second);
+  result ^= hash<void *>()((void *)item.production);
  return result;
 }

--- a/src/compiler/build_tables/parse_item.h
+++ b/src/compiler/build_tables/parse_item.h
@ -29,7 +29,6 @@ class ParseItem {
  bool operator==(const ParseItem &other) const;
  bool operator<(const ParseItem &other) const;
  rules::Symbol lhs() const;
-  std::pair<int, int> remaining_rule_id() const;
  int precedence() const;
  rules::Associativity associativity() const;
  CompletionStatus completion_status() const;
--- a/src/compiler/generate_code/c_code.cc
+++ b/src/compiler/generate_code/c_code.cc
@ -302,6 +302,9 @@ class CCodeGenerator {
      case LexActionTypeAccept:
        line("ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");");
        break;
+      case LexActionTypeAcceptFragile:
+        line("ACCEPT_FRAGILE_TOKEN(" + symbol_id(action.symbol) + ");");
+        break;
      case LexActionTypeError:
        line("LEX_ERROR();");
        break;
@ -324,13 +327,13 @@ class CCodeGenerator {
        case ParseActionTypeShiftExtra:
          add("SHIFT_EXTRA()");
          break;
+        case ParseActionTypeReduceFragile:
+          add("REDUCE_FRAGILE(" + symbol_id(action.symbol) + ", " +
+              to_string(action.consumed_symbol_count) + ")");
+          break;
        case ParseActionTypeReduce:
-          if (reduce_action_is_fragile(action))
-            add("REDUCE_FRAGILE(" + symbol_id(action.symbol) + ", " +
-                to_string(action.consumed_symbol_count) + ")");
-          else
-            add("REDUCE(" + symbol_id(action.symbol) + ", " +
-                to_string(action.consumed_symbol_count) + ")");
+          add("REDUCE(" + symbol_id(action.symbol) + ", " +
+              to_string(action.consumed_symbol_count) + ")");
          break;
        case ParseActionTypeReduceExtra:
          add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")");
@ -392,11 +395,6 @@ class CCodeGenerator {
    }
  }

-  bool reduce_action_is_fragile(const ParseAction &action) const {
-    return parse_table.fragile_productions.find(action.production) !=
-           parse_table.fragile_productions.end();
-  }
-
  // C-code generation functions

  void _switch(string condition, function<void()> body) {
--- a/src/compiler/lex_table.h
+++ b/src/compiler/lex_table.h
@ -14,6 +14,7 @@ namespace tree_sitter {
 typedef enum {
  LexActionTypeError,
  LexActionTypeAccept,
+  LexActionTypeAcceptFragile,
  LexActionTypeAdvance
 } LexActionType;

--- a/src/compiler/parse_table.cc
+++ b/src/compiler/parse_table.cc
@ -69,14 +69,13 @@ ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
 }

 bool ParseAction::operator==(const ParseAction &other) const {
-  bool types_eq = type == other.type;
-  bool symbols_eq = symbol == other.symbol;
-  bool state_indices_eq = state_index == other.state_index;
-  bool consumed_symbol_counts_eq =
-    consumed_symbol_count == other.consumed_symbol_count;
-  bool precedences_eq = precedence_range == other.precedence_range;
-  return types_eq && symbols_eq && state_indices_eq &&
-         consumed_symbol_counts_eq && precedences_eq;
+  return (
+    type == other.type &&
+    symbol == other.symbol &&
+    state_index == other.state_index &&
+    production == other.production &&
+    consumed_symbol_count == other.consumed_symbol_count
+  );
 }

 bool ParseAction::operator<(const ParseAction &other) const {
@ -92,6 +91,10 @@ bool ParseAction::operator<(const ParseAction &other) const {
    return true;
  if (other.state_index < state_index)
    return false;
+  if (production < other.production)
+    return true;
+  if (other.production < production)
+    return false;
  return consumed_symbol_count < other.consumed_symbol_count;
 }

--- a/src/compiler/parse_table.h
+++ b/src/compiler/parse_table.h
@ -18,6 +18,7 @@ typedef uint64_t ParseStateId;
 typedef enum {
  ParseActionTypeError,
  ParseActionTypeReduceExtra,
+  ParseActionTypeReduceFragile,
  ParseActionTypeShiftExtra,

  ParseActionTypeShift,
@ -97,7 +98,6 @@ class ParseTable {

  std::vector<ParseState> states;
  std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;
-  std::set<const Production *> fragile_productions;
 };

 }  // namespace tree_sitter
--- a/src/compiler/prepare_grammar/flatten_grammar.cc
+++ b/src/compiler/prepare_grammar/flatten_grammar.cc
@ -72,39 +72,6 @@ class FlattenRule : public rules::RuleFn<void> {
  }
 };

-struct ProductionSlice {
-  vector<ProductionStep>::const_iterator start;
-  vector<ProductionStep>::const_iterator end;
-
-  bool operator==(const ProductionSlice &other) const {
-    if (end - start != other.end - other.start)
-      return false;
-    for (auto iter1 = start, iter2 = other.start; iter1 != end; ++iter1, ++iter2)
-      if (!(iter1->symbol == iter2->symbol &&
-            iter1->precedence == iter2->precedence &&
-            iter1->associativity == iter2->associativity))
-        return false;
-    return true;
-  }
-};
-
-void assign_rule_ids(Production *production,
-                     vector<ProductionSlice> *unique_slices) {
-  auto end = production->end();
-
-  for (auto iter = production->begin(); iter != end; ++iter) {
-    ProductionSlice slice{ iter, end };
-    auto existing_id =
-      find(unique_slices->cbegin(), unique_slices->cend(), slice);
-    if (existing_id == unique_slices->end()) {
-      unique_slices->push_back(slice);
-      iter->rule_id = unique_slices->size();
-    } else {
-      iter->rule_id = existing_id - unique_slices->cbegin() + 1;
-    }
-  }
-}
-
 SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
  SyntaxGrammar result;
  result.expected_conflicts = grammar.expected_conflicts;
@ -112,17 +79,15 @@ SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {

  for (const Variable &variable : grammar.variables) {
    vector<Production> productions;
-    for (const rule_ptr &rule_component : extract_choices(variable.rule))
-      productions.push_back(FlattenRule().flatten(rule_component));
+    for (const rule_ptr &rule_component : extract_choices(variable.rule)) {
+      Production production = FlattenRule().flatten(rule_component);
+      if (std::find(productions.begin(), productions.end(), production) == productions.end())
+        productions.push_back(production);
+    }
    result.variables.push_back(
      SyntaxVariable(variable.name, variable.type, productions));
  }

-  vector<ProductionSlice> unique_slices;
-  for (SyntaxVariable &variable : result.variables)
-    for (Production &production : variable.productions)
-      assign_rule_ids(&production, &unique_slices);
-
  return result;
 }

--- a/src/compiler/syntax_grammar.cc
+++ b/src/compiler/syntax_grammar.cc
@ -23,19 +23,11 @@ ProductionStep::ProductionStep(const rules::Symbol &symbol, int precedence,
                               rules::Associativity associativity)
    : symbol(symbol),
      precedence(precedence),
-      associativity(associativity),
-      rule_id(0) {}
-
-ProductionStep::ProductionStep(const rules::Symbol &symbol, int precedence,
-                               rules::Associativity associativity, int rule_id)
-    : symbol(symbol),
-      precedence(precedence),
-      associativity(associativity),
-      rule_id(rule_id) {}
+      associativity(associativity) {}

 bool ProductionStep::operator==(const ProductionStep &other) const {
  return symbol == other.symbol && precedence == other.precedence &&
-         rule_id == other.rule_id && associativity == other.associativity;
+         associativity == other.associativity;
 }

 const vector<Production> &SyntaxGrammar::productions(
--- a/src/compiler/syntax_grammar.h
+++ b/src/compiler/syntax_grammar.h
@ -13,13 +13,11 @@ namespace tree_sitter {

 struct ProductionStep {
  ProductionStep(const rules::Symbol &, int, rules::Associativity);
-  ProductionStep(const rules::Symbol &, int, rules::Associativity, int);
  bool operator==(const ProductionStep &) const;

  rules::Symbol symbol;
  int precedence;
  rules::Associativity associativity;
-  int rule_id;
 };

 typedef std::vector<ProductionStep> Production;
--- a/src/runtime/lexer.c
+++ b/src/runtime/lexer.c
@ -85,20 +85,24 @@ static bool ts_lexer__advance(TSLexer *self, TSStateId state) {

 static TSTree *ts_lexer__accept(TSLexer *self, TSSymbol symbol,
                                TSSymbolMetadata metadata,
-                                const char *symbol_name) {
+                                const char *symbol_name, bool fragile) {
  TSLength size =
    ts_length_sub(self->current_position, self->token_start_position);
  TSLength padding =
    ts_length_sub(self->token_start_position, self->token_end_position);
  self->token_end_position = self->current_position;

+  TSTree *result;
  if (symbol == ts_builtin_sym_error) {
    LOG("error_char");
-    return ts_tree_make_error(size, padding, self->lookahead);
+    result = ts_tree_make_error(size, padding, self->lookahead);
  } else {
    LOG("accept_token sym:%s", symbol_name);
-    return ts_tree_make_leaf(symbol, padding, size, metadata);
+    result = ts_tree_make_leaf(symbol, padding, size, metadata);
  }
+
+  result->options.fragile_left = fragile;
+  return result;
 }

 /*
--- a/src/runtime/parser.c
+++ b/src/runtime/parser.c
@ -71,7 +71,10 @@ static void ts_parser__breakdown_top_of_stack(TSParser *self, int head) {
        merged = ts_stack_push(self->stack, pop_result->head_index, state, pop_result->trees[j]);
      }

-      assert(i == 0 || merged);
+      if (i == 0)
+        assert(!merged);
+      else
+        assert(merged);
    }

    free(removed_trees);
@ -140,14 +143,31 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) {
      continue;
    }

-    if (ts_tree_has_changes(state->reusable_subtree) ||
-        ts_tree_is_fragile(state->reusable_subtree) ||
-        ts_tree_is_extra(state->reusable_subtree) ||
-        (state->reusable_subtree->child_count > 0 &&
-         !ts_parser__can_reuse(self, head, state->reusable_subtree))) {
-      LOG("breakdown sym:%s", SYM_NAME(state->reusable_subtree->symbol));
-      if (!ts_parser__breakdown_reusable_subtree(state))
+    bool can_reuse = true;
+    if (ts_tree_has_changes(state->reusable_subtree)) {
+      if (state->is_verifying) {
+        ts_parser__breakdown_top_of_stack(self, head);
+        state->is_verifying = false;
+      }
+      LOG("breakdown_changed sym:%s", SYM_NAME(state->reusable_subtree->symbol));
+      can_reuse = false;
+    } else if (ts_tree_is_fragile(state->reusable_subtree)) {
+      LOG("breakdown_fragile sym:%s", SYM_NAME(state->reusable_subtree->symbol));
+      can_reuse = false;
+    } else if (ts_tree_is_extra(state->reusable_subtree)) {
+      LOG("breakdown_extra sym:%s", SYM_NAME(state->reusable_subtree->symbol));
+      can_reuse = false;
+    } else if (state->reusable_subtree->child_count > 0 &&
+         !ts_parser__can_reuse(self, head, state->reusable_subtree)) {
+      LOG("breakdown_unexpected sym:%s", SYM_NAME(state->reusable_subtree->symbol));
+      can_reuse = false;
+    }
+
+    if (!can_reuse) {
+      if (!ts_parser__breakdown_reusable_subtree(state)) {
+        LOG("dont_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol));
        ts_parser__pop_reusable_subtree(state);
+      }
      continue;
    }

@ -276,7 +296,6 @@ static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol,
    if (i > 0) {
      if (symbol == ts_builtin_sym_error) {
        ts_stack_remove_head(self->stack, new_head);
-        free(pop_result->trees);
        continue;
      }

@ -331,7 +350,7 @@ static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol,
    }
  }

-  if (ts_stack_head_count(self->stack) > 1) {
+  if (self->is_split || ts_stack_head_count(self->stack) > 1) {
    for (size_t i = 0, size = self->reduce_parents.size; i < size; i++) {
      TSTree **parent = vector_get(&self->reduce_parents, i);
      (*parent)->options.fragile_left = true;
@ -614,6 +633,7 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) {
    TSTree *lookahead = NULL;
    TSLength position = ts_length_zero(), last_position;

+    self->is_split = ts_stack_head_count(self->stack) > 1;
    for (int head = 0; head < ts_stack_head_count(self->stack);) {
      StackEntry *entry = ts_stack_head(self->stack, head);
      last_position = position;
--- a/src/runtime/parser.h
+++ b/src/runtime/parser.h
@ -15,6 +15,7 @@ typedef struct {
  Vector lookahead_states;
  Vector reduce_parents;
  int finished_stack_head;
+  bool is_split;
 } TSParser;

 TSParser ts_parser_make();