Use LexTableBuilder to detect conflicts between tokens more correctly

2017-03-06 09:47:00 -08:00 · 2017-03-06 09:47:00 -08:00 · 64e9230071
commit 64e9230071
parent abf8a4f2c2
11 changed files with 203 additions and 309 deletions
--- a/project.gyp
+++ b/project.gyp
@ -11,13 +11,12 @@
        'externals/json-parser',
      ],
      'sources': [
-        'src/compiler/build_tables/build_lex_table.cc',
        'src/compiler/build_tables/build_parse_table.cc',
        'src/compiler/build_tables/build_tables.cc',
-        'src/compiler/build_tables/compatible_tokens.cc',
        'src/compiler/build_tables/lex_item.cc',
        'src/compiler/build_tables/lex_item_transitions.cc',
        'src/compiler/build_tables/lex_conflict_manager.cc',
+        'src/compiler/build_tables/lex_table_builder.cc',
        'src/compiler/build_tables/lookahead_set.cc',
        'src/compiler/build_tables/parse_item.cc',
        'src/compiler/build_tables/parse_item_set_builder.cc',
--- a/spec/compiler/build_tables/compatible_tokens_spec.cc
+++ b/spec/compiler/build_tables/compatible_tokens_spec.cc
@ -1,38 +0,0 @@
-#include "spec_helper.h"
-#include "compiler/rules/character_set.h"
-#include "compiler/build_tables/compatible_tokens.h"
-#include "compiler/lexical_grammar.h"
-#include "helpers/rule_helpers.h"
-#include "helpers/stream_methods.h"
-#include "compiler/rules.h"
-
-using namespace rules;
-using namespace build_tables;
-
-START_TEST
-
-describe("recovery_tokens(rule)", []() {
-  it("includes rules that can only begin and end with an explicit set of characters", [&]() {
-    LexicalGrammar grammar;
-
-    grammar.separators = {
-      character({ ' ' }),
-    };
-
-    grammar.variables = {
-      LexicalVariable{"var0", VariableTypeNamed, character({}, false), false},
-      LexicalVariable{"var1", VariableTypeNamed, seq({
-        character({ 'a', 'b' }),
-        character({}, false),
-        character({ 'c', 'd' }),
-      }), false},
-    };
-
-    AssertThat(
-      get_compatible_tokens(grammar).recovery_tokens,
-      Equals<set<Symbol>>({ Symbol(1, Symbol::Terminal) })
-    );
-  });
-});
-
-END_TEST
--- a/spec/compiler/build_tables/lex_conflict_manager_spec.cc
+++ b/spec/compiler/build_tables/lex_conflict_manager_spec.cc
@ -20,6 +20,10 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() {
  Symbol sym4(3, Symbol::Terminal);
  LexItemSet item_set({ LexItem(sym4, blank() )});

+  before_each([&]() {
+    conflict_manager = LexConflictManager();
+  });
+
  it("favors advance actions over empty accept token actions", [&]() {
    update = conflict_manager.resolve(item_set, AdvanceAction(2, {0, 0}, true), AcceptTokenAction());
    AssertThat(update, IsTrue());
@ -65,6 +69,7 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() {
  describe("advance/accept-token conflicts", [&]() {
    describe("when the token to accept has higher precedence", [&]() {
      it("prefers the accept-token action", [&]() {
+        AssertThat(conflict_manager.possible_extensions, IsEmpty());
        update = conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 3, true));
        AssertThat(update, IsFalse());
        AssertThat(conflict_manager.possible_extensions, IsEmpty());
@ -72,13 +77,9 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() {
    });

    describe("when the token to accept does not have a higher precedence", [&]() {
-      it("favors the advance action", [&]() {
+      it("favors the advance action and adds the in-progress tokens as possible extensions of the discarded token", [&]() {
        update = conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 2, true));
        AssertThat(update, IsTrue());
-      });
-
-      it("adds the in-progress tokens as possible extensions of the discarded token", [&]() {
-        conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 3, true));
        AssertThat(conflict_manager.possible_extensions[sym3.index], Contains(sym4.index));
      });
    });
--- a/src/compiler/build_tables/build_lex_table.h
+++ b/src/compiler/build_tables/build_lex_table.h
@ -1,18 +0,0 @@
-#ifndef COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
-#define COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
-
-#include "compiler/lex_table.h"
-
-namespace tree_sitter {
-
-struct LexicalGrammar;
-struct ParseTable;
-
-namespace build_tables {
-
-LexTable build_lex_table(ParseTable *, const LexicalGrammar &);
-
-}  // namespace build_tables
-}  // namespace tree_sitter
-
-#endif  // COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
--- a/src/compiler/build_tables/build_parse_table.cc
+++ b/src/compiler/build_tables/build_parse_table.cc
@ -12,7 +12,7 @@
 #include "compiler/syntax_grammar.h"
 #include "compiler/rules/symbol.h"
 #include "compiler/rules/built_in_symbols.h"
-#include "compiler/build_tables/compatible_tokens.h"
+#include "compiler/build_tables/lex_table_builder.h"

 namespace tree_sitter {
 namespace build_tables {
@ -40,7 +40,7 @@ class ParseTableBuilder {
  set<string> conflicts;
  ParseItemSetBuilder item_set_builder;
  set<const Production *> fragile_productions;
-  CompatibleTokensResult compatible_tokens;
+  vector<set<Symbol::Index>> incompatible_token_indices_by_index;
  bool allow_any_conflict;

 public:
@ -49,7 +49,6 @@ class ParseTableBuilder {
      : grammar(grammar),
        lexical_grammar(lex_grammar),
        item_set_builder(grammar, lex_grammar),
-        compatible_tokens(get_compatible_tokens(lex_grammar)),
        allow_any_conflict(false) {}

  pair<ParseTable, CompileError> build() {
@ -76,7 +75,7 @@ class ParseTableBuilder {
      return { parse_table, error };
    }

-    update_unmergable_token_pairs();
+    compute_unmergable_token_pairs();

    build_error_parse_state();

@ -112,8 +111,18 @@ class ParseTableBuilder {
  void build_error_parse_state() {
    ParseState error_state;

-    for (const Symbol symbol : compatible_tokens.recovery_tokens) {
-      add_out_of_context_parse_state(&error_state, symbol);
+    for (Symbol::Index i = 0; i < lexical_grammar.variables.size(); i++) {
+      bool has_non_reciprocal_conflict = false;
+      for (Symbol::Index incompatible_index : incompatible_token_indices_by_index[i]) {
+        if (!incompatible_token_indices_by_index[incompatible_index].count(i)) {
+          has_non_reciprocal_conflict = true;
+          break;
+        }
+      }
+
+      if (!has_non_reciprocal_conflict) {
+        add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::Terminal));
+      }
    }

    for (const Symbol &symbol : grammar.extra_tokens) {
@ -294,20 +303,29 @@ class ParseTableBuilder {
    }
  }

-  void update_unmergable_token_pairs() {
-    for (const ParseState &state : parse_table.states) {
-      for (Symbol::Index token_index = 0, token_count = lexical_grammar.variables.size(); token_index < token_count; token_index++) {
-        Symbol token(token_index, Symbol::Terminal);
-        if (state.terminal_entries.count(token)) {
-          auto &incompatible_token_indices = compatible_tokens.unmergeable_pairs[token_index];
-          auto iter = incompatible_token_indices.begin();
-          while (iter != incompatible_token_indices.end()) {
-            if (state.terminal_entries.count(Symbol(*iter, Symbol::NonTerminal))) {
-              iter = incompatible_token_indices.erase(iter);
-            } else {
-              ++iter;
-            }
-          }
+  void compute_unmergable_token_pairs() {
+    incompatible_token_indices_by_index.resize(lexical_grammar.variables.size());
+
+    // First, assume that all tokens are mutually incompatible.
+    for (Symbol::Index i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
+      auto &incompatible_indices = incompatible_token_indices_by_index[i];
+      for (Symbol::Index j = 0; j < n; j++) {
+        if (j != i) incompatible_indices.insert(j);
+      }
+    }
+
+    // Then, for each of these possibly-incompatible pairs of tokens, check
+    // whether they really conflict by generating a lexical state that
+    // contains them both, and discard the pairs that do not conflict.
+    auto lex_table_builder = LexTableBuilder::create(lexical_grammar);
+    for (Symbol::Index i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
+      auto &incompatible_indices = incompatible_token_indices_by_index[i];
+      auto iter = incompatible_indices.begin();
+      while (iter != incompatible_indices.end()) {
+        if (lex_table_builder->detect_conflict(i, *iter)) {
+          ++iter;
+        } else {
+          iter = incompatible_indices.erase(iter);
        }
      }
    }
@ -403,17 +421,15 @@ class ParseTableBuilder {
    for (auto &entry : state.terminal_entries) {
      Symbol lookahead = entry.first;
      const vector<ParseAction> &actions = entry.second.actions;
-      auto &incompatible_token_indices = compatible_tokens.unmergeable_pairs[lookahead.index];
+      auto &incompatible_token_indices = incompatible_token_indices_by_index[lookahead.index];

      const auto &other_entry = other.terminal_entries.find(lookahead);
      if (other_entry == other.terminal_entries.end()) {
+        if (lookahead.is_external()) return false;
        if (!lookahead.is_built_in()) {
-          if (!compatible_tokens.recovery_tokens.count(lookahead))
-            return false;
          for (Symbol::Index incompatible_index : incompatible_token_indices) {
-            if (other.terminal_entries.count(Symbol(incompatible_index, Symbol::Terminal))) {
-              return false;
-            }
+            Symbol incompatible_symbol(incompatible_index, Symbol::Terminal);
+            if (other.terminal_entries.count(incompatible_symbol)) return false;
          }
        }
        if (actions.back().type != ParseActionTypeReduce)
@ -430,16 +446,14 @@ class ParseTableBuilder {
    for (auto &entry : other.terminal_entries) {
      Symbol lookahead = entry.first;
      const vector<ParseAction> &actions = entry.second.actions;
-      auto &incompatible_token_indices = compatible_tokens.unmergeable_pairs[lookahead.index];
+      auto &incompatible_token_indices = incompatible_token_indices_by_index[lookahead.index];

      if (!state.terminal_entries.count(lookahead)) {
+        if (lookahead.is_external()) return false;
        if (!lookahead.is_built_in()) {
-          if (!compatible_tokens.recovery_tokens.count(lookahead))
-            return false;
          for (Symbol::Index incompatible_index : incompatible_token_indices) {
-            if (state.terminal_entries.count(Symbol(incompatible_index, Symbol::Terminal))) {
-              return false;
-            }
+            Symbol incompatible_symbol(incompatible_index, Symbol::Terminal);
+            if (state.terminal_entries.count(incompatible_symbol)) return false;
          }
        }
        if (actions.back().type != ParseActionTypeReduce)
--- a/src/compiler/build_tables/build_tables.cc
+++ b/src/compiler/build_tables/build_tables.cc
@ -1,6 +1,6 @@
 #include "compiler/build_tables/build_tables.h"
 #include <tuple>
-#include "compiler/build_tables/build_lex_table.h"
+#include "compiler/build_tables/lex_table_builder.h"
 #include "compiler/build_tables/build_parse_table.h"
 #include "compiler/syntax_grammar.h"
 #include "compiler/lexical_grammar.h"
@ -16,12 +16,12 @@ using std::make_tuple;

 tuple<ParseTable, LexTable, CompileError> build_tables(
  const SyntaxGrammar &grammar,
-  const LexicalGrammar &lex_grammar
+  const LexicalGrammar &lexical_grammar
 ) {
-  auto parse_table_result = build_parse_table(grammar, lex_grammar);
+  auto parse_table_result = build_parse_table(grammar, lexical_grammar);
  ParseTable parse_table = parse_table_result.first;
  const CompileError error = parse_table_result.second;
-  LexTable lex_table = build_lex_table(&parse_table, lex_grammar);
+  LexTable lex_table = LexTableBuilder::create(lexical_grammar)->build(&parse_table);
  return make_tuple(parse_table, lex_table, error);
 }

--- a/src/compiler/build_tables/compatible_tokens.cc
+++ b/src/compiler/build_tables/compatible_tokens.cc
@ -1,136 +0,0 @@
-#include "compiler/build_tables/compatible_tokens.h"
-#include "compiler/lexical_grammar.h"
-#include "compiler/rules/choice.h"
-#include "compiler/rules/character_set.h"
-#include "compiler/rules/repeat.h"
-#include "compiler/rules/visitor.h"
-#include "compiler/rules/seq.h"
-#include "compiler/rules/metadata.h"
-
-namespace tree_sitter {
-namespace build_tables {
-
-using rules::Symbol;
-using std::set;
-
-template <bool left, bool right>
-class CharacterAggregator : public rules::RuleFn<void> {
-  void apply_to(const rules::Seq *rule) {
-    if (left) apply(rule->left);
-    if (right) apply(rule->right);
-  }
-
-  void apply_to(const rules::Choice *rule) {
-    for (const rule_ptr &element : rule->elements) {
-      apply(element);
-    }
-  }
-
-  void apply_to(const rules::Repeat *rule) {
-    apply(rule->content);
-  }
-
-  void apply_to(const rules::Metadata *rule) {
-    apply(rule->rule);
-  }
-
-  void apply_to(const rules::CharacterSet *rule) {
-    result.add_set(*rule);
-  }
-
- public:
-  rules::CharacterSet result;
-};
-
-template <bool left, bool right>
-class CharacterIntersector : public rules::RuleFn<bool> {
-  bool apply_to(const rules::Seq *rule) {
-    bool result = false;
-    if (left) result = apply(rule->left);
-    if (right && !result) result = apply(rule->right);
-    return result;
-  }
-
-  bool apply_to(const rules::Choice *rule) {
-    for (const rule_ptr &element : rule->elements) {
-      if (apply(element)) return true;
-    }
-    return false;
-  }
-
-  bool apply_to(const rules::Repeat *rule) {
-    return apply(rule->content);
-  }
-
-  bool apply_to(const rules::Metadata *rule) {
-    return apply(rule->rule);
-  }
-
-  bool apply_to(const rules::CharacterSet *rule) {
-    return character_set->intersects(*rule);
-  }
-
- public:
-  rules::CharacterSet *character_set;
-
-  CharacterIntersector(rules::CharacterSet *set) : character_set {set} {}
-};
-
-using FirstCharacters = CharacterAggregator<true, false>;
-using LastCharacters = CharacterAggregator<false, true>;
-using AllCharacters = CharacterAggregator<true, true>;
-using FirstCharactersIntersector = CharacterIntersector<true, false>;
-
-CompatibleTokensResult get_compatible_tokens(const LexicalGrammar &grammar) {
-  CompatibleTokensResult result;
-  result.unmergeable_pairs.resize(grammar.variables.size());
-
-  AllCharacters all_separator_characters;
-  for (const rule_ptr &separator : grammar.separators)
-    all_separator_characters.apply(separator);
-
-  for (size_t i = 0; i < grammar.variables.size(); i++) {
-    Symbol symbol(i, Symbol::Terminal);
-    const LexicalVariable &variable = grammar.variables[i];
-    rule_ptr rule = variable.rule;
-
-    FirstCharacters first_characters;
-    first_characters.apply(rule);
-
-    LastCharacters last_characters;
-    last_characters.apply(rule);
-
-    AllCharacters all_characters;
-    all_characters.apply(rule);
-
-    bool has_distinct_start =
-      !first_characters.result.includes_all &&
-      !first_characters.result.intersects(all_separator_characters.result);
-
-    bool has_distinct_end =
-      !last_characters.result.includes_all &&
-      !last_characters.result.intersects(all_separator_characters.result);
-
-    bool has_separators =
-      all_characters.result.intersects(all_separator_characters.result);
-
-    if ((has_distinct_start && has_distinct_end) || !has_separators)
-      result.recovery_tokens.insert(symbol);
-
-    for (size_t j = 0; j < i; j++) {
-      const LexicalVariable &other_variable = grammar.variables[j];
-      if (has_separators) {
-        FirstCharactersIntersector intersector(&first_characters.result);
-        if (intersector.apply(other_variable.rule)) {
-          result.unmergeable_pairs[i].insert(j);
-          result.unmergeable_pairs[j].insert(i);
-        }
-      }
-    }
-  }
-
-  return result;
-}
-
-}  // namespace build_tables
-}  // namespace tree_sitter
--- a/src/compiler/build_tables/compatible_tokens.h
+++ b/src/compiler/build_tables/compatible_tokens.h
@ -1,26 +0,0 @@
-#ifndef COMPILER_BUILD_TABLES_COMPATIBLE_TOKENS_H_
-#define COMPILER_BUILD_TABLES_COMPATIBLE_TOKENS_H_
-
-#include "compiler/rule.h"
-#include "compiler/rules/symbol.h"
-#include <vector>
-#include <set>
-#include <unordered_set>
-
-namespace tree_sitter {
-
-struct LexicalGrammar;
-
-namespace build_tables {
-
-struct CompatibleTokensResult {
-  std::set<rules::Symbol> recovery_tokens;
-  std::vector<std::unordered_set<rules::Symbol::Index>> unmergeable_pairs;
-};
-
-CompatibleTokensResult get_compatible_tokens(const LexicalGrammar &);
-
-}  // namespace build_tables
-}  // namespace tree_sitter
-
-#endif  // COMPILER_BUILD_TABLES_COMPATIBLE_TOKENS_H_
--- a/src/compiler/build_tables/lex_conflict_manager.cc
+++ b/src/compiler/build_tables/lex_conflict_manager.cc
@ -10,11 +10,10 @@ namespace build_tables {
 bool LexConflictManager::resolve(const LexItemSet &item_set,
                                 const AdvanceAction &new_action,
                                 const AcceptTokenAction &old_action) {
-  if (!old_action.is_present())
-    return true;
  if (new_action.precedence_range.max >= old_action.precedence) {
-    for (const LexItem &item : item_set.entries)
+    for (const LexItem &item : item_set.entries) {
      possible_extensions[old_action.symbol.index].insert(item.lhs.index);
+    }
    return true;
  } else {
    return false;
@ -23,30 +22,26 @@ bool LexConflictManager::resolve(const LexItemSet &item_set,

 bool LexConflictManager::resolve(const AcceptTokenAction &new_action,
                                 const AcceptTokenAction &old_action) {
-  if (!old_action.is_present())
-    return true;
-
-  int old_precedence = old_action.precedence;
-  int new_precedence = new_action.precedence;
-
  bool result;
-  if (new_precedence > old_precedence)
+  if (new_action.precedence > old_action.precedence) {
    result = true;
-  else if (new_precedence < old_precedence)
+  } else if (new_action.precedence < old_action.precedence) {
    result = false;
-  else if (new_action.is_string && !old_action.is_string)
+  } else if (new_action.is_string && !old_action.is_string) {
    result = true;
-  else if (old_action.is_string && !new_action.is_string)
+  } else if (old_action.is_string && !new_action.is_string) {
    result = false;
-  else if (new_action.symbol.index < old_action.symbol.index)
+  } else if (new_action.symbol.index < old_action.symbol.index) {
    result = true;
-  else
+  } else {
    result = false;
+  }

-  if (result)
+  if (result) {
    possible_homonyms[old_action.symbol.index].insert(new_action.symbol.index);
-  else
+  } else {
    possible_homonyms[new_action.symbol.index].insert(old_action.symbol.index);
+  }

  return result;
 }
--- a/src/compiler/build_tables/lex_table_builder.cc
+++ b/src/compiler/build_tables/lex_table_builder.cc
@ -1,4 +1,4 @@
-#include "compiler/build_tables/build_lex_table.h"
+#include "compiler/build_tables/lex_table_builder.h"
 #include <climits>
 #include <map>
 #include <set>
@ -16,15 +16,18 @@
 #include "compiler/rules/repeat.h"
 #include "compiler/rules/seq.h"
 #include "compiler/rules/blank.h"
+#include "compiler/rules/visitor.h"

 namespace tree_sitter {
 namespace build_tables {

 using std::map;
+using std::pair;
 using std::set;
 using std::string;
 using std::vector;
 using std::unordered_map;
+using std::unique_ptr;
 using rules::Blank;
 using rules::Choice;
 using rules::CharacterSet;
@ -33,37 +36,74 @@ using rules::Symbol;
 using rules::Metadata;
 using rules::Seq;

-class LexTableBuilder {
+class StartingCharacterAggregator : public rules::RuleFn<void> {
+  void apply_to(const rules::Seq *rule) {
+    apply(rule->left);
+  }
+
+  void apply_to(const rules::Choice *rule) {
+    for (const rule_ptr &element : rule->elements) apply(element);
+  }
+
+  void apply_to(const rules::Repeat *rule) {
+    apply(rule->content);
+  }
+
+  void apply_to(const rules::Metadata *rule) {
+    apply(rule->rule);
+  }
+
+  void apply_to(const rules::CharacterSet *rule) {
+    result.add_set(*rule);
+  }
+
+ public:
+  CharacterSet result;
+};
+
+class LexTableBuilderImpl : public LexTableBuilder {
  LexTable lex_table;
-  ParseTable *parse_table;
-  const LexicalGrammar lex_grammar;
+  const LexicalGrammar grammar;
  vector<rule_ptr> separator_rules;
+  CharacterSet first_separator_characters;
  LexConflictManager conflict_manager;
  unordered_map<LexItemSet, LexStateId> lex_state_ids;

 public:
-  LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar)
-      : parse_table(parse_table), lex_grammar(lex_grammar) {
-    for (const rule_ptr &rule : lex_grammar.separators)
+  vector<bool> shadowed_token_indices;
+
+  LexTableBuilderImpl(const LexicalGrammar &grammar) : grammar(grammar) {
+    StartingCharacterAggregator starting_character_aggregator;
+    for (const rule_ptr &rule : grammar.separators) {
      separator_rules.push_back(Repeat::build(rule));
+      starting_character_aggregator.apply(rule);
+    }
    separator_rules.push_back(Blank::build());
+    first_separator_characters = starting_character_aggregator.result;
+    shadowed_token_indices.resize(grammar.variables.size());
  }

-  LexTable build() {
-    for (ParseState &parse_state : parse_table->states)
-      add_lex_state_for_parse_state(&parse_state);
-
-    mark_fragile_tokens();
-    remove_duplicate_lex_states();
-
+  LexTable build(ParseTable *parse_table) {
+    for (ParseState &parse_state : parse_table->states) {
+      parse_state.lex_state_id = add_lex_state(
+        item_set_for_terminals(parse_state.terminal_entries)
+      );
+    }
+    mark_fragile_tokens(parse_table);
+    remove_duplicate_lex_states(parse_table);
    return lex_table;
  }

- private:
-  void add_lex_state_for_parse_state(ParseState *parse_state) {
-    parse_state->lex_state_id = add_lex_state(
-      item_set_for_terminals(parse_state->terminal_entries)
-    );
+  bool detect_conflict(Symbol::Index left, Symbol::Index right) {
+    clear();
+
+    map<Symbol, ParseTableEntry> terminals;
+    terminals[Symbol(left, Symbol::Terminal)];
+    terminals[Symbol(right, Symbol::Terminal)];
+
+    add_lex_state(item_set_for_terminals(terminals));
+
+    return shadowed_token_indices[right];
  }

  LexStateId add_lex_state(const LexItemSet &item_set) {
@ -80,6 +120,13 @@ class LexTableBuilder {
    }
  }

+  void clear() {
+    lex_table.states.clear();
+    lex_state_ids.clear();
+    shadowed_token_indices.assign(grammar.variables.size(), false);
+  }
+
+ private:
  void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
    for (const auto &pair : item_set.transitions()) {
      const CharacterSet &characters = pair.first;
@ -87,11 +134,28 @@ class LexTableBuilder {

      AdvanceAction action(-1, transition.precedence, transition.in_main_token);
      auto current_action = lex_table.states[state_id].accept_action;
-      if (conflict_manager.resolve(transition.destination, action,
-                                   current_action)) {
-        action.state_index = add_lex_state(transition.destination);
-        lex_table.states[state_id].advance_actions[characters] = action;
+      if (current_action.is_present()) {
+        bool prefer_advancing = conflict_manager.resolve(transition.destination, action, current_action);
+        bool matches_accepted_token = false;
+        for (const LexItem &item : transition.destination.entries) {
+          if (item.lhs == current_action.symbol) {
+            matches_accepted_token = true;
+          } else if (!transition.in_main_token && !item.lhs.is_built_in() && !prefer_advancing) {
+            shadowed_token_indices[item.lhs.index] = true;
+          }
+        }
+
+        if (!matches_accepted_token && characters.intersects(first_separator_characters)) {
+          shadowed_token_indices[current_action.symbol.index] = true;
+        }
+
+        if (!prefer_advancing) {
+          continue;
+        }
      }
+
+      action.state_index = add_lex_state(transition.destination);
+      lex_table.states[state_id].advance_actions[characters] = action;
    }
  }

@ -101,16 +165,21 @@ class LexTableBuilder {
      if (completion_status.is_done) {
        AcceptTokenAction action(item.lhs, completion_status.precedence.max,
                                 item.lhs.is_built_in() ||
-                                 lex_grammar.variables[item.lhs.index].is_string);
+                                 grammar.variables[item.lhs.index].is_string);

        auto current_action = lex_table.states[state_id].accept_action;
-        if (conflict_manager.resolve(action, current_action))
-          lex_table.states[state_id].accept_action = action;
+        if (current_action.is_present()) {
+          if (!conflict_manager.resolve(action, current_action)) {
+            continue;
+          }
+        }
+
+        lex_table.states[state_id].accept_action = action;
      }
    }
  }

-  void mark_fragile_tokens() {
+  void mark_fragile_tokens(ParseTable *parse_table) {
    for (ParseState &state : parse_table->states) {
      for (auto &entry : state.terminal_entries) {
        Symbol symbol = entry.first;
@ -138,7 +207,7 @@ class LexTableBuilder {
    }
  }

-  void remove_duplicate_lex_states() {
+  void remove_duplicate_lex_states(ParseTable *parse_table) {
    for (LexState &state : lex_table.states) {
      state.accept_action.is_string = false;
      state.accept_action.precedence = 0;
@ -229,7 +298,7 @@ class LexTableBuilder {
    if (symbol == rules::END_OF_INPUT())
      return { CharacterSet().include(0).copy() };

-    rule_ptr rule = lex_grammar.variables[symbol.index].rule;
+    rule_ptr rule = grammar.variables[symbol.index].rule;

    auto choice = rule->as<Choice>();
    if (choice)
@ -239,8 +308,16 @@ class LexTableBuilder {
  }
 };

-LexTable build_lex_table(ParseTable *table, const LexicalGrammar &grammar) {
-  return LexTableBuilder(table, grammar).build();
+unique_ptr<LexTableBuilder> LexTableBuilder::create(const LexicalGrammar &grammar) {
+  return unique_ptr<LexTableBuilder>(new LexTableBuilderImpl(grammar));
+}
+
+LexTable LexTableBuilder::build(ParseTable *parse_table) {
+  return static_cast<LexTableBuilderImpl *>(this)->build(parse_table);
+}
+
+bool LexTableBuilder::detect_conflict(Symbol::Index left, Symbol::Index right) {
+  return static_cast<LexTableBuilderImpl *>(this)->detect_conflict(left, right);
 }

 }  // namespace build_tables
--- a/src/compiler/build_tables/lex_table_builder.h
+++ b/src/compiler/build_tables/lex_table_builder.h
@ -0,0 +1,26 @@
+#ifndef COMPILER_BUILD_TABLES_LEX_TABLE_BUILDER_H_
+#define COMPILER_BUILD_TABLES_LEX_TABLE_BUILDER_H_
+
+#include <memory>
+#include "compiler/lex_table.h"
+
+namespace tree_sitter {
+
+struct ParseTable;
+struct LexicalGrammar;
+
+namespace build_tables {
+
+class LexTableBuilder {
+ public:
+  static std::unique_ptr<LexTableBuilder> create(const LexicalGrammar &);
+  LexTable build(ParseTable *);
+  bool detect_conflict(rules::Symbol::Index, rules::Symbol::Index);
+ protected:
+  LexTableBuilder() = default;
+};
+
+}  // namespace build_tables
+}  // namespace tree_sitter
+
+#endif  // COMPILER_BUILD_TABLES_LEX_TABLE_BUILDER_H_