Merge compatible starting token states before constructing lex table

This commit is contained in:
Max Brunsfeld 2017-09-01 14:22:50 -07:00
parent 9d668c5004
commit 4c9c05806a
5 changed files with 219 additions and 141 deletions

View file

@@ -34,12 +34,13 @@ using rules::Symbol;
using rules::Metadata;
using rules::Seq;
class StartingCharacterAggregator {
template <bool is_start>
class StartOrEndCharacterAggregator {
public:
void apply(const Rule &rule) {
rule.match(
[this](const Seq &sequence) {
apply(*sequence.left);
apply(is_start ? *sequence.left : *sequence.right);
},
[this](const rules::Choice &rule) {
@@ -48,20 +49,9 @@ class StartingCharacterAggregator {
}
},
[this](const rules::Repeat &rule) {
apply(*rule.rule);
},
[this](const rules::Metadata &rule) {
apply(*rule.rule);
},
[this](const rules::CharacterSet &rule) {
result.add_set(rule);
},
[this](const rules::Blank) {},
[this](const rules::Repeat &rule) { apply(*rule.rule); },
[this](const rules::Metadata &rule) { apply(*rule.rule); },
[this](const rules::CharacterSet &rule) { result.add_set(rule); },
[](auto) {}
);
}
@@ -69,26 +59,37 @@ class StartingCharacterAggregator {
CharacterSet result;
};
using StartingCharacterAggregator = StartOrEndCharacterAggregator<true>;
using EndingCharacterAggregator = StartOrEndCharacterAggregator<false>;
class LexTableBuilderImpl : public LexTableBuilder {
LexTable lex_table;
const LexicalGrammar grammar;
vector<Rule> separator_rules;
LexConflictManager conflict_manager;
unordered_map<LexItemSet, LexStateId> lex_state_ids;
map<Symbol::Index, CharacterSet> following_characters_by_token_index;
vector<set<Symbol>> incompatible_tokens_by_token_index;
CharacterSet separator_start_characters;
CharacterSet current_conflict_detection_following_characters;
Symbol::Index current_conflict_detection_token_index;
bool current_conflict_value;
vector<CharacterSet> starting_characters_by_token;
vector<CharacterSet> following_characters_by_token;
vector<set<Symbol>> shadowed_tokens_by_token;
const vector<LookaheadSet> &coincident_tokens_by_token;
vector<bool> conflict_status_by_token;
bool conflict_detection_mode;
public:
LexTableBuilderImpl(const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar,
const vector<set<Symbol::Index>> &following_tokens_by_token_index) :
grammar(lexical_grammar),
incompatible_tokens_by_token_index(lexical_grammar.variables.size()) {
const vector<LookaheadSet> &following_tokens_by_token,
const vector<LookaheadSet> &coincident_tokens)
: grammar(lexical_grammar),
starting_characters_by_token(lexical_grammar.variables.size()),
following_characters_by_token(lexical_grammar.variables.size()),
shadowed_tokens_by_token(lexical_grammar.variables.size()),
coincident_tokens_by_token(coincident_tokens),
conflict_detection_mode(false) {
// Compute the possible separator rules and the set of separator characters that can occur
// immediately after any token.
StartingCharacterAggregator separator_character_aggregator;
for (const auto &rule : grammar.separators) {
separator_rules.push_back(Repeat{rule});
@@ -96,34 +97,84 @@ class LexTableBuilderImpl : public LexTableBuilder {
}
separator_rules.push_back(Blank{});
separator_start_characters = separator_character_aggregator.result;
clear();
// Compute the set of characters that each token can start with and the set of non-separator
// characters that can follow each token.
for (unsigned i = 0, n = grammar.variables.size(); i < n; i++) {
Symbol token = Symbol::terminal(i);
auto &incompatible_indices = incompatible_tokens_by_token_index[i];
StartingCharacterAggregator starting_character_aggregator;
starting_character_aggregator.apply(grammar.variables[i].rule);
starting_characters_by_token[i] = starting_character_aggregator.result;
for (unsigned j = 0; j < n; j++) {
if (i == j) continue;
if (detect_conflict(i, j, following_tokens_by_token_index)) {
incompatible_indices.insert(Symbol::terminal(j));
}
StartingCharacterAggregator following_character_aggregator;
following_tokens_by_token[i].for_each([&](Symbol following_token) {
following_character_aggregator.apply(grammar.variables[following_token.index].rule);
});
// TODO - Refactor this. In general, a keyword token cannot be followed immediately by
// another alphanumeric character. But this requirement is currently not expressed anywhere in
// the grammar. So without this hack, we would be overly conservative about merging parse
// states because we would often consider `identifier` tokens to *conflict* with keyword
// tokens.
if (is_keyword(grammar.variables[i])) {
following_character_aggregator.result
.exclude('a', 'z')
.exclude('A', 'Z')
.exclude('0', '9')
.exclude('_')
.exclude('$');
}
for (const ExternalToken &external_token : syntax_grammar.external_tokens) {
if (external_token.corresponding_internal_token == token) {
for (unsigned j = 0; j < syntax_grammar.external_tokens.size(); j++) {
incompatible_indices.insert(Symbol::external(j));
}
following_characters_by_token[i] = following_character_aggregator.result;
}
// For each pair of tokens, generate a lex table for just those two tokens and record what
// conflicts arise.
conflict_detection_mode = true;
for (Symbol::Index i = 0, n = grammar.variables.size(); i < n; i++) {
for (Symbol::Index j = 0; j < i; j++) {
if (starting_characters_by_token[i].intersects(starting_characters_by_token[j]) ||
starting_characters_by_token[i].intersects(separator_start_characters) ||
starting_characters_by_token[j].intersects(separator_start_characters)) {
clear();
add_lex_state(item_set_for_terminals(LookaheadSet({
Symbol::terminal(i),
Symbol::terminal(j)
})));
if (conflict_status_by_token[i]) shadowed_tokens_by_token[j].insert(Symbol::terminal(i));
if (conflict_status_by_token[j]) shadowed_tokens_by_token[i].insert(Symbol::terminal(j));
}
}
}
}
LexTable build(ParseTable *parse_table) {
clear();
conflict_detection_mode = false;
vector<pair<LookaheadSet, vector<ParseState *>>> starting_token_sets;
for (ParseState &parse_state : parse_table->states) {
parse_state.lex_state_id = add_lex_state(
item_set_for_terminals(parse_state.terminal_entries)
);
LookaheadSet token_set;
for (auto &entry : parse_state.terminal_entries) {
token_set.insert(entry.first);
}
bool did_merge = false;
for (auto &pair : starting_token_sets) {
if (merge_token_set(&pair.first, token_set)) {
did_merge = true;
pair.second.push_back(&parse_state);
break;
}
}
if (!did_merge) starting_token_sets.push_back({token_set, {&parse_state}});
}
for (auto &pair : starting_token_sets) {
LexStateId state_id = add_lex_state(item_set_for_terminals(pair.first));
for (ParseState *parse_state : pair.second) {
parse_state->lex_state_id = state_id;
}
}
mark_fragile_tokens(parse_table);
remove_duplicate_lex_states(parse_table);
@@ -131,64 +182,17 @@ class LexTableBuilderImpl : public LexTableBuilder {
}
const set<Symbol> &get_incompatible_tokens(Symbol::Index index) const {
return incompatible_tokens_by_token_index[index];
}
bool detect_conflict(Symbol::Index left, Symbol::Index right,
const vector<set<Symbol::Index>> &following_tokens_by_token_index) {
StartingCharacterAggregator left_starting_characters;
StartingCharacterAggregator right_starting_characters;
left_starting_characters.apply(grammar.variables[left].rule);
right_starting_characters.apply(grammar.variables[right].rule);
if (!left_starting_characters.result.intersects(right_starting_characters.result) &&
!left_starting_characters.result.intersects(separator_start_characters) &&
!right_starting_characters.result.intersects(separator_start_characters)) {
return false;
}
auto following_characters_entry = following_characters_by_token_index.find(right);
if (following_characters_entry == following_characters_by_token_index.end()) {
StartingCharacterAggregator aggregator;
for (auto following_token_index : following_tokens_by_token_index[right]) {
aggregator.apply(grammar.variables[following_token_index].rule);
}
following_characters_entry =
following_characters_by_token_index.insert({right, aggregator.result}).first;
// TODO - Refactor this. In general, a keyword token cannot be followed immediately by
// another alphanumeric character. But this requirement is currently not expressed anywhere in
// the grammar. So without this hack, we would be overly conservative about merging parse
// states because we would often consider `identifier` tokens to *conflict* with keyword
// tokens.
if (is_keyword(grammar.variables[right])) {
following_characters_entry->second
.exclude('a', 'z')
.exclude('A', 'Z')
.exclude('0', '9')
.exclude('_')
.exclude('$');
}
}
current_conflict_detection_token_index = right;
current_conflict_detection_following_characters = following_characters_entry->second;
add_lex_state(item_set_for_terminals({{Symbol::terminal(left), {}}, {Symbol::terminal(right), {}}}));
bool result = current_conflict_value;
clear();
return result;
return shadowed_tokens_by_token[index];
}
private:
bool is_keyword(const LexicalVariable &variable) {
return variable.is_string && iswalpha(get_last_character(variable.rule));
}
static uint32_t get_last_character(const Rule &rule) {
return rule.match(
[](const Seq &sequence) { return get_last_character(*sequence.right); },
[](const rules::CharacterSet &rule) { return *rule.included_chars.begin(); },
[](const rules::Metadata &rule) { return get_last_character(*rule.rule); },
[](auto) { return 0; }
);
EndingCharacterAggregator aggregator;
aggregator.apply(variable.rule);
return
!aggregator.result.includes_all &&
aggregator.result.included_chars.size() == 1 &&
iswalpha(*aggregator.result.included_chars.begin());
}
LexStateId add_lex_state(const LexItemSet &item_set) {
@@ -208,11 +212,9 @@ class LexTableBuilderImpl : public LexTableBuilder {
void clear() {
lex_table.states.clear();
lex_state_ids.clear();
current_conflict_detection_following_characters = CharacterSet();
current_conflict_value = false;
conflict_status_by_token = vector<bool>(grammar.variables.size(), false);
}
private:
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
for (const auto &pair : item_set.transitions()) {
const CharacterSet &characters = pair.first;
@@ -221,23 +223,28 @@ class LexTableBuilderImpl : public LexTableBuilder {
AdvanceAction action(-1, transition.precedence, transition.in_main_token);
AcceptTokenAction &accept_action = lex_table.states[state_id].accept_action;
if (accept_action.is_present()) {
bool prefer_advancing = conflict_manager.resolve(transition.destination, action, accept_action);
bool can_advance_for_accepted_token = false;
for (const LexItem &item : transition.destination.entries) {
if (item.lhs == accept_action.symbol) {
can_advance_for_accepted_token = true;
} else if (item.lhs.index == current_conflict_detection_token_index &&
!prefer_advancing && !transition.in_main_token) {
current_conflict_value = true;
}
}
bool prefer_advancing = conflict_manager.resolve(
transition.destination,
action,
accept_action
);
if (accept_action.symbol.index == current_conflict_detection_token_index &&
!can_advance_for_accepted_token &&
(characters.intersects(separator_start_characters) ||
(characters.intersects(current_conflict_detection_following_characters) &&
grammar.variables[accept_action.symbol.index].is_string))) {
current_conflict_value = true;
if (conflict_detection_mode) {
bool next_item_set_can_yield_this_token = false;
for (const LexItem &item : transition.destination.entries) {
if (item.lhs == accept_action.symbol) {
next_item_set_can_yield_this_token = true;
} else if (!prefer_advancing && !transition.in_main_token) {
conflict_status_by_token[item.lhs.index] = true;
}
}
if (prefer_advancing &&
!next_item_set_can_yield_this_token &&
(characters.intersects(following_characters_by_token[accept_action.symbol.index]) ||
characters.intersects(separator_start_characters))) {
conflict_status_by_token[accept_action.symbol.index] = true;
}
}
if (!prefer_advancing) continue;
@@ -256,10 +263,15 @@ class LexTableBuilderImpl : public LexTableBuilder {
item.lhs.is_built_in() ||
grammar.variables[item.lhs.index].is_string);
AcceptTokenAction &existing_action = lex_table.states[state_id].accept_action;
if (!existing_action.is_present() ||
conflict_manager.resolve(action, existing_action)) {
lex_table.states[state_id].accept_action = action;
if (existing_action.is_present()) {
if (conflict_manager.resolve(action, existing_action)) {
conflict_status_by_token[existing_action.symbol.index] = true;
} else {
conflict_status_by_token[action.symbol.index] = true;
continue;
}
}
lex_table.states[state_id].accept_action = action;
}
}
}
@@ -292,6 +304,39 @@ class LexTableBuilderImpl : public LexTableBuilder {
}
}
bool merge_token_set(LookaheadSet *left, const LookaheadSet &right) const {
bool is_compatible = true;
left->for_each([&](Symbol left_symbol) {
if (left_symbol.is_terminal() && !left_symbol.is_built_in() && !right.contains(left_symbol)) {
right.for_each([&](Symbol right_symbol) {
if (shadowed_tokens_by_token[left_symbol.index].count(right_symbol) ||
!coincident_tokens_by_token[left_symbol.index].contains(right_symbol)) {
is_compatible = false;
return;
}
});
}
if (!is_compatible) return;
});
right.for_each([&](Symbol right_symbol) {
if (right_symbol.is_terminal() && !right_symbol.is_built_in() && !left->contains(right_symbol)) {
left->for_each([&](Symbol left_symbol) {
if (shadowed_tokens_by_token[right_symbol.index].count(left_symbol) ||
!coincident_tokens_by_token[right_symbol.index].contains(left_symbol)) {
is_compatible = false;
return;
}
});
}
if (!is_compatible) return;
});
if (is_compatible) left->insert_all(right);
return is_compatible;
}
void remove_duplicate_lex_states(ParseTable *parse_table) {
for (LexState &state : lex_table.states) {
state.accept_action.is_string = false;
@@ -359,10 +404,9 @@ class LexTableBuilderImpl : public LexTableBuilder {
}
}
LexItemSet item_set_for_terminals(const map<Symbol, ParseTableEntry> &terminals) {
LexItemSet item_set_for_terminals(const LookaheadSet &terminals) {
LexItemSet result;
for (const auto &pair : terminals) {
Symbol symbol = pair.first;
terminals.for_each([&](Symbol symbol) {
if (symbol.is_terminal()) {
for (const auto &rule : rules_for_symbol(symbol)) {
for (const auto &separator_rule : separator_rules) {
@@ -378,7 +422,7 @@
}
}
}
}
});
return result;
}
@@ -401,11 +445,13 @@
unique_ptr<LexTableBuilder> LexTableBuilder::create(const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar,
const vector<set<Symbol::Index>> &following_tokens) {
const vector<LookaheadSet> &following_tokens,
const vector<LookaheadSet> &coincident_tokens) {
return unique_ptr<LexTableBuilder>(new LexTableBuilderImpl(
syntax_grammar,
lexical_grammar,
following_tokens
following_tokens,
coincident_tokens
));
}

View file

@@ -14,11 +14,14 @@ struct LexicalGrammar;
namespace build_tables {
class LookaheadSet;
class LexTableBuilder {
public:
static std::unique_ptr<LexTableBuilder> create(const SyntaxGrammar &,
const LexicalGrammar &,
const std::vector<std::set<rules::Symbol::Index>> &);
const std::vector<LookaheadSet> &,
const std::vector<LookaheadSet> &);
LexTable build(ParseTable *);
const std::set<rules::Symbol> &get_incompatible_tokens(rules::Symbol::Index) const;

View file

@@ -52,7 +52,8 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
ParseItemSetBuilder item_set_builder;
unique_ptr<LexTableBuilder> lex_table_builder;
set<ParseAction> fragile_reductions;
vector<set<Symbol::Index>> following_tokens_by_token_index;
vector<LookaheadSet> following_tokens_by_token;
vector<LookaheadSet> coincident_tokens_by_token;
bool processing_recovery_states;
public:
@@ -60,8 +61,22 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
: grammar(syntax_grammar),
lexical_grammar(lexical_grammar),
item_set_builder(syntax_grammar, lexical_grammar),
following_tokens_by_token_index(lexical_grammar.variables.size()),
processing_recovery_states(false) {}
following_tokens_by_token(lexical_grammar.variables.size()),
coincident_tokens_by_token(lexical_grammar.variables.size()),
processing_recovery_states(false) {
for (unsigned i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
coincident_tokens_by_token[i].insert(rules::END_OF_INPUT());
if (lexical_grammar.variables[i].is_string) {
for (unsigned j = 0; j < i; j++) {
if (lexical_grammar.variables[j].is_string) {
coincident_tokens_by_token[i].insert(Symbol::terminal(j));
coincident_tokens_by_token[j].insert(Symbol::terminal(i));
}
}
}
}
}
tuple<ParseTable, LexTable, CompileError> build() {
// Ensure that the empty rename sequence has index 0.
@@ -90,7 +105,8 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
lex_table_builder = LexTableBuilder::create(
grammar,
lexical_grammar,
following_tokens_by_token_index
following_tokens_by_token,
coincident_tokens_by_token
);
processing_recovery_states = true;
@@ -130,17 +146,18 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
for (unsigned i = 0; i < lexical_grammar.variables.size(); i++) {
Symbol token = Symbol::terminal(i);
bool has_non_reciprocal_conflict = false;
const LexicalVariable &variable = lexical_grammar.variables[i];
bool exclude_from_recovery_state = false;
for (Symbol incompatible_token : lex_table_builder->get_incompatible_tokens(i)) {
if (incompatible_token.is_terminal() &&
!lex_table_builder->get_incompatible_tokens(incompatible_token.index).count(token)) {
has_non_reciprocal_conflict = true;
if (!coincident_tokens_by_token[i].contains(incompatible_token) &&
((lexical_grammar.variables[incompatible_token.index].is_string && !variable.is_string) ||
!lex_table_builder->get_incompatible_tokens(incompatible_token.index).count(token))) {
exclude_from_recovery_state = true;
break;
}
}
if (!has_non_reciprocal_conflict) {
if (!exclude_from_recovery_state) {
add_out_of_context_parse_state(&error_state, Symbol::terminal(i));
}
}
@@ -163,8 +180,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
parse_table.states[state_id] = error_state;
}
void add_out_of_context_parse_state(ParseState *error_state,
const rules::Symbol &symbol) {
void add_out_of_context_parse_state(ParseState *error_state, const rules::Symbol &symbol) {
const ParseItemSet &item_set = recovery_item_sets_by_lookahead[symbol];
if (!item_set.entries.empty()) {
ParseStateId state = add_parse_state({}, item_set);
@@ -300,6 +316,16 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
}
}
auto &terminals = state.terminal_entries;
for (auto iter = terminals.begin(), end = terminals.end(); iter != end; ++iter) {
if (iter->first.is_built_in() || iter->first.is_external()) continue;
for (auto other_iter = terminals.begin(); other_iter != iter; ++other_iter) {
if (other_iter->first.is_built_in() || other_iter->first.is_external()) continue;
coincident_tokens_by_token[iter->first.index].insert(other_iter->first);
coincident_tokens_by_token[other_iter->first.index].insert(iter->first);
}
}
return "";
}
@@ -767,7 +793,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
if (left_symbol.is_terminal() && !left_symbol.is_built_in()) {
right_tokens.for_each([&](Symbol right_symbol) {
if (right_symbol.is_terminal() && !right_symbol.is_built_in()) {
following_tokens_by_token_index[left_symbol.index].insert(right_symbol.index);
following_tokens_by_token[left_symbol.index].insert(right_symbol);
}
});
}

View file

@@ -16,6 +16,7 @@ struct AdvanceAction {
AdvanceAction();
AdvanceAction(size_t, PrecedenceRange, bool);
bool operator==(const AdvanceAction &other) const;
inline bool operator!=(const AdvanceAction &other) const { return !operator==(other); }
LexStateId state_index;
PrecedenceRange precedence_range;
@@ -26,7 +27,8 @@ struct AcceptTokenAction {
AcceptTokenAction();
AcceptTokenAction(rules::Symbol, int, bool);
bool is_present() const;
bool operator==(const AcceptTokenAction &action) const;
bool operator==(const AcceptTokenAction &other) const;
inline bool operator!=(const AcceptTokenAction &other) const { return !operator==(other); }
rules::Symbol symbol;
int precedence;

View file

@@ -234,6 +234,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
bool found_external_token = false;
bool skipped_error = false;
bool error_mode = parse_state == ERROR_STATE;
int32_t first_error_character = 0;
Length error_start_position, error_end_position;
uint32_t last_byte_scanned = start_position.bytes;
@@ -260,8 +261,7 @@
self->lexer.token_end_position = self->lexer.current_position;
}
if (lex_mode.lex_state == ERROR_STATE &&
self->lexer.token_end_position.bytes <= current_position.bytes) {
if (error_mode && self->lexer.token_end_position.bytes <= current_position.bytes) {
LOG("disregard_empty_token");
} else {
found_external_token = true;
@@ -291,6 +291,7 @@
if (lex_mode.lex_state != self->language->lex_modes[ERROR_STATE].lex_state) {
LOG("retry_in_error_mode");
error_mode = true;
lex_mode = self->language->lex_modes[ERROR_STATE];
valid_external_tokens = ts_language_enabled_external_tokens(
self->language,