diff --git a/project.gyp b/project.gyp index 5cd7b1d1..8871fc16 100644 --- a/project.gyp +++ b/project.gyp @@ -11,13 +11,12 @@ 'externals/json-parser', ], 'sources': [ - 'src/compiler/build_tables/build_lex_table.cc', 'src/compiler/build_tables/build_parse_table.cc', 'src/compiler/build_tables/build_tables.cc', - 'src/compiler/build_tables/compatible_tokens.cc', 'src/compiler/build_tables/lex_item.cc', 'src/compiler/build_tables/lex_item_transitions.cc', 'src/compiler/build_tables/lex_conflict_manager.cc', + 'src/compiler/build_tables/lex_table_builder.cc', 'src/compiler/build_tables/lookahead_set.cc', 'src/compiler/build_tables/parse_item.cc', 'src/compiler/build_tables/parse_item_set_builder.cc', diff --git a/spec/compiler/build_tables/compatible_tokens_spec.cc b/spec/compiler/build_tables/compatible_tokens_spec.cc deleted file mode 100644 index 30d88d96..00000000 --- a/spec/compiler/build_tables/compatible_tokens_spec.cc +++ /dev/null @@ -1,38 +0,0 @@ -#include "spec_helper.h" -#include "compiler/rules/character_set.h" -#include "compiler/build_tables/compatible_tokens.h" -#include "compiler/lexical_grammar.h" -#include "helpers/rule_helpers.h" -#include "helpers/stream_methods.h" -#include "compiler/rules.h" - -using namespace rules; -using namespace build_tables; - -START_TEST - -describe("recovery_tokens(rule)", []() { - it("includes rules that can only begin and end with an explicit set of characters", [&]() { - LexicalGrammar grammar; - - grammar.separators = { - character({ ' ' }), - }; - - grammar.variables = { - LexicalVariable{"var0", VariableTypeNamed, character({}, false), false}, - LexicalVariable{"var1", VariableTypeNamed, seq({ - character({ 'a', 'b' }), - character({}, false), - character({ 'c', 'd' }), - }), false}, - }; - - AssertThat( - get_compatible_tokens(grammar).recovery_tokens, - Equals>({ Symbol(1, Symbol::Terminal) }) - ); - }); -}); - -END_TEST diff --git a/spec/compiler/build_tables/lex_conflict_manager_spec.cc b/spec/compiler/build_tables/lex_conflict_manager_spec.cc index 3aa75a4c..f7382a74 100644 --- a/spec/compiler/build_tables/lex_conflict_manager_spec.cc +++ b/spec/compiler/build_tables/lex_conflict_manager_spec.cc @@ -20,6 +20,10 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() { Symbol sym4(3, Symbol::Terminal); LexItemSet item_set({ LexItem(sym4, blank() )}); + before_each([&]() { + conflict_manager = LexConflictManager(); + }); + it("favors advance actions over empty accept token actions", [&]() { update = conflict_manager.resolve(item_set, AdvanceAction(2, {0, 0}, true), AcceptTokenAction()); AssertThat(update, IsTrue()); @@ -65,6 +69,7 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() { describe("advance/accept-token conflicts", [&]() { describe("when the token to accept has higher precedence", [&]() { it("prefers the accept-token action", [&]() { + AssertThat(conflict_manager.possible_extensions, IsEmpty()); update = conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 3, true)); AssertThat(update, IsFalse()); AssertThat(conflict_manager.possible_extensions, IsEmpty()); @@ -72,13 +77,9 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() { }); describe("when the token to accept does not have a higher precedence", [&]() { - it("favors the advance action", [&]() { + it("favors the advance action and adds the in-progress tokens as possible extensions of the discarded token", [&]() { update = conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 2, true)); AssertThat(update, IsTrue()); - }); - - it("adds the in-progress tokens as possible extensions of the discarded token", [&]() { - conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 3, true)); AssertThat(conflict_manager.possible_extensions[sym3.index], Contains(sym4.index)); }); }); diff --git a/src/compiler/build_tables/build_lex_table.h b/src/compiler/build_tables/build_lex_table.h deleted file mode 100644 index 5310591e..00000000 --- a/src/compiler/build_tables/build_lex_table.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_ -#define COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_ - -#include "compiler/lex_table.h" - -namespace tree_sitter { - -struct LexicalGrammar; -struct ParseTable; - -namespace build_tables { - -LexTable build_lex_table(ParseTable *, const LexicalGrammar &); - -} // namespace build_tables -} // namespace tree_sitter - -#endif // COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_ diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 5fc26c52..1f8a6939 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -12,7 +12,7 @@ #include "compiler/syntax_grammar.h" #include "compiler/rules/symbol.h" #include "compiler/rules/built_in_symbols.h" -#include "compiler/build_tables/compatible_tokens.h" +#include "compiler/build_tables/lex_table_builder.h" namespace tree_sitter { namespace build_tables { @@ -40,7 +40,7 @@ class ParseTableBuilder { set conflicts; ParseItemSetBuilder item_set_builder; set fragile_productions; - CompatibleTokensResult compatible_tokens; + vector> incompatible_token_indices_by_index; bool allow_any_conflict; public: @@ -49,7 +49,6 @@ class ParseTableBuilder { : grammar(grammar), lexical_grammar(lex_grammar), item_set_builder(grammar, lex_grammar), - compatible_tokens(get_compatible_tokens(lex_grammar)), allow_any_conflict(false) {} pair build() { @@ -76,7 +75,7 @@ class ParseTableBuilder { return { parse_table, error }; } - update_unmergable_token_pairs(); + compute_unmergable_token_pairs(); build_error_parse_state(); @@ -112,8 +111,18 @@ class ParseTableBuilder { void build_error_parse_state() { ParseState error_state; - for (const Symbol symbol : compatible_tokens.recovery_tokens) { - add_out_of_context_parse_state(&error_state, symbol); + for (Symbol::Index i = 0; i < lexical_grammar.variables.size(); i++) { + bool has_non_reciprocal_conflict = false; + for (Symbol::Index incompatible_index : incompatible_token_indices_by_index[i]) { + if (!incompatible_token_indices_by_index[incompatible_index].count(i)) { + has_non_reciprocal_conflict = true; + break; + } + } + + if (!has_non_reciprocal_conflict) { + add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::Terminal)); + } } for (const Symbol &symbol : grammar.extra_tokens) { @@ -294,20 +303,29 @@ class ParseTableBuilder { } } - void update_unmergable_token_pairs() { - for (const ParseState &state : parse_table.states) { - for (Symbol::Index token_index = 0, token_count = lexical_grammar.variables.size(); token_index < token_count; token_index++) { - Symbol token(token_index, Symbol::Terminal); - if (state.terminal_entries.count(token)) { - auto &incompatible_token_indices = compatible_tokens.unmergeable_pairs[token_index]; - auto iter = incompatible_token_indices.begin(); - while (iter != incompatible_token_indices.end()) { - if (state.terminal_entries.count(Symbol(*iter, Symbol::NonTerminal))) { - iter = incompatible_token_indices.erase(iter); - } else { - ++iter; - } - } + void compute_unmergable_token_pairs() { + incompatible_token_indices_by_index.resize(lexical_grammar.variables.size()); + + // First, assume that all tokens are mutually incompatible. + for (Symbol::Index i = 0, n = lexical_grammar.variables.size(); i < n; i++) { + auto &incompatible_indices = incompatible_token_indices_by_index[i]; + for (Symbol::Index j = 0; j < n; j++) { + if (j != i) incompatible_indices.insert(j); + } + } + + // For the remaining possibly-incompatible pairs of tokens, check if they + // are actually incompatible by actually generating lexical states that + // contain them both. + auto lex_table_builder = LexTableBuilder::create(lexical_grammar); + for (Symbol::Index i = 0, n = lexical_grammar.variables.size(); i < n; i++) { + auto &incompatible_indices = incompatible_token_indices_by_index[i]; + auto iter = incompatible_indices.begin(); + while (iter != incompatible_indices.end()) { + if (lex_table_builder->detect_conflict(i, *iter)) { + ++iter; + } else { + iter = incompatible_indices.erase(iter); } } } @@ -403,17 +421,15 @@ class ParseTableBuilder { for (auto &entry : state.terminal_entries) { Symbol lookahead = entry.first; const vector &actions = entry.second.actions; - auto &incompatible_token_indices = compatible_tokens.unmergeable_pairs[lookahead.index]; + auto &incompatible_token_indices = incompatible_token_indices_by_index[lookahead.index]; const auto &other_entry = other.terminal_entries.find(lookahead); if (other_entry == other.terminal_entries.end()) { + if (lookahead.is_external()) return false; if (!lookahead.is_built_in()) { - if (!compatible_tokens.recovery_tokens.count(lookahead)) - return false; for (Symbol::Index incompatible_index : incompatible_token_indices) { - if (other.terminal_entries.count(Symbol(incompatible_index, Symbol::Terminal))) { - return false; - } + Symbol incompatible_symbol(incompatible_index, Symbol::Terminal); + if (other.terminal_entries.count(incompatible_symbol)) return false; } } if (actions.back().type != ParseActionTypeReduce) @@ -430,16 +446,14 @@ class ParseTableBuilder { for (auto &entry : other.terminal_entries) { Symbol lookahead = entry.first; const vector &actions = entry.second.actions; - auto &incompatible_token_indices = compatible_tokens.unmergeable_pairs[lookahead.index]; + auto &incompatible_token_indices = incompatible_token_indices_by_index[lookahead.index]; if (!state.terminal_entries.count(lookahead)) { + if (lookahead.is_external()) return false; if (!lookahead.is_built_in()) { - if (!compatible_tokens.recovery_tokens.count(lookahead)) - return false; for (Symbol::Index incompatible_index : incompatible_token_indices) { - if (state.terminal_entries.count(Symbol(incompatible_index, Symbol::Terminal))) { - return false; - } + Symbol incompatible_symbol(incompatible_index, Symbol::Terminal); + if (state.terminal_entries.count(incompatible_symbol)) return false; } } if (actions.back().type != ParseActionTypeReduce) diff --git a/src/compiler/build_tables/build_tables.cc b/src/compiler/build_tables/build_tables.cc index 21ed9f40..de9fa466 100644 --- a/src/compiler/build_tables/build_tables.cc +++ b/src/compiler/build_tables/build_tables.cc @@ -1,6 +1,6 @@ #include "compiler/build_tables/build_tables.h" #include -#include "compiler/build_tables/build_lex_table.h" +#include "compiler/build_tables/lex_table_builder.h" #include "compiler/build_tables/build_parse_table.h" #include "compiler/syntax_grammar.h" #include "compiler/lexical_grammar.h" @@ -16,12 +16,12 @@ using std::make_tuple; tuple build_tables( const SyntaxGrammar &grammar, - const LexicalGrammar &lex_grammar + const LexicalGrammar &lexical_grammar ) { - auto parse_table_result = build_parse_table(grammar, lex_grammar); + auto parse_table_result = build_parse_table(grammar, lexical_grammar); ParseTable parse_table = parse_table_result.first; const CompileError error = parse_table_result.second; - LexTable lex_table = build_lex_table(&parse_table, lex_grammar); + LexTable lex_table = LexTableBuilder::create(lexical_grammar)->build(&parse_table); return make_tuple(parse_table, lex_table, error); } diff --git a/src/compiler/build_tables/compatible_tokens.cc b/src/compiler/build_tables/compatible_tokens.cc deleted file mode 100644 index 98099612..00000000 --- a/src/compiler/build_tables/compatible_tokens.cc +++ /dev/null @@ -1,136 +0,0 @@ -#include "compiler/build_tables/compatible_tokens.h" -#include "compiler/lexical_grammar.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/character_set.h" -#include "compiler/rules/repeat.h" -#include "compiler/rules/visitor.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/metadata.h" - -namespace tree_sitter { -namespace build_tables { - -using rules::Symbol; -using std::set; - -template -class CharacterAggregator : public rules::RuleFn { - void apply_to(const rules::Seq *rule) { - if (left) apply(rule->left); - if (right) apply(rule->right); - } - - void apply_to(const rules::Choice *rule) { - for (const rule_ptr &element : rule->elements) { - apply(element); - } - } - - void apply_to(const rules::Repeat *rule) { - apply(rule->content); - } - - void apply_to(const rules::Metadata *rule) { - apply(rule->rule); - } - - void apply_to(const rules::CharacterSet *rule) { - result.add_set(*rule); - } - - public: - rules::CharacterSet result; -}; - -template -class CharacterIntersector : public rules::RuleFn { - bool apply_to(const rules::Seq *rule) { - bool result = false; - if (left) result = apply(rule->left); - if (right && !result) result = apply(rule->right); - return result; - } - - bool apply_to(const rules::Choice *rule) { - for (const rule_ptr &element : rule->elements) { - if (apply(element)) return true; - } - return false; - } - - bool apply_to(const rules::Repeat *rule) { - return apply(rule->content); - } - - bool apply_to(const rules::Metadata *rule) { - return apply(rule->rule); - } - - bool apply_to(const rules::CharacterSet *rule) { - return character_set->intersects(*rule); - } - - public: - rules::CharacterSet *character_set; - - CharacterIntersector(rules::CharacterSet *set) : character_set {set} {} -}; - -using FirstCharacters = CharacterAggregator; -using LastCharacters = CharacterAggregator; -using AllCharacters = CharacterAggregator; -using FirstCharactersIntersector = CharacterIntersector; - -CompatibleTokensResult get_compatible_tokens(const LexicalGrammar &grammar) { - CompatibleTokensResult result; - result.unmergeable_pairs.resize(grammar.variables.size()); - - AllCharacters all_separator_characters; - for (const rule_ptr &separator : grammar.separators) - all_separator_characters.apply(separator); - - for (size_t i = 0; i < grammar.variables.size(); i++) { - Symbol symbol(i, Symbol::Terminal); - const LexicalVariable &variable = grammar.variables[i]; - rule_ptr rule = variable.rule; - - FirstCharacters first_characters; - first_characters.apply(rule); - - LastCharacters last_characters; - last_characters.apply(rule); - - AllCharacters all_characters; - all_characters.apply(rule); - - bool has_distinct_start = - !first_characters.result.includes_all && - !first_characters.result.intersects(all_separator_characters.result); - - bool has_distinct_end = - !last_characters.result.includes_all && - !last_characters.result.intersects(all_separator_characters.result); - - bool has_separators = - all_characters.result.intersects(all_separator_characters.result); - - if ((has_distinct_start && has_distinct_end) || !has_separators) - result.recovery_tokens.insert(symbol); - - for (size_t j = 0; j < i; j++) { - const LexicalVariable &other_variable = grammar.variables[j]; - if (has_separators) { - FirstCharactersIntersector intersector(&first_characters.result); - if (intersector.apply(other_variable.rule)) { - result.unmergeable_pairs[i].insert(j); - result.unmergeable_pairs[j].insert(i); - } - } - } - } - - return result; -} - -} // namespace build_tables -} // namespace tree_sitter diff --git a/src/compiler/build_tables/compatible_tokens.h b/src/compiler/build_tables/compatible_tokens.h deleted file mode 100644 index f8061902..00000000 --- a/src/compiler/build_tables/compatible_tokens.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef COMPILER_BUILD_TABLES_COMPATIBLE_TOKENS_H_ -#define COMPILER_BUILD_TABLES_COMPATIBLE_TOKENS_H_ - -#include "compiler/rule.h" -#include "compiler/rules/symbol.h" -#include -#include -#include - -namespace tree_sitter { - -struct LexicalGrammar; - -namespace build_tables { - -struct CompatibleTokensResult { - std::set recovery_tokens; - std::vector> unmergeable_pairs; -}; - -CompatibleTokensResult get_compatible_tokens(const LexicalGrammar &); - -} // namespace build_tables -} // namespace tree_sitter - -#endif // COMPILER_BUILD_TABLES_COMPATIBLE_TOKENS_H_ diff --git a/src/compiler/build_tables/lex_conflict_manager.cc b/src/compiler/build_tables/lex_conflict_manager.cc index 3fc22ed2..0fbdf4d9 100644 --- a/src/compiler/build_tables/lex_conflict_manager.cc +++ b/src/compiler/build_tables/lex_conflict_manager.cc @@ -10,11 +10,10 @@ namespace build_tables { bool LexConflictManager::resolve(const LexItemSet &item_set, const AdvanceAction &new_action, const AcceptTokenAction &old_action) { - if (!old_action.is_present()) - return true; if (new_action.precedence_range.max >= old_action.precedence) { - for (const LexItem &item : item_set.entries) + for (const LexItem &item : item_set.entries) { possible_extensions[old_action.symbol.index].insert(item.lhs.index); + } return true; } else { return false; @@ -23,30 +22,26 @@ bool LexConflictManager::resolve(const LexItemSet &item_set, bool LexConflictManager::resolve(const AcceptTokenAction &new_action, const AcceptTokenAction &old_action) { - if (!old_action.is_present()) - return true; - - int old_precedence = old_action.precedence; - int new_precedence = new_action.precedence; - bool result; - if (new_precedence > old_precedence) + if (new_action.precedence > old_action.precedence) { result = true; - else if (new_precedence < old_precedence) + } else if (new_action.precedence < old_action.precedence) { result = false; - else if (new_action.is_string && !old_action.is_string) + } else if (new_action.is_string && !old_action.is_string) { result = true; - else if (old_action.is_string && !new_action.is_string) + } else if (old_action.is_string && !new_action.is_string) { result = false; - else if (new_action.symbol.index < old_action.symbol.index) + } else if (new_action.symbol.index < old_action.symbol.index) { result = true; - else + } else { result = false; + } - if (result) + if (result) { possible_homonyms[old_action.symbol.index].insert(new_action.symbol.index); - else + } else { possible_homonyms[new_action.symbol.index].insert(old_action.symbol.index); + } return result; } diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/lex_table_builder.cc similarity index 63% rename from src/compiler/build_tables/build_lex_table.cc rename to src/compiler/build_tables/lex_table_builder.cc index 472e3b59..e0a18914 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/lex_table_builder.cc @@ -1,4 +1,4 @@ -#include "compiler/build_tables/build_lex_table.h" +#include "compiler/build_tables/lex_table_builder.h" #include #include #include @@ -16,15 +16,18 @@ #include "compiler/rules/repeat.h" #include "compiler/rules/seq.h" #include "compiler/rules/blank.h" +#include "compiler/rules/visitor.h" namespace tree_sitter { namespace build_tables { using std::map; +using std::pair; using std::set; using std::string; using std::vector; using std::unordered_map; +using std::unique_ptr; using rules::Blank; using rules::Choice; using rules::CharacterSet; @@ -33,37 +36,74 @@ using rules::Symbol; using rules::Metadata; using rules::Seq; -class LexTableBuilder { +class StartingCharacterAggregator : public rules::RuleFn { + void apply_to(const rules::Seq *rule) { + apply(rule->left); + } + + void apply_to(const rules::Choice *rule) { + for (const rule_ptr &element : rule->elements) apply(element); + } + + void apply_to(const rules::Repeat *rule) { + apply(rule->content); + } + + void apply_to(const rules::Metadata *rule) { + apply(rule->rule); + } + + void apply_to(const rules::CharacterSet *rule) { + result.add_set(*rule); + } + + public: + CharacterSet result; +}; + +class LexTableBuilderImpl : public LexTableBuilder { LexTable lex_table; - ParseTable *parse_table; - const LexicalGrammar lex_grammar; + const LexicalGrammar grammar; vector separator_rules; + CharacterSet first_separator_characters; LexConflictManager conflict_manager; unordered_map lex_state_ids; public: - LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar) - : parse_table(parse_table), lex_grammar(lex_grammar) { - for (const rule_ptr &rule : lex_grammar.separators) + vector shadowed_token_indices; + + LexTableBuilderImpl(const LexicalGrammar &grammar) : grammar(grammar) { + StartingCharacterAggregator starting_character_aggregator; + for (const rule_ptr &rule : grammar.separators) { separator_rules.push_back(Repeat::build(rule)); + starting_character_aggregator.apply(rule); + } separator_rules.push_back(Blank::build()); + first_separator_characters = starting_character_aggregator.result; + shadowed_token_indices.resize(grammar.variables.size()); } - LexTable build() { - for (ParseState &parse_state : parse_table->states) - add_lex_state_for_parse_state(&parse_state); - - mark_fragile_tokens(); - remove_duplicate_lex_states(); - + LexTable build(ParseTable *parse_table) { + for (ParseState &parse_state : parse_table->states) { + parse_state.lex_state_id = add_lex_state( + item_set_for_terminals(parse_state.terminal_entries) + ); + } + mark_fragile_tokens(parse_table); + remove_duplicate_lex_states(parse_table); return lex_table; } - private: - void add_lex_state_for_parse_state(ParseState *parse_state) { - parse_state->lex_state_id = add_lex_state( - item_set_for_terminals(parse_state->terminal_entries) - ); + bool detect_conflict(Symbol::Index left, Symbol::Index right) { + clear(); + + map terminals; + terminals[Symbol(left, Symbol::Terminal)]; + terminals[Symbol(right, Symbol::Terminal)]; + + add_lex_state(item_set_for_terminals(terminals)); + + return shadowed_token_indices[right]; } LexStateId add_lex_state(const LexItemSet &item_set) { @@ -80,6 +120,13 @@ class LexTableBuilder { } } + void clear() { + lex_table.states.clear(); + lex_state_ids.clear(); + shadowed_token_indices.assign(grammar.variables.size(), false); + } + + private: void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) { for (const auto &pair : item_set.transitions()) { const CharacterSet &characters = pair.first; @@ -87,11 +134,28 @@ class LexTableBuilder { AdvanceAction action(-1, transition.precedence, transition.in_main_token); auto current_action = lex_table.states[state_id].accept_action; - if (conflict_manager.resolve(transition.destination, action, - current_action)) { - action.state_index = add_lex_state(transition.destination); - lex_table.states[state_id].advance_actions[characters] = action; + if (current_action.is_present()) { + bool prefer_advancing = conflict_manager.resolve(transition.destination, action, current_action); + bool matches_accepted_token = false; + for (const LexItem &item : transition.destination.entries) { + if (item.lhs == current_action.symbol) { + matches_accepted_token = true; + } else if (!transition.in_main_token && !item.lhs.is_built_in() && !prefer_advancing) { + shadowed_token_indices[item.lhs.index] = true; + } + } + + if (!matches_accepted_token && characters.intersects(first_separator_characters)) { + shadowed_token_indices[current_action.symbol.index] = true; + } + + if (!prefer_advancing) { + continue; + } } + + action.state_index = add_lex_state(transition.destination); + lex_table.states[state_id].advance_actions[characters] = action; } } @@ -101,16 +165,21 @@ class LexTableBuilder { if (completion_status.is_done) { AcceptTokenAction action(item.lhs, completion_status.precedence.max, item.lhs.is_built_in() || - lex_grammar.variables[item.lhs.index].is_string); + grammar.variables[item.lhs.index].is_string); auto current_action = lex_table.states[state_id].accept_action; - if (conflict_manager.resolve(action, current_action)) - lex_table.states[state_id].accept_action = action; + if (current_action.is_present()) { + if (!conflict_manager.resolve(action, current_action)) { + continue; + } + } + + lex_table.states[state_id].accept_action = action; } } } - void mark_fragile_tokens() { + void mark_fragile_tokens(ParseTable *parse_table) { for (ParseState &state : parse_table->states) { for (auto &entry : state.terminal_entries) { Symbol symbol = entry.first; @@ -138,7 +207,7 @@ class LexTableBuilder { } } - void remove_duplicate_lex_states() { + void remove_duplicate_lex_states(ParseTable *parse_table) { for (LexState &state : lex_table.states) { state.accept_action.is_string = false; state.accept_action.precedence = 0; @@ -229,7 +298,7 @@ class LexTableBuilder { if (symbol == rules::END_OF_INPUT()) return { CharacterSet().include(0).copy() }; - rule_ptr rule = lex_grammar.variables[symbol.index].rule; + rule_ptr rule = grammar.variables[symbol.index].rule; auto choice = rule->as(); if (choice) @@ -239,8 +308,16 @@ class LexTableBuilder { } }; -LexTable build_lex_table(ParseTable *table, const LexicalGrammar &grammar) { - return LexTableBuilder(table, grammar).build(); +unique_ptr LexTableBuilder::create(const LexicalGrammar &grammar) { + return unique_ptr(new LexTableBuilderImpl(grammar)); +} + +LexTable LexTableBuilder::build(ParseTable *parse_table) { + return static_cast(this)->build(parse_table); +} + +bool LexTableBuilder::detect_conflict(Symbol::Index left, Symbol::Index right) { + return static_cast(this)->detect_conflict(left, right); } } // namespace build_tables diff --git a/src/compiler/build_tables/lex_table_builder.h b/src/compiler/build_tables/lex_table_builder.h new file mode 100644 index 00000000..91f24f70 --- /dev/null +++ b/src/compiler/build_tables/lex_table_builder.h @@ -0,0 +1,26 @@ +#ifndef COMPILER_BUILD_TABLES_LEX_TABLE_BUILDER_H_ +#define COMPILER_BUILD_TABLES_LEX_TABLE_BUILDER_H_ + +#include +#include "compiler/lex_table.h" + +namespace tree_sitter { + +struct ParseTable; +struct LexicalGrammar; + +namespace build_tables { + +class LexTableBuilder { + public: + static std::unique_ptr create(const LexicalGrammar &); + LexTable build(ParseTable *); + bool detect_conflict(rules::Symbol::Index, rules::Symbol::Index); + protected: + LexTableBuilder() = default; +}; + +} // namespace build_tables +} // namespace tree_sitter + +#endif // COMPILER_BUILD_TABLES_LEX_TABLE_BUILDER_H_