From 9d668c5004cc6569b26ace4e4ac1b7df307f9fc8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 31 Aug 2017 15:40:43 -0700 Subject: [PATCH] Move incompatible token map into LexTableBuilder --- project.gyp | 1 - src/compiler/build_tables/build_tables.cc | 35 ----- src/compiler/build_tables/build_tables.h | 24 ---- .../build_tables/lex_table_builder.cc | 51 ++++++-- src/compiler/build_tables/lex_table_builder.h | 12 +- .../build_tables/parse_table_builder.cc | 68 ++++------ .../build_tables/parse_table_builder.h | 12 +- src/compiler/compile.cc | 6 +- .../build_tables/lex_table_builder_test.cc | 122 ------------------ tests.gyp | 1 - 10 files changed, 78 insertions(+), 254 deletions(-) delete mode 100644 src/compiler/build_tables/build_tables.cc delete mode 100644 src/compiler/build_tables/build_tables.h delete mode 100644 test/compiler/build_tables/lex_table_builder_test.cc diff --git a/project.gyp b/project.gyp index bbb88438..56f742b3 100644 --- a/project.gyp +++ b/project.gyp @@ -11,7 +11,6 @@ 'externals/json-parser', ], 'sources': [ - 'src/compiler/build_tables/build_tables.cc', 'src/compiler/build_tables/lex_item.cc', 'src/compiler/build_tables/lex_item_transitions.cc', 'src/compiler/build_tables/lex_conflict_manager.cc', diff --git a/src/compiler/build_tables/build_tables.cc b/src/compiler/build_tables/build_tables.cc deleted file mode 100644 index a15aede3..00000000 --- a/src/compiler/build_tables/build_tables.cc +++ /dev/null @@ -1,35 +0,0 @@ -#include "compiler/build_tables/build_tables.h" -#include -#include "compiler/build_tables/lex_table_builder.h" -#include "compiler/build_tables/parse_table_builder.h" -#include "compiler/syntax_grammar.h" -#include "compiler/lexical_grammar.h" -#include "compiler/compile_error.h" - -namespace tree_sitter { -namespace build_tables { - -using std::tuple; -using std::make_tuple; - -tuple build_tables( - const SyntaxGrammar &syntax_grammar, - const LexicalGrammar &lexical_grammar -) { - auto lex_table_builder = LexTableBuilder::create(lexical_grammar); - auto parse_table_builder = ParseTableBuilder::create( - syntax_grammar, - lexical_grammar, - lex_table_builder.get() - ); - - auto parse_table_result = parse_table_builder->build(); - ParseTable parse_table = parse_table_result.first; - const CompileError error = parse_table_result.second; - - LexTable lex_table = lex_table_builder->build(&parse_table); - return make_tuple(parse_table, lex_table, error); -} - -} // namespace build_tables -} // namespace tree_sitter diff --git a/src/compiler/build_tables/build_tables.h b/src/compiler/build_tables/build_tables.h deleted file mode 100644 index ed1f4770..00000000 --- a/src/compiler/build_tables/build_tables.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef COMPILER_BUILD_TABLES_BUILD_TABLES_H_ -#define COMPILER_BUILD_TABLES_BUILD_TABLES_H_ - -#include -#include "compiler/parse_table.h" -#include "compiler/lex_table.h" -#include "compiler/compile_error.h" - -namespace tree_sitter { - -struct SyntaxGrammar; -struct LexicalGrammar; - -namespace build_tables { - -std::tuple build_tables( - const SyntaxGrammar &, - const LexicalGrammar & -); - -} // namespace build_tables -} // namespace tree_sitter - -#endif // COMPILER_BUILD_TABLES_BUILD_TABLES_H_ diff --git a/src/compiler/build_tables/lex_table_builder.cc b/src/compiler/build_tables/lex_table_builder.cc index b50182f3..8e8cff8a 100644 --- a/src/compiler/build_tables/lex_table_builder.cc +++ b/src/compiler/build_tables/lex_table_builder.cc @@ -9,6 +9,7 @@ #include #include "compiler/build_tables/lex_conflict_manager.h" #include "compiler/build_tables/lex_item.h" +#include "compiler/build_tables/lookahead_set.h" #include "compiler/parse_table.h" #include "compiler/lexical_grammar.h" #include "compiler/rule.h" @@ -76,13 +77,18 @@ class LexTableBuilderImpl : public LexTableBuilder { unordered_map lex_state_ids; map following_characters_by_token_index; + vector> incompatible_tokens_by_token_index; CharacterSet separator_start_characters; CharacterSet current_conflict_detection_following_characters; Symbol::Index current_conflict_detection_token_index; bool current_conflict_value; public: - LexTableBuilderImpl(const LexicalGrammar &grammar) : grammar(grammar) { + LexTableBuilderImpl(const SyntaxGrammar &syntax_grammar, + const LexicalGrammar &lexical_grammar, + const vector> &following_tokens_by_token_index) : + grammar(lexical_grammar), + incompatible_tokens_by_token_index(lexical_grammar.variables.size()) { StartingCharacterAggregator separator_character_aggregator; for (const auto &rule : grammar.separators) { separator_rules.push_back(Repeat{rule}); @@ -91,6 +97,26 @@ class LexTableBuilderImpl : public LexTableBuilder { separator_rules.push_back(Blank{}); separator_start_characters = separator_character_aggregator.result; clear(); + + for (unsigned i = 0, n = grammar.variables.size(); i < n; i++) { + Symbol token = Symbol::terminal(i); + auto &incompatible_indices = incompatible_tokens_by_token_index[i]; + + for (unsigned j = 0; j < n; j++) { + if (i == j) continue; + if (detect_conflict(i, j, following_tokens_by_token_index)) { + incompatible_indices.insert(Symbol::terminal(j)); + } + } + + for (const ExternalToken &external_token : syntax_grammar.external_tokens) { + if (external_token.corresponding_internal_token == token) { + for (unsigned j = 0; j < syntax_grammar.external_tokens.size(); j++) { + incompatible_indices.insert(Symbol::external(j)); + } + } + } + } } LexTable build(ParseTable *parse_table) { @@ -104,8 +130,12 @@ class LexTableBuilderImpl : public LexTableBuilder { return lex_table; } + const set &get_incompatible_tokens(Symbol::Index index) const { + return incompatible_tokens_by_token_index[index]; + } + bool detect_conflict(Symbol::Index left, Symbol::Index right, - const vector> &following_terminals_by_terminal_index) { + const vector> &following_tokens_by_token_index) { StartingCharacterAggregator left_starting_characters; StartingCharacterAggregator right_starting_characters; left_starting_characters.apply(grammar.variables[left].rule); @@ -119,7 +149,7 @@ class LexTableBuilderImpl : public LexTableBuilder { auto following_characters_entry = following_characters_by_token_index.find(right); if (following_characters_entry == following_characters_by_token_index.end()) { StartingCharacterAggregator aggregator; - for (auto following_token_index : following_terminals_by_terminal_index[right]) { + for (auto following_token_index : following_tokens_by_token_index[right]) { aggregator.apply(grammar.variables[following_token_index].rule); } following_characters_entry = @@ -369,17 +399,22 @@ class LexTableBuilderImpl : public LexTableBuilder { } }; -unique_ptr LexTableBuilder::create(const LexicalGrammar &grammar) { - return unique_ptr(new LexTableBuilderImpl(grammar)); +unique_ptr LexTableBuilder::create(const SyntaxGrammar &syntax_grammar, + const LexicalGrammar &lexical_grammar, + const vector> &following_tokens) { + return unique_ptr(new LexTableBuilderImpl( + syntax_grammar, + lexical_grammar, + following_tokens + )); } LexTable LexTableBuilder::build(ParseTable *parse_table) { return static_cast(this)->build(parse_table); } -bool LexTableBuilder::detect_conflict(Symbol::Index left, Symbol::Index right, - const vector> &following_terminals) { - return static_cast(this)->detect_conflict(left, right, following_terminals); +const set &LexTableBuilder::get_incompatible_tokens(Symbol::Index token) const { + return static_cast(this)->get_incompatible_tokens(token); } } // namespace build_tables diff --git a/src/compiler/build_tables/lex_table_builder.h b/src/compiler/build_tables/lex_table_builder.h index 3b896bb7..2bb7a56a 100644 --- a/src/compiler/build_tables/lex_table_builder.h +++ b/src/compiler/build_tables/lex_table_builder.h @@ -9,19 +9,19 @@ namespace tree_sitter { struct ParseTable; +struct SyntaxGrammar; struct LexicalGrammar; namespace build_tables { class LexTableBuilder { public: - static std::unique_ptr create(const LexicalGrammar &); + static std::unique_ptr create(const SyntaxGrammar &, + const LexicalGrammar &, + const std::vector> &); LexTable build(ParseTable *); - bool detect_conflict( - rules::Symbol::Index, - rules::Symbol::Index, - const std::vector> &following_terminals_by_terminal_index - ); + const std::set &get_incompatible_tokens(rules::Symbol::Index) const; + protected: LexTableBuilder() = default; }; diff --git a/src/compiler/build_tables/parse_table_builder.cc b/src/compiler/build_tables/parse_table_builder.cc index 7e67b650..3b59c8ae 100644 --- a/src/compiler/build_tables/parse_table_builder.cc +++ b/src/compiler/build_tables/parse_table_builder.cc @@ -19,9 +19,10 @@ namespace build_tables { using std::deque; using std::find; -using std::pair; using std::vector; using std::set; +using std::tuple; +using std::make_tuple; using std::map; using std::move; using std::string; @@ -49,26 +50,20 @@ class ParseTableBuilderImpl : public ParseTableBuilder { deque parse_state_queue; ParseTable parse_table; ParseItemSetBuilder item_set_builder; - LexTableBuilder *lex_table_builder; + unique_ptr lex_table_builder; set fragile_reductions; - vector> incompatible_tokens_by_token_index; vector> following_tokens_by_token_index; bool processing_recovery_states; public: - ParseTableBuilderImpl( - const SyntaxGrammar &syntax_grammar, - const LexicalGrammar &lexical_grammar, - LexTableBuilder *lex_table_builder - ) : grammar(syntax_grammar), + ParseTableBuilderImpl(const SyntaxGrammar &syntax_grammar, const LexicalGrammar &lexical_grammar) + : grammar(syntax_grammar), lexical_grammar(lexical_grammar), item_set_builder(syntax_grammar, lexical_grammar), - lex_table_builder(lex_table_builder), - incompatible_tokens_by_token_index(lexical_grammar.variables.size()), following_tokens_by_token_index(lexical_grammar.variables.size()), processing_recovery_states(false) {} - pair build() { + tuple build() { // Ensure that the empty rename sequence has index 0. parse_table.alias_sequences.push_back({}); @@ -90,9 +85,13 @@ class ParseTableBuilderImpl : public ParseTableBuilder { }}); CompileError error = process_part_state_queue(); - if (error) return {parse_table, error}; + if (error) return make_tuple(parse_table, LexTable(), error); - compute_unmergable_token_pairs(); + lex_table_builder = LexTableBuilder::create( + grammar, + lexical_grammar, + following_tokens_by_token_index + ); processing_recovery_states = true; build_error_parse_state(error_state_id); @@ -100,7 +99,9 @@ class ParseTableBuilderImpl : public ParseTableBuilder { mark_fragile_actions(); remove_duplicate_parse_states(); - return {parse_table, CompileError::none()}; + + auto lex_table = lex_table_builder->build(&parse_table); + return make_tuple(parse_table, lex_table, CompileError::none()); } private: @@ -131,9 +132,9 @@ class ParseTableBuilderImpl : public ParseTableBuilder { Symbol token = Symbol::terminal(i); bool has_non_reciprocal_conflict = false; - for (Symbol incompatible_token : incompatible_tokens_by_token_index[i]) { + for (Symbol incompatible_token : lex_table_builder->get_incompatible_tokens(i)) { if (incompatible_token.is_terminal() && - !incompatible_tokens_by_token_index[incompatible_token.index].count(token)) { + !lex_table_builder->get_incompatible_tokens(incompatible_token.index).count(token)) { has_non_reciprocal_conflict = true; break; } @@ -355,28 +356,6 @@ class ParseTableBuilderImpl : public ParseTableBuilder { return false; } - void compute_unmergable_token_pairs() { - for (unsigned i = 0, n = lexical_grammar.variables.size(); i < n; i++) { - Symbol token = Symbol::terminal(i); - auto &incompatible_indices = incompatible_tokens_by_token_index[i]; - - for (unsigned j = 0; j < n; j++) { - if (i == j) continue; - if (lex_table_builder->detect_conflict(i, j, following_tokens_by_token_index)) { - incompatible_indices.insert(Symbol::terminal(j)); - } - } - - for (const ExternalToken &external_token : grammar.external_tokens) { - if (external_token.corresponding_internal_token == token) { - for (unsigned j = 0; j < grammar.external_tokens.size(); j++) { - incompatible_indices.insert(Symbol::external(j)); - } - } - } - } - } - void remove_duplicate_parse_states() { unordered_map> state_indices_by_signature; @@ -474,7 +453,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder { if (left_entry.second.actions.back().type != ParseActionTypeReduce) return false; if (!has_actions(right_state, left_entry.second)) return false; if (!lookahead.is_built_in()) { - for (const Symbol &incompatible_token : incompatible_tokens_by_token_index[lookahead.index]) { + for (const Symbol &incompatible_token : lex_table_builder->get_incompatible_tokens(lookahead.index)) { if (right_state.terminal_entries.count(incompatible_token)) return false; } } @@ -492,7 +471,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder { if (right_entry.second.actions.back().type != ParseActionTypeReduce) return false; if (!has_actions(left_state, right_entry.second)) return false; if (!lookahead.is_built_in()) { - for (const Symbol &incompatible_token : incompatible_tokens_by_token_index[lookahead.index]) { + for (const Symbol &incompatible_token : lex_table_builder->get_incompatible_tokens(lookahead.index)) { if (left_state.terminal_entries.count(incompatible_token)) return false; } } @@ -805,15 +784,12 @@ class ParseTableBuilderImpl : public ParseTableBuilder { unique_ptr ParseTableBuilder::create( const SyntaxGrammar &syntax_grammar, - const LexicalGrammar &lexical_grammar, - LexTableBuilder *lex_table_builder + const LexicalGrammar &lexical_grammar ) { - return unique_ptr( - new ParseTableBuilderImpl(syntax_grammar, lexical_grammar, lex_table_builder) - ); + return unique_ptr(new ParseTableBuilderImpl(syntax_grammar, lexical_grammar)); } -pair ParseTableBuilder::build() { +tuple ParseTableBuilder::build() { return static_cast(this)->build(); } diff --git a/src/compiler/build_tables/parse_table_builder.h b/src/compiler/build_tables/parse_table_builder.h index bab96243..1cbecb49 100644 --- a/src/compiler/build_tables/parse_table_builder.h +++ b/src/compiler/build_tables/parse_table_builder.h @@ -8,21 +8,17 @@ namespace tree_sitter { struct ParseTable; +struct LexTable; struct SyntaxGrammar; struct LexicalGrammar; namespace build_tables { -class LexTableBuilder; - class ParseTableBuilder { public: - static std::unique_ptr create( - const SyntaxGrammar &, - const LexicalGrammar &, - LexTableBuilder * - ); - std::pair build(); + static std::unique_ptr create(const SyntaxGrammar &, const LexicalGrammar &); + std::tuple build(); + protected: ParseTableBuilder() = default; }; diff --git a/src/compiler/compile.cc b/src/compiler/compile.cc index a887c4fc..ad3a64cb 100644 --- a/src/compiler/compile.cc +++ b/src/compiler/compile.cc @@ -1,6 +1,6 @@ #include "tree_sitter/compiler.h" #include "compiler/prepare_grammar/prepare_grammar.h" -#include "compiler/build_tables/build_tables.h" +#include "compiler/build_tables/parse_table_builder.h" #include "compiler/generate_code/c_code.h" #include "compiler/syntax_grammar.h" #include "compiler/lexical_grammar.h" @@ -30,8 +30,8 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input) { return { nullptr, strdup(error.message.c_str()), error.type }; } - auto table_build_result = - build_tables::build_tables(syntax_grammar, lexical_grammar); + auto builder = build_tables::ParseTableBuilder::create(syntax_grammar, lexical_grammar); + auto table_build_result = builder->build(); const ParseTable &parse_table = get<0>(table_build_result); const LexTable &lex_table = get<1>(table_build_result); error = get<2>(table_build_result); diff --git a/test/compiler/build_tables/lex_table_builder_test.cc b/test/compiler/build_tables/lex_table_builder_test.cc deleted file mode 100644 index e9f70aee..00000000 --- a/test/compiler/build_tables/lex_table_builder_test.cc +++ /dev/null @@ -1,122 +0,0 @@ -#include "test_helper.h" -#include "compiler/lexical_grammar.h" -#include "compiler/build_tables/lex_table_builder.h" - -using namespace build_tables; -using namespace rules; - -START_TEST - -describe("LexTableBuilder::detect_conflict", []() { - vector separators({ - CharacterSet({ ' ', '\t' }), - }); - - it("returns false for tokens that don't match the same string", [&]() { - auto builder = LexTableBuilder::create(LexicalGrammar{ - { - LexicalVariable{ - "token_0", - VariableTypeNamed, - Rule::seq({ - CharacterSet({ 'a' }), - CharacterSet({ 'b' }), - CharacterSet({ 'c' }), - }), - false - }, - LexicalVariable{ - "token_1", - VariableTypeNamed, - Rule::seq({ - CharacterSet({ 'b' }), - CharacterSet({ 'c' }), - CharacterSet({ 'd' }), - }), - false - }, - }, - separators - }); - - AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsFalse()); - AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse()); - }); - - it("returns true when the left token can match a string that the right token matches, " - "plus a separator character", [&]() { - LexicalGrammar grammar{ - { - LexicalVariable{ - "token_0", - VariableTypeNamed, - Rule::repeat(CharacterSet().include_all().exclude('\n')), // regex: /.+/ - false - }, - LexicalVariable{ - "token_1", - VariableTypeNamed, - Rule::seq({ CharacterSet({ 'a' }), CharacterSet({ 'b' }), CharacterSet({ 'c' }) }), // string: 'abc' - true - }, - }, - separators - }; - - auto builder = LexTableBuilder::create(grammar); - AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsTrue()); - AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse()); - - grammar.variables[1].is_string = false; - AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsTrue()); - AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse()); - }); - - it("returns true when the left token matches a string that the right token matches, " - "plus the first character of some token that can follow the right token", [&]() { - LexicalGrammar grammar{ - { - LexicalVariable{ - "token_0", - VariableTypeNamed, - Rule::seq({ - CharacterSet({ '>' }), - CharacterSet({ '=' }), - }), - true - }, - LexicalVariable{ - "token_1", - VariableTypeNamed, - Rule::seq({ - CharacterSet({ '>' }), - }), - true - }, - LexicalVariable{ - "token_2", - VariableTypeNamed, - Rule::seq({ - CharacterSet({ '=' }), - }), - true - }, - }, - separators - }; - - // If no tokens can follow token_1, then there's no conflict - auto builder = LexTableBuilder::create(grammar); - vector> following_tokens_by_token_index(3); - AssertThat(builder->detect_conflict(0, 1, following_tokens_by_token_index), IsFalse()); - AssertThat(builder->detect_conflict(1, 0, following_tokens_by_token_index), IsFalse()); - - // If token_2 can follow token_1, then token_0 conflicts with token_1 - builder = LexTableBuilder::create(grammar); - following_tokens_by_token_index[1].insert(2); - AssertThat(builder->detect_conflict(0, 1, following_tokens_by_token_index), IsTrue()); - AssertThat(builder->detect_conflict(1, 0, following_tokens_by_token_index), IsFalse()); - }); -}); - -END_TEST diff --git a/tests.gyp b/tests.gyp index af11b878..02012a0a 100644 --- a/tests.gyp +++ b/tests.gyp @@ -39,7 +39,6 @@ 'sources': [ 'test/compiler/build_tables/lex_conflict_manager_test.cc', 'test/compiler/build_tables/lex_item_test.cc', - 'test/compiler/build_tables/lex_table_builder_test.cc', 'test/compiler/build_tables/parse_item_set_builder_test.cc', 'test/compiler/build_tables/rule_can_be_blank_test.cc', 'test/compiler/prepare_grammar/expand_repeats_test.cc',