From f71d7bae1687ae94ebdf83e47a4bee19af179293 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 8 Apr 2014 18:47:42 -0700 Subject: [PATCH] Record parse conflicts when compiling grammars Need to remove duplicate conflicts --- examples/parsers/arithmetic.c | 15 +-- examples/parsers/golang.c | 5 +- examples/parsers/javascript.c | 5 +- examples/parsers/json.c | 5 +- .../build_tables/conflict_manager_spec.cc | 62 +++++++---- .../name_symbols/name_symbols_spec.cc | 31 +++--- src/compiler/build_tables/build_tables.cc | 44 +++++--- src/compiler/build_tables/build_tables.h | 4 +- src/compiler/build_tables/conflict_manager.cc | 100 ++++++++++++++++++ .../build_tables/conflict_manager.cpp | 92 ---------------- src/compiler/build_tables/conflict_manager.h | 14 ++- src/compiler/compile.cc | 6 +- src/compiler/name_symbols/name_symbols.cc | 28 +++-- src/compiler/name_symbols/name_symbols.h | 2 +- 14 files changed, 246 insertions(+), 167 deletions(-) create mode 100644 src/compiler/build_tables/conflict_manager.cc delete mode 100644 src/compiler/build_tables/conflict_manager.cpp diff --git a/examples/parsers/arithmetic.c b/examples/parsers/arithmetic.c index ca43988a..8b8fc5c5 100644 --- a/examples/parsers/arithmetic.c +++ b/examples/parsers/arithmetic.c @@ -45,8 +45,8 @@ SYMBOL_NAMES = { [ts_aux_sym_token5] = "'*'", [ts_aux_sym_token6] = "'/'", [ts_aux_sym_token7] = "'^'", - [ts_builtin_sym_end] = "end", - [ts_builtin_sym_error] = "error", + [ts_builtin_sym_end] = "EOF", + [ts_builtin_sym_error] = "ERROR", }; HIDDEN_SYMBOLS = { @@ -361,6 +361,7 @@ LEX_FN() { ADVANCE(11); LEX_ERROR(); case ts_lex_state_error: + START_TOKEN(); if (lookahead == '\0') ADVANCE(1); if (('\t' <= lookahead && lookahead <= '\n') || @@ -568,7 +569,7 @@ PARSE_TABLE = { [4] = { [ts_aux_sym_token5] = SHIFT(5), [ts_aux_sym_token6] = SHIFT(85), - [ts_builtin_sym_end] = REDUCE(ts_sym_quotient, 1), + [ts_builtin_sym_end] = REDUCE(ts_sym_product, 1), }, [5] = { [ts_sym__operand2] = SHIFT(6), @@ -636,7 +637,7 @@ PARSE_TABLE = { [ts_aux_sym_token2] = REDUCE(ts_sym_sum, 3), }, [15] = { - [ts_aux_sym_token2] = REDUCE(ts_sym_quotient, 1), + [ts_aux_sym_token2] = REDUCE(ts_sym_product, 1), [ts_aux_sym_token5] = SHIFT(16), [ts_aux_sym_token6] = SHIFT(61), }, @@ -686,9 +687,9 @@ PARSE_TABLE = { [ts_builtin_sym_error] = SHIFT(54), }, [23] = { - [ts_aux_sym_token2] = REDUCE(ts_sym_quotient, 1), - [ts_aux_sym_token3] = REDUCE(ts_sym_quotient, 1), - [ts_aux_sym_token4] = REDUCE(ts_sym_quotient, 1), + [ts_aux_sym_token2] = REDUCE(ts_sym_product, 1), + [ts_aux_sym_token3] = REDUCE(ts_sym_product, 1), + [ts_aux_sym_token4] = REDUCE(ts_sym_product, 1), [ts_aux_sym_token5] = SHIFT(24), [ts_aux_sym_token6] = SHIFT(52), }, diff --git a/examples/parsers/golang.c b/examples/parsers/golang.c index 8ec926a4..42311d0e 100644 --- a/examples/parsers/golang.c +++ b/examples/parsers/golang.c @@ -107,8 +107,8 @@ SYMBOL_NAMES = { [ts_aux_sym_token7] = "'['", [ts_aux_sym_token8] = "']'", [ts_aux_sym_token9] = "','", - [ts_builtin_sym_end] = "end", - [ts_builtin_sym_error] = "error", + [ts_builtin_sym_end] = "EOF", + [ts_builtin_sym_error] = "ERROR", }; HIDDEN_SYMBOLS = { @@ -1047,6 +1047,7 @@ LEX_FN() { ADVANCE(20); ACCEPT_TOKEN(ts_sym__var); case ts_lex_state_error: + START_TOKEN(); if (lookahead == '\0') ADVANCE(1); if (('\t' <= lookahead && lookahead <= '\n') || diff --git a/examples/parsers/javascript.c b/examples/parsers/javascript.c index 4e3b6ee7..d7983d72 100644 --- a/examples/parsers/javascript.c +++ b/examples/parsers/javascript.c @@ -113,8 +113,8 @@ SYMBOL_NAMES = { [ts_aux_sym_token7] = "','", [ts_aux_sym_token8] = "'.'", [ts_aux_sym_token9] = "'['", - [ts_builtin_sym_end] = "end", - [ts_builtin_sym_error] = "error", + [ts_builtin_sym_end] = "EOF", + [ts_builtin_sym_error] = "ERROR", }; HIDDEN_SYMBOLS = { @@ -2010,6 +2010,7 @@ LEX_FN() { ADVANCE(59); ACCEPT_TOKEN(ts_sym__terminator); case ts_lex_state_error: + START_TOKEN(); if (lookahead == '\0') ADVANCE(1); if ((lookahead == '\t') || diff --git a/examples/parsers/json.c b/examples/parsers/json.c index 31fe9404..b5358bec 100644 --- a/examples/parsers/json.c +++ b/examples/parsers/json.c @@ -39,8 +39,8 @@ SYMBOL_NAMES = { [ts_aux_sym_token4] = "'}'", [ts_aux_sym_token5] = "'['", [ts_aux_sym_token6] = "']'", - [ts_builtin_sym_end] = "end", - [ts_builtin_sym_error] = "error", + [ts_builtin_sym_end] = "EOF", + [ts_builtin_sym_error] = "ERROR", }; HIDDEN_SYMBOLS = { @@ -326,6 +326,7 @@ LEX_FN() { ADVANCE(4); LEX_ERROR(); case ts_lex_state_error: + START_TOKEN(); if (lookahead == '\0') ADVANCE(1); if (('\t' <= lookahead && lookahead <= '\n') || diff --git a/spec/compiler/build_tables/conflict_manager_spec.cc b/spec/compiler/build_tables/conflict_manager_spec.cc index 6b1dd6b5..c3afdf91 100644 --- a/spec/compiler/build_tables/conflict_manager_spec.cc +++ b/spec/compiler/build_tables/conflict_manager_spec.cc @@ -7,8 +7,7 @@ using namespace build_tables; START_TEST describe("resolving parse conflicts", []() { - LexAction lex_action; - ParseAction parse_action; + bool should_update; ConflictManager *manager; PreparedGrammar parse_grammar({ @@ -22,7 +21,12 @@ describe("resolving parse conflicts", []() { }, {}); before_each([&]() { - manager = new ConflictManager(parse_grammar, lex_grammar); + manager = new ConflictManager(parse_grammar, lex_grammar, { + { Symbol("rule1"), "rule1" }, + { Symbol("rule2"), "rule2" }, + { Symbol("token1"), "token1" }, + { Symbol("token2"), "token2" }, + }); }); after_each([&]() { @@ -34,19 +38,19 @@ describe("resolving parse conflicts", []() { Symbol sym2("token2"); it("favors non-errors over lexical errors", [&]() { - lex_action = manager->resolve_lex_action(LexAction::Error(), LexAction::Advance(2)); - AssertThat(lex_action, Equals(LexAction::Advance(2))); + should_update = manager->resolve_lex_action(LexAction::Error(), LexAction::Advance(2)); + AssertThat(should_update, IsTrue()); - lex_action = manager->resolve_lex_action(LexAction::Advance(2), LexAction::Error()); - AssertThat(lex_action, Equals(LexAction::Advance(2))); + should_update = manager->resolve_lex_action(LexAction::Advance(2), LexAction::Error()); + AssertThat(should_update, IsFalse()); }); it("prefers tokens that are listed earlier in the grammar", [&]() { - lex_action = manager->resolve_lex_action(LexAction::Accept(sym1), LexAction::Accept(sym2)); - AssertThat(lex_action, Equals(LexAction::Accept(sym1))); + should_update = manager->resolve_lex_action(LexAction::Accept(sym1), LexAction::Accept(sym2)); + AssertThat(should_update, IsFalse()); - lex_action = manager->resolve_lex_action(LexAction::Accept(sym2), LexAction::Accept(sym1)); - AssertThat(lex_action, Equals(LexAction::Accept(sym1))); + should_update = manager->resolve_lex_action(LexAction::Accept(sym2), LexAction::Accept(sym1)); + AssertThat(should_update, IsTrue()); }); }); @@ -55,15 +59,15 @@ describe("resolving parse conflicts", []() { Symbol sym2("rule2"); it("favors non-errors over parse errors", [&]() { - parse_action = manager->resolve_parse_action(sym1, ParseAction::Error(), ParseAction::Shift(2)); - AssertThat(parse_action, Equals(ParseAction::Shift(2))); + should_update = manager->resolve_parse_action(sym1, ParseAction::Error(), ParseAction::Shift(2)); + AssertThat(should_update, IsTrue()); - parse_action = manager->resolve_parse_action(sym1, ParseAction::Shift(2), ParseAction::Error()); - AssertThat(parse_action, Equals(ParseAction::Shift(2))); + should_update = manager->resolve_parse_action(sym1, ParseAction::Shift(2), ParseAction::Error()); + AssertThat(should_update, IsFalse()); }); describe("shift/reduce conflicts", [&]() { - it("records shift/reduce conflicts, favoring the shift", [&]() { + it("records a conflict", [&]() { manager->resolve_parse_action(sym1, ParseAction::Reduce(sym2, 1), ParseAction::Shift(2)); manager->resolve_parse_action(sym1, ParseAction::Shift(2), ParseAction::Reduce(sym2, 1)); @@ -72,16 +76,30 @@ describe("resolving parse conflicts", []() { }); it("favors the shift", [&]() { - parse_action = manager->resolve_parse_action(sym1, ParseAction::Reduce(sym2, 1), ParseAction::Shift(2)); - AssertThat(parse_action, Equals(ParseAction::Shift(2))); + should_update = manager->resolve_parse_action(sym1, ParseAction::Reduce(sym2, 1), ParseAction::Shift(2)); + AssertThat(should_update, IsTrue()); - parse_action = manager->resolve_parse_action(sym1, ParseAction::Shift(2), ParseAction::Reduce(sym2, 1)); - AssertThat(parse_action, Equals(ParseAction::Shift(2))); + should_update = manager->resolve_parse_action(sym1, ParseAction::Shift(2), ParseAction::Reduce(sym2, 1)); + AssertThat(should_update, IsFalse()); }); }); - it("records reduce/reduce conflicts, favoring the symbol listed earlier in the grammar", [&]() { - + describe("reduce/reduce conflicts", [&]() { + it("records a conflict", [&]() { + manager->resolve_parse_action(sym1, ParseAction::Reduce(sym2, 1), ParseAction::Reduce(sym1, 1)); + manager->resolve_parse_action(sym1, ParseAction::Reduce(sym1, 1), ParseAction::Reduce(sym2, 1)); + + AssertThat(manager->conflicts()[0], Equals(Conflict("rule1: reduce rule2 / reduce rule1"))); + AssertThat(manager->conflicts()[1], Equals(Conflict("rule1: reduce rule1 / reduce rule2"))); + }); + + it("favors the symbol listed earlier in the grammar", [&]() { + should_update = manager->resolve_parse_action(sym1, ParseAction::Reduce(sym2, 1), ParseAction::Reduce(sym1, 1)); + AssertThat(should_update, IsTrue()); + + should_update = manager->resolve_parse_action(sym1, ParseAction::Reduce(sym1, 1), ParseAction::Reduce(sym2, 1)); + AssertThat(should_update, IsFalse()); + }); }); }); }); diff --git a/spec/compiler/name_symbols/name_symbols_spec.cc b/spec/compiler/name_symbols/name_symbols_spec.cc index 36b93fdd..abdd3550 100644 --- a/spec/compiler/name_symbols/name_symbols_spec.cc +++ b/spec/compiler/name_symbols/name_symbols_spec.cc @@ -1,6 +1,7 @@ #include "compiler_spec_helper.h" #include "compiler/name_symbols/name_symbols.h" #include "compiler/prepared_grammar.h" +#include "compiler/rules/built_in_symbols.h" using namespace rules; using name_symbols::name_symbols; @@ -8,6 +9,13 @@ using name_symbols::name_symbols; START_TEST describe("assigning user-visible names to symbols", [&]() { + PreparedGrammar syntactic_grammar({ + { "some_syntactic_symbol", seq({ + make_shared("some_given_name", SymbolTypeNormal), + make_shared("some_generated_string_name", SymbolTypeAuxiliary), + make_shared("some_generated_pattern_name", SymbolTypeAuxiliary), }) }, + }, {}); + PreparedGrammar lexical_grammar({ { "some_given_name", str("the-string") }, }, { @@ -15,37 +23,36 @@ describe("assigning user-visible names to symbols", [&]() { { "some_generated_pattern_name", pattern("the-pattern") }, }); + map result = name_symbols::name_symbols(syntactic_grammar, lexical_grammar); + describe("for symbols that are not in the lexical grammar (syntactic rules)", [&]() { it("uses the symbol's normal name", [&]() { auto symbol = Symbol("some_syntactic_symbol"); - AssertThat(name_symbols::name_symbols({ symbol }, lexical_grammar), Equals(map({ - { symbol, "some_syntactic_symbol" } - }))); + AssertThat(result[symbol], Equals("some_syntactic_symbol")); }); }); describe("for symbols that are in the lexical grammar", [&]() { it("uses symbols' normal names when they are given by the user", [&]() { auto symbol = Symbol("some_given_name"); - AssertThat(name_symbols::name_symbols({ symbol }, lexical_grammar), Equals(map({ - { symbol, "some_given_name" } - }))); + AssertThat(result[symbol], Equals("some_given_name")); }); it("assigns names to string rules based on their string value", [&]() { auto symbol = Symbol("some_generated_string_name", rules::SymbolTypeAuxiliary); - AssertThat(name_symbols::name_symbols({ symbol }, lexical_grammar), Equals(map({ - { symbol, "'the-string'" } - }))); + AssertThat(result[symbol], Equals("'the-string'")); }); it("assigns names to pattern rules based on their pattern value", [&]() { auto symbol = Symbol("some_generated_pattern_name", rules::SymbolTypeAuxiliary); - AssertThat(name_symbols::name_symbols({ symbol }, lexical_grammar), Equals(map({ - { symbol, "/the-pattern/" } - }))); + AssertThat(result[symbol], Equals("/the-pattern/")); }); }); + + it("assigns names to the built-in symbols", [&]() { + AssertThat(result[rules::END_OF_INPUT()], Equals("EOF")); + AssertThat(result[rules::ERROR()], Equals("ERROR")); + }); }); END_TEST \ No newline at end of file diff --git a/src/compiler/build_tables/build_tables.cc b/src/compiler/build_tables/build_tables.cc index 66a80a2d..11ed9a17 100644 --- a/src/compiler/build_tables/build_tables.cc +++ b/src/compiler/build_tables/build_tables.cc @@ -32,11 +32,17 @@ namespace tree_sitter { unordered_map lex_state_ids; void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) { - for (auto transition : sym_transitions(item_set, grammar)) { - Symbol symbol = transition.first; - ParseItemSet item_set = transition.second; - ParseStateId new_state_id = add_parse_state(item_set); - parse_table.add_action(state_id, symbol, ParseAction::Shift(new_state_id)); + for (auto &transition : sym_transitions(item_set, grammar)) { + const Symbol &symbol = transition.first; + const ParseItemSet &item_set = transition.second; + + auto current_actions = parse_table.states[state_id].actions; + auto current_action = current_actions.find(symbol); + if (current_action == current_actions.end() || + conflict_manager.resolve_parse_action(symbol, current_action->second, ParseAction::Shift(0))) { + ParseStateId new_state_id = add_parse_state(item_set); + parse_table.add_action(state_id, symbol, ParseAction::Shift(new_state_id)); + } } } @@ -61,8 +67,8 @@ namespace tree_sitter { if (item.is_done()) { auto current_action = lex_table.state(state_id).default_action; auto new_action = LexAction::Accept(item.lhs); - auto action = conflict_manager.resolve_lex_action(current_action, new_action); - lex_table.add_default_action(state_id, action); + if (conflict_manager.resolve_lex_action(current_action, new_action)) + lex_table.add_default_action(state_id, new_action); } } } @@ -73,7 +79,11 @@ namespace tree_sitter { ParseAction action = (item.lhs == rules::START()) ? ParseAction::Accept() : ParseAction::Reduce(item.lhs, item.consumed_symbol_count); - parse_table.add_action(state_id, item.lookahead_sym, action); + auto current_actions = parse_table.states[state_id].actions; + auto current_action = current_actions.find(item.lookahead_sym); + if (current_action == current_actions.end() || + conflict_manager.resolve_parse_action(item.lookahead_sym, current_action->second, action)) + parse_table.add_action(state_id, item.lookahead_sym, action); } } } @@ -145,10 +155,12 @@ namespace tree_sitter { } public: - TableBuilder(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) : + TableBuilder(const PreparedGrammar &grammar, + const PreparedGrammar &lex_grammar, + const map &rule_names) : grammar(grammar), lex_grammar(lex_grammar), - conflict_manager(ConflictManager(grammar, lex_grammar)) + conflict_manager(ConflictManager(grammar, lex_grammar, rule_names)) {} void build() { @@ -159,17 +171,21 @@ namespace tree_sitter { add_error_lex_state(); } - vector conflicts; + const vector & conflicts() { + return conflict_manager.conflicts(); + }; + ParseTable parse_table; LexTable lex_table; }; pair, vector> build_tables(const PreparedGrammar &grammar, - const PreparedGrammar &lex_grammar) { - TableBuilder builder(grammar, lex_grammar); + const PreparedGrammar &lex_grammar, + const map &rule_names) { + TableBuilder builder(grammar, lex_grammar, rule_names); builder.build(); - return { { builder.parse_table, builder.lex_table }, builder.conflicts }; + return { { builder.parse_table, builder.lex_table }, builder.conflicts() }; } } } diff --git a/src/compiler/build_tables/build_tables.h b/src/compiler/build_tables/build_tables.h index 1f9e78d1..2aad9838 100644 --- a/src/compiler/build_tables/build_tables.h +++ b/src/compiler/build_tables/build_tables.h @@ -12,7 +12,9 @@ namespace tree_sitter { namespace build_tables { std::pair, std::vector> - build_tables(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar); + build_tables(const PreparedGrammar &grammar, + const PreparedGrammar &lex_grammar, + const std::map &rule_names); } } diff --git a/src/compiler/build_tables/conflict_manager.cc b/src/compiler/build_tables/conflict_manager.cc new file mode 100644 index 00000000..c2df0121 --- /dev/null +++ b/src/compiler/build_tables/conflict_manager.cc @@ -0,0 +1,100 @@ +#include "compiler/build_tables/conflict_manager.h" +#include +#include +#include + +namespace tree_sitter { + namespace build_tables { + using rules::Symbol; + using std::vector; + using std::string; + using std::map; + + string message_for_action(const ParseAction &action, const map &rule_names) { + switch (action.type) { + case ParseActionTypeShift: + return "shift"; + case ParseActionTypeReduce: { + auto pair = rule_names.find(action.symbol); + if (pair != rule_names.end()) + return "reduce " + pair->second; + else + return "ERROR " + action.symbol.name; + } + case ParseActionTypeAccept: + return "accept"; + case ParseActionTypeError: + return "error"; + break; + } + } + + void ConflictManager::record_conflict(const rules::Symbol &symbol, const ParseAction &left, const ParseAction &right) { + conflicts_.push_back(Conflict(rule_names.find(symbol)->second + ": " + message_for_action(left, rule_names) + " / " + message_for_action(right, rule_names))); + } + + ConflictManager::ConflictManager(const PreparedGrammar &parse_grammar, + const PreparedGrammar &lex_grammar, + const map &rule_names) : + parse_grammar(parse_grammar), + lex_grammar(lex_grammar), + rule_names(rule_names) + {} + + bool ConflictManager::resolve_parse_action(const rules::Symbol &symbol, + const ParseAction &old_action, + const ParseAction &new_action) { + if (new_action.type < old_action.type) + return !resolve_parse_action(symbol, new_action, old_action); + + switch (old_action.type) { + case ParseActionTypeError: + return true; + case ParseActionTypeShift: + switch (new_action.type) { + case ParseActionTypeShift: + record_conflict(symbol, old_action, new_action); + return false; + case ParseActionTypeReduce: + record_conflict(symbol, old_action, new_action); + return false; + default: + return false; + } + case ParseActionTypeReduce: + switch (new_action.type) { + case ParseActionTypeReduce: { + record_conflict(symbol, old_action, new_action); + size_t old_index = parse_grammar.index_of(old_action.symbol); + size_t new_index = parse_grammar.index_of(new_action.symbol); + return new_index < old_index; + } + default: + return false; + } + default: + return false; + } + } + + bool ConflictManager::resolve_lex_action(const LexAction &old_action, + const LexAction &new_action) { + switch (old_action.type) { + case LexActionTypeError: + return true; + case LexActionTypeAccept: + if (new_action.type == LexActionTypeAccept) { + size_t old_index = lex_grammar.index_of(old_action.symbol); + size_t new_index = lex_grammar.index_of(new_action.symbol); + return (new_index < old_index); + } + default:; + } + return false; + } + + const vector & ConflictManager::conflicts() const { + return conflicts_; + } + } +} \ No newline at end of file diff --git a/src/compiler/build_tables/conflict_manager.cpp b/src/compiler/build_tables/conflict_manager.cpp deleted file mode 100644 index a0e47dc1..00000000 --- a/src/compiler/build_tables/conflict_manager.cpp +++ /dev/null @@ -1,92 +0,0 @@ -#include "compiler/build_tables/conflict_manager.h" -#include - -namespace tree_sitter { - namespace build_tables { - using rules::Symbol; - using std::vector; - using std::string; - - string message_for_action(const ParseAction &action) { - switch (action.type) { - case ParseActionTypeShift: - return "shift"; - case ParseActionTypeReduce: - return "reduce " + action.symbol.name; - case ParseActionTypeAccept: - return "accept"; - case ParseActionTypeError: - return "error"; - break; - } - } - - void ConflictManager::record_conflict(const rules::Symbol &symbol, const ParseAction &left, const ParseAction &right) { - conflicts_.push_back(Conflict(symbol.name + ": " + - message_for_action(left) + - " / " + - message_for_action(right))); - } - - ConflictManager::ConflictManager(const PreparedGrammar &parse_grammar, const PreparedGrammar &lex_grammar) : - parse_grammar(parse_grammar), - lex_grammar(lex_grammar) {} - - ParseAction ConflictManager::resolve_parse_action(const rules::Symbol &symbol, ParseAction left, ParseAction right) { - if (right.type < left.type) { - ParseAction swap = right; - right = left; - left = swap; - } - - switch (left.type) { - case ParseActionTypeError: - return right; - case ParseActionTypeShift: - switch (right.type) { - case ParseActionTypeShift: - record_conflict(symbol, left, right); - return left; - case ParseActionTypeReduce: - record_conflict(symbol, left, right); - return left; - default: - return left; - } - case ParseActionTypeReduce: - switch (right.type) { - case ParseActionTypeReduce: { - size_t left_index = parse_grammar.index_of(left.symbol); - size_t right_index = parse_grammar.index_of(right.symbol); - return (right_index < left_index) ? right : left; - } - default: - return right; - } - default: - return left; - } - } - - LexAction ConflictManager::resolve_lex_action(const LexAction &left, const LexAction &right) { - switch (left.type) { - case LexActionTypeError: - return right; - case LexActionTypeAccept: - if (right.type == LexActionTypeAccept) { - size_t left_index = lex_grammar.index_of(left.symbol); - size_t right_index = lex_grammar.index_of(right.symbol); - return (right_index < left_index) ? right : left; - } else { - return left; - } - default: - return left; - } - } - - const vector & ConflictManager::conflicts() const { - return conflicts_; - } - } -} \ No newline at end of file diff --git a/src/compiler/build_tables/conflict_manager.h b/src/compiler/build_tables/conflict_manager.h index c3c5253b..4947fbc2 100644 --- a/src/compiler/build_tables/conflict_manager.h +++ b/src/compiler/build_tables/conflict_manager.h @@ -2,6 +2,8 @@ #define COMPILER_BUILD_TABLES_CONFLICT_MANAGER_H_ #include +#include +#include #include "tree_sitter/compiler.h" #include "compiler/parse_table.h" #include "compiler/prepared_grammar.h" @@ -11,13 +13,19 @@ namespace tree_sitter { class ConflictManager { const PreparedGrammar parse_grammar; const PreparedGrammar lex_grammar; + const std::map rule_names; std::vector conflicts_; public: - ConflictManager(const PreparedGrammar &parse_grammar, const PreparedGrammar &lex_grammar); + ConflictManager(const PreparedGrammar &parse_grammar, + const PreparedGrammar &lex_grammar, + const std::map &rule_names); - LexAction resolve_lex_action(const LexAction &left, const LexAction &right); - ParseAction resolve_parse_action(const rules::Symbol &symbol, ParseAction left, ParseAction right); + bool resolve_lex_action(const LexAction &old_action, + const LexAction &new_action); + bool resolve_parse_action(const rules::Symbol &symbol, + const ParseAction &old_action, + const ParseAction &new_action); void record_conflict(const rules::Symbol &symbol, const ParseAction &left, const ParseAction &right); const std::vector & conflicts() const; diff --git a/src/compiler/compile.cc b/src/compiler/compile.cc index 66c669d2..8372cb62 100644 --- a/src/compiler/compile.cc +++ b/src/compiler/compile.cc @@ -15,13 +15,15 @@ namespace tree_sitter { PreparedGrammar &syntax_grammar = grammars.first; PreparedGrammar &lexical_grammar = grammars.second; - auto table_build_result = build_tables::build_tables(syntax_grammar, lexical_grammar); + auto symbol_names = name_symbols::name_symbols(syntax_grammar, lexical_grammar); + + auto table_build_result = build_tables::build_tables(syntax_grammar, lexical_grammar, symbol_names); auto tables = table_build_result.first; auto conflicts = table_build_result.second; + ParseTable &parse_table = tables.first; LexTable &lex_table = tables.second; - auto symbol_names = name_symbols::name_symbols(parse_table.symbols, lexical_grammar); return { generate_code::c_code(name, parse_table, lex_table, symbol_names), conflicts }; } } diff --git a/src/compiler/name_symbols/name_symbols.cc b/src/compiler/name_symbols/name_symbols.cc index 6ca4d252..3dbd363e 100644 --- a/src/compiler/name_symbols/name_symbols.cc +++ b/src/compiler/name_symbols/name_symbols.cc @@ -5,12 +5,16 @@ #include "compiler/rules/pattern.h" #include "compiler/rules/string.h" #include "compiler/util/string_helpers.h" +#include "compiler/rules/built_in_symbols.h" namespace tree_sitter { namespace name_symbols { using std::map; using std::set; using std::string; + using rules::Symbol; + using rules::SymbolTypeNormal; + using rules::SymbolTypeAuxiliary; class TokenName : public rules::RuleFn { protected: @@ -23,15 +27,25 @@ namespace tree_sitter { } }; - map name_symbols(const set &symbols, + map name_symbols(const PreparedGrammar &syntactic_grammar, const PreparedGrammar &lexical_grammar) { map result; - for (auto &symbol : symbols) { - string name = (symbol.is_auxiliary() && lexical_grammar.has_definition(symbol)) ? - TokenName().apply(lexical_grammar.rule(symbol)) : - symbol.name; - result.insert({ symbol, name }); - } + + for (const auto &pair : syntactic_grammar.rules) + result.insert({ Symbol(pair.first, SymbolTypeNormal), pair.first }); + for (const auto &pair : lexical_grammar.rules) + result.insert({ Symbol(pair.first, SymbolTypeNormal), pair.first }); + for (const auto &pair : syntactic_grammar.aux_rules) + result.insert({ Symbol(pair.first, SymbolTypeAuxiliary), pair.first }); + for (const auto &pair : lexical_grammar.aux_rules) + result.insert({ + Symbol(pair.first, SymbolTypeAuxiliary), + TokenName().apply(pair.second) + }); + + result.insert({ rules::END_OF_INPUT(), "EOF" }); + result.insert({ rules::ERROR(), "ERROR" }); + return result; } } diff --git a/src/compiler/name_symbols/name_symbols.h b/src/compiler/name_symbols/name_symbols.h index 81547307..8577a149 100644 --- a/src/compiler/name_symbols/name_symbols.h +++ b/src/compiler/name_symbols/name_symbols.h @@ -10,7 +10,7 @@ namespace tree_sitter { class PreparedGrammar; namespace name_symbols { - std::map name_symbols(const std::set &symbols, + std::map name_symbols(const PreparedGrammar &syntactic_grammar, const PreparedGrammar &lexical_grammar); } }