From 3982b73ed6df033b0e2e47ba1377ca4accf58455 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld
Date: Tue, 8 Apr 2014 08:19:55 -0700
Subject: [PATCH] Start work on recording parse action conflicts

---
 include/tree_sitter/compiler.h                | 11 ++-
 .../build_tables/conflict_manager_spec.cc     | 89 ++++++++++++++++++
 spec/compiler/compile_examples.cc             |  8 +-
 src/compiler/build_tables/build_tables.cc     | 35 ++++---
 src/compiler/build_tables/build_tables.h      |  6 +-
 .../build_tables/conflict_manager.cpp         | 92 +++++++++++++++++++
 src/compiler/build_tables/conflict_manager.h  | 28 ++++++
 src/compiler/compile.cc                       | 12 ++-
 src/compiler/conflict.cc                      | 16 ++++
 src/compiler/parse_table.cc                   |  2 +
 src/compiler/parse_table.h                    |  3 +-
 src/compiler/prepared_grammar.h               |  2 -
 12 files changed, 278 insertions(+), 26 deletions(-)
 create mode 100644 spec/compiler/build_tables/conflict_manager_spec.cc
 create mode 100644 src/compiler/build_tables/conflict_manager.cpp
 create mode 100644 src/compiler/build_tables/conflict_manager.h
 create mode 100644 src/compiler/conflict.cc

diff --git a/include/tree_sitter/compiler.h b/include/tree_sitter/compiler.h
index f2a48a12..cade7a0e 100644
--- a/include/tree_sitter/compiler.h
+++ b/include/tree_sitter/compiler.h
@@ -31,9 +31,16 @@ namespace tree_sitter {
         const std::vector> rules;
     };
 
-    std::ostream& operator<<(std::ostream &stream, const Grammar &grammar);
+    struct Conflict {  // one parse-action conflict, carried as a human-readable description
+        Conflict(std::string description);
+        std::string description;
+        bool operator==(const Conflict &other) const;  // compares descriptions only
+    };
 
-    std::string compile(const Grammar &grammar, std::string name);
+    std::ostream& operator<<(std::ostream &stream, const Grammar &grammar);
+    std::ostream& operator<<(std::ostream &stream, const Conflict &conflict);
+
+    std::pair<std::string, std::vector<Conflict>> compile(const Grammar &grammar, std::string name);  // first: generated parser source, second: conflicts reported by table construction
 }
 
 #endif  // TREE_SITTER_COMPILER_H_
diff --git a/spec/compiler/build_tables/conflict_manager_spec.cc b/spec/compiler/build_tables/conflict_manager_spec.cc
new file mode 100644
index 00000000..6b1dd6b5
--- /dev/null
+++ b/spec/compiler/build_tables/conflict_manager_spec.cc
@@ -0,0 +1,89 @@
+#include "compiler_spec_helper.h"
+#include "compiler/build_tables/conflict_manager.h"
+
+using namespace rules;
+using namespace build_tables;
+
+START_TEST
+
+describe("resolving parse conflicts", []() {
+    LexAction lex_action;
+    ParseAction parse_action;
+    ConflictManager *manager;  // created in before_each, deleted in after_each
+
+    PreparedGrammar parse_grammar({
+        { "rule1", seq({ sym("rule2"), sym("token2") }) },
+        { "rule2", sym("token1") },
+    }, {});
+
+    PreparedGrammar lex_grammar({
+        { "token1", pattern("[a-c]") },
+        { "token2", pattern("[b-d]") },
+    }, {});
+
+    before_each([&]() {
+        manager = new ConflictManager(parse_grammar, lex_grammar);
+    });
+
+    after_each([&]() {
+        delete manager;
+    });
+
+    describe("lexical conflicts", [&]() {
+        Symbol sym1("token1");  // listed first in lex_grammar
+        Symbol sym2("token2");
+
+        it("favors non-errors over lexical errors", [&]() {
+            lex_action = manager->resolve_lex_action(LexAction::Error(), LexAction::Advance(2));
+            AssertThat(lex_action, Equals(LexAction::Advance(2)));
+
+            lex_action = manager->resolve_lex_action(LexAction::Advance(2), LexAction::Error());
+            AssertThat(lex_action, Equals(LexAction::Advance(2)));
+        });
+
+        it("prefers tokens that are listed earlier in the grammar", [&]() {
+            lex_action = manager->resolve_lex_action(LexAction::Accept(sym1), LexAction::Accept(sym2));
+            AssertThat(lex_action, Equals(LexAction::Accept(sym1)));
+
+            lex_action = manager->resolve_lex_action(LexAction::Accept(sym2), LexAction::Accept(sym1));
+            AssertThat(lex_action, Equals(LexAction::Accept(sym1)));
+        });
+    });
+
+    describe("syntactic conflicts", [&]() {
+        Symbol sym1("rule1");
+        Symbol sym2("rule2");
+
+        it("favors non-errors over parse errors", [&]() {
+            parse_action = manager->resolve_parse_action(sym1, ParseAction::Error(), ParseAction::Shift(2));
+            AssertThat(parse_action, Equals(ParseAction::Shift(2)));
+
+            parse_action = manager->resolve_parse_action(sym1, ParseAction::Shift(2), ParseAction::Error());
+            AssertThat(parse_action, Equals(ParseAction::Shift(2)));
+        });
+
+        describe("shift/reduce conflicts", [&]() {
+            it("records shift/reduce conflicts, favoring the shift", [&]() {
+                manager->resolve_parse_action(sym1, ParseAction::Reduce(sym2, 1), ParseAction::Shift(2));
+                manager->resolve_parse_action(sym1, ParseAction::Shift(2), ParseAction::Reduce(sym2, 1));
+
+                AssertThat(manager->conflicts()[0], Equals(Conflict("rule1: shift / reduce rule2")));  // same description expected for either argument order
+                AssertThat(manager->conflicts()[1], Equals(Conflict("rule1: shift / reduce rule2")));
+            });
+
+            it("favors the shift", [&]() {
+                parse_action = manager->resolve_parse_action(sym1, ParseAction::Reduce(sym2, 1), ParseAction::Shift(2));
+                AssertThat(parse_action, Equals(ParseAction::Shift(2)));
+
+                parse_action = manager->resolve_parse_action(sym1, ParseAction::Shift(2), ParseAction::Reduce(sym2, 1));
+                AssertThat(parse_action, Equals(ParseAction::Shift(2)));
+            });
+        });
+
+        it("records reduce/reduce conflicts, favoring the symbol listed earlier in the grammar", [&]() {
+            // TODO: no assertions yet, so this example passes vacuously
+        });
+    });
+});
+
+END_TEST
\ No newline at end of file
diff --git a/spec/compiler/compile_examples.cc b/spec/compiler/compile_examples.cc
index 7e53d5df..1c81d6ea 100644
--- a/spec/compiler/compile_examples.cc
+++ b/spec/compiler/compile_examples.cc
@@ -1,6 +1,8 @@
 #include "compiler_spec_helper.h"
 #include
 
+#include "stream_methods.h"
+
 static string src_dir() {
     const char * dir = getenv("TREESITTER_DIR");
     if (!dir) dir = getenv("PWD");
@@ -22,7 +24,11 @@ describe("compiling the example grammars", []() {
     auto compile_grammar = [&](const Grammar &grammar, string language) {
         it(("compiles the " + language + " grammar").c_str(), [&]() {
             ofstream file(example_parser_dir + language + ".c");
-            file << compile(grammar, language);
+            auto result = compile(grammar, language);  // result.first is written to the .c file below; result.second is only referenced by the disabled debug line
+
+// cout << "\n conflicts for " << language << "\n:" << result.second;
+
+            file << result.first;
             file.close();
         });
     };
diff --git a/src/compiler/build_tables/build_tables.cc
b/src/compiler/build_tables/build_tables.cc
index 433d86aa..66a80a2d 100644
--- a/src/compiler/build_tables/build_tables.cc
+++ b/src/compiler/build_tables/build_tables.cc
@@ -7,6 +7,7 @@
 #include "compiler/rules/metadata.h"
 #include "compiler/rules/repeat.h"
 #include "compiler/rules/seq.h"
+#include "compiler/build_tables/conflict_manager.h"
 #include "compiler/build_tables/item.h"
 #include "compiler/build_tables/item_set_closure.h"
 #include "compiler/build_tables/item_set_transitions.h"
@@ -16,6 +17,7 @@ namespace tree_sitter {
     using std::pair;
     using std::string;
     using std::map;
+    using std::vector;
     using std::unordered_map;
     using std::make_shared;
     using rules::Symbol;
@@ -25,10 +27,9 @@ namespace tree_sitter {
         class TableBuilder {
             const PreparedGrammar grammar;
             const PreparedGrammar lex_grammar;
+            ConflictManager conflict_manager;  // decides which action wins when two compete for the same table entry
             unordered_map parse_state_ids;
             unordered_map lex_state_ids;
-            ParseTable parse_table;
-            LexTable lex_table;
 
             void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
                 for (auto transition : sym_transitions(item_set, grammar)) {
@@ -58,13 +59,10 @@ namespace tree_sitter {
             void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
                 for (LexItem item : item_set) {
                     if (item.is_done()) {
-                        const Symbol &new_symbol = item.lhs;
-                        auto &action = lex_table.states[state_id].default_action;
-                        if (action.type == LexActionTypeAccept) {
-                            const Symbol &old_symbol = action.symbol;
-                            if (lex_grammar.index_of(new_symbol) >= lex_grammar.index_of(old_symbol)) continue;
-                        }
-                        lex_table.add_default_action(state_id, LexAction::Accept(new_symbol));
+                        auto current_action = lex_table.state(state_id).default_action;
+                        auto new_action = LexAction::Accept(item.lhs);
+                        auto action = conflict_manager.resolve_lex_action(current_action, new_action);  // precedence between the existing and the new accept is delegated to the manager
+                        lex_table.add_default_action(state_id, action);
                     }
                 }
             }
@@ -147,24 +145,31 @@ namespace tree_sitter {
             }
 
         public:
-
             TableBuilder(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) :
                 grammar(grammar),
-                lex_grammar(lex_grammar) {}
+                lex_grammar(lex_grammar),
+                conflict_manager(grammar, lex_grammar)
+            {}
 
-            pair build() {
+            void build() {
                 auto start_symbol = make_shared(grammar.start_rule_name());
                 ParseItem item(rules::START(), start_symbol, {}, rules::END_OF_INPUT());
                 ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar);
                 add_parse_state(item_set);
                 add_error_lex_state();
-                return pair(parse_table, lex_table);
             }
+
+            const vector<Conflict> & conflicts() const { return conflict_manager.conflicts(); }  // forwards to the manager; a separate member was never filled in
+            ParseTable parse_table;
+            LexTable lex_table;
         };
 
-        pair build_tables(const PreparedGrammar &grammar,
+        pair<pair<ParseTable, LexTable>, vector<Conflict>>
+        build_tables(const PreparedGrammar &grammar,
                                       const PreparedGrammar &lex_grammar) {
-            return TableBuilder(grammar, lex_grammar).build();
+            TableBuilder builder(grammar, lex_grammar);
+            builder.build();
+            return { { builder.parse_table, builder.lex_table }, builder.conflicts() };
         }
     }
 }
diff --git a/src/compiler/build_tables/build_tables.h b/src/compiler/build_tables/build_tables.h
index 500a285d..1f9e78d1 100644
--- a/src/compiler/build_tables/build_tables.h
+++ b/src/compiler/build_tables/build_tables.h
@@ -2,6 +2,8 @@
 #define COMPILER_BUILD_TABLES_BUILD_TABLES_H_
 
 #include
+#include <vector>
+#include "tree_sitter/compiler.h"
 #include "compiler/parse_table.h"
 #include "compiler/lex_table.h"
 
@@ -9,8 +11,8 @@ namespace tree_sitter {
     class PreparedGrammar;
 
     namespace build_tables {
-        std::pair build_tables(const PreparedGrammar &grammar,
-                               const PreparedGrammar &lex_grammar);
+        std::pair<std::pair<ParseTable, LexTable>, std::vector<Conflict>>
+        build_tables(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar);  // first: the two tables, second: conflicts recorded while building them
     }
 }
 
diff --git a/src/compiler/build_tables/conflict_manager.cpp b/src/compiler/build_tables/conflict_manager.cpp
new file mode 100644
index 00000000..a0e47dc1
--- /dev/null
+++ b/src/compiler/build_tables/conflict_manager.cpp
@@ -0,0 +1,92 @@
+#include "compiler/build_tables/conflict_manager.h"
+#include <string>
+
+namespace tree_sitter {
+    namespace build_tables {
+        using rules::Symbol;
+        
using std::vector;
+        using std::string;
+
+        string message_for_action(const ParseAction &action) {  // short label for an action, used inside conflict descriptions
+            switch (action.type) {
+                case ParseActionTypeShift:
+                    return "shift";
+                case ParseActionTypeReduce:
+                    return "reduce " + action.symbol.name;
+                case ParseActionTypeAccept:
+                    return "accept";
+                case ParseActionTypeError:
+                    return "error";
+            }
+            return "unknown";  // only reached for a type outside the enum; keeps control from flowing off the end
+        }
+
+        void ConflictManager::record_conflict(const rules::Symbol &symbol, const ParseAction &left, const ParseAction &right) {
+            conflicts_.push_back(Conflict(symbol.name + ": " +
+                                          message_for_action(left) +
+                                          " / " +
+                                          message_for_action(right)));
+        }
+
+        ConflictManager::ConflictManager(const PreparedGrammar &parse_grammar, const PreparedGrammar &lex_grammar) :
+            parse_grammar(parse_grammar),
+            lex_grammar(lex_grammar) {}
+
+        ParseAction ConflictManager::resolve_parse_action(const rules::Symbol &symbol, ParseAction left, ParseAction right) {
+            if (right.type < left.type) {  // order the pair by ParseActionType so each combination is handled once below
+                ParseAction swap = right;
+                right = left;
+                left = swap;
+            }
+
+            switch (left.type) {
+                case ParseActionTypeError:
+                    return right;
+                case ParseActionTypeShift:
+                    switch (right.type) {
+                        case ParseActionTypeShift:
+                            record_conflict(symbol, left, right);  // NOTE(review): recorded even when both shifts are identical; confirm intended
+                            return left;
+                        case ParseActionTypeReduce:
+                            record_conflict(symbol, left, right);
+                            return left;
+                        default:
+                            return left;
+                    }
+                case ParseActionTypeReduce:
+                    switch (right.type) {
+                        case ParseActionTypeReduce: {  // TODO: reduce/reduce is resolved but not yet recorded as a conflict
+                            size_t left_index = parse_grammar.index_of(left.symbol);
+                            size_t right_index = parse_grammar.index_of(right.symbol);
+                            return (right_index < left_index) ? right : left;
+                        }
+                        default:
+                            return right;
+                    }
+                default:
+                    return left;
+            }
+        }
+
+        LexAction ConflictManager::resolve_lex_action(const LexAction &left, const LexAction &right) {
+            switch (left.type) {
+                case LexActionTypeError:
+                    return right;
+                case LexActionTypeAccept:
+                    if (right.type == LexActionTypeAccept) {  // two accepts: the symbol with the lower index in lex_grammar wins
+                        size_t left_index = lex_grammar.index_of(left.symbol);
+                        size_t right_index = lex_grammar.index_of(right.symbol);
+                        return (right_index < left_index) ? right : left;
+                    } else {
+                        return left;
+                    }
+                default:
+                    return left;
+            }
+        }
+
+        const vector<Conflict> & ConflictManager::conflicts() const {
+            return conflicts_;
+        }
+    }
+}
diff --git a/src/compiler/build_tables/conflict_manager.h b/src/compiler/build_tables/conflict_manager.h
new file mode 100644
index 00000000..c3c5253b
--- /dev/null
+++ b/src/compiler/build_tables/conflict_manager.h
@@ -0,0 +1,28 @@
+#ifndef COMPILER_BUILD_TABLES_CONFLICT_MANAGER_H_
+#define COMPILER_BUILD_TABLES_CONFLICT_MANAGER_H_
+
+#include <vector>
+#include "tree_sitter/compiler.h"
+#include "compiler/parse_table.h"  // NOTE(review): LexAction is used below; presumably reachable through these includes - consider including "compiler/lex_table.h" directly
+#include "compiler/prepared_grammar.h"
+
+namespace tree_sitter {
+    namespace build_tables {
+        class ConflictManager {  // picks a winner between two competing lex/parse actions and keeps a list of recorded conflicts
+            const PreparedGrammar parse_grammar;
+            const PreparedGrammar lex_grammar;
+            std::vector<Conflict> conflicts_;
+
+        public:
+            ConflictManager(const PreparedGrammar &parse_grammar, const PreparedGrammar &lex_grammar);
+
+            LexAction resolve_lex_action(const LexAction &left, const LexAction &right);
+            ParseAction resolve_parse_action(const rules::Symbol &symbol, ParseAction left, ParseAction right);
+
+            void record_conflict(const rules::Symbol &symbol, const ParseAction &left, const ParseAction &right);
+            const std::vector<Conflict> & conflicts() const;
+        };
+    }
+}
+
+#endif  // COMPILER_BUILD_TABLES_CONFLICT_MANAGER_H_
diff --git a/src/compiler/compile.cc b/src/compiler/compile.cc
index 8c294096..66c669d2 100644
--- a/src/compiler/compile.cc
+++ b/src/compiler/compile.cc
@@ -6,16 +6,22 @@
 #include "compiler/name_symbols/name_symbols.h"
 
 namespace tree_sitter {
-    std::string compile(const Grammar &grammar, std::string name) {
+    using std::pair;
+    using std::string;
+    using std::vector;
+
+    pair<string, vector<Conflict>> compile(const Grammar &grammar, std::string name) {
         auto grammars = prepare_grammar::prepare_grammar(grammar);
         PreparedGrammar &syntax_grammar = grammars.first;
         PreparedGrammar &lexical_grammar = grammars.second;
 
-        auto tables = build_tables::build_tables(syntax_grammar, lexical_grammar);
+        auto table_build_result = build_tables::build_tables(syntax_grammar, lexical_grammar);
+        auto &tables = table_build_result.first;  // bound by reference so the tables are not copied
+        auto &conflicts = table_build_result.second;
         ParseTable &parse_table = tables.first;
         LexTable &lex_table = tables.second;
 
         auto symbol_names = name_symbols::name_symbols(parse_table.symbols, lexical_grammar);
-        return generate_code::c_code(name, parse_table, lex_table, symbol_names);
+        return { generate_code::c_code(name, parse_table, lex_table, symbol_names), conflicts };
     }
 }
diff --git a/src/compiler/conflict.cc b/src/compiler/conflict.cc
new file mode 100644
index 00000000..5b6f6886
--- /dev/null
+++ b/src/compiler/conflict.cc
@@ -0,0 +1,16 @@
+#include "tree_sitter/compiler.h"
+#include <string>
+
+namespace tree_sitter {
+    using std::string;
+
+    Conflict::Conflict(string description) : description(description) {}
+
+    bool Conflict::operator==(const tree_sitter::Conflict &other) const {
+        return other.description == description;
+    }
+
+    std::ostream& operator<<(std::ostream &stream, const Conflict &conflict) {
+        return stream << "#<conflict " << conflict.description << ">";  // include the description so printed conflicts are distinguishable
+    }
+}
diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc
index 7ceacd0a..2414806e 100644
--- a/src/compiler/parse_table.cc
+++ b/src/compiler/parse_table.cc
@@ -18,6 +18,8 @@ namespace tree_sitter {
         state_index(state_index),
         consumed_symbol_count(consumed_symbol_count) {}
 
+    ParseAction::ParseAction() : ParseAction(ParseActionTypeError, -1, Symbol(""), {}) {}  // default-constructed action is an error action
+
     ParseAction
ParseAction::Error() {
         return ParseAction(ParseActionTypeError, -1, Symbol(""), {});
     }
diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h
index 0b52183d..bd7073f4 100644
--- a/src/compiler/parse_table.h
+++ b/src/compiler/parse_table.h
@@ -11,9 +11,9 @@ namespace tree_sitter {
 
     typedef enum {
         ParseActionTypeError,
-        ParseActionTypeAccept,
         ParseActionTypeShift,
         ParseActionTypeReduce,
+        ParseActionTypeAccept,  // moved after Shift/Reduce: ConflictManager::resolve_parse_action orders action pairs with `<` on this enum, so enumerator order matters
     } ParseActionType;
 
     class ParseAction {
@@ -22,6 +22,7 @@ namespace tree_sitter {
                     rules::Symbol symbol,
                     size_t consumed_symbol_count);
     public:
+        ParseAction();  // builds the same action as Error() (see parse_table.cc)
         static ParseAction Accept();
         static ParseAction Error();
         static ParseAction Shift(size_t state_index);
diff --git a/src/compiler/prepared_grammar.h b/src/compiler/prepared_grammar.h
index ce78f1d9..df60126a 100644
--- a/src/compiler/prepared_grammar.h
+++ b/src/compiler/prepared_grammar.h
@@ -23,8 +23,6 @@ namespace tree_sitter {
     };
 
     std::ostream& operator<<(std::ostream &stream, const PreparedGrammar &grammar);
-
-    std::string compile(const Grammar &grammar, std::string name);
 }
 
 #endif  // COMPILER_PREPARED_GRAMMAR_H_