diff --git a/project.gyp b/project.gyp index 91001727..081a3a88 100644 --- a/project.gyp +++ b/project.gyp @@ -21,7 +21,6 @@ 'src/compiler/build_tables/lookahead_set.cc', 'src/compiler/build_tables/parse_item.cc', 'src/compiler/build_tables/parse_item_set_builder.cc', - 'src/compiler/build_tables/parse_conflict_manager.cc', 'src/compiler/build_tables/rule_can_be_blank.cc', 'src/compiler/compile.cc', 'src/compiler/generate_code/c_code.cc', diff --git a/spec/compiler/build_tables/parse_conflict_manager_spec.cc b/spec/compiler/build_tables/parse_conflict_manager_spec.cc deleted file mode 100644 index c2dfe8fc..00000000 --- a/spec/compiler/build_tables/parse_conflict_manager_spec.cc +++ /dev/null @@ -1,194 +0,0 @@ -#include "spec_helper.h" -#include "compiler/rules/built_in_symbols.h" -#include "compiler/parse_table.h" -#include "compiler/build_tables/parse_conflict_manager.h" - -using namespace rules; -using namespace build_tables; - -START_TEST - -describe("ParseConflictManager", []() { - pair result; - Symbol sym1(0); - Symbol sym2(1); - Symbol lookahead_sym(1, true); - const Production production; - ParseConflictManager *conflict_manager; - - before_each([&]() { - conflict_manager = new ParseConflictManager; - }); - - after_each([&]() { - delete conflict_manager; - }); - - describe(".resolve", [&]() { - describe("errors", [&]() { - ParseAction error = ParseAction::Error(); - ParseAction non_error = ParseAction::Shift(2, { 0, 0 }); - - it("favors non-errors and reports no conflict", [&]() { - result = conflict_manager->resolve(non_error, error); - AssertThat(result.first, IsTrue()); - AssertThat(result.second, Equals(ConflictTypeNone)); - - result = conflict_manager->resolve(error, non_error); - AssertThat(result.first, IsFalse()); - AssertThat(result.second, Equals(ConflictTypeNone)); - }); - }); - - describe("shift-extra actions", [&]() { - ParseAction shift_extra = ParseAction::ShiftExtra(); - ParseAction shift = ParseAction::Shift(2, { 0, 0 }); - ParseAction reduce = ParseAction::Reduce(sym2, 1, -1, AssociativityRight, production); - - it("favors any shift action over a shift-extra actions", [&]() { - result = conflict_manager->resolve(shift, shift_extra); - AssertThat(result.first, IsTrue()); - AssertThat(result.second, Equals(ConflictTypeNone)); - - result = conflict_manager->resolve(shift_extra, shift); - AssertThat(result.first, IsFalse()); - AssertThat(result.second, Equals(ConflictTypeNone)); - }); - - it("favors any reduce action over a shift-extra actions", [&]() { - result = conflict_manager->resolve(reduce, shift_extra); - AssertThat(result.first, IsTrue()); - AssertThat(result.second, Equals(ConflictTypeNone)); - - result = conflict_manager->resolve(shift_extra, reduce); - AssertThat(result.first, IsFalse()); - AssertThat(result.second, Equals(ConflictTypeNone)); - }); - }); - - describe("shift/reduce conflicts", [&]() { - describe("when the shift has higher precedence", [&]() { - ParseAction shift = ParseAction::Shift(2, {3, 4}); - ParseAction reduce = ParseAction::Reduce(sym2, 1, 2, AssociativityLeft, production); - - it("favors the shift and reports the conflict as resolved", [&]() { - result = conflict_manager->resolve(shift, reduce); - AssertThat(result.first, IsTrue()); - AssertThat(result.second, Equals(ConflictTypeResolved)); - - result = conflict_manager->resolve(reduce, shift); - AssertThat(result.first, IsFalse()); - AssertThat(result.second, Equals(ConflictTypeResolved)); - }); - }); - - describe("when the reduce has higher precedence", [&]() { - ParseAction shift = ParseAction::Shift(2, {1, 2}); - ParseAction reduce = ParseAction::Reduce(sym2, 1, 3, AssociativityLeft, production); - - it("favors the reduce and reports the conflict as resolved", [&]() { - result = conflict_manager->resolve(shift, reduce); - AssertThat(result.first, IsFalse()); - AssertThat(result.second, Equals(ConflictTypeResolved)); - - result = conflict_manager->resolve(reduce, shift); - AssertThat(result.first, IsTrue()); - AssertThat(result.second, Equals(ConflictTypeResolved)); - }); - }); - - describe("when the precedences are equal and the reduce's rule is left associative", [&]() { - ParseAction shift = ParseAction::Shift(2, { 0, 0 }); - ParseAction reduce = ParseAction::Reduce(sym2, 1, 0, AssociativityLeft, production); - - it("favors the reduce and reports the conflict as resolved", [&]() { - result = conflict_manager->resolve(reduce, shift); - AssertThat(result.first, IsTrue()); - AssertThat(result.second, Equals(ConflictTypeResolved)); - - result = conflict_manager->resolve(shift, reduce); - AssertThat(result.first, IsFalse()); - AssertThat(result.second, Equals(ConflictTypeResolved)); - }); - }); - - describe("when the precedences are equal and the reduce's rule is right-associative", [&]() { - ParseAction shift = ParseAction::Shift(2, { 0, 0 }); - ParseAction reduce = ParseAction::Reduce(sym2, 1, 0, AssociativityRight, production); - - it("favors the shift, and reports the conflict as resolved", [&]() { - result = conflict_manager->resolve(reduce, shift); - AssertThat(result.first, IsFalse()); - AssertThat(result.second, Equals(ConflictTypeResolved)); - - result = conflict_manager->resolve(shift, reduce); - AssertThat(result.first, IsTrue()); - AssertThat(result.second, Equals(ConflictTypeResolved)); - }); - }); - - describe("when the precedences are equal and the reduce's rule has no associativity", [&]() { - it("reports an unresolved conflict", [&]() { - ParseAction shift = ParseAction::Shift(2, { 0, 0 }); - ParseAction reduce = ParseAction::Reduce(Symbol(2), 1, 0, AssociativityNone, production); - - result = conflict_manager->resolve(reduce, shift); - AssertThat(result.first, IsFalse()); - AssertThat(result.second, Equals(ConflictTypeUnresolved)); - - result = conflict_manager->resolve(shift, reduce); - AssertThat(result.first, IsTrue()); - }); - }); - - describe("when the shift has conflicting precedences compared to the reduce", [&]() { - ParseAction shift = ParseAction::Shift(2, { 1, 3 }); - ParseAction reduce = ParseAction::Reduce(Symbol(2), 1, 2, AssociativityLeft, production); - - it("returns false and reports an unresolved conflict", [&]() { - result = conflict_manager->resolve(reduce, shift); - AssertThat(result.first, IsFalse()); - AssertThat(result.second, Equals(ConflictTypeUnresolved)); - - result = conflict_manager->resolve(shift, reduce); - AssertThat(result.first, IsTrue()); - AssertThat(result.second, Equals(ConflictTypeUnresolved)); - }); - }); - }); - - describe("reduce/reduce conflicts", [&]() { - describe("when one action has higher precedence", [&]() { - ParseAction left = ParseAction::Reduce(sym2, 1, 0, AssociativityLeft, production); - ParseAction right = ParseAction::Reduce(sym2, 1, 2, AssociativityLeft, production); - - it("favors that action", [&]() { - result = conflict_manager->resolve(left, right); - AssertThat(result.first, IsFalse()); - AssertThat(result.second, Equals(ConflictTypeResolved)); - - result = conflict_manager->resolve(right, left); - AssertThat(result.first, IsTrue()); - AssertThat(result.second, Equals(ConflictTypeResolved)); - }); - }); - - describe("when the actions have the same precedence", [&]() { - it("returns false and reports a conflict", [&]() { - ParseAction left = ParseAction::Reduce(Symbol(2), 1, 0, AssociativityLeft, production); - ParseAction right = ParseAction::Reduce(Symbol(3), 1, 0, AssociativityLeft, production); - - result = conflict_manager->resolve(right, left); - AssertThat(result.first, IsFalse()); - AssertThat(result.second, Equals(ConflictTypeUnresolved)); - - result = conflict_manager->resolve(left, right); - AssertThat(result.first, IsFalse()); - AssertThat(result.second, Equals(ConflictTypeUnresolved)); - }); - }); - }); - }); -}); - -END_TEST diff --git a/spec/compiler/build_tables/parse_item_spec.cc b/spec/compiler/build_tables/parse_item_spec.cc deleted file mode 100644 index 83c9121a..00000000 --- a/spec/compiler/build_tables/parse_item_spec.cc +++ /dev/null @@ -1,156 +0,0 @@ -#include "spec_helper.h" -#include "compiler/build_tables/parse_item.h" -#include "compiler/syntax_grammar.h" -#include "helpers/rule_helpers.h" - -using namespace rules; -using namespace build_tables; - -START_TEST - -describe("ParseItem::completion_status()", [&]() { - SyntaxGrammar grammar{{ - SyntaxVariable("rule_0", VariableTypeNamed, { - Production({ - {Symbol(11, true), 0, AssociativityNone}, - {Symbol(12, true), 0, AssociativityNone}, - {Symbol(13), 0, AssociativityNone}, - {Symbol(14, true), 4, AssociativityLeft}, - }), - Production({ - {Symbol(15, true), 0, AssociativityNone}, - {Symbol(16, true), 0, AssociativityNone}, - {Symbol(17, true), 5, AssociativityRight}, - }), - Production({}), - }), - }, {}, {}}; - - auto production = [&](int variable_index, int production_index) -> const Production & { - return grammar.variables[variable_index].productions[production_index]; - }; - - it("indicates whether the parse item is done, and its associativity and precedence", [&]() { - ParseItem item(Symbol(0), production(0, 0), 3); - AssertThat(item.completion_status().is_done, IsFalse()); - AssertThat(item.completion_status().precedence, Equals(0)); - AssertThat(item.completion_status().associativity, Equals(AssociativityNone)); - - item = ParseItem(Symbol(0), production(0, 0), 4); - AssertThat(item.completion_status().is_done, IsTrue()); - AssertThat(item.completion_status().precedence, Equals(4)); - AssertThat(item.completion_status().associativity, Equals(AssociativityLeft)); - - item = ParseItem(Symbol(0), production(0, 1), 3); - AssertThat(item.completion_status().is_done, IsTrue()); - AssertThat(item.completion_status().precedence, Equals(5)); - AssertThat(item.completion_status().associativity, Equals(AssociativityRight)); - - item = ParseItem(Symbol(0), production(0, 2), 0); - AssertThat(item.completion_status().is_done, IsTrue()); - AssertThat(item.completion_status().precedence, Equals(0)); - AssertThat(item.completion_status().associativity, Equals(AssociativityNone)); - }); -}); - -describe("ParseItemSet::transitions())", [&]() { - SyntaxGrammar grammar{{ - SyntaxVariable("rule_0", VariableTypeNamed, { - Production({ - {Symbol(11, true), 0, AssociativityNone}, - {Symbol(12, true), 0, AssociativityNone}, - {Symbol(13), 5, AssociativityNone}, - {Symbol(14, true), 0, AssociativityNone}, - }), - Production({ - {Symbol(11, true), 0, AssociativityNone}, - {Symbol(12, true), 0, AssociativityNone}, - {Symbol(15), 6, AssociativityNone}, - }) - }), - SyntaxVariable("rule_1", VariableTypeNamed, { - Production({ - {Symbol(15), 7, AssociativityNone}, - {Symbol(16, true), 0, AssociativityNone}, - }) - }), - SyntaxVariable("rule_2", VariableTypeNamed, { - Production({ - {Symbol(18, true), 0, AssociativityNone}, - }) - }) - }, {}, {}}; - - auto production = [&](int variable_index, int production_index) -> const Production & { - return grammar.variables[variable_index].productions[production_index]; - }; - - it("computes the ParseItemSet that would occur after consuming each lookahead symbol, along with its precedence", [&]() { - ParseItemSet item_set({ - - // Two symbols into the first production for rule_0 - { - ParseItem(Symbol(0), production(0, 0), 2), - LookaheadSet({ 21 }) - }, - - // Two symbols into the second production for rule_0 - { - ParseItem(Symbol(0), production(0, 1), 2), - LookaheadSet({ 21 }) - }, - - // At the beginning of the first production for rule_1 - { - ParseItem(Symbol(1), production(1, 0), 0), - LookaheadSet({ 22 }) - }, - - // At the end of the first production for rule_2 - { - ParseItem(Symbol(2), production(2, 0), 1), - LookaheadSet({ 22 }) - } - }); - - AssertThat(item_set.transitions(), Equals(ParseItemSet::TransitionMap({ - - // For the first item, symbol 13 is next, with precedence 5. - { - Symbol(13), - { - ParseItemSet({ - { - ParseItem(Symbol(0), production(0, 0), 3), - LookaheadSet({ 21 }) - } - }), - PrecedenceRange(5, 5) - } - }, - - // For the second and third item, symbol 15 is next, with two different - // precedence values. - { - Symbol(15), - { - ParseItemSet({ - { - ParseItem(Symbol(0), production(0, 1), 3), - LookaheadSet({ 21 }) - }, - { - ParseItem(Symbol(1), production(1, 0), 1), - LookaheadSet({ 22 }) - }, - }), - PrecedenceRange(6, 7) - } - }, - - // The third item is at the end of its production: no transitions. - }))); - }); -}); - -END_TEST diff --git a/spec/compiler/prepare_grammar/flatten_grammar_spec.cc b/spec/compiler/prepare_grammar/flatten_grammar_spec.cc index eab2b61c..3efd4e03 100644 --- a/spec/compiler/prepare_grammar/flatten_grammar_spec.cc +++ b/spec/compiler/prepare_grammar/flatten_grammar_spec.cc @@ -2,153 +2,87 @@ #include "compiler/prepare_grammar/flatten_grammar.h" #include "compiler/prepare_grammar/initial_syntax_grammar.h" #include "compiler/syntax_grammar.h" -#include "compiler/rules/built_in_symbols.h" #include "helpers/rule_helpers.h" - -template -vector::type> collect(const vector &v, Func f) { - vector::type> result; - for (const T &item : v) - result.push_back(f(item)); - return result; -} +#include "helpers/stream_methods.h" START_TEST using namespace rules; -using prepare_grammar::flatten_grammar; -using prepare_grammar::InitialSyntaxGrammar; +using prepare_grammar::flatten_rule; describe("flatten_grammar", []() { - auto get_symbol_sequences = [&](vector productions) { - return collect(productions, [](Production p) { - return collect(p, [](ProductionStep e) { - return e.symbol; - }); - }); - }; + it("associates each symbol with the precedence and associativity binding it to its successor", [&]() { + SyntaxVariable result = flatten_rule(Variable( + "test", + VariableTypeNamed, + seq({ + i_sym(1), + prec_left(101, seq({ + i_sym(2), + choice({ + prec_right(102, seq({ + i_sym(3), + i_sym(4) + })), + i_sym(5), + }), + i_sym(6), + })), + i_sym(7), + }) + )); - auto get_precedence_sequences = [&](vector productions) { - return collect(productions, [](Production p) { - return collect(p, [](ProductionStep e) { - return e.precedence; - }); - }); - }; + AssertThat(result.name, Equals("test")); + AssertThat(result.type, Equals(VariableTypeNamed)); + AssertThat(result.productions, Equals(vector({ + Production({ + {Symbol(1), 0, AssociativityNone}, + {Symbol(2), 101, AssociativityLeft}, + {Symbol(3), 102, AssociativityRight}, + {Symbol(4), 101, AssociativityLeft}, + {Symbol(6), 0, AssociativityNone}, + {Symbol(7), 0, AssociativityNone}, + }), + Production({ + {Symbol(1), 0, AssociativityNone}, + {Symbol(2), 101, AssociativityLeft}, + {Symbol(5), 101, AssociativityLeft}, + {Symbol(6), 0, AssociativityNone}, + {Symbol(7), 0, AssociativityNone}, + }) + }))) + }); - auto get_associativity_sequences = [&](vector productions) { - return collect(productions, [](Production p) { - return collect(p, [](ProductionStep e) { - return e.associativity; - }); - }); - }; - - InitialSyntaxGrammar input_grammar{{ - - // Choices within rules are extracted, resulting in multiple productions. - Variable("variable0", VariableTypeNamed, seq({ - i_sym(1), - choice({ i_sym(2), i_sym(3) }), - i_sym(4), - })), - - // When multiple precedence values are nested, the inner precedence wins. - Variable("variable1", VariableTypeNamed, seq({ - i_sym(1), + it("uses the last assigned precedence", [&]() { + SyntaxVariable result = flatten_rule(Variable( + "test1", + VariableTypeNamed, prec_left(101, seq({ - i_sym(2), - choice({ - prec_right(102, seq({ - i_sym(3), - i_sym(4) - })), - i_sym(5), - }), - i_sym(6), - })), - i_sym(7), - })), - - // When a precedence is applied to the end of a rule, its value is assigned - // to the last step of the corresponding production. - Variable("variable2", VariableTypeHidden, seq({ - prec_left(102, seq({ i_sym(1), i_sym(2), - })), - prec_left(103, seq({ - i_sym(3), - i_sym(4), - })), - })) - }, {}, {}}; + })) + )); - SyntaxGrammar grammar = flatten_grammar(input_grammar); + AssertThat(result.productions, Equals(vector({ + Production({ + {Symbol(1), 101, AssociativityLeft}, + {Symbol(2), 101, AssociativityLeft}, + }) + }))) - it("preserves the names and types of the grammar's variables", [&]() { - AssertThat(grammar.variables[0].name, Equals("variable0")); - AssertThat(grammar.variables[1].name, Equals("variable1")); - AssertThat(grammar.variables[2].name, Equals("variable2")); + result = flatten_rule(Variable( + "test2", + VariableTypeNamed, + prec_left(101, seq({ + i_sym(1), + })) + )); - AssertThat(grammar.variables[0].type, Equals(VariableTypeNamed)); - AssertThat(grammar.variables[1].type, Equals(VariableTypeNamed)); - AssertThat(grammar.variables[2].type, Equals(VariableTypeHidden)); - }); - - it("turns each variable's rule with a vector of possible symbol sequences", [&]() { - AssertThat( - get_symbol_sequences(grammar.variables[0].productions), - Equals(vector>({ - { Symbol(1), Symbol(2), Symbol(4) }, - { Symbol(1), Symbol(3), Symbol(4) } - }))); - - AssertThat( - get_symbol_sequences(grammar.variables[1].productions), - Equals(vector>({ - { Symbol(1), Symbol(2), Symbol(3), Symbol(4), Symbol(6), Symbol(7) }, - { Symbol(1), Symbol(2), Symbol(5), Symbol(6), Symbol(7) } - }))); - - AssertThat( - get_symbol_sequences(grammar.variables[2].productions), - Equals(vector>({ - { Symbol(1), Symbol(2), Symbol(3), Symbol(4) }, - }))); - }); - - it("associates each symbol with the precedence binding it to its previous neighbor", [&]() { - AssertThat( - get_precedence_sequences(grammar.variables[0].productions), - Equals(vector>({ - { 0, 0, 0 }, - { 0, 0, 0 } - }))); - - AssertThat( - get_precedence_sequences(grammar.variables[1].productions), - Equals(vector>({ - { 0, 101, 102, 101, 0, 0 }, - { 0, 101, 101, 0, 0 } - }))); - - AssertThat( - get_precedence_sequences(grammar.variables[2].productions), - Equals(vector>({ - { 102, 0, 103, 103 }, - }))); - }); - - it("associates each symbol with the correct associativity", [&]() { - Associativity none = AssociativityNone; - - AssertThat( - get_associativity_sequences(grammar.variables[1].productions), - Equals(vector>({ - { none, AssociativityLeft, AssociativityRight, AssociativityLeft, none, none }, - { none, AssociativityLeft, AssociativityLeft, none, none } - }))); + AssertThat(result.productions, Equals(vector({ + Production({ + {Symbol(1), 101, AssociativityLeft}, + }) + }))) }); }); diff --git a/spec/helpers/load_language.cc b/spec/helpers/load_language.cc index 8276caaf..c7e0317b 100644 --- a/spec/helpers/load_language.cc +++ b/spec/helpers/load_language.cc @@ -8,6 +8,7 @@ #include #include #include +#include #include "tree_sitter/compiler.h" using std::map; @@ -18,6 +19,7 @@ using std::istreambuf_iterator; map loaded_languages; int libcompiler_mtime = -1; +int compile_result_count = 0; const char *libcompiler_path = #if defined(__linux) @@ -63,24 +65,17 @@ static int get_modified_time(const string &path) { return file_stat.st_mtime; } -const TSLanguage *load_language(const string &name, const string &code, int timestamp) { - mkdir("out/tmp", 0777); - - string pwd(getenv("PWD")); - string language_function_name = "ts_language_" + name; - string header_dir = pwd + "/include"; - string source_filename = pwd + "/out/tmp/" + name + ".c"; - string obj_filename = source_filename + ".o"; - string lib_filename = source_filename + ".so"; - - int lib_mtime = get_modified_time(lib_filename); +const TSLanguage *load_language(const string &source_filename, + const string &lib_filename, + const string &language_name) { + string language_function_name = "ts_language_" + language_name; + string header_dir = getenv("PWD") + string("/include"); + int source_mtime = get_modified_time(source_filename); int header_mtime = get_modified_time(header_dir + "/tree_sitter/parser.h"); - if (!timestamp || !header_mtime || lib_mtime < timestamp || lib_mtime < header_mtime) { - ofstream source_file; - source_file.open(source_filename); - source_file << code; - source_file.close(); + int lib_mtime = get_modified_time(lib_filename); + if (!header_mtime || lib_mtime < header_mtime || lib_mtime < source_mtime) { + string obj_filename = lib_filename + ".o"; const char *compiler_name = getenv("CC"); if (!compiler_name) { compiler_name = "gcc"; @@ -135,13 +130,23 @@ const TSLanguage *load_language(const string &name, const string &code, int time return language_fn(); } -const TSLanguage *load_language(const string &name, const TSCompileResult &compile_result) { +const TSLanguage *load_compile_result(const string &name, const TSCompileResult &compile_result) { if (compile_result.error_type != TSCompileErrorTypeNone) { Assert::Failure(string("Compilation failed ") + compile_result.error_message); return nullptr; } - const TSLanguage *language = load_language(name, compile_result.code, 0); + mkdir("out/tmp", 0777); + string source_filename = "out/tmp/compile-result-" + to_string(compile_result_count) + ".c"; + string lib_filename = source_filename + ".so"; + compile_result_count++; + + ofstream source_file; + source_file.open(source_filename); + source_file << compile_result.code; + source_file.close(); + + const TSLanguage *language = load_language(source_filename, lib_filename, name); free(compile_result.code); return language; } @@ -150,12 +155,6 @@ const TSLanguage *get_test_language(const string &language_name) { if (loaded_languages[language_name]) return loaded_languages[language_name]; - if (libcompiler_mtime == -1) { - libcompiler_mtime = get_modified_time(libcompiler_path); - if (!libcompiler_mtime) - return nullptr; - } - string language_dir = string("spec/fixtures/grammars/") + language_name; string grammar_filename = language_dir + "/src/grammar.json"; string parser_filename = language_dir + "/src/parser.c"; @@ -164,19 +163,21 @@ const TSLanguage *get_test_language(const string &language_name) { if (!grammar_mtime) return nullptr; + if (libcompiler_mtime == -1) { + libcompiler_mtime = get_modified_time(libcompiler_path); + if (!libcompiler_mtime) + return nullptr; + } + int parser_mtime = get_modified_time(parser_filename); - int input_mtime = (grammar_mtime > libcompiler_mtime) ? - grammar_mtime : - libcompiler_mtime; - - string parser_code; - if (!parser_mtime || parser_mtime < input_mtime) { + if (parser_mtime < grammar_mtime || parser_mtime < libcompiler_mtime) { printf("\n" "Regenerating the %s parser...\n", language_name.c_str()); ifstream grammar_file(grammar_filename); istreambuf_iterator grammar_file_iterator(grammar_file), end_iterator; - std::string grammar_json(grammar_file_iterator, end_iterator); + string grammar_json(grammar_file_iterator, end_iterator); + grammar_file.close(); TSCompileResult result = ts_compile_grammar(grammar_json.c_str()); if (result.error_type != TSCompileErrorTypeNone) { @@ -186,17 +187,11 @@ const TSLanguage *get_test_language(const string &language_name) { ofstream parser_file(parser_filename); parser_file << result.code; - parser_code = result.code; - - grammar_file.close(); parser_file.close(); - } else { - ifstream parser_file(parser_filename); - istreambuf_iterator grammar_file_iterator(parser_file), end_iterator; - parser_code.assign(grammar_file_iterator, end_iterator); } - const TSLanguage *language = load_language(language_name, parser_code, input_mtime); + string lib_filename = "out/tmp/" + language_name + ".so"; + const TSLanguage *language = load_language(parser_filename, lib_filename, language_name); loaded_languages[language_name] = language; return language; }; diff --git a/spec/helpers/load_language.h b/spec/helpers/load_language.h index 37102df1..41b1458e 100644 --- a/spec/helpers/load_language.h +++ b/spec/helpers/load_language.h @@ -5,7 +5,7 @@ #include "tree_sitter/runtime.h" #include -const TSLanguage *load_language(const std::string &, const TSCompileResult &); +const TSLanguage *load_compile_result(const std::string &, const TSCompileResult &); const TSLanguage *get_test_language(const std::string &language_name); #endif // HELPERS_LOAD_LANGUAGE_H_ diff --git a/spec/helpers/stream_methods.cc b/spec/helpers/stream_methods.cc index 69483ed3..cf35512d 100644 --- a/spec/helpers/stream_methods.cc +++ b/spec/helpers/stream_methods.cc @@ -65,8 +65,7 @@ ostream &operator<<(ostream &stream, const ParseAction &action) { case ParseActionTypeAccept: return stream << string("#"); case ParseActionTypeShift: - return stream << string("#"; + return stream << string("#"; case ParseActionTypeReduce: return stream << ("#"); @@ -87,7 +86,16 @@ ostream &operator<<(ostream &stream, const ParseState &state) { } ostream &operator<<(ostream &stream, const ProductionStep &step) { - return stream << string("(production_step symbol:") << step.symbol << string(" precedence:") << to_string(step.precedence) << ")"; + stream << "(symbol: " << step.symbol << ", precedence:" << to_string(step.precedence); + stream << ", associativity: "; + switch (step.associativity) { + case rules::AssociativityLeft: + return stream << "left)"; + case rules::AssociativityRight: + return stream << "right)"; + default: + return stream << "none)"; + } } ostream &operator<<(ostream &stream, const PrecedenceRange &range) { diff --git a/spec/integration/compile_grammar_spec.cc b/spec/integration/compile_grammar_spec.cc index a8f8988d..0d55bd56 100644 --- a/spec/integration/compile_grammar_spec.cc +++ b/spec/integration/compile_grammar_spec.cc @@ -1,6 +1,26 @@ #include "spec_helper.h" #include "runtime/alloc.h" #include "helpers/load_language.h" +#include "compiler/util/string_helpers.h" +#include + +static string dedent(string input) { + size_t indent_level = input.find_first_not_of("\n ") - input.find_first_not_of("\n"); + string whitespace = "\n" + string(indent_level, ' '); + util::str_replace(&input, whitespace, "\n"); + return input.substr( + input.find_first_not_of("\n "), + input.find_last_not_of("\n ") + 1 + ); +} + +static string fill_template(string input, map parameters) { + string result = input; + for (const auto &pair : parameters) { + util::str_replace(&result, "{{" + pair.first + "}}", pair.second); + } + return result; +} START_TEST @@ -22,6 +42,212 @@ describe("compile_grammar", []() { ts_free(node_string); }; + describe("conflicts", [&]() { + it("can resolve shift/reduce conflicts using associativities", [&]() { + string grammar_template = R"JSON({ + "name": "associativity_example", + + "rules": { + "expression": { + "type": "CHOICE", + "members": [ + {"type": "SYMBOL", "name": "math_operation"}, + {"type": "SYMBOL", "name": "identifier"} + ] + }, + + "math_operation": { + "type": "{{math_operation_prec_type}}", + "value": 0, + "content": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "expression"}, + {"type": "STRING", "value": "+"}, + {"type": "SYMBOL", "name": "expression"} + ] + } + }, + + "identifier": { + "type": "PATTERN", + "value": "[a-zA-Z]+" + } + } + })JSON"; + + // Ambiguity, which '+' applies first? + ts_document_set_input_string(document, "x+y+z"); + + TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, { + {"math_operation_prec_type", "PREC"} + }).c_str()); + + AssertThat(result.error_message, Equals(dedent(R"MESSAGE( + Unresolved conflict for symbol sequence: + + expression '+' expression • '+' … + + Possible interpretations: + + (math_operation expression '+' expression) • '+' … + + expression '+' (math_operation expression • '+' expression) + + Possible resolutions: + + Specify left or right associativity in the rules: math_operation + + Add a conflict for the rules: math_operation + )MESSAGE"))); + + result = ts_compile_grammar(fill_template(grammar_template, { + {"math_operation_prec_type", "PREC_LEFT"} + }).c_str()); + + ts_document_set_language(document, load_compile_result("associativity_example", result)); + ts_document_parse(document); + assert_root_node("(expression (math_operation " + "(expression (math_operation (expression (identifier)) (expression (identifier)))) " + "(expression (identifier))))"); + + result = ts_compile_grammar(fill_template(grammar_template, { + {"math_operation_prec_type", "PREC_RIGHT"} + }).c_str()); + + ts_document_set_language(document, load_compile_result("associativity_example", result)); + ts_document_parse(document); + assert_root_node("(expression (math_operation " + "(expression (identifier)) " + "(expression (math_operation (expression (identifier)) (expression (identifier))))))"); + }); + + it("can resolve shift/reduce conflicts involving single-child rules using precedence", [&]() { + string grammar_template = R"JSON({ + "name": "associativity_example", + + "extras": [ + {"type": "PATTERN", "value": "\\s"} + ], + + "rules": { + "expression": { + "type": "CHOICE", + "members": [ + {"type": "SYMBOL", "name": "function_call"}, + {"type": "SYMBOL", "name": "identifier"} + ] + }, + + "function_call": { + "type": "PREC_RIGHT", + "value": {{function_call_precedence}}, + "content": { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "identifier"}, + {"type": "SYMBOL", "name": "expression"} + ] + }, + { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "identifier"}, + {"type": "SYMBOL", "name": "block"} + ] + }, + { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "identifier"}, + {"type": "SYMBOL", "name": "expression"}, + {"type": "SYMBOL", "name": "block"} + ] + } + ] + } + }, + + "block": { + "type": "SEQ", + "members": [ + {"type": "STRING", "value": "{"}, + {"type": "SYMBOL", "name": "expression"}, + {"type": "STRING", "value": "}"} + ] + }, + + "identifier": { + "type": "PATTERN", + "value": "[a-zA-Z]+" + } + } + })JSON"; + + // Ambiguity: is the trailing block associated with `bar` or `foo`? + ts_document_set_input_string(document, "foo bar { baz }"); + + TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, { + {"function_call_precedence", "0"} + }).c_str()); + + AssertThat(result.error_message, Equals(dedent(R"MESSAGE( + Unresolved conflict for symbol sequence: + + identifier • '{' … + + Possible interpretations: + + (expression identifier) • '{' … + + (function_call identifier • block) + + Possible resolutions: + + Use different precedences in the rules: expression function_call + + Specify left or right associativity in the rules: expression + + Add a conflict for the rules: expression function_call + )MESSAGE"))); + + // Giving function calls lower precedence than expressions causes `bar` + // to be treated as an expression passed to `foo`, not as a function + // that's being called with a block. + result = ts_compile_grammar(fill_template(grammar_template, { + {"function_call_precedence", "-1"} + }).c_str()); + + AssertThat(result.error_message, IsNull()); + ts_document_set_language(document, load_compile_result("associativity_example", result)); + ts_document_parse(document); + assert_root_node("(expression (function_call " + "(identifier) " + "(expression (identifier)) " + "(block (expression (identifier)))))"); + + // Giving function calls higher precedence than expressions causes `bar` + // to be treated as a function that's being called with a block, not as + // an expression passed to `foo`. + result = ts_compile_grammar(fill_template(grammar_template, { + {"function_call_precedence", "1"} + }).c_str()); + + AssertThat(result.error_message, IsNull()); + ts_document_set_language(document, load_compile_result("associativity_example", result)); + ts_document_set_input_string(document, "foo bar { baz }"); + ts_document_parse(document); + assert_root_node("(expression (function_call " + "(identifier) " + "(expression (function_call " + "(identifier) " + "(block (expression (identifier)))))))"); + }); + }); + describe("when the grammar's start symbol is a token", [&]() { it("parses the token", [&]() { TSCompileResult result = ts_compile_grammar(R"JSON( @@ -33,7 +259,7 @@ describe("compile_grammar", []() { } )JSON"); - ts_document_set_language(document, load_language("one_token_language", result)); + ts_document_set_language(document, load_compile_result("one_token_language", result)); ts_document_set_input_string(document, "the-value"); ts_document_parse(document); @@ -52,7 +278,7 @@ describe("compile_grammar", []() { } )JSON"); - ts_document_set_language(document, load_language("blank_language", result)); + ts_document_set_language(document, load_compile_result("blank_language", result)); ts_document_set_input_string(document, ""); ts_document_parse(document); @@ -79,7 +305,7 @@ describe("compile_grammar", []() { } )JSON"); - ts_document_set_language(document, load_language("escaped_char_language", result)); + ts_document_set_language(document, load_compile_result("escaped_char_language", result)); ts_document_set_input_string(document, "1234"); ts_document_parse(document); @@ -167,7 +393,7 @@ describe("compile_grammar", []() { } )JSON"); - const TSLanguage *language = load_language("arithmetic", result); + const TSLanguage *language = load_compile_result("arithmetic", result); ts_document_set_language(document, language); ts_document_set_input_string(document, "a + b * c"); diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 17fbdea6..d50deb5b 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -1,4 +1,5 @@ #include "compiler/build_tables/build_parse_table.h" + #include #include #include #include @@ -6,7 +7,6 @@ #include #include #include "compiler/parse_table.h" -#include "compiler/build_tables/parse_conflict_manager.h" #include "compiler/build_tables/remove_duplicate_states.h" #include "compiler/build_tables/parse_item.h" #include "compiler/build_tables/parse_item_set_builder.h" @@ -28,13 +28,13 @@ using std::string; using std::to_string; using std::unordered_map; using std::make_shared; +using rules::Associativity; using rules::Symbol; using rules::END_OF_INPUT; class ParseTableBuilder { const SyntaxGrammar grammar; const LexicalGrammar lexical_grammar; - ParseConflictManager conflict_manager; unordered_map recovery_states; unordered_map parse_state_ids; vector> item_sets_to_process; @@ -95,13 +95,10 @@ class ParseTableBuilder { item_sets_to_process.pop_back(); item_set_builder.apply_transitive_closure(&item_set); - add_reduce_actions(item_set, state_id); - add_shift_actions(item_set, state_id); - add_shift_extra_actions(state_id); + string conflict = add_actions(item_set, state_id); - if (!conflicts.empty()) { - return CompileError(TSCompileErrorTypeParseConflict, - "Unresolved conflict.\n\n" + *conflicts.begin()); + if (!conflict.empty()) { + return CompileError(TSCompileErrorTypeParseConflict, conflict); } } @@ -155,81 +152,116 @@ class ParseTableBuilder { } } - void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) { - for (const auto &transition : item_set.transitions()) { - const Symbol &symbol = transition.first; - const ParseItemSet &next_item_set = transition.second.first; - const PrecedenceRange &precedence = transition.second.second; + string add_actions(const ParseItemSet &item_set, ParseStateId state_id) { + map terminal_successors; + map nonterminal_successors; + set lookaheads_with_conflicts; - if (!allow_any_conflict) { - recovery_states[symbol].add(next_item_set); - } - - if (symbol.is_token) { - ParseAction *new_action = add_terminal_action( - state_id, symbol.index, ParseAction::Shift(0, precedence), item_set); - if (new_action) { - new_action->state_index = add_parse_state(next_item_set); - } - } else { - ParseStateId next_state = add_parse_state(next_item_set); - parse_table.set_nonterminal_action(state_id, symbol.index, next_state); - } - } - } - - void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) { for (const auto &pair : item_set.entries) { const ParseItem &item = pair.first; - const auto &lookahead_symbols = pair.second; + const LookaheadSet &lookahead_symbols = pair.second; - ParseItem::CompletionStatus status = item.completion_status(); - if (status.is_done) { - ParseAction action; - if (item.lhs() == rules::START()) { - action = ParseAction::Accept(); - } else { - action = ParseAction::Reduce(Symbol(item.variable_index), - item.step_index, status.precedence, - status.associativity, *item.production); + // If the item is finished, immediately add a Reduce or Accept action to + // the parse table for each of its lookahead terminals. + if (item.is_done()) { + ParseAction action = (item.lhs() == rules::START()) ? + ParseAction::Accept() : + ParseAction::Reduce(item.lhs(), item.step_index, *item.production); + + int precedence = item.precedence(); + for (const Symbol::Index lookahead : *lookahead_symbols.entries) { + ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead]; + + if (entry.actions.empty()) { + parse_table.add_terminal_action(state_id, lookahead, action); + } else { + ParseAction &existing_action = entry.actions[0]; + if (allow_any_conflict) { + entry.actions.push_back(action); + } else { + int existing_precedence = existing_action.precedence(); + if (precedence > existing_precedence) { + for (const ParseAction &old_action : entry.actions) + fragile_productions.insert(old_action.production); + entry.actions.clear(); + entry.actions.push_back(action); + } else if (precedence == existing_precedence) { + lookaheads_with_conflicts.insert(lookahead); + entry.actions.push_back(action); + } else { + fragile_productions.insert(item.production); + } + } + } } - for (const Symbol::Index lookahead : *lookahead_symbols.entries) { - add_terminal_action(state_id, lookahead, action, item_set); + // If the item is unfinished, create a new item by advancing one symbol. + // Add that new item to a successor item set. + } else { + Symbol symbol = item.production->at(item.step_index).symbol; + ParseItem new_item(item.lhs(), *item.production, item.step_index + 1); + + if (symbol.is_token) { + terminal_successors[symbol.index].entries[new_item] = lookahead_symbols; + } else { + nonterminal_successors[symbol.index].entries[new_item] = lookahead_symbols; } } } - } - void add_shift_extra_actions(ParseStateId state_id) { - ParseAction action = ParseAction::ShiftExtra(); + for (auto &pair : terminal_successors) { + Symbol::Index lookahead = pair.first; + ParseItemSet &next_item_set = pair.second; + ParseStateId next_state_id = add_parse_state(next_item_set); + bool had_existing_action = !parse_table.states[state_id].terminal_entries[lookahead].actions.empty(); + parse_table.add_terminal_action(state_id, lookahead, ParseAction::Shift(next_state_id)); + if (!allow_any_conflict) { + if (had_existing_action) { + lookaheads_with_conflicts.insert(lookahead); + } + recovery_states[Symbol(lookahead, true)].add(next_item_set); + } + } + + // Add a Shift action for each non-terminal transition. + for (auto &pair : nonterminal_successors) { + Symbol::Index lookahead = pair.first; + ParseItemSet &next_item_set = pair.second; + ParseStateId next_state = add_parse_state(next_item_set); + parse_table.set_nonterminal_action(state_id, lookahead, next_state); + + if (!allow_any_conflict) + recovery_states[Symbol(lookahead, false)].add(next_item_set); + } + + for (Symbol::Index lookahead : lookaheads_with_conflicts) { + string conflict = handle_conflict(item_set, state_id, lookahead); + if (!conflict.empty()) return conflict; + } + + ParseAction shift_extra = ParseAction::ShiftExtra(); ParseState &state = parse_table.states[state_id]; - for (const Symbol &extra_symbol : grammar.extra_tokens) + for (const Symbol &extra_symbol : grammar.extra_tokens) { if (!state.terminal_entries.count(extra_symbol.index) || - state.has_shift_action() || allow_any_conflict) - parse_table.add_terminal_action(state_id, extra_symbol.index, action); + state.has_shift_action() || allow_any_conflict) { + parse_table.add_terminal_action(state_id, extra_symbol.index, shift_extra); + } + } + + return ""; } void mark_fragile_actions() { for (ParseState &state : parse_table.states) { - set symbols_with_multiple_actions; - for (auto &entry : state.terminal_entries) { const Symbol symbol(entry.first, true); auto &actions = entry.second.actions; - if (actions.size() > 1) { - symbols_with_multiple_actions.insert(symbol); - } - for (ParseAction &action : actions) { if (action.type == ParseActionTypeReduce) { if (has_fragile_production(action.production)) action.fragile = true; - action.production = NULL; - action.precedence_range = PrecedenceRange(); - action.associativity = rules::AssociativityNone; } } @@ -323,154 +355,178 @@ class ParseTableBuilder { } } - ParseAction *add_terminal_action(ParseStateId state_id, Symbol::Index lookahead, - const ParseAction &new_action, - const ParseItemSet &item_set) { - const ParseState &state = parse_table.states[state_id]; - const auto ¤t_entry = state.terminal_entries.find(lookahead); - if (current_entry == state.terminal_entries.end()) - return &parse_table.set_terminal_action(state_id, lookahead, new_action); - if (allow_any_conflict) - return &parse_table.add_terminal_action(state_id, lookahead, new_action); + string handle_conflict(const ParseItemSet &item_set, ParseStateId state_id, + Symbol::Index lookahead) { + ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead]; + int reduction_precedence = entry.actions.front().precedence(); + set shift_items; - const ParseAction old_action = current_entry->second.actions[0]; - auto resolution = conflict_manager.resolve(new_action, old_action); + for (const ParseAction &action : entry.actions) + if (action.type == ParseActionTypeReduce) + fragile_productions.insert(action.production); - switch (resolution.second) { - case ConflictTypeNone: - if (resolution.first) { - return &parse_table.set_terminal_action(state_id, lookahead, new_action); - } - break; - - case ConflictTypeResolved: { - if (resolution.first) { - if (old_action.type == ParseActionTypeReduce) - fragile_productions.insert(old_action.production); - return &parse_table.set_terminal_action(state_id, lookahead, new_action); - } else { - if (new_action.type == ParseActionTypeReduce) - fragile_productions.insert(new_action.production); - break; + if (entry.actions.back().type == ParseActionTypeShift) { + PrecedenceRange shift_precedence; + for (const auto &item_set_entry : item_set.entries) { + const ParseItem &item = item_set_entry.first; + if (item.step_index > 0 && !item.is_done()) { + LookaheadSet first_set = item_set_builder.get_first_set(item.next_symbol()); + if (first_set.contains(lookahead)) { + shift_items.insert(item); + shift_precedence.add(item.precedence()); + } } } - case ConflictTypeUnresolved: { - if (handle_unresolved_conflict(item_set, lookahead)) { - if (old_action.type == ParseActionTypeReduce) - fragile_productions.insert(old_action.production); - if (new_action.type == ParseActionTypeReduce) - fragile_productions.insert(new_action.production); - return &parse_table.add_terminal_action(state_id, lookahead, new_action); + // If the shift action has higher precedence, prefer it over any of the + // reduce actions. + if (shift_precedence.min > reduction_precedence || + (shift_precedence.min == reduction_precedence && + shift_precedence.max > reduction_precedence)) { + for (const ParseAction &action : entry.actions) { + if (action.type == ParseActionTypeShift) break; + fragile_productions.insert(action.production); } - break; + entry.actions.assign({ entry.actions.back() }); } - } - return nullptr; - } + // If the shift action has lower precedence, prefer the reduce actions. + else if (shift_precedence.max < reduction_precedence || + (shift_precedence.max == reduction_precedence && + shift_precedence.min < reduction_precedence)) { + entry.actions.pop_back(); + } - bool handle_unresolved_conflict(const ParseItemSet &item_set, - const Symbol::Index lookahead) { - set involved_symbols; - set reduce_items; - set core_shift_items; - set other_shift_items; + // If the shift action has the same precedence as the reduce actions, + // consider the reduce actions' associativity. If they are all left + // associative, prefer the reduce actions. If they are all right + // associative, prefer the shift. + else if (shift_precedence.min == reduction_precedence && + shift_precedence.max == reduction_precedence) { + bool has_non_associative_reductions = false; + bool has_left_associative_reductions = false; + bool has_right_associative_reductions = false; + for (const ParseAction &action : entry.actions) { + if (action.type != ParseActionTypeReduce) break; + switch (action.associativity()) { + case rules::AssociativityLeft: + has_left_associative_reductions = true; + break; + case rules::AssociativityRight: + has_right_associative_reductions = true; + break; + default: + has_non_associative_reductions = true; + break; + } + } - for (const auto &pair : item_set.entries) { - const ParseItem &item = pair.first; - const LookaheadSet &lookahead_set = pair.second; - - Symbol next_symbol = item.next_symbol(); - if (next_symbol == rules::NONE()) { - if (lookahead_set.contains(lookahead)) { - involved_symbols.insert(item.lhs()); - reduce_items.insert(item); + if (!has_non_associative_reductions) { + if (has_right_associative_reductions && !has_left_associative_reductions) { + for (const ParseAction &action : entry.actions) { + if (action.type == ParseActionTypeShift) break; + fragile_productions.insert(action.production); + } + entry.actions.assign({ entry.actions.back() }); + } else if (has_left_associative_reductions && !has_right_associative_reductions) { + entry.actions.pop_back(); + } } } else { - if (item.step_index > 0) { - LookaheadSet first_set = item_set_builder.get_first_set(next_symbol); - if (first_set.contains(lookahead)) { - involved_symbols.insert(item.lhs()); - core_shift_items.insert(item); - } - } else if (next_symbol.is_token && next_symbol.index == lookahead) { - other_shift_items.insert(item); - } + return "Mismatched precedence"; } } - for (const auto &conflict_set : grammar.expected_conflicts) - if (involved_symbols == conflict_set) - return true; + if (entry.actions.size() == 1) return ""; - string description = "Lookahead symbol: " + symbol_name(Symbol(lookahead, true)) + "\n"; + set actual_conflict; + for (const ParseItem &item : shift_items) + actual_conflict.insert(item.lhs()); + for (const ParseAction &action : entry.actions) + if (action.type == ParseActionTypeReduce) + actual_conflict.insert(action.symbol); - if (!reduce_items.empty()) { - description += "Reduce items:\n"; - for (const ParseItem &item : reduce_items) - description += " " + item_string(item) + "\n"; + for (const auto &expected_conflict : grammar.expected_conflicts) + if (expected_conflict == actual_conflict) + return ""; + + ParseItem earliest_starting_item; + for (const ParseAction &action : entry.actions) + if (action.type == ParseActionTypeReduce) + if (action.consumed_symbol_count > earliest_starting_item.step_index) + earliest_starting_item = ParseItem(action.symbol, *action.production, action.consumed_symbol_count); + + for (const ParseItem &shift_item : shift_items) + if (shift_item.step_index > earliest_starting_item.step_index) + earliest_starting_item = shift_item; + + string description = "Unresolved conflict for symbol sequence:\n\n"; + for (size_t i = 0; i < earliest_starting_item.step_index; i++) { + description += " " + symbol_name(earliest_starting_item.production->at(i).symbol); } - if (!core_shift_items.empty()) { - description += "Core shift items:\n"; - for (const ParseItem &item : core_shift_items) - description += " " + item_string(item) + "\n"; + description += " \u2022 " + symbol_name(Symbol(lookahead, true)) + " \u2026"; + description += "\n\n"; + + description += "Possible interpretations:\n\n"; + + for (const ParseAction &action : entry.actions) { + if (action.type == ParseActionTypeReduce) { + for (size_t i = 0; i < earliest_starting_item.step_index - action.consumed_symbol_count; i++) { + description += " " + symbol_name(earliest_starting_item.production->at(i).symbol); + } + + description += " (" + symbol_name(action.symbol); + for (const ProductionStep &step : *action.production) { + description += " " + symbol_name(step.symbol); + } + description += ")"; + description += " \u2022 " + symbol_name(Symbol(lookahead, true)) + " \u2026"; + description += "\n\n"; + } } - if (!other_shift_items.empty()) { - description += "Other shift items:\n"; - for (const ParseItem &item : other_shift_items) - description += " " + item_string(item) + "\n"; + for (const ParseItem &shift_item : shift_items) { + for (size_t i = 0; i < earliest_starting_item.step_index - shift_item.step_index; i++) { + description += " " + symbol_name(earliest_starting_item.production->at(i).symbol); + } + + description += " (" + symbol_name(shift_item.lhs()); + for (size_t i = 0; i < shift_item.production->size(); i++) { + if (i == shift_item.step_index) + description += " \u2022"; + description += " " + symbol_name(shift_item.production->at(i).symbol); + } + description += ")"; + description += "\n\n"; } - conflicts.insert(description); - return false; - } + description += "Possible resolutions:\n\n"; - string item_string(const ParseItem &item) const { - string result = symbol_name(item.lhs()) + " ->"; - size_t i = 0; - for (const ProductionStep &step : *item.production) { - if (i == item.step_index) - result += " \u2022"; - result += " " + symbol_name(step.symbol); - i++; - } - if (i == item.step_index) - result += " \u2022"; - - result += " (prec " + to_string(item.precedence()); - - switch (item.associativity()) { - case rules::AssociativityNone: - result += ")"; - break; - case rules::AssociativityLeft: - result += ", assoc left)"; - break; - case rules::AssociativityRight: - result += ", assoc right)"; - break; + if (actual_conflict.size() > 1) { + description += " Use different precedences in the rules:"; + for (const Symbol &conflict_symbol : actual_conflict) { + description += " " + symbol_name(conflict_symbol); + } + description += "\n\n"; } - return result; - } - - set get_first_set(const Symbol &start_symbol) { - set result; - vector symbols_to_process({ start_symbol }); - - while (!symbols_to_process.empty()) { - Symbol symbol = symbols_to_process.back(); - symbols_to_process.pop_back(); - if (result.insert(symbol).second) - for (const Production &production : grammar.productions(symbol)) - if (!production.empty()) - symbols_to_process.push_back(production[0].symbol); + if (shift_items.size() > 0) { + description += " Specify left or right associativity in the rules:"; + for (const ParseAction &action : entry.actions) { + if (action.type == ParseActionTypeReduce) { + description += " " + symbol_name(action.symbol); + } + } + description += "\n\n"; } - return result; + description += " Add a conflict for the rules:"; + for (const Symbol &conflict_symbol : actual_conflict) { + description += " " + symbol_name(conflict_symbol); + } + description += "\n"; + return description; } string symbol_name(const rules::Symbol &symbol) const { @@ -491,8 +547,7 @@ class ParseTableBuilder { } bool has_fragile_production(const Production *production) { - auto end = fragile_productions.end(); - return std::find(fragile_productions.begin(), end, production) != end; + return fragile_productions.find(production) != fragile_productions.end(); } }; diff --git a/src/compiler/build_tables/parse_conflict_manager.cc b/src/compiler/build_tables/parse_conflict_manager.cc deleted file mode 100644 index ca612926..00000000 --- a/src/compiler/build_tables/parse_conflict_manager.cc +++ /dev/null @@ -1,80 +0,0 @@ -#include "compiler/build_tables/parse_conflict_manager.h" -#include -#include "compiler/parse_table.h" -#include "compiler/rules/built_in_symbols.h" - -namespace tree_sitter { -namespace build_tables { - -using std::pair; -using std::vector; - -pair ParseConflictManager::resolve( - const ParseAction &new_action, const ParseAction &old_action) const { - if (new_action.type < old_action.type) { - auto opposite = resolve(old_action, new_action); - return { !opposite.first, opposite.second }; - } - - switch (old_action.type) { - case ParseActionTypeError: - return { true, ConflictTypeNone }; - - case ParseActionTypeShift: - if (new_action.extra) { - return { false, ConflictTypeNone }; - } else if (old_action.extra) { - return { true, ConflictTypeNone }; - } else if (new_action.type == ParseActionTypeReduce) { - int min_precedence = old_action.precedence_range.min; - int max_precedence = old_action.precedence_range.max; - int new_precedence = new_action.precedence_range.max; - if (new_precedence < min_precedence || - (new_precedence == min_precedence && - min_precedence < max_precedence)) { - return { false, ConflictTypeResolved }; - } else if (new_precedence > max_precedence || - (new_precedence == max_precedence && - min_precedence < max_precedence)) { - return { true, ConflictTypeResolved }; - } else if (min_precedence == max_precedence) { - switch (new_action.associativity) { - case rules::AssociativityLeft: - return { true, ConflictTypeResolved }; - case rules::AssociativityRight: - return { false, ConflictTypeResolved }; - default: - return { false, ConflictTypeUnresolved }; - } - } else { - return { false, ConflictTypeUnresolved }; - } - } - break; - - case ParseActionTypeReduce: - if (new_action.type == ParseActionTypeReduce) { - if (new_action.extra) - return { false, ConflictTypeNone }; - if (old_action.extra) - return { true, ConflictTypeNone }; - int old_precedence = old_action.precedence_range.min; - int new_precedence = new_action.precedence_range.min; - if (new_precedence > old_precedence) { - return { true, ConflictTypeResolved }; - } else if (new_precedence < old_precedence) { - return { false, ConflictTypeResolved }; - } else { - return { false, ConflictTypeUnresolved }; - } - } - - default: - break; - } - - return { false, ConflictTypeNone }; -} - -} // namespace build_tables -} // namespace tree_sitter diff --git a/src/compiler/build_tables/parse_conflict_manager.h b/src/compiler/build_tables/parse_conflict_manager.h deleted file mode 100644 index 567ecaa0..00000000 --- a/src/compiler/build_tables/parse_conflict_manager.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_ -#define COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_ - -#include -#include "compiler/syntax_grammar.h" -#include "compiler/build_tables/parse_item.h" - -namespace tree_sitter { - -class ParseAction; -namespace rules { -class Symbol; -} - -namespace build_tables { - -enum ConflictType { - ConflictTypeNone, - ConflictTypeResolved, - ConflictTypeUnresolved -}; - -class ParseConflictManager { - public: - std::pair resolve(const ParseAction &, - const ParseAction &) const; -}; - -} // namespace build_tables -} // namespace tree_sitter - -#endif // COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_ diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc index d691998f..39b131cb 100644 --- a/src/compiler/build_tables/parse_item.cc +++ b/src/compiler/build_tables/parse_item.cc @@ -12,6 +12,7 @@ using std::pair; using std::string; using std::to_string; using rules::Symbol; +using rules::Associativity; using util::hash_combine; ParseItem::ParseItem() : variable_index(-1), production(nullptr), step_index(0) {} @@ -43,26 +44,32 @@ Symbol ParseItem::lhs() const { return Symbol(variable_index); } -ParseItem::CompletionStatus ParseItem::completion_status() const { - CompletionStatus result = { false, 0, rules::AssociativityNone }; - if (step_index == production->size()) { - result.is_done = true; - if (step_index > 0) { - const ProductionStep &last_step = production->at(step_index - 1); - result.precedence = last_step.precedence; - result.associativity = last_step.associativity; - } - } - return result; +bool ParseItem::is_done() const { + return step_index >= production->size(); } int ParseItem::precedence() const { - if (production->empty()) - return 0; - else if (completion_status().is_done) - return production->back().precedence; - else + if (is_done()) { + if (production->empty()) { + return 0; + } else { + return production->back().precedence; + } + } else { return production->at(step_index).precedence; + } +} + +rules::Associativity ParseItem::associativity() const { + if (is_done()) { + if (production->empty()) { + return rules::AssociativityNone; + } else { + return production->back().associativity; + } + } else { + return production->at(step_index).associativity; + } } Symbol ParseItem::next_symbol() const { @@ -72,15 +79,6 @@ Symbol ParseItem::next_symbol() const { return production->at(step_index).symbol; } -rules::Associativity ParseItem::associativity() const { - if (production->empty()) - return rules::AssociativityNone; - else if (completion_status().is_done) - return production->back().associativity; - else - return production->at(step_index).associativity; -} - ParseItemSet::ParseItemSet() {} ParseItemSet::ParseItemSet(const map &entries) @@ -107,21 +105,33 @@ size_t ParseItemSet::unfinished_item_signature() const { return result; } -ParseItemSet::TransitionMap ParseItemSet::transitions() const { - ParseItemSet::TransitionMap result; +ParseItemSet::ActionMap ParseItemSet::actions() const { + ParseItemSet::ActionMap result; + for (const auto &pair : entries) { const ParseItem &item = pair.first; const LookaheadSet &lookahead_symbols = pair.second; - if (item.step_index == item.production->size()) - continue; - size_t step = item.step_index + 1; - Symbol symbol = item.production->at(item.step_index).symbol; - int precedence = item.production->at(item.step_index).precedence; - ParseItem new_item(item.lhs(), *item.production, step); + if (item.step_index == item.production->size()) { + int precedence = item.precedence(); + for (const Symbol::Index lookahead : *lookahead_symbols.entries) { + Action &action = result.terminal_actions[lookahead]; + if (precedence > action.completion_precedence) { + action.completions.assign({ &item }); + } else if (precedence == action.completion_precedence) { + action.completions.push_back({ &item }); + } + } + } else { + Symbol symbol = item.production->at(item.step_index).symbol; + ParseItem new_item(item.lhs(), *item.production, item.step_index + 1); - result[symbol].first.entries[new_item] = lookahead_symbols; - result[symbol].second.add(precedence); + if (symbol.is_token) { + result.terminal_actions[symbol.index].continuation.entries[new_item] = lookahead_symbols; + } else { + result.nonterminal_continuations[symbol.index].entries[new_item] = lookahead_symbols; + } + } } return result; diff --git a/src/compiler/build_tables/parse_item.h b/src/compiler/build_tables/parse_item.h index b8a78480..a091ac9d 100644 --- a/src/compiler/build_tables/parse_item.h +++ b/src/compiler/build_tables/parse_item.h @@ -29,7 +29,7 @@ class ParseItem { rules::Symbol next_symbol() const; int precedence() const; rules::Associativity associativity() const; - CompletionStatus completion_status() const; + bool is_done() const; int variable_index; const Production *production; @@ -41,10 +41,16 @@ class ParseItemSet { ParseItemSet(); explicit ParseItemSet(const std::map &); - typedef std::map> - TransitionMap; + struct Completion; + struct Action; + + struct ActionMap { + std::map terminal_actions; + std::map nonterminal_continuations; + }; + + ActionMap actions() const; - TransitionMap transitions() const; bool operator==(const ParseItemSet &) const; void add(const ParseItemSet &); size_t unfinished_item_signature() const; @@ -52,6 +58,22 @@ class ParseItemSet { std::map entries; }; +struct ParseItemSet::Completion { + const ParseItem *item; + int precedence; + rules::Associativity associativity; + + bool operator<(const ParseItemSet::Completion &other) { + return precedence < other.precedence; + } +}; + +struct ParseItemSet::Action { + ParseItemSet continuation; + std::vector completions; + int completion_precedence; +}; + } // namespace build_tables } // namespace tree_sitter diff --git a/src/compiler/build_tables/parse_item_set_builder.cc b/src/compiler/build_tables/parse_item_set_builder.cc index 8259662f..9f41fc9c 100644 --- a/src/compiler/build_tables/parse_item_set_builder.cc +++ b/src/compiler/build_tables/parse_item_set_builder.cc @@ -20,9 +20,9 @@ using std::make_shared; using rules::Symbol; using rules::NONE; -static map build_first_sets(const SyntaxGrammar &grammar, - const LexicalGrammar &lexical_grammar) { - map result; +ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar, + const LexicalGrammar &lexical_grammar) : + grammar{&grammar} { vector symbol_stack; set processed_symbols; @@ -47,20 +47,13 @@ static map build_first_sets(const SyntaxGrammar &grammar, } } - result.insert({symbol, first_set}); + first_sets.insert({symbol, first_set}); } for (int i = 0; i < lexical_grammar.variables.size(); i++) { Symbol symbol(i, true); - result.insert({symbol, LookaheadSet({ i })}); + first_sets.insert({symbol, LookaheadSet({ i })}); } - - return result; -} - -ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar, - const LexicalGrammar &lexical_grammar) : - grammar{&grammar}, first_sets{build_first_sets(grammar, lexical_grammar)} { } void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) { @@ -109,7 +102,7 @@ void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) { } } -LookaheadSet ParseItemSetBuilder::get_first_set(rules::Symbol &symbol) const { +LookaheadSet ParseItemSetBuilder::get_first_set(const rules::Symbol &symbol) const { return first_sets.find(symbol)->second; } diff --git a/src/compiler/build_tables/parse_item_set_builder.h b/src/compiler/build_tables/parse_item_set_builder.h index 8043437e..db3ca930 100644 --- a/src/compiler/build_tables/parse_item_set_builder.h +++ b/src/compiler/build_tables/parse_item_set_builder.h @@ -20,7 +20,7 @@ class ParseItemSetBuilder { public: ParseItemSetBuilder(const SyntaxGrammar &, const LexicalGrammar &); void apply_transitive_closure(ParseItemSet *); - LookaheadSet get_first_set(rules::Symbol &) const; + LookaheadSet get_first_set(const rules::Symbol &) const; }; } // namespace build_tables diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index 78a8c707..b7058603 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -108,7 +108,7 @@ class CCodeGenerator { private: void add_includes() { - add("#include \"tree_sitter/parser.h\""); + add("#include "); line(); } diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index 944036a6..108a3db3 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -14,8 +14,6 @@ using rules::Symbol; ParseAction::ParseAction(ParseActionType type, ParseStateId state_index, Symbol symbol, size_t consumed_symbol_count, - PrecedenceRange precedence_range, - rules::Associativity associativity, const Production *production) : type(type), extra(false), @@ -23,8 +21,6 @@ ParseAction::ParseAction(ParseActionType type, ParseStateId state_index, symbol(symbol), state_index(state_index), consumed_symbol_count(consumed_symbol_count), - precedence_range(precedence_range), - associativity(associativity), production(production) {} ParseAction::ParseAction() @@ -34,7 +30,6 @@ ParseAction::ParseAction() symbol(Symbol(-1)), state_index(-1), consumed_symbol_count(0), - associativity(rules::AssociativityNone), production(nullptr) {} ParseAction ParseAction::Error() { @@ -47,15 +42,13 @@ ParseAction ParseAction::Accept() { return action; } -ParseAction ParseAction::Shift(ParseStateId state_index, - PrecedenceRange precedence_range) { - return ParseAction(ParseActionTypeShift, state_index, Symbol(-1), 0, - precedence_range, rules::AssociativityNone, nullptr); +ParseAction ParseAction::Shift(ParseStateId state_index) { + return ParseAction(ParseActionTypeShift, state_index, Symbol(-1), 0, nullptr); } ParseAction ParseAction::Recover(ParseStateId state_index) { return ParseAction(ParseActionTypeRecover, state_index, Symbol(-1), 0, - PrecedenceRange(), rules::AssociativityNone, nullptr); + nullptr); } ParseAction ParseAction::ShiftExtra() { @@ -66,11 +59,33 @@ ParseAction ParseAction::ShiftExtra() { } ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count, - int precedence, - rules::Associativity associativity, const Production &production) { return ParseAction(ParseActionTypeReduce, 0, symbol, consumed_symbol_count, - { precedence, precedence }, associativity, &production); + &production); +} + +int ParseAction::precedence() const { + if (consumed_symbol_count >= production->size()) { + if (production->empty()) { + return 0; + } else { + return production->back().precedence; + } + } else { + return production->at(consumed_symbol_count).precedence; + } +} + +rules::Associativity ParseAction::associativity() const { + if (consumed_symbol_count >= production->size()) { + if (production->empty()) { + return rules::AssociativityNone; + } else { + return production->back().associativity; + } + } else { + return production->at(consumed_symbol_count).associativity; + } } bool ParseAction::operator==(const ParseAction &other) const { diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index 81142f75..ea4e5cad 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -26,30 +26,30 @@ enum ParseActionType { class ParseAction { ParseAction(ParseActionType type, ParseStateId state_index, rules::Symbol symbol, size_t consumed_symbol_count, - PrecedenceRange range, rules::Associativity, const Production *); + const Production *); public: ParseAction(); static ParseAction Accept(); static ParseAction Error(); - static ParseAction Shift(ParseStateId state_index, PrecedenceRange precedence); + static ParseAction Shift(ParseStateId state_index); static ParseAction Recover(ParseStateId state_index); static ParseAction Reduce(rules::Symbol symbol, size_t consumed_symbol_count, - int precedence, rules::Associativity, const Production &); static ParseAction ShiftExtra(); bool operator==(const ParseAction &) const; bool operator<(const ParseAction &) const; + rules::Associativity associativity() const; + int precedence() const; + ParseActionType type; bool extra; bool fragile; - rules::Symbol symbol; ParseStateId state_index; - size_t consumed_symbol_count; - PrecedenceRange precedence_range; - rules::Associativity associativity; + rules::Symbol symbol; + size_t consumed_symbol_count; const Production *production; }; diff --git a/src/compiler/prepare_grammar/flatten_grammar.cc b/src/compiler/prepare_grammar/flatten_grammar.cc index 76b6e769..85093298 100644 --- a/src/compiler/prepare_grammar/flatten_grammar.cc +++ b/src/compiler/prepare_grammar/flatten_grammar.cc @@ -19,6 +19,8 @@ class FlattenRule : public rules::RuleFn { private: vector precedence_stack; vector associativity_stack; + int last_precedence; + rules::Associativity last_associativity; Production production; void apply_to(const rules::Symbol *sym) { @@ -39,11 +41,13 @@ class FlattenRule : public rules::RuleFn { apply(metadata->rule); if (precedence.second) { + last_precedence = precedence_stack.back(); precedence_stack.pop_back(); production.back().precedence = precedence_stack.back(); } if (associativity.second) { + last_associativity = associativity_stack.back(); associativity_stack.pop_back(); production.back().associativity = associativity_stack.back(); } @@ -51,40 +55,49 @@ class FlattenRule : public rules::RuleFn { void apply_to(const rules::Seq *seq) { apply(seq->left); + last_precedence = 0; + last_associativity = rules::AssociativityNone; apply(seq->right); } public: FlattenRule() : precedence_stack({ 0 }), - associativity_stack({ rules::AssociativityNone }) {} + associativity_stack({ rules::AssociativityNone }), + last_precedence(0), + last_associativity(rules::AssociativityNone) {} Production flatten(const rule_ptr &rule) { apply(rule); - size_t size = production.size(); - if (size > 1) { - production[size - 1].precedence = production[size - 2].precedence; - production[size - 1].associativity = production[size - 2].associativity; + if (!production.empty()) { + production.back().precedence = last_precedence; + production.back().associativity = last_associativity; } return production; } }; +SyntaxVariable flatten_rule(const Variable &variable) { + vector productions; + + for (const rule_ptr &rule_component : extract_choices(variable.rule)) { + Production production = FlattenRule().flatten(rule_component); + auto end = productions.end(); + if (find(productions.begin(), end, production) == end) { + productions.push_back(production); + } + } + + return SyntaxVariable(variable.name, variable.type, productions); +} + SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) { SyntaxGrammar result; result.expected_conflicts = grammar.expected_conflicts; result.extra_tokens = grammar.extra_tokens; for (const Variable &variable : grammar.variables) { - vector productions; - for (const rule_ptr &rule_component : extract_choices(variable.rule)) { - Production production = FlattenRule().flatten(rule_component); - auto end = productions.end(); - if (find(productions.begin(), end, production) == end) - productions.push_back(production); - } - result.variables.push_back( - SyntaxVariable(variable.name, variable.type, productions)); + result.variables.push_back(flatten_rule(variable)); } return result; diff --git a/src/compiler/prepare_grammar/flatten_grammar.h b/src/compiler/prepare_grammar/flatten_grammar.h index e728af17..1fb6eb9d 100644 --- a/src/compiler/prepare_grammar/flatten_grammar.h +++ b/src/compiler/prepare_grammar/flatten_grammar.h @@ -10,6 +10,7 @@ namespace prepare_grammar { struct InitialSyntaxGrammar; +SyntaxVariable flatten_rule(const Variable &variable); SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &); } // namespace prepare_grammar