From 5dc08ccce9f914638dec0ef94323809d54310d13 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 5 Nov 2014 18:39:50 -0800 Subject: [PATCH] Include names of in-progress rules in shift/reduce conflicts --- project.gyp | 3 +- script/trim.sh | 2 +- .../build_tables/build_conflict_spec.cc | 80 +++++++++ .../build_tables/conflict_manager_spec.cc | 153 ++++++++++-------- spec/compiler/compile_examples.cc | 4 + .../build_tables/action_takes_precedence.cc | 72 +++++++++ .../build_tables/action_takes_precedence.h | 21 +++ src/compiler/build_tables/build_conflict.cc | 65 ++++++++ src/compiler/build_tables/build_conflict.h | 23 +++ .../build_tables/build_parse_table.cc | 53 ++++-- .../build_tables/parse_conflict_manager.cc | 136 ---------------- .../build_tables/parse_conflict_manager.h | 36 ----- src/compiler/generate_code/c_code.cc | 70 ++++---- src/runtime/parser.c | 3 +- 14 files changed, 426 insertions(+), 295 deletions(-) create mode 100644 spec/compiler/build_tables/build_conflict_spec.cc create mode 100644 src/compiler/build_tables/action_takes_precedence.cc create mode 100644 src/compiler/build_tables/action_takes_precedence.h create mode 100644 src/compiler/build_tables/build_conflict.cc create mode 100644 src/compiler/build_tables/build_conflict.h delete mode 100644 src/compiler/build_tables/parse_conflict_manager.cc delete mode 100644 src/compiler/build_tables/parse_conflict_manager.h diff --git a/project.gyp b/project.gyp index f5c60007..294cb32e 100644 --- a/project.gyp +++ b/project.gyp @@ -10,6 +10,8 @@ 'externals/utf8proc', ], 'sources': [ + 'src/compiler/build_tables/action_takes_precedence.cc', + 'src/compiler/build_tables/build_conflict.cc', 'src/compiler/build_tables/build_lex_table.cc', 'src/compiler/build_tables/build_parse_table.cc', 'src/compiler/build_tables/build_tables.cc', @@ -20,7 +22,6 @@ 'src/compiler/build_tables/item_set_transitions.cc', 'src/compiler/build_tables/lex_conflict_manager.cc', 'src/compiler/build_tables/lex_item.cc', - 
'src/compiler/build_tables/parse_conflict_manager.cc', 'src/compiler/build_tables/parse_item.cc', 'src/compiler/build_tables/rule_can_be_blank.cc', 'src/compiler/build_tables/rule_transitions.cc', diff --git a/script/trim.sh b/script/trim.sh index eb5abc6b..19df440c 100755 --- a/script/trim.sh +++ b/script/trim.sh @@ -1,3 +1,3 @@ #!/usr/bin/env bash -find src spec include examples -type f | xargs perl -pi -e 's/ +$//' +find src spec include -type f | xargs perl -pi -e 's/ +$//' diff --git a/spec/compiler/build_tables/build_conflict_spec.cc b/spec/compiler/build_tables/build_conflict_spec.cc new file mode 100644 index 00000000..50572180 --- /dev/null +++ b/spec/compiler/build_tables/build_conflict_spec.cc @@ -0,0 +1,80 @@ +#include "compiler/compiler_spec_helper.h" +#include "compiler/build_tables/build_conflict.h" +#include "compiler/prepared_grammar.h" + +using namespace rules; +using namespace build_tables; + +START_TEST + +describe("build_conflict", []() { + Conflict conflict(""); + + SyntaxGrammar parse_grammar({ + { "in_progress_rule1", i_token(0) }, + { "in_progress_rule2", i_token(0) }, + { "reduced_rule", i_token(0) }, + { "other_ruel1", i_token(0) }, + { "other_rule2", i_token(0) }, + }, {}, { Symbol(2, SymbolOptionToken) }); + + LexicalGrammar lex_grammar({ + { "other_token", pattern("[a-b]") }, + { "lookahead_token", pattern("[c-d]") }, + }, {}); + + it("uses the given item-set to determine which symbols are involved in the shift", [&]() { + conflict = build_conflict( + ParseAction::Shift(2, set()), + ParseAction::Reduce(Symbol(2), 1, 0), // reduced_rule + ParseItemSet({ + { + ParseItem(Symbol(0), blank(), 2), // in_progress_rule1 + set({ Symbol(2, SymbolOptionToken) }) + }, + { + ParseItem(Symbol(1), blank(), 2), // in_progress_rule2 + set({ Symbol(2, SymbolOptionToken) }) + }, + { + ParseItem(Symbol(3), blank(), 0), // other_rule1 + set({ Symbol(2, SymbolOptionToken) }) + }, + }), + Symbol(1, SymbolOptionToken), // lookahead_token + parse_grammar, 
lex_grammar + ); + + AssertThat(conflict.description, Equals( + "lookahead_token: " + "shift ( in_progress_rule2 in_progress_rule1 ) / " + "reduce ( reduced_rule )")); + }); + + it("always puts shift actions before reduce actions", [&]() { + conflict = build_conflict( + ParseAction::Reduce(Symbol(2), 1, 0), // reduced_rule + ParseAction::Shift(2, set()), + ParseItemSet({ + { + ParseItem(Symbol(0), blank(), 2), // in_progress_rule1 + set({ Symbol(2, SymbolOptionToken) }) + }, + { + ParseItem(Symbol(1), blank(), 2), // in_progress_rule2 + set({ Symbol(2, SymbolOptionToken) }) + }, + }), + Symbol(1, SymbolOptionToken), // lookahead_token + parse_grammar, lex_grammar + ); + + AssertThat(conflict.description, Equals( + "lookahead_token: " + "shift ( in_progress_rule2 in_progress_rule1 ) / " + "reduce ( reduced_rule )")); + }); +}); + + +END_TEST diff --git a/spec/compiler/build_tables/conflict_manager_spec.cc b/spec/compiler/build_tables/conflict_manager_spec.cc index c40e0b6f..8ae88906 100644 --- a/spec/compiler/build_tables/conflict_manager_spec.cc +++ b/spec/compiler/build_tables/conflict_manager_spec.cc @@ -1,6 +1,7 @@ #include "compiler/compiler_spec_helper.h" #include "compiler/rules/built_in_symbols.h" -#include "compiler/build_tables/parse_conflict_manager.h" +#include "compiler/parse_table.h" +#include "compiler/build_tables/action_takes_precedence.h" #include "compiler/build_tables/lex_conflict_manager.h" #include "compiler/prepared_grammar.h" @@ -79,25 +80,30 @@ describe("resolving parse conflicts", []() { }); }); - describe("ParseConflictManager", [&]() { + describe("action_takes_precedence", [&]() { + pair result; Symbol sym1(0); Symbol sym2(1); - ParseConflictManager *manager; - before_each([&]() { - manager = new ParseConflictManager(parse_grammar, lex_grammar); - }); + describe("errors", [&]() { + ParseAction error = ParseAction::Error(); + ParseAction non_error = ParseAction::Shift(2, { 0 }); - after_each([&]() { - delete manager; - }); + it("favors 
non-errors", [&]() { + result = action_takes_precedence(non_error, error, sym1, parse_grammar); + AssertThat(result.first, IsTrue()); - it("favors non-errors over parse errors", [&]() { - update = manager->resolve_parse_action(sym1, ParseAction::Error(), ParseAction::Shift(2, { 0 })); - AssertThat(update, IsTrue()); + result = action_takes_precedence(error, non_error, sym1, parse_grammar); + AssertThat(result.first, IsFalse()); + }); - update = manager->resolve_parse_action(sym1, ParseAction::Shift(2, { 0 }), ParseAction::Error()); - AssertThat(update, IsFalse()); + it("is not a conflict", [&]() { + result = action_takes_precedence(non_error, error, sym1, parse_grammar); + AssertThat(result.second, IsFalse()); + + result = action_takes_precedence(error, non_error, sym1, parse_grammar); + AssertThat(result.second, IsFalse()); + }); }); describe("shift/reduce conflicts", [&]() { @@ -105,15 +111,20 @@ describe("resolving parse conflicts", []() { ParseAction shift = ParseAction::Shift(2, { 3 }); ParseAction reduce = ParseAction::Reduce(sym2, 1, 1); - it("does not record a conflict", [&]() { - manager->resolve_parse_action(sym1, shift, reduce); - manager->resolve_parse_action(sym1, reduce, shift); - AssertThat(manager->conflicts(), IsEmpty()); + it("is not a conflict", [&]() { + result = action_takes_precedence(shift, reduce, sym1, parse_grammar); + AssertThat(result.second, IsFalse()); + + result = action_takes_precedence(reduce, shift, sym1, parse_grammar); + AssertThat(result.second, IsFalse()); }); it("favors the shift", [&]() { - AssertThat(manager->resolve_parse_action(sym1, shift, reduce), IsFalse()); - AssertThat(manager->resolve_parse_action(sym1, reduce, shift), IsTrue()); + result = action_takes_precedence(shift, reduce, sym1, parse_grammar); + AssertThat(result.first, IsTrue()); + + result = action_takes_precedence(reduce, shift, sym1, parse_grammar); + AssertThat(result.first, IsFalse()); }); }); @@ -121,15 +132,20 @@ describe("resolving parse conflicts", 
[]() { ParseAction shift = ParseAction::Shift(2, { 1 }); ParseAction reduce = ParseAction::Reduce(sym2, 1, 3); - it("does not record a conflict", [&]() { - manager->resolve_parse_action(sym1, reduce, shift); - manager->resolve_parse_action(sym1, shift, reduce); - AssertThat(manager->conflicts(), IsEmpty()); + it("is not a conflict", [&]() { + result = action_takes_precedence(shift, reduce, sym1, parse_grammar); + AssertThat(result.second, IsFalse()); + + result = action_takes_precedence(reduce, shift, sym1, parse_grammar); + AssertThat(result.second, IsFalse()); }); it("favors the reduce", [&]() { - AssertThat(manager->resolve_parse_action(sym1, reduce, shift), IsFalse()); - AssertThat(manager->resolve_parse_action(sym1, shift, reduce), IsTrue()); + result = action_takes_precedence(shift, reduce, sym1, parse_grammar); + AssertThat(result.first, IsFalse()); + + result = action_takes_precedence(reduce, shift, sym1, parse_grammar); + AssertThat(result.first, IsTrue()); }); }); @@ -137,31 +153,20 @@ describe("resolving parse conflicts", []() { ParseAction shift = ParseAction::Shift(2, { 0 }); ParseAction reduce = ParseAction::Reduce(sym2, 1, 0); - it("records a conflict", [&]() { - manager->resolve_parse_action(sym1, reduce, shift); - manager->resolve_parse_action(sym1, shift, reduce); - AssertThat(manager->conflicts(), Equals(vector({ - Conflict("rule1: shift (precedence 0) / reduce rule2 (precedence 0)") - }))); + it("is a conflict", [&]() { + result = action_takes_precedence(reduce, shift, sym1, parse_grammar); + AssertThat(result.second, IsTrue()); + + result = action_takes_precedence(shift, reduce, sym1, parse_grammar); + AssertThat(result.second, IsTrue()); }); it("favors the shift", [&]() { - AssertThat(manager->resolve_parse_action(sym1, shift, reduce), IsFalse()); - AssertThat(manager->resolve_parse_action(sym1, reduce, shift), IsTrue()); - }); + result = action_takes_precedence(reduce, shift, sym1, parse_grammar); + AssertThat(result.first, IsFalse()); - 
describe("when the symbols is a built-in symbol", [&]() { - it("records a conflict", [&]() { - manager->resolve_parse_action(rules::ERROR(), reduce, shift); - AssertThat(manager->conflicts()[0], Equals( - Conflict("ERROR: shift (precedence 0) / reduce rule2 (precedence 0)") - )); - - manager->resolve_parse_action(rules::END_OF_INPUT(), reduce, shift); - AssertThat(manager->conflicts()[1], Equals( - Conflict("END_OF_INPUT: shift (precedence 0) / reduce rule2 (precedence 0)") - )); - }); + result = action_takes_precedence(shift, reduce, sym1, parse_grammar); + AssertThat(result.first, IsTrue()); }); }); @@ -169,17 +174,20 @@ describe("resolving parse conflicts", []() { ParseAction shift = ParseAction::Shift(2, { 0, 1, 3 }); ParseAction reduce = ParseAction::Reduce(sym2, 1, 2); - it("records a conflict", [&]() { - manager->resolve_parse_action(sym1, reduce, shift); - manager->resolve_parse_action(sym1, shift, reduce); - AssertThat(manager->conflicts(), Equals(vector({ - Conflict("rule1: shift (precedence 0, 1, 3) / reduce rule2 (precedence 2)") - }))); + it("is a conflict", [&]() { + result = action_takes_precedence(reduce, shift, sym1, parse_grammar); + AssertThat(result.second, IsTrue()); + + result = action_takes_precedence(shift, reduce, sym1, parse_grammar); + AssertThat(result.second, IsTrue()); }); it("favors the shift", [&]() { - AssertThat(manager->resolve_parse_action(sym1, shift, reduce), IsFalse()); - AssertThat(manager->resolve_parse_action(sym1, reduce, shift), IsTrue()); + result = action_takes_precedence(reduce, shift, sym1, parse_grammar); + AssertThat(result.first, IsFalse()); + + result = action_takes_precedence(shift, reduce, sym1, parse_grammar); + AssertThat(result.first, IsTrue()); }); }); }); @@ -190,14 +198,19 @@ describe("resolving parse conflicts", []() { ParseAction right = ParseAction::Reduce(sym2, 1, 3); it("favors that action", [&]() { - AssertThat(manager->resolve_parse_action(sym1, left, right), IsTrue()); - 
AssertThat(manager->resolve_parse_action(sym1, right, left), IsFalse()); + result = action_takes_precedence(left, right, sym1, parse_grammar); + AssertThat(result.first, IsFalse()); + + result = action_takes_precedence(right, left, sym1, parse_grammar); + AssertThat(result.first, IsTrue()); }); - it("does not record a conflict", [&]() { - manager->resolve_parse_action(sym1, left, right); - manager->resolve_parse_action(sym1, right, left); - AssertThat(manager->conflicts(), IsEmpty()); + it("is not a conflict", [&]() { + result = action_takes_precedence(left, right, sym1, parse_grammar); + AssertThat(result.second, IsFalse()); + + result = action_takes_precedence(right, left, sym1, parse_grammar); + AssertThat(result.second, IsFalse()); }); }); @@ -206,17 +219,19 @@ describe("resolving parse conflicts", []() { ParseAction right = ParseAction::Reduce(sym2, 1, 0); it("favors the symbol listed earlier in the grammar", [&]() { - AssertThat(manager->resolve_parse_action(sym1, right, left), IsTrue()); - AssertThat(manager->resolve_parse_action(sym1, left, right), IsFalse()); + result = action_takes_precedence(left, right, sym1, parse_grammar); + AssertThat(result.first, IsTrue()); + + result = action_takes_precedence(right, left, sym1, parse_grammar); + AssertThat(result.first, IsFalse()); }); it("records a conflict", [&]() { - manager->resolve_parse_action(sym1, left, right); - manager->resolve_parse_action(sym1, right, left); - AssertThat(manager->conflicts(), Equals(vector({ - Conflict("rule1: reduce rule2 (precedence 0) / reduce rule1 (precedence 0)"), - Conflict("rule1: reduce rule1 (precedence 0) / reduce rule2 (precedence 0)") - }))); + result = action_takes_precedence(left, right, sym1, parse_grammar); + AssertThat(result.second, IsTrue()); + + result = action_takes_precedence(right, left, sym1, parse_grammar); + AssertThat(result.second, IsTrue()); }); }); }); diff --git a/spec/compiler/compile_examples.cc b/spec/compiler/compile_examples.cc index 
87a239d0..a3a493c3 100644 --- a/spec/compiler/compile_examples.cc +++ b/spec/compiler/compile_examples.cc @@ -1,5 +1,6 @@ #include "compiler/compiler_spec_helper.h" #include +#include static string src_dir() { const char * dir = getenv("TREESITTER_DIR"); @@ -30,6 +31,9 @@ describe("compiling the example grammars", []() { AssertThat(error, Equals((GrammarError *)nullptr)); + // for (const auto &conflict : conflicts) + // std::cout << conflict << std::endl; + ofstream file(example_parser_dir + language + ".c"); file << get<0>(result); file.close(); diff --git a/src/compiler/build_tables/action_takes_precedence.cc b/src/compiler/build_tables/action_takes_precedence.cc new file mode 100644 index 00000000..c22e2748 --- /dev/null +++ b/src/compiler/build_tables/action_takes_precedence.cc @@ -0,0 +1,72 @@ +#include "compiler/build_tables/action_takes_precedence.h" + +namespace tree_sitter { +namespace build_tables { + +using std::pair; + +pair action_takes_precedence(const ParseAction &new_action, + const ParseAction &old_action, + const rules::Symbol &symbol, + const SyntaxGrammar &grammar) { + if (new_action.type < old_action.type) { + auto opposite = + action_takes_precedence(old_action, new_action, symbol, grammar); + return { !opposite.first, opposite.second }; + } + + bool has_precedence = false, has_conflict = false; + + switch (old_action.type) { + case ParseActionTypeError: + has_precedence = true; + break; + + case ParseActionTypeShift: { + int min_precedence = *old_action.precedence_values.begin(); + int max_precedence = *old_action.precedence_values.rbegin(); + switch (new_action.type) { + case ParseActionTypeReduce: { + int new_precedence = *new_action.precedence_values.rbegin(); + if (new_precedence < max_precedence) { + if (new_precedence > min_precedence) + has_conflict = true; + } else if (new_precedence > max_precedence) { + has_precedence = true; + } else { + has_conflict = true; + } + break; + } + default: + break; + } + break; + } + + case 
ParseActionTypeReduce: + switch (new_action.type) { + case ParseActionTypeReduce: { + int old_precedence = *old_action.precedence_values.begin(); + int new_precedence = *new_action.precedence_values.begin(); + if (new_precedence > old_precedence) { + has_precedence = true; + } else if (new_precedence == old_precedence) { + has_precedence = new_action.symbol.index < old_action.symbol.index; + has_conflict = true; + } + break; + } + default: + break; + } + + default: + break; + } + + return { has_precedence, has_conflict }; +} + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/action_takes_precedence.h b/src/compiler/build_tables/action_takes_precedence.h new file mode 100644 index 00000000..66ef158b --- /dev/null +++ b/src/compiler/build_tables/action_takes_precedence.h @@ -0,0 +1,21 @@ +#ifndef COMPILER_BUILD_TABLES_RESOLVE_PARSE_ACTION_H_ +#define COMPILER_BUILD_TABLES_RESOLVE_PARSE_ACTION_H_ + +#include +#include "tree_sitter/compiler.h" +#include "compiler/parse_table.h" +#include "compiler/rules/symbol.h" +#include "compiler/prepared_grammar.h" + +namespace tree_sitter { +namespace build_tables { + +std::pair action_takes_precedence(const ParseAction &new_action, + const ParseAction &old_action, + const rules::Symbol &symbol, + const SyntaxGrammar &grammar); + +} // namespace build_tables +} // namespace tree_sitter + +#endif // COMPILER_BUILD_TABLES_RESOLVE_PARSE_ACTION_H_ diff --git a/src/compiler/build_tables/build_conflict.cc b/src/compiler/build_tables/build_conflict.cc new file mode 100644 index 00000000..f870c03c --- /dev/null +++ b/src/compiler/build_tables/build_conflict.cc @@ -0,0 +1,65 @@ +#include "compiler/build_tables/build_conflict.h" +#include "compiler/rules/symbol.h" +#include "compiler/rules/built_in_symbols.h" +#include "compiler/prepared_grammar.h" +#include "compiler/build_tables/parse_item.h" +#include + +namespace tree_sitter { +namespace build_tables { + +using std::string; +using 
rules::Symbol; + +static string symbol_name(const Symbol &symbol, const SyntaxGrammar &grammar, + const LexicalGrammar &lex_grammar) { + if (symbol.is_built_in()) { + if (symbol == rules::ERROR()) + return "ERROR"; + else if (symbol == rules::END_OF_INPUT()) + return "END_OF_INPUT"; + else + return ""; + } + + if (symbol.is_token()) + return lex_grammar.rule_name(symbol); + else + return grammar.rule_name(symbol); +} + +static string action_description(const ParseAction &action, + const ParseItemSet &item_set, + const SyntaxGrammar &grammar, + const LexicalGrammar &lex_grammar) { + switch (action.type) { + case ParseActionTypeShift: { + string result("shift ("); + for (const auto &item : item_set) + if (item.first.consumed_symbol_count > 0) + result += " " + symbol_name(item.first.lhs, grammar, lex_grammar); + return result + " )"; + } + case ParseActionTypeReduce: + return "reduce ( " + symbol_name(action.symbol, grammar, lex_grammar) + + " )"; + default: + return ""; + } +} + +Conflict build_conflict(const ParseAction &left, const ParseAction &right, + const ParseItemSet &item_set, const Symbol &sym, + const SyntaxGrammar &grammar, + const LexicalGrammar &lex_grammar) { + if (right.type < left.type) + return build_conflict(right, left, item_set, sym, grammar, lex_grammar); + + return Conflict(symbol_name(sym, grammar, lex_grammar) + ": " + + action_description(left, item_set, grammar, lex_grammar) + + " / " + + action_description(right, item_set, grammar, lex_grammar)); +} + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/build_conflict.h b/src/compiler/build_tables/build_conflict.h new file mode 100644 index 00000000..4e47a0ff --- /dev/null +++ b/src/compiler/build_tables/build_conflict.h @@ -0,0 +1,23 @@ +#ifndef COMPILER_BUILD_TABLES_BUILD_CONFLICT_H_ +#define COMPILER_BUILD_TABLES_BUILD_CONFLICT_H_ + +#include "tree_sitter/compiler.h" +#include "compiler/parse_table.h" +#include "compiler/rules/symbol.h" 
+#include "compiler/build_tables/parse_item.h" + +namespace tree_sitter { + +class SyntaxGrammar; +class LexicalGrammar; + +namespace build_tables { + +Conflict build_conflict(const ParseAction &left, const ParseAction &right, + const ParseItemSet &item_set, const rules::Symbol &, + const SyntaxGrammar &, const LexicalGrammar &); + +} // namespace build_tables +} // namespace tree_sitter + +#endif // COMPILER_BUILD_TABLES_BUILD_CONFLICT_H_ diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 8b162d82..af68a2fa 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -1,14 +1,17 @@ #include #include +#include #include #include +#include "compiler/parse_table.h" #include "compiler/build_tables/item_set_closure.h" #include "compiler/build_tables/item_set_transitions.h" -#include "compiler/build_tables/parse_conflict_manager.h" +#include "compiler/build_tables/action_takes_precedence.h" +#include "compiler/build_tables/build_conflict.h" #include "compiler/build_tables/parse_item.h" #include "compiler/prepared_grammar.h" -#include "compiler/rules/built_in_symbols.h" #include "compiler/rules/symbol.h" +#include "compiler/rules/built_in_symbols.h" namespace tree_sitter { namespace build_tables { @@ -17,22 +20,23 @@ using std::pair; using std::vector; using std::set; using std::map; +using std::string; using std::unordered_map; using std::make_shared; using rules::Symbol; class ParseTableBuilder { const SyntaxGrammar grammar; - ParseConflictManager conflict_manager; + const LexicalGrammar lex_grammar; unordered_map parse_state_ids; vector> item_sets_to_process; ParseTable parse_table; + std::set conflicts; public: ParseTableBuilder(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) - : grammar(grammar), - conflict_manager(ParseConflictManager(grammar, lex_grammar)) {} + : grammar(grammar), lex_grammar(lex_grammar) {} pair> build() { auto 
start_symbol = grammar.rules.empty() @@ -59,7 +63,7 @@ class ParseTableBuilder { parse_table.symbols.insert(rules::ERROR()); parse_table.symbols.insert(rules::DOCUMENT()); - return { parse_table, conflict_manager.conflicts() }; + return { parse_table, conflicts_vector() }; } private: @@ -82,7 +86,7 @@ class ParseTableBuilder { ParseAction new_action = ParseAction::Shift(0, precedence_values_for_item_set(next_item_set)); - if (should_add_action(state_id, symbol, new_action)) { + if (should_add_action(state_id, symbol, new_action, next_item_set)) { ParseStateId new_state_id = add_parse_state(next_item_set); new_action.state_index = new_state_id; parse_table.add_action(state_id, symbol, new_action); @@ -103,7 +107,7 @@ class ParseTableBuilder { item.precedence()); for (const auto &lookahead_sym : lookahead_symbols) - if (should_add_action(state_id, lookahead_sym, action)) + if (should_add_action(state_id, lookahead_sym, action, ParseItemSet())) parse_table.add_action(state_id, lookahead_sym, action); } } @@ -135,7 +139,8 @@ class ParseTableBuilder { for (const auto &pair : actions) { const Symbol &lookahead_sym = pair.first; ParseAction reduce_extra = ParseAction::ReduceExtra(ubiquitous_symbol); - if (should_add_action(shift_state_id, lookahead_sym, reduce_extra)) + if (should_add_action(shift_state_id, lookahead_sym, reduce_extra, + ParseItemSet())) parse_table.add_action(shift_state_id, lookahead_sym, reduce_extra); } } @@ -143,12 +148,20 @@ class ParseTableBuilder { } bool should_add_action(ParseStateId state_id, const Symbol &symbol, - const ParseAction &action) { - auto current_actions = parse_table.states[state_id].actions; + const ParseAction &action, + const ParseItemSet &item_set) { + auto &current_actions = parse_table.states[state_id].actions; auto current_action = current_actions.find(symbol); - return (current_action == current_actions.end() || - conflict_manager.resolve_parse_action( - symbol, current_action->second, action)); + if (current_action == 
current_actions.end()) + return true; + + auto result = action_takes_precedence(action, current_action->second, + symbol, grammar); + + if (result.second) + record_conflict(symbol, current_action->second, action, item_set); + + return result.first; } set precedence_values_for_item_set(const ParseItemSet &item_set) { @@ -160,6 +173,18 @@ class ParseTableBuilder { } return result; } + + void record_conflict(const Symbol &sym, const ParseAction &left, + const ParseAction &right, const ParseItemSet &item_set) { + conflicts.insert( + build_conflict(left, right, item_set, sym, grammar, lex_grammar)); + } + + vector conflicts_vector() const { + vector result; + result.insert(result.end(), conflicts.begin(), conflicts.end()); + return result; + } }; pair> build_parse_table( diff --git a/src/compiler/build_tables/parse_conflict_manager.cc b/src/compiler/build_tables/parse_conflict_manager.cc deleted file mode 100644 index e66eef1d..00000000 --- a/src/compiler/build_tables/parse_conflict_manager.cc +++ /dev/null @@ -1,136 +0,0 @@ -#include "compiler/build_tables/parse_conflict_manager.h" -#include -#include -#include "compiler/util/string_helpers.h" -#include "compiler/rules/built_in_symbols.h" -#include "compiler/prepared_grammar.h" - -namespace tree_sitter { -namespace build_tables { - -using std::string; -using std::to_string; -using std::vector; - -ParseConflictManager::ParseConflictManager(const SyntaxGrammar &parse_grammar, - const LexicalGrammar &lex_grammar) - : parse_grammar(parse_grammar), lex_grammar(lex_grammar) {} - -bool ParseConflictManager::resolve_parse_action(const rules::Symbol &symbol, - const ParseAction &old_action, - const ParseAction &new_action) { - if (new_action.type < old_action.type) - return !resolve_parse_action(symbol, new_action, old_action); - - switch (old_action.type) { - case ParseActionTypeError: - return true; - - case ParseActionTypeShift: { - int min_precedence = *old_action.precedence_values.begin(); - int max_precedence = 
*old_action.precedence_values.rbegin(); - switch (new_action.type) { - case ParseActionTypeReduce: { - int new_precedence = *new_action.precedence_values.rbegin(); - if (max_precedence > new_precedence) { - if (min_precedence < new_precedence) - record_conflict(symbol, old_action, new_action); - return false; - } else if (max_precedence < new_precedence) { - return true; - } else { - record_conflict(symbol, old_action, new_action); - return false; - } - } - default: - return false; - } - } - - case ParseActionTypeReduce: - switch (new_action.type) { - case ParseActionTypeReduce: { - int old_precedence = *old_action.precedence_values.begin(); - int new_precedence = *new_action.precedence_values.begin(); - if (new_precedence > old_precedence) { - return true; - } else if (new_precedence < old_precedence) { - return false; - } else { - record_conflict(symbol, old_action, new_action); - return new_action.symbol.index < old_action.symbol.index; - } - } - default: - return false; - } - - default: - return false; - } -} - -const vector ParseConflictManager::conflicts() const { - vector result; - result.insert(result.end(), conflicts_.begin(), conflicts_.end()); - return result; -} - -string precedence_string(const ParseAction &action) { - string precedences = "(precedence "; - bool started = false; - for (auto value : action.precedence_values) { - if (started) - precedences += ", "; - started = true; - precedences += to_string(value); - } - return precedences + ")"; -} - -string message_for_action(const ParseAction &action, - const SyntaxGrammar &parse_grammar) { - switch (action.type) { - case ParseActionTypeShift: - return "shift " + precedence_string(action); - case ParseActionTypeReduce: { - string name = parse_grammar.rule_name(action.symbol); - if (name == "") - return "ERROR" + to_string(action.symbol.index); - else - return "reduce " + name + " " + precedence_string(action); - } - case ParseActionTypeAccept: - return "accept"; - default: - return "error"; - } -} - 
-string ParseConflictManager::symbol_name(const rules::Symbol &symbol) { - if (symbol.is_built_in()) { - if (symbol == rules::ERROR()) - return "ERROR"; - else if (symbol == rules::END_OF_INPUT()) - return "END_OF_INPUT"; - else - return ""; - } - - if (symbol.is_token()) - return lex_grammar.rule_name(symbol); - else - return parse_grammar.rule_name(symbol); -} - -void ParseConflictManager::record_conflict(const rules::Symbol &symbol, - const ParseAction &left, - const ParseAction &right) { - conflicts_.insert(Conflict(symbol_name(symbol) + ": " + - message_for_action(left, parse_grammar) + " / " + - message_for_action(right, parse_grammar))); -} - -} // namespace build_tables -} // namespace tree_sitter diff --git a/src/compiler/build_tables/parse_conflict_manager.h b/src/compiler/build_tables/parse_conflict_manager.h deleted file mode 100644 index db04cc98..00000000 --- a/src/compiler/build_tables/parse_conflict_manager.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_ -#define COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_ - -#include -#include -#include -#include "compiler/parse_table.h" -#include "compiler/prepared_grammar.h" -#include "compiler/rules/symbol.h" -#include "tree_sitter/compiler.h" - -namespace tree_sitter { -namespace build_tables { - -class ParseConflictManager { - const SyntaxGrammar parse_grammar; - const LexicalGrammar lex_grammar; - std::set conflicts_; - - public: - ParseConflictManager(const SyntaxGrammar &, const LexicalGrammar &); - bool resolve_parse_action(const rules::Symbol &symbol, - const ParseAction &old_action, - const ParseAction &new_action); - const std::vector conflicts() const; - - private: - std::string symbol_name(const rules::Symbol &); - void record_conflict(const rules::Symbol &, const ParseAction &, - const ParseAction &); -}; - -} // namespace build_tables -} // namespace tree_sitter - -#endif // COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_ diff --git 
a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index f7a54cd5..09ba02ff 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -22,42 +22,40 @@ using std::to_string; using std::vector; using util::escape_char; -static const map REPLACEMENTS({ - { '~', "TILDE" }, - { '`', "BQUOTE" }, - { '!', "BANG" }, - { '@', "AT" }, - { '#', "POUND" }, - { '$', "DOLLAR" }, - { '%', "PERCENT" }, - { '^', "CARET" }, - { '&', "AMP" }, - { '*', "STAR" }, - { '(', "LPAREN" }, - { ')', "RPAREN" }, - { '-', "DASH" }, - { '+', "PLUS" }, - { '=', "EQ" }, - { '{', "LBRACE" }, - { '}', "RBRACE" }, - { '[', "LBRACK" }, - { ']', "RBRACK" }, - { '\\', "BSLASH" }, - { '|', "PIPE" }, - { ':', "COLON" }, - { ';', "SEMI" }, - { '"', "DQUOTE" }, - { '\'', "SQUOTE" }, - { '<', "LT" }, - { '>', "GT" }, - { ',', "COMMA" }, - { '.', "DOT" }, - { '?', "QMARK" }, - { '/', "SLASH" }, - { '\n', "LB" }, - { '\r', "CR" }, - { '\t', "TAB" }, -}); +static const map REPLACEMENTS({ { '~', "TILDE" }, + { '`', "BQUOTE" }, + { '!', "BANG" }, + { '@', "AT" }, + { '#', "POUND" }, + { '$', "DOLLAR" }, + { '%', "PERCENT" }, + { '^', "CARET" }, + { '&', "AMP" }, + { '*', "STAR" }, + { '(', "LPAREN" }, + { ')', "RPAREN" }, + { '-', "DASH" }, + { '+', "PLUS" }, + { '=', "EQ" }, + { '{', "LBRACE" }, + { '}', "RBRACE" }, + { '[', "LBRACK" }, + { ']', "RBRACK" }, + { '\\', "BSLASH" }, + { '|', "PIPE" }, + { ':', "COLON" }, + { ';', "SEMI" }, + { '"', "DQUOTE" }, + { '\'', "SQUOTE" }, + { '<', "LT" }, + { '>', "GT" }, + { ',', "COMMA" }, + { '.', "DOT" }, + { '?', "QMARK" }, + { '/', "SLASH" }, + { '\n', "LB" }, + { '\r', "CR" }, + { '\t', "TAB" }, }); class CCodeGenerator { string buffer; diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 2826173f..641e7c8b 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -106,8 +106,7 @@ static TSTree *break_down_right_stack(TSParser *parser) { TSParseAction action = 
get_action(parser->language, state, node->symbol); bool is_usable = (action.type != TSParseActionTypeError) && - !ts_tree_has_error(node) && - !ts_tree_is_extra(node); + !ts_tree_has_error(node) && !ts_tree_is_extra(node); if (is_usable && right_subtree_start == current_position.chars) { ts_stack_shrink(&parser->right_stack, parser->right_stack.size - 1); return node;