diff --git a/include/tree_sitter/compiler.h b/include/tree_sitter/compiler.h index 29303cb6..cd47a11c 100644 --- a/include/tree_sitter/compiler.h +++ b/include/tree_sitter/compiler.h @@ -19,6 +19,7 @@ namespace tree_sitter { rule_ptr sym(const std::string &name); rule_ptr pattern(const std::string &value); rule_ptr str(const std::string &value); + rule_ptr keyword(const std::string &value); rule_ptr err(const rule_ptr &rule); rule_ptr prec(int precedence, rule_ptr rule); rule_ptr token(rule_ptr rule); diff --git a/spec/compiler/build_tables/conflict_manager_spec.cc b/spec/compiler/build_tables/conflict_manager_spec.cc index 1aa85c66..db2b89f5 100644 --- a/spec/compiler/build_tables/conflict_manager_spec.cc +++ b/spec/compiler/build_tables/conflict_manager_spec.cc @@ -18,6 +18,7 @@ describe("resolving parse conflicts", []() { PreparedGrammar lex_grammar({ { "token1", pattern("[a-c]") }, { "token2", pattern("[b-d]") }, + { "token3", keyword("stuff") }, }, {}); before_each([&]() { @@ -29,8 +30,9 @@ describe("resolving parse conflicts", []() { }); describe("lexical conflicts", [&]() { - Symbol sym1(1, SymbolOptionToken); - Symbol sym2(2, SymbolOptionToken); + Symbol sym1(0, SymbolOptionToken); + Symbol sym2(1, SymbolOptionToken); + Symbol sym3(2, SymbolOptionToken); it("favors non-errors over lexical errors", [&]() { should_update = manager->resolve_lex_action(LexAction::Error(), LexAction::Advance(2)); @@ -40,12 +42,26 @@ describe("resolving parse conflicts", []() { AssertThat(should_update, IsFalse()); }); - it("prefers tokens that are listed earlier in the grammar", [&]() { - should_update = manager->resolve_lex_action(LexAction::Accept(sym1), LexAction::Accept(sym2)); - AssertThat(should_update, IsFalse()); - - should_update = manager->resolve_lex_action(LexAction::Accept(sym2), LexAction::Accept(sym1)); - AssertThat(should_update, IsTrue()); + describe("accept-token/accept-token conflicts", [&]() { + describe("when one token has a higher precedence than the other", [&]() { + it("prefers the token with the higher precedence", [&]() { + should_update = manager->resolve_lex_action(LexAction::Accept(sym3, 2), LexAction::Accept(sym2, 0)); + AssertThat(should_update, IsFalse()); + + should_update = manager->resolve_lex_action(LexAction::Accept(sym2, 0), LexAction::Accept(sym3, 2)); + AssertThat(should_update, IsTrue()); + }); + }); + + describe("when both tokens have the same precedence", [&]() { + it("prefers the token listed earlier in the grammar", [&]() { + should_update = manager->resolve_lex_action(LexAction::Accept(sym1, 0), LexAction::Accept(sym2, 0)); + AssertThat(should_update, IsFalse()); + + should_update = manager->resolve_lex_action(LexAction::Accept(sym2, 0), LexAction::Accept(sym1, 0)); + AssertThat(should_update, IsTrue()); + }); + }); }); }); diff --git a/spec/compiler/build_tables/lex_item_spec.cc b/spec/compiler/build_tables/lex_item_spec.cc index 1e467a48..2098c62b 100644 --- a/spec/compiler/build_tables/lex_item_spec.cc +++ b/spec/compiler/build_tables/lex_item_spec.cc @@ -1,5 +1,6 @@ #include "compiler_spec_helper.h" #include "compiler/build_tables/item_set_transitions.h" +#include "compiler/rules/metadata.h" #include "compiler/prepared_grammar.h" using namespace rules; diff --git a/src/compiler/build_tables/build_tables.cc b/src/compiler/build_tables/build_tables.cc index e8921be0..f1d25e32 100644 --- a/src/compiler/build_tables/build_tables.cc +++ b/src/compiler/build_tables/build_tables.cc @@ -81,7 +81,7 @@ namespace tree_sitter { for (LexItem item : item_set) { if (item.is_done()) { auto current_action = lex_table.state(state_id).default_action; - auto new_action = LexAction::Accept(item.lhs); + auto new_action = LexAction::Accept(item.lhs, item.precedence()); if (conflict_manager.resolve_lex_action(current_action, new_action)) lex_table.add_default_action(state_id, new_action); } diff --git a/src/compiler/build_tables/conflict_manager.cc b/src/compiler/build_tables/conflict_manager.cc index a7d506cb..c0683aa5 100644 --- a/src/compiler/build_tables/conflict_manager.cc +++ b/src/compiler/build_tables/conflict_manager.cc @@ -76,7 +76,11 @@ namespace tree_sitter { case LexActionTypeError: return true; case LexActionTypeAccept: - if (new_action.type == LexActionTypeAccept) { + if (new_action.precedence > old_action.precedence) { + return true; + } else if (new_action.precedence < old_action.precedence) { + return false; + } else { return new_action.symbol.index < old_action.symbol.index; } default: diff --git a/src/compiler/build_tables/item.cc b/src/compiler/build_tables/item.cc index a421290b..410e6167 100644 --- a/src/compiler/build_tables/item.cc +++ b/src/compiler/build_tables/item.cc @@ -1,5 +1,7 @@ #include "compiler/build_tables/item.h" #include "compiler/build_tables/rule_can_be_blank.h" +#include "compiler/rules/metadata.h" +#include "compiler/build_tables/get_metadata.h" #include "tree_sitter/compiler.h" namespace tree_sitter { @@ -11,6 +13,9 @@ namespace tree_sitter { bool Item::is_done() const { return rule_can_be_blank(rule); } + + int Item::precedence() const { + return get_metadata(rule, rules::PRECEDENCE); + } } } - diff --git a/src/compiler/build_tables/item.h b/src/compiler/build_tables/item.h index b533b7e2..de7e501c 100644 --- a/src/compiler/build_tables/item.h +++ b/src/compiler/build_tables/item.h @@ -10,6 +10,7 @@ namespace tree_sitter { public: Item(const rules::Symbol &lhs, rules::rule_ptr rule); bool is_done() const; + int precedence() const; rules::Symbol lhs; rules::rule_ptr rule; diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc index 2b1814b3..2b92f2be 100644 --- a/src/compiler/build_tables/parse_item.cc +++ b/src/compiler/build_tables/parse_item.cc @@ -1,5 +1,4 @@ #include "compiler/build_tables/parse_item.h" -#include "compiler/build_tables/get_metadata.h" #include "tree_sitter/compiler.h" namespace tree_sitter { @@ -24,10 +23,6 @@ namespace tree_sitter { (other.rule == rule || other.rule->operator==(*rule)); } - int ParseItem::precedence() const { - return get_metadata(rule, rules::PRECEDENCE); - } - ostream& operator<<(ostream &stream, const ParseItem &item) { return stream << string("# #include "compiler/rules/symbol.h" #include "compiler/build_tables/item.h" -#include "compiler/rules/metadata.h" namespace tree_sitter { namespace build_tables { @@ -16,7 +15,6 @@ namespace tree_sitter { const size_t consumed_symbol_count, const rules::Symbol &lookahead_sym); bool operator==(const ParseItem &other) const; - int precedence() const; size_t consumed_symbol_count; rules::Symbol lookahead_sym; diff --git a/src/compiler/lex_table.cc b/src/compiler/lex_table.cc index 77780467..74ded562 100644 --- a/src/compiler/lex_table.cc +++ b/src/compiler/lex_table.cc @@ -14,21 +14,22 @@ namespace tree_sitter { symbol(Symbol(-1)), state_index(-1) {} - LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol) : + LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol, int precedence) : type(type), symbol(symbol), - state_index(state_index) {} + state_index(state_index), + precedence(precedence) {} LexAction LexAction::Error() { - return LexAction(LexActionTypeError, -1, Symbol(-1)); + return LexAction(LexActionTypeError, -1, Symbol(-1), 0); } LexAction LexAction::Advance(size_t state_index) { - return LexAction(LexActionTypeAdvance, state_index, Symbol(-1)); + return LexAction(LexActionTypeAdvance, state_index, Symbol(-1), 0); } - LexAction LexAction::Accept(Symbol symbol) { - return LexAction(LexActionTypeAccept, -1, symbol); + LexAction LexAction::Accept(Symbol symbol, int precedence) { + return LexAction(LexActionTypeAccept, -1, symbol, precedence); } bool LexAction::operator==(const LexAction &other) const { diff --git a/src/compiler/lex_table.h b/src/compiler/lex_table.h index 15808ead..4ef0a36c 100644 --- a/src/compiler/lex_table.h +++ b/src/compiler/lex_table.h @@ -16,10 +16,10 @@ namespace tree_sitter { } LexActionType; class LexAction { - LexAction(LexActionType type, size_t state_index, rules::Symbol symbol); + LexAction(LexActionType type, size_t state_index, rules::Symbol symbol, int precedence); public: LexAction(); - static LexAction Accept(rules::Symbol symbol); + static LexAction Accept(rules::Symbol symbol, int precedence); static LexAction Error(); static LexAction Advance(size_t state_index); bool operator==(const LexAction &action) const; @@ -27,6 +27,7 @@ namespace tree_sitter { LexActionType type; rules::Symbol symbol; size_t state_index; + int precedence; }; std::ostream& operator<<(std::ostream &stream, const LexAction &item); diff --git a/src/compiler/prepare_grammar/prepare_grammar.cc b/src/compiler/prepare_grammar/prepare_grammar.cc index 97ef3356..02468b30 100644 --- a/src/compiler/prepare_grammar/prepare_grammar.cc +++ b/src/compiler/prepare_grammar/prepare_grammar.cc @@ -11,7 +11,7 @@ namespace tree_sitter { pair prepare_grammar(const Grammar &input_grammar) { auto interned = intern_symbols(input_grammar); if (interned.second) { - printf("Error!"); + printf("Error! %s", interned.second->message().c_str()); exit(1); } auto grammars = extract_tokens(interned.first); diff --git a/src/compiler/rules/rules.cc b/src/compiler/rules/rules.cc index 9752ec4d..28b1dda5 100644 --- a/src/compiler/rules/rules.cc +++ b/src/compiler/rules/rules.cc @@ -55,6 +55,10 @@ namespace tree_sitter { return make_shared(value); } + rule_ptr keyword(const string &value) { + return metadata(make_shared(value), { { PRECEDENCE, 100}, { IS_TOKEN, 1 } }); + } + rule_ptr err(const rule_ptr &rule) { return choice({ rule, ERROR().copy() }); }