diff --git a/spec/compiler/build_tables/lex_conflict_manager_spec.cc b/spec/compiler/build_tables/lex_conflict_manager_spec.cc index ab3c17b6..cd80b5a8 100644 --- a/spec/compiler/build_tables/lex_conflict_manager_spec.cc +++ b/spec/compiler/build_tables/lex_conflict_manager_spec.cc @@ -8,7 +8,7 @@ using namespace build_tables; START_TEST -describe("LexConflictManager", []() { +describe("LexConflictManager::resolve(new_action, old_action)", []() { LexicalGrammar lexical_grammar{{ Variable("other_token", VariableTypeNamed, pattern("[a-b]")), Variable("lookahead_token", VariableTypeNamed, pattern("[c-d]")) @@ -29,36 +29,58 @@ describe("LexConflictManager", []() { AssertThat(update, IsFalse()); }); - describe("accept-token/advance conflicts", [&]() { - it("prefers the advance", [&]() { - update = conflict_manager.resolve(LexAction::Advance(1, { 0, 0 }), LexAction::Accept(sym3, 3)); - AssertThat(update, IsTrue()); - - update = conflict_manager.resolve(LexAction::Accept(sym3, 3), LexAction::Advance(1, { 0, 0 })); - AssertThat(update, IsFalse()); - }); - }); - describe("accept-token/accept-token conflicts", [&]() { - describe("when one token has a higher precedence than the other", [&]() { - it("prefers the token with the higher precedence", [&]() { - update = conflict_manager.resolve(LexAction::Accept(sym2, 0), LexAction::Accept(sym3, 2)); + describe("when one tokens' precedence values differ", [&]() { + it("favors the token with higher precedence", [&]() { + update = conflict_manager.resolve(LexAction::Accept(sym2, 1, false), LexAction::Accept(sym1, 2, false)); AssertThat(update, IsFalse()); - update = conflict_manager.resolve(LexAction::Accept(sym3, 2), LexAction::Accept(sym2, 0)); + update = conflict_manager.resolve(LexAction::Accept(sym1, 2, false), LexAction::Accept(sym2, 1, false)); AssertThat(update, IsTrue()); }); }); - describe("when both tokens have the same precedence", [&]() { - it("prefers the token listed earlier in the grammar", [&]() { - update = conflict_manager.resolve(LexAction::Accept(sym2, 0), LexAction::Accept(sym1, 0)); + describe("when one token is string-based and the other is regexp-based", [&]() { + it("favors the string-based token", [&]() { + update = conflict_manager.resolve(LexAction::Accept(sym1, 0, false), LexAction::Accept(sym2, 0, true)); AssertThat(update, IsFalse()); - update = conflict_manager.resolve(LexAction::Accept(sym1, 0), LexAction::Accept(sym2, 0)); + update = conflict_manager.resolve(LexAction::Accept(sym2, 0, true), LexAction::Accept(sym1, 0, false)); AssertThat(update, IsTrue()); }); }); + + describe("when the tokens have equal precedence", [&]() { + it("favors the token listed earlier in the grammar", [&]() { + update = conflict_manager.resolve(LexAction::Accept(sym2, 0, false), LexAction::Accept(sym1, 0, false)); + AssertThat(update, IsFalse()); + + update = conflict_manager.resolve(LexAction::Accept(sym1, 0, false), LexAction::Accept(sym2, 0, false)); + AssertThat(update, IsTrue()); + }); + }); + }); + + describe("advance/accept-token conflicts", [&]() { + describe("when the token to accept has higher precedence", [&]() { + it("prefers the accept-token action", [&]() { + update = conflict_manager.resolve(LexAction::Advance(1, { 1, 2 }), LexAction::Accept(sym3, 3, true)); + AssertThat(update, IsFalse()); + + update = conflict_manager.resolve(LexAction::Accept(sym3, 3, true), LexAction::Advance(1, { 1, 2 })); + AssertThat(update, IsTrue()); + }); + }); + + describe("when the token to accept does not have a higher precedence", [&]() { + it("favors the advance action", [&]() { + update = conflict_manager.resolve(LexAction::Advance(1, { 1, 2 }), LexAction::Accept(sym3, 2, true)); + AssertThat(update, IsTrue()); + + update = conflict_manager.resolve(LexAction::Accept(sym3, 2, true), LexAction::Advance(1, { 1, 2 })); + AssertThat(update, IsFalse()); + }); + }); }); }); diff --git a/spec/compiler/prepare_grammar/expand_tokens_spec.cc b/spec/compiler/prepare_grammar/expand_tokens_spec.cc index b0c7462f..3b394452 100644 --- a/spec/compiler/prepare_grammar/expand_tokens_spec.cc +++ b/spec/compiler/prepare_grammar/expand_tokens_spec.cc @@ -29,7 +29,7 @@ describe("expand_tokens", []() { character({ 'y' }), character({ 'z' }), }), { - {PRECEDENCE, 1}, + {IS_STRING, 1}, {IS_TOKEN, 1}, }), i_sym(11), @@ -50,7 +50,7 @@ describe("expand_tokens", []() { character({ ' ' }), character({ 946 }), }), { - {PRECEDENCE, 1}, + {IS_STRING, 1}, {IS_TOKEN, 1}, })), }))); diff --git a/spec/fixtures/parsers/javascript.c b/spec/fixtures/parsers/javascript.c index 216af2d8..c172dce0 100644 --- a/spec/fixtures/parsers/javascript.c +++ b/spec/fixtures/parsers/javascript.c @@ -594,7 +594,7 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { (lookahead == '\n') || (lookahead == 'g'))) ADVANCE(29); - ACCEPT_TOKEN(sym_regex); + ACCEPT_TOKEN(sym_comment); case 28: if (!((lookahead == 0) || (lookahead == '\n'))) diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index 802dff48..be5f9a31 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -135,7 +135,7 @@ class LexTableBuilder { LexItem::CompletionStatus completion_status = item.completion_status(); if (completion_status.is_done) { auto current_action = lex_table.state(state_id).default_action; - auto action = LexAction::Accept(item.lhs, completion_status.precedence); + auto action = LexAction::Accept(item.lhs, completion_status.precedence, completion_status.is_string); if (conflict_manager.resolve(action, current_action)) lex_table.state(state_id).default_action = action; } diff --git a/src/compiler/build_tables/lex_conflict_manager.cc b/src/compiler/build_tables/lex_conflict_manager.cc index c272996b..e1702c79 100644 --- a/src/compiler/build_tables/lex_conflict_manager.cc +++ b/src/compiler/build_tables/lex_conflict_manager.cc @@ -28,12 +28,19 @@ bool LexConflictManager::resolve(const LexAction &new_action, return true; else if (new_precedence < old_precedence) return false; + else if (new_action.is_string && !old_action.is_string) + return true; + else if (old_action.is_string && !new_action.is_string) + return false; else return new_action.symbol.index < old_action.symbol.index; } case LexActionTypeAdvance: - return true; + if (old_precedence > new_action.precedence_range.max) + return false; + else + return true; default: return false; diff --git a/src/compiler/lex_table.cc b/src/compiler/lex_table.cc index 255c24f4..6b2b93fc 100644 --- a/src/compiler/lex_table.cc +++ b/src/compiler/lex_table.cc @@ -17,24 +17,25 @@ LexAction::LexAction() precedence_range({ 0, 0 }) {} LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol, - PrecedenceRange precedence_range) + PrecedenceRange precedence_range, bool is_string) : type(type), symbol(symbol), state_index(state_index), - precedence_range(precedence_range) {} + precedence_range(precedence_range), + is_string(is_string) {} LexAction LexAction::Error() { - return LexAction(LexActionTypeError, -1, Symbol(-1), { 0, 0 }); + return LexAction(LexActionTypeError, -1, Symbol(-1), { 0, 0 }, false); } LexAction LexAction::Advance(size_t state_index, PrecedenceRange precedence_range) { return LexAction(LexActionTypeAdvance, state_index, Symbol(-1), - precedence_range); + precedence_range, false); } -LexAction LexAction::Accept(Symbol symbol, int precedence) { - return LexAction(LexActionTypeAccept, -1, symbol, { precedence, precedence }); +LexAction LexAction::Accept(Symbol symbol, int precedence, bool is_string) { + return LexAction(LexActionTypeAccept, -1, symbol, { precedence, precedence }, is_string); } bool LexAction::operator==(const LexAction &other) const { diff --git a/src/compiler/lex_table.h b/src/compiler/lex_table.h index ab6c4a3f..b6288a07 100644 --- a/src/compiler/lex_table.h +++ b/src/compiler/lex_table.h @@ -19,11 +19,11 @@ typedef enum { class LexAction { LexAction(LexActionType type, size_t state_index, rules::Symbol symbol, - PrecedenceRange precedence_range); + PrecedenceRange precedence_range, bool is_string); public: LexAction(); - static LexAction Accept(rules::Symbol symbol, int precedence); + static LexAction Accept(rules::Symbol symbol, int precedence, bool is_string); static LexAction Error(); static LexAction Advance(size_t state_index, PrecedenceRange precedence_range); bool operator==(const LexAction &action) const; @@ -32,6 +32,7 @@ class LexAction { rules::Symbol symbol; size_t state_index; PrecedenceRange precedence_range; + bool is_string; }; } // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/expand_tokens.cc b/src/compiler/prepare_grammar/expand_tokens.cc index 05e32e46..21023990 100644 --- a/src/compiler/prepare_grammar/expand_tokens.cc +++ b/src/compiler/prepare_grammar/expand_tokens.cc @@ -47,7 +47,7 @@ class ExpandTokens : public rules::IdentityRuleFn { return make_shared( rules::Seq::build(elements), std::map({ - { rules::IS_TOKEN, 1 }, { rules::PRECEDENCE, 1 }, + { rules::IS_TOKEN, 1 }, { rules::IS_STRING, 1 }, })); }