diff --git a/spec/compiler/build_tables/lex_conflict_manager_spec.cc b/spec/compiler/build_tables/lex_conflict_manager_spec.cc index 6e517452..caa99ef6 100644 --- a/spec/compiler/build_tables/lex_conflict_manager_spec.cc +++ b/spec/compiler/build_tables/lex_conflict_manager_spec.cc @@ -15,46 +15,43 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() { Symbol sym2(1, true); Symbol sym3(2, true); - it("favors non-errors over lexical errors", [&]() { - update = conflict_manager.resolve(LexAction::Advance(2, {0, 0}), LexAction::Error()); + it("favors advance actions over empty accept token actions", [&]() { + update = conflict_manager.resolve(AdvanceAction(2, {0, 0}), AcceptTokenAction()); AssertThat(update, IsTrue()); - - update = conflict_manager.resolve(LexAction::Error(), LexAction::Advance(2, {0, 0})); - AssertThat(update, IsFalse()); }); describe("accept-token/accept-token conflicts", [&]() { describe("when one tokens' precedence values differ", [&]() { it("favors the token with higher precedence", [&]() { - update = conflict_manager.resolve(LexAction::Accept(sym2, 1, false), LexAction::Accept(sym1, 2, false)); + update = conflict_manager.resolve(AcceptTokenAction(sym2, 1, false), AcceptTokenAction(sym1, 2, false)); AssertThat(update, IsFalse()); - update = conflict_manager.resolve(LexAction::Accept(sym1, 2, false), LexAction::Accept(sym2, 1, false)); + update = conflict_manager.resolve(AcceptTokenAction(sym1, 2, false), AcceptTokenAction(sym2, 1, false)); AssertThat(update, IsTrue()); }); it("adds the discarded token to the 'fragile tokens' set", [&]() { - update = conflict_manager.resolve(LexAction::Accept(sym2, 1, false), LexAction::Accept(sym1, 2, false)); + update = conflict_manager.resolve(AcceptTokenAction(sym2, 1, false), AcceptTokenAction(sym1, 2, false)); AssertThat(conflict_manager.fragile_tokens, Contains(sym2)); }); }); describe("when one token is string-based and the other is regexp-based", [&]() { it("favors the string-based token", [&]() { - update = conflict_manager.resolve(LexAction::Accept(sym1, 0, false), LexAction::Accept(sym2, 0, true)); + update = conflict_manager.resolve(AcceptTokenAction(sym1, 0, false), AcceptTokenAction(sym2, 0, true)); AssertThat(update, IsFalse()); - update = conflict_manager.resolve(LexAction::Accept(sym2, 0, true), LexAction::Accept(sym1, 0, false)); + update = conflict_manager.resolve(AcceptTokenAction(sym2, 0, true), AcceptTokenAction(sym1, 0, false)); AssertThat(update, IsTrue()); }); }); describe("when the tokens have equal precedence", [&]() { it("favors the token listed earlier in the grammar", [&]() { - update = conflict_manager.resolve(LexAction::Accept(sym2, 0, false), LexAction::Accept(sym1, 0, false)); + update = conflict_manager.resolve(AcceptTokenAction(sym2, 0, false), AcceptTokenAction(sym1, 0, false)); AssertThat(update, IsFalse()); - update = conflict_manager.resolve(LexAction::Accept(sym1, 0, false), LexAction::Accept(sym2, 0, false)); + update = conflict_manager.resolve(AcceptTokenAction(sym1, 0, false), AcceptTokenAction(sym2, 0, false)); AssertThat(update, IsTrue()); }); }); @@ -63,21 +60,15 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() { describe("advance/accept-token conflicts", [&]() { describe("when the token to accept has higher precedence", [&]() { it("prefers the accept-token action", [&]() { - update = conflict_manager.resolve(LexAction::Advance(1, { 1, 2 }), LexAction::Accept(sym3, 3, true)); + update = conflict_manager.resolve(AdvanceAction(1, { 1, 2 }), AcceptTokenAction(sym3, 3, true)); AssertThat(update, IsFalse()); - - update = conflict_manager.resolve(LexAction::Accept(sym3, 3, true), LexAction::Advance(1, { 1, 2 })); - AssertThat(update, IsTrue()); }); }); describe("when the token to accept does not have a higher precedence", [&]() { it("favors the advance action", [&]() { - update = conflict_manager.resolve(LexAction::Advance(1, { 1, 2 }), LexAction::Accept(sym3, 2, true)); + update = conflict_manager.resolve(AdvanceAction(1, { 1, 2 }), AcceptTokenAction(sym3, 2, true)); AssertThat(update, IsTrue()); - - update = conflict_manager.resolve(LexAction::Accept(sym3, 2, true), LexAction::Advance(1, { 1, 2 })); - AssertThat(update, IsFalse()); }); }); }); diff --git a/spec/helpers/stream_methods.cc b/spec/helpers/stream_methods.cc index 37e17019..d6c6c1e3 100644 --- a/spec/helpers/stream_methods.cc +++ b/spec/helpers/stream_methods.cc @@ -50,19 +50,12 @@ ostream &operator<<(ostream &stream, const SyntaxVariable &variable) { return stream << string("{") << variable.name << string(", ") << variable.productions << string(", ") << to_string(variable.type) << string("}"); } -std::ostream &operator<<(std::ostream &stream, const LexAction &action) { - switch (action.type) { - case LexActionTypeError: - return stream << string("#"); - case LexActionTypeAccept: - return stream << string("#"; - case LexActionTypeAdvance: - return stream << string("#"; - default: - return stream; - } +std::ostream &operator<<(std::ostream &stream, const AdvanceAction &action) { + return stream << string("#"; +} + +std::ostream &operator<<(std::ostream &stream, const AcceptTokenAction &action) { + return stream << string("#"; } ostream &operator<<(ostream &stream, const ParseAction &action) { diff --git a/spec/helpers/stream_methods.h b/spec/helpers/stream_methods.h index 395d050e..e3ecbc15 100644 --- a/spec/helpers/stream_methods.h +++ b/spec/helpers/stream_methods.h @@ -92,7 +92,8 @@ using std::string; using std::to_string; struct Variable; struct SyntaxVariable; -class LexAction; +struct AdvanceAction; +struct AcceptTokenAction; class ParseAction; class ParseState; struct ProductionStep; @@ -104,7 +105,8 @@ ostream &operator<<(ostream &, const Rule &); ostream &operator<<(ostream &, const rule_ptr &); ostream &operator<<(ostream &, const Variable &); ostream &operator<<(ostream &, const SyntaxVariable &); -ostream &operator<<(ostream &, const LexAction &); +ostream &operator<<(ostream &, const AdvanceAction &); +ostream &operator<<(ostream &, const AcceptTokenAction &); ostream &operator<<(ostream &, const ParseAction &); ostream &operator<<(ostream &, const ParseState &); ostream &operator<<(ostream &, const ProductionStep &); diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index 92dbc39a..50c0caff 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -119,11 +119,12 @@ class LexTableBuilder { const CharacterSet &rule = transition.first; const LexItemSet &new_item_set = transition.second.first; const PrecedenceRange &precedence = transition.second.second; - auto current_action = lex_table.state(state_id).default_action; - auto action = LexAction::Advance(-1, precedence); + AdvanceAction action(-1, precedence); + + auto current_action = lex_table.state(state_id).accept_action; if (conflict_manager.resolve(action, current_action)) { action.state_index = add_lex_state(new_item_set); - lex_table.state(state_id).actions[rule] = action; + lex_table.state(state_id).advance_actions[rule] = action; } } } @@ -132,12 +133,12 @@ class LexTableBuilder { for (const LexItem &item : item_set.entries) { LexItem::CompletionStatus completion_status = item.completion_status(); if (completion_status.is_done) { - auto current_action = lex_table.state(state_id).default_action; - auto action = - LexAction::Accept(item.lhs, completion_status.precedence.max, - completion_status.is_string); + AcceptTokenAction action(item.lhs, completion_status.precedence.max, + completion_status.is_string); + + auto current_action = lex_table.state(state_id).accept_action; if (conflict_manager.resolve(action, current_action)) - lex_table.state(state_id).default_action = action; + lex_table.state(state_id).accept_action = action; } } } @@ -150,15 +151,19 @@ class LexTableBuilder { void mark_fragile_tokens() { for (LexState &state : lex_table.states) - if (state.default_action.type == LexActionTypeAccept) - if (conflict_manager.fragile_tokens.count(state.default_action.symbol)) - state.default_action.type = LexActionTypeAcceptFragile; + if (state.accept_action.is_present()) + if (conflict_manager.fragile_tokens.count(state.accept_action.symbol)) + state.accept_action.is_fragile = true; } void remove_duplicate_lex_states() { + for (LexState &state : lex_table.states) { + state.accept_action.is_string = false; + state.accept_action.precedence = 0; + } + auto replacements = - remove_duplicate_states( - &lex_table.states); + remove_duplicate_states(&lex_table.states); for (ParseState &parse_state : parse_table->states) { auto replacement = replacements.find(parse_state.lex_state_id); diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 32ff6ce1..37ca7d88 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -193,8 +193,7 @@ class ParseTableBuilder { } void remove_duplicate_parse_states() { - remove_duplicate_states( - &parse_table.states); + remove_duplicate_states(&parse_table.states); } ParseAction *add_action(ParseStateId state_id, Symbol lookahead, diff --git a/src/compiler/build_tables/lex_conflict_manager.cc b/src/compiler/build_tables/lex_conflict_manager.cc index b9493dfb..45537081 100644 --- a/src/compiler/build_tables/lex_conflict_manager.cc +++ b/src/compiler/build_tables/lex_conflict_manager.cc @@ -6,59 +6,41 @@ namespace tree_sitter { namespace build_tables { -bool LexConflictManager::resolve(const LexAction &new_action, - const LexAction &old_action) { - if (new_action.type < old_action.type) - return !resolve(old_action, new_action); +bool LexConflictManager::resolve(const AdvanceAction &new_action, + const AcceptTokenAction &old_action) { + if (!old_action.is_present()) + return true; + return new_action.precedence_range.max >= old_action.precedence; +} - switch (old_action.type) { - case LexActionTypeError: - return true; +bool LexConflictManager::resolve(const AcceptTokenAction &new_action, + const AcceptTokenAction &old_action) { + if (!old_action.is_present()) + return true; - case LexActionTypeAccept: { - int old_precedence = old_action.precedence_range.min; + int old_precedence = old_action.precedence; + int new_precedence = new_action.precedence; - switch (new_action.type) { - case LexActionTypeAccept: { - int new_precedence = new_action.precedence_range.min; + bool result; + if (new_precedence > old_precedence) + result = true; + else if (new_precedence < old_precedence) + result = false; + else if (new_action.is_string && !old_action.is_string) + result = true; + else if (old_action.is_string && !new_action.is_string) + result = false; + else if (new_action.symbol.index < old_action.symbol.index) + result = true; + else + result = false; - bool result; - if (new_precedence > old_precedence) - result = true; - else if (new_precedence < old_precedence) - result = false; - else if (new_action.is_string && !old_action.is_string) - result = true; - else if (old_action.is_string && !new_action.is_string) - result = false; - else if (new_action.symbol.index < old_action.symbol.index) - result = true; - else - result = false; + if (result) + fragile_tokens.insert(old_action.symbol); + else + fragile_tokens.insert(new_action.symbol); - if (result) - fragile_tokens.insert(old_action.symbol); - else - fragile_tokens.insert(new_action.symbol); - - return result; - } - - case LexActionTypeAdvance: - if (old_precedence > new_action.precedence_range.max) - return false; - else - return true; - - default: - return false; - } - return true; - } - - default: - return false; - } + return result; } } // namespace build_tables diff --git a/src/compiler/build_tables/lex_conflict_manager.h b/src/compiler/build_tables/lex_conflict_manager.h index b62bc5d7..906326bf 100644 --- a/src/compiler/build_tables/lex_conflict_manager.h +++ b/src/compiler/build_tables/lex_conflict_manager.h @@ -7,13 +7,15 @@ namespace tree_sitter { -class LexAction; +struct AdvanceAction; +struct AcceptTokenAction; namespace build_tables { class LexConflictManager { public: - bool resolve(const LexAction &, const LexAction &); + bool resolve(const AdvanceAction &, const AcceptTokenAction &); + bool resolve(const AcceptTokenAction &, const AcceptTokenAction &); std::set fragile_tokens; }; diff --git a/src/compiler/build_tables/remove_duplicate_states.h b/src/compiler/build_tables/remove_duplicate_states.h index dd5ded02..2a6a9cdb 100644 --- a/src/compiler/build_tables/remove_duplicate_states.h +++ b/src/compiler/build_tables/remove_duplicate_states.h @@ -7,7 +7,7 @@ namespace tree_sitter { namespace build_tables { -template +template std::map remove_duplicate_states(std::vector *states) { std::map replacements; @@ -46,13 +46,12 @@ std::map remove_duplicate_states(std::vector *states) } for (StateType &state : *states) - state.each_action([&duplicates, &new_replacements](ActionType *action) { - if (action->type == advance_action) { + state.each_advance_action( + [&duplicates, &new_replacements](ActionType *action) { auto new_replacement = new_replacements.find(action->state_index); if (new_replacement != new_replacements.end()) action->state_index = new_replacement->second; - } - }); + }); for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i) states->erase(states->begin() + i->first); diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index 5a54f032..3eb8ca5e 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -253,14 +253,18 @@ class CCodeGenerator { } void add_lex_state(const LexState &lex_state) { - auto expected_inputs = lex_state.expected_inputs(); if (lex_state.is_token_start) line("START_TOKEN();"); - for (const auto &pair : lex_state.actions) + + for (const auto &pair : lex_state.advance_actions) if (!pair.first.is_empty()) _if([&]() { add_character_set_condition(pair.first); }, - [&]() { add_lex_actions(pair.second, expected_inputs); }); - add_lex_actions(lex_state.default_action, expected_inputs); + [&]() { add_advance_action(pair.second); }); + + if (lex_state.accept_action.is_present()) + add_accept_token_action(lex_state.accept_action); + else + line("LEX_ERROR();"); } void add_character_set_condition(const rules::CharacterSet &rule) { @@ -304,23 +308,15 @@ class CCodeGenerator { } } - void add_lex_actions(const LexAction &action, - const set &expected_inputs) { - switch (action.type) { - case LexActionTypeAdvance: - line("ADVANCE(" + to_string(action.state_index) + ");"); - break; - case LexActionTypeAccept: - line("ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");"); - break; - case LexActionTypeAcceptFragile: - line("ACCEPT_FRAGILE_TOKEN(" + symbol_id(action.symbol) + ");"); - break; - case LexActionTypeError: - line("LEX_ERROR();"); - break; - default: {} - } + void add_advance_action(const AdvanceAction &action) { + line("ADVANCE(" + to_string(action.state_index) + ");"); + } + + void add_accept_token_action(const AcceptTokenAction &action) { + if (action.is_fragile) + line("ACCEPT_FRAGILE_TOKEN(" + symbol_id(action.symbol) + ");"); + else + line("ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");"); } void add_parse_action_list() { diff --git a/src/compiler/lex_table.cc b/src/compiler/lex_table.cc index 8a57d69e..0d7bec4c 100644 --- a/src/compiler/lex_table.cc +++ b/src/compiler/lex_table.cc @@ -1,5 +1,6 @@ #include "compiler/lex_table.h" #include "compiler/rules/symbol.h" +#include "compiler/rules/built_in_symbols.h" namespace tree_sitter { @@ -11,56 +12,53 @@ using std::set; using rules::Symbol; using rules::CharacterSet; -LexAction::LexAction() - : type(LexActionTypeError), - symbol(Symbol(-1)), - state_index(-1), - precedence_range({ 0, 0 }) {} +AdvanceAction::AdvanceAction() : state_index(-1) {} -LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol, - PrecedenceRange precedence_range, bool is_string) - : type(type), - symbol(symbol), - state_index(state_index), - precedence_range(precedence_range), - is_string(is_string) {} +AdvanceAction::AdvanceAction(size_t state_index, + PrecedenceRange precedence_range) + : state_index(state_index), precedence_range(precedence_range) {} -LexAction LexAction::Error() { - return LexAction(LexActionTypeError, -1, Symbol(-1), { 0, 0 }, false); +bool AdvanceAction::operator==(const AdvanceAction &other) const { + return (state_index == other.state_index) && + (precedence_range == other.precedence_range); } -LexAction LexAction::Advance(size_t state_index, - PrecedenceRange precedence_range) { - return LexAction(LexActionTypeAdvance, state_index, Symbol(-1), - precedence_range, false); +AcceptTokenAction::AcceptTokenAction() + : symbol(rules::NONE()), precedence(0), is_string(false), is_fragile(false) {} + +AcceptTokenAction::AcceptTokenAction(Symbol symbol, int precedence, + bool is_string) + : symbol(symbol), + precedence(precedence), + is_string(is_string), + is_fragile(false) {} + +bool AcceptTokenAction::is_present() const { + return symbol != rules::NONE(); } -LexAction LexAction::Accept(Symbol symbol, int precedence, bool is_string) { - return LexAction(LexActionTypeAccept, -1, symbol, { precedence, precedence }, - is_string); -} - -bool LexAction::operator==(const LexAction &other) const { - return (type == other.type) && (state_index == other.state_index) && - (symbol == other.symbol); +bool AcceptTokenAction::operator==(const AcceptTokenAction &other) const { + return (symbol == other.symbol) && (precedence == other.precedence) && + (is_string == other.is_string) && (is_fragile == other.is_fragile); } LexState::LexState() : is_token_start(false) {} set LexState::expected_inputs() const { set result; - for (auto &pair : actions) + for (auto &pair : advance_actions) result.insert(pair.first); return result; } bool LexState::operator==(const LexState &other) const { - return actions == other.actions && default_action == other.default_action && + return advance_actions == other.advance_actions && + accept_action == other.accept_action && is_token_start == other.is_token_start; } -void LexState::each_action(function fn) { - for (auto &entry : actions) +void LexState::each_advance_action(function fn) { + for (auto &entry : advance_actions) fn(&entry.second); } diff --git a/src/compiler/lex_table.h b/src/compiler/lex_table.h index 5e739628..708774d7 100644 --- a/src/compiler/lex_table.h +++ b/src/compiler/lex_table.h @@ -18,38 +18,32 @@ typedef enum { LexActionTypeAdvance } LexActionType; -class LexAction { - LexAction(LexActionType type, size_t state_index, rules::Symbol symbol, - PrecedenceRange precedence_range, bool is_string); +struct AdvanceAction { + AdvanceAction(); + AdvanceAction(size_t, PrecedenceRange); - public: - LexAction(); - static LexAction Accept(rules::Symbol symbol, int precedence, bool is_string); - static LexAction Error(); - static LexAction Advance(size_t state_index, PrecedenceRange precedence_range); - bool operator==(const LexAction &action) const; + bool operator==(const AdvanceAction &action) const; - LexActionType type; - rules::Symbol symbol; size_t state_index; PrecedenceRange precedence_range; +}; + +struct AcceptTokenAction { + AcceptTokenAction(); + AcceptTokenAction(rules::Symbol, int, bool); + + bool is_present() const; + bool operator==(const AcceptTokenAction &action) const; + + rules::Symbol symbol; + int precedence; bool is_string; + bool is_fragile; }; } // namespace tree_sitter -namespace std { - -template <> -struct hash { - size_t operator()(const tree_sitter::LexAction &action) const { - return (hash()(action.type) ^ - hash()(action.symbol) ^ - hash()(action.state_index)); - } -}; - -} // namespace std +namespace std {} // namespace std namespace tree_sitter { @@ -58,10 +52,10 @@ class LexState { LexState(); std::set expected_inputs() const; bool operator==(const LexState &) const; - void each_action(std::function); + void each_advance_action(std::function); - std::map actions; - LexAction default_action; + std::map advance_actions; + AcceptTokenAction accept_action; bool is_token_start; }; diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index cc92c3d3..81e77df6 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -128,10 +128,11 @@ set ParseState::expected_inputs() const { return result; } -void ParseState::each_action(function fn) { +void ParseState::each_advance_action(function fn) { for (auto &entry : actions) for (ParseAction &action : entry.second) - fn(&action); + if (action.type == ParseActionTypeShift) + fn(&action); } bool ParseState::operator==(const ParseState &other) const { diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index df61a8a6..c8bf124f 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -81,7 +81,7 @@ class ParseState { ParseState(); std::set expected_inputs() const; bool operator==(const ParseState &) const; - void each_action(std::function); + void each_advance_action(std::function); std::map> actions; LexStateId lex_state_id;