From aac07864495b1086d455a89c93bb0b0eb76018ab Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 24 Mar 2014 19:18:06 -0700 Subject: [PATCH] Resolve token conflicts by tokens' order in grammar --- .../build_tables/build_tables_spec.cc | 5 +-- src/compiler/build_tables/build_tables.cc | 8 +++- src/compiler/generate_code/c_code.cc | 41 ++++++++----------- src/compiler/grammar.cc | 2 +- src/compiler/lex_table.cc | 6 ++- src/compiler/lex_table.h | 7 ++-- src/compiler/parse_table.cc | 17 +++----- src/compiler/parse_table.h | 4 +- src/compiler/prepared_grammar.cc | 9 ++++ src/compiler/prepared_grammar.h | 1 + 10 files changed, 52 insertions(+), 48 deletions(-) diff --git a/spec/compiler/build_tables/build_tables_spec.cc b/spec/compiler/build_tables/build_tables_spec.cc index 1122ac40..b7e2adde 100644 --- a/spec/compiler/build_tables/build_tables_spec.cc +++ b/spec/compiler/build_tables/build_tables_spec.cc @@ -6,10 +6,7 @@ using namespace rules; using build_tables::build_tables; -typedef set parse_actions; -typedef set lex_actions; - -static set keys(const map &map) { +static set keys(const map &map) { set result; for (auto pair : map) { result.insert(pair.first); diff --git a/src/compiler/build_tables/build_tables.cc b/src/compiler/build_tables/build_tables.cc index 0e698afb..d8fb9566 100644 --- a/src/compiler/build_tables/build_tables.cc +++ b/src/compiler/build_tables/build_tables.cc @@ -59,7 +59,13 @@ namespace tree_sitter { void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) { for (LexItem item : item_set) { if (item.is_done()) { - lex_table.add_default_action(state_id, LexAction::Accept(item.lhs)); + const Symbol &new_symbol = item.lhs; + auto &action = lex_table.states[state_id].default_action; + if (action.type == LexActionTypeAccept) { + const Symbol &old_symbol = action.symbol; + if (lex_grammar.index_of(new_symbol) >= lex_grammar.index_of(old_symbol)) continue; + } + lex_table.add_default_action(state_id, LexAction::Accept(new_symbol)); } } } diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index e36b56d7..a99255da 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -125,20 +125,19 @@ namespace tree_sitter { return result; } - string code_for_parse_actions(const rules::Symbol &symbol, const set &actions) { + string code_for_parse_actions(const rules::Symbol &symbol, const ParseAction &action) { string sym_id = symbol_id(symbol); - auto action = actions.begin(); - switch (action->type) { + switch (action.type) { case ParseActionTypeAccept: return "ACCEPT_INPUT(" + sym_id + ")"; case ParseActionTypeShift: - return "SHIFT(" + sym_id + ", " + to_string(action->state_index) + ")"; + return "SHIFT(" + sym_id + ", " + to_string(action.state_index) + ")"; case ParseActionTypeReduce: return "REDUCE(" + sym_id + ", " + - symbol_id(action->symbol) + ", " + - to_string(action->child_flags.size()) + ", " + - "COLLAPSE({" + collapse_flags(action->child_flags) + "}))"; + symbol_id(action.symbol) + ", " + + to_string(action.child_flags.size()) + ", " + + "COLLAPSE({" + collapse_flags(action.child_flags) + "}))"; default: return ""; } @@ -156,20 +155,15 @@ namespace tree_sitter { return result; } - string code_for_lex_actions(const set &actions, + string code_for_lex_actions(const LexAction &action, const set &expected_inputs) { - auto action = actions.begin(); - if (action == actions.end()) { - return "LEX_ERROR();"; - } else { - switch (action->type) { - case LexActionTypeAdvance: - return "ADVANCE(" + to_string(action->state_index) + ");"; - case LexActionTypeAccept: - return "ACCEPT_TOKEN(" + symbol_id(action->symbol) + ");"; - case LexActionTypeError: - return ""; - } + switch (action.type) { + case LexActionTypeAdvance: + return "ADVANCE(" + to_string(action.state_index) + ");"; + case LexActionTypeAccept: + return "ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");"; + case LexActionTypeError: + return "LEX_ERROR();"; } } @@ -177,9 +171,10 @@ namespace tree_sitter { string result = ""; auto expected_inputs = parse_state.expected_inputs(); for (auto pair : parse_state.actions) - result += _if(condition_for_character_rule(pair.first), - code_for_lex_actions(pair.second, expected_inputs)); - result += code_for_lex_actions(parse_state.default_actions, expected_inputs); + if (!pair.first.is_empty()) + result += _if(condition_for_character_rule(pair.first), + code_for_lex_actions(pair.second, expected_inputs)); + result += code_for_lex_actions(parse_state.default_action, expected_inputs); return result; } diff --git a/src/compiler/grammar.cc b/src/compiler/grammar.cc index d6f46fac..d571fb34 100644 --- a/src/compiler/grammar.cc +++ b/src/compiler/grammar.cc @@ -21,7 +21,7 @@ namespace tree_sitter { return true; } - + string Grammar::start_rule_name() const { return rules.front().first; } diff --git a/src/compiler/lex_table.cc b/src/compiler/lex_table.cc index 18cbf48c..870df8f0 100644 --- a/src/compiler/lex_table.cc +++ b/src/compiler/lex_table.cc @@ -8,6 +8,8 @@ namespace tree_sitter { using rules::Symbol; using rules::CharacterSet; + LexAction::LexAction() : LexAction(LexActionTypeError, -1, Symbol("")) {} + LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol) : type(type), symbol(symbol), @@ -71,11 +73,11 @@ namespace tree_sitter { } void LexTable::add_action(LexStateId id, CharacterSet match, LexAction action) { - state(this, id).actions[match].insert(action); + state(this, id).actions[match] = action; } void LexTable::add_default_action(LexStateId id, LexAction action) { - state(this, id).default_actions.insert(action); + state(this, id).default_action = action; } const LexStateId LexTable::ERROR_STATE_ID = -1; diff --git a/src/compiler/lex_table.h b/src/compiler/lex_table.h index c58c21f1..8f158462 100644 --- a/src/compiler/lex_table.h +++ b/src/compiler/lex_table.h @@ -10,14 +10,15 @@ namespace tree_sitter { typedef enum { - LexActionTypeAccept, LexActionTypeError, + LexActionTypeAccept, LexActionTypeAdvance } LexActionType; class LexAction { LexAction(LexActionType type, size_t state_index, rules::Symbol symbol); public: + LexAction(); static LexAction Accept(rules::Symbol symbol); static LexAction Error(); static LexAction Advance(size_t state_index); @@ -46,8 +47,8 @@ namespace std { namespace tree_sitter { class LexState { public: - std::map> actions; - std::set default_actions; + std::map actions; + LexAction default_action; std::set expected_inputs() const; }; diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index 7f1b9f2d..b073e232 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -73,18 +73,11 @@ namespace tree_sitter { ostream& operator<<(ostream &stream, const ParseState &state) { stream << string("# #"); - started1 = true; + if (started) stream << string(", "); + stream << pair.first << string(" => ") << pair.second; + started = true; } stream << string(">"); return stream; @@ -97,6 +90,6 @@ namespace tree_sitter { void ParseTable::add_action(ParseStateId id, Symbol symbol, ParseAction action) { symbols.insert(symbol); - states[id].actions[symbol].insert(action); + states[id].actions.insert({ symbol, action }); } } diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index f3dc8eb8..5b688da8 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -10,8 +10,8 @@ namespace tree_sitter { typedef enum { - ParseActionTypeAccept, ParseActionTypeError, + ParseActionTypeAccept, ParseActionTypeShift, ParseActionTypeReduce, } ParseActionType; @@ -55,7 +55,7 @@ namespace tree_sitter { class ParseState { public: ParseState(); - std::map> actions; + std::map actions; std::set expected_inputs() const; LexStateId lex_state_id; }; diff --git a/src/compiler/prepared_grammar.cc b/src/compiler/prepared_grammar.cc index 73d822c1..ac9584d6 100644 --- a/src/compiler/prepared_grammar.cc +++ b/src/compiler/prepared_grammar.cc @@ -44,6 +44,15 @@ namespace tree_sitter { return rule(symbol).get() != nullptr; } + size_t PreparedGrammar::index_of(const rules::Symbol &symbol) const { + for (size_t i = 0; i < rules.size(); i++) { + if (rules[i].first == symbol.name) { + return i; + } + } + return -1; + } + ostream& operator<<(ostream &stream, const PreparedGrammar &grammar) { stream << string("#> aux_rules; };