Resolve token conflicts by tokens' order in grammar
This commit is contained in:
parent
2a5dd3434f
commit
aac0786449
10 changed files with 52 additions and 48 deletions
|
|
@ -6,10 +6,7 @@
|
|||
using namespace rules;
|
||||
using build_tables::build_tables;
|
||||
|
||||
typedef set<ParseAction> parse_actions;
|
||||
typedef set<LexAction> lex_actions;
|
||||
|
||||
static set<Symbol> keys(const map<Symbol, parse_actions> &map) {
|
||||
static set<Symbol> keys(const map<Symbol, ParseAction> &map) {
|
||||
set<Symbol> result;
|
||||
for (auto pair : map) {
|
||||
result.insert(pair.first);
|
||||
|
|
|
|||
|
|
@ -59,7 +59,13 @@ namespace tree_sitter {
|
|||
void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (LexItem item : item_set) {
|
||||
if (item.is_done()) {
|
||||
lex_table.add_default_action(state_id, LexAction::Accept(item.lhs));
|
||||
const Symbol &new_symbol = item.lhs;
|
||||
auto &action = lex_table.states[state_id].default_action;
|
||||
if (action.type == LexActionTypeAccept) {
|
||||
const Symbol &old_symbol = action.symbol;
|
||||
if (lex_grammar.index_of(new_symbol) >= lex_grammar.index_of(old_symbol)) continue;
|
||||
}
|
||||
lex_table.add_default_action(state_id, LexAction::Accept(new_symbol));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -125,20 +125,19 @@ namespace tree_sitter {
|
|||
return result;
|
||||
}
|
||||
|
||||
string code_for_parse_actions(const rules::Symbol &symbol, const set<ParseAction> &actions) {
|
||||
string code_for_parse_actions(const rules::Symbol &symbol, const ParseAction &action) {
|
||||
string sym_id = symbol_id(symbol);
|
||||
auto action = actions.begin();
|
||||
switch (action->type) {
|
||||
switch (action.type) {
|
||||
case ParseActionTypeAccept:
|
||||
return "ACCEPT_INPUT(" + sym_id + ")";
|
||||
case ParseActionTypeShift:
|
||||
return "SHIFT(" + sym_id + ", " + to_string(action->state_index) + ")";
|
||||
return "SHIFT(" + sym_id + ", " + to_string(action.state_index) + ")";
|
||||
case ParseActionTypeReduce:
|
||||
return "REDUCE(" +
|
||||
sym_id + ", " +
|
||||
symbol_id(action->symbol) + ", " +
|
||||
to_string(action->child_flags.size()) + ", " +
|
||||
"COLLAPSE({" + collapse_flags(action->child_flags) + "}))";
|
||||
symbol_id(action.symbol) + ", " +
|
||||
to_string(action.child_flags.size()) + ", " +
|
||||
"COLLAPSE({" + collapse_flags(action.child_flags) + "}))";
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
|
|
@ -156,20 +155,15 @@ namespace tree_sitter {
|
|||
return result;
|
||||
}
|
||||
|
||||
string code_for_lex_actions(const set<LexAction> &actions,
|
||||
string code_for_lex_actions(const LexAction &action,
|
||||
const set<rules::CharacterSet> &expected_inputs) {
|
||||
auto action = actions.begin();
|
||||
if (action == actions.end()) {
|
||||
return "LEX_ERROR();";
|
||||
} else {
|
||||
switch (action->type) {
|
||||
case LexActionTypeAdvance:
|
||||
return "ADVANCE(" + to_string(action->state_index) + ");";
|
||||
case LexActionTypeAccept:
|
||||
return "ACCEPT_TOKEN(" + symbol_id(action->symbol) + ");";
|
||||
case LexActionTypeError:
|
||||
return "";
|
||||
}
|
||||
switch (action.type) {
|
||||
case LexActionTypeAdvance:
|
||||
return "ADVANCE(" + to_string(action.state_index) + ");";
|
||||
case LexActionTypeAccept:
|
||||
return "ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");";
|
||||
case LexActionTypeError:
|
||||
return "LEX_ERROR();";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -177,9 +171,10 @@ namespace tree_sitter {
|
|||
string result = "";
|
||||
auto expected_inputs = parse_state.expected_inputs();
|
||||
for (auto pair : parse_state.actions)
|
||||
result += _if(condition_for_character_rule(pair.first),
|
||||
code_for_lex_actions(pair.second, expected_inputs));
|
||||
result += code_for_lex_actions(parse_state.default_actions, expected_inputs);
|
||||
if (!pair.first.is_empty())
|
||||
result += _if(condition_for_character_rule(pair.first),
|
||||
code_for_lex_actions(pair.second, expected_inputs));
|
||||
result += code_for_lex_actions(parse_state.default_action, expected_inputs);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ namespace tree_sitter {
|
|||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// Returns the `first` member of the first entry in `rules`.
// NOTE(review): presumably `first` is the rule's name and `rules` is an ordered
// sequence of (name, rule) pairs — confirm against the Grammar declaration.
// NOTE(review): no emptiness check is made before `front()`; confirm callers
// never invoke this on a grammar with no rules.
string Grammar::start_rule_name() const {
|
||||
return rules.front().first;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ namespace tree_sitter {
|
|||
using rules::Symbol;
|
||||
using rules::CharacterSet;
|
||||
|
||||
// Default constructor: delegates to the three-argument constructor with type
// LexActionTypeError, a state index of -1, and a Symbol built from the empty
// string.
LexAction::LexAction() : LexAction(LexActionTypeError, -1, Symbol("")) {}
|
||||
|
||||
LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol) :
|
||||
type(type),
|
||||
symbol(symbol),
|
||||
|
|
@ -71,11 +73,11 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
void LexTable::add_action(LexStateId id, CharacterSet match, LexAction action) {
|
||||
state(this, id).actions[match].insert(action);
|
||||
state(this, id).actions[match] = action;
|
||||
}
|
||||
|
||||
void LexTable::add_default_action(LexStateId id, LexAction action) {
|
||||
state(this, id).default_actions.insert(action);
|
||||
state(this, id).default_action = action;
|
||||
}
|
||||
|
||||
const LexStateId LexTable::ERROR_STATE_ID = -1;
|
||||
|
|
|
|||
|
|
@ -10,14 +10,15 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
typedef enum {
|
||||
LexActionTypeAccept,
|
||||
LexActionTypeError,
|
||||
LexActionTypeAccept,
|
||||
LexActionTypeAdvance
|
||||
} LexActionType;
|
||||
|
||||
class LexAction {
|
||||
LexAction(LexActionType type, size_t state_index, rules::Symbol symbol);
|
||||
public:
|
||||
LexAction();
|
||||
static LexAction Accept(rules::Symbol symbol);
|
||||
static LexAction Error();
|
||||
static LexAction Advance(size_t state_index);
|
||||
|
|
@ -46,8 +47,8 @@ namespace std {
|
|||
namespace tree_sitter {
|
||||
class LexState {
|
||||
public:
|
||||
std::map<rules::CharacterSet, std::set<LexAction>> actions;
|
||||
std::set<LexAction> default_actions;
|
||||
std::map<rules::CharacterSet, LexAction> actions;
|
||||
LexAction default_action;
|
||||
std::set<rules::CharacterSet> expected_inputs() const;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -73,18 +73,11 @@ namespace tree_sitter {
|
|||
|
||||
ostream& operator<<(ostream &stream, const ParseState &state) {
|
||||
stream << string("#<parse_state ");
|
||||
bool started1 = false;
|
||||
bool started = false;
|
||||
for (auto pair : state.actions) {
|
||||
if (started1) stream << string(", ");
|
||||
stream << pair.first << string(" => #<set: ");
|
||||
bool started2 = false;
|
||||
for (auto action : pair.second) {
|
||||
if (started2) stream << string(", ");
|
||||
stream << action;
|
||||
started2 = true;
|
||||
}
|
||||
stream << string(">");
|
||||
started1 = true;
|
||||
if (started) stream << string(", ");
|
||||
stream << pair.first << string(" => ") << pair.second;
|
||||
started = true;
|
||||
}
|
||||
stream << string(">");
|
||||
return stream;
|
||||
|
|
@ -97,6 +90,6 @@ namespace tree_sitter {
|
|||
|
||||
void ParseTable::add_action(ParseStateId id, Symbol symbol, ParseAction action) {
|
||||
symbols.insert(symbol);
|
||||
states[id].actions[symbol].insert(action);
|
||||
states[id].actions.insert({ symbol, action });
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
typedef enum {
|
||||
ParseActionTypeAccept,
|
||||
ParseActionTypeError,
|
||||
ParseActionTypeAccept,
|
||||
ParseActionTypeShift,
|
||||
ParseActionTypeReduce,
|
||||
} ParseActionType;
|
||||
|
|
@ -55,7 +55,7 @@ namespace tree_sitter {
|
|||
class ParseState {
|
||||
public:
|
||||
ParseState();
|
||||
std::map<rules::Symbol, std::set<ParseAction>> actions;
|
||||
std::map<rules::Symbol, ParseAction> actions;
|
||||
std::set<rules::Symbol> expected_inputs() const;
|
||||
LexStateId lex_state_id;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -44,6 +44,15 @@ namespace tree_sitter {
|
|||
return rule(symbol).get() != nullptr;
|
||||
}
|
||||
|
||||
// Returns the zero-based position in `rules` of the first entry whose `first`
// member equals `symbol.name`, found by a linear scan from the front.
//
// If no entry matches, the function returns -1. Because the return type is
// size_t, that value is converted to the largest representable size_t, so an
// unmatched symbol compares greater than every real index.
size_t PreparedGrammar::index_of(const rules::Symbol &symbol) const {
|
||||
for (size_t i = 0; i < rules.size(); i++) {
|
||||
if (rules[i].first == symbol.name) {
|
||||
return i;
|
||||
|
||||
}
|
||||
}
|
||||
// Not found: -1 wraps to the maximum size_t value (see header comment).
return -1;
|
||||
}
|
||||
|
||||
ostream& operator<<(ostream &stream, const PreparedGrammar &grammar) {
|
||||
stream << string("#<grammar");
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ namespace tree_sitter {
|
|||
bool operator==(const PreparedGrammar &other) const;
|
||||
bool has_definition(const rules::Symbol &symbol) const;
|
||||
const rules::rule_ptr rule(const rules::Symbol &symbol) const;
|
||||
size_t index_of(const rules::Symbol &symbol) const;
|
||||
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue