Refine logic for deciding when tokens need to be re-lexed

* While generating the lex table, note which tokens can match the
  same string. A token needs to be relexed when it has possible
  homonyms in the current state.
* Also note which tokens can match substrings of other tokens.
  A token needs to be relexed when there are viable tokens that
  could match longer strings in the current state and the next
  token has been edited.
* Remove the logic for marking tokens as fragile on creation.
* Move the reusability/non-reusability of symbols off of individual
  actions and onto the entire entry for the state & symbol.
This commit is contained in:
Max Brunsfeld 2016-06-21 07:28:04 -07:00
parent 45f7cee0c8
commit 38c144b4a3
19 changed files with 337 additions and 257 deletions

View file

@ -72,7 +72,7 @@ class CCodeGenerator {
const SyntaxGrammar syntax_grammar;
const LexicalGrammar lexical_grammar;
map<string, string> sanitized_names;
vector<pair<size_t, vector<ParseAction>>> parse_actions;
vector<pair<size_t, ParseTableEntry>> parse_table_entries;
vector<pair<size_t, set<rules::Symbol>>> in_progress_symbols;
size_t next_parse_action_list_index;
size_t next_in_progress_symbol_list_index;
@ -155,35 +155,28 @@ class CCodeGenerator {
for (const auto &entry : parse_table.symbols) {
const rules::Symbol &symbol = entry.first;
line("[" + symbol_id(symbol) + "] = {");
indent([&]() {
switch (symbol_type(symbol)) {
case VariableTypeNamed:
line(".visible = true,");
line(".named = true,");
break;
case VariableTypeAnonymous:
line(".visible = true,");
line(".named = false,");
break;
case VariableTypeHidden:
case VariableTypeAuxiliary:
line(".visible = false,");
line(".named = false,");
break;
}
switch (symbol_type(symbol)) {
case VariableTypeNamed:
add(".visible = true, .named = true");
break;
case VariableTypeAnonymous:
add(".visible = true, .named = false");
break;
case VariableTypeHidden:
case VariableTypeAuxiliary:
add(".visible = false, .named = false");
break;
}
line(".structural = " + _boolean(entry.second.structural) + ",");
line(".extra = " + _boolean(entry.second.extra) + ",");
});
add(", ");
if (entry.second.structural)
add(".structural = true");
else
add(".structural = false");
add(", ");
if (syntax_grammar.extra_tokens.count(symbol))
add(".extra = true");
else
add(".extra = false");
add("},");
line("},");
}
});
line("};");
@ -221,11 +214,10 @@ class CCodeGenerator {
void add_recovery_parse_states_list() {
line("static TSParseAction ts_recovery_actions[SYMBOL_COUNT] = {");
indent([&]() {
for (const auto &entry : parse_table.error_state.actions) {
const rules::Symbol &symbol = entry.first;
if (!entry.second.empty()) {
line("[" + symbol_id(symbol) + "] = ");
ParseAction action = entry.second[0];
for (const auto &entry : parse_table.error_state.entries) {
if (!entry.second.actions.empty()) {
line("[" + symbol_id(entry.first) + "] = ");
ParseAction action = entry.second.actions[0];
if (action.extra) {
add("RECOVER_EXTRA(),");
} else {
@ -239,7 +231,8 @@ class CCodeGenerator {
}
void add_parse_table() {
add_parse_action_list_id({ ParseAction::Error() });
add_parse_action_list_id(
ParseTableEntry{ { ParseAction::Error() }, true, false });
size_t state_id = 0;
line("#pragma GCC diagnostic push");
@ -251,9 +244,9 @@ class CCodeGenerator {
for (const auto &state : parse_table.states) {
line("[" + to_string(state_id++) + "] = {");
indent([&]() {
for (const auto &pair : state.actions) {
line("[" + symbol_id(pair.first) + "] = ");
add(to_string(add_parse_action_list_id(pair.second)));
for (const auto &entry : state.entries) {
line("[" + symbol_id(entry.first) + "] = ");
add(to_string(add_parse_action_list_id(entry.second)));
add(",");
}
});
@ -338,22 +331,21 @@ class CCodeGenerator {
}
void add_accept_token_action(const AcceptTokenAction &action) {
if (action.is_fragile)
line("ACCEPT_FRAGILE_TOKEN(" + symbol_id(action.symbol) + ");");
else
line("ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");");
line("ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");");
}
void add_parse_action_list() {
line("static TSParseActionEntry ts_parse_actions[] = {");
indent([&]() {
for (const auto &pair : parse_actions) {
for (const auto &pair : parse_table_entries) {
size_t index = pair.first;
line("[" + to_string(index) + "] = {.count = " +
to_string(pair.second.size()) + "},");
to_string(pair.second.actions.size()) + ", .reusable = " +
_boolean(pair.second.reusable) + ", .depends_on_lookahead = " +
_boolean(pair.second.depends_on_lookahead) + "},");
for (const ParseAction &action : pair.second) {
for (const ParseAction &action : pair.second.actions) {
add(" ");
switch (action.type) {
case ParseActionTypeError:
@ -366,19 +358,18 @@ class CCodeGenerator {
if (action.extra) {
add("SHIFT_EXTRA()");
} else {
add("SHIFT(" + to_string(action.state_index) + ", ");
add_action_flags(action);
add(")");
add("SHIFT(" + to_string(action.state_index) + ")");
}
break;
case ParseActionTypeReduce:
if (action.extra) {
add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")");
} else if (action.fragile) {
add("REDUCE_FRAGILE(" + symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
} else {
add("REDUCE(" + symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ", ");
add_action_flags(action);
add(")");
to_string(action.consumed_symbol_count) + ")");
}
break;
default: {}
@ -391,16 +382,16 @@ class CCodeGenerator {
line("};");
}
size_t add_parse_action_list_id(const vector<ParseAction> &actions) {
for (const auto &pair : parse_actions) {
if (pair.second == actions) {
size_t add_parse_action_list_id(const ParseTableEntry &entry) {
for (const auto &pair : parse_table_entries) {
if (pair.second == entry) {
return pair.first;
}
}
size_t result = next_parse_action_list_index;
parse_actions.push_back({ next_parse_action_list_index, actions });
next_parse_action_list_index += 1 + actions.size();
parse_table_entries.push_back({ next_parse_action_list_index, entry });
next_parse_action_list_index += 1 + entry.actions.size();
return result;
}
@ -417,17 +408,6 @@ class CCodeGenerator {
return result;
}
void add_action_flags(const ParseAction &action) {
if (action.fragile && action.can_hide_split)
add("FRAGILE|CAN_HIDE_SPLIT");
else if (action.fragile)
add("FRAGILE");
else if (action.can_hide_split)
add("CAN_HIDE_SPLIT");
else
add("0");
}
// Helper functions
string symbol_id(const rules::Symbol &symbol) {