Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery states in the parse table. For each token T, there was an error-recovery state in which the parser looked for *any* token that could follow T. Unfortunately, sometimes the set of tokens that could follow T contained conflicts. For example, in JS, the token '}' can be followed by the open-ended 'template_chars' token, but also by ordinary tokens like 'identifier'. So with the old algorithm, when recovering from an unexpected '}' token, the lexer had no way to distinguish identifiers from template_chars.

This commit drops the error-recovery states. Instead, when we encounter an unexpected token T, we recover from the error by finding a previous state S in the stack in which T would be valid, popping all of the nodes after S, and wrapping them in an error. This way, the lexer is always invoked in a normal parse state, in which it is looking for a non-conflicting set of tokens.

Eliminating the error-recovery states also shrinks the lex state machine significantly.

Signed-off-by: Rick Winfrey <rewinfrey@github.com>
This commit is contained in:
parent
8b3941764f
commit
99d048e016
15 changed files with 327 additions and 639 deletions
|
|
@ -2,33 +2,22 @@
|
|||
#include "runtime/tree.h"
|
||||
#include "runtime/error_costs.h"
|
||||
|
||||
static const TSParseAction SHIFT_ERROR = {
|
||||
.type = TSParseActionTypeShift,
|
||||
.params.state = ERROR_STATE,
|
||||
};
|
||||
|
||||
void ts_language_table_entry(const TSLanguage *self, TSStateId state,
|
||||
TSSymbol symbol, TableEntry *result) {
|
||||
uint32_t action_index;
|
||||
if (symbol == ts_builtin_sym_error) {
|
||||
if (state == ERROR_STATE) {
|
||||
result->action_count = 1;
|
||||
result->is_reusable = false;
|
||||
result->depends_on_lookahead = false;
|
||||
result->actions = &SHIFT_ERROR;
|
||||
return;
|
||||
}
|
||||
action_index = 0;
|
||||
result->action_count = 0;
|
||||
result->is_reusable = false;
|
||||
result->actions = NULL;
|
||||
return;
|
||||
} else {
|
||||
assert(symbol < self->token_count);
|
||||
action_index = self->parse_table[state * self->symbol_count + symbol];
|
||||
uint32_t action_index = self->parse_table[state * self->symbol_count + symbol];
|
||||
const TSParseActionEntry *entry = &self->parse_actions[action_index];
|
||||
result->action_count = entry->count;
|
||||
result->is_reusable = entry->reusable;
|
||||
result->depends_on_lookahead = entry->depends_on_lookahead;
|
||||
result->actions = (const TSParseAction *)(entry + 1);
|
||||
}
|
||||
|
||||
const TSParseActionEntry *entry = &self->parse_actions[action_index];
|
||||
result->action_count = entry->count;
|
||||
result->is_reusable = entry->reusable;
|
||||
result->depends_on_lookahead = entry->depends_on_lookahead;
|
||||
result->actions = (const TSParseAction *)(entry + 1);
|
||||
}
|
||||
|
||||
uint32_t ts_language_symbol_count(const TSLanguage *language) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue