Retain information about the lexer's lookahead for the token where an error was detected
This commit is contained in:
parent
0bdd9b640c
commit
0fb864c1a0
2 changed files with 101 additions and 96 deletions
193
lib/src/parser.c
193
lib/src/parser.c
|
|
@ -1060,88 +1060,6 @@ static bool ts_parser__do_all_potential_reductions(
|
|||
return can_shift_lookahead_symbol;
|
||||
}
|
||||
|
||||
// Handle a parse error detected on stack version `version` for the token `lookahead`.
//
// Steps, in order: run every reduction possible in the current state, try (at most once)
// to build an extra stack version containing a synthesized "missing" token, push
// ERROR_STATE onto each affected version, merge the versions created along the way back
// into `version`, record a stack summary, and finally release `lookahead`.
static void ts_parser__handle_error(
|
||||
TSParser *self,
|
||||
StackVersion version,
|
||||
Subtree lookahead
|
||||
) {
|
||||
uint32_t previous_version_count = ts_stack_version_count(self->stack);
|
||||
|
||||
// Perform any reductions that can happen in this state, regardless of the lookahead. After
|
||||
// skipping one or more invalid tokens, the parser might find a token that would have allowed
|
||||
// a reduction to take place.
|
||||
ts_parser__do_all_potential_reductions(self, version, 0);
|
||||
uint32_t version_count = ts_stack_version_count(self->stack);
|
||||
Length position = ts_stack_position(self->stack, version);
|
||||
|
||||
// Push a discontinuity onto the stack. Merge all of the stack versions that
|
||||
// were created in the previous step.
|
||||
bool did_insert_missing_token = false;
|
||||
for (StackVersion v = version; v < version_count;) {
|
||||
if (!did_insert_missing_token) {
|
||||
TSStateId state = ts_stack_state(self->stack, v);
|
||||
// Try each token symbol in the range 1 .. token_count - 1 as the candidate missing token.
for (TSSymbol missing_symbol = 1;
|
||||
missing_symbol < self->language->token_count;
|
||||
missing_symbol++) {
|
||||
TSStateId state_after_missing_symbol = ts_language_next_state(
|
||||
self->language, state, missing_symbol
|
||||
);
|
||||
// Skip candidates whose next state is 0 or identical to the current state.
if (state_after_missing_symbol == 0 || state_after_missing_symbol == state) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Only consider the candidate if the state it leads to has a reduce action for the
// actual lookahead's leaf symbol.
if (ts_language_has_reduce_action(
|
||||
self->language,
|
||||
state_after_missing_symbol,
|
||||
ts_subtree_leaf_symbol(lookahead)
|
||||
)) {
|
||||
// In case the parser is currently outside of any included range, the lexer will
|
||||
// snap to the beginning of the next included range. The missing token's padding
|
||||
// must be assigned to position it within the next included range.
|
||||
ts_lexer_reset(&self->lexer, position);
|
||||
ts_lexer_mark_end(&self->lexer);
|
||||
Length padding = length_sub(self->lexer.token_end_position, position);
|
||||
|
||||
// The missing token is pushed onto a copy of version `v`; `v` itself is left as is here.
StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v);
|
||||
Subtree missing_tree = ts_subtree_new_missing_leaf(
|
||||
&self->tree_pool, missing_symbol, padding, self->language
|
||||
);
|
||||
ts_stack_push(
|
||||
self->stack, version_with_missing_tree,
|
||||
missing_tree, false,
|
||||
state_after_missing_symbol
|
||||
);
|
||||
|
||||
// Keep the copy as the chosen recovery only if this call returns true; presumably that
// means the lookahead symbol can be shifted after the reductions -- TODO confirm.
if (ts_parser__do_all_potential_reductions(
|
||||
self, version_with_missing_tree,
|
||||
ts_subtree_leaf_symbol(lookahead)
|
||||
)) {
|
||||
LOG(
|
||||
"recover_with_missing symbol:%s, state:%u",
|
||||
SYM_NAME(missing_symbol),
|
||||
ts_stack_state(self->stack, version_with_missing_tree)
|
||||
);
|
||||
did_insert_missing_token = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE);
|
||||
// Visit `version` first, then every version at index >= previous_version_count.
v = (v == version) ? previous_version_count : v + 1;
|
||||
}
|
||||
|
||||
// Every iteration passes the same index (previous_version_count) to ts_stack_merge;
// presumably a successful merge removes that version so later ones shift down -- TODO confirm.
// NOTE(review): did_merge is only read by assert(), so it is set-but-unused when NDEBUG
// is defined.
for (unsigned i = previous_version_count; i < version_count; i++) {
|
||||
bool did_merge = ts_stack_merge(self->stack, version, previous_version_count);
|
||||
assert(did_merge);
|
||||
}
|
||||
|
||||
ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH);
|
||||
ts_subtree_release(&self->tree_pool, lookahead);
|
||||
LOG_STACK();
|
||||
}
|
||||
|
||||
static bool ts_parser__recover_to_state(
|
||||
TSParser *self,
|
||||
StackVersion version,
|
||||
|
|
@ -1369,6 +1287,98 @@ static void ts_parser__recover(
|
|||
}
|
||||
}
|
||||
|
||||
// Handle a parse error detected on stack version `version` for the token `lookahead`.
//
// Steps, in order: run every reduction possible in the current state, try (at most once)
// to build an extra stack version containing a synthesized "missing" token, push
// ERROR_STATE onto each affected version, merge the versions created along the way back
// into `version`, record a stack summary, and then hand `lookahead` straight to
// ts_parser__recover (after breaking it down first when it has children).
static void ts_parser__handle_error(
|
||||
TSParser *self,
|
||||
StackVersion version,
|
||||
Subtree lookahead
|
||||
) {
|
||||
uint32_t previous_version_count = ts_stack_version_count(self->stack);
|
||||
|
||||
// Perform any reductions that can happen in this state, regardless of the lookahead. After
|
||||
// skipping one or more invalid tokens, the parser might find a token that would have allowed
|
||||
// a reduction to take place.
|
||||
ts_parser__do_all_potential_reductions(self, version, 0);
|
||||
uint32_t version_count = ts_stack_version_count(self->stack);
|
||||
Length position = ts_stack_position(self->stack, version);
|
||||
|
||||
// Push a discontinuity onto the stack. Merge all of the stack versions that
|
||||
// were created in the previous step.
|
||||
bool did_insert_missing_token = false;
|
||||
for (StackVersion v = version; v < version_count;) {
|
||||
if (!did_insert_missing_token) {
|
||||
TSStateId state = ts_stack_state(self->stack, v);
|
||||
// Try each token symbol in the range 1 .. token_count - 1 as the candidate missing token.
for (TSSymbol missing_symbol = 1;
|
||||
missing_symbol < self->language->token_count;
|
||||
missing_symbol++) {
|
||||
TSStateId state_after_missing_symbol = ts_language_next_state(
|
||||
self->language, state, missing_symbol
|
||||
);
|
||||
// Skip candidates whose next state is 0 or identical to the current state.
if (state_after_missing_symbol == 0 || state_after_missing_symbol == state) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Only consider the candidate if the state it leads to has a reduce action for the
// actual lookahead's leaf symbol.
if (ts_language_has_reduce_action(
|
||||
self->language,
|
||||
state_after_missing_symbol,
|
||||
ts_subtree_leaf_symbol(lookahead)
|
||||
)) {
|
||||
// In case the parser is currently outside of any included range, the lexer will
|
||||
// snap to the beginning of the next included range. The missing token's padding
|
||||
// must be assigned to position it within the next included range.
|
||||
ts_lexer_reset(&self->lexer, position);
|
||||
ts_lexer_mark_end(&self->lexer);
|
||||
Length padding = length_sub(self->lexer.token_end_position, position);
|
||||
|
||||
// The missing token is pushed onto a copy of version `v`; `v` itself is left as is here.
StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v);
|
||||
Subtree missing_tree = ts_subtree_new_missing_leaf(
|
||||
&self->tree_pool, missing_symbol, padding, self->language
|
||||
);
|
||||
ts_stack_push(
|
||||
self->stack, version_with_missing_tree,
|
||||
missing_tree, false,
|
||||
state_after_missing_symbol
|
||||
);
|
||||
|
||||
// Keep the copy as the chosen recovery only if this call returns true; presumably that
// means the lookahead symbol can be shifted after the reductions -- TODO confirm.
if (ts_parser__do_all_potential_reductions(
|
||||
self, version_with_missing_tree,
|
||||
ts_subtree_leaf_symbol(lookahead)
|
||||
)) {
|
||||
LOG(
|
||||
"recover_with_missing symbol:%s, state:%u",
|
||||
SYM_NAME(missing_symbol),
|
||||
ts_stack_state(self->stack, version_with_missing_tree)
|
||||
);
|
||||
did_insert_missing_token = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE);
|
||||
// Visit `version` first, then every version at index >= previous_version_count.
v = (v == version) ? previous_version_count : v + 1;
|
||||
}
|
||||
|
||||
// Every iteration passes the same index (previous_version_count) to ts_stack_merge;
// presumably a successful merge removes that version so later ones shift down -- TODO confirm.
// NOTE(review): did_merge is only read by assert(), so it is set-but-unused when NDEBUG
// is defined.
for (unsigned i = previous_version_count; i < version_count; i++) {
|
||||
bool did_merge = ts_stack_merge(self->stack, version, previous_version_count);
|
||||
assert(did_merge);
|
||||
}
|
||||
|
||||
ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH);
|
||||
|
||||
// Begin recovery with the current lookahead node, rather than waiting for the
|
||||
// next turn of the parse loop. This ensures that the tree accounts for the
|
||||
// current lookahead token's "lookahead bytes" value, which describes how far
|
||||
// the lexer needed to look ahead beyond the content of the token in order to
|
||||
// recognize it.
|
||||
if (ts_subtree_child_count(lookahead) > 0) {
|
||||
ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
|
||||
}
|
||||
// `lookahead` is not released in this function; presumably ts_parser__recover takes
// ownership of it -- TODO confirm against its definition.
ts_parser__recover(self, version, lookahead);
|
||||
|
||||
LOG_STACK();
|
||||
}
|
||||
|
||||
static bool ts_parser__advance(
|
||||
TSParser *self,
|
||||
StackVersion version,
|
||||
|
|
@ -1511,23 +1521,18 @@ static bool ts_parser__advance(
|
|||
// on the current parse state.
|
||||
if (!lookahead.ptr) {
|
||||
needs_lex = true;
|
||||
continue;
|
||||
} else {
|
||||
ts_language_table_entry(
|
||||
self->language,
|
||||
state,
|
||||
ts_subtree_leaf_symbol(lookahead),
|
||||
&table_entry
|
||||
);
|
||||
}
|
||||
|
||||
ts_language_table_entry(
|
||||
self->language,
|
||||
state,
|
||||
ts_subtree_leaf_symbol(lookahead),
|
||||
&table_entry
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!lookahead.ptr) {
|
||||
ts_stack_pause(self->stack, version, lookahead);
|
||||
return true;
|
||||
}
|
||||
|
||||
// If there were no parse actions for the current lookahead token, then
|
||||
// it is not valid in this state. If the current lookahead token is a
|
||||
// keyword, then switch to treating it as the normal word token if that
|
||||
|
|
|
|||
4
test/fixtures/error_corpus/c_errors.txt
vendored
4
test/fixtures/error_corpus/c_errors.txt
vendored
|
|
@ -128,8 +128,8 @@ int main() {
|
|||
(declaration (primitive_type) (init_declarator
|
||||
(identifier)
|
||||
(parenthesized_expression
|
||||
(number_literal)
|
||||
(ERROR (number_literal))))))))
|
||||
(ERROR (number_literal))
|
||||
(number_literal)))))))
|
||||
|
||||
========================================
|
||||
Extra identifiers in declarations
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue