Add guard to prevent infinite loops in error recovery

Max Brunsfeld 2018-11-08 11:29:21 -08:00
parent 0f887429ae
commit 361fd6ecea
3 changed files with 40 additions and 2 deletions

@@ -341,7 +341,22 @@ static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId pa
   self->lexer.data.mark_end(&self->lexer.data);
 }
-if (!error_mode || self->lexer.token_end_position.bytes > current_position.bytes) {
+// Zero-length external tokens are generally allowed, but they're not
+// allowed right after a syntax error. This is for two reasons:
+// 1. After a syntax error, the lexer is looking for any possible token,
+//    as opposed to the specific set of tokens that are valid in some
+//    parse state. In this situation, it's very easy for an external
+//    scanner to produce unwanted zero-length tokens.
+// 2. The parser sometimes inserts *missing* tokens to recover from
+//    errors. These tokens are also zero-length. If we allow more
+//    zero-length tokens to be created after missing tokens, it
+//    can lead to infinite loops. Forbidding zero-length tokens
+//    right at the point of error recovery is a conservative strategy
+//    for preventing this kind of infinite loop.
+if (
+  self->lexer.token_end_position.bytes > current_position.bytes ||
+  (!error_mode && ts_stack_has_advanced_since_error(self->stack, version))
+) {
   found_external_token = true;
   break;
 }
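
The net effect of this hunk: an external token is kept when it consumed at least one byte, while a zero-length external token is kept only if the parser is not in error-recovery mode and the stack has made real progress since the last syntax error. A minimal standalone sketch of that decision follows; LexGuard and should_accept_external_token are hypothetical names standing in for the inlined condition above, not tree-sitter API.

#include <stdbool.h>
#include <stdint.h>

// Hypothetical snapshot of the values the real check reads from the parser,
// its lexer, and its stack.
typedef struct {
  uint32_t token_end_bytes;  // self->lexer.token_end_position.bytes
  uint32_t current_bytes;    // current_position.bytes
  bool error_mode;           // lexing in error-recovery mode
  bool stack_advanced;       // ts_stack_has_advanced_since_error(self->stack, version)
} LexGuard;

// Mirrors the guard added above: accept a token that consumed input, or a
// zero-length token only outside error mode and after the stack has advanced.
static bool should_accept_external_token(const LexGuard *g) {
  if (g->token_end_bytes > g->current_bytes) return true;
  return !g->error_mode && g->stack_advanced;
}
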
@@ -470,7 +485,7 @@ static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId pa
   LOG(
     "lexed_lookahead sym:%s, size:%u",
     SYM_NAME(ts_subtree_symbol(result)),
-    ts_subtree_size(result).bytes
+    ts_subtree_total_size(result).bytes
   );
   return result;
 }

@@ -580,6 +580,27 @@ int ts_stack_dynamic_precedence(Stack *self, StackVersion version) {
   return array_get(&self->heads, version)->node->dynamic_precedence;
 }
+bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) {
+  const StackHead *head = array_get(&self->heads, version);
+  const StackNode *node = head->node;
+  if (node->error_cost == 0) return true;
+  while (node) {
+    if (node->link_count > 0) {
+      Subtree subtree = node->links[0].subtree;
+      if (subtree.ptr) {
+        if (ts_subtree_total_bytes(subtree) > 0) {
+          return true;
+        } else if (node->node_count > head->node_count_at_last_error) {
+          node = node->links[0].node;
+          continue;
+        }
+      }
+    }
+    break;
+  }
+  return false;
+}
 void ts_stack_remove_version(Stack *self, StackVersion version) {
   stack_head_delete(array_get(&self->heads, version), &self->node_pool, self->subtree_pool);
   array_erase(&self->heads, version);
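
In plain terms, ts_stack_has_advanced_since_error returns true right away when the head has no error cost (there is no error to recover from), and otherwise walks backward along each node's first link, succeeding on the first subtree that consumed any bytes and giving up once it reaches the node count recorded at the last error. A simplified model of that walk is sketched below; HistoryNode and has_advanced are illustrative names, not tree-sitter API, and the null-pointer and link-count checks of the real function are omitted.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// Hypothetical, flattened view of the stack-node chain: each entry records the
// bytes consumed by its first link's subtree and the node count at that point.
typedef struct HistoryNode {
  uint32_t subtree_total_bytes;  // ts_subtree_total_bytes(node->links[0].subtree)
  uint32_t node_count;           // node->node_count
  struct HistoryNode *previous;  // node->links[0].node
} HistoryNode;

// Same shape as the loop above: succeed on the first non-empty subtree, keep
// stepping past zero-length subtrees pushed after the error, and stop otherwise.
static bool has_advanced(const HistoryNode *node, uint32_t node_count_at_last_error) {
  while (node != NULL) {
    if (node->subtree_total_bytes > 0) return true;
    if (node->node_count <= node_count_at_last_error) break;
    node = node->previous;
  }
  return false;
}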

@@ -79,6 +79,8 @@ unsigned ts_stack_node_count_since_error(const Stack *, StackVersion);
 int ts_stack_dynamic_precedence(Stack *, StackVersion);
+bool ts_stack_has_advanced_since_error(const Stack *, StackVersion);
 // Compute a summary of all the parse states near the top of the given
 // version of the stack and store the summary for later retrieval.
 void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth);