diff --git a/src/runtime/parser.c b/src/runtime/parser.c
index c4fbfe5c..0bdefcbf 100644
--- a/src/runtime/parser.c
+++ b/src/runtime/parser.c
@@ -341,7 +341,22 @@ static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId pa
           self->lexer.data.mark_end(&self->lexer.data);
         }
 
-        if (!error_mode || self->lexer.token_end_position.bytes > current_position.bytes) {
+        // Zero-length external tokens are generally allowed, but they're not
+        // allowed right after a syntax error. This is for two reasons:
+        // 1. After a syntax error, the lexer is looking for any possible token,
+        //    as opposed to the specific set of tokens that are valid in some
+        //    parse state. In this situation, it's very easy for an external
+        //    scanner to produce unwanted zero-length tokens.
+        // 2. The parser sometimes inserts *missing* tokens to recover from
+        //    errors. These tokens are also zero-length. If we allow more
+        //    zero-length tokens to be created after missing tokens, it
+        //    can lead to infinite loops. Forbidding zero-length tokens
+        //    right at the point of error recovery is a conservative strategy
+        //    for preventing this kind of infinite loop.
+        if (
+          self->lexer.token_end_position.bytes > current_position.bytes ||
+          (!error_mode && ts_stack_has_advanced_since_error(self->stack, version))
+        ) {
           found_external_token = true;
           break;
         }
@@ -470,7 +485,7 @@ static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId pa
   LOG(
     "lexed_lookahead sym:%s, size:%u",
     SYM_NAME(ts_subtree_symbol(result)),
-    ts_subtree_size(result).bytes
+    ts_subtree_total_size(result).bytes
   );
   return result;
 }
diff --git a/src/runtime/stack.c b/src/runtime/stack.c
index 4828163a..1cb526eb 100644
--- a/src/runtime/stack.c
+++ b/src/runtime/stack.c
@@ -580,6 +580,34 @@ int ts_stack_dynamic_precedence(Stack *self, StackVersion version) {
   return array_get(&self->heads, version)->node->dynamic_precedence;
 }
 
+// Determine whether the given stack version has advanced since its last
+// error. A head node with zero error cost counts as advanced. Otherwise the
+// first link of each node is followed downward: a subtree with a nonzero
+// total byte count means the version has advanced, while an empty subtree
+// is skipped only if its node's node_count is still greater than the
+// head's node_count_at_last_error.
+bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) {
+  const StackHead *head = array_get(&self->heads, version);
+  const StackNode *current = head->node;
+  if (current->error_cost == 0) return true;
+
+  for (; current; current = current->links[0].node) {
+    // A node without links, or whose first link carries no subtree, ends
+    // the walk.
+    if (current->link_count < 1) break;
+    Subtree subtree = current->links[0].subtree;
+    if (!subtree.ptr) break;
+
+    if (ts_subtree_total_bytes(subtree) > 0) return true;
+
+    // Only keep descending past an empty subtree while this node's count
+    // exceeds the count stored on the head at the last error.
+    if (current->node_count <= head->node_count_at_last_error) break;
+  }
+
+  return false;
+}
+
 void ts_stack_remove_version(Stack *self, StackVersion version) {
   stack_head_delete(array_get(&self->heads, version), &self->node_pool, self->subtree_pool);
   array_erase(&self->heads, version);
diff --git a/src/runtime/stack.h b/src/runtime/stack.h
index dab0360d..272bb4ee 100644
--- a/src/runtime/stack.h
+++ b/src/runtime/stack.h
@@ -79,6 +79,8 @@ unsigned ts_stack_node_count_since_error(const Stack *, StackVersion);
 
 int ts_stack_dynamic_precedence(Stack *, StackVersion);
 
+bool ts_stack_has_advanced_since_error(const Stack *, StackVersion);
+
 // Compute a summary of all the parse states near the top of the given
 // version of the stack and store the summary for later retrieval.
 void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth);