Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery states in the parse table. For each token T, there was an error recovery state in which the parser looked for *any* token that could follow T. Unfortunately, sometimes the set of tokens that could follow T contained conflicts. For example, in JS, the token '}' can be followed by the open-ended 'template_chars' token, but also by ordinary tokens like 'identifier'. So with the old algorithm, when recovering from an unexpected '}' token, the lexer had no way to distinguish identifiers from template_chars.

This commit drops the error recovery states. Instead, when we encounter an unexpected token T, we recover from the error by finding a previous state S in the stack in which T would be valid, popping all of the nodes after S, and wrapping them in an error.

This way, the lexer is always invoked in a normal parse state, in which it is looking for a non-conflicting set of tokens. Eliminating the error recovery states also shrinks the lex state machine significantly.

Signed-off-by: Rick Winfrey <rewinfrey@github.com>
This commit is contained in:
parent
8b3941764f
commit
99d048e016
15 changed files with 327 additions and 639 deletions
39
test/fixtures/error_corpus/c_errors.txt
vendored
39
test/fixtures/error_corpus/c_errors.txt
vendored
|
|
@ -9,9 +9,11 @@ int x // no semicolon
|
|||
int a;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int c() { return 5; }
|
||||
|
||||
int b;
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
@ -23,20 +25,23 @@ int c;
|
|||
---
|
||||
|
||||
(translation_unit
|
||||
(preproc_ifdef (identifier)
|
||||
(preproc_ifdef
|
||||
(identifier)
|
||||
(ERROR (type_identifier) (identifier))
|
||||
(comment))
|
||||
|
||||
(declaration (type_identifier) (identifier))
|
||||
|
||||
(preproc_ifdef (identifier)
|
||||
(ERROR (string_literal)))
|
||||
|
||||
(declaration (type_identifier) (identifier))
|
||||
|
||||
(preproc_ifdef (identifier)
|
||||
(ERROR))
|
||||
|
||||
(preproc_ifdef
|
||||
(identifier)
|
||||
(linkage_specification
|
||||
(string_literal)
|
||||
(declaration_list
|
||||
(ERROR)
|
||||
(function_definition
|
||||
(type_identifier)
|
||||
(function_declarator (identifier) (parameter_list))
|
||||
(compound_statement (return_statement (number_literal))))
|
||||
(declaration (type_identifier) (identifier))
|
||||
(ERROR (identifier)))))
|
||||
(declaration (type_identifier) (identifier)))
|
||||
|
||||
========================================
|
||||
|
|
@ -76,8 +81,8 @@ int main() {
|
|||
(declaration (type_identifier) (init_declarator
|
||||
(identifier)
|
||||
(parenthesized_expression
|
||||
(ERROR (number_literal))
|
||||
(number_literal)))))))
|
||||
(number_literal)
|
||||
(ERROR (number_literal))))))))
|
||||
|
||||
========================================
|
||||
Errors in declarations
|
||||
|
|
@ -124,13 +129,15 @@ int b() {
|
|||
(compound_statement
|
||||
(declaration
|
||||
(type_identifier)
|
||||
(ERROR (identifier))
|
||||
(init_declarator
|
||||
(identifier)
|
||||
(ERROR (identifier) (identifier))
|
||||
(ERROR (identifier))
|
||||
(number_literal)))
|
||||
(declaration
|
||||
(type_identifier)
|
||||
(ERROR (identifier))
|
||||
(init_declarator
|
||||
(identifier)
|
||||
(ERROR (identifier) (identifier))
|
||||
(ERROR (identifier))
|
||||
(number_literal))))))
|
||||
|
|
|
|||
34
test/fixtures/error_corpus/javascript_errors.txt
vendored
34
test/fixtures/error_corpus/javascript_errors.txt
vendored
|
|
@ -12,12 +12,13 @@ e f;
|
|||
(program
|
||||
(if_statement
|
||||
(parenthesized_expression
|
||||
(ERROR (identifier))
|
||||
(identifier))
|
||||
(identifier)
|
||||
(ERROR (identifier)))
|
||||
(statement_block
|
||||
(ERROR (identifier))
|
||||
(expression_statement (identifier))))
|
||||
(expression_statement (ERROR (identifier)) (identifier)))
|
||||
(ERROR (identifier))
|
||||
(expression_statement (identifier)))
|
||||
|
||||
=======================================================
|
||||
multiple invalid tokens right after the viable prefix
|
||||
|
|
@ -33,16 +34,13 @@ h i j k;
|
|||
(program
|
||||
(if_statement
|
||||
(parenthesized_expression
|
||||
(ERROR (identifier))
|
||||
(identifier)
|
||||
(ERROR (identifier)))
|
||||
(ERROR (identifier) (identifier)))
|
||||
(statement_block
|
||||
(expression_statement
|
||||
(identifier)
|
||||
(ERROR (jsx_attribute (property_identifier)) (jsx_attribute (property_identifier)) (identifier)))))
|
||||
(expression_statement
|
||||
(identifier)
|
||||
(ERROR (jsx_attribute (property_identifier)) (jsx_attribute (property_identifier)) (identifier))))
|
||||
(ERROR (identifier) (identifier) (identifier))
|
||||
(expression_statement (identifier))))
|
||||
(ERROR (identifier) (identifier) (identifier))
|
||||
(expression_statement (identifier)))
|
||||
|
||||
===================================================
|
||||
one invalid subtree right after the viable prefix
|
||||
|
|
@ -136,3 +134,17 @@ var x = !!!
|
|||
(function (identifier) (formal_parameters) (statement_block))
|
||||
(function (identifier) (formal_parameters) (statement_block))
|
||||
(ERROR (identifier)))
|
||||
|
||||
=========================================================
|
||||
Errors inside of a template string substitution
|
||||
=========================================================
|
||||
|
||||
const a = `b c ${d +} f g`
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(lexical_declaration
|
||||
(variable_declarator
|
||||
(identifier)
|
||||
(template_string (template_substitution (identifier) (ERROR))))))
|
||||
|
|
|
|||
|
|
@ -166,7 +166,7 @@ describe("Parser", [&]() {
|
|||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
set_text("a; ' this string never ends");
|
||||
assert_root_node(
|
||||
"(ERROR (program (expression_statement (identifier))) (UNEXPECTED EOF))");
|
||||
"(program (expression_statement (identifier)) (ERROR (UNEXPECTED EOF)))");
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -198,7 +198,7 @@ describe("Parser", [&]() {
|
|||
|
||||
free(string);
|
||||
|
||||
assert_root_node("(ERROR (UNEXPECTED INVALID))");
|
||||
assert_root_node("(program (ERROR (UNEXPECTED INVALID)))");
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -124,12 +124,6 @@ describe("Stack", [&]() {
|
|||
{1, 3},
|
||||
})));
|
||||
});
|
||||
|
||||
it("increments the version's push count", [&]() {
|
||||
AssertThat(ts_stack_push_count(stack, 0), Equals<unsigned>(0));
|
||||
ts_stack_push(stack, 0, trees[0], false, stateA);
|
||||
AssertThat(ts_stack_push_count(stack, 0), Equals<unsigned>(1));
|
||||
});
|
||||
});
|
||||
|
||||
describe("merge()", [&]() {
|
||||
|
|
@ -221,7 +215,6 @@ describe("Stack", [&]() {
|
|||
// ↑
|
||||
// └─*
|
||||
StackPopResult pop = ts_stack_pop_count(stack, 0, 2);
|
||||
AssertThat(pop.stopped_at_error, Equals(false));
|
||||
AssertThat(pop.slices.size, Equals<size_t>(1));
|
||||
AssertThat(ts_stack_version_count(stack), Equals<size_t>(2));
|
||||
|
||||
|
|
@ -240,7 +233,6 @@ describe("Stack", [&]() {
|
|||
// ↑
|
||||
// └─*
|
||||
StackPopResult pop = ts_stack_pop_count(stack, 0, 2);
|
||||
AssertThat(pop.stopped_at_error, Equals(false));
|
||||
AssertThat(pop.slices.size, Equals<size_t>(1));
|
||||
|
||||
StackSlice slice = pop.slices.contents[0];
|
||||
|
|
@ -250,40 +242,6 @@ describe("Stack", [&]() {
|
|||
free_slice_array(&pop.slices);
|
||||
});
|
||||
|
||||
it("stops popping entries early if it reaches an error tree", [&]() {
|
||||
// . <──0── A <──1── B <──2── C <──3── ERROR <──4── D*
|
||||
ts_stack_push(stack, 0, trees[3], false, ERROR_STATE);
|
||||
ts_stack_push(stack, 0, trees[4], false, stateD);
|
||||
|
||||
// . <──0── A <──1── B <──2── C <──3── ERROR <──4── D*
|
||||
// ↑
|
||||
// └─*
|
||||
StackPopResult pop = ts_stack_pop_count(stack, 0, 3);
|
||||
AssertThat(pop.stopped_at_error, Equals(true));
|
||||
|
||||
AssertThat(ts_stack_version_count(stack), Equals<size_t>(2));
|
||||
AssertThat(ts_stack_top_state(stack, 1), Equals(ERROR_STATE));
|
||||
|
||||
AssertThat(pop.slices.size, Equals<size_t>(1));
|
||||
StackSlice slice = pop.slices.contents[0];
|
||||
AssertThat(slice.version, Equals<StackVersion>(1));
|
||||
AssertThat(slice.trees, Equals(vector<Tree *>({ trees[4] })));
|
||||
|
||||
free_slice_array(&pop.slices);
|
||||
});
|
||||
|
||||
it("preserves the push count of the popped version", [&]() {
|
||||
// . <──0── A <──1── B <──2── C*
|
||||
// ↑
|
||||
// └─*
|
||||
StackPopResult pop = ts_stack_pop_count(stack, 0, 2);
|
||||
|
||||
AssertThat(ts_stack_push_count(stack, 0), Equals<unsigned>(3));
|
||||
AssertThat(ts_stack_push_count(stack, 1), Equals<unsigned>(3));
|
||||
|
||||
free_slice_array(&pop.slices);
|
||||
});
|
||||
|
||||
describe("when the version has been merged", [&]() {
|
||||
before_each([&]() {
|
||||
// . <──0── A <──1── B <──2── C <──3── D <──10── I*
|
||||
|
|
@ -475,7 +433,6 @@ describe("Stack", [&]() {
|
|||
ts_stack_push(stack, 0, trees[1], true, stateB);
|
||||
|
||||
StackPopResult pop = ts_stack_pop_pending(stack, 0);
|
||||
AssertThat(pop.stopped_at_error, Equals(false));
|
||||
AssertThat(pop.slices.size, Equals<size_t>(1));
|
||||
|
||||
AssertThat(get_stack_entries(stack, 0), Equals(vector<StackEntry>({
|
||||
|
|
@ -496,7 +453,6 @@ describe("Stack", [&]() {
|
|||
ts_stack_push(stack, 0, trees[3], false, stateB);
|
||||
|
||||
StackPopResult pop = ts_stack_pop_pending(stack, 0);
|
||||
AssertThat(pop.stopped_at_error, Equals(false));
|
||||
AssertThat(pop.slices.size, Equals<size_t>(1));
|
||||
|
||||
AssertThat(pop.slices.contents[0].trees, Equals(vector<Tree *>({ trees[1], trees[2], trees[3] })));
|
||||
|
|
@ -513,7 +469,6 @@ describe("Stack", [&]() {
|
|||
ts_stack_push(stack, 0, trees[1], false, stateB);
|
||||
|
||||
StackPopResult pop = ts_stack_pop_pending(stack, 0);
|
||||
AssertThat(pop.stopped_at_error, Equals(false));
|
||||
AssertThat(pop.slices.size, Equals<size_t>(0));
|
||||
|
||||
AssertThat(get_stack_entries(stack, 0), Equals(vector<StackEntry>({
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue