diff --git a/spec/runtime/languages/arithmetic/errors.txt b/spec/runtime/languages/arithmetic/errors.txt index 9a5a803a..c8675ba4 100644 --- a/spec/runtime/languages/arithmetic/errors.txt +++ b/spec/runtime/languages/arithmetic/errors.txt @@ -6,7 +6,7 @@ x * * y --- -(variable) (ERROR '*') +(variable) (ERROR (UNEXPECTED '*') (variable)) ===================================================== errors inside parenthesized expressions @@ -17,5 +17,6 @@ x + (y * + z) * 5 --- (sum - (variable) - (product (group (ERROR '+')) (number))) + (variable) + (product + (group (ERROR (variable) (UNEXPECTED '+') (variable))) (number))) diff --git a/spec/runtime/languages/javascript/errors.txt b/spec/runtime/languages/javascript/errors.txt index 5170b4d9..77ab2aac 100644 --- a/spec/runtime/languages/javascript/errors.txt +++ b/spec/runtime/languages/javascript/errors.txt @@ -6,7 +6,7 @@ stuff(|||); --- -(expression_statement (function_call (identifier) (ERROR '|'))) +(expression_statement (function_call (identifier) (ERROR (UNEXPECTED '|')))) ========================================== errors in if statements @@ -21,8 +21,9 @@ moreStuff(); --- (program - (expression_statement (function_call (identifier))) - (if_statement (ERROR '*') - (statement_block (expression_statement (ERROR '*')))) - (expression_statement (function_call (identifier)))) - + (expression_statement (function_call (identifier))) + (if_statement + (ERROR (UNEXPECTED '*') (identifier)) + (statement_block + (expression_statement (ERROR (UNEXPECTED '*') (identifier) (identifier))))) + (expression_statement (function_call (identifier)))) diff --git a/spec/runtime/languages/json/errors.txt b/spec/runtime/languages/json/errors.txt index 8082598a..2220b911 100644 --- a/spec/runtime/languages/json/errors.txt +++ b/spec/runtime/languages/json/errors.txt @@ -6,7 +6,7 @@ top-level errors --- -(ERROR '}') +(ERROR (UNEXPECTED '}')) ========================================== unexpected tokens @@ -16,7 +16,7 @@ barf --- -(ERROR 'b') +(ERROR (UNEXPECTED 'b')) ========================================== errors inside arrays @@ -27,7 +27,7 @@ errors inside arrays --- (array (number) - (ERROR ',') + (ERROR (UNEXPECTED ',')) (number)) ========================================== @@ -38,7 +38,7 @@ errors inside objects --- -(object (string) (number) (ERROR 'o')) +(object (string) (number) (ERROR (UNEXPECTED 'o'))) ========================================== errors inside nested objects @@ -49,6 +49,8 @@ errors inside nested objects --- (object - (string) (object (string) (number) (ERROR '2')) - (ERROR '[') + (string) (object + (string) (number) + (ERROR (UNEXPECTED '2') (number))) + (ERROR (UNEXPECTED '[')) (string) (number)) diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index b5807ea2..d363d3a7 100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -76,13 +76,13 @@ describe("Parser", [&]() { set_text(" [123, @@@@@, true]"); AssertThat(ts_node_string(root), Equals( - "(DOCUMENT (array (number) (ERROR '@') (true)))")); + "(DOCUMENT (array (number) (ERROR (UNEXPECTED '@')) (true)))")); TSNode *array = ts_node_child(root, 0); TSNode *error = ts_node_child(array, 1); TSNode *last = ts_node_child(array, 2); - AssertThat(ts_node_name(error), Equals("error")); + AssertThat(ts_node_name(error), Equals("ERROR")); AssertThat(ts_node_pos(error).bytes, Equals(strlen(" [123, "))) AssertThat(ts_node_size(error).bytes, Equals(strlen("@@@@@"))) @@ -100,13 +100,13 @@ describe("Parser", [&]() { set_text(" [123, faaaaalse, true]"); AssertThat(ts_node_string(root), Equals( - "(DOCUMENT (array (number) (ERROR 'a') (true)))")); + "(DOCUMENT (array (number) (ERROR (UNEXPECTED 'a')) (true)))")); TSNode *array = ts_node_child(root, 0); TSNode *error = ts_node_child(array, 1); TSNode *last = ts_node_child(array, 2); - AssertThat(ts_node_name(error), Equals("error")); + AssertThat(ts_node_name(error), Equals("ERROR")); AssertThat(ts_node_pos(error).bytes, Equals(strlen(" [123, "))) AssertThat(ts_node_size(error).bytes, Equals(strlen("faaaaalse"))) @@ -124,13 +124,13 @@ describe("Parser", [&]() { set_text(" [123, true false, true]"); AssertThat(ts_node_string(root), Equals( - "(DOCUMENT (array (number) (ERROR 'f') (true)))")); + "(DOCUMENT (array (number) (ERROR (true) (UNEXPECTED 'f') (false)) (true)))")); TSNode *array = ts_node_child(root, 0); TSNode *error = ts_node_child(array, 1); TSNode *last = ts_node_child(array, 2); - AssertThat(ts_node_name(error), Equals("error")); + AssertThat(ts_node_name(error), Equals("ERROR")); AssertThat(ts_node_pos(error).bytes, Equals(strlen(" [123, "))); AssertThat(ts_node_size(error).bytes, Equals(strlen("true false"))); @@ -148,13 +148,13 @@ describe("Parser", [&]() { set_text(" [123, , true]"); AssertThat(ts_node_string(root), Equals( - "(DOCUMENT (array (number) (ERROR ',') (true)))")); + "(DOCUMENT (array (number) (ERROR (UNEXPECTED ',')) (true)))")); TSNode *array = ts_node_child(root, 0); TSNode *error = ts_node_child(array, 1); TSNode *last = ts_node_child(array, 2); - AssertThat(ts_node_name(error), Equals("error")); + AssertThat(ts_node_name(error), Equals("ERROR")); AssertThat(ts_node_pos(error).bytes, Equals(strlen(" [123, "))); AssertThat(ts_node_size(error).bytes, Equals(0)) @@ -286,7 +286,7 @@ describe("Parser", [&]() { insert_text(strlen("var x = y"), " *"); AssertThat(ts_node_string(root), Equals( - "(DOCUMENT (var_declaration (ERROR ';')))")); + "(DOCUMENT (var_declaration (ERROR (identifier) (identifier) (UNEXPECTED ';'))))")); insert_text(strlen("var x = y *"), " z"); @@ -296,25 +296,6 @@ describe("Parser", [&]() { }); }); - describe("fixing an error", [&]() { - it("doesn't try to reuse the error node", [&]() { - ts_document_set_language(doc, ts_language_javascript()); - set_text( - "var y = z\n" - "var x = y;"); - - AssertThat(ts_node_string(root), Equals( - "(DOCUMENT (program " - "(var_declaration (var_assignment (identifier) (identifier))) " - "(var_declaration (var_assignment (identifier) (identifier)))))")); - - delete_text(strlen("var y = "), 1); - - AssertThat(ts_node_string(root), Equals( - "(DOCUMENT (var_declaration (ERROR 'x')))")); - }); - }); - describe("into the middle of an existing token", [&]() { before_each([&]() { set_text("abc * 123"); @@ -411,7 +392,7 @@ describe("Parser", [&]() { it("updates the parse tree, creating an error", [&]() { AssertThat(ts_node_string(root), Equals( - "(DOCUMENT (number) (ERROR '4'))")); + "(DOCUMENT (number) (ERROR (UNEXPECTED '4') (number)))")); }); }); }); diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 52b43052..ed825f0e 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -48,7 +48,7 @@ static TSLength break_down_left_stack(TSParser *parser, TSInputEdit edit) { size_t child_count; TSTree **children = ts_tree_children(node, &child_count); - if (left_subtree_end.chars < edit.position && !children) + if (left_subtree_end.chars < edit.position && !children && node->symbol != ts_builtin_sym_error) break; DEBUG("pop_left sym:%s, state:%u", SYM_NAME(node->symbol), @@ -107,7 +107,9 @@ static TSTree *break_down_right_stack(TSParser *parser) { TSParseAction action = get_action(parser->language, state, node->symbol); bool is_usable = (action.type != TSParseActionTypeError) && !ts_tree_is_extra(node) && - !ts_tree_is_fragile_left(node) && !ts_tree_is_fragile_right(node); + !ts_tree_is_empty(node) && + !ts_tree_is_fragile_left(node) && + !ts_tree_is_fragile_right(node); if (is_usable && right_subtree_start == current_position.chars) { ts_stack_shrink(&parser->right_stack, parser->right_stack.size - 1); return node; @@ -156,12 +158,6 @@ static TSTree *get_next_node(TSParser *parser, TSStateId lex_state) { return node; } -static void resize_error(TSParser *parser, TSTree *error) { - TSLength distance = ts_length_sub(parser->lexer.token_start_position, - ts_stack_total_tree_size(&parser->stack)); - error->size = ts_length_sub(distance, error->padding); -} - /* * Parse Actions */ @@ -179,7 +175,7 @@ static void shift_extra(TSParser *parser) { shift(parser, 0); } -static TSTree * reduce_helper(TSParser *parser, TSSymbol symbol, size_t child_count, bool extra) { +static TSTree * reduce_helper(TSParser *parser, TSSymbol symbol, size_t child_count, bool extra, bool count_extra) { /* * Walk down the stack to determine which symbols will be reduced. @@ -187,12 +183,14 @@ static TSTree * reduce_helper(TSParser *parser, TSSymbol symbol, size_t child_co * may be ubiquitous tokens, which don't count. */ TSStack *stack = &parser->stack; - for (size_t i = 0; i < child_count; i++) { - if (child_count == stack->size) - break; - TSTree *child = stack->entries[stack->size - 1 - i].node; - if (ts_tree_is_extra(child)) - child_count++; + if (!count_extra) { + for (size_t i = 0; i < child_count; i++) { + if (child_count == stack->size) + break; + TSTree *child = stack->entries[stack->size - 1 - i].node; + if (ts_tree_is_extra(child)) + child_count++; + } } size_t start_index = stack->size - child_count; @@ -215,23 +213,30 @@ static TSTree * reduce_helper(TSParser *parser, TSSymbol symbol, size_t child_co } static void reduce(TSParser *parser, TSSymbol symbol, size_t child_count) { - reduce_helper(parser, symbol, child_count, false); + reduce_helper(parser, symbol, child_count, false, false); } static void reduce_extra(TSParser *parser, TSSymbol symbol) { - TSTree *reduced = reduce_helper(parser, symbol, 1, true); + TSTree *reduced = reduce_helper(parser, symbol, 1, true, false); ts_tree_set_extra(reduced); } static void reduce_fragile(TSParser *parser, TSSymbol symbol, size_t child_count) { - TSTree *reduced = reduce_helper(parser, symbol, child_count, false); + TSTree *reduced = reduce_helper(parser, symbol, child_count, false, false); + ts_tree_set_fragile_left(reduced); + ts_tree_set_fragile_right(reduced); +} + +static void reduce_error(TSParser *parser, size_t child_count) { + TSTree *reduced = reduce_helper(parser, ts_builtin_sym_error, child_count, false, true); + reduced->size = ts_length_add(reduced->size, parser->lookahead->padding); + parser->lookahead->padding = ts_length_zero(); ts_tree_set_fragile_left(reduced); ts_tree_set_fragile_right(reduced); } static int handle_error(TSParser *parser) { - TSTree *error = parser->lookahead; - ts_tree_retain(error); + size_t index_before_error = parser->stack.size - 1; for (;;) { @@ -239,10 +244,10 @@ static int handle_error(TSParser *parser) { * Unwind the parse stack until a state is found in which an error is * expected and the current lookahead token is expected afterwards. */ - TS_STACK_FROM_TOP(parser->stack, entry) { - TSStateId stack_state = entry->state; - TSParseAction action_on_error = - get_action(parser->language, stack_state, ts_builtin_sym_error); + for (size_t i = index_before_error; i + 1 > 0; i--) { + TSStateId stack_state = parser->stack.entries[i].state; + TSParseAction action_on_error = get_action( + parser->language, stack_state, ts_builtin_sym_error); if (action_on_error.type == TSParseActionTypeShift) { TSStateId state_after_error = action_on_error.data.to_state; @@ -250,14 +255,8 @@ static int handle_error(TSParser *parser) { parser->language, state_after_error, parser->lookahead->symbol); if (action_after_error.type != TSParseActionTypeError) { - DEBUG("recover state:%u", state_after_error); - - ts_stack_shrink(&parser->stack, entry - parser->stack.entries + 1); - parser->lookahead->padding = ts_length_zero(); - - resize_error(parser, error); - ts_stack_push(&parser->stack, state_after_error, error); - ts_tree_release(error); + DEBUG("recover state:%u, count:%lu", state_after_error, parser->stack.size - i); + reduce_error(parser, parser->stack.size - i - 1); return 1; } } @@ -265,38 +264,25 @@ static int handle_error(TSParser *parser) { /* * If there is no state in the stack for which we can recover with the - * current lookahead token, advance to the next token. If no characters - * were consumed, advance the lexer to the next character. + * current lookahead token, advance to the next token. */ - DEBUG("skip_token"); - if (parser->lookahead) - ts_tree_release(parser->lookahead); + DEBUG("skip token:%s", SYM_NAME(parser->lookahead->symbol)); + shift(parser, ts_stack_top_state(&parser->stack)); parser->lookahead = get_next_node(parser, ts_lex_state_error); /* - * If the current lookahead character cannot be the start of any token, - * just skip it. If the end of input is reached, exit. + * If the end of input is reached, exit. */ if (parser->lookahead->symbol == ts_builtin_sym_end) { DEBUG("fail_to_recover"); - - resize_error(parser, error); - ts_stack_push(&parser->stack, 0, error); - ts_tree_release(error); + reduce_error(parser, parser->stack.size - index_before_error - 1); return 0; } } } static TSTree *finish(TSParser *parser) { - if (parser->stack.size == 0) { - TSTree *err = ts_tree_make_error(ts_length_zero(), ts_length_zero(), 0); - ts_stack_push(&parser->stack, 0, err); - } - reduce(parser, ts_builtin_sym_document, parser->stack.size); - parser->lookahead->options = 0; - shift(parser, 0); return parser->stack.entries[0].node; } diff --git a/src/runtime/stack.h b/src/runtime/stack.h index bbf3580d..eb7905ec 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -26,10 +26,6 @@ TSStateId ts_stack_top_state(const TSStack *stack); TSTree *ts_stack_top_node(const TSStack *stack); TSLength ts_stack_total_tree_size(const TSStack *stack); -#define TS_STACK_FROM_TOP(stack, entry) \ - for (TSStackEntry *entry = (stack).entries + (stack).size - 1; \ - entry >= (stack).entries; entry--) - #ifdef __cplusplus } #endif diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 31fccd5f..49f3e676 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -56,15 +56,21 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count, * Mark the tree as hidden if it wraps a single child node. */ TSTreeOptions options = 0; - if (is_hidden) - options |= TSTreeOptionsHidden; - if (child_count == 1 && - (ts_tree_is_visible(children[0]) || ts_tree_is_wrapper(children[0]))) - options |= (TSTreeOptionsWrapper | TSTreeOptionsHidden); - if (child_count > 0 && ts_tree_is_fragile_left(children[0])) - options |= (TSTreeOptionsFragileLeft); - if (child_count > 0 && ts_tree_is_fragile_right(children[child_count - 1])) - options |= (TSTreeOptionsFragileRight); + if (symbol == ts_builtin_sym_error) { + options |= (TSTreeOptionsFragileLeft | TSTreeOptionsFragileRight); + } else { + if (is_hidden) + options |= TSTreeOptionsHidden; + if (child_count == 1 && + (ts_tree_is_visible(children[0]) || ts_tree_is_wrapper(children[0]))) + options |= (TSTreeOptionsWrapper | TSTreeOptionsHidden); + if (child_count > 0) { + if (ts_tree_is_fragile_left(children[0])) + options |= (TSTreeOptionsFragileLeft); + if (ts_tree_is_fragile_right(children[child_count - 1])) + options |= (TSTreeOptionsFragileRight); + } + } /* * Store the visible child array adjacent to the tree itself. This avoids @@ -162,7 +168,7 @@ TSTree **ts_tree_children(const TSTree *tree, size_t *count) { } TSTreeChild *ts_tree_visible_children(const TSTree *tree, size_t *count) { - if (tree->symbol == ts_builtin_sym_error) { + if (tree->child_count == 0) { if (count) *count = 0; return NULL; @@ -196,14 +202,15 @@ static size_t tree_write_to_string(const TSTree *tree, const char **symbol_names cursor += snprintf(*writer, limit, " "); if (visible) { - if (tree->symbol == ts_builtin_sym_error) { - cursor += snprintf(*writer, limit, "(ERROR "); + if (tree->symbol == ts_builtin_sym_error && tree->child_count == 0) { + cursor += snprintf(*writer, limit, "(UNEXPECTED "); cursor += write_lookahead_to_string(*writer, limit, tree->lookahead_char); } else { cursor += snprintf(*writer, limit, "(%s", symbol_names[tree->symbol]); } } + for (size_t i = 0; i < tree->child_count; i++) { TSTree *child = tree->children[i]; cursor += tree_write_to_string(child, symbol_names, *writer, limit, 0); diff --git a/src/runtime/tree.h b/src/runtime/tree.h index f230990c..9b39e710 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -21,10 +21,10 @@ struct TSTree { TSTreeOptions options; TSLength padding; TSLength size; + size_t child_count; union { struct { struct TSTree **children; - size_t child_count; size_t visible_child_count; }; char lookahead_char; @@ -85,6 +85,10 @@ TSTree **ts_tree_children(const TSTree *tree, size_t *count); TSTreeChild *ts_tree_visible_children(const TSTree *tree, size_t *count); TSLength ts_tree_total_size(const TSTree *tree); +static inline bool ts_tree_is_empty(TSTree *tree) { + return ts_tree_total_size(tree).bytes == 0; +} + #ifdef __cplusplus } #endif diff --git a/todo.md b/todo.md index 449ac5e1..8373b9be 100644 --- a/todo.md +++ b/todo.md @@ -14,7 +14,6 @@ TODO of their parent node. * Error handling - * Preserve tokens within error nodes * Try to minimize size of error node by looking ahead a few tokens * Grammar Features