diff --git a/spec/integration/corpus_specs.cc b/spec/integration/corpus_specs.cc index 1909c280..b4612f9d 100644 --- a/spec/integration/corpus_specs.cc +++ b/spec/integration/corpus_specs.cc @@ -10,63 +10,69 @@ #include "helpers/random_helpers.h" #include -static void expect_the_correct_tree(TSNode node, TSDocument *document, string tree_string) { - const char *node_string = ts_node_string(node, document); +static void assert_correct_tree_shape(const TSDocument *document, string tree_string) { + TSNode root_node = ts_document_root_node(document); + const char *node_string = ts_node_string(root_node, document); string result(node_string); ts_free((void *)node_string); AssertThat(result, Equals(tree_string)); } -static void expect_a_consistent_tree(TSNode node, TSDocument *document) { +static void assert_consistent_sizes(TSNode node) { size_t child_count = ts_node_child_count(node); size_t start_byte = ts_node_start_byte(node); size_t end_byte = ts_node_end_byte(node); - size_t start_char = ts_node_start_char(node); - size_t end_char = ts_node_end_char(node); TSPoint start_point = ts_node_start_point(node); TSPoint end_point = ts_node_end_point(node); - bool has_changes = ts_node_has_changes(node); bool some_child_has_changes = false; AssertThat(start_byte, !IsGreaterThan(end_byte)); - AssertThat(start_char, !IsGreaterThan(end_char)); AssertThat(start_point, !IsGreaterThan(end_point)); size_t last_child_end_byte = start_byte; - size_t last_child_end_char = start_char; TSPoint last_child_end_point = start_point; for (size_t i = 0; i < child_count; i++) { TSNode child = ts_node_child(node, i); size_t child_start_byte = ts_node_start_byte(child); - size_t child_end_byte = ts_node_end_byte(child); - size_t child_start_char = ts_node_start_char(child); - size_t child_end_char = ts_node_end_char(child); TSPoint child_start_point = ts_node_start_point(child); - TSPoint child_end_point = ts_node_end_point(child); AssertThat(child_start_byte, !IsLessThan(last_child_end_byte)); - AssertThat(child_start_char, !IsLessThan(last_child_end_char)); AssertThat(child_start_point, !IsLessThan(last_child_end_point)); - last_child_end_byte = child_end_byte; - last_child_end_char = child_end_char; - last_child_end_point = child_end_point; - - expect_a_consistent_tree(child, document); - + assert_consistent_sizes(child); if (ts_node_has_changes(child)) some_child_has_changes = true; + + last_child_end_byte = ts_node_end_byte(child); + last_child_end_point = ts_node_end_point(child); } if (child_count > 0) { AssertThat(end_byte, !IsLessThan(last_child_end_byte)); AssertThat(end_point, !IsLessThan(last_child_end_point)); - - if (!has_changes) - AssertThat(end_char, !IsLessThan(last_child_end_char)); - - AssertThat(has_changes, Equals(some_child_has_changes)); } + + if (some_child_has_changes) { + AssertThat(ts_node_has_changes(node), IsTrue()); + } +} + +static void assert_correct_tree_size(TSDocument *document, string content) { + TSNode root_node = ts_document_root_node(document); + size_t expected_size = content.size(); + + // In the JSON grammar, the start rule (`_value`) is hidden, so the node + // returned from `ts_document_root_node` (e.g. an `object` node), does not + // actually point to the root of the tree. In this weird case, trailing + // whitespace is not included in the root node's size. + // + // TODO: Fix this inconsistency. Maybe disallow the start rule being hidden? + if (ts_document_language(document) == get_test_language("json") && + string(ts_node_type(root_node, document)) != "ERROR") + expected_size = content.find_last_not_of("\n ") + 1; + + AssertThat(ts_node_end_byte(root_node), Equals(expected_size)); + assert_consistent_sizes(root_node); } START_TEST @@ -105,9 +111,8 @@ describe("The Corpus", []() { input = new SpyInput(entry.input, 3); ts_document_set_input(document, input->input()); edit_sequence(); - TSNode root_node = ts_document_root_node(document); - expect_the_correct_tree(root_node, document, entry.tree_string); - expect_a_consistent_tree(root_node, document); + assert_correct_tree_shape(document, entry.tree_string); + assert_correct_tree_size(document, input->content); delete input; }); }; @@ -130,10 +135,10 @@ describe("The Corpus", []() { it_handles_edit_sequence("repairing an insertion of " + description, [&]() { ts_document_edit(document, input->replace(edit_position, 0, inserted_text)); ts_document_parse(document); - - expect_a_consistent_tree(ts_document_root_node(document), document); + assert_correct_tree_size(document, input->content); ts_document_edit(document, input->undo()); + assert_correct_tree_size(document, input->content); ts_document_parse(document); }); } @@ -144,10 +149,10 @@ describe("The Corpus", []() { it_handles_edit_sequence("repairing a deletion of " + desription, [&]() { ts_document_edit(document, input->replace(edit_position, deletion_size, "")); ts_document_parse(document); - - expect_a_consistent_tree(ts_document_root_node(document), document); + assert_correct_tree_size(document, input->content); ts_document_edit(document, input->undo()); + assert_correct_tree_size(document, input->content); ts_document_parse(document); }); } diff --git a/spec/runtime/document_spec.cc b/spec/runtime/document_spec.cc index c36c5aa3..bd07ff3c 100644 --- a/spec/runtime/document_spec.cc +++ b/spec/runtime/document_spec.cc @@ -109,7 +109,7 @@ describe("Document", [&]() { assert_node_string_equals( new_root, "(object (pair (string) (array (null) (number))))"); - AssertThat(spy_input->strings_read, Equals(vector({" [null, 2", ""}))); + AssertThat(spy_input->strings_read, Equals(vector({" [null, 2"}))); }); it("reads from the new input correctly when the old input was blank", [&]() { diff --git a/spec/runtime/node_spec.cc b/spec/runtime/node_spec.cc index 056362a6..085e4d31 100644 --- a/spec/runtime/node_spec.cc +++ b/spec/runtime/node_spec.cc @@ -138,19 +138,8 @@ describe("Node", []() { it("returns an iterator that yields each of the node's symbols", [&]() { const TSLanguage *language = ts_document_language(document); - TSSymbolIterator iterator = ts_node_symbols(array_node); - AssertThat(iterator.done, Equals(false)); - AssertThat(ts_language_symbol_name(language, iterator.value), Equals("array")); - - ts_symbol_iterator_next(&iterator); - AssertThat(iterator.done, Equals(false)); - AssertThat(ts_language_symbol_name(language, iterator.value), Equals("_value")); - - ts_symbol_iterator_next(&iterator); - AssertThat(iterator.done, Equals(true)); - TSNode false_node = ts_node_descendant_for_char_range(array_node, false_index, false_index + 1); - iterator = ts_node_symbols(false_node); + TSSymbolIterator iterator = ts_node_symbols(false_node); AssertThat(iterator.done, Equals(false)); AssertThat(ts_language_symbol_name(language, iterator.value), Equals("false")); diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index 0f0b5fa1..0e6bb97b 100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -127,7 +127,6 @@ describe("Parser", [&]() { TSNode error = ts_node_named_child(root, 1); AssertThat(ts_node_symbol(error), Equals(ts_builtin_sym_error)); AssertThat(ts_node_type(error, doc), Equals("ERROR")); - AssertThat(get_node_text(error), Equals(", faaaaalse")); AssertThat(ts_node_child_count(error), Equals(2)); TSNode comma = ts_node_child(error, 0); @@ -162,6 +161,15 @@ describe("Parser", [&]() { }); }); + describe("when there is an unexpected string at the end of a token", [&]() { + it("computes the error's size and position correctly", [&]() { + set_text(" [123, \"hi\n, true]"); + + assert_root_node( + "(array (number) (ERROR (UNEXPECTED '\\n')) (true))"); + }); + }); + describe("when there is an unterminated error", [&]() { it("maintains a consistent tree", [&]() { ts_document_set_language(doc, get_test_language("javascript")); @@ -244,7 +252,7 @@ describe("Parser", [&]() { "(identifier) " "(math_op (number) (member_access (identifier) (identifier))))))"); - AssertThat(input->strings_read, Equals(vector({ " + abc.d)", "" }))); + AssertThat(input->strings_read, Equals(vector({ " + abc.d)" }))); }); }); @@ -268,7 +276,7 @@ describe("Parser", [&]() { "(number) " "(math_op (number) (math_op (number) (identifier)))))))"); - AssertThat(input->strings_read, Equals(vector({ "123 || 5 +", "" }))); + AssertThat(input->strings_read, Equals(vector({ "123 || 5 +" }))); }); }); @@ -517,7 +525,6 @@ describe("Parser", [&]() { ts_document_free(doc); doc = nullptr; - AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); } record_alloc::stop(); diff --git a/src/runtime/document.c b/src/runtime/document.c index edb31ca4..325534b6 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -82,7 +82,7 @@ void ts_document_edit(TSDocument *self, TSInputEdit edit) { size_t max_bytes = ts_tree_total_bytes(self->tree); if (edit.start_byte > max_bytes) - edit.start_byte = max_bytes; + return; if (edit.bytes_removed > max_bytes - edit.start_byte) edit.bytes_removed = max_bytes - edit.start_byte; diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 09a8757a..75b2b4df 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -247,7 +247,6 @@ static TSTree *parser__lex(Parser *self, TSStateId parse_state) { TSStateId start_state = self->language->lex_states[parse_state]; TSStateId current_state = start_state; TSLength start_position = self->lexer.current_position; - TSLength position = start_position; LOG("lex state:%d", start_state); bool skipped_error = false; @@ -259,34 +258,32 @@ static TSTree *parser__lex(Parser *self, TSStateId parse_state) { while (!self->language->lex_fn(&self->lexer, current_state)) { if (current_state != TS_STATE_ERROR) { LOG("retry_in_error_mode"); - ts_lexer_reset(&self->lexer, position); - ts_lexer_start(&self->lexer, start_state); current_state = TS_STATE_ERROR; + ts_lexer_reset(&self->lexer, start_position); + ts_lexer_start(&self->lexer, current_state); continue; } - if (self->lexer.lookahead == 0) { - self->lexer.result_symbol = ts_builtin_sym_error; - break; + if (!skipped_error) { + error_start_position = self->lexer.token_start_position; + first_error_character = self->lexer.lookahead; } - if (self->lexer.current_position.bytes == position.bytes) { - if (!skipped_error) { - error_start_position = self->lexer.current_position; - first_error_character = self->lexer.lookahead; + if (self->lexer.current_position.bytes == error_end_position.bytes) { + if (self->lexer.lookahead == 0) { + self->lexer.result_symbol = ts_builtin_sym_error; + break; } - skipped_error = true; self->lexer.advance(&self->lexer, TS_STATE_ERROR, false); - error_end_position = self->lexer.current_position; } - position = self->lexer.current_position; + skipped_error = true; + error_end_position = self->lexer.current_position; } TSTree *result; if (skipped_error) { - error_start_position = ts_length_min(error_start_position, self->lexer.token_start_position); TSLength padding = ts_length_sub(error_start_position, start_position); TSLength size = ts_length_sub(error_end_position, error_start_position); ts_lexer_reset(&self->lexer, error_end_position); @@ -804,7 +801,10 @@ static void parser__start(Parser *self, TSInput input, TSTree *previous_tree) { self->finished_tree = NULL; } -static bool parser__accept(Parser *self, StackVersion version) { +static bool parser__accept(Parser *self, StackVersion version, TSTree *lookahead) { + lookahead->extra = true; + assert(lookahead->symbol == ts_builtin_sym_end); + CHECK(ts_stack_push(self->stack, version, lookahead, false, 1)); StackPopResult pop = ts_stack_pop_all(self->stack, version); CHECK(pop.status); CHECK(pop.slices.size); @@ -821,7 +821,7 @@ static bool parser__accept(Parser *self, StackVersion version) { for (size_t j = trees.size - 1; j + 1 > 0; j--) { TSTree *child = trees.contents[j]; if (!child->extra) { - root = ts_tree_make_copy(child); + CHECK(root = ts_tree_make_copy(child)); root->child_count = 0; for (size_t k = 0; k < child->child_count; k++) ts_tree_retain(child->children[k]); @@ -848,6 +848,8 @@ static bool parser__accept(Parser *self, StackVersion version) { return true; error: + for (size_t i = 0; i < pop.slices.size; i++) + ts_tree_array_delete(&pop.slices.contents[i].trees); return false; } @@ -1021,7 +1023,7 @@ static bool parser__recover(Parser *self, StackVersion version, TSStateId state, TreeArray children = array_new(); TSTree *parent = ts_tree_make_error_node(&children); CHECK(parser__push(self, version, parent, 1)); - return parser__accept(self, version); + return parser__accept(self, version, lookahead); } LOG("recover state:%u", state); @@ -1159,7 +1161,7 @@ static bool parser__advance(Parser *self, StackVersion version, continue; LOG("accept"); - CHECK(parser__accept(self, version)); + CHECK(parser__accept(self, version, lookahead)); ts_tree_release(lookahead); return true;