#include "spec_helper.h" #include "runtime/alloc.h" #include "helpers/record_alloc.h" #include "helpers/spy_input.h" #include "helpers/load_language.h" #include "helpers/log_debugger.h" #include "helpers/record_alloc.h" START_TEST describe("Parser", [&]() { TSDocument *doc; SpyInput *input; TSNode root; size_t chunk_size; before_each([&]() { record_alloc::start(); chunk_size = 3; input = nullptr; doc = ts_document_make(); }); after_each([&]() { if (doc) ts_document_free(doc); if (input) delete input; record_alloc::stop(); AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); }); auto set_text = [&](const char *text) { input = new SpyInput(text, chunk_size); ts_document_set_input(doc, input->input()); AssertThat(ts_document_parse(doc), Equals(0)); root = ts_document_root_node(doc); AssertThat(ts_node_end_byte(root), Equals(strlen(text))); input->clear(); }; auto insert_text = [&](size_t position, string text) { size_t prev_size = ts_node_end_byte(root); ts_document_edit(doc, input->replace(position, 0, text)); ts_document_parse(doc); root = ts_document_root_node(doc); size_t new_size = ts_node_end_byte(root); AssertThat(new_size, Equals(prev_size + text.size())); }; auto delete_text = [&](size_t position, size_t length) { size_t prev_size = ts_node_end_byte(root); ts_document_edit(doc, input->replace(position, length, "")); ts_document_parse(doc); root = ts_document_root_node(doc); size_t new_size = ts_node_end_byte(root); AssertThat(new_size, Equals(prev_size - length)); }; auto replace_text = [&](size_t position, size_t length, string new_text) { size_t prev_size = ts_node_end_byte(root); ts_document_edit(doc, input->replace(position, length, new_text)); ts_document_parse(doc); root = ts_document_root_node(doc); size_t new_size = ts_node_end_byte(root); AssertThat(new_size, Equals(prev_size - length + new_text.size())); }; auto assert_root_node = [&](const string &expected) { TSNode node = ts_document_root_node(doc); char *str = ts_node_string(node, doc); string actual(str); ts_free(str); AssertThat(actual, Equals(expected)); }; describe("handling errors", [&]() { before_each([&]() { ts_document_set_language(doc, get_test_language("json")); }); auto get_node_text = [&](TSNode node) { size_t start = ts_node_start_byte(node); size_t end = ts_node_end_byte(node); return input->content.substr(start, end - start); }; describe("when there is an invalid substring right before a valid token", [&]() { it("computes the error node's size and position correctly", [&]() { set_text(" [123, @@@@@, true]"); assert_root_node( "(array (number) (ERROR (UNEXPECTED '@')) (true))"); TSNode error = ts_node_named_child(root, 1); AssertThat(ts_node_name(error, doc), Equals("ERROR")); AssertThat(get_node_text(error), Equals(", @@@@@")); AssertThat(ts_node_child_count(error), Equals(2)); TSNode comma = ts_node_child(error, 0); AssertThat(get_node_text(comma), Equals(",")); TSNode garbage = ts_node_child(error, 1); AssertThat(get_node_text(garbage), Equals("@@@@@")); TSNode node_after_error = ts_node_named_child(root, 2); AssertThat(ts_node_name(node_after_error, doc), Equals("true")); AssertThat(get_node_text(node_after_error), Equals("true")); }); }); describe("when there is an unexpected string in the middle of a token", [&]() { it("computes the error node's size and position correctly", [&]() { set_text(" [123, faaaaalse, true]"); assert_root_node( "(array (number) (ERROR (UNEXPECTED 'a')) (true))"); TSNode error = ts_node_named_child(root, 1); AssertThat(ts_node_symbol(error), Equals(ts_builtin_sym_error)); AssertThat(ts_node_name(error, doc), Equals("ERROR")); AssertThat(get_node_text(error), Equals(", faaaaalse")); AssertThat(ts_node_child_count(error), Equals(2)); TSNode comma = ts_node_child(error, 0); AssertThat(ts_node_name(comma, doc), Equals(",")); AssertThat(get_node_text(comma), Equals(",")); TSNode garbage = ts_node_child(error, 1); AssertThat(ts_node_name(garbage, doc), Equals("ERROR")); AssertThat(get_node_text(garbage), Equals("faaaaalse")); TSNode last = ts_node_named_child(root, 2); AssertThat(ts_node_name(last, doc), Equals("true")); AssertThat(ts_node_start_byte(last), Equals(strlen(" [123, faaaaalse, "))); }); }); describe("when there is one unexpected token between two valid tokens", [&]() { it("computes the error node's size and position correctly", [&]() { set_text(" [123, true false, true]"); assert_root_node( "(array (number) (ERROR (true)) (false) (true))"); TSNode error = ts_node_named_child(root, 1); AssertThat(ts_node_name(error, doc), Equals("ERROR")); AssertThat(get_node_text(error), Equals("true")); AssertThat(ts_node_child_count(error), Equals(1)); TSNode last = ts_node_named_child(root, 2); AssertThat(ts_node_name(last, doc), Equals("false")); AssertThat(get_node_text(last), Equals("false")); }); }); }); describe("handling extra tokens", [&]() { // In the javascript example grammar, ASI works by using newlines as // terminators in statements, but also as extra tokens. before_each([&]() { ts_document_set_language(doc, get_test_language("javascript")); }); describe("when the token appears as part of a grammar rule", [&]() { it("is incorporated into the tree", [&]() { set_text("fn()\n"); assert_root_node( "(program (expression_statement (function_call (identifier))))"); }); }); describe("when the token appears somewhere else", [&]() { it("is incorporated into the tree", [&]() { set_text( "fn()\n" " .otherFn();"); assert_root_node( "(program (expression_statement (function_call " "(member_access (function_call (identifier)) (identifier)))))"); }); }); describe("when several extra tokens appear in a row", [&]() { it("is incorporated into the tree", [&]() { set_text( "fn()\n\n" "// This is a comment" "\n\n" ".otherFn();"); assert_root_node( "(program (expression_statement (function_call " "(member_access (function_call (identifier)) " "(comment) " "(identifier)))))"); }); }); }); describe("editing", [&]() { before_each([&]() { ts_document_set_language(doc, get_test_language("javascript")); }); describe("inserting text", [&]() { describe("creating new tokens near the end of the input", [&]() { it("updates the parse tree and re-reads only the changed portion of the text", [&]() { set_text("x * (100 + abc);"); assert_root_node( "(program (expression_statement (math_op " "(identifier) " "(math_op (number) (identifier)))))"); insert_text(strlen("x * (100 + abc"), ".d"); assert_root_node( "(program (expression_statement (math_op " "(identifier) " "(math_op (number) (member_access (identifier) (identifier))))))"); AssertThat(input->strings_read, Equals(vector({ " abc.d);", "" }))); }); }); describe("creating new tokens near the beginning of the input", [&]() { it("updates the parse tree and re-reads only the changed portion of the input", [&]() { chunk_size = 2; set_text("123 + 456 * (10 + x);"); assert_root_node( "(program (expression_statement (math_op " "(number) " "(math_op (number) (math_op (number) (identifier))))))"); insert_text(strlen("123"), " || 5"); assert_root_node( "(program (expression_statement (bool_op " "(number) " "(math_op " "(number) " "(math_op (number) (math_op (number) (identifier)))))))"); AssertThat(input->strings_read, Equals(vector({ "123 || 5 +", "" }))); }); }); describe("introducing an error", [&]() { it("gives the error the right size", [&]() { ts_document_set_language(doc, get_test_language("javascript")); set_text("var x = y;"); assert_root_node( "(program (var_declaration (var_assignment " "(identifier) (identifier))))"); insert_text(strlen("var x = y"), " *"); assert_root_node( "(program (var_declaration (identifier) (ERROR (identifier))))"); insert_text(strlen("var x = y *"), " z"); assert_root_node( "(program (var_declaration (var_assignment " "(identifier) (math_op (identifier) (identifier)))))"); }); }); describe("into the middle of an existing token", [&]() { it("updates the parse tree", [&]() { set_text("abc * 123;"); assert_root_node( "(program (expression_statement (math_op (identifier) (number))))"); insert_text(strlen("ab"), "XYZ"); assert_root_node( "(program (expression_statement (math_op (identifier) (number))))"); TSNode node = ts_node_named_descendant_for_range(root, 1, 1); AssertThat(ts_node_name(node, doc), Equals("identifier")); AssertThat(ts_node_end_byte(node), Equals(strlen("abXYZc"))); }); }); describe("at the end of an existing token", [&]() { it("updates the parse tree", [&]() { set_text("abc * 123;"); assert_root_node( "(program (expression_statement (math_op (identifier) (number))))"); insert_text(strlen("abc"), "XYZ"); assert_root_node( "(program (expression_statement (math_op (identifier) (number))))"); TSNode node = ts_node_named_descendant_for_range(root, 1, 1); AssertThat(ts_node_name(node, doc), Equals("identifier")); AssertThat(ts_node_end_byte(node), Equals(strlen("abcXYZ"))); }); }); describe("with non-ascii characters", [&]() { it("inserts the text according to the UTF8 character index", [&]() { // 'αβδ' + '1' set_text("'\u03b1\u03b2\u03b4' + '1';"); assert_root_node( "(program (expression_statement (math_op (string) (string))))"); // 'αβδ' + 'ψ1' insert_text(strlen("'abd' + '"), "\u03c8"); assert_root_node( "(program (expression_statement (math_op (string) (string))))"); }); }); describe("into a node containing a extra token", [&]() { it("updates the parse tree", [&]() { set_text("123 *\n" "// a-comment\n" "abc;"); assert_root_node( "(program (expression_statement (math_op " "(number) " "(comment) " "(identifier))))"); insert_text( strlen("123 *\n" "// a-comment\n" "abc"), "XYZ"); assert_root_node( "(program (expression_statement (math_op " "(number) " "(comment) " "(identifier))))"); }); }); }); describe("deleting text", [&]() { describe("when a critical token is removed", [&]() { it("updates the parse tree, creating an error", [&]() { set_text("123 * 456; 789 * 123;"); assert_root_node( "(program " "(expression_statement (math_op (number) (number))) " "(expression_statement (math_op (number) (number))))"); delete_text(strlen("123 "), 2); assert_root_node( "(program " "(expression_statement (number) (ERROR (number))) " "(expression_statement (math_op (number) (number))))"); }); }); }); describe("replacing text", [&]() { it("does not try to re-use nodes that are within the edited region", [&]() { ts_document_set_language(doc, get_test_language("javascript")); set_text("{ x: (b.c) };"); assert_root_node( "(program (expression_statement (object (pair " "(identifier) (member_access (identifier) (identifier))))))"); replace_text(strlen("{ x: "), strlen("(b.c)"), "b.c"); assert_root_node( "(program (expression_statement (object (pair " "(identifier) (member_access (identifier) (identifier))))))"); }); }); it("updates the document's parse count", [&]() { ts_document_set_language(doc, get_test_language("javascript")); AssertThat(ts_document_parse_count(doc), Equals(0)); set_text("{ x: (b.c) };"); AssertThat(ts_document_parse_count(doc), Equals(1)); insert_text(strlen("{ x"), "yz"); AssertThat(ts_document_parse_count(doc), Equals(2)); }); }); describe("lexing", [&]() { before_each([&]() { ts_document_set_language(doc, get_test_language("javascript")); }); describe("handling tokens containing wildcard patterns (e.g. comments)", [&]() { it("terminates them at the end of the document", [&]() { set_text("x; // this is a comment"); assert_root_node( "(program (expression_statement (identifier)) (comment))"); TSNode comment = ts_node_named_child(root, 1); AssertThat(ts_node_start_byte(comment), Equals(strlen("x; "))); AssertThat(ts_node_end_byte(comment), Equals(strlen("x; // this is a comment"))); }); }); it("recognizes UTF8 characters as single characters", [&]() { // 'ΩΩΩ — ΔΔ'; set_text("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';"); assert_root_node( "(program (expression_statement (string)))"); AssertThat(ts_node_end_char(root), Equals(strlen("'OOO - DD';"))); AssertThat(ts_node_end_byte(root), Equals(strlen("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';"))); }); }); describe("handling allocation failures", [&]() { it("handles failures when allocating documents", [&]() { record_alloc::start(); TSDocument *document = ts_document_make(); ts_document_free(document); AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); size_t allocation_count = record_alloc::allocation_count(); AssertThat(allocation_count, IsGreaterThan(1)); for (size_t i = 0; i < allocation_count; i++) { record_alloc::start(); record_alloc::fail_at_allocation_index(i); AssertThat(ts_document_make(), Equals(nullptr)); AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); } record_alloc::stop(); }); it("handles allocation failures during parsing", [&]() { const TSLanguage *language = get_test_language("cpp"); const char *input_string = "int main() { return vector().size(); }"; string expected_node_string = "(translation_unit (function_definition " "(identifier) " "(function_declarator (identifier)) " "(compound_statement " "(return_statement (call_expression (field_expression " "(call_expression (template_call " "(identifier) " "(type_name (identifier) (abstract_pointer_declarator)))) " "(identifier)))))))"; record_alloc::start(); ts_document_set_language(doc, language); ts_document_set_input_string(doc, input_string); AssertThat(ts_document_parse(doc), Equals(0)); size_t allocation_count = record_alloc::allocation_count(); AssertThat(allocation_count, IsGreaterThan(1)); assert_root_node(expected_node_string); ts_document_free(doc); doc = nullptr; AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); for (size_t i = 0; i < allocation_count; i++) { record_alloc::stop(); doc = ts_document_make(); record_alloc::start(); record_alloc::fail_at_allocation_index(i); ts_document_set_language(doc, language); ts_document_set_input_string(doc, input_string); AssertThat(ts_document_parse(doc), Equals(-1)); AssertThat(ts_document_root_node(doc).data, Equals(nullptr)); ts_document_free(doc); doc = nullptr; AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty()); } record_alloc::stop(); doc = ts_document_make(); record_alloc::start(); record_alloc::fail_at_allocation_index(allocation_count + 1); ts_document_set_language(doc, language); ts_document_set_input_string(doc, input_string); AssertThat(ts_document_parse(doc), Equals(0)); assert_root_node(expected_node_string); }); }); }); END_TEST