From f248ece3aac02a4111fa0b4b63ae6ebd98185b27 Mon Sep 17 00:00:00 2001 From: Karen Wang and Max Brunsfeld Date: Thu, 20 Mar 2014 18:15:38 -0700 Subject: [PATCH] Use table-driven tests for specific parsers --- include/tree_sitter/parser.h | 2 +- spec/runtime/arithmetic_spec.cc | 65 ------------ spec/runtime/helpers/read_test_entries.cc | 99 ++++++++++++++++++ spec/runtime/helpers/read_test_entries.h | 15 +++ spec/runtime/json_spec.cc | 101 ------------------- spec/runtime/language_specs.cc | 46 +++++++++ spec/runtime/languages/arithmetic/errors.txt | 16 +++ spec/runtime/languages/arithmetic/main.txt | 54 ++++++++++ spec/runtime/languages/json/errors.txt | 33 ++++++ spec/runtime/languages/json/main.txt | 44 ++++++++ spec/runtime/node_position_spec.cc | 46 +++++++++ tree_sitter.gyp | 1 + 12 files changed, 355 insertions(+), 167 deletions(-) delete mode 100644 spec/runtime/arithmetic_spec.cc create mode 100644 spec/runtime/helpers/read_test_entries.cc create mode 100644 spec/runtime/helpers/read_test_entries.h delete mode 100644 spec/runtime/json_spec.cc create mode 100644 spec/runtime/language_specs.cc create mode 100644 spec/runtime/languages/arithmetic/errors.txt create mode 100644 spec/runtime/languages/arithmetic/main.txt create mode 100644 spec/runtime/languages/json/errors.txt create mode 100644 spec/runtime/languages/json/main.txt create mode 100644 spec/runtime/node_position_spec.cc diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index e4ff02ec..ce13ca8f 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -277,7 +277,7 @@ static size_t ts_lr_parser_breakdown_stack(ts_lr_parser *parser, ts_input_edit * ts_tree *node; size_t position = 0; size_t child_count = 0; - + for (;;) { node = ts_stack_top_node(stack); if (!node) break; diff --git a/spec/runtime/arithmetic_spec.cc b/spec/runtime/arithmetic_spec.cc deleted file mode 100644 index a6b202ea..00000000 --- a/spec/runtime/arithmetic_spec.cc +++ /dev/null @@ -1,65 +0,0 @@ -#include "runtime_spec_helper.h" - -extern "C" ts_parser ts_parser_arithmetic(); - -START_TEST - -describe("arithmetic", []() { - ts_document *doc; - - before_each([&]() { - doc = ts_document_make(); - ts_document_set_parser(doc, ts_parser_arithmetic()); - }); - - after_each([&]() { - ts_document_free(doc); - }); - - it("parses variables", [&]() { - ts_document_set_input_string(doc, "x"); - AssertThat(string(ts_document_string(doc)), Equals( - "(expression (term (factor (variable))))")); - }); - - it("parses numbers", [&]() { - ts_document_set_input_string(doc, "5"); - AssertThat(string(ts_document_string(doc)), Equals( - "(expression (term (factor (number))))")); - }); - - it("parses products of variables", [&]() { - ts_document_set_input_string(doc, "x + y"); - AssertThat(string(ts_document_string(doc)), Equals( - "(expression (term (factor (variable))) (plus) (term (factor (variable))))")); - - ts_document_set_input_string(doc, "x * y"); - AssertThat(string(ts_document_string(doc)), Equals( - "(expression (term (factor (variable)) (times) (factor (variable))))")); - }); - - it("parses complex trees", [&]() { - ts_document_set_input_string(doc, "x * y + z * a"); - AssertThat(string(ts_document_string(doc)), Equals( - "(expression (term (factor (variable)) (times) (factor (variable))) (plus) (term (factor (variable)) (times) (factor (variable))))")); - - ts_document_set_input_string(doc, "x * (y + z)"); - AssertThat(string(ts_document_string(doc)), Equals( - "(expression (term (factor (variable)) (times) (factor (expression (term (factor (variable))) (plus) (term (factor (variable)))))))")); - }); - - describe("error recovery", [&]() { - it("recovers from errors at the top level", [&]() { - ts_document_set_input_string(doc, "x * * y"); - AssertThat(string(ts_document_string(doc)), Equals("(ERROR)")); - }); - - it("recovers from errors in parenthesized expressions", [&]() { - ts_document_set_input_string(doc, "x + (y * + z) * 5"); - AssertThat(string(ts_document_string(doc)), Equals( - "(expression (term (factor (variable))) (plus) (term (factor (ERROR)) (times) (factor (number))))")); - }); - }); -}); - -END_TEST \ No newline at end of file diff --git a/spec/runtime/helpers/read_test_entries.cc b/spec/runtime/helpers/read_test_entries.cc new file mode 100644 index 00000000..4ef4bafb --- /dev/null +++ b/spec/runtime/helpers/read_test_entries.cc @@ -0,0 +1,99 @@ +#include "helpers/read_test_entries.h" +#include +#include +#include +#include +#include + +using std::string; +using std::vector; +using std::ifstream; +using std::istreambuf_iterator; +using std::regex; +using std::regex_search; +using std::regex_replace; + +static string trim_output(const string &input) { + string result(input); + result = regex_replace(result, regex("^[\\s]+"), ""); + result = regex_replace(result, regex("[\\s]+$"), ""); + result = regex_replace(result, regex("\\)\\s+\\)"), "))"); + result = regex_replace(result, regex("\\s+"), " "); + return result; +} + +static vector get_test_entries_from_string(string content) { + regex header_pattern("===+\n([\\w ]+)\n===+"); + regex separator_pattern("---+"); + + vector descriptions; + vector bodies; + + for (;;) { + std::smatch matches; + regex_search(content, matches, header_pattern); + if (matches.empty()) break; + + string description = matches[1].str(); + descriptions.push_back(description); + + if (!bodies.empty()) + bodies.back().erase(matches.position()); + content.erase(0, matches.position() + matches[0].length()); + bodies.push_back(content); + } + + vector result; + for (size_t i = 0; i < descriptions.size(); i++) { + string body = bodies[i]; + std::smatch matches; + regex_search(body, matches, separator_pattern); + result.push_back({ + .description = descriptions[i], + .input = body.substr(0, matches.position()), + .tree_string = trim_output(body.substr(matches.position() + matches[0].length())) + }); + } + + return result; +} + +static vector list_directory(string dir_name) { + vector result; + + DIR *dir = opendir(dir_name.c_str()); + if (!dir) { + printf("\nTest error - no such directory '%s'", dir_name.c_str()); + return result; + } + + struct dirent *dir_entry; + while ((dir_entry = readdir(dir))) { + string name(dir_entry->d_name); + if (name != "." && name != "..") + result.push_back(dir_name + "/" + name); + } + return result; +} + +static string src_dir() { + const char * dir = getenv("TREESITTER_DIR"); + if (!dir) dir = getenv("PWD"); + return dir; +} + +vector test_entries_for_language(string language) { + vector result; + string language_dir = src_dir() + "/spec/runtime/languages/" + language; + vector filenames = list_directory(language_dir); + + for (string &filename : filenames) { + ifstream file(filename); + std::string content((istreambuf_iterator(file)), istreambuf_iterator()); + for (TestEntry entry : get_test_entries_from_string(content)) { + result.push_back(entry); + } + } + + return result; +} diff --git a/spec/runtime/helpers/read_test_entries.h b/spec/runtime/helpers/read_test_entries.h new file mode 100644 index 00000000..271e41f6 --- /dev/null +++ b/spec/runtime/helpers/read_test_entries.h @@ -0,0 +1,15 @@ +#ifndef HELPERS_READ_TEST_ENTRIES_H_ +#define HELPERS_READ_TEST_ENTRIES_H_ + +#include +#include + +struct TestEntry { + std::string description; + std::string input; + std::string tree_string; +}; + +std::vector test_entries_for_language(std::string language); + +#endif diff --git a/spec/runtime/json_spec.cc b/spec/runtime/json_spec.cc deleted file mode 100644 index 6e770ac5..00000000 --- a/spec/runtime/json_spec.cc +++ /dev/null @@ -1,101 +0,0 @@ -#include "runtime_spec_helper.h" - -extern "C" ts_parser ts_parser_json(); - -START_TEST - -describe("json", []() { - ts_document *doc; - - before_each([&]() { - doc = ts_document_make(); - ts_document_set_parser(doc, ts_parser_json()); - }); - - after_each([&]() { - ts_document_free(doc); - }); - - it("parses strings", [&]() { - ts_document_set_input_string(doc, "\"\""); - AssertThat(string(ts_document_string(doc)), Equals("(value (string))")); - - ts_document_set_input_string(doc, "\"simple-string\""); - AssertThat(string(ts_document_string(doc)), Equals("(value (string))")); - - ts_document_set_input_string(doc, "\"this is a \\\"string\\\" within a string\""); - AssertThat(string(ts_document_string(doc)), Equals("(value (string))")); - }); - - it("parses objects", [&]() { - ts_document_set_input_string(doc, "{}"); - AssertThat(string(ts_document_string(doc)), Equals("(value (object))")); - - ts_document_set_input_string(doc, "{ \"key1\": 1 }"); - AssertThat(string(ts_document_string(doc)), Equals("(value (object (string) (value (number))))")); - - ts_document_set_input_string(doc, "{\"key1\": 1, \"key2\": 2 }"); - AssertThat(string(ts_document_string(doc)), Equals("(value (object (string) (value (number)) (string) (value (number))))")); - }); - - it("parses arrays", [&]() { - ts_document_set_input_string(doc, "[]"); - AssertThat(string(ts_document_string(doc)), Equals("(value (array))")); - - ts_document_set_input_string(doc, "[5]"); - AssertThat(string(ts_document_string(doc)), Equals("(value (array (value (number))))")); - - ts_document_set_input_string(doc, "[1, 2, 3]"); - AssertThat(string(ts_document_string(doc)), Equals("(value (array (value (number)) (value (number)) (value (number))))")); - }); - - describe("tracking the positions of AST nodes", [&]() { - it("records the widths and offsets of nodes", [&]() { - ts_document_set_input_string(doc, " [12, 5]"); - const ts_tree *tree = ts_document_tree(doc); - - // TODO - make this better - const ts_tree *array = ts_tree_children(tree, NULL)[0]; - const ts_tree *number1 = ts_tree_children(ts_tree_children(array, NULL)[1], NULL)[0]; - const ts_tree *number2 = ts_tree_children(ts_tree_children(ts_tree_children(array, NULL)[2], NULL)[1], NULL)[0]; - - AssertThat(ts_document_symbol_name(doc, array), Equals("array")); - AssertThat(ts_document_symbol_name(doc, number1), Equals("number")); - AssertThat(ts_document_symbol_name(doc, number2), Equals("number")); - - AssertThat(number1->offset, Equals(0)); - AssertThat(number1->size, Equals(2)); - - AssertThat(number2->offset, Equals(1)); - AssertThat(number2->size, Equals(1)); - - AssertThat(array->offset, Equals(2)); - AssertThat(array->size, Equals(7)); - - AssertThat(tree->offset, Equals(2)); - AssertThat(tree->size, Equals(7)); - }); - }); - - describe("errors", [&]() { - it("reports errors in the top-level node", [&]() { - ts_document_set_input_string(doc, "["); - AssertThat(string(ts_document_string(doc)), Equals("(ERROR)")); - }); - - it("reports errors inside of arrays and objects", [&]() { - ts_document_set_input_string(doc, "{ \"key1\": 1, 5 }"); - AssertThat(string(ts_document_string(doc)), Equals("(value (object (string) (value (number)) (ERROR)))")); - - ts_document_set_input_string(doc, "[1,,2]"); - AssertThat(string(ts_document_string(doc)), Equals("(value (array (value (number)) (ERROR) (value (number))))")); - }); - - it("reports errors in nested objects", [&]() { - ts_document_set_input_string(doc, "{ \"key1\": { \"key2\": 1, 2 }, [, \"key3\": 3 }"); - AssertThat(string(ts_document_string(doc)), Equals("(value (object (string) (value (object (string) (value (number)) (ERROR))) (ERROR) (string) (value (number))))")); - }); - }); -}); - -END_TEST \ No newline at end of file diff --git a/spec/runtime/language_specs.cc b/spec/runtime/language_specs.cc new file mode 100644 index 00000000..0ee10ecc --- /dev/null +++ b/spec/runtime/language_specs.cc @@ -0,0 +1,46 @@ +#include "runtime_spec_helper.h" +#include "helpers/read_test_entries.h" + +extern "C" ts_parser ts_parser_json(); +extern "C" ts_parser ts_parser_arithmetic(); + +START_TEST + +describe("Languages", [&]() { + ts_document *doc; + + before_each([&]() { + doc = ts_document_make(); + }); + + after_each([&]() { + ts_document_free(doc); + }); + + auto run_tests_for_language = [&](string language) { + for (auto &entry : test_entries_for_language(language)) { + it(entry.description.c_str(), [&]() { + ts_document_set_input_string(doc, entry.input.c_str()); + AssertThat(ts_document_string(doc), Equals(entry.tree_string.c_str())); + }); + } + }; + + describe("json", [&]() { + before_each([&]() { + ts_document_set_parser(doc, ts_parser_json()); + }); + + run_tests_for_language("json"); + }); + + describe("arithmetic", [&]() { + before_each([&]() { + ts_document_set_parser(doc, ts_parser_arithmetic()); + }); + + run_tests_for_language("arithmetic"); + }); +}); + +END_TEST \ No newline at end of file diff --git a/spec/runtime/languages/arithmetic/errors.txt b/spec/runtime/languages/arithmetic/errors.txt new file mode 100644 index 00000000..23450af5 --- /dev/null +++ b/spec/runtime/languages/arithmetic/errors.txt @@ -0,0 +1,16 @@ +===================================================== +recovers from errors at the top level +===================================================== +x * * y +--- +(ERROR) + +===================================================== +recovers from errors inside parenthesized expressions +===================================================== +x + (y * + z) * 5 +--- +(expression + (term (factor (variable))) + (plus) + (term (factor (ERROR)) (times) (factor (number)))) \ No newline at end of file diff --git a/spec/runtime/languages/arithmetic/main.txt b/spec/runtime/languages/arithmetic/main.txt new file mode 100644 index 00000000..f188c546 --- /dev/null +++ b/spec/runtime/languages/arithmetic/main.txt @@ -0,0 +1,54 @@ +==================== +parses numbers +=================== +5 +--- +(expression (term (factor (number)))) + +=================== +parses variables +=================== +x +--- +(expression (term (factor (variable)))) + +=================== +parses products +=================== +x * x +--- +(expression (term + (factor (variable)) + (times) + (factor (variable)))) + +=================== +parses sums +=================== +x + x +--- +(expression + (term (factor (variable))) + (plus) + (term (factor (variable)))) + +==================== +parses complex trees +==================== +x * y + z * a +--- +(expression + (term (factor (variable)) (times) (factor (variable))) + (plus) + (term (factor (variable)) (times) (factor (variable)))) + +================================= +handles parenthesized expressions +================================= +x * (y + z) +--- +(expression + (term (factor (variable)) + (times) + (factor (expression (term (factor (variable))) (plus) (term (factor (variable))))))) + diff --git a/spec/runtime/languages/json/errors.txt b/spec/runtime/languages/json/errors.txt new file mode 100644 index 00000000..9a6b5362 --- /dev/null +++ b/spec/runtime/languages/json/errors.txt @@ -0,0 +1,33 @@ +============================== +recovers from top-level errors +============================== +[} +--- +(ERROR) + +================================== +recovers from errors inside arrays +================================== +[1,,2] +--- +(value (array + (value (number)) + (ERROR) + (value (number)))) + +================================== +recovers from errors inside objects +================================== +{ "key1": 1, 5 } +--- +(value (object (string) (value (number)) (ERROR))) + +========================================== +recovers from errors inside nested objects +========================================== +{ "key1": { "key2": 1, 2 }, [, "key3": 3 } +--- +(value (object + (string) (value (object (string) (value (number)) (ERROR))) + (ERROR) + (string) (value (number)))) \ No newline at end of file diff --git a/spec/runtime/languages/json/main.txt b/spec/runtime/languages/json/main.txt new file mode 100644 index 00000000..d7e16313 --- /dev/null +++ b/spec/runtime/languages/json/main.txt @@ -0,0 +1,44 @@ +==================== +parses empty objects +==================== +{} +--- +(value (object)) + +=================== +parses empty arrays +=================== +[] +--- +(value (array)) + +=================== +parses arrays +=================== +[ + 1, 2, 3, + { "stuff": "good" } +] +--- +(value (array + (value (number)) + (value (number)) + (value (number)) + (value (object + (string) (value (string)) + )) +)) + +=================== +parses long objects +=================== +{ + "key1": "value1", + "key2": 1 +} +--- +(value (object + (string) (value (string)) + (string) (value (number)) +)) + diff --git a/spec/runtime/node_position_spec.cc b/spec/runtime/node_position_spec.cc new file mode 100644 index 00000000..15d454f1 --- /dev/null +++ b/spec/runtime/node_position_spec.cc @@ -0,0 +1,46 @@ +#include "runtime_spec_helper.h" + +extern "C" ts_parser ts_parser_json(); + +START_TEST + +describe("tracking the positions of AST nodes", []() { + ts_document *doc; + + before_each([&]() { + doc = ts_document_make(); + ts_document_set_parser(doc, ts_parser_json()); + }); + + after_each([&]() { + ts_document_free(doc); + }); + + it("records the widths and offsets of nodes", [&]() { + ts_document_set_input_string(doc, " [12, 5]"); + const ts_tree *tree = ts_document_tree(doc); + + // TODO - make this better + const ts_tree *array = ts_tree_children(tree, NULL)[0]; + const ts_tree *number1 = ts_tree_children(ts_tree_children(array, NULL)[1], NULL)[0]; + const ts_tree *number2 = ts_tree_children(ts_tree_children(ts_tree_children(array, NULL)[2], NULL)[1], NULL)[0]; + + AssertThat(ts_document_symbol_name(doc, array), Equals("array")); + AssertThat(ts_document_symbol_name(doc, number1), Equals("number")); + AssertThat(ts_document_symbol_name(doc, number2), Equals("number")); + + AssertThat(number1->offset, Equals(0)); + AssertThat(number1->size, Equals(2)); + + AssertThat(number2->offset, Equals(1)); + AssertThat(number2->size, Equals(1)); + + AssertThat(array->offset, Equals(2)); + AssertThat(array->size, Equals(7)); + + AssertThat(tree->offset, Equals(2)); + AssertThat(tree->size, Equals(7)); + }); +}); + +END_TEST \ No newline at end of file diff --git a/tree_sitter.gyp b/tree_sitter.gyp index 67d40574..d39534a8 100644 --- a/tree_sitter.gyp +++ b/tree_sitter.gyp @@ -67,6 +67,7 @@ 'sources': [ '