From f37f73f92fea0c65a82a8dbecc9e2928488feba7 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 Sep 2015 23:20:06 -0700 Subject: [PATCH] Add ability to edit multiple times between parses --- include/tree_sitter/runtime.h | 4 +- spec/runtime/document_spec.cc | 115 ++++++++++++++++----------- spec/runtime/helpers/spy_input.cc | 7 +- spec/runtime/helpers/spy_input.h | 9 ++- spec/runtime/helpers/tree_helpers.cc | 18 ++--- spec/runtime/helpers/tree_helpers.h | 13 +-- spec/runtime/language_specs.cc | 7 ++ spec/runtime/node_spec.cc | 7 +- spec/runtime/parser_spec.cc | 5 ++ src/runtime/document.c | 36 ++++----- src/runtime/node.c | 4 + src/runtime/parser.c | 25 +++--- src/runtime/parser.h | 4 +- 13 files changed, 135 insertions(+), 119 deletions(-) diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index e4d3f4a8..eb431213 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -51,6 +51,7 @@ const char *ts_node_name(TSNode, const TSDocument *); const char *ts_node_string(TSNode, const TSDocument *); bool ts_node_eq(TSNode, TSNode); bool ts_node_is_named(TSNode); +bool ts_node_has_changes(TSNode); TSNode ts_node_parent(TSNode); TSNode ts_node_child(TSNode, size_t); TSNode ts_node_named_child(TSNode, size_t); @@ -70,9 +71,10 @@ void ts_document_set_language(TSDocument *, const TSLanguage *); TSInput ts_document_input(TSDocument *); void ts_document_set_input(TSDocument *, TSInput); void ts_document_set_input_string(TSDocument *, const char *); -void ts_document_edit(TSDocument *, TSInputEdit); TSDebugger ts_document_debugger(const TSDocument *); void ts_document_set_debugger(TSDocument *, TSDebugger); +void ts_document_parse(TSDocument *); +void ts_document_edit(TSDocument *, TSInputEdit); TSNode ts_document_root_node(const TSDocument *); size_t ts_document_parse_count(const TSDocument *); diff --git a/spec/runtime/document_spec.cc b/spec/runtime/document_spec.cc index 20d352e5..803dd093 100644 --- a/spec/runtime/document_spec.cc +++ b/spec/runtime/document_spec.cc @@ -1,14 +1,17 @@ -#include "runtime/debugger.h" #include "runtime/runtime_spec_helper.h" +#include "runtime/helpers/tree_helpers.h" +#include "runtime/debugger.h" #include "runtime/helpers/spy_debugger.h" #include "runtime/helpers/spy_input.h" extern "C" const TSLanguage * ts_language_json(); +extern "C" const TSLanguage * ts_language_javascript(); START_TEST describe("Document", [&]() { TSDocument *doc; + TSNode root; before_each([&]() { doc = ts_document_make(); @@ -18,61 +21,79 @@ describe("Document", [&]() { ts_document_free(doc); }); - describe("set_input(TSInput)", [&]() { - describe("when the language is set", [&]() { - before_each([&]() { - ts_document_set_language(doc, ts_language_json()); - }); + describe("set_input(input)", [&]() { + SpyInput *spy_input; - it("parses the document", [&]() { - ts_document_set_input_string(doc, "{ \"key\": [1, 2] }"); + before_each([&]() { + ts_document_set_language(doc, ts_language_json()); + ts_document_set_input_string(doc, "{\"key\": [1, 2]}"); + ts_document_parse(doc); - AssertThat(ts_node_string(ts_document_root_node(doc), doc), Equals( - "(object (string) (array (number) (number)))")); - }); + root = ts_document_root_node(doc); + AssertThat(ts_node_string(root, doc), Equals( + "(object (string) (array (number) (number)))")); + + spy_input = new SpyInput("{\"key\": [null, 2]}", 3); }); - describe("when the language is not set", [&]() { - it("does not try to parse the document", [&]() { - ts_document_set_input_string(doc, "{ \"key\": [1, 2] }"); - - AssertThat(ts_document_root_node(doc).data, Equals(nullptr)); - }); + after_each([&]() { + delete spy_input; }); it("allows the input to be retrieved later", [&]() { - auto spy_input = new SpyInput("12345", 3); ts_document_set_input(doc, spy_input->input()); AssertThat(ts_document_input(doc).payload, Equals(spy_input)); - delete spy_input; + AssertThat(ts_document_input(doc).read_fn, Equals(spy_input->input().read_fn)); + AssertThat(ts_document_input(doc).seek_fn, Equals(spy_input->input().seek_fn)); + }); + + it("does not assume that the document's text has changed", [&]() { + ts_document_set_input(doc, spy_input->input()); + AssertThat(ts_document_root_node(doc), Equals(root)); + AssertThat(ts_node_has_changes(root), IsFalse()); + AssertThat(spy_input->strings_read, Equals(vector({ "" }))); + }); + + it("reads text from the new input for future parses", [&]() { + ts_document_set_input(doc, spy_input->input()); + + // Insert 'null', delete '1'. + ts_document_edit(doc, {strlen("{\"key\": ["), 4, 1}); + ts_document_parse(doc); + + TSNode new_root = ts_document_root_node(doc); + AssertThat(ts_node_string(new_root, doc), Equals( + "(object (string) (array (null) (number)))")); + AssertThat(spy_input->strings_read, Equals(vector({" [null, 2", ""}))); }); }); - describe("set_language(TSLanguage)", [&]() { - describe("when the input is not set", [&]() { - it("does not try to parse the document", [&]() { - ts_document_set_language(doc, ts_language_json()); - - AssertThat(ts_document_root_node(doc).data, Equals(nullptr)); - }); + describe("set_language(language)", [&]() { + before_each([&]() { + ts_document_set_input_string(doc, "{\"key\": [1, 2]}\n"); }); - describe("when the input is set", [&]() { - before_each([&]() { - ts_document_set_input_string(doc, "{ \"key\": [1, 2] }"); - }); - - it("parses the document", [&]() { - ts_document_set_language(doc, ts_language_json()); - - AssertThat(ts_node_string(ts_document_root_node(doc), doc), Equals( - "(object (string) (array (number) (number)))")); - }); - }); - - it("allows the language to be retrieved later", [&]() { + it("uses the given language for future parses", [&]() { ts_document_set_language(doc, ts_language_json()); - AssertThat(ts_document_language(doc), Equals(ts_language_json())); + ts_document_parse(doc); + + root = ts_document_root_node(doc); + AssertThat(ts_node_string(root, doc), Equals( + "(object (string) (array (number) (number)))")); + }); + + it("clears out any previous tree", [&]() { + ts_document_set_language(doc, ts_language_json()); + ts_document_parse(doc); + + ts_document_set_language(doc, ts_language_javascript()); + AssertThat(ts_document_root_node(doc).data, Equals(nullptr)); + + ts_document_parse(doc); + root = ts_document_root_node(doc); + AssertThat(ts_node_string(root, doc), Equals( + "(program (expression_statement " + "(object (pair (string) (array (number) (number))))))")); }); }); @@ -82,11 +103,12 @@ describe("Document", [&]() { before_each([&]() { debugger = new SpyDebugger(); ts_document_set_language(doc, ts_language_json()); - ts_document_set_debugger(doc, debugger->debugger()); + ts_document_set_input_string(doc, "[1, 2]"); }); it("calls the debugger with a message for each lex action", [&]() { - ts_document_set_input_string(doc, "[1, 2]"); + ts_document_set_debugger(doc, debugger->debugger()); + ts_document_parse(doc); AssertThat(debugger->messages, Contains("lookahead char:'1'")); AssertThat(debugger->messages, Contains("advance state:1")); @@ -94,7 +116,8 @@ describe("Document", [&]() { }); it("calls the debugger with a message for each parse action", [&]() { - ts_document_set_input_string(doc, "[1, 2]"); + ts_document_set_debugger(doc, debugger->debugger()); + ts_document_parse(doc); AssertThat(debugger->messages, Contains("new_parse")); AssertThat(debugger->messages, Contains("lookahead char:'['")); @@ -103,16 +126,18 @@ describe("Document", [&]() { }); it("allows the debugger to be retrieved later", [&]() { + ts_document_set_debugger(doc, debugger->debugger()); AssertThat(ts_document_debugger(doc).payload, Equals(debugger)); }); describe("disabling debugging", [&]() { before_each([&]() { + ts_document_set_debugger(doc, debugger->debugger()); ts_document_set_debugger(doc, ts_debugger_null()); }); it("does not call the debugger any more", [&]() { - ts_document_set_input_string(doc, "[1, 2]"); + ts_document_parse(doc); AssertThat(debugger->messages, IsEmpty()); }); }); diff --git a/spec/runtime/helpers/spy_input.cc b/spec/runtime/helpers/spy_input.cc index 9adf272b..f2f6122b 100644 --- a/spec/runtime/helpers/spy_input.cc +++ b/spec/runtime/helpers/spy_input.cc @@ -33,12 +33,12 @@ static int spy_seek(void *data, TSLength byte_offset) { } SpyInput::SpyInput(string content, size_t chars_per_chunk) : - content(content), chars_per_chunk(chars_per_chunk), buffer_size(4 * chars_per_chunk), buffer(new char[buffer_size]), byte_offset(0), - strings_read({ "" }) {} + content(content), + strings_read({""}) {} SpyInput::~SpyInput() { delete buffer; @@ -73,7 +73,8 @@ const char * SpyInput::read(size_t *bytes_read) { } int SpyInput::seek(size_t pos) { - strings_read.push_back(""); + if (strings_read.size() == 0 || strings_read.back().size() > 0) + strings_read.push_back(""); byte_offset = pos; return 0; } diff --git a/spec/runtime/helpers/spy_input.h b/spec/runtime/helpers/spy_input.h index 0959c073..3dd772f7 100644 --- a/spec/runtime/helpers/spy_input.h +++ b/spec/runtime/helpers/spy_input.h @@ -6,6 +6,11 @@ #include "tree_sitter/runtime.h" class SpyInput { + size_t chars_per_chunk; + size_t buffer_size; + char *buffer; + size_t byte_offset; + public: SpyInput(std::string content, size_t chars_per_chunk); ~SpyInput(); @@ -18,10 +23,6 @@ class SpyInput { int seek(size_t position); std::string content; - size_t chars_per_chunk; - size_t buffer_size; - char *buffer; - size_t byte_offset; std::vector strings_read; }; diff --git a/spec/runtime/helpers/tree_helpers.cc b/spec/runtime/helpers/tree_helpers.cc index 6b2aa53c..cb2f3131 100644 --- a/spec/runtime/helpers/tree_helpers.cc +++ b/spec/runtime/helpers/tree_helpers.cc @@ -1,6 +1,6 @@ #include "runtime/helpers/tree_helpers.h" -const char *symbol_names[24] = { +static const char *symbol_names[24] = { "ERROR", "END", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen", "twenty", "twenty-one", @@ -14,18 +14,6 @@ TSTree ** tree_array(std::vector trees) { return result; } -EqualsTree::EqualsTree(const TSTree *expected, const char **symbol_names) - : expected(expected), symbol_names(symbol_names) {} - -bool EqualsTree::Matches(const TSTree *actual) const { - return ts_tree_eq(actual, expected); -} - -std::ostream &operator<<(std::ostream &stream, const EqualsTree &matcher) { - stream << std::string("equals tree: ") << std::string(ts_tree_string(matcher.expected, matcher.symbol_names)); - return stream; -} - std::ostream &operator<<(std::ostream &stream, const TSTree *tree) { return stream << std::string(ts_tree_string(tree, symbol_names));; } @@ -34,3 +22,7 @@ std::ostream &operator<<(std::ostream &stream, const TSNode node) { return stream << std::string("{") << (const TSTree *)node.data << std::string(", ") << std::to_string(ts_node_pos(node).chars) << std::string("}"); } + +bool operator==(const TSNode &left, const TSNode &right) { + return ts_node_eq(left, right); +} diff --git a/spec/runtime/helpers/tree_helpers.h b/spec/runtime/helpers/tree_helpers.h index 2e81fc8b..f668411f 100644 --- a/spec/runtime/helpers/tree_helpers.h +++ b/spec/runtime/helpers/tree_helpers.h @@ -5,19 +5,10 @@ #include #include -extern const char *symbol_names[24]; - TSTree ** tree_array(std::vector trees); -struct EqualsTree { - EqualsTree(const TSTree *expected, const char **symbol_names); - bool Matches(const TSTree *actual) const; - const TSTree *expected; - const char **symbol_names; -}; - -std::ostream &operator<<(std::ostream &stream, const EqualsTree &matcher); std::ostream &operator<<(std::ostream &stream, const TSTree *tree); -std::ostream &operator<<(std::ostream &stream, const TSNode ref); +std::ostream &operator<<(std::ostream &stream, const TSNode node); +bool operator==(const TSNode &left, const TSNode &right); #endif // HELPERS_TREE_HELPERS_H_ diff --git a/spec/runtime/language_specs.cc b/spec/runtime/language_specs.cc index 12e9f6d6..daecd861 100644 --- a/spec/runtime/language_specs.cc +++ b/spec/runtime/language_specs.cc @@ -39,6 +39,7 @@ describe("Languages", [&]() { it(("parses " + entry.description).c_str(), [&]() { ts_document_set_input_string(doc, entry.input.c_str()); + ts_document_parse(doc); expect_the_correct_tree(); }); @@ -50,15 +51,18 @@ describe("Languages", [&]() { it(("handles random insertions in " + entry.description).c_str(), [&]() { SpyInput reader(entry.input, 3); ts_document_set_input(doc, reader.input()); + ts_document_parse(doc); string garbage("%^&*"); size_t position = entry.input.size() / 2; reader.insert(position, garbage); ts_document_edit(doc, { position, garbage.size(), 0 }); + ts_document_parse(doc); reader.erase(position, garbage.size()); ts_document_edit(doc, { position, 0, garbage.size() }); + ts_document_parse(doc); expect_the_correct_tree(); }); @@ -66,15 +70,18 @@ describe("Languages", [&]() { it(("handles random deletions in " + entry.description).c_str(), [&]() { SpyInput reader(entry.input, 3); ts_document_set_input(doc, reader.input()); + ts_document_parse(doc); size_t position = entry.input.size() / 2; string removed = entry.input.substr(position); reader.erase(position, removed.size()); ts_document_edit(doc, { position, 0, removed.size() }); + ts_document_parse(doc); reader.insert(position, removed); ts_document_edit(doc, { position, removed.size(), 0 }); + ts_document_parse(doc); expect_the_correct_tree(); }); diff --git a/spec/runtime/node_spec.cc b/spec/runtime/node_spec.cc index cf58c761..e58317c9 100644 --- a/spec/runtime/node_spec.cc +++ b/spec/runtime/node_spec.cc @@ -1,4 +1,5 @@ #include "runtime/runtime_spec_helper.h" +#include "runtime/helpers/tree_helpers.h" extern "C" TSLanguage * ts_language_json(); @@ -12,6 +13,8 @@ describe("Node", []() { document = ts_document_make(); ts_document_set_language(document, ts_language_json()); ts_document_set_input_string(document, " [123, false, {\"x\": null}]"); + ts_document_parse(document); + array_node = ts_document_root_node(document); AssertThat(ts_node_string(array_node, document), Equals( "(array " @@ -268,7 +271,3 @@ describe("Node", []() { }); END_TEST - -bool operator==(const TSNode &left, const TSNode &right) { - return ts_node_eq(left, right); -} diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index 7b979293..63b81568 100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -29,6 +29,8 @@ describe("Parser", [&]() { auto set_text = [&](const char *text) { input = new SpyInput(text, chunk_size); ts_document_set_input(doc, input->input()); + ts_document_parse(doc); + root = ts_document_root_node(doc); AssertThat(ts_node_size(root).bytes + ts_node_pos(root).bytes, Equals(strlen(text))); input->clear(); @@ -38,6 +40,7 @@ describe("Parser", [&]() { size_t prev_size = ts_node_size(root).bytes + ts_node_pos(root).bytes; AssertThat(input->insert(position, text), IsTrue()); ts_document_edit(doc, { position, text.length(), 0 }); + ts_document_parse(doc); root = ts_document_root_node(doc); size_t new_size = ts_node_size(root).bytes + ts_node_pos(root).bytes; @@ -48,6 +51,7 @@ describe("Parser", [&]() { size_t prev_size = ts_node_size(root).bytes + ts_node_pos(root).bytes; AssertThat(input->erase(position, length), IsTrue()); ts_document_edit(doc, { position, 0, length }); + ts_document_parse(doc); root = ts_document_root_node(doc); size_t new_size = ts_node_size(root).bytes + ts_node_pos(root).bytes; @@ -60,6 +64,7 @@ describe("Parser", [&]() { AssertThat(input->insert(position, new_text), IsTrue()); ts_document_edit(doc, { position, new_text.size(), length }); + ts_document_parse(doc); root = ts_document_root_node(doc); size_t new_size = ts_node_size(root).bytes + ts_node_pos(root).bytes; diff --git a/src/runtime/document.c b/src/runtime/document.c index 5a856597..cb397769 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -19,24 +19,13 @@ void ts_document_free(TSDocument *document) { free(document); } -static void ts_document__reparse(TSDocument *document) { - if (document->input.read_fn && document->parser.language) { - TSTree *tree = ts_parser_parse(&document->parser, document->input); - if (document->tree) - ts_tree_release(document->tree); - document->tree = tree; - ts_tree_retain(tree); - document->parse_count++; - } -} - const TSLanguage *ts_document_language(TSDocument *document) { return document->parser.language; } void ts_document_set_language(TSDocument *document, const TSLanguage *language) { document->parser.language = language; - ts_document__reparse(document); + document->tree = NULL; } TSDebugger ts_document_debugger(const TSDocument *document) { @@ -53,7 +42,10 @@ TSInput ts_document_input(TSDocument *document) { void ts_document_set_input(TSDocument *document, TSInput input) { document->input = input; - ts_document__reparse(document); +} + +void ts_document_set_input_string(TSDocument *document, const char *text) { + ts_document_set_input(document, ts_string_input_make(text)); } void ts_document_edit(TSDocument *document, TSInputEdit edit) { @@ -64,16 +56,18 @@ void ts_document_edit(TSDocument *document, TSInputEdit edit) { edit.chars_removed = max_chars - edit.position; ts_tree_edit(document->tree, edit); - ts_document__reparse(document); } -const char *ts_document_symbol_name(const TSDocument *document, - const TSTree *tree) { - return document->parser.language->symbol_names[tree->symbol]; -} - -void ts_document_set_input_string(TSDocument *document, const char *text) { - ts_document_set_input(document, ts_string_input_make(text)); +void ts_document_parse(TSDocument *document) { + if (document->input.read_fn && document->parser.language) { + TSTree *tree = + ts_parser_parse(&document->parser, document->input, document->tree); + if (document->tree) + ts_tree_release(document->tree); + document->tree = tree; + ts_tree_retain(tree); + document->parse_count++; + } } TSNode ts_document_root_node(const TSDocument *document) { diff --git a/src/runtime/node.c b/src/runtime/node.c index 4629f6d2..a1330de7 100644 --- a/src/runtime/node.c +++ b/src/runtime/node.c @@ -178,6 +178,10 @@ bool ts_node_is_named(TSNode this) { return ts_node__tree(this)->options.type == TSNodeTypeNamed; } +bool ts_node_has_changes(TSNode this) { + return ts_node__tree(this)->options.has_changes; +} + TSNode ts_node_parent(TSNode this) { const TSTree *tree = ts_node__tree(this); TSLength position = ts_node__offset(this); diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 444e47bb..3e51aeb1 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -255,22 +255,21 @@ static bool ts_parser__handle_error(TSParser *parser, int head) { } } -static void ts_parser__start(TSParser *parser, TSInput input) { - parser->lexer.input = input; - ts_lexer_reset(&parser->lexer, ts_length_zero()); - - parser->previous_tree = ts_stack_top_tree(parser->stack, 0); - if (parser->previous_tree) { +static void ts_parser__start(TSParser *parser, TSInput input, + TSTree *previous_tree) { + if (previous_tree) { DEBUG("parse_after_edit"); - ts_tree_retain(parser->previous_tree); } else { DEBUG("new_parse"); } - parser->reusable_subtree = parser->previous_tree; + + parser->lexer.input = input; + ts_lexer_reset(&parser->lexer, ts_length_zero()); + ts_stack_clear(parser->stack); + + parser->reusable_subtree = previous_tree; parser->reusable_subtree_pos = 0; parser->lookahead = NULL; - parser->is_verifying = false; - ts_stack_clear(parser->stack); } static TSTree *ts_parser__finish(TSParser *parser) { @@ -282,7 +281,6 @@ static TSTree *ts_parser__finish(TSParser *parser) { TSTree *root = trees[extra_count]; ts_tree_prepend_children(root, extra_count, trees); - ts_stack_push(parser->stack, 0, 0, root); return root; } @@ -389,7 +387,6 @@ TSParser ts_parser_make() { NULL, ts_parser__select_tree, }), .lookahead = NULL, - .is_verifying = false, }; } @@ -407,8 +404,8 @@ void ts_parser_set_debugger(TSParser *parser, TSDebugger debugger) { parser->lexer.debugger = debugger; } -TSTree *ts_parser_parse(TSParser *parser, TSInput input) { - ts_parser__start(parser, input); +TSTree *ts_parser_parse(TSParser *parser, TSInput input, TSTree *previous_tree) { + ts_parser__start(parser, input, previous_tree); for (;;) { ts_parser__get_next_lookahead(parser, false); diff --git a/src/runtime/parser.h b/src/runtime/parser.h index 962f5f1a..50370ba0 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -11,10 +11,8 @@ typedef struct { TSLexer lexer; Stack *stack; TSTree *lookahead; - TSTree *previous_tree; TSTree *reusable_subtree; size_t reusable_subtree_pos; - bool is_verifying; const TSLanguage *language; } TSParser; @@ -22,7 +20,7 @@ TSParser ts_parser_make(); void ts_parser_destroy(TSParser *); TSDebugger ts_parser_debugger(const TSParser *); void ts_parser_set_debugger(TSParser *, TSDebugger); -TSTree *ts_parser_parse(TSParser *, TSInput); +TSTree *ts_parser_parse(TSParser *, TSInput, TSTree *); #ifdef __cplusplus }