Add ability to edit multiple times between parses

This commit is contained in:
Max Brunsfeld 2015-09-18 23:20:06 -07:00
parent 6254f45c1b
commit f37f73f92f
13 changed files with 135 additions and 119 deletions

View file

@ -51,6 +51,7 @@ const char *ts_node_name(TSNode, const TSDocument *);
const char *ts_node_string(TSNode, const TSDocument *);
bool ts_node_eq(TSNode, TSNode);
bool ts_node_is_named(TSNode);
bool ts_node_has_changes(TSNode);
TSNode ts_node_parent(TSNode);
TSNode ts_node_child(TSNode, size_t);
TSNode ts_node_named_child(TSNode, size_t);
@ -70,9 +71,10 @@ void ts_document_set_language(TSDocument *, const TSLanguage *);
TSInput ts_document_input(TSDocument *);
void ts_document_set_input(TSDocument *, TSInput);
void ts_document_set_input_string(TSDocument *, const char *);
void ts_document_edit(TSDocument *, TSInputEdit);
TSDebugger ts_document_debugger(const TSDocument *);
void ts_document_set_debugger(TSDocument *, TSDebugger);
void ts_document_parse(TSDocument *);
void ts_document_edit(TSDocument *, TSInputEdit);
TSNode ts_document_root_node(const TSDocument *);
size_t ts_document_parse_count(const TSDocument *);

View file

@ -1,14 +1,17 @@
#include "runtime/debugger.h"
#include "runtime/runtime_spec_helper.h"
#include "runtime/helpers/tree_helpers.h"
#include "runtime/debugger.h"
#include "runtime/helpers/spy_debugger.h"
#include "runtime/helpers/spy_input.h"
extern "C" const TSLanguage * ts_language_json();
extern "C" const TSLanguage * ts_language_javascript();
START_TEST
describe("Document", [&]() {
TSDocument *doc;
TSNode root;
before_each([&]() {
doc = ts_document_make();
@ -18,61 +21,79 @@ describe("Document", [&]() {
ts_document_free(doc);
});
describe("set_input(TSInput)", [&]() {
describe("when the language is set", [&]() {
before_each([&]() {
ts_document_set_language(doc, ts_language_json());
});
describe("set_input(input)", [&]() {
SpyInput *spy_input;
it("parses the document", [&]() {
ts_document_set_input_string(doc, "{ \"key\": [1, 2] }");
before_each([&]() {
ts_document_set_language(doc, ts_language_json());
ts_document_set_input_string(doc, "{\"key\": [1, 2]}");
ts_document_parse(doc);
AssertThat(ts_node_string(ts_document_root_node(doc), doc), Equals(
"(object (string) (array (number) (number)))"));
});
root = ts_document_root_node(doc);
AssertThat(ts_node_string(root, doc), Equals(
"(object (string) (array (number) (number)))"));
spy_input = new SpyInput("{\"key\": [null, 2]}", 3);
});
describe("when the language is not set", [&]() {
it("does not try to parse the document", [&]() {
ts_document_set_input_string(doc, "{ \"key\": [1, 2] }");
AssertThat(ts_document_root_node(doc).data, Equals<void *>(nullptr));
});
after_each([&]() {
delete spy_input;
});
it("allows the input to be retrieved later", [&]() {
auto spy_input = new SpyInput("12345", 3);
ts_document_set_input(doc, spy_input->input());
AssertThat(ts_document_input(doc).payload, Equals<void *>(spy_input));
delete spy_input;
AssertThat(ts_document_input(doc).read_fn, Equals(spy_input->input().read_fn));
AssertThat(ts_document_input(doc).seek_fn, Equals(spy_input->input().seek_fn));
});
it("does not assume that the document's text has changed", [&]() {
ts_document_set_input(doc, spy_input->input());
AssertThat(ts_document_root_node(doc), Equals<TSNode>(root));
AssertThat(ts_node_has_changes(root), IsFalse());
AssertThat(spy_input->strings_read, Equals(vector<string>({ "" })));
});
it("reads text from the new input for future parses", [&]() {
ts_document_set_input(doc, spy_input->input());
// Insert 'null', delete '1'.
ts_document_edit(doc, {strlen("{\"key\": ["), 4, 1});
ts_document_parse(doc);
TSNode new_root = ts_document_root_node(doc);
AssertThat(ts_node_string(new_root, doc), Equals(
"(object (string) (array (null) (number)))"));
AssertThat(spy_input->strings_read, Equals(vector<string>({" [null, 2", ""})));
});
});
describe("set_language(TSLanguage)", [&]() {
describe("when the input is not set", [&]() {
it("does not try to parse the document", [&]() {
ts_document_set_language(doc, ts_language_json());
AssertThat(ts_document_root_node(doc).data, Equals<void *>(nullptr));
});
describe("set_language(language)", [&]() {
before_each([&]() {
ts_document_set_input_string(doc, "{\"key\": [1, 2]}\n");
});
describe("when the input is set", [&]() {
before_each([&]() {
ts_document_set_input_string(doc, "{ \"key\": [1, 2] }");
});
it("parses the document", [&]() {
ts_document_set_language(doc, ts_language_json());
AssertThat(ts_node_string(ts_document_root_node(doc), doc), Equals(
"(object (string) (array (number) (number)))"));
});
});
it("allows the language to be retrieved later", [&]() {
it("uses the given language for future parses", [&]() {
ts_document_set_language(doc, ts_language_json());
AssertThat(ts_document_language(doc), Equals(ts_language_json()));
ts_document_parse(doc);
root = ts_document_root_node(doc);
AssertThat(ts_node_string(root, doc), Equals(
"(object (string) (array (number) (number)))"));
});
it("clears out any previous tree", [&]() {
ts_document_set_language(doc, ts_language_json());
ts_document_parse(doc);
ts_document_set_language(doc, ts_language_javascript());
AssertThat(ts_document_root_node(doc).data, Equals<void *>(nullptr));
ts_document_parse(doc);
root = ts_document_root_node(doc);
AssertThat(ts_node_string(root, doc), Equals(
"(program (expression_statement "
"(object (pair (string) (array (number) (number))))))"));
});
});
@ -82,11 +103,12 @@ describe("Document", [&]() {
before_each([&]() {
debugger = new SpyDebugger();
ts_document_set_language(doc, ts_language_json());
ts_document_set_debugger(doc, debugger->debugger());
ts_document_set_input_string(doc, "[1, 2]");
});
it("calls the debugger with a message for each lex action", [&]() {
ts_document_set_input_string(doc, "[1, 2]");
ts_document_set_debugger(doc, debugger->debugger());
ts_document_parse(doc);
AssertThat(debugger->messages, Contains("lookahead char:'1'"));
AssertThat(debugger->messages, Contains("advance state:1"));
@ -94,7 +116,8 @@ describe("Document", [&]() {
});
it("calls the debugger with a message for each parse action", [&]() {
ts_document_set_input_string(doc, "[1, 2]");
ts_document_set_debugger(doc, debugger->debugger());
ts_document_parse(doc);
AssertThat(debugger->messages, Contains("new_parse"));
AssertThat(debugger->messages, Contains("lookahead char:'['"));
@ -103,16 +126,18 @@ describe("Document", [&]() {
});
it("allows the debugger to be retrieved later", [&]() {
ts_document_set_debugger(doc, debugger->debugger());
AssertThat(ts_document_debugger(doc).payload, Equals(debugger));
});
describe("disabling debugging", [&]() {
before_each([&]() {
ts_document_set_debugger(doc, debugger->debugger());
ts_document_set_debugger(doc, ts_debugger_null());
});
it("does not call the debugger any more", [&]() {
ts_document_set_input_string(doc, "[1, 2]");
ts_document_parse(doc);
AssertThat(debugger->messages, IsEmpty());
});
});

View file

@ -33,12 +33,12 @@ static int spy_seek(void *data, TSLength byte_offset) {
}
// Constructs a SpyInput over `content`. The read buffer is sized at
// 4 * chars_per_chunk bytes, the byte offset starts at 0, and strings_read
// starts out holding a single empty string.
//
// NOTE(review): this hunk shows `content(content)` and the strings_read
// initializer twice; these look like the before/after positions of the same
// initializers (moved to follow the class's member declaration order).
SpyInput::SpyInput(string content, size_t chars_per_chunk) :
content(content),
chars_per_chunk(chars_per_chunk),
buffer_size(4 * chars_per_chunk),
// Reads the buffer_size member, so this relies on buffer_size being declared
// (and therefore initialized) before buffer in the class.
// NOTE(review): allocated with new[], but the destructor releases it with
// plain `delete buffer;` -- presumably that should be `delete[] buffer;`.
buffer(new char[buffer_size]),
byte_offset(0),
strings_read({ "" }) {}
content(content),
strings_read({""}) {}
SpyInput::~SpyInput() {
delete buffer;
@ -73,7 +73,8 @@ const char * SpyInput::read(size_t *bytes_read) {
}
// Moves the input's byte offset to `pos` and always returns 0.
int SpyInput::seek(size_t pos) {
// NOTE(review): the unconditional push on the next line and the guarded push
// after it look like the before/after versions of one statement in this diff.
strings_read.push_back("");
// Open a new (empty) entry in strings_read only when the list is empty or its
// last entry is non-empty, so consecutive seeks do not pile up empty strings.
if (strings_read.size() == 0 || strings_read.back().size() > 0)
strings_read.push_back("");
byte_offset = pos;
return 0;
}

View file

@ -6,6 +6,11 @@
#include "tree_sitter/runtime.h"
class SpyInput {
size_t chars_per_chunk;
size_t buffer_size;
char *buffer;
size_t byte_offset;
public:
SpyInput(std::string content, size_t chars_per_chunk);
~SpyInput();
@ -18,10 +23,6 @@ class SpyInput {
int seek(size_t position);
std::string content;
size_t chars_per_chunk;
size_t buffer_size;
char *buffer;
size_t byte_offset;
std::vector<std::string> strings_read;
};

View file

@ -1,6 +1,6 @@
#include "runtime/helpers/tree_helpers.h"
const char *symbol_names[24] = {
static const char *symbol_names[24] = {
"ERROR", "END", "two", "three", "four", "five", "six", "seven", "eight",
"nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
"sixteen", "seventeen", "eighteen", "nineteen", "twenty", "twenty-one",
@ -14,18 +14,6 @@ TSTree ** tree_array(std::vector<TSTree *> trees) {
return result;
}
EqualsTree::EqualsTree(const TSTree *expected, const char **symbol_names)
: expected(expected), symbol_names(symbol_names) {}
bool EqualsTree::Matches(const TSTree *actual) const {
return ts_tree_eq(actual, expected);
}
std::ostream &operator<<(std::ostream &stream, const EqualsTree &matcher) {
stream << std::string("equals tree: ") << std::string(ts_tree_string(matcher.expected, matcher.symbol_names));
return stream;
}
// Streams a tree as the text produced by ts_tree_string(), using this file's
// symbol_names table to name symbols.
// NOTE(review): the statement ends in `;;`, which leaves a stray empty statement.
std::ostream &operator<<(std::ostream &stream, const TSTree *tree) {
return stream << std::string(ts_tree_string(tree, symbol_names));;
}
@ -34,3 +22,7 @@ std::ostream &operator<<(std::ostream &stream, const TSNode node) {
return stream << std::string("{") << (const TSTree *)node.data <<
std::string(", ") << std::to_string(ts_node_pos(node).chars) << std::string("}");
}
// Equality for TSNode values; delegates to ts_node_eq.
// Presumably what lets spec assertions such as Equals<TSNode>(...) compare
// nodes directly -- confirm against the assertion library.
bool operator==(const TSNode &left, const TSNode &right) {
return ts_node_eq(left, right);
}

View file

@ -5,19 +5,10 @@
#include <vector>
#include <string>
extern const char *symbol_names[24];
TSTree ** tree_array(std::vector<TSTree *> trees);
struct EqualsTree {
EqualsTree(const TSTree *expected, const char **symbol_names);
bool Matches(const TSTree *actual) const;
const TSTree *expected;
const char **symbol_names;
};
std::ostream &operator<<(std::ostream &stream, const EqualsTree &matcher);
std::ostream &operator<<(std::ostream &stream, const TSTree *tree);
std::ostream &operator<<(std::ostream &stream, const TSNode ref);
std::ostream &operator<<(std::ostream &stream, const TSNode node);
bool operator==(const TSNode &left, const TSNode &right);
#endif // HELPERS_TREE_HELPERS_H_

View file

@ -39,6 +39,7 @@ describe("Languages", [&]() {
it(("parses " + entry.description).c_str(), [&]() {
ts_document_set_input_string(doc, entry.input.c_str());
ts_document_parse(doc);
expect_the_correct_tree();
});
@ -50,15 +51,18 @@ describe("Languages", [&]() {
it(("handles random insertions in " + entry.description).c_str(), [&]() {
SpyInput reader(entry.input, 3);
ts_document_set_input(doc, reader.input());
ts_document_parse(doc);
string garbage("%^&*");
size_t position = entry.input.size() / 2;
reader.insert(position, garbage);
ts_document_edit(doc, { position, garbage.size(), 0 });
ts_document_parse(doc);
reader.erase(position, garbage.size());
ts_document_edit(doc, { position, 0, garbage.size() });
ts_document_parse(doc);
expect_the_correct_tree();
});
@ -66,15 +70,18 @@ describe("Languages", [&]() {
it(("handles random deletions in " + entry.description).c_str(), [&]() {
SpyInput reader(entry.input, 3);
ts_document_set_input(doc, reader.input());
ts_document_parse(doc);
size_t position = entry.input.size() / 2;
string removed = entry.input.substr(position);
reader.erase(position, removed.size());
ts_document_edit(doc, { position, 0, removed.size() });
ts_document_parse(doc);
reader.insert(position, removed);
ts_document_edit(doc, { position, removed.size(), 0 });
ts_document_parse(doc);
expect_the_correct_tree();
});

View file

@ -1,4 +1,5 @@
#include "runtime/runtime_spec_helper.h"
#include "runtime/helpers/tree_helpers.h"
extern "C" TSLanguage * ts_language_json();
@ -12,6 +13,8 @@ describe("Node", []() {
document = ts_document_make();
ts_document_set_language(document, ts_language_json());
ts_document_set_input_string(document, " [123, false, {\"x\": null}]");
ts_document_parse(document);
array_node = ts_document_root_node(document);
AssertThat(ts_node_string(array_node, document), Equals(
"(array "
@ -268,7 +271,3 @@ describe("Node", []() {
});
END_TEST
bool operator==(const TSNode &left, const TSNode &right) {
return ts_node_eq(left, right);
}

View file

@ -29,6 +29,8 @@ describe("Parser", [&]() {
auto set_text = [&](const char *text) {
input = new SpyInput(text, chunk_size);
ts_document_set_input(doc, input->input());
ts_document_parse(doc);
root = ts_document_root_node(doc);
AssertThat(ts_node_size(root).bytes + ts_node_pos(root).bytes, Equals(strlen(text)));
input->clear();
@ -38,6 +40,7 @@ describe("Parser", [&]() {
size_t prev_size = ts_node_size(root).bytes + ts_node_pos(root).bytes;
AssertThat(input->insert(position, text), IsTrue());
ts_document_edit(doc, { position, text.length(), 0 });
ts_document_parse(doc);
root = ts_document_root_node(doc);
size_t new_size = ts_node_size(root).bytes + ts_node_pos(root).bytes;
@ -48,6 +51,7 @@ describe("Parser", [&]() {
size_t prev_size = ts_node_size(root).bytes + ts_node_pos(root).bytes;
AssertThat(input->erase(position, length), IsTrue());
ts_document_edit(doc, { position, 0, length });
ts_document_parse(doc);
root = ts_document_root_node(doc);
size_t new_size = ts_node_size(root).bytes + ts_node_pos(root).bytes;
@ -60,6 +64,7 @@ describe("Parser", [&]() {
AssertThat(input->insert(position, new_text), IsTrue());
ts_document_edit(doc, { position, new_text.size(), length });
ts_document_parse(doc);
root = ts_document_root_node(doc);
size_t new_size = ts_node_size(root).bytes + ts_node_pos(root).bytes;

View file

@ -19,24 +19,13 @@ void ts_document_free(TSDocument *document) {
free(document);
}
static void ts_document__reparse(TSDocument *document) {
if (document->input.read_fn && document->parser.language) {
TSTree *tree = ts_parser_parse(&document->parser, document->input);
if (document->tree)
ts_tree_release(document->tree);
document->tree = tree;
ts_tree_retain(tree);
document->parse_count++;
}
}
// Returns the language currently stored on the document's parser.
const TSLanguage *ts_document_language(TSDocument *document) {
return document->parser.language;
}
// Stores `language` on the document's parser.
//
// NOTE(review): the reparse call and the tree reset below look like the
// before/after versions of one line in this diff (an immediate reparse being
// replaced by clearing the tree).
// NOTE(review): the tree pointer is set to NULL without a ts_tree_release(),
// whereas ts_document_parse releases the old tree before replacing it --
// possible leak of the previous tree; confirm who owns the reference.
void ts_document_set_language(TSDocument *document, const TSLanguage *language) {
document->parser.language = language;
ts_document__reparse(document);
document->tree = NULL;
}
TSDebugger ts_document_debugger(const TSDocument *document) {
@ -53,7 +42,10 @@ TSInput ts_document_input(TSDocument *document) {
// Stores `input` on the document.
// NOTE(review): the ts_document__reparse() call below appears to be the
// pre-change line of this hunk; the specs in this diff call
// ts_document_parse() explicitly after setting an input.
void ts_document_set_input(TSDocument *document, TSInput input) {
document->input = input;
ts_document__reparse(document);
}
// Convenience wrapper: builds a TSInput from `text` with ts_string_input_make()
// and installs it through ts_document_set_input().
void ts_document_set_input_string(TSDocument *document, const char *text) {
ts_document_set_input(document, ts_string_input_make(text));
}
void ts_document_edit(TSDocument *document, TSInputEdit edit) {
@ -64,16 +56,18 @@ void ts_document_edit(TSDocument *document, TSInputEdit edit) {
edit.chars_removed = max_chars - edit.position;
ts_tree_edit(document->tree, edit);
ts_document__reparse(document);
}
// Returns the entry for tree->symbol in the symbol_names table of the
// document's parser language.
// Dereferences document->parser.language without a null check, so a language
// must already be set when this is called.
const char *ts_document_symbol_name(const TSDocument *document,
const TSTree *tree) {
return document->parser.language->symbol_names[tree->symbol];
}
void ts_document_set_input_string(TSDocument *document, const char *text) {
ts_document_set_input(document, ts_string_input_make(text));
// Parses the document's input and stores the result as document->tree.
// Does nothing unless both an input read function and a language are set.
void ts_document_parse(TSDocument *document) {
if (document->input.read_fn && document->parser.language) {
// The current tree is handed to the parser as its previous-tree argument.
TSTree *tree =
ts_parser_parse(&document->parser, document->input, document->tree);
// NOTE(review): the old tree is released before the new one is retained; if
// ts_parser_parse could ever return the same tree object, this ordering would
// release it first -- confirm that cannot happen.
if (document->tree)
ts_tree_release(document->tree);
document->tree = tree;
ts_tree_retain(tree);
// Counts parses that were actually performed.
document->parse_count++;
}
}
TSNode ts_document_root_node(const TSDocument *document) {

View file

@ -178,6 +178,10 @@ bool ts_node_is_named(TSNode this) {
return ts_node__tree(this)->options.type == TSNodeTypeNamed;
}
// Returns the has_changes flag stored in the options of the tree behind this node.
bool ts_node_has_changes(TSNode this) {
return ts_node__tree(this)->options.has_changes;
}
TSNode ts_node_parent(TSNode this) {
const TSTree *tree = ts_node__tree(this);
TSLength position = ts_node__offset(this);

View file

@ -255,22 +255,21 @@ static bool ts_parser__handle_error(TSParser *parser, int head) {
}
}
static void ts_parser__start(TSParser *parser, TSInput input) {
parser->lexer.input = input;
ts_lexer_reset(&parser->lexer, ts_length_zero());
parser->previous_tree = ts_stack_top_tree(parser->stack, 0);
if (parser->previous_tree) {
static void ts_parser__start(TSParser *parser, TSInput input,
TSTree *previous_tree) {
if (previous_tree) {
DEBUG("parse_after_edit");
ts_tree_retain(parser->previous_tree);
} else {
DEBUG("new_parse");
}
parser->reusable_subtree = parser->previous_tree;
parser->lexer.input = input;
ts_lexer_reset(&parser->lexer, ts_length_zero());
ts_stack_clear(parser->stack);
parser->reusable_subtree = previous_tree;
parser->reusable_subtree_pos = 0;
parser->lookahead = NULL;
parser->is_verifying = false;
ts_stack_clear(parser->stack);
}
static TSTree *ts_parser__finish(TSParser *parser) {
@ -282,7 +281,6 @@ static TSTree *ts_parser__finish(TSParser *parser) {
TSTree *root = trees[extra_count];
ts_tree_prepend_children(root, extra_count, trees);
ts_stack_push(parser->stack, 0, 0, root);
return root;
}
@ -389,7 +387,6 @@ TSParser ts_parser_make() {
NULL, ts_parser__select_tree,
}),
.lookahead = NULL,
.is_verifying = false,
};
}
@ -407,8 +404,8 @@ void ts_parser_set_debugger(TSParser *parser, TSDebugger debugger) {
parser->lexer.debugger = debugger;
}
TSTree *ts_parser_parse(TSParser *parser, TSInput input) {
ts_parser__start(parser, input);
TSTree *ts_parser_parse(TSParser *parser, TSInput input, TSTree *previous_tree) {
ts_parser__start(parser, input, previous_tree);
for (;;) {
ts_parser__get_next_lookahead(parser, false);

View file

@ -11,10 +11,8 @@ typedef struct {
TSLexer lexer;
Stack *stack;
TSTree *lookahead;
TSTree *previous_tree;
TSTree *reusable_subtree;
size_t reusable_subtree_pos;
bool is_verifying;
const TSLanguage *language;
} TSParser;
@ -22,7 +20,7 @@ TSParser ts_parser_make();
void ts_parser_destroy(TSParser *);
TSDebugger ts_parser_debugger(const TSParser *);
void ts_parser_set_debugger(TSParser *, TSDebugger);
TSTree *ts_parser_parse(TSParser *, TSInput);
TSTree *ts_parser_parse(TSParser *, TSInput, TSTree *);
#ifdef __cplusplus
}