Implement ts_document_parse_and_get_changed_ranges

This commit is contained in:
Max Brunsfeld 2016-09-22 18:02:11 -07:00
parent 3014101104
commit b3140b2689
9 changed files with 274 additions and 141 deletions

View file

@ -23,23 +23,18 @@ static void append_to_scope_sequence(ScopeSequence *sequence,
ScopeStack *current_scopes,
TSNode node, TSDocument *document,
const std::string &text) {
append_text_to_scope_sequence(sequence, current_scopes, text, ts_node_start_byte(node) - sequence->size());
string scope = ts_node_type(node, document);
current_scopes->push_back(scope);
size_t child_count = ts_node_child_count(node);
if (child_count > 0) {
size_t previous_child_end = ts_node_start_char(node);
for (size_t i = 0; i < child_count; i++) {
TSNode child = ts_node_child(node, i);
size_t child_start = ts_node_start_char(child);
size_t spacing = child_start - previous_child_end;
append_text_to_scope_sequence(sequence, current_scopes, text, spacing);
append_to_scope_sequence(sequence, current_scopes, child, document, text);
previous_child_end = ts_node_end_char(child);
}
size_t spacing = ts_node_end_char(node) - previous_child_end;
append_text_to_scope_sequence(sequence, current_scopes, text, spacing);
} else {
size_t length = ts_node_end_char(node) - ts_node_start_char(node);
size_t length = ts_node_end_byte(node) - ts_node_start_byte(node);
append_text_to_scope_sequence(sequence, current_scopes, text, length);
}
current_scopes->pop_back();
@ -50,7 +45,6 @@ ScopeSequence build_scope_sequence(TSDocument *document, const std::string &text
ScopeStack current_scopes;
TSNode node = ts_document_root_node(document);
append_to_scope_sequence(&sequence, &current_scopes, node, document, text);
AssertThat(sequence.size(), Equals(text.size()));
return sequence;
}
@ -66,7 +60,7 @@ bool operator<=(const TSPoint &left, const TSPoint &right) {
void verify_changed_ranges(const ScopeSequence &old_sequence, const ScopeSequence &new_sequence,
const string &text, TSRange *ranges, size_t range_count) {
TSPoint current_position = {0, 0};
for (size_t i = 0; i < text.size(); i++) {
for (size_t i = 0; i < old_sequence.size(); i++) {
if (text[i] == '\n') {
current_position.row++;
current_position.column = 0;
@ -89,6 +83,7 @@ void verify_changed_ranges(const ScopeSequence &old_sequence, const ScopeSequenc
std::stringstream message_stream;
message_stream << "Found changed scope outside of any invalidated range;\n";
message_stream << "Position: " << current_position << "\n";
message_stream << "Byte index: " << i << "\n";
size_t line_start_index = i - current_position.column;
size_t line_end_index = text.find_first_of('\n', i);
message_stream << "Line: " << text.substr(line_start_index, line_end_index - line_start_index) << "\n";
@ -99,7 +94,7 @@ void verify_changed_ranges(const ScopeSequence &old_sequence, const ScopeSequenc
message_stream << "New scopes: " << new_scopes << "\n";
message_stream << "Invalidated ranges:\n";
for (size_t j = 0; j < range_count; j++) {
message_stream << " " << ranges[i] << "\n";
message_stream << " " << ranges[j] << "\n";
}
Assert::Failure(message_stream.str());
}

View file

@ -8,6 +8,7 @@
#include "helpers/encoding_helpers.h"
#include "helpers/record_alloc.h"
#include "helpers/random_helpers.h"
#include "helpers/scope_sequence.h"
#include <set>
static void assert_correct_tree_shape(const TSDocument *document, string tree_string) {
@ -139,7 +140,16 @@ describe("The Corpus", []() {
ts_document_edit(document, input->undo());
assert_correct_tree_size(document, input->content);
ts_document_parse(document);
TSRange *ranges;
size_t range_count;
ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
verify_changed_ranges(old_scope_sequence, new_scope_sequence,
input->content, ranges, range_count);
ts_free(ranges);
});
}

View file

@ -76,7 +76,6 @@ describe("Document", [&]() {
ts_document_set_input(doc, spy_input->input());
ts_document_invalidate(doc);
ts_document_parse(doc);
TSNode root_node = ts_document_root_node(doc);
});
it("allows the input to be retrieved later", [&]() {
@ -211,7 +210,7 @@ describe("Document", [&]() {
});
});
describe("parse_and_get_changed_ranges()", [&]() {
describe("parse_and_get_changed_ranges()", [&]() {
SpyInput *input;
before_each([&]() {
@ -234,6 +233,7 @@ describe("Document", [&]() {
TSRange *ranges;
size_t range_count = 0;
ts_document_parse_and_get_changed_ranges(doc, &ranges, &range_count);
vector<TSRange> result;
@ -333,13 +333,18 @@ describe("Document", [&]() {
it("reports changes when trees have been wrapped", [&]() {
// Wrap the object in an assignment expression.
auto ranges = get_ranges([&]() {
return input->replace(0, 0, "x.y = ");
return input->replace(input->content.find("null"), 0, "b === ");
});
assert_node_string_equals(
ts_document_root_node(doc),
"(program (expression_statement (object "
"(pair (identifier) (rel_op (identifier) (null))))))");
AssertThat(ranges, Equals(vector<TSRange>({
TSRange{
TSPoint{0, 0},
TSPoint{0, input->content.find(";")},
TSPoint{0, input->content.find("b ===")},
TSPoint{0, input->content.find("}")},
},
})));
});

View file

@ -91,118 +91,210 @@ void ts_document_edit(TSDocument *self, TSInputEdit edit) {
typedef Array(TSRange) RangeArray;
#define NAME(t) ((t) ? (ts_language_symbol_name(doc->parser.language, ((TSTree *)(t))->symbol)) : "<NULL>")
// #define PRINT(msg, ...) for (size_t k = 0; k < depth; k++) { printf(" "); } printf(msg "\n", __VA_ARGS__);
#define PRINT(msg, ...)
#define NAME(t) \
((t) \
? (ts_language_symbol_name(doc->parser.language, ((TSTree *)(t))->symbol)) \
: "<NULL>")
static bool push_diff(RangeArray *results, TSNode *node, bool *extend_last_change) {
TSPoint start = ts_node_start_point(*node);
TSPoint end = ts_node_end_point(*node);
if (*extend_last_change) {
static bool push_change(RangeArray *results, TSPoint start, TSPoint end) {
if (results->size > 0) {
TSRange *last_range = array_back(results);
last_range->end = end;
return true;
}
*extend_last_change = true;
return array_push(results, ((TSRange){start, end}));
}
static bool ts_tree_get_changes(TSDocument *doc, TSTree *old, TSNode *new_node,
size_t depth, RangeArray *results,
bool *extend_last_change) {
TSTree *new = (TSTree *)(new_node->data);
PRINT("At %lu, ('%s', %lu) vs ('%s', %lu) {",
ts_node_start_byte(*new_node),
NAME(old), old->size.bytes,
NAME(new), new->size.bytes);
if (old->visible) {
if (old == new || (old->symbol == new->symbol &&
old->size.bytes == new->size.bytes && !old->has_changes)) {
*extend_last_change = false;
PRINT("}", NULL);
return true;
}
if (old->symbol != new->symbol) {
PRINT("}", NULL);
return push_diff(results, new_node, extend_last_change);
}
TSNode child = ts_node_child(*new_node, 0);
if (child.data) {
*new_node = child;
} else {
PRINT("}", NULL);
if (ts_point_lte(start, last_range->end)) {
last_range->end = end;
return true;
}
}
depth++;
size_t old_child_start;
size_t old_child_end = ts_node_start_byte(*new_node) - old->padding.bytes;
for (size_t j = 0; j < old->child_count; j++) {
TSTree *old_child = old->children[j];
if (old_child->padding.bytes == 0 && old_child->size.bytes == 0)
continue;
old_child_start = old_child_end + old_child->padding.bytes;
old_child_end = old_child_start + old_child->size.bytes;
while (true) {
size_t new_child_start = ts_node_start_byte(*new_node);
if (new_child_start < old_child_start) {
PRINT("skip new:('%s', %lu), old:('%s', %lu), old_parent:%s",
NAME(new_node->data), ts_node_start_byte(*new_node), NAME(old_child),
old_child_start, NAME(old));
if (!push_diff(results, new_node, extend_last_change))
return false;
TSNode next = ts_node_next_sibling(*new_node);
if (next.data) {
PRINT("advance before diff ('%s', %lu) -> ('%s', %lu)",
NAME(new_node->data), ts_node_start_byte(*new_node), NAME(next.data),
ts_node_start_byte(next));
*new_node = next;
} else {
break;
}
} else if (new_child_start == old_child_start) {
if (!ts_tree_get_changes(doc, old_child, new_node, depth, results, extend_last_change))
return false;
if (old_child->visible) {
TSNode next = ts_node_next_sibling(*new_node);
if (next.data) {
PRINT("advance after diff ('%s', %lu) -> ('%s', %lu)",
NAME(new_node->data), ts_node_start_byte(*new_node), NAME(next.data),
ts_node_start_byte(next));
*new_node = next;
}
}
break;
} else {
break;
}
}
if (ts_point_lt(start, end)) {
TSRange range = { start, end };
return array_push(results, range);
}
depth--;
if (old->visible) {
*new_node = ts_node_parent(*new_node);
}
PRINT("}", NULL);
return true;
}
int ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges, size_t *range_count) {
if (ranges) *ranges = NULL;
if (range_count) *range_count = 0;
// Extends `path` downward from its current last entry toward `position`.
//
// At each level the children of the last entry are scanned left to right,
// accumulating each child's starting offset from the parent's offset. Only
// children whose right edge (start + total size) lies strictly after
// `position` are considered:
//   - a visible child is pushed and the function returns true;
//   - an invisible child that has visible children is pushed and the scan
//     restarts inside it;
//   - any other child is passed over.
//
// Returns false when no visible descendant was reached. Invisible entries
// pushed along the way are left on the path in that case.
// NOTE(review): the results of array_push are not checked here.
static bool tree_path_descend(TreePath *path, TSPoint position) {
bool did_descend;
do {
did_descend = false;
// Copy of the current deepest entry; its children are scanned below.
TreePathEntry entry = *array_back(path);
// Running offset of the child being examined, starting at the parent's.
TSLength child_position = entry.position;
for (size_t i = 0; i < entry.tree->child_count; i++) {
TSTree *child = entry.tree->children[i];
TSLength child_right_position =
ts_length_add(child_position, ts_tree_total_size(child));
if (ts_point_lt(position, child_right_position.extent)) {
TreePathEntry child_entry = { child, child_position, i };
if (child->visible) {
array_push(path, child_entry);
return true;
} else if (child->visible_child_count > 0) {
// Step into the invisible wrapper and rescan from its children.
array_push(path, child_entry);
did_descend = true;
break;
}
}
child_position = child_right_position;
}
} while (did_descend);
return false;
}
// Moves `path` from its current last entry to the next entry in the tree.
//
// The last entry is popped and the remaining siblings to its right (under the
// new last entry, its parent) are scanned for one that is visible or has
// visible children. If one is found it is pushed; when it is itself invisible
// the path is additionally extended by tree_path_descend. If none is found
// the loop repeats one level higher.
//
// The return value counts the visible parent entries under which no further
// sibling was found: it is incremented on reaching a visible parent and
// decremented again when the search under that parent succeeds. If the root
// entry is popped the path is left empty and the count so far is returned.
// NOTE(review): the result of array_push is not checked here.
static size_t tree_path_advance(TreePath *path) {
size_t ascend_count = 0;
while (path->size > 0) {
TreePathEntry entry = array_pop(path);
// The popped entry was the root: nothing is left to advance to.
if (path->size == 0)
break;
TreePathEntry parent_entry = *array_back(path);
if (parent_entry.tree->visible) {
ascend_count++;
}
// Offset just past the popped entry, i.e. where its next sibling begins.
TSLength position =
ts_length_add(entry.position, ts_tree_total_size(entry.tree));
for (size_t i = entry.child_index + 1, n = parent_entry.tree->child_count;
i < n; i++) {
TSTree *next_child = parent_entry.tree->children[i];
if (next_child->visible || next_child->visible_child_count > 0) {
// A sibling was found under this parent, so it was not ascended past.
if (parent_entry.tree->visible) {
ascend_count--;
}
array_push(path,
((TreePathEntry){
.tree = next_child, .child_index = i, .position = position,
}));
// Descend with the origin point so the point test does not exclude
// children that end after {0, 0}.
if (!next_child->visible)
tree_path_descend(path, (TSPoint){ 0, 0 });
return ascend_count;
}
position = ts_length_add(position, ts_tree_total_size(next_child));
}
}
return ascend_count;
}
// Moves `path` up by `count` visible levels. Each level pops the current last
// entry and then keeps popping while the new last entry is not visible.
//
// The walk stops as soon as the path is empty. Without that guard the pop
// inside the do-while ran before any size check, so a request for more levels
// than the path holds (possible once a path has been emptied by
// tree_path_advance) would pop from an empty array.
static void tree_path_ascend(TreePath *path, size_t count) {
  for (size_t i = 0; i < count && path->size > 0; i++) {
    do {
      array_pop(path);
    } while (path->size > 0 && !array_back(path)->tree->visible);
  }
}
// Resets `path` so that it holds a single entry for `tree`, placed at the
// zero position with child index 0. When `tree` itself is not visible, the
// path is then extended downward via tree_path_descend.
static void tree_path_init(TreePath *path, TSTree *tree) {
  TreePathEntry root_entry = {
    .tree = tree, .position = { 0, 0, { 0, 0 } }, .child_index = 0,
  };

  array_clear(path);
  array_push(path, root_entry);

  if (tree->visible)
    return;
  tree_path_descend(path, (TSPoint){ 0, 0 });
}
// Walks two tree paths in lockstep and records the ranges in which the old
// and new trees differ.
//
// `position` starts at {0, 0} and only moves forward. On every iteration the
// last entries of both paths are compared:
//   - if `position` is before the start of one or both nodes, it moves to the
//     nearer start; the gap counts as a difference unless both nodes start
//     after `position`;
//   - if the nodes are the same object, or are unchanged with the same
//     symbol, same byte extent and a known parse state on both sides, the
//     node is skipped as identical;
//   - if only the symbols match, both paths try to descend; a difference is
//     recorded when exactly one side could descend;
//   - otherwise the nodes differ up to the nearer of their two end points.
// Whichever path's node ends at or before the new position is advanced, and
// the other path is ascended so both stay at a matching number of visible
// levels.
//
// Parameters:
//   doc      - referenced only by the commented-out debug output below.
//   old_path - path into the old tree, already initialised by the caller.
//   new_path - path into the new tree, already initialised by the caller.
//   depth    - not used by the body.
//   results  - receives the changed ranges.
//
// Returns true when the walk completes, false when a changed range could not
// be recorded. Earlier the result of push_change was discarded, so a failed
// insertion silently lost a range while the caller still saw success.
static bool ts_tree_get_changes(TSDocument *doc, TreePath *old_path,
                                TreePath *new_path, size_t depth,
                                RangeArray *results) {
  TSPoint position = { 0, 0 };

  while (old_path->size && new_path->size) {
    bool is_different = false;
    TSPoint next_position = position;

    TreePathEntry old_entry = *array_back(old_path);
    TreePathEntry new_entry = *array_back(new_path);
    TSTree *old_tree = old_entry.tree;
    TSTree *new_tree = new_entry.tree;
    TSSymbol old_symbol = old_tree->symbol;
    TSSymbol new_symbol = new_tree->symbol;
    size_t old_start_byte = old_entry.position.bytes;
    size_t new_start_byte = new_entry.position.bytes;
    size_t old_end_byte = old_start_byte + ts_tree_total_bytes(old_tree);
    size_t new_end_byte = new_start_byte + ts_tree_total_bytes(new_tree);
    // An entry's position is where its padding begins; the node's own
    // content starts after that padding.
    TSPoint old_start_point =
      ts_point_add(old_entry.position.extent, old_tree->padding.extent);
    TSPoint new_start_point =
      ts_point_add(new_entry.position.extent, new_tree->padding.extent);
    TSPoint old_end_point = ts_point_add(old_start_point, old_tree->size.extent);
    TSPoint new_end_point = ts_point_add(new_start_point, new_tree->size.extent);

    // printf("At [%-2lu, %-2lu] Compare (%-20s\t [%-2lu, %-2lu] - [%lu, %lu])\tvs\t(%-20s\t [%lu, %lu] - [%lu, %lu])\t",
    //   position.row, position.column, NAME(old_tree), old_start_point.row,
    //   old_start_point.column, old_end_point.row, old_end_point.column,
    //   NAME(new_tree), new_start_point.row, new_start_point.column,
    //   new_end_point.row, new_end_point.column);

    if (ts_point_lt(position, old_start_point)) {
      if (ts_point_lt(position, new_start_point)) {
        // Both nodes start later: skip the shared gap.
        next_position = ts_point_min(old_start_point, new_start_point);
      } else {
        // The new node already covers `position`, the old one does not.
        is_different = true;
        next_position = old_start_point;
      }
    } else if (ts_point_lt(position, new_start_point)) {
      // The old node already covers `position`, the new one does not.
      is_different = true;
      next_position = new_start_point;
    } else {
      if (old_tree == new_tree ||
          (!old_tree->has_changes && old_symbol == new_symbol &&
           old_start_byte == new_start_byte && old_end_byte == new_end_byte &&
           old_tree->parse_state != TS_TREE_STATE_NONE &&
           new_tree->parse_state != TS_TREE_STATE_NONE)) {
        next_position = old_end_point;
      } else if (old_symbol == new_symbol) {
        bool old_descended = tree_path_descend(old_path, position);
        bool new_descended = tree_path_descend(new_path, position);
        if (old_descended) {
          if (!new_descended) {
            // Only the old side has children here: undo its descent.
            tree_path_ascend(old_path, 1);
            is_different = true;
            next_position = new_end_point;
          }
        } else if (new_descended) {
          // Only the new side has children here: undo its descent.
          tree_path_ascend(new_path, 1);
          is_different = true;
          next_position = old_end_point;
        } else {
          next_position = ts_point_min(old_end_point, new_end_point);
        }
      } else {
        is_different = true;
        next_position = ts_point_min(old_end_point, new_end_point);
      }
    }

    // The end points were computed before any descent above, so they still
    // describe the entries that were compared.
    bool advance_old = ts_point_lte(old_end_point, next_position);
    bool advance_new = ts_point_lte(new_end_point, next_position);

    if (advance_new && advance_old) {
      size_t old_ascend_count = tree_path_advance(old_path);
      size_t new_ascend_count = tree_path_advance(new_path);
      if (old_ascend_count > new_ascend_count) {
        tree_path_ascend(new_path, old_ascend_count - new_ascend_count);
      } else if (new_ascend_count > old_ascend_count) {
        tree_path_ascend(old_path, new_ascend_count - old_ascend_count);
      }
    } else if (advance_new) {
      size_t ascend_count = tree_path_advance(new_path);
      tree_path_ascend(old_path, ascend_count);
    } else if (advance_old) {
      size_t ascend_count = tree_path_advance(old_path);
      tree_path_ascend(new_path, ascend_count);
    }

    // Propagate a failure to record the range instead of dropping it.
    if (is_different && !push_change(results, position, next_position))
      return false;

    position = next_position;
  }

  return true;
}
int ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges,
size_t *range_count) {
if (ranges)
*ranges = NULL;
if (range_count)
*range_count = 0;
if (!self->input.read || !self->parser.language)
return -1;
@ -218,15 +310,13 @@ int ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges,
if (self->tree) {
TSTree *old_tree = self->tree;
self->tree = tree;
TSNode new_root = ts_document_root_node(self);
// ts_tree_print_dot_graph(old_tree, self->parser.language, stderr);
// ts_tree_print_dot_graph(tree, self->parser.language, stderr);
if (ranges && range_count) {
bool extend_last_change = false;
RangeArray result = {0, 0, 0};
if (!ts_tree_get_changes(self, old_tree, &new_root, 0, &result, &extend_last_change))
RangeArray result = { 0, 0, 0 };
tree_path_init(&self->parser.tree_path1, old_tree);
tree_path_init(&self->parser.tree_path2, tree);
if (!ts_tree_get_changes(self, &self->parser.tree_path1,
&self->parser.tree_path2, 0, &result))
return -1;
*ranges = result.contents;
*range_count = result.size;

View file

@ -18,6 +18,18 @@ static inline TSPoint ts_point_sub(TSPoint a, TSPoint b) {
return (TSPoint){0, a.column - b.column};
}
// True when point `a` is at or before point `b`: rows are compared first,
// and columns decide only when the rows are equal.
static inline bool ts_point_lte(TSPoint a, TSPoint b) {
  if (a.row != b.row)
    return a.row < b.row;
  return a.column <= b.column;
}
// True when point `a` is strictly before point `b`: rows are compared first,
// and columns decide only when the rows are equal.
static inline bool ts_point_lt(TSPoint a, TSPoint b) {
  if (a.row != b.row)
    return a.row < b.row;
  return a.column < b.column;
}
// True when both points have the same row and the same column.
static inline bool ts_point_eq(TSPoint a, TSPoint b) {
  if (a.row != b.row)
    return false;
  return a.column == b.column;
}
static inline TSPoint ts_point_min(TSPoint a, TSPoint b) {
if (a.row < b.row || (a.row == b.row && a.column < b.column))
return a;
@ -25,6 +37,13 @@ static inline TSPoint ts_point_min(TSPoint a, TSPoint b) {
return b;
}
// Returns whichever point comes later, comparing rows first and columns
// second. When the two points are equal, `b` is returned.
static inline TSPoint ts_point_max(TSPoint a, TSPoint b) {
  bool a_is_later =
    (a.row != b.row) ? (a.row > b.row) : (a.column > b.column);
  return a_is_later ? a : b;
}
// True when the length has a non-zero byte count but a zero character count.
static inline bool ts_length_is_unknown(TSLength self) {
  if (self.chars != 0)
    return false;
  return self.bytes > 0;
}

View file

@ -1226,7 +1226,8 @@ bool parser_init(Parser *self) {
self->finished_tree = NULL;
self->stack = NULL;
array_init(&self->reduce_actions);
array_init(&self->tree_stack);
array_init(&self->tree_path1);
array_init(&self->tree_path2);
self->stack = ts_stack_new();
if (!self->stack)
@ -1252,8 +1253,10 @@ void parser_destroy(Parser *self) {
ts_stack_delete(self->stack);
if (self->reduce_actions.contents)
array_delete(&self->reduce_actions);
if (self->tree_stack.contents)
array_delete(&self->tree_stack);
if (self->tree_path1.contents)
array_delete(&self->tree_path1);
if (self->tree_path2.contents)
array_delete(&self->tree_path2);
}
TSTree *parser_parse(Parser *self, TSInput input, TSTree *old_tree) {
@ -1299,7 +1302,7 @@ TSTree *parser_parse(Parser *self, TSInput input, TSTree *old_tree) {
LOG_TREE();
ts_stack_clear(self->stack);
parser__clear_cached_token(self);
CHECK(ts_tree_assign_parents(self->finished_tree, &self->tree_stack));
CHECK(ts_tree_assign_parents(self->finished_tree, &self->tree_path1));
return self->finished_tree;
error:

View file

@ -26,7 +26,8 @@ typedef struct {
TSTree *cached_token;
size_t cached_token_byte_index;
ReusableNode reusable_node;
TreeArray tree_stack;
TreePath tree_path1;
TreePath tree_path2;
} Parser;
bool parser_init(Parser *);

View file

@ -29,6 +29,7 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength padding, TSLength size,
.visible = metadata.visible,
.named = metadata.named,
.first_leaf.symbol = sym,
.has_changes = false,
};
return result;
@ -91,12 +92,12 @@ TSTree *ts_tree_make_copy(TSTree *self) {
return result;
}
bool ts_tree_assign_parents(TSTree *self, TreeArray *stack) {
array_clear(stack);
if (!array_push(stack, self))
bool ts_tree_assign_parents(TSTree *self, TreePath *path) {
array_clear(path);
if (!array_push(path, ((TreePathEntry){self, ts_length_zero(), 0})))
return false;
while (stack->size > 0) {
TSTree *tree = array_pop(stack);
while (path->size > 0) {
TSTree *tree = array_pop(path).tree;
TSLength offset = ts_length_zero();
for (size_t i = 0; i < tree->child_count; i++) {
TSTree *child = tree->children[i];
@ -104,7 +105,7 @@ bool ts_tree_assign_parents(TSTree *self, TreeArray *stack) {
child->context.parent = tree;
child->context.index = i;
child->context.offset = offset;
if (!array_push(stack, child))
if (!array_push(path, ((TreePathEntry){child, ts_length_zero(), 0})))
return false;
}
offset = ts_length_add(offset, ts_tree_total_size(child));
@ -458,7 +459,7 @@ char *ts_tree_string(const TSTree *self, const TSLanguage *language,
return result;
}
void ts_tree__print_dot_graph(const TSTree *self, size_t offset,
void ts_tree__print_dot_graph(const TSTree *self, size_t byte_offset,
const TSLanguage *language, FILE *f) {
fprintf(f, "tree_%p [label=\"%s\"", self,
ts_language_symbol_name(language, self->symbol));
@ -469,13 +470,13 @@ void ts_tree__print_dot_graph(const TSTree *self, size_t offset,
fprintf(f, ", fontcolor=gray");
fprintf(f, ", tooltip=\"range:%lu - %lu\nstate:%d\nerror-cost:%u\"]\n",
offset, offset + ts_tree_total_chars(self), self->parse_state,
byte_offset, byte_offset + ts_tree_total_bytes(self), self->parse_state,
self->error_cost);
for (size_t i = 0; i < self->child_count; i++) {
const TSTree *child = self->children[i];
ts_tree__print_dot_graph(child, offset, language, f);
ts_tree__print_dot_graph(child, byte_offset, language, f);
fprintf(f, "tree_%p -> tree_%p [tooltip=%lu]\n", self, child, i);
offset += ts_tree_total_chars(child);
byte_offset += ts_tree_total_bytes(child);
}
}

View file

@ -49,7 +49,16 @@ typedef struct TSTree {
bool has_changes : 1;
} TSTree;
// One element of a TreePath: a tree node together with where it sits.
typedef struct {
// The tree node this entry refers to.
TSTree *tree;
// Offset at which this node begins. The tree_path_* helpers accumulate it
// from the total sizes of preceding siblings, so it precedes the node's own
// padding.
TSLength position;
// Index of `tree` within its parent's `children` array; tree_path_init uses
// 0 for the root entry.
size_t child_index;
} TreePathEntry;
typedef Array(TSTree *) TreeArray;
typedef Array(TreePathEntry) TreePath;
bool ts_tree_array_copy(TreeArray, TreeArray *);
void ts_tree_array_delete(TreeArray *);
size_t ts_tree_array_essential_count(const TreeArray *);
@ -67,7 +76,7 @@ int ts_tree_compare(const TSTree *tree1, const TSTree *tree2);
size_t ts_tree_start_column(const TSTree *self);
size_t ts_tree_end_column(const TSTree *self);
void ts_tree_set_children(TSTree *, size_t, TSTree **);
bool ts_tree_assign_parents(TSTree *, TreeArray *);
bool ts_tree_assign_parents(TSTree *, TreePath *);
void ts_tree_edit(TSTree *, const TSInputEdit *edit);
char *ts_tree_string(const TSTree *, const TSLanguage *, bool include_all);
void ts_tree_print_dot_graph(const TSTree *, const TSLanguage *, FILE *);