Store edits in trees, not by splitting stack

This allows for multiple edits per parse, although that capability is not yet
exposed through the API.
This commit is contained in:
Max Brunsfeld 2015-09-13 19:47:45 -07:00
parent 0467d190fe
commit b3d883e128
11 changed files with 169 additions and 333 deletions

View file

@ -106,7 +106,6 @@
'src/runtime/node.c',
'src/runtime/parse_stack.c',
'src/runtime/parser.c',
'src/runtime/stack.c',
'src/runtime/string_input.c',
'src/runtime/tree.c',
'externals/utf8proc/utf8proc.c',

View file

@ -223,7 +223,7 @@ describe("Parser", [&]() {
});
it("re-reads only the changed portion of the input", [&]() {
AssertThat(input->strings_read, Equals(vector<string>({ " abc * 5)" })));
AssertThat(input->strings_read, Equals(vector<string>({ " abc * 5)", "" })));
});
});
@ -251,7 +251,7 @@ describe("Parser", [&]() {
});
it("re-reads only the changed portion of the input", [&]() {
AssertThat(input->strings_read, Equals(vector<string>({ "123 + 5 ", " 4", " ^ (", "" })));
AssertThat(input->strings_read, Equals(vector<string>({ "123 + 5 ", "" })));
});
});

View file

@ -1,52 +0,0 @@
#include "runtime/runtime_spec_helper.h"
#include "runtime/length.h"
#include "runtime/tree.h"
#include "runtime/stack.h"
START_TEST
// Symbol ids available to the fixtures; only sym1 is used in this spec.
enum { sym1, sym2, hidden_sym };
// Specs for the plain TSStack (state/node pairs).
describe("stacks", [&]() {
// Stack under test: created before each example and deleted after it.
TSStack stack;
before_each([&]() {
stack = ts_stack_make();
});
after_each([&]() {
ts_stack_delete(&stack);
});
// An empty stack reports size 0, state 0 and a null top node.
it("starts out empty", [&]() {
AssertThat(stack.size, Equals<size_t>(0));
AssertThat(ts_stack_top_state(&stack), Equals(0));
AssertThat(ts_stack_top_node(&stack), Equals((TSTree *)nullptr));
});
describe("pushing a symbol", [&]() {
// Leaf pushed by the fixture; released again in after_each.
TSTree *node1;
before_each([&]() {
node1 = ts_tree_make_leaf(
sym1,
ts_length_make(0, 0),
ts_length_make(1, 1),
TSNodeTypeNamed);
// Push the leaf together with parse state 5.
ts_stack_push(&stack, 5, node1);
});
after_each([&]() {
ts_tree_release(node1);
});
// After one push the top of the stack is exactly the pushed state and node.
it("adds the symbol to the stack", [&]() {
AssertThat(stack.size, Equals<size_t>(1));
AssertThat(ts_stack_top_state(&stack), Equals(5));
AssertThat(ts_stack_top_node(&stack), Equals(node1));
});
});
});
END_TEST

View file

@ -19,9 +19,9 @@ void ts_document_free(TSDocument *document) {
free(document);
}
static void ts_document__reparse(TSDocument *document, TSInputEdit *edit) {
static void ts_document__reparse(TSDocument *document) {
if (document->input.read_fn && document->parser.language) {
TSTree *tree = ts_parser_parse(&document->parser, document->input, edit);
TSTree *tree = ts_parser_parse(&document->parser, document->input);
if (document->tree)
ts_tree_release(document->tree);
document->tree = tree;
@ -36,7 +36,7 @@ const TSLanguage *ts_document_language(TSDocument *document) {
void ts_document_set_language(TSDocument *document, const TSLanguage *language) {
document->parser.language = language;
ts_document__reparse(document, NULL);
ts_document__reparse(document);
}
TSDebugger ts_document_debugger(const TSDocument *document) {
@ -53,11 +53,18 @@ TSInput ts_document_input(TSDocument *document) {
void ts_document_set_input(TSDocument *document, TSInput input) {
document->input = input;
ts_document__reparse(document, NULL);
ts_document__reparse(document);
}
/*
 * Record `edit` in the document's current tree and then reparse.
 *
 * The edit is clamped to the extent of the existing tree: a position past the
 * end is pulled back to the end, and the removed range is truncated so that it
 * cannot extend beyond the end.
 *
 * A document may not have a tree yet (the reparse helper itself checks for
 * that before releasing the old tree). In that case there is nothing to mark
 * as edited, so the edit is skipped and only the reparse runs.
 */
void ts_document_edit(TSDocument *document, TSInputEdit edit) {
  if (document->tree) {
    size_t max_chars = ts_tree_total_size(document->tree).chars;
    if (edit.position > max_chars)
      edit.position = max_chars;
    /* position <= max_chars at this point, so the subtraction cannot wrap. */
    if (edit.chars_removed > max_chars - edit.position)
      edit.chars_removed = max_chars - edit.position;
    ts_tree_edit(document->tree, edit);
  }
  ts_document__reparse(document);
}
const char *ts_document_symbol_name(const TSDocument *document,

View file

@ -43,6 +43,11 @@ static void ts_lexer__get_lookahead(TSLexer *lexer) {
/*
 * Begin lexing in `lex_state`. Logs the start, then fills in whatever input
 * state is still missing: a chunk is fetched when none is loaded, and the
 * lookahead is read when its size is zero. The chunk is handled first, before
 * the lookahead is read.
 */
static void ts_lexer__start(TSLexer *lexer, TSStateId lex_state) {
  DEBUG("start_lex state:%d", lex_state);
  DEBUG_LOOKAHEAD();

  if (lexer->chunk == NULL) {
    ts_lexer__get_chunk(lexer);
  }

  if (lexer->lookahead_size == 0) {
    ts_lexer__get_lookahead(lexer);
  }
}
static void ts_lexer__start_token(TSLexer *lexer) {
@ -98,20 +103,18 @@ TSLexer ts_lexer_make() {
.accept_fn = ts_lexer__accept,
.chunk = NULL,
.chunk_start = 0,
.chunk_size = 0,
.current_position = ts_length_zero(),
.token_start_position = ts_length_zero(),
.token_end_position = ts_length_zero(),
.lookahead = 0,
.lookahead_size = 0,
.debugger = ts_debugger_null(),
};
ts_lexer_reset(&result, ts_length_zero());
return result;
}
void ts_lexer_reset(TSLexer *lexer, TSLength position) {
lexer->token_start_position = position;
lexer->token_end_position = position;
lexer->current_position = position;
ts_lexer__get_chunk(lexer);
ts_lexer__get_lookahead(lexer);
lexer->chunk = 0;
lexer->chunk_size = 0;
lexer->lookahead_size = 0;
lexer->lookahead = 0;
}

View file

@ -4,7 +4,6 @@
#include "tree_sitter/parser.h"
#include "runtime/tree.h"
#include "runtime/lexer.h"
#include "runtime/stack.h"
#include "runtime/parser.h"
#include "runtime/length.h"
#include "runtime/debugger.h"
@ -43,137 +42,92 @@ static TSParseAction ts_language__action(const TSLanguage *language,
return ts_language__actions(language, state, sym)[0];
}
static TSLength ts_parser__break_down_left(TSParser *parser, TSInputEdit edit) {
ts_stack_shrink(&parser->right_stack, 0);
TSLength prev_size =
ts_tree_total_size(ts_parse_stack_top_tree(parser->stack, 0));
parser->total_chars =
prev_size.chars + edit.chars_inserted - edit.chars_removed;
TSLength left_subtree_end = prev_size;
size_t right_subtree_start = parser->total_chars;
for (;;) {
ParseStackEntry *entry = ts_parse_stack_head(parser->stack, 0);
if (!entry)
break;
TSTree *node = entry->tree;
size_t child_count = node->child_count;
TSTree **children = node->children;
if (node->symbol == ts_builtin_sym_error)
child_count = 0;
if (left_subtree_end.chars < edit.position && !children &&
node->symbol != ts_builtin_sym_error)
break;
DEBUG("pop_left sym:%s, state:%u", SYM_NAME(node->symbol),
ts_parse_stack_top_state(parser->stack, 0));
ts_parse_stack_shrink(parser->stack, 0, 1);
left_subtree_end = ts_length_sub(left_subtree_end, ts_tree_total_size(node));
size_t i = 0;
for (; i < child_count && left_subtree_end.chars < edit.position; i++) {
TSTree *child = children[i];
TSStateId state = ts_parse_stack_top_state(parser->stack, 0);
TSParseAction action =
ts_language__action(parser->language, state, child->symbol);
TSStateId next_state =
ts_tree_is_extra(child) ? state : action.data.to_state;
DEBUG("push_left sym:%s, state:%u", SYM_NAME(child->symbol), next_state);
ts_parse_stack_push(parser->stack, 0, next_state, child);
left_subtree_end =
ts_length_add(left_subtree_end, ts_tree_total_size(child));
}
for (size_t j = child_count - 1; j + 1 > i; j--) {
TSTree *child = children[j];
right_subtree_start -= ts_tree_total_size(child).chars;
if (right_subtree_start < edit.position + edit.chars_inserted)
break;
DEBUG("push_right sym:%s", SYM_NAME(child->symbol));
ts_stack_push(&parser->right_stack, 0, child);
}
ts_tree_release(node);
}
DEBUG("reuse_left chars:%lu, state:%u", left_subtree_end.chars,
ts_parse_stack_top_state(parser->stack, 0));
return left_subtree_end;
/*
 * Descend from the parser's reusable_subtree along first children until a
 * non-fragile node is reached, and make that node the new reusable_subtree.
 *
 * Returns true when such a node is found. Returns false when the descent hits
 * an error node or a node without children; reusable_subtree is then left
 * pointing at the node where the descent stopped.
 */
static bool ts_parser__breakdown_reusable_subtree(TSParser *parser) {
  for (;;) {
    TSTree *current = parser->reusable_subtree;
    if (current->symbol == ts_builtin_sym_error || current->child_count == 0)
      return false;

    TSTree *first_child = current->children[0];
    parser->reusable_subtree = first_child;
    if (!ts_tree_is_fragile(first_child))
      return true;
  }
}
static TSTree *ts_parser__break_down_right(TSParser *parser) {
TSStack *stack = &parser->right_stack;
TSLength current_position = parser->lexer.current_position;
TSStateId state = ts_parse_stack_top_state(parser->stack, 0);
/*
* Replace the parser's reusable_subtree with its largest right neighbor, or
* NULL if no right neighbor exists.
*/
static void ts_parser__pop_reusable_subtree(TSParser *parser) {
parser->reusable_subtree_pos +=
ts_tree_total_size(parser->reusable_subtree).chars;
size_t right_subtree_start =
parser->total_chars - ts_stack_total_tree_size(stack).chars;
for (;;) {
TSTree *node = ts_stack_top_node(stack);
if (!node)
return NULL;
if (right_subtree_start > current_position.chars)
return NULL;
TSParseAction action =
ts_language__action(parser->language, state, node->symbol);
bool is_usable = (action.type != TSParseActionTypeError) &&
!ts_tree_is_extra(node) && !ts_tree_is_empty(node) &&
!ts_tree_is_fragile_left(node) &&
!ts_tree_is_fragile_right(node);
if (is_usable && right_subtree_start == current_position.chars) {
ts_stack_shrink(&parser->right_stack, parser->right_stack.size - 1);
return node;
while (parser->reusable_subtree) {
TSTree *parent = parser->reusable_subtree->context.parent;
size_t next_index = parser->reusable_subtree->context.index + 1;
if (parent && parent->child_count > next_index) {
parser->reusable_subtree = parent->children[next_index];
return;
}
size_t child_count = node->child_count;
TSTree **children = node->children;
DEBUG("pop_right sym:%s", SYM_NAME(node->symbol));
stack->size--;
right_subtree_start += ts_tree_total_size(node).chars;
for (size_t i = child_count - 1; i + 1 > 0; i--) {
if (right_subtree_start <= current_position.chars)
break;
TSTree *child = children[i];
DEBUG("push_right sym:%s", SYM_NAME(child->symbol));
ts_stack_push(stack, 0, child);
right_subtree_start -= ts_tree_total_size(child).chars;
}
ts_tree_release(node);
parser->reusable_subtree = parent;
}
}
static TSTree *ts_parser__next_node(TSParser *parser, TSStateId lex_state) {
TSTree *node;
/*
* Advance the parser's lookahead subtree. If there is a reusable subtree
* at the correct position in the parser's previous tree, use that. Otherwise,
* run the lexer.
*/
static void ts_parser__get_next_lookahead(TSParser *parser, bool error_mode) {
while (!error_mode && parser->reusable_subtree) {
if (parser->reusable_subtree_pos > parser->lexer.current_position.chars) {
break;
}
if ((node = ts_parser__break_down_right(parser))) {
DEBUG("reuse sym:%s, is_extra:%u, size:%lu", SYM_NAME(node->symbol),
ts_tree_is_extra(node), ts_tree_total_size(node).chars);
if (parser->reusable_subtree_pos < parser->lexer.current_position.chars) {
DEBUG("past_reuse sym:%s", SYM_NAME(parser->reusable_subtree->symbol));
ts_parser__pop_reusable_subtree(parser);
continue;
}
parser->lexer.token_start_position =
ts_length_add(parser->lexer.current_position, node->padding);
parser->lexer.token_end_position = parser->lexer.current_position =
ts_length_add(parser->lexer.token_start_position, node->size);
if (ts_tree_has_changes(parser->reusable_subtree) ||
ts_tree_is_fragile(parser->reusable_subtree) ||
ts_tree_is_extra(parser->reusable_subtree)) {
DEBUG("breakdown sym:%s", SYM_NAME(parser->reusable_subtree->symbol));
if (!ts_parser__breakdown_reusable_subtree(parser)) {
DEBUG("cant_reuse sym:%s", SYM_NAME(parser->reusable_subtree->symbol));
ts_parser__pop_reusable_subtree(parser);
}
continue;
}
parser->lexer.lookahead = 0;
parser->lexer.lookahead_size = 0;
parser->lexer.advance_fn(&parser->lexer, 0);
} else {
node = parser->language->lex_fn(&parser->lexer, lex_state);
TSStateId top_state = ts_parse_stack_top_state(parser->stack, 0);
TSSymbol symbol = parser->reusable_subtree->symbol;
if (ts_language__action(parser->language, top_state, symbol).type ==
TSParseActionTypeError) {
ts_parser__pop_reusable_subtree(parser);
continue;
}
parser->lookahead = parser->reusable_subtree;
TSLength size = ts_tree_total_size(parser->lookahead);
DEBUG("reuse sym:%s size:%lu extra:%d", SYM_NAME(parser->lookahead->symbol),
size.chars, parser->lookahead->options.extra);
ts_lexer_reset(&parser->lexer,
ts_length_add(parser->lexer.current_position, size));
ts_parser__pop_reusable_subtree(parser);
return;
}
return node;
TSStateId lex_state =
error_mode
? ts_lex_state_error
: parser->language->lex_states[ts_parse_stack_top_state(parser->stack, 0)];
parser->lookahead = parser->language->lex_fn(&parser->lexer, lex_state);
}
/*
@ -289,7 +243,7 @@ static bool ts_parser__handle_error(TSParser *parser, int head) {
DEBUG("skip token:%s", SYM_NAME(parser->lookahead->symbol));
ts_parser__shift(parser, head,
ts_parse_stack_top_state(parser->stack, head));
parser->lookahead = ts_parser__next_node(parser, ts_lex_state_error);
ts_parser__get_next_lookahead(parser, true);
error_token_count++;
/*
@ -303,6 +257,24 @@ static bool ts_parser__handle_error(TSParser *parser, int head) {
}
}
/*
 * Set the parser up for a new parse of `input`.
 *
 * The lexer is pointed at the input and reset to position zero. Whatever tree
 * is on top of the parse stack becomes previous_tree and the first reuse
 * candidate (at position 0); it is retained before the stack is cleared
 * (presumably because clearing drops the stack's own reference — confirm).
 * The lookahead and verification flag are reset as well.
 */
static void ts_parser__start(TSParser *parser, TSInput input) {
  parser->lexer.input = input;
  ts_lexer_reset(&parser->lexer, ts_length_zero());

  TSTree *old_tree = ts_parse_stack_top_tree(parser->stack, 0);
  parser->previous_tree = old_tree;
  if (old_tree != NULL) {
    DEBUG("parse_after_edit");
    ts_tree_retain(old_tree);
  } else {
    DEBUG("new_parse");
  }

  parser->reusable_subtree_pos = 0;
  parser->reusable_subtree = old_tree;
  parser->is_verifying = false;
  parser->lookahead = NULL;

  ts_parse_stack_clear(parser->stack);
}
static TSTree *ts_parser__finish(TSParser *parser) {
ParseStackPopResult pop_result =
ts_parse_stack_pop(parser->stack, 0, -1, true).contents[0];
@ -404,24 +376,27 @@ static ParserNextResult ts_parser__next(TSParser *parser, int head_to_advance) {
return result;
}
/*
 * Tree-selection callback handed to the parse stack: of two candidate trees
 * it always keeps the left one. `data` and `right` are ignored.
 */
static TSTree *ts_parser__select_tree(void *data, TSTree *left, TSTree *right) {
  (void)data;
  (void)right;
  return left;
}
/*
* Public
*/
/* Choose between two candidate trees: the left one always wins. */
TSTree *ts_parser_select_tree(void *data, TSTree *left, TSTree *right) {
  (void)data;
  (void)right;
  return left;
}
TSParser ts_parser_make() {
return (TSParser){.lexer = ts_lexer_make(),
.stack = ts_parse_stack_new(
(TreeSelectionCallback){ NULL, ts_parser_select_tree }),
.right_stack = ts_stack_make() };
return (TSParser){
.lexer = ts_lexer_make(),
.stack = ts_parse_stack_new((TreeSelectionCallback){
NULL, ts_parser__select_tree,
}),
.lookahead = NULL,
.is_verifying = false,
};
}
void ts_parser_destroy(TSParser *parser) {
ts_parse_stack_delete(parser->stack);
ts_stack_delete(&parser->right_stack);
if (parser->lookahead)
ts_tree_release(parser->lookahead);
}
@ -434,25 +409,11 @@ void ts_parser_set_debugger(TSParser *parser, TSDebugger debugger) {
parser->lexer.debugger = debugger;
}
TSTree *ts_parser_parse(TSParser *parser, TSInput input, TSInputEdit *edit) {
TSLength position;
if (edit) {
DEBUG("edit pos:%lu, inserted:%lu, deleted:%lu", edit->position,
edit->chars_inserted, edit->chars_removed);
position = ts_parser__break_down_left(parser, *edit);
} else {
DEBUG("new_parse");
ts_parse_stack_clear(parser->stack);
position = ts_length_zero();
}
parser->lexer.input = input;
ts_lexer_reset(&parser->lexer, position);
TSTree *ts_parser_parse(TSParser *parser, TSInput input) {
ts_parser__start(parser, input);
for (;;) {
TSStateId state = ts_parse_stack_top_state(parser->stack, 0);
parser->lookahead =
ts_parser__next_node(parser, parser->language->lex_states[state]);
ts_parser__get_next_lookahead(parser, false);
DEBUG("lookahead sym:%s", SYM_NAME(parser->lookahead->symbol));
DEBUG("head_count: %d", ts_parse_stack_head_count(parser->stack));

View file

@ -5,15 +5,16 @@
extern "C" {
#endif
#include "runtime/stack.h"
#include "runtime/parse_stack.h"
typedef struct {
TSLexer lexer;
ParseStack *stack;
TSStack right_stack;
size_t total_chars;
TSTree *lookahead;
TSTree *previous_tree;
TSTree *reusable_subtree;
size_t reusable_subtree_pos;
bool is_verifying;
const TSLanguage *language;
} TSParser;
@ -21,7 +22,7 @@ TSParser ts_parser_make();
void ts_parser_destroy(TSParser *);
TSDebugger ts_parser_debugger(const TSParser *);
void ts_parser_set_debugger(TSParser *, TSDebugger);
TSTree *ts_parser_parse(TSParser *, TSInput, TSInputEdit *);
TSTree *ts_parser_parse(TSParser *, TSInput);
#ifdef __cplusplus
}

View file

@ -1,57 +0,0 @@
#include <stdlib.h>
#include "tree_sitter/parser.h"
#include "runtime/tree.h"
#include "runtime/stack.h"
#include "runtime/length.h"
static size_t INITIAL_SIZE = 100;
static TSStateId INITIAL_STATE = 0;
TSStack ts_stack_make() {
return (TSStack){.size = 0,
.capacity = INITIAL_SIZE,
.entries = malloc(INITIAL_SIZE * sizeof(TSStackEntry)) };
}
/*
 * Release every node still on the stack and free the entry buffer.
 *
 * The buffer pointer and capacity are cleared afterwards so that a second
 * ts_stack_delete on the same stack is harmless (free(NULL) is a no-op)
 * rather than a double free.
 */
void ts_stack_delete(TSStack *stack) {
  ts_stack_shrink(stack, 0);
  free(stack->entries);
  stack->entries = NULL;
  stack->capacity = 0;
}
/*
 * Return the state stored in the topmost entry, or INITIAL_STATE when the
 * stack holds no entries.
 */
TSStateId ts_stack_top_state(const TSStack *stack) {
  size_t count = stack->size;
  return count > 0 ? stack->entries[count - 1].state : INITIAL_STATE;
}
/*
 * Return the node stored in the topmost entry, or NULL when the stack holds
 * no entries. The reference count of the node is not touched.
 */
TSTree *ts_stack_top_node(const TSStack *stack) {
  size_t count = stack->size;
  return count > 0 ? stack->entries[count - 1].node : NULL;
}
/*
 * Push a (state, node) pair onto the stack and retain `node`.
 *
 * When the buffer is full its capacity is doubled (or set to INITIAL_SIZE if
 * the capacity is zero). The grown buffer is only installed once realloc has
 * succeeded, so the existing entries are never lost to a failed call. Because
 * the function cannot report failure to its caller, a capacity overflow or an
 * allocation failure aborts the process rather than writing through NULL.
 */
void ts_stack_push(TSStack *stack, TSStateId state, TSTree *node) {
  if (stack->size == stack->capacity) {
    size_t new_capacity = stack->capacity ? stack->capacity * 2 : INITIAL_SIZE;

    /* Reject a wrapped capacity and a byte count that would wrap. */
    if (new_capacity <= stack->capacity ||
        new_capacity > (size_t)-1 / sizeof(*stack->entries)) {
      abort();
    }

    TSStackEntry *grown =
        realloc(stack->entries, new_capacity * sizeof(*stack->entries));
    if (!grown)
      abort();

    stack->entries = grown;
    stack->capacity = new_capacity;
  }

  TSStackEntry *slot = &stack->entries[stack->size];
  slot->state = state;
  slot->node = node;
  stack->size++;
  ts_tree_retain(node);
}
/*
 * Drop entries from the top of the stack until only `new_size` remain,
 * releasing the node of every dropped entry.
 *
 * A `new_size` at or above the current size is a no-op. Without that guard the
 * size would be raised over entries that were never written.
 */
void ts_stack_shrink(TSStack *stack, size_t new_size) {
  if (new_size >= stack->size)
    return;
  for (size_t i = new_size; i < stack->size; i++)
    ts_tree_release(stack->entries[i].node);
  stack->size = new_size;
}
TSLength ts_stack_total_tree_size(const TSStack *stack) {
TSLength result = ts_length_zero();
for (size_t i = 0; i < stack->size; i++) {
TSTree *node = stack->entries[i].node;
result = ts_length_add(result, ts_tree_total_size(node));
}
return result;
}

View file

@ -1,33 +0,0 @@
#ifndef RUNTIME_STACK_H_
#define RUNTIME_STACK_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "tree_sitter/parser.h"
// One stack slot: a tree node together with the parse state pushed with it.
typedef struct {
TSTree *node;
TSStateId state;
} TSStackEntry;
// Growable array of entries; `size` is the number in use, `capacity` the
// number allocated.
typedef struct {
size_t size;
size_t capacity;
TSStackEntry *entries;
} TSStack;
// Create an empty stack with a preallocated entry buffer.
TSStack ts_stack_make();
// Release all nodes on the stack and free its entry buffer.
void ts_stack_delete(TSStack *);
// Release the nodes of all entries at index >= new_size and set the size to
// new_size.
void ts_stack_shrink(TSStack *stack, size_t new_size);
// Append (state, node) as the new top entry, growing the buffer if it is full;
// the node is retained.
void ts_stack_push(TSStack *stack, TSStateId state, TSTree *node);
// State of the top entry, or 0 for an empty stack.
TSStateId ts_stack_top_state(const TSStack *stack);
// Node of the top entry, or NULL for an empty stack.
TSTree *ts_stack_top_node(const TSStack *stack);
// Sum of ts_tree_total_size over every node on the stack.
TSLength ts_stack_total_tree_size(const TSStack *stack);
#ifdef __cplusplus
}
#endif
#endif // RUNTIME_STACK_H_

View file

@ -251,11 +251,11 @@ void ts_tree_edit(TSTree *tree, TSInputEdit edit) {
if (remainder_to_delete > 0) {
size_t chars_removed = min(remainder_to_delete, child_size);
remainder_to_delete -= chars_removed;
ts_tree_edit(child, (TSInputEdit){
.position = 0,
.chars_inserted = 0,
.chars_removed = chars_removed,
});
ts_tree_edit(
child,
(TSInputEdit){
.position = 0, .chars_inserted = 0, .chars_removed = chars_removed,
});
} else {
break;
}

View file

@ -34,10 +34,16 @@ struct TSTree {
unsigned short int ref_count;
};
// A tree paired with a length offset.
// NOTE(review): presumably the child's offset within its parent — confirm;
// nothing visible here reads or writes this type.
typedef struct {
TSTree *tree;
TSLength offset;
} TSTreeChild;
TSTree *ts_tree_make_leaf(TSSymbol, TSLength, TSLength, TSNodeType);
TSTree *ts_tree_make_node(TSSymbol, size_t, TSTree **, TSNodeType);
TSTree *ts_tree_make_error(TSLength size, TSLength padding, char lookahead_char);
void ts_tree_retain(TSTree *tree);
void ts_tree_release(TSTree *tree);
bool ts_tree_eq(const TSTree *tree1, const TSTree *tree2);
char *ts_tree_string(const TSTree *tree, const char **names);
TSLength ts_tree_total_size(const TSTree *tree);
void ts_tree_prepend_children(TSTree *, size_t, TSTree **);
void ts_tree_edit(TSTree *, TSInputEdit);
static inline bool ts_tree_is_extra(const TSTree *tree) {
return tree->options.extra;
@ -59,28 +65,29 @@ static inline void ts_tree_set_fragile_right(TSTree *tree) {
tree->options.fragile_right = true;
}
static inline bool ts_tree_is_fragile_left(TSTree *tree) {
static inline bool ts_tree_is_fragile_left(const TSTree *tree) {
return tree->options.fragile_left;
}
static inline bool ts_tree_is_fragile_right(TSTree *tree) {
static inline bool ts_tree_is_fragile_right(const TSTree *tree) {
return tree->options.fragile_right;
}
TSTree *ts_tree_make_leaf(TSSymbol, TSLength, TSLength, TSNodeType);
TSTree *ts_tree_make_node(TSSymbol, size_t, TSTree **, TSNodeType);
TSTree *ts_tree_make_error(TSLength size, TSLength padding, char lookahead_char);
void ts_tree_retain(TSTree *tree);
void ts_tree_release(TSTree *tree);
bool ts_tree_eq(const TSTree *tree1, const TSTree *tree2);
char *ts_tree_string(const TSTree *tree, const char **names);
char *ts_tree_error_string(const TSTree *tree, const char **names);
TSLength ts_tree_total_size(const TSTree *tree);
void ts_tree_prepend_children(TSTree *, size_t, TSTree **);
void ts_tree_edit(TSTree *, TSInputEdit);
/* True when the tree has no children. */
static inline bool ts_tree_is_terminal(const TSTree *tree) {
  return !tree->child_count;
}
static inline bool ts_tree_is_empty(TSTree *tree) {
return ts_tree_total_size(tree).bytes == 0;
/* Report the tree's has_changes option flag. */
static inline bool ts_tree_has_changes(const TSTree *tree) {
  bool changed = tree->options.has_changes;
  return changed;
}
static inline bool ts_tree_is_empty(const TSTree *tree) {
return ts_tree_total_size(tree).chars == 0;
}
/*
 * A tree counts as fragile when it is empty or when either of its
 * fragile_left / fragile_right option flags is set.
 */
static inline bool ts_tree_is_fragile(const TSTree *tree) {
  if (ts_tree_is_empty(tree))
    return true;
  if (tree->options.fragile_left)
    return true;
  return tree->options.fragile_right;
}
#ifdef __cplusplus