Start work on re-using right side of parse tree

This commit is contained in:
Max Brunsfeld 2014-10-09 14:02:03 -07:00
parent 80b8a0a9fb
commit 4dcc712a8c
5 changed files with 119 additions and 27 deletions

View file

@ -36,13 +36,19 @@ SpyReader::SpyReader(string content, size_t chunk_size) :
buffer(new char[chunk_size]),
content(content),
position(0),
chunk_size(chunk_size) {}
chunk_size(chunk_size),
strings_read({ "" }) {}
// Releases the chunk buffer owned by this reader.
//
// The constructor allocates `buffer` with the array form `new char[chunk_size]`,
// so it has to be released with `delete[]`; pairing an array allocation with
// scalar `delete` is undefined behaviour.
SpyReader::~SpyReader() {
  delete[] buffer;
}
const char * SpyReader::read(size_t *bytes_read) {
if (position > content.size()) {
*bytes_read = 0;
return "";
}
const char *start = content.data() + position;
long len = position_for_char_index(start, content.size() - position, chunk_size);
if (len < 0)
@ -58,7 +64,8 @@ const char * SpyReader::read(size_t *bytes_read) {
}
int SpyReader::seek(size_t pos) {
strings_read.push_back("");
if (position != pos)
strings_read.push_back("");
position = pos;
return 0;
}

View file

@ -36,7 +36,7 @@ describe("Parser", [&]() {
auto insert_text = [&](size_t position, string text) {
size_t prev_size = ts_node_size(root).bytes + ts_node_pos(root).bytes;
AssertThat(reader->insert(position, text), IsTrue());
ts_document_edit(doc, { position, 0, text.length() });
ts_document_edit(doc, { position, text.length(), 0 });
root = ts_document_root_node(doc);
size_t new_size = ts_node_size(root).bytes + ts_node_pos(root).bytes;
@ -46,7 +46,7 @@ describe("Parser", [&]() {
auto delete_text = [&](size_t position, size_t length) {
size_t prev_size = ts_node_size(root).bytes + ts_node_pos(root).bytes;
AssertThat(reader->erase(position, length), IsTrue());
ts_document_edit(doc, { position, length, 0 });
ts_document_edit(doc, { position, 0, length });
root = ts_document_root_node(doc);
size_t new_size = ts_node_size(root).bytes + ts_node_pos(root).bytes;
@ -257,21 +257,27 @@ describe("Parser", [&]() {
describe("new tokens near the beginning of the input", [&]() {
before_each([&]() {
set_text("123 * 456");
chunk_size = 2;
set_text("123 * 456 ^ (10 + x)");
AssertThat(ts_node_string(root), Equals(
"(DOCUMENT (product (number) (number)))"));
"(DOCUMENT (product "
"(number) "
"(exponent (number) (group (sum (number) (variable))))))"));
insert_text(strlen("123"), " + 5 ");
insert_text(strlen("123"), " + 5");
});
it("updates the parse tree", [&]() {
AssertThat(ts_node_string(root), Equals(
"(DOCUMENT (sum (number) (product (number) (number))))"));
"(DOCUMENT (sum (number) (product "
"(number) "
"(exponent (number) (group (sum (number) (variable)))))))"));
});
it_skip("re-reads only the changed portion of the input", [&]() {
AssertThat(reader->strings_read, Equals(vector<string>({ "\"key2\": 4, " })));
it("re-reads only the changed portion of the input", [&]() {
AssertThat(reader->strings_read, Equals(vector<string>({ "123 + 5 * ", "" })));
});
});

View file

@ -6,6 +6,15 @@
static const char *empty_chunk = "";
/*
 * Loads the chunk of input that begins at the lexer's current position.
 *
 * The input is first asked to seek to `current_position`, the byte offset of
 * that position is recorded as `chunk_start`, and then the input's read
 * callback supplies the chunk pointer and its size. If the read reports a
 * size of zero, `chunk` is pointed at the static empty string instead of
 * whatever the callback returned.
 */
static void ts_lexer_read_next_chunk(TSLexer *lexer) {
  TSInput source = lexer->input;

  source.seek_fn(source.data, lexer->current_position);
  lexer->chunk_start = lexer->current_position.bytes;
  lexer->chunk = source.read_fn(source.data, &lexer->chunk_size);

  if (lexer->chunk_size == 0)
    lexer->chunk = empty_chunk;
}
static bool advance(TSLexer *lexer) {
/*
@ -27,10 +36,7 @@ static bool advance(TSLexer *lexer) {
* the end of the current chunk.
*/
if (lexer->current_position.bytes >= lexer->chunk_start + lexer->chunk_size) {
lexer->chunk_start += lexer->chunk_size;
lexer->chunk = lexer->input.read_fn(lexer->input.data, &lexer->chunk_size);
if (!lexer->chunk_size)
lexer->chunk = empty_chunk;
ts_lexer_read_next_chunk(lexer);
}
/*
@ -76,13 +82,12 @@ TSLexer ts_lexer_make() {
}
void ts_lexer_reset(TSLexer *lexer, TSLength position) {
lexer->input.seek_fn(lexer->input.data, position);
lexer->current_position = position;
lexer->token_end_position = position;
lexer->lookahead = 0;
lexer->lookahead_size = 0;
lexer->chunk_start = position.bytes;
lexer->chunk_size = 0;
lexer->chunk = lexer->input.read_fn(lexer->input.data, &lexer->chunk_size);
lexer->token_end_position = position;
lexer->current_position = position;
ts_lexer_read_next_chunk(lexer);
ts_lexer_advance(lexer);
}

View file

@ -1,4 +1,5 @@
#include <stdio.h>
#include <stdbool.h>
#include "tree_sitter/runtime.h"
#include "tree_sitter/parser.h"
#include "runtime/tree.h"
@ -22,11 +23,53 @@ static TSParseAction action_for(const TSLanguage *lang, TSStateId state,
return (lang->parse_table + (state * lang->symbol_count))[sym];
}
static void lex(TSParser *parser, TSStateId lex_state) {
parser->lookahead = parser->language->lex_fn(&parser->lexer, lex_state);
/*
 * Breaks down the trees on the parser's right stack until the top tree either
 * begins after `cur_position.chars`, or begins exactly there and has a
 * non-error parse action for its symbol in `state`.
 *
 * A tree that does not meet either condition is popped and those of its
 * children that lie beyond `cur_position` are pushed in its place, last child
 * first. The popped tree itself is then released.
 *
 * Returns the character offset tracked for the top of the right stack. The
 * offset starts as `total_chars` minus the combined size of the trees on the
 * stack and is adjusted on every pop and push.
 * NOTE(review): this presumably is the start offset of the top tree in the
 * edited document -- confirm against how `total_chars` is maintained.
 */
static size_t breakdown_right_stack(TSParser *parser, TSLength cur_position,
TSStateId state) {
TSStack *stack = &parser->right_stack;
size_t position = parser->total_chars - ts_stack_total_tree_size(stack).chars;
for (;;) {
TSTree *node = ts_stack_top_node(stack);
/* Nothing left on the right stack. */
if (!node)
break;
/* The top tree lies beyond the requested position; leave it in place. */
if (position > cur_position.chars)
break;
/* The tree is usable only if the parse table has a non-error action for it. */
bool can_be_used = action_for(parser->language, state, node->symbol).type !=
TSParseActionTypeError;
if (position == cur_position.chars && can_be_used)
break;
size_t child_count;
TSTree **children = ts_tree_children(node, &child_count);
DEBUG_PARSE("POP RIGHT %s", parser->language->symbol_names[node->symbol]);
/* Pop the tree; `position` now accounts for the whole popped tree. */
stack->size--;
position += ts_tree_total_size(node).chars;
/*
 * Visit the children from last to first. `i` is unsigned, so `i + 1 > 0`
 * stops the loop after index 0 and skips it entirely when child_count is 0.
 */
for (size_t i = child_count - 1; i + 1 > 0; i--) {
TSTree *child = children[i];
if (position > cur_position.chars) {
DEBUG_PARSE("PUSH RIGHT %s",
parser->language->symbol_names[child->symbol]);
/* Children are pushed with state 0 on the right stack. */
ts_stack_push(stack, 0, child);
position -= ts_tree_total_size(child).chars;
} else {
/* Remaining (earlier) children are not pushed back. */
break;
}
}
ts_tree_release(node);
}
return position;
}
static TSLength breakdown_stack(TSParser *parser, TSInputEdit *edit) {
ts_stack_shrink(&parser->right_stack, 0);
if (!edit) {
ts_stack_shrink(&parser->stack, 0);
return ts_length_zero();
@ -34,6 +77,8 @@ static TSLength breakdown_stack(TSParser *parser, TSInputEdit *edit) {
TSStack *stack = &parser->stack;
TSLength position = ts_stack_total_tree_size(stack);
parser->total_chars =
position.chars + edit->chars_inserted - edit->chars_removed;
for (;;) {
TSTree *node = ts_stack_top_node(stack);
@ -45,29 +90,54 @@ static TSLength breakdown_stack(TSParser *parser, TSInputEdit *edit) {
if (position.chars < edit->position && !children)
break;
DEBUG_PARSE("POP %s", parser->language->symbol_names[node->symbol]);
DEBUG_PARSE("POP LEFT %s", parser->language->symbol_names[node->symbol]);
stack->size--;
position = ts_length_sub(position, ts_tree_total_size(node));
for (size_t i = 0; i < child_count && position.chars < edit->position; i++) {
size_t i = 0;
for (; i < child_count && position.chars < edit->position; i++) {
TSTree *child = children[i];
TSStateId state = ts_stack_top_state(stack);
TSParseAction action = action_for(parser->language, state, child->symbol);
TSStateId next_state =
action.type == TSParseActionTypeShift ? action.data.to_state : state;
DEBUG_PARSE("PUT BACK %s", parser->language->symbol_names[child->symbol]);
DEBUG_PARSE("PUSH LEFT %s", parser->language->symbol_names[child->symbol]);
ts_stack_push(stack, next_state, child);
position = ts_length_add(position, ts_tree_total_size(child));
}
for (size_t j = child_count - 1; j + 1 > i + 1; j--) {
TSTree *child = children[j];
DEBUG_PARSE("PUSH RIGHT %s",
parser->language->symbol_names[child->symbol]);
ts_stack_push(&parser->right_stack, 0, child);
}
ts_tree_release(node);
}
DEBUG_PARSE("RESUME %lu", position.chars);
DEBUG_PARSE("RESUME LEFT %lu", position.chars);
return position;
}
/*
 * Produces the parser's next lookahead tree.
 *
 * The right stack is first broken down relative to the lexer's current
 * position and the parse state on top of the left stack. If a tree remains on
 * top of the right stack and its tracked position equals the lexer's current
 * character position, that tree is taken off the right stack and used as the
 * lookahead, and the lexer's current position is advanced by the tree's total
 * size. Otherwise the language's lex function is invoked with `lex_state`.
 */
static void lex(TSParser *parser, TSStateId lex_state) {
  TSStateId parse_state = ts_stack_top_state(&parser->stack);
  size_t reusable_start = breakdown_right_stack(
      parser, parser->lexer.current_position, parse_state);
  TSTree *candidate = ts_stack_top_node(&parser->right_stack);

  /* No tree lines up with the current position: run the lexer. */
  if (!candidate || reusable_start != parser->lexer.current_position.chars) {
    parser->lookahead = parser->language->lex_fn(&parser->lexer, lex_state);
    return;
  }

  DEBUG_PARSE("REUSE %s", parser->language->symbol_names[candidate->symbol]);
  ts_stack_shrink(&parser->right_stack, parser->right_stack.size - 1);
  parser->lookahead = candidate;
  parser->lexer.current_position = ts_length_add(
      parser->lexer.current_position, ts_tree_total_size(candidate));
}
static void resize_error(TSParser *parser, TSTree *error) {
error->size =
ts_length_sub(ts_length_sub(parser->lexer.token_start_position,
@ -199,6 +269,7 @@ static TSTree *get_root(TSParser *parser) {
TSParser ts_parser_make(const TSLanguage *language) {
return (TSParser) { .lexer = ts_lexer_make(),
.stack = ts_stack_make(),
.right_stack = ts_stack_make(),
.debug = 0,
.language = language, };
}
@ -224,7 +295,8 @@ const TSTree *ts_parser_parse(TSParser *parser, TSInput input,
TSStateId state = ts_stack_top_state(&parser->stack);
if (!parser->lookahead)
lex(parser, parser->language->lex_states[state]);
TSParseAction action = action_for(parser->language, state, parser->lookahead->symbol);
TSParseAction action =
action_for(parser->language, state, parser->lookahead->symbol);
DEBUG_PARSE("LOOKAHEAD %s",
parser->language->symbol_names[parser->lookahead->symbol]);

View file

@ -10,6 +10,8 @@ extern "C" {
typedef struct {
TSLexer lexer;
TSStack stack;
TSStack right_stack;
size_t total_chars;
int debug;
TSTree *lookahead;
TSTree *next_lookahead;