Start work on re-using right side of parse tree
This commit is contained in:
parent
80b8a0a9fb
commit
4dcc712a8c
5 changed files with 119 additions and 27 deletions
|
|
@ -36,13 +36,19 @@ SpyReader::SpyReader(string content, size_t chunk_size) :
|
|||
buffer(new char[chunk_size]),
|
||||
content(content),
|
||||
position(0),
|
||||
chunk_size(chunk_size) {}
|
||||
chunk_size(chunk_size),
|
||||
strings_read({ "" }) {}
|
||||
|
||||
SpyReader::~SpyReader() {
|
||||
delete buffer;
|
||||
}
|
||||
|
||||
const char * SpyReader::read(size_t *bytes_read) {
|
||||
if (position > content.size()) {
|
||||
*bytes_read = 0;
|
||||
return "";
|
||||
}
|
||||
|
||||
const char *start = content.data() + position;
|
||||
long len = position_for_char_index(start, content.size() - position, chunk_size);
|
||||
if (len < 0)
|
||||
|
|
@ -58,7 +64,8 @@ const char * SpyReader::read(size_t *bytes_read) {
|
|||
}
|
||||
|
||||
int SpyReader::seek(size_t pos) {
|
||||
strings_read.push_back("");
|
||||
if (position != pos)
|
||||
strings_read.push_back("");
|
||||
position = pos;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ describe("Parser", [&]() {
|
|||
auto insert_text = [&](size_t position, string text) {
|
||||
size_t prev_size = ts_node_size(root).bytes + ts_node_pos(root).bytes;
|
||||
AssertThat(reader->insert(position, text), IsTrue());
|
||||
ts_document_edit(doc, { position, 0, text.length() });
|
||||
ts_document_edit(doc, { position, text.length(), 0 });
|
||||
|
||||
root = ts_document_root_node(doc);
|
||||
size_t new_size = ts_node_size(root).bytes + ts_node_pos(root).bytes;
|
||||
|
|
@ -46,7 +46,7 @@ describe("Parser", [&]() {
|
|||
auto delete_text = [&](size_t position, size_t length) {
|
||||
size_t prev_size = ts_node_size(root).bytes + ts_node_pos(root).bytes;
|
||||
AssertThat(reader->erase(position, length), IsTrue());
|
||||
ts_document_edit(doc, { position, length, 0 });
|
||||
ts_document_edit(doc, { position, 0, length });
|
||||
|
||||
root = ts_document_root_node(doc);
|
||||
size_t new_size = ts_node_size(root).bytes + ts_node_pos(root).bytes;
|
||||
|
|
@ -257,21 +257,27 @@ describe("Parser", [&]() {
|
|||
|
||||
describe("new tokens near the beginning of the input", [&]() {
|
||||
before_each([&]() {
|
||||
set_text("123 * 456");
|
||||
chunk_size = 2;
|
||||
|
||||
set_text("123 * 456 ^ (10 + x)");
|
||||
|
||||
AssertThat(ts_node_string(root), Equals(
|
||||
"(DOCUMENT (product (number) (number)))"));
|
||||
"(DOCUMENT (product "
|
||||
"(number) "
|
||||
"(exponent (number) (group (sum (number) (variable))))))"));
|
||||
|
||||
insert_text(strlen("123"), " + 5 ");
|
||||
insert_text(strlen("123"), " + 5");
|
||||
});
|
||||
|
||||
it("updates the parse tree", [&]() {
|
||||
AssertThat(ts_node_string(root), Equals(
|
||||
"(DOCUMENT (sum (number) (product (number) (number))))"));
|
||||
"(DOCUMENT (sum (number) (product "
|
||||
"(number) "
|
||||
"(exponent (number) (group (sum (number) (variable)))))))"));
|
||||
});
|
||||
|
||||
it_skip("re-reads only the changed portion of the input", [&]() {
|
||||
AssertThat(reader->strings_read, Equals(vector<string>({ "\"key2\": 4, " })));
|
||||
it("re-reads only the changed portion of the input", [&]() {
|
||||
AssertThat(reader->strings_read, Equals(vector<string>({ "123 + 5 * ", "" })));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,15 @@
|
|||
|
||||
static const char *empty_chunk = "";
|
||||
|
||||
static void ts_lexer_read_next_chunk(TSLexer *lexer) {
|
||||
TSInput input = lexer->input;
|
||||
input.seek_fn(input.data, lexer->current_position);
|
||||
lexer->chunk_start = lexer->current_position.bytes;
|
||||
lexer->chunk = input.read_fn(input.data, &lexer->chunk_size);
|
||||
if (!lexer->chunk_size)
|
||||
lexer->chunk = empty_chunk;
|
||||
}
|
||||
|
||||
static bool advance(TSLexer *lexer) {
|
||||
|
||||
/*
|
||||
|
|
@ -27,10 +36,7 @@ static bool advance(TSLexer *lexer) {
|
|||
* the end of the current chunk.
|
||||
*/
|
||||
if (lexer->current_position.bytes >= lexer->chunk_start + lexer->chunk_size) {
|
||||
lexer->chunk_start += lexer->chunk_size;
|
||||
lexer->chunk = lexer->input.read_fn(lexer->input.data, &lexer->chunk_size);
|
||||
if (!lexer->chunk_size)
|
||||
lexer->chunk = empty_chunk;
|
||||
ts_lexer_read_next_chunk(lexer);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -76,13 +82,12 @@ TSLexer ts_lexer_make() {
|
|||
}
|
||||
|
||||
void ts_lexer_reset(TSLexer *lexer, TSLength position) {
|
||||
lexer->input.seek_fn(lexer->input.data, position);
|
||||
lexer->current_position = position;
|
||||
lexer->token_end_position = position;
|
||||
lexer->lookahead = 0;
|
||||
lexer->lookahead_size = 0;
|
||||
lexer->chunk_start = position.bytes;
|
||||
lexer->chunk_size = 0;
|
||||
lexer->chunk = lexer->input.read_fn(lexer->input.data, &lexer->chunk_size);
|
||||
|
||||
lexer->token_end_position = position;
|
||||
lexer->current_position = position;
|
||||
ts_lexer_read_next_chunk(lexer);
|
||||
|
||||
ts_lexer_advance(lexer);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
#include "tree_sitter/runtime.h"
|
||||
#include "tree_sitter/parser.h"
|
||||
#include "runtime/tree.h"
|
||||
|
|
@ -22,11 +23,53 @@ static TSParseAction action_for(const TSLanguage *lang, TSStateId state,
|
|||
return (lang->parse_table + (state * lang->symbol_count))[sym];
|
||||
}
|
||||
|
||||
static void lex(TSParser *parser, TSStateId lex_state) {
|
||||
parser->lookahead = parser->language->lex_fn(&parser->lexer, lex_state);
|
||||
static size_t breakdown_right_stack(TSParser *parser, TSLength cur_position,
|
||||
TSStateId state) {
|
||||
TSStack *stack = &parser->right_stack;
|
||||
size_t position = parser->total_chars - ts_stack_total_tree_size(stack).chars;
|
||||
|
||||
for (;;) {
|
||||
TSTree *node = ts_stack_top_node(stack);
|
||||
if (!node)
|
||||
break;
|
||||
|
||||
if (position > cur_position.chars)
|
||||
break;
|
||||
|
||||
bool can_be_used = action_for(parser->language, state, node->symbol).type !=
|
||||
TSParseActionTypeError;
|
||||
if (position == cur_position.chars && can_be_used)
|
||||
break;
|
||||
|
||||
size_t child_count;
|
||||
TSTree **children = ts_tree_children(node, &child_count);
|
||||
|
||||
DEBUG_PARSE("POP RIGHT %s", parser->language->symbol_names[node->symbol]);
|
||||
stack->size--;
|
||||
position += ts_tree_total_size(node).chars;
|
||||
|
||||
for (size_t i = child_count - 1; i + 1 > 0; i--) {
|
||||
TSTree *child = children[i];
|
||||
|
||||
if (position > cur_position.chars) {
|
||||
DEBUG_PARSE("PUSH RIGHT %s",
|
||||
parser->language->symbol_names[child->symbol]);
|
||||
ts_stack_push(stack, 0, child);
|
||||
position -= ts_tree_total_size(child).chars;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ts_tree_release(node);
|
||||
}
|
||||
|
||||
return position;
|
||||
}
|
||||
|
||||
static TSLength breakdown_stack(TSParser *parser, TSInputEdit *edit) {
|
||||
ts_stack_shrink(&parser->right_stack, 0);
|
||||
|
||||
if (!edit) {
|
||||
ts_stack_shrink(&parser->stack, 0);
|
||||
return ts_length_zero();
|
||||
|
|
@ -34,6 +77,8 @@ static TSLength breakdown_stack(TSParser *parser, TSInputEdit *edit) {
|
|||
|
||||
TSStack *stack = &parser->stack;
|
||||
TSLength position = ts_stack_total_tree_size(stack);
|
||||
parser->total_chars =
|
||||
position.chars + edit->chars_inserted - edit->chars_removed;
|
||||
|
||||
for (;;) {
|
||||
TSTree *node = ts_stack_top_node(stack);
|
||||
|
|
@ -45,29 +90,54 @@ static TSLength breakdown_stack(TSParser *parser, TSInputEdit *edit) {
|
|||
if (position.chars < edit->position && !children)
|
||||
break;
|
||||
|
||||
DEBUG_PARSE("POP %s", parser->language->symbol_names[node->symbol]);
|
||||
DEBUG_PARSE("POP LEFT %s", parser->language->symbol_names[node->symbol]);
|
||||
stack->size--;
|
||||
position = ts_length_sub(position, ts_tree_total_size(node));
|
||||
|
||||
for (size_t i = 0; i < child_count && position.chars < edit->position; i++) {
|
||||
size_t i = 0;
|
||||
for (; i < child_count && position.chars < edit->position; i++) {
|
||||
TSTree *child = children[i];
|
||||
TSStateId state = ts_stack_top_state(stack);
|
||||
TSParseAction action = action_for(parser->language, state, child->symbol);
|
||||
TSStateId next_state =
|
||||
action.type == TSParseActionTypeShift ? action.data.to_state : state;
|
||||
|
||||
DEBUG_PARSE("PUT BACK %s", parser->language->symbol_names[child->symbol]);
|
||||
DEBUG_PARSE("PUSH LEFT %s", parser->language->symbol_names[child->symbol]);
|
||||
ts_stack_push(stack, next_state, child);
|
||||
position = ts_length_add(position, ts_tree_total_size(child));
|
||||
}
|
||||
|
||||
for (size_t j = child_count - 1; j + 1 > i + 1; j--) {
|
||||
TSTree *child = children[j];
|
||||
DEBUG_PARSE("PUSH RIGHT %s",
|
||||
parser->language->symbol_names[child->symbol]);
|
||||
ts_stack_push(&parser->right_stack, 0, child);
|
||||
}
|
||||
|
||||
ts_tree_release(node);
|
||||
}
|
||||
|
||||
DEBUG_PARSE("RESUME %lu", position.chars);
|
||||
DEBUG_PARSE("RESUME LEFT %lu", position.chars);
|
||||
return position;
|
||||
}
|
||||
|
||||
static void lex(TSParser *parser, TSStateId lex_state) {
|
||||
TSStateId state = ts_stack_top_state(&parser->stack);
|
||||
size_t node_position =
|
||||
breakdown_right_stack(parser, parser->lexer.current_position, state);
|
||||
TSTree *node = ts_stack_top_node(&parser->right_stack);
|
||||
if (node && node_position == parser->lexer.current_position.chars) {
|
||||
DEBUG_PARSE("REUSE %s", parser->language->symbol_names[node->symbol]);
|
||||
|
||||
ts_stack_shrink(&parser->right_stack, parser->right_stack.size - 1);
|
||||
parser->lookahead = node;
|
||||
parser->lexer.current_position =
|
||||
ts_length_add(parser->lexer.current_position, ts_tree_total_size(node));
|
||||
} else {
|
||||
parser->lookahead = parser->language->lex_fn(&parser->lexer, lex_state);
|
||||
}
|
||||
}
|
||||
|
||||
static void resize_error(TSParser *parser, TSTree *error) {
|
||||
error->size =
|
||||
ts_length_sub(ts_length_sub(parser->lexer.token_start_position,
|
||||
|
|
@ -199,6 +269,7 @@ static TSTree *get_root(TSParser *parser) {
|
|||
TSParser ts_parser_make(const TSLanguage *language) {
|
||||
return (TSParser) { .lexer = ts_lexer_make(),
|
||||
.stack = ts_stack_make(),
|
||||
.right_stack = ts_stack_make(),
|
||||
.debug = 0,
|
||||
.language = language, };
|
||||
}
|
||||
|
|
@ -224,7 +295,8 @@ const TSTree *ts_parser_parse(TSParser *parser, TSInput input,
|
|||
TSStateId state = ts_stack_top_state(&parser->stack);
|
||||
if (!parser->lookahead)
|
||||
lex(parser, parser->language->lex_states[state]);
|
||||
TSParseAction action = action_for(parser->language, state, parser->lookahead->symbol);
|
||||
TSParseAction action =
|
||||
action_for(parser->language, state, parser->lookahead->symbol);
|
||||
|
||||
DEBUG_PARSE("LOOKAHEAD %s",
|
||||
parser->language->symbol_names[parser->lookahead->symbol]);
|
||||
|
|
|
|||
|
|
@ -10,6 +10,8 @@ extern "C" {
|
|||
typedef struct {
|
||||
TSLexer lexer;
|
||||
TSStack stack;
|
||||
TSStack right_stack;
|
||||
size_t total_chars;
|
||||
int debug;
|
||||
TSTree *lookahead;
|
||||
TSTree *next_lookahead;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue