// Patch overview: a unified diff touching five files. This commentary sits
// before the first "diff --git" header and is not part of any hunk.
//
// NOTE(review): the diff appears to have lost its line breaks (whole hunks
// share a single line), and the bare "#include" directives in parser.c and the
// "vector({" in parser_spec.cc look as if angle-bracketed text was stripped.
// Presumably an extraction artifact; restore the original patch before
// applying it.
//
// spec/runtime/helpers/spy_reader.cc
//   - The constructor now seeds strings_read with a single empty string.
//   - read() sets *bytes_read to 0 and returns "" when position is greater
//     than content.size().
//   - seek() appends a new empty entry to strings_read only when the requested
//     position differs from the current one.
//   - NOTE(review): the unchanged destructor context line uses `delete buffer;`
//     although buffer is allocated with `new char[chunk_size]`; this should be
//     `delete[] buffer;` and is worth a follow-up change.
//
// spec/runtime/parser_spec.cc
//   - insert_text / delete_text swap the second and third initializer values
//     passed to ts_document_edit (presumably to match the field order of the
//     edit struct, which is not shown here; confirm).
//   - The "new tokens near the beginning of the input" group now sets
//     chunk_size = 2, parses a longer expression, inserts " + 5", and turns the
//     previously skipped "re-reads only the changed portion of the input" case
//     into an active test.
//
// src/runtime/lexer.c
//   - Adds ts_lexer_read_next_chunk(): seeks the input to
//     lexer->current_position, records that byte offset as chunk_start, reads
//     a chunk, and substitutes empty_chunk when the read reports zero bytes.
//   - advance() and ts_lexer_reset() call the helper instead of reading inline.
//
// src/runtime/parser.c and src/runtime/parser.h
//   - TSParser gains the fields right_stack and total_chars.
//   - breakdown_stack() shrinks right_stack to 0, sets total_chars to
//     position.chars + chars_inserted - chars_removed, and pushes the children
//     it did not put back on the left stack onto right_stack, last child first.
//     NOTE(review): the loop condition `j + 1 > i + 1` excludes children[i];
//     presumably deliberate so that the token right after the edit is re-lexed,
//     but confirm it is not an off-by-one.
//   - breakdown_right_stack() pops nodes from right_stack and re-pushes their
//     children (last to first) while the running position is still past the
//     lexer position. It stops when the stack is empty, when the top node
//     starts after the lexer position, or when it starts exactly there and has
//     a non-error parse action in the given state.
//   - lex() first calls breakdown_right_stack(); if the top right_stack node
//     starts at the lexer's current position it becomes the lookahead and
//     current_position is advanced by the node's total size, otherwise
//     language->lex_fn is called as before.
//     NOTE(review): the reuse path moves lexer.current_position without any
//     visible re-sync of the lexer's chunk or lookahead in this patch; confirm
//     the lexer is reset before the next lex_fn call.
diff --git a/spec/runtime/helpers/spy_reader.cc b/spec/runtime/helpers/spy_reader.cc index 8d07754c..bc71df74 100644 --- a/spec/runtime/helpers/spy_reader.cc +++ b/spec/runtime/helpers/spy_reader.cc @@ -36,13 +36,19 @@ SpyReader::SpyReader(string content, size_t chunk_size) : buffer(new char[chunk_size]), content(content), position(0), - chunk_size(chunk_size) {} + chunk_size(chunk_size), + strings_read({ "" }) {} SpyReader::~SpyReader() { delete buffer; } const char * SpyReader::read(size_t *bytes_read) { + if (position > content.size()) { + *bytes_read = 0; + return ""; + } + const char *start = content.data() + position; long len = position_for_char_index(start, content.size() - position, chunk_size); if (len < 0) @@ -58,7 +64,8 @@ const char * SpyReader::read(size_t *bytes_read) { } int SpyReader::seek(size_t pos) { - strings_read.push_back(""); + if (position != pos) + strings_read.push_back(""); position = pos; return 0; } diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index 0f652efb..f72ae1b2 100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -36,7 +36,7 @@ describe("Parser", [&]() { auto insert_text = [&](size_t position, string text) { size_t prev_size = ts_node_size(root).bytes + ts_node_pos(root).bytes; AssertThat(reader->insert(position, text), IsTrue()); - ts_document_edit(doc, { position, 0, text.length() }); + ts_document_edit(doc, { position, text.length(), 0 }); root = ts_document_root_node(doc); size_t new_size = ts_node_size(root).bytes + ts_node_pos(root).bytes; @@ -46,7 +46,7 @@ describe("Parser", [&]() { auto delete_text = [&](size_t position, size_t length) { size_t prev_size = ts_node_size(root).bytes + ts_node_pos(root).bytes; AssertThat(reader->erase(position, length), IsTrue()); - ts_document_edit(doc, { position, length, 0 }); + ts_document_edit(doc, { position, 0, length }); root = ts_document_root_node(doc); size_t new_size = ts_node_size(root).bytes + ts_node_pos(root).bytes; @@ 
-257,21 +257,27 @@ describe("Parser", [&]() { describe("new tokens near the beginning of the input", [&]() { before_each([&]() { - set_text("123 * 456"); + chunk_size = 2; + + set_text("123 * 456 ^ (10 + x)"); AssertThat(ts_node_string(root), Equals( - "(DOCUMENT (product (number) (number)))")); + "(DOCUMENT (product " + "(number) " + "(exponent (number) (group (sum (number) (variable))))))")); - insert_text(strlen("123"), " + 5 "); + insert_text(strlen("123"), " + 5"); }); it("updates the parse tree", [&]() { AssertThat(ts_node_string(root), Equals( - "(DOCUMENT (sum (number) (product (number) (number))))")); + "(DOCUMENT (sum (number) (product " + "(number) " + "(exponent (number) (group (sum (number) (variable)))))))")); }); - it_skip("re-reads only the changed portion of the input", [&]() { - AssertThat(reader->strings_read, Equals(vector({ "\"key2\": 4, " }))); + it("re-reads only the changed portion of the input", [&]() { + AssertThat(reader->strings_read, Equals(vector({ "123 + 5 * ", "" }))); }); }); diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index 898d6f75..bb7b6f1d 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -6,6 +6,15 @@ static const char *empty_chunk = ""; +static void ts_lexer_read_next_chunk(TSLexer *lexer) { + TSInput input = lexer->input; + input.seek_fn(input.data, lexer->current_position); + lexer->chunk_start = lexer->current_position.bytes; + lexer->chunk = input.read_fn(input.data, &lexer->chunk_size); + if (!lexer->chunk_size) + lexer->chunk = empty_chunk; +} + static bool advance(TSLexer *lexer) { /* @@ -27,10 +36,7 @@ static bool advance(TSLexer *lexer) { * the end of the current chunk. 
*/ if (lexer->current_position.bytes >= lexer->chunk_start + lexer->chunk_size) { - lexer->chunk_start += lexer->chunk_size; - lexer->chunk = lexer->input.read_fn(lexer->input.data, &lexer->chunk_size); - if (!lexer->chunk_size) - lexer->chunk = empty_chunk; + ts_lexer_read_next_chunk(lexer); } /* @@ -76,13 +82,12 @@ TSLexer ts_lexer_make() { } void ts_lexer_reset(TSLexer *lexer, TSLength position) { - lexer->input.seek_fn(lexer->input.data, position); - lexer->current_position = position; - lexer->token_end_position = position; lexer->lookahead = 0; lexer->lookahead_size = 0; - lexer->chunk_start = position.bytes; - lexer->chunk_size = 0; - lexer->chunk = lexer->input.read_fn(lexer->input.data, &lexer->chunk_size); + + lexer->token_end_position = position; + lexer->current_position = position; + ts_lexer_read_next_chunk(lexer); + ts_lexer_advance(lexer); } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 15424cd9..62e7abb8 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -1,4 +1,5 @@ #include +#include #include "tree_sitter/runtime.h" #include "tree_sitter/parser.h" #include "runtime/tree.h" @@ -22,11 +23,53 @@ static TSParseAction action_for(const TSLanguage *lang, TSStateId state, return (lang->parse_table + (state * lang->symbol_count))[sym]; } -static void lex(TSParser *parser, TSStateId lex_state) { - parser->lookahead = parser->language->lex_fn(&parser->lexer, lex_state); +static size_t breakdown_right_stack(TSParser *parser, TSLength cur_position, + TSStateId state) { + TSStack *stack = &parser->right_stack; + size_t position = parser->total_chars - ts_stack_total_tree_size(stack).chars; + + for (;;) { + TSTree *node = ts_stack_top_node(stack); + if (!node) + break; + + if (position > cur_position.chars) + break; + + bool can_be_used = action_for(parser->language, state, node->symbol).type != + TSParseActionTypeError; + if (position == cur_position.chars && can_be_used) + break; + + size_t child_count; + TSTree **children = 
ts_tree_children(node, &child_count); + + DEBUG_PARSE("POP RIGHT %s", parser->language->symbol_names[node->symbol]); + stack->size--; + position += ts_tree_total_size(node).chars; + + for (size_t i = child_count - 1; i + 1 > 0; i--) { + TSTree *child = children[i]; + + if (position > cur_position.chars) { + DEBUG_PARSE("PUSH RIGHT %s", + parser->language->symbol_names[child->symbol]); + ts_stack_push(stack, 0, child); + position -= ts_tree_total_size(child).chars; + } else { + break; + } + } + + ts_tree_release(node); + } + + return position; } static TSLength breakdown_stack(TSParser *parser, TSInputEdit *edit) { + ts_stack_shrink(&parser->right_stack, 0); + if (!edit) { ts_stack_shrink(&parser->stack, 0); return ts_length_zero(); @@ -34,6 +77,8 @@ static TSLength breakdown_stack(TSParser *parser, TSInputEdit *edit) { TSStack *stack = &parser->stack; TSLength position = ts_stack_total_tree_size(stack); + parser->total_chars = + position.chars + edit->chars_inserted - edit->chars_removed; for (;;) { TSTree *node = ts_stack_top_node(stack); @@ -45,29 +90,54 @@ static TSLength breakdown_stack(TSParser *parser, TSInputEdit *edit) { if (position.chars < edit->position && !children) break; - DEBUG_PARSE("POP %s", parser->language->symbol_names[node->symbol]); + DEBUG_PARSE("POP LEFT %s", parser->language->symbol_names[node->symbol]); stack->size--; position = ts_length_sub(position, ts_tree_total_size(node)); - for (size_t i = 0; i < child_count && position.chars < edit->position; i++) { + size_t i = 0; + for (; i < child_count && position.chars < edit->position; i++) { TSTree *child = children[i]; TSStateId state = ts_stack_top_state(stack); TSParseAction action = action_for(parser->language, state, child->symbol); TSStateId next_state = action.type == TSParseActionTypeShift ? 
action.data.to_state : state; - DEBUG_PARSE("PUT BACK %s", parser->language->symbol_names[child->symbol]); + DEBUG_PARSE("PUSH LEFT %s", parser->language->symbol_names[child->symbol]); ts_stack_push(stack, next_state, child); position = ts_length_add(position, ts_tree_total_size(child)); } + for (size_t j = child_count - 1; j + 1 > i + 1; j--) { + TSTree *child = children[j]; + DEBUG_PARSE("PUSH RIGHT %s", + parser->language->symbol_names[child->symbol]); + ts_stack_push(&parser->right_stack, 0, child); + } + ts_tree_release(node); } - DEBUG_PARSE("RESUME %lu", position.chars); + DEBUG_PARSE("RESUME LEFT %lu", position.chars); return position; } +static void lex(TSParser *parser, TSStateId lex_state) { + TSStateId state = ts_stack_top_state(&parser->stack); + size_t node_position = + breakdown_right_stack(parser, parser->lexer.current_position, state); + TSTree *node = ts_stack_top_node(&parser->right_stack); + if (node && node_position == parser->lexer.current_position.chars) { + DEBUG_PARSE("REUSE %s", parser->language->symbol_names[node->symbol]); + + ts_stack_shrink(&parser->right_stack, parser->right_stack.size - 1); + parser->lookahead = node; + parser->lexer.current_position = + ts_length_add(parser->lexer.current_position, ts_tree_total_size(node)); + } else { + parser->lookahead = parser->language->lex_fn(&parser->lexer, lex_state); + } +} + static void resize_error(TSParser *parser, TSTree *error) { error->size = ts_length_sub(ts_length_sub(parser->lexer.token_start_position, @@ -199,6 +269,7 @@ static TSTree *get_root(TSParser *parser) { TSParser ts_parser_make(const TSLanguage *language) { return (TSParser) { .lexer = ts_lexer_make(), .stack = ts_stack_make(), + .right_stack = ts_stack_make(), .debug = 0, .language = language, }; } @@ -224,7 +295,8 @@ const TSTree *ts_parser_parse(TSParser *parser, TSInput input, TSStateId state = ts_stack_top_state(&parser->stack); if (!parser->lookahead) lex(parser, parser->language->lex_states[state]); - 
TSParseAction action = action_for(parser->language, state, parser->lookahead->symbol); + TSParseAction action = + action_for(parser->language, state, parser->lookahead->symbol); DEBUG_PARSE("LOOKAHEAD %s", parser->language->symbol_names[parser->lookahead->symbol]); diff --git a/src/runtime/parser.h b/src/runtime/parser.h index 5e171e13..57135af9 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -10,6 +10,8 @@ extern "C" { typedef struct { TSLexer lexer; TSStack stack; + TSStack right_stack; + size_t total_chars; int debug; TSTree *lookahead; TSTree *next_lookahead;