diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index 73cf4fd1..9c38f8fb 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -40,8 +40,7 @@ typedef struct { typedef struct { void *payload; - const char *(*read)(void *payload, uint32_t *bytes_read); - int (*seek)(void *payload, uint32_t byte_index, TSPoint position); + const char *(*read)(void *payload, uint32_t byte_index, TSPoint position, uint32_t *bytes_read); TSInputEncoding encoding; } TSInput; diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index e6e5aa58..26d97f75 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -17,14 +17,13 @@ static const char empty_chunk[2] = { 0, 0 }; static void ts_lexer__get_chunk(Lexer *self) { - TSInput input = self->input; - if (!self->chunk || - self->current_position.bytes != self->chunk_start + self->chunk_size) { - input.seek(input.payload, self->current_position.bytes, self->current_position.extent); - } - self->chunk_start = self->current_position.bytes; - self->chunk = input.read(input.payload, &self->chunk_size); + self->chunk = self->input.read( + self->input.payload, + self->current_position.bytes, + self->current_position.extent, + &self->chunk_size + ); if (!self->chunk_size) self->chunk = empty_chunk; } @@ -74,8 +73,9 @@ static void ts_lexer__advance(void *payload, bool skip) { LOG_CHARACTER("consume", self->data.lookahead); } - if (self->current_position.bytes >= self->chunk_start + self->chunk_size) + if (self->current_position.bytes >= self->chunk_start + self->chunk_size) { ts_lexer__get_chunk(self); + } ts_lexer__get_lookahead(self); } @@ -105,10 +105,8 @@ static uint32_t ts_lexer__get_column(void *payload) { return result; } -/* - * The lexer's advance method is stored as a struct field so that generated - * parsers can call it without needing to be linked against this library. 
- */ +// The lexer's methods are stored as a struct field so that generated +// parsers can call them without needing to be linked against this library. void ts_lexer_init(Lexer *self) { *self = (Lexer){ @@ -163,11 +161,8 @@ void ts_lexer_start(Lexer *self) { self->token_start_position = self->current_position; self->token_end_position = LENGTH_UNDEFINED; self->data.result_symbol = 0; - - if (!self->chunk) - ts_lexer__get_chunk(self); - if (!self->lookahead_size) - ts_lexer__get_lookahead(self); + if (!self->chunk) ts_lexer__get_chunk(self); + if (!self->lookahead_size) ts_lexer__get_lookahead(self); } void ts_lexer_advance_to_end(Lexer *self) { diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 6e2299d4..58302445 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -62,6 +62,7 @@ struct TSParser { size_t operation_limit; volatile bool enabled; bool halt_on_error; + }; typedef struct { @@ -1461,7 +1462,7 @@ TSTree *ts_parser_resume(TSParser *self) { self->finished_tree = NULL; ts_stack_clear(self->stack); ts_parser__set_cached_token(self, 0, NULL, NULL); - ts_lexer_set_input(&self->lexer, (TSInput) { NULL, NULL, NULL, 0 }); + ts_lexer_set_input(&self->lexer, (TSInput) { NULL, NULL, 0 }); return result; } @@ -1475,5 +1476,5 @@ TSTree *ts_parser_parse_string(TSParser *self, const TSTree *old_tree, const char *string, uint32_t length) { TSStringInput input; ts_string_input_init(&input, string, length); - return ts_parser_parse(self, old_tree, input.input); + return ts_parser_parse(self, old_tree, ts_string_input_get(&input)); } diff --git a/src/runtime/string_input.c b/src/runtime/string_input.c index c4e13e0c..3ddc0c0d 100644 --- a/src/runtime/string_input.c +++ b/src/runtime/string_input.c @@ -2,32 +2,27 @@ #include "runtime/string_input.h" #include -static const char *ts_string_input__read(void *payload, uint32_t *bytes_read) { +static const char *ts_string_input__read(void *payload, uint32_t byte_offset, + TSPoint _, uint32_t *bytes_read) { 
TSStringInput *input = (TSStringInput *)payload; - if (input->position >= input->length) { + if (byte_offset >= input->length) { *bytes_read = 0; return ""; + } else { + *bytes_read = input->length - byte_offset; + return input->string + byte_offset; } - uint32_t previous_position = input->position; - input->position = input->length; - *bytes_read = input->position - previous_position; - return input->string + previous_position; -} - -static int ts_string_input__seek(void *payload, uint32_t byte, TSPoint _) { - TSStringInput *input = (TSStringInput *)payload; - input->position = byte; - return (byte < input->length); } void ts_string_input_init(TSStringInput *self, const char *string, uint32_t length) { self->string = string; - self->position = 0; self->length = length; - self->input = (TSInput) { +} + +TSInput ts_string_input_get(TSStringInput *self) { + return (TSInput) { .payload = self, .read = ts_string_input__read, - .seek = ts_string_input__seek, .encoding = TSInputEncodingUTF8, }; } diff --git a/src/runtime/string_input.h b/src/runtime/string_input.h index 19171e4f..b2e7536f 100644 --- a/src/runtime/string_input.h +++ b/src/runtime/string_input.h @@ -9,12 +9,11 @@ extern "C" { typedef struct { const char *string; - uint32_t position; uint32_t length; - TSInput input; } TSStringInput; void ts_string_input_init(TSStringInput *, const char *, uint32_t); +TSInput ts_string_input_get(TSStringInput *); #ifdef __cplusplus } diff --git a/test/helpers/spy_input.cc b/test/helpers/spy_input.cc index 6126f101..1b89e270 100644 --- a/test/helpers/spy_input.cc +++ b/test/helpers/spy_input.cc @@ -11,7 +11,6 @@ using std::vector; SpyInput::SpyInput(string content, size_t chars_per_chunk) : buffer(nullptr), - byte_offset(0), chars_per_chunk(chars_per_chunk), content(content), encoding(TSInputEncodingUTF8), @@ -42,22 +41,23 @@ static void add_byte_range(vector> *ranges, ranges->push_back({start, end}); } -const char * SpyInput::read(void *payload, uint32_t *bytes_read) { 
+const char *SpyInput::read(void *payload, uint32_t byte_offset, + TSPoint position, uint32_t *bytes_read) { auto spy = static_cast<SpyInput *>(payload); - if (spy->byte_offset > spy->content.size()) { + if (byte_offset >= spy->content.size()) { *bytes_read = 0; return ""; } - long byte_count = string_byte_for_character(spy->encoding, spy->content, spy->byte_offset, spy->chars_per_chunk); - if (byte_count < 0) - byte_count = spy->content.size() - spy->byte_offset; + long byte_count = string_byte_for_character(spy->encoding, spy->content, byte_offset, spy->chars_per_chunk); + if (byte_count < 0) { + byte_count = spy->content.size() - byte_offset; + } - string result = spy->content.substr(spy->byte_offset, byte_count); + string result = spy->content.substr(byte_offset, byte_count); *bytes_read = byte_count; - add_byte_range(&spy->ranges_read, spy->byte_offset, byte_count); - spy->byte_offset += byte_count; + add_byte_range(&spy->ranges_read, byte_offset, byte_count); /* * This class stores its entire `content` in a contiguous buffer, but we want @@ -79,12 +79,6 @@ const char * SpyInput::read(void *payload, uint32_t *bytes_read) { return spy->buffer; } -int SpyInput::seek(void *payload, uint32_t byte, TSPoint position) { - auto spy = static_cast<SpyInput *>(payload); - spy->byte_offset = byte; - return 0; -} - vector SpyInput::strings_read() const { vector result; for (auto &range : ranges_read) { @@ -97,7 +91,6 @@ TSInput SpyInput::input() { TSInput result; result.payload = this; result.encoding = encoding; - result.seek = seek; result.read = read; return result; } diff --git a/test/helpers/spy_input.h b/test/helpers/spy_input.h index e066ab1c..31cd1d8f 100644 --- a/test/helpers/spy_input.h +++ b/test/helpers/spy_input.h @@ -13,11 +13,9 @@ struct SpyInputEdit { class SpyInput { char *buffer; - uint32_t byte_offset; std::vector undo_stack; - static const char * read(void *, uint32_t *); - static int seek(void *, uint32_t, TSPoint); + static const char *read(void *, uint32_t, TSPoint, 
uint32_t *); std::pair swap_substr(size_t, size_t, std::string); public: diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc index 7c9e786f..37942971 100644 --- a/test/runtime/parser_test.cc +++ b/test/runtime/parser_test.cc @@ -630,15 +630,12 @@ describe("Parser", [&]() { size_t read_count = 0; TSInput infinite_input = { &read_count, - [](void *payload, uint32_t *bytes_read) { + [](void *payload, uint32_t byte, TSPoint position, uint32_t *bytes_read) { size_t *read_count = static_cast<size_t *>(payload); assert((*read_count)++ < 100000); *bytes_read = 1; return "["; }, - [](void *payload, unsigned byte, TSPoint position) -> int { - return true; - }, TSInputEncodingUTF8 }; @@ -681,15 +678,12 @@ describe("Parser", [&]() { // it has been read. TSInput infinite_input = { &state, - [](void *payload, uint32_t *bytes_read) { + [](void *payload, uint32_t byte, TSPoint position, uint32_t *bytes_read) { InputState *state = static_cast<InputState *>(payload); assert(state->read_count++ <= 10); *bytes_read = strlen(state->string); return state->string; }, - [](void *payload, unsigned byte, TSPoint position) -> int { - return true; - }, TSInputEncodingUTF8 };