diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index a0c1be12..a9d211ec 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -38,11 +38,15 @@ typedef struct { typedef struct { ts_input input; - const char *current_chunk; - size_t current_chunk_end; - size_t position; + + const char *chunk; + size_t chunk_start; + size_t chunk_size; + size_t position_in_chunk; + size_t token_end_position; size_t token_start_position; + ts_tree *lookahead_node; ts_tree *prev_lookahead_node; ts_state lex_state; @@ -60,9 +64,12 @@ static ts_parser ts_parser_make(ts_input input) { .input = input, .token_start_position = 0, .token_end_position = 0, - .position = 0, - .current_chunk = chunk, - .current_chunk_end = bytes_read, + + .chunk = chunk, + .chunk_size = bytes_read, + .chunk_start = 0, + .position_in_chunk = 0, + .lookahead_node = NULL, .prev_lookahead_node = NULL, .lex_state = 0, @@ -71,9 +78,13 @@ static ts_parser ts_parser_make(ts_input input) { }; return result; } + +static size_t ts_parser_position(const ts_parser *parser) { + return parser->chunk_start + parser->position_in_chunk; +} static char ts_parser_lookahead_char(const ts_parser *parser) { - return parser->current_chunk[parser->position]; + return parser->chunk[parser->position_in_chunk]; } static ts_symbol ts_parser_lookahead_sym(const ts_parser *parser) { @@ -143,13 +154,19 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_ DEBUG_PARSE("reduce: %s, state: %u \n", ts_symbol_names[symbol], ts_parser_parse_state(parser)); } +static const char empty_chunk[1] = { '\0' }; + static void ts_parser_advance(ts_parser *parser) { - if (parser->position < parser->current_chunk_end) { - parser->position++; + if (parser->position_in_chunk < parser->chunk_size - 1) { + parser->position_in_chunk++; } else { - size_t bytes_read = 0; - parser->current_chunk = parser->input.read_fn(parser->input.data, &bytes_read); - parser->current_chunk_end += bytes_read; + parser->chunk_start += parser->chunk_size; + parser->chunk = parser->input.read_fn(parser->input.data, &parser->chunk_size); + if (parser->chunk_size == 0) { + parser->chunk = empty_chunk; + parser->chunk_size = 1; + } + parser->position_in_chunk = 0; } } @@ -161,10 +178,11 @@ static void ts_parser_advance_to_state(ts_parser *parser, ts_state lex_state) { static void ts_parser_set_lookahead_sym(ts_parser *parser, ts_symbol symbol) { DEBUG_LEX("token: %s \n", ts_symbol_names[symbol]); - size_t size = parser->position - parser->token_start_position; + size_t position = ts_parser_position(parser); + size_t size = position - parser->token_start_position; size_t offset = parser->token_start_position - parser->token_end_position; parser->lookahead_node = ts_tree_make_leaf(symbol, size, offset); - parser->token_end_position = parser->position; + parser->token_end_position = position; } static ts_tree * ts_parser_tree(ts_parser *parser) { @@ -175,7 +193,7 @@ static ts_tree * ts_parser_tree(ts_parser *parser) { static void ts_parser_skip_whitespace(ts_parser *parser) { while (isspace(ts_parser_lookahead_char(parser))) ts_parser_advance(parser); - parser->token_start_position = parser->position; + parser->token_start_position = ts_parser_position(parser); } static int ts_parser_handle_error(ts_parser *parser, size_t count, const ts_symbol *expected_symbols) { diff --git a/spec/runtime/helpers/spy_reader.cc b/spec/runtime/helpers/spy_reader.cc new file mode 100644 index 00000000..f376ecde --- /dev/null +++ b/spec/runtime/helpers/spy_reader.cc @@ -0,0 +1,37 @@ +#include "helpers/spy_reader.h" +#include + +using std::string; + +static const char * spy_read(void *data, size_t *bytes_read) { + SpyReader *reader = static_cast(data); + size_t size = std::min(reader->chunk_size, + reader->content.length() - reader->position); + const char *result = reader->content.data() + reader->position; + reader->chunks_read.push_back(string(result, size)); + reader->position += size; + *bytes_read = size; + return result; +} + +static int spy_seek(void *data, size_t position) { + SpyReader *reader = static_cast(data); + reader->position = position; + return 0; +} + +static void spy_release(void *data) { + SpyReader *reader = static_cast(data); + delete reader; +} + +SpyReader::SpyReader(string content, size_t chunk_size) : + content(content), + position(0), + chunk_size(chunk_size), + input({ + .read_fn = spy_read, + .seek_fn = spy_seek, + .release_fn = spy_release, + .data = this + }) {} diff --git a/spec/runtime/helpers/spy_reader.h b/spec/runtime/helpers/spy_reader.h new file mode 100644 index 00000000..457d5898 --- /dev/null +++ b/spec/runtime/helpers/spy_reader.h @@ -0,0 +1,19 @@ +#ifndef HELPERS_SPY_READER_H_ +#define HELPERS_SPY_READER_H_ + +#include +#include +#include "tree_sitter/runtime.h" + +class SpyReader { +public: + SpyReader(std::string content, size_t chunk_size); + + std::string content; + size_t position; + size_t chunk_size; + ts_input input; + std::vector chunks_read; +}; + +#endif // HELPERS_SPY_READER_H_ diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc new file mode 100644 index 00000000..d247eb87 --- /dev/null +++ b/spec/runtime/parser_spec.cc @@ -0,0 +1,36 @@ +#include "runtime_spec_helper.h" +#include "helpers/spy_reader.h" + +extern ts_parse_config ts_parse_config_json; + +START_TEST + +describe("reading from an input", [&]() { + ts_document *doc; + + before_each([&]() { + doc = ts_document_make(); + ts_document_set_parser(doc, ts_parse_config_json); + }); + + after_each([&]() { + ts_document_free(doc); + }); + + it("reads the entire input", [&]() { + SpyReader reader("\"ok go do it!\"", 3); + ts_document_set_input(doc, reader.input); + + AssertThat(string(ts_document_string(doc)), Equals("(value (string))")); + AssertThat(reader.chunks_read, Equals(vector({ + "\"ok", + " go", + " do", + " it", + "!\"", + "" + }))); + }); +}); + +END_TEST \ No newline at end of file diff --git a/src/compiler/prepared_grammar.h b/src/compiler/prepared_grammar.h index ee2804fa..f3a26b80 100644 --- a/src/compiler/prepared_grammar.h +++ b/src/compiler/prepared_grammar.h @@ -9,7 +9,7 @@ namespace tree_sitter { class PreparedGrammar : public Grammar { - public: + public: PreparedGrammar(std::string start_rule_name, const std::map &rules, const std::map &aux_rules); diff --git a/src/runtime/document.c b/src/runtime/document.c index b5b31bdd..26e33823 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -39,8 +39,14 @@ typedef struct { const char * ts_string_input_read(void *d, size_t *bytes_read) { ts_string_input_data *data = (ts_string_input_data *)d; - *bytes_read = data->length; - return data->string + data->position; + if (data->position >= data->length) { + *bytes_read = 0; + return ""; + } + size_t previous_position = data->position; + data->position = data->length; + *bytes_read = data->position - previous_position; + return data->string + previous_position; } int ts_string_input_seek(void *d, size_t position) {