Abstract parser input behind a chunked ts_input reader.

Review notes:
  * ts_input.read_fn returns the next NUL-terminated chunk; NULL or an empty
    chunk means end of input. The parser keeps `position` absolute and tracks
    where the current chunk began in `chunk_start`.
  * The string input hands out its text once, is allocated with `new` and
    released with `delete`, and is released by ts_document_set_input_string.
  * Leading whitespace and blank context lines were reconstructed from the
    hunk line counts; use `git apply --ignore-whitespace` if context does not
    match. The `index` blob ids refer to the originally recorded post-image.

diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h
index 4af24e7c..77171498 100644
--- a/include/tree_sitter/parser.h
+++ b/include/tree_sitter/parser.h
@@ -37,7 +37,9 @@ typedef struct {
 } ts_stack_entry;
 
 typedef struct {
-    const char *input;
+    ts_input input;
+    const char *current_chunk;
+    size_t chunk_start;
     size_t position;
     size_t token_end_position;
     size_t token_start_position;
@@ -51,12 +53,14 @@ typedef struct {
 static void ts_lex(ts_parser *parser);
 static const ts_symbol * ts_recover(ts_state state, ts_state *to_state, size_t *count);
 
-static ts_parser ts_parser_make(const char *input) {
+static ts_parser ts_parser_make(ts_input input) {
     ts_parser result = {
         .input = input,
         .token_start_position = 0,
         .token_end_position = 0,
         .position = 0,
+        .current_chunk = input.read_fn(input.data),
+        .chunk_start = 0,
         .lookahead_node = NULL,
         .prev_lookahead_node = NULL,
         .lex_state = 0,
@@ -67,7 +71,10 @@ static ts_parser ts_parser_make(const char *input) {
 }
 
 static char ts_parser_lookahead_char(const ts_parser *parser) {
-    return parser->input[parser->position];
+    /* `position` is absolute; index the chunk relative to where it began. */
+    if (!parser->current_chunk)
+        return '\0';
+    return parser->current_chunk[parser->position - parser->chunk_start];
 }
 
 static ts_symbol ts_parser_lookahead_sym(const ts_parser *parser) {
@@ -136,14 +143,31 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_
     ts_parser_shrink_stack(parser, new_stack_size);
     DEBUG_PARSE("reduce: %s, state: %u \n", ts_symbol_names[symbol], ts_parser_parse_state(parser));
 }
-
-static void ts_parser_advance(ts_parser *parser, ts_state lex_state) {
-    DEBUG_LEX("character: '%c' \n", ts_parser_lookahead_char(parser));
+
+/*
+ * Move to the next character. `position` stays absolute: when the current
+ * chunk runs out, the next chunk is fetched and `chunk_start` records where
+ * it begins. At end of input (NULL or empty chunk) the parser stays put.
+ */
+static void ts_parser_advance(ts_parser *parser) {
     if (ts_parser_lookahead_char(parser))
         parser->position++;
-    parser->lex_state = lex_state;
+    if (ts_parser_lookahead_char(parser))
+        return;
+    const char *next_chunk = parser->input.read_fn(parser->input.data);
+    if (next_chunk && next_chunk[0]) {
+        parser->current_chunk = next_chunk;
+        parser->chunk_start = parser->position;
+    }
 }
 
+/* Consume one character and switch the lexer to `lex_state`. */
+static void ts_parser_advance_to_state(ts_parser *parser, ts_state lex_state) {
+    DEBUG_LEX("character: '%c' \n", ts_parser_lookahead_char(parser));
+    ts_parser_advance(parser);
+    parser->lex_state = lex_state;
+}
+
 static void ts_parser_set_lookahead_sym(ts_parser *parser, ts_symbol symbol) {
     DEBUG_LEX("token: %s \n", ts_symbol_names[symbol]);
     size_t size = parser->position - parser->token_start_position;
@@ -159,7 +183,7 @@ static ts_tree * ts_parser_tree(ts_parser *parser) {
 
 static void ts_parser_skip_whitespace(ts_parser *parser) {
     while (isspace(ts_parser_lookahead_char(parser)))
-        parser->position++;
+        ts_parser_advance(parser);
     parser->token_start_position = parser->position;
 }
 
@@ -198,7 +222,7 @@ static int ts_parser_handle_error(ts_parser *parser, size_t count, const ts_symb
 static void ts_lex(ts_parser *parser)
 
 #define PARSE_FN() \
-static const ts_tree * ts_parse(const char *input)
+static const ts_tree * ts_parse(ts_input input)
 
 #define SYMBOL_NAMES \
 static const char *ts_symbol_names[] =
@@ -243,7 +267,7 @@ parser->lex_state
 { ts_parser_shift(parser, state); goto next_state; }
 
 #define ADVANCE(state_index) \
-{ ts_parser_advance(parser, state_index); goto next_state; }
+{ ts_parser_advance_to_state(parser, state_index); goto next_state; }
 
 #define REDUCE(symbol, child_count, collapse_flags) \
 { \
diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h
index 79a560bf..604b98a4 100644
--- a/include/tree_sitter/runtime.h
+++ b/include/tree_sitter/runtime.h
@@ -40,10 +40,19 @@ char * ts_tree_error_string(const ts_tree *tree, const char **names);
 size_t ts_tree_child_count(const ts_tree *tree);
 ts_tree ** ts_tree_children(const ts_tree *tree);
 
-typedef const ts_tree * ts_parse_fn(const char *);
+/*
+ * A source of input text, delivered as NUL-terminated chunks.
+ * read_fn returns the next chunk; NULL or an empty chunk means end of input.
+ */
+typedef struct {
+    void *data;
+    const char * (* read_fn)(void *data);
+    int (* seek_fn)(void *data, size_t position);
+    void (* release_fn)(void *data);
+} ts_input;
 
 typedef struct {
-    ts_parse_fn *parse_fn;
+    const ts_tree * (* parse_fn)(ts_input);
     const char **symbol_names;
 } ts_parse_config;
 
@@ -51,10 +60,11 @@ typedef struct ts_document ts_document;
 
 ts_document * ts_document_make();
 void ts_document_free(ts_document *);
-void ts_document_set_parser(ts_document *document, ts_parse_config config);
-void ts_document_set_input_string(ts_document *document, const char *text);
-const ts_tree * ts_document_tree(const ts_document *document);
-const char * ts_document_string(const ts_document *document);
+void ts_document_set_parser(ts_document *, ts_parse_config);
+void ts_document_set_input(ts_document *, ts_input input);
+void ts_document_set_input_string(ts_document *, const char *text);
+const ts_tree * ts_document_tree(const ts_document *);
+const char * ts_document_string(const ts_document *);
 
 #ifdef __cplusplus
 }
diff --git a/src/runtime/document.cpp b/src/runtime/document.cpp
index baefd9c1..d22dcce6 100644
--- a/src/runtime/document.cpp
+++ b/src/runtime/document.cpp
@@ -1,11 +1,10 @@
 #include "tree_sitter/runtime.h"
+#include <string.h>
 
 struct ts_document {
-    ts_parse_fn *parse_fn;
-    const char **symbol_names;
+    ts_parse_config parse_config;
     const ts_tree *tree;
     size_t error_count;
-    ts_tree **errors;
 };
 
 ts_document * ts_document_make() {
@@ -17,14 +16,7 @@ void ts_document_free(ts_document *document) {
 }
 
 void ts_document_set_parser(ts_document *document, ts_parse_config config) {
-    document->parse_fn = config.parse_fn;
-    document->symbol_names = config.symbol_names;
-}
-
-void ts_document_set_input_string(ts_document *document, const char *text) {
-    const ts_tree * result = document->parse_fn(text);
-    document->tree = result;
-    document->errors = NULL;
+    document->parse_config = config;
 }
 
 const ts_tree * ts_document_tree(const ts_document *document) {
@@ -32,9 +24,56 @@ const ts_tree * ts_document_tree(const ts_document *document) {
 }
 
 const char * ts_document_string(const ts_document *document) {
-    if (document->error_count > 0) {
-        return ts_tree_error_string(document->errors[0], document->symbol_names);
-    } else {
-        return ts_tree_string(document->tree, document->symbol_names);
-    }
+    return ts_tree_string(document->tree, document->parse_config.symbol_names);
+}
+
+void ts_document_set_input(ts_document *document, ts_input input) {
+    document->tree = document->parse_config.parse_fn(input);
+}
+
+typedef struct {
+    const char *string;
+    size_t position;
+    size_t length;
+} ts_string_input_data;
+
+/* Hand out the unread remainder once; afterwards every read yields "". */
+const char * ts_string_input_read(void *d) {
+    ts_string_input_data *data = (ts_string_input_data *)d;
+    const char *chunk = data->string + data->position;
+    data->position = data->length;
+    return chunk;
+}
+
+int ts_string_input_seek(void *d, size_t position) {
+    ts_string_input_data *data = (ts_string_input_data *)d;
+    /* Clamp so a later read can never point past the terminator. */
+    data->position = (position < data->length) ? position : data->length;
+    return (position < data->length);
+}
+
+/* The data block is allocated with `new`, so it must go through `delete`. */
+void ts_string_input_release(void *d) {
+    delete (ts_string_input_data *)d;
+}
+
+ts_input ts_string_input_make(const char *string) {
+    ts_string_input_data *data = new ts_string_input_data();
+    data->string = string;
+    data->position = 0;
+    data->length = strlen(string);
+    ts_input input = {
+        .data = (void *)data,
+        .read_fn = ts_string_input_read,
+        .seek_fn = ts_string_input_seek,
+        .release_fn = ts_string_input_release,
+    };
+    return input;
+}
+
+void ts_document_set_input_string(ts_document *document, const char *text) {
+    /* ts_document keeps no reference to the input, so release the wrapper here. */
+    ts_input input = ts_string_input_make(text);
+    ts_document_set_input(document, input);
+    input.release_fn(input.data);
 }