diff --git a/examples/parsers/arithmetic.c b/examples/parsers/arithmetic.c index 4c20090a..88b430c9 100644 --- a/examples/parsers/arithmetic.c +++ b/examples/parsers/arithmetic.c @@ -24,7 +24,7 @@ SYMBOL_NAMES { "token2", }; -static const ts_symbol * ts_recover(ts_state state, ts_state *to_state, size_t *count) { +RECOVER_FN() { switch (state) { case 6: RECOVER(7, 1, EXPECT({ts_aux_sym_token2})); diff --git a/examples/parsers/json.c b/examples/parsers/json.c index fc204324..23e2e38b 100644 --- a/examples/parsers/json.c +++ b/examples/parsers/json.c @@ -32,7 +32,7 @@ SYMBOL_NAMES { "repeat_helper2", }; -static const ts_symbol * ts_recover(ts_state state, ts_state *to_state, size_t *count) { +RECOVER_FN() { switch (state) { case 3: RECOVER(52, 2, EXPECT({ts_sym_comma, ts_sym_right_brace})); diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index 3257e007..b979e2ee 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -5,11 +5,17 @@ extern "C" { #endif -#include -#include #include +#include #include "tree_sitter/runtime.h" +/* + * Parsing DSL Macros + * + * Generated parser use these macros. They prevent the code generator + * from having too much knowledge of the runtime types and functions. + */ + //#define TS_DEBUG_PARSE //#define TS_DEBUG_LEX @@ -25,289 +31,114 @@ extern "C" { #define DEBUG_PARSE(...) #endif -static int INITIAL_STACK_SIZE = 100; -static const char *ts_symbol_names[]; - -typedef int ts_state; -static const ts_state ts_lex_state_error = -1; - -typedef struct { - ts_state state; - ts_tree *node; -} ts_stack_entry; - -typedef struct { - ts_input input; - const char *chunk; - size_t chunk_start; - size_t chunk_size; - size_t position_in_chunk; - - size_t token_end_position; - size_t token_start_position; - - ts_tree *lookahead_node; - ts_tree *prev_lookahead_node; - ts_state lex_state; - ts_stack_entry *stack; - size_t stack_size; -} ts_parser; - -static void ts_lex(ts_parser *parser); -static const ts_symbol * ts_recover(ts_state state, ts_state *to_state, size_t *count); -static void ts_parser_advance(ts_parser *); - -static ts_parser ts_parser_make(ts_input input) { - ts_parser result = { - .input = input, - .chunk = NULL, - .chunk_start = 0, - .chunk_size = 0, - .position_in_chunk = 0, - - .token_start_position = 0, - .token_end_position = 0, - - .lookahead_node = NULL, - .prev_lookahead_node = NULL, - .lex_state = 0, - .stack = calloc(INITIAL_STACK_SIZE, sizeof(ts_stack_entry)), - .stack_size = 0, - }; - - ts_parser_advance(&result); - return result; -} - -static size_t ts_parser_position(const ts_parser *parser) { - return parser->chunk_start + parser->position_in_chunk; -} - -static char ts_parser_lookahead_char(const ts_parser *parser) { - return parser->chunk[parser->position_in_chunk]; -} - -static ts_symbol ts_parser_lookahead_sym(const ts_parser *parser) { - ts_tree *node = parser->lookahead_node; - return node ? node->symbol : ts_builtin_sym_error; -} - -static ts_state ts_parser_parse_state(const ts_parser *parser) { - if (parser->stack_size == 0) return 0; - return parser->stack[parser->stack_size - 1].state; -} - -static void ts_parser_push(ts_parser *parser, ts_state state, ts_tree *node) { - ts_stack_entry *entry = (parser->stack + parser->stack_size); - entry->state = state; - entry->node = node; - parser->stack_size++; -} - -static void ts_parser_shift(ts_parser *parser, ts_state parse_state) { - DEBUG_PARSE("shift: %d \n", parse_state); - ts_parser_push(parser, parse_state, parser->lookahead_node); - parser->lookahead_node = parser->prev_lookahead_node; - parser->prev_lookahead_node = NULL; -} - -static void ts_parser_shrink_stack(ts_parser *parser, size_t new_size) { - for (size_t i = new_size; i < parser->stack_size; i++) - ts_tree_release(parser->stack[i].node); - parser->stack_size = new_size; -} - -static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_child_count, const int *collapse_flags) { - size_t new_stack_size = parser->stack_size - immediate_child_count; - - int child_count = 0; - for (int i = 0; i < immediate_child_count; i++) { - ts_tree *child = parser->stack[new_stack_size + i].node; - child_count += collapse_flags[i] ? ts_tree_child_count(child) : 1; - } - - int child_index = 0; - size_t size = 0, offset = 0; - ts_tree **children = malloc(child_count * sizeof(ts_tree *)); - for (int i = 0; i < immediate_child_count; i++) { - ts_tree *child = parser->stack[new_stack_size + i].node; - if (i == 0) { - offset = child->offset; - size = child->size; - } else { - size += child->offset + child->size; - } - - if (collapse_flags[i]) { - size_t grandchild_count = ts_tree_child_count(child); - memcpy(children + child_index, ts_tree_children(child), (grandchild_count * sizeof(ts_tree *))); - child_index += grandchild_count; - } else { - memcpy(children + child_index, &child, sizeof(ts_tree *)); - child_index++; - } - } - - parser->prev_lookahead_node = parser->lookahead_node; - parser->lookahead_node = ts_tree_make_node(symbol, child_count, children, size, offset); - ts_parser_shrink_stack(parser, new_stack_size); - DEBUG_PARSE("reduce: %s, state: %u \n", ts_symbol_names[symbol], ts_parser_parse_state(parser)); -} - -static const char empty_chunk[1] = { '\0' }; - -static void ts_parser_advance(ts_parser *parser) { - if (parser->position_in_chunk + 1 < parser->chunk_size) { - parser->position_in_chunk++; - } else { - parser->chunk_start += parser->chunk_size; - parser->chunk = parser->input.read_fn(parser->input.data, &parser->chunk_size); - if (parser->chunk_size == 0) { - parser->chunk = empty_chunk; - parser->chunk_size = 1; - } - parser->position_in_chunk = 0; - } -} - -static void ts_parser_advance_to_state(ts_parser *parser, ts_state lex_state) { - DEBUG_LEX("character: '%c' \n", ts_parser_lookahead_char(parser)); - ts_parser_advance(parser); - parser->lex_state = lex_state; -} - -static void ts_parser_set_lookahead_sym(ts_parser *parser, ts_symbol symbol) { - DEBUG_LEX("token: %s \n", ts_symbol_names[symbol]); - size_t position = ts_parser_position(parser); - size_t size = position - parser->token_start_position; - size_t offset = parser->token_start_position - parser->token_end_position; - parser->lookahead_node = ts_tree_make_leaf(symbol, size, offset); - parser->token_end_position = position; -} - -static ts_tree * ts_parser_tree(ts_parser *parser) { - DEBUG_PARSE("accept \n"); - return parser->stack[0].node; -} - -static void ts_parser_skip_whitespace(ts_parser *parser) { - while (isspace(ts_parser_lookahead_char(parser))) - ts_parser_advance(parser); - parser->token_start_position = ts_parser_position(parser); -} - -static int ts_parser_handle_error(ts_parser *parser, size_t count, const ts_symbol *expected_symbols) { - ts_tree *error = ts_tree_make_error(ts_parser_lookahead_char(parser), count, expected_symbols, 0, 0); - - while (1) { - ts_tree_release(parser->lookahead_node); - parser->lookahead_node = NULL; - parser->lex_state = ts_lex_state_error; - ts_lex(parser); - - for (long i = parser->stack_size - 1; i >= 0; i--) { - size_t count; - ts_state to_state; - const ts_symbol *symbols = ts_recover(parser->stack[i].state, &to_state, &count); - for (size_t j = 0; j < count; j++) { - if (symbols[j] == ts_parser_lookahead_sym(parser)) { - ts_parser_shrink_stack(parser, i + 1); - ts_parser_push(parser, to_state, error); - return 1; - } - } - } - - if (!ts_parser_lookahead_char(parser)) { - parser->stack[0].node = error; - return 0; - } - } -} - -#pragma mark - DSL +#define PARSE_FN() \ +static const ts_tree * \ +ts_parse(void *data, ts_input input) #define LEX_FN() \ -static void ts_lex(ts_parser *parser) +static ts_tree * \ +ts_lex(ts_lexer *lexer, state_id lex_state) -#define PARSE_FN() \ -static const ts_tree * ts_parse(ts_input input) +#define RECOVER_FN() \ +static const ts_symbol * \ +ts_recover(state_id state, state_id *to_state, size_t *count) #define SYMBOL_NAMES \ static const char *ts_symbol_names[] = -#define EXPORT_PARSER(name) \ -ts_parse_config name = { \ - .parse_fn = ts_parse, \ - .symbol_names = ts_symbol_names \ -}; +#define EXPORT_PARSER(constructor_name) \ +ts_parser constructor_name() { \ + ts_parser result = { \ + .parse_fn = ts_parse, \ + .symbol_names = ts_symbol_names, \ + .data = ts_lr_parser_make(), \ + .free_fn = NULL \ + }; \ + return result; \ +} #define START_PARSER() \ -ts_parser p = ts_parser_make(input), *parser = &p; \ -next_state: - -#define START_LEXER() \ -ts_parser_skip_whitespace(parser); \ -if (!ts_parser_lookahead_char(parser)) { \ - parser->lookahead_node = ts_tree_make_leaf(ts_builtin_sym_end, 0, 0); \ - return; \ -} \ +ts_lr_parser *parser = (ts_lr_parser *)data; \ +ts_lr_parser_reset(parser); \ +parser->lexer.input = input; \ +ts_lexer_advance(&parser->lexer); \ next_state: #define LOOKAHEAD_SYM() \ -ts_parser_lookahead_sym(parser) - -#define LOOKAHEAD_CHAR() \ -ts_parser_lookahead_char(parser) +ts_lr_parser_lookahead_sym(parser) #define PARSE_STATE() \ -ts_parser_parse_state(parser) - -#define LEX_STATE() \ -parser->lex_state +ts_stack_top_state(&parser->stack) #define SET_LEX_STATE(state_index) \ -{ \ - parser->lex_state = state_index; \ - if (!parser->lookahead_node) ts_lex(parser); \ -} +{ if (!parser->lookahead) parser->lookahead = ts_lex(&parser->lexer, state_index); } #define SHIFT(state) \ -{ ts_parser_shift(parser, state); goto next_state; } - -#define ADVANCE(state_index) \ -{ ts_parser_advance_to_state(parser, state_index); goto next_state; } +{ \ + DEBUG_PARSE("shift: %d \n", state); \ + ts_lr_parser_shift(parser, state); \ + goto next_state; \ +} #define REDUCE(symbol, child_count, collapse_flags) \ { \ static const int flags[] = collapse_flags; \ - ts_parser_reduce(parser, symbol, child_count, flags); \ + ts_lr_parser_reduce(parser, symbol, child_count, flags); \ + DEBUG_PARSE("reduce: %s, state: %u \n", ts_symbol_names[symbol], ts_stack_state(stack)); \ goto next_state; \ } #define ACCEPT_INPUT() \ -{ goto done; } - -#define ACCEPT_TOKEN(symbol) \ -{ ts_parser_set_lookahead_sym(parser, symbol); return; } - -#define LEX_ERROR() \ -{ ts_parser_set_lookahead_sym(parser, ts_builtin_sym_error); return; } +goto done; #define PARSE_ERROR(count, inputs) \ { \ static const ts_symbol expected_inputs[] = inputs; \ - if (ts_parser_handle_error(parser, count, expected_inputs)) \ + if (ts_lr_parser_handle_error(parser, count, expected_inputs)) \ goto next_state; \ else \ goto done; \ } +#define FINISH_PARSER() \ +done: \ +DEBUG_PARSE("accept \n"); \ +return ts_stack_root(&parser->stack); + +#define START_LEXER() \ +ts_lexer_skip_whitespace(lexer); \ +if (!ts_lexer_lookahead_char(lexer)) { \ + return ts_tree_make_leaf(ts_builtin_sym_end, 0, 0); \ +} \ +next_state: + +#define LEX_STATE() \ +lex_state + +#define LOOKAHEAD_CHAR() \ +ts_lexer_lookahead_char(lexer) + +#define ADVANCE(state_index) \ +{ \ + ts_lexer_advance(lexer); \ + lex_state = state_index; \ + goto next_state; \ +} + +#define ACCEPT_TOKEN(symbol) \ +{ \ + DEBUG_LEX("token: %s \n", ts_symbol_names[symbol]); \ + return ts_lexer_build_node(lexer, symbol); \ +} + +#define LEX_ERROR() \ +return ts_lexer_build_node(lexer, ts_builtin_sym_error); + #define LEX_PANIC() \ -printf("Lex error: unexpected state %d", LEX_STATE()); +{ DEBUG_LEX("Lex error: unexpected state %d", LEX_STATE()); return NULL; } #define PARSE_PANIC() \ -printf("Parse error: unexpected state %d", PARSE_STATE()); +{ DEBUG_PARSE("Parse error: unexpected state %d", PARSE_STATE()); } #define RECOVER(new_state, symbol_count, values) \ { \ @@ -320,9 +151,169 @@ printf("Parse error: unexpected state %d", PARSE_STATE()); #define EXPECT(...) __VA_ARGS__ #define COLLAPSE(...) __VA_ARGS__ -#define FINISH_PARSER() \ -done: \ -return ts_parser_tree(parser); + +/* + * Stack + */ +typedef int state_id; +typedef struct { + size_t size; + struct { + ts_tree *node; + state_id state; + } *entries; +} ts_stack; + +ts_stack ts_stack_make(); +ts_tree * ts_stack_root(ts_stack *stack); +ts_tree * ts_stack_reduce(ts_stack *stack, ts_symbol symbol, int immediate_child_count, const int *collapse_flags); +void ts_stack_shrink(ts_stack *stack, size_t new_size); +void ts_stack_push(ts_stack *stack, state_id state, ts_tree *node); +state_id ts_stack_top_state(const ts_stack *stack); + + +/* + * Lexer + */ +typedef struct { + ts_input input; + const char *chunk; + size_t chunk_start; + size_t chunk_size; + size_t position_in_chunk; + size_t token_end_position; + size_t token_start_position; +} ts_lexer; + +static ts_lexer ts_lexer_make() { + ts_lexer result = { + .chunk = NULL, + .chunk_start = 0, + .chunk_size = 0, + .position_in_chunk = 0, + .token_start_position = 0, + .token_end_position = 0, + }; + return result; +} + +static size_t ts_lexer_position(const ts_lexer *lexer) { + return lexer->chunk_start + lexer->position_in_chunk; +} + +static char ts_lexer_lookahead_char(const ts_lexer *lexer) { + return lexer->chunk[lexer->position_in_chunk]; +} + +static const char empty_chunk[1] = ""; + +static void ts_lexer_advance(ts_lexer *lexer) { + if (lexer->position_in_chunk + 1 < lexer->chunk_size) { + lexer->position_in_chunk++; + } else { + lexer->chunk_start += lexer->chunk_size; + lexer->chunk = lexer->input.read_fn(lexer->input.data, &lexer->chunk_size); + if (lexer->chunk_size == 0) { + lexer->chunk = empty_chunk; + lexer->chunk_size = 1; + } + lexer->position_in_chunk = 0; + } +} + +static ts_tree * ts_lexer_build_node(ts_lexer *lexer, ts_symbol symbol) { + size_t current_position = ts_lexer_position(lexer); + size_t size = current_position - lexer->token_start_position; + size_t offset = lexer->token_start_position - lexer->token_end_position; + lexer->token_end_position = current_position; + return ts_tree_make_leaf(symbol, size, offset); +} + +static void ts_lexer_skip_whitespace(ts_lexer *lexer) { + while (isspace(ts_lexer_lookahead_char(lexer))) + ts_lexer_advance(lexer); + lexer->token_start_position = ts_lexer_position(lexer); +} + +static const state_id ts_lex_state_error = -1; + + +/* + * Forward declarations + * The file including this header should define these functions + */ +PARSE_FN(); +LEX_FN(); +RECOVER_FN(); + + +/* + * Parser + */ +typedef struct { + ts_lexer lexer; + ts_stack stack; + ts_tree *lookahead; + ts_tree *previous_lookahead; +} ts_lr_parser; + +static ts_lr_parser * ts_lr_parser_make() { + ts_lr_parser *result = malloc(sizeof(ts_lr_parser)); + result->lexer = ts_lexer_make(); + result->stack = ts_stack_make(); + return result; +} + +static void ts_lr_parser_reset(ts_lr_parser *parser) { + ts_stack_shrink(&parser->stack, 0); + parser->lookahead = NULL; + parser->previous_lookahead = NULL; + parser->lexer = ts_lexer_make(); +} + +static ts_symbol ts_lr_parser_lookahead_sym(const ts_lr_parser *parser) { + ts_tree *node = parser->lookahead; + return node ? node->symbol : ts_builtin_sym_error; +} + +static void ts_lr_parser_shift(ts_lr_parser *parser, state_id parse_state) { + ts_stack_push(&parser->stack, parse_state, parser->lookahead); + parser->lookahead = parser->previous_lookahead; + parser->previous_lookahead = NULL; +} + +static void ts_lr_parser_reduce(ts_lr_parser *parser, ts_symbol symbol, int immediate_child_count, const int *collapse_flags) { + ts_tree *lookahead = ts_stack_reduce(&parser->stack, symbol, immediate_child_count, collapse_flags); + parser->previous_lookahead = parser->lookahead; + parser->lookahead = lookahead; +} + +static int ts_lr_parser_handle_error(ts_lr_parser *parser, size_t count, const ts_symbol *expected_symbols) { + ts_tree *error = ts_tree_make_error(ts_lexer_lookahead_char(&parser->lexer), count, expected_symbols, 0, 0); + + for (;;) { + ts_tree_release(parser->lookahead); + parser->lookahead = ts_lex(&parser->lexer, ts_lex_state_error); + + for (long i = parser->stack.size - 1; i >= 0; i--) { + size_t count; + state_id to_state; + const ts_symbol *symbols = ts_recover(parser->stack.entries[i].state, &to_state, &count); + for (size_t j = 0; j < count; j++) { + if (symbols[j] == ts_lr_parser_lookahead_sym(parser)) { + ts_stack_shrink(&parser->stack, i + 1); + ts_stack_push(&parser->stack, to_state, error); + return 1; + } + } + } + + if (!ts_lexer_lookahead_char(&parser->lexer)) { + parser->stack.entries[0].node = error; + return 0; + } + } +} #ifdef __cplusplus } diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index 6d883ecf..c81dc9a0 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -47,16 +47,21 @@ typedef struct { void (* release_fn)(void *data); } ts_input; -typedef struct { - const ts_tree * (* parse_fn)(ts_input); - const char **symbol_names; -} ts_parse_config; + typedef struct { + const ts_tree * (* parse_fn)(void *data, ts_input input); + void (* free_fn)(void *data); + const char **symbol_names; + void *data; + } ts_parser; + + const ts_tree * ts_parser_parse(ts_parser *, ts_input); + void ts_parser_free(ts_parser *); typedef struct ts_document ts_document; ts_document * ts_document_make(); void ts_document_free(ts_document *doc); -void ts_document_set_parser(ts_document *doc, ts_parse_config parser); +void ts_document_set_parser(ts_document *doc, ts_parser parser); void ts_document_set_input(ts_document *doc, ts_input input); void ts_document_set_input_string(ts_document *doc, const char *text); void ts_document_edit(ts_document *doc, size_t position, size_t deleted_bytes, size_t inserted_bytes); diff --git a/spec/runtime/arithmetic_spec.cc b/spec/runtime/arithmetic_spec.cc index 240d18b7..391c1a77 100644 --- a/spec/runtime/arithmetic_spec.cc +++ b/spec/runtime/arithmetic_spec.cc @@ -1,6 +1,6 @@ #include "runtime_spec_helper.h" -extern ts_parse_config ts_parse_config_arithmetic; +extern "C" ts_parser ts_parse_config_arithmetic(); START_TEST @@ -9,7 +9,8 @@ describe("arithmetic", []() { before_each([&]() { doc = ts_document_make(); - ts_document_set_parser(doc, ts_parse_config_arithmetic); + ts_parser parser = ts_parse_config_arithmetic(); + ts_document_set_parser(doc, parser); }); after_each([&]() { diff --git a/spec/runtime/json_spec.cc b/spec/runtime/json_spec.cc index 0f5bfe1d..92a5a750 100644 --- a/spec/runtime/json_spec.cc +++ b/spec/runtime/json_spec.cc @@ -1,6 +1,6 @@ #include "runtime_spec_helper.h" -extern ts_parse_config ts_parse_config_json; +extern "C" ts_parser ts_parse_config_json(); START_TEST @@ -9,7 +9,7 @@ describe("json", []() { before_each([&]() { doc = ts_document_make(); - ts_document_set_parser(doc, ts_parse_config_json); + ts_document_set_parser(doc, ts_parse_config_json()); }); after_each([&]() { diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index 895a7a86..d6dae4c7 100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -1,8 +1,8 @@ #include "runtime_spec_helper.h" #include "helpers/spy_reader.h" -extern ts_parse_config ts_parse_config_json; - +extern "C" ts_parser ts_parse_config_json(); + START_TEST describe("parsing", [&]() { @@ -11,7 +11,7 @@ describe("parsing", [&]() { before_each([&]() { doc = ts_document_make(); - ts_document_set_parser(doc, ts_parse_config_json); + ts_document_set_parser(doc, ts_parse_config_json()); reader = new SpyReader("{ \"key\": [1, 2] }", 5); ts_document_set_input(doc, reader->input); @@ -31,7 +31,7 @@ describe("parsing", [&]() { "(value (number)) " "(value (number))))))")); }); - + it("reads the entire input", [&]() { AssertThat(reader->chunks_read, Equals(vector({ "{ \"ke", @@ -63,7 +63,7 @@ describe("parsing", [&]() { "(value (number))))" )); }); - + it_skip("re-reads only the changed portion of the input", [&]() { AssertThat(reader->chunks_read, Equals(vector({ "" diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index 9230629a..62b70b17 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -253,11 +253,9 @@ namespace tree_sitter { } cases += _default(recover_case(0, set())); - string body = _switch("state", cases); return join({ - "static const ts_symbol * " - "ts_recover(ts_state state, ts_state *to_state, size_t *count) {", - indent(body), + "RECOVER_FN() {", + indent(_switch("state", cases)), "}" }); } diff --git a/src/runtime/document.c b/src/runtime/document.c index 8a65f495..46fbcea4 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -2,7 +2,7 @@ #include struct ts_document { - ts_parse_config parse_config; + ts_parser parser; const ts_tree *tree; ts_input input; size_t error_count; @@ -16,8 +16,8 @@ void ts_document_free(ts_document *document) { free(document); } -void ts_document_set_parser(ts_document *document, ts_parse_config config) { - document->parse_config = config; +void ts_document_set_parser(ts_document *document, ts_parser parser) { + document->parser = parser; } const ts_tree * ts_document_tree(const ts_document *document) { @@ -25,17 +25,17 @@ const ts_tree * ts_document_tree(const ts_document *document) { } const char * ts_document_string(const ts_document *document) { - return ts_tree_string(document->tree, document->parse_config.symbol_names); + return ts_tree_string(document->tree, document->parser.symbol_names); } void ts_document_set_input(ts_document *document, ts_input input) { document->input = input; - document->tree = document->parse_config.parse_fn(input); + document->tree = ts_parser_parse(&document->parser, input); } void ts_document_edit(ts_document *document, size_t position, size_t bytes_removed, size_t bytes_inserted) { document->input.seek_fn(document->input.data, 0); - document->tree = document->parse_config.parse_fn(document->input); + document->tree = ts_parser_parse(&document->parser, document->input); } typedef struct { diff --git a/src/runtime/parser.c b/src/runtime/parser.c new file mode 100644 index 00000000..c7f3a86a --- /dev/null +++ b/src/runtime/parser.c @@ -0,0 +1,10 @@ +#include "tree_sitter/runtime.h" + +const ts_tree * ts_parser_parse(ts_parser *parser, ts_input input) { + return parser->parse_fn(parser->data, input); +} + +void ts_parser_free(ts_parser *parser) { + if (parser->free_fn != NULL) + parser->free_fn(parser->data); +} \ No newline at end of file diff --git a/src/runtime/stack.c b/src/runtime/stack.c new file mode 100644 index 00000000..75a2ff1a --- /dev/null +++ b/src/runtime/stack.c @@ -0,0 +1,80 @@ +#include "tree_sitter/runtime.h" +#include + +typedef int state_id; +static const state_id ts_lex_state_error = -1; + +typedef struct { + size_t size; + struct { + ts_tree *node; + state_id state; + } *entries; +} ts_stack; + +static int INITIAL_STACK_SIZE = 100; + +ts_stack ts_stack_make() { + ts_stack result = { + .entries = calloc(INITIAL_STACK_SIZE, sizeof(*result.entries)), + .size = 0, + }; + return result; +} + +state_id ts_stack_top_state(const ts_stack *stack) { + if (stack->size == 0) return 0; + return stack->entries[stack->size - 1].state; +} + +ts_tree * ts_stack_root(ts_stack *stack) { + return stack->entries[0].node; +} + +void ts_stack_push(ts_stack *stack, state_id state, ts_tree *node) { + stack->entries[stack->size].state = state; + stack->entries[stack->size].node = node; + stack->size++; +} + +void ts_stack_shrink(ts_stack *stack, size_t new_size) { + for (size_t i = new_size; i < stack->size; i++) + ts_tree_release(stack->entries[i].node); + stack->size = new_size; +} + +ts_tree * ts_stack_reduce(ts_stack *stack, ts_symbol symbol, int immediate_child_count, const int *collapse_flags) { + size_t new_stack_size = stack->size - immediate_child_count; + + int child_count = 0; + for (int i = 0; i < immediate_child_count; i++) { + ts_tree *child = stack->entries[new_stack_size + i].node; + child_count += collapse_flags[i] ? ts_tree_child_count(child) : 1; + } + + int child_index = 0; + size_t size = 0, offset = 0; + ts_tree **children = malloc(child_count * sizeof(ts_tree *)); + for (int i = 0; i < immediate_child_count; i++) { + ts_tree *child = stack->entries[new_stack_size + i].node; + if (i == 0) { + offset = child->offset; + size = child->size; + } else { + size += child->offset + child->size; + } + + if (collapse_flags[i]) { + size_t grandchild_count = ts_tree_child_count(child); + memcpy(children + child_index, ts_tree_children(child), (grandchild_count * sizeof(ts_tree *))); + child_index += grandchild_count; + } else { + memcpy(children + child_index, &child, sizeof(ts_tree *)); + child_index++; + } + } + + ts_tree *lookahead = ts_tree_make_node(symbol, child_count, children, size, offset); + ts_stack_shrink(stack, new_stack_size); + return lookahead; +}