From eecbcccee084b27d98c2552d7a4e2e9f155b2bda Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 30 Jul 2014 23:40:02 -0700 Subject: [PATCH] Remove generated parsers' dependency on the runtime library Generated parsers no longer export a parser constructor function. They now export an opaque Language object which can be set on Documents directly. This way, the logic for constructing parsers lives entirely in the runtime. The Languages are just structs which have no load-time dependency on the runtime. --- examples/parsers/arithmetic.c | 2 +- examples/parsers/golang.c | 2 +- examples/parsers/javascript.c | 2 +- examples/parsers/json.c | 2 +- include/tree_sitter/parser.h | 94 +++++++------------ include/tree_sitter/runtime.h | 19 ++-- spec/runtime/document_spec.cc | 4 +- .../{dummy_parser.c => dummy_language.c} | 6 +- .../{dummy_parser.h => dummy_language.h} | 8 +- spec/runtime/languages/language_specs.cc | 24 ++--- spec/runtime/node_spec.cc | 4 +- spec/runtime/parser_spec.cc | 31 +++--- spec/runtime/stack_spec.cc | 2 +- src/compiler/generate_code/c_code.cc | 2 +- src/runtime/document.c | 26 +++-- src/runtime/lexer.c | 29 +++--- src/runtime/lexer.h | 16 ++++ src/runtime/parser.c | 52 +++++----- src/runtime/parser.h | 30 ++++++ src/runtime/stack.c | 3 +- src/runtime/stack.h | 34 +++++++ 21 files changed, 219 insertions(+), 173 deletions(-) rename spec/runtime/helpers/{dummy_parser.c => dummy_language.c} (86%) rename spec/runtime/helpers/{dummy_parser.h => dummy_language.h} (59%) create mode 100644 src/runtime/lexer.h create mode 100644 src/runtime/parser.h create mode 100644 src/runtime/stack.h diff --git a/examples/parsers/arithmetic.c b/examples/parsers/arithmetic.c index d80d3868..07880f0c 100644 --- a/examples/parsers/arithmetic.c +++ b/examples/parsers/arithmetic.c @@ -549,4 +549,4 @@ PARSE_TABLE = { #pragma GCC diagnostic pop -EXPORT_PARSER(ts_parser_arithmetic); +EXPORT_LANGUAGE(ts_language_arithmetic); diff --git a/examples/parsers/golang.c 
b/examples/parsers/golang.c index c7822b4d..15c9a5cc 100644 --- a/examples/parsers/golang.c +++ b/examples/parsers/golang.c @@ -6675,4 +6675,4 @@ PARSE_TABLE = { #pragma GCC diagnostic pop -EXPORT_PARSER(ts_parser_golang); +EXPORT_LANGUAGE(ts_language_golang); diff --git a/examples/parsers/javascript.c b/examples/parsers/javascript.c index ba001bcd..e45f9b8b 100644 --- a/examples/parsers/javascript.c +++ b/examples/parsers/javascript.c @@ -77412,4 +77412,4 @@ PARSE_TABLE = { #pragma GCC diagnostic pop -EXPORT_PARSER(ts_parser_javascript); +EXPORT_LANGUAGE(ts_language_javascript); diff --git a/examples/parsers/json.c b/examples/parsers/json.c index fb618bee..bfd1fcee 100644 --- a/examples/parsers/json.c +++ b/examples/parsers/json.c @@ -736,4 +736,4 @@ PARSE_TABLE = { #pragma GCC diagnostic pop -EXPORT_PARSER(ts_parser_json); +EXPORT_LANGUAGE(ts_language_json); diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index a9bac3fa..32daea2b 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -12,7 +12,7 @@ typedef struct TSTree TSTree; #define ts_lex_state_error 0 -typedef struct { +typedef struct TSLexer { TSInput input; int debug; const char *chunk; @@ -22,11 +22,10 @@ typedef struct { size_t token_end_position; size_t token_start_position; int reached_end; -} TSLexer; -TSLexer ts_lexer_make(); -int ts_lexer_advance(TSLexer *lexer); -TSTree *ts_lexer_build_node(TSLexer *lexer, TSSymbol symbol, int is_hidden); + TSTree * (* accept_fn)(struct TSLexer *, TSSymbol, int); + int (* advance_fn)(struct TSLexer *); +} TSLexer; static inline size_t ts_lexer_position(const TSLexer *lexer) { return lexer->chunk_start + lexer->position_in_chunk; @@ -40,28 +39,16 @@ static inline void ts_lexer_start_token(TSLexer *lexer) { lexer->token_start_position = ts_lexer_position(lexer); } +static inline int ts_lexer_advance(TSLexer *lexer) { + return lexer->advance_fn(lexer); +} + +static inline TSTree *ts_lexer_accept(TSLexer *lexer, 
TSSymbol symbol, int is_hidden) { + return lexer->accept_fn(lexer, symbol, is_hidden); +} + typedef unsigned short TSStateId; -typedef struct { - size_t size; - struct { - TSTree *node; - TSStateId state; - int is_extra; - } *entries; -} TSStack; - -TSStack ts_stack_make(); -void ts_stack_delete(TSStack *); -TSTree *ts_stack_reduce(TSStack *stack, TSSymbol symbol, - size_t immediate_child_count, - const int *hidden_symbol_flags, int gather_extras); -void ts_stack_shrink(TSStack *stack, size_t new_size); -void ts_stack_push(TSStack *stack, TSStateId state, TSTree *node); -TSStateId ts_stack_top_state(const TSStack *stack); -TSTree *ts_stack_top_node(const TSStack *stack); -size_t ts_stack_right_position(const TSStack *stack); - typedef enum { TSParseActionTypeError, TSParseActionTypeShift, @@ -82,32 +69,15 @@ typedef struct { } data; } TSParseAction; -typedef struct { +struct TSLanguage { size_t symbol_count; const char **symbol_names; const int *hidden_symbol_flags; const TSParseAction *parse_table; const TSStateId *lex_states; - TSTree *(*lex_fn)(TSParser *, TSStateId); -} TSParserConfig; - -struct TSParser { - TSLexer lexer; - TSStack stack; - int debug; - TSTree *lookahead; - TSTree *next_lookahead; - TSParserConfig config; + TSTree *(*lex_fn)(TSLexer *, TSStateId); }; -TSParser *ts_parser_make(TSParserConfig); -void ts_parser_free(TSParser *); -TSParserConfig ts_parser_config(TSParser *); -const TSTree *ts_parser_parse(TSParser *parser, TSInput input, - TSInputEdit *edit); -void ts_parser_start(TSParser *parser, TSInput input, TSInputEdit *edit); -TSTree *ts_parser_step(TSParser *parser); - #define SYMBOL_NAMES static const char *ts_symbol_names[] #define HIDDEN_SYMBOLS static const int ts_hidden_symbol_flags[SYMBOL_COUNT] @@ -117,10 +87,10 @@ TSTree *ts_parser_step(TSParser *parser); #define PARSE_TABLE \ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] -#define LEX_FN() static TSTree *ts_lex(TSParser *parser, TSStateId lex_state) 
+#define LEX_FN() static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) #define DEBUG_LEX(...) \ - if (parser->lexer.debug) { \ + if (lexer->debug) { \ fprintf(stderr, "\n" __VA_ARGS__); \ } @@ -128,15 +98,15 @@ TSTree *ts_parser_step(TSParser *parser); DEBUG_LEX("LEX %d", lex_state); \ char lookahead; \ next_state: \ - lookahead = ts_lexer_lookahead_char(&parser->lexer); \ + lookahead = ts_lexer_lookahead_char(lexer); \ DEBUG_LEX("CHAR '%c'", lookahead); -#define START_TOKEN() ts_lexer_start_token(&parser->lexer); +#define START_TOKEN() ts_lexer_start_token(lexer); #define ADVANCE(state_index) \ { \ DEBUG_LEX("ADVANCE %d", state_index); \ - if (!ts_lexer_advance(&parser->lexer)) \ + if (!ts_lexer_advance(lexer)) \ ACCEPT_TOKEN(ts_builtin_sym_end); \ lex_state = state_index; \ goto next_state; \ @@ -145,14 +115,14 @@ TSTree *ts_parser_step(TSParser *parser); #define ACCEPT_TOKEN(symbol) \ { \ DEBUG_LEX("TOKEN %s", ts_symbol_names[symbol]); \ - return ts_lexer_build_node(&parser->lexer, symbol, \ + return ts_lexer_accept(lexer, symbol, \ ts_hidden_symbol_flags[symbol]); \ } #define LEX_ERROR() \ { \ DEBUG_LEX("ERROR"); \ - return ts_lexer_build_node(&parser->lexer, ts_builtin_sym_error, 0); \ + return ts_lexer_accept(lexer, ts_builtin_sym_error, 0); \ } #define LEX_PANIC() \ @@ -183,17 +153,17 @@ TSTree *ts_parser_step(TSParser *parser); #define ACCEPT_INPUT() \ { .type = TSParseActionTypeAccept } -#define EXPORT_PARSER(constructor_name) \ - TSParser *constructor_name() { \ - return ts_parser_make((TSParserConfig) { \ - .symbol_count = SYMBOL_COUNT, \ - .hidden_symbol_flags = ts_hidden_symbol_flags, \ - .parse_table = (const TSParseAction *)ts_parse_actions, \ - .lex_states = ts_lex_states, \ - .symbol_names = ts_symbol_names, \ - .lex_fn = ts_lex, \ - }); \ - } +#define EXPORT_LANGUAGE(language_name) \ + static TSLanguage language = (TSLanguage) { \ + .symbol_count = SYMBOL_COUNT, \ + .hidden_symbol_flags = ts_hidden_symbol_flags, \ + .parse_table = (const 
TSParseAction *)ts_parse_actions, \ + .lex_states = ts_lex_states, \ + .symbol_names = ts_symbol_names, \ + .lex_fn = ts_lex, \ + }; \ + \ + const TSLanguage *language_name = &language; #ifdef __cplusplus } diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index c842caad..a52637f4 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -21,11 +21,9 @@ typedef struct { } TSInputEdit; typedef unsigned short TSSymbol; +typedef struct TSLanguage TSLanguage; typedef struct TSNode TSNode; -typedef struct TSParser TSParser; -typedef struct TSDocument TSDocument; - size_t ts_node_pos(const TSNode *); size_t ts_node_size(const TSNode *); TSSymbol ts_node_sym(const TSNode *); @@ -41,14 +39,15 @@ void ts_node_retain(TSNode *node); void ts_node_release(TSNode *node); int ts_node_eq(const TSNode *, const TSNode *); +typedef struct TSDocument TSDocument; TSDocument *ts_document_make(); -void ts_document_free(TSDocument *doc); -void ts_document_set_parser(TSDocument *doc, TSParser *parser); -void ts_document_set_input(TSDocument *doc, TSInput input); -void ts_document_set_input_string(TSDocument *doc, const char *text); -void ts_document_edit(TSDocument *doc, TSInputEdit edit); -const char *ts_document_string(const TSDocument *doc); -TSNode *ts_document_root_node(const TSDocument *document); +void ts_document_free(TSDocument *); +void ts_document_set_language(TSDocument *, const TSLanguage *); +void ts_document_set_input(TSDocument *, TSInput); +void ts_document_set_input_string(TSDocument *, const char *); +void ts_document_edit(TSDocument *, TSInputEdit); +const char *ts_document_string(const TSDocument *); +TSNode *ts_document_root_node(const TSDocument *); #define ts_builtin_sym_error 0 #define ts_builtin_sym_end 1 diff --git a/spec/runtime/document_spec.cc b/spec/runtime/document_spec.cc index 5e0a4473..1d6d2d9c 100644 --- a/spec/runtime/document_spec.cc +++ b/spec/runtime/document_spec.cc @@ -2,7 +2,7 @@ #include 
"runtime/helpers/spy_reader.h" #include "runtime/tree.h" -extern "C" TSParser * ts_parser_json(); +extern "C" const TSLanguage * ts_language_json; START_TEST @@ -11,7 +11,7 @@ describe("incremental parsing", [&]() { before_each([&]() { doc = ts_document_make(); - ts_document_set_parser(doc, ts_parser_json()); + ts_document_set_language(doc, ts_language_json); }); after_each([&]() { diff --git a/spec/runtime/helpers/dummy_parser.c b/spec/runtime/helpers/dummy_language.c similarity index 86% rename from spec/runtime/helpers/dummy_parser.c rename to spec/runtime/helpers/dummy_language.c index 50143dec..6fb2be1e 100644 --- a/spec/runtime/helpers/dummy_parser.c +++ b/spec/runtime/helpers/dummy_language.c @@ -1,4 +1,4 @@ -#include "runtime/helpers/dummy_parser.h" +#include "runtime/helpers/dummy_language.h" #include "tree_sitter/parser.h" const TSParseAction parse_table[3][5] = { @@ -30,9 +30,11 @@ const int hidden_symbols[5] = { [dummy_sym3] = 1, }; -TSParserConfig dummy_parser = { +static TSLanguage language = { .symbol_count = 5, .parse_table = (const TSParseAction *)parse_table, .lex_states = lex_states, .hidden_symbol_flags = hidden_symbols, }; + +TSLanguage *dummy_language = &language; diff --git a/spec/runtime/helpers/dummy_parser.h b/spec/runtime/helpers/dummy_language.h similarity index 59% rename from spec/runtime/helpers/dummy_parser.h rename to spec/runtime/helpers/dummy_language.h index 9b2d7cd0..0f95965a 100644 --- a/spec/runtime/helpers/dummy_parser.h +++ b/spec/runtime/helpers/dummy_language.h @@ -1,5 +1,5 @@ -#ifndef HELPERS_DUMMY_PARSER_H_ -#define HELPERS_DUMMY_PARSER_H_ +#ifndef HELPERS_DUMMY_LANGUAGE_H_ +#define HELPERS_DUMMY_LANGUAGE_H_ #ifdef __cplusplus extern "C" { @@ -14,10 +14,10 @@ enum { dummy_sym3 = 4, }; -extern TSParserConfig dummy_parser; +extern TSLanguage *dummy_language; #ifdef __cplusplus } #endif -#endif // HELPERS_DUMMY_PARSER_H_ +#endif // HELPERS_DUMMY_LANGUAGE_H_ diff --git a/spec/runtime/languages/language_specs.cc 
b/spec/runtime/languages/language_specs.cc index 62ccf978..8ce26e19 100644 --- a/spec/runtime/languages/language_specs.cc +++ b/spec/runtime/languages/language_specs.cc @@ -1,10 +1,10 @@ #include "runtime/runtime_spec_helper.h" #include "runtime/helpers/read_test_entries.h" -extern "C" TSParser * ts_parser_javascript(); -extern "C" TSParser * ts_parser_json(); -extern "C" TSParser * ts_parser_arithmetic(); -extern "C" TSParser * ts_parser_golang(); +extern "C" TSLanguage *ts_language_javascript; +extern "C" TSLanguage *ts_language_json; +extern "C" TSLanguage *ts_language_arithmetic; +extern "C" TSLanguage *ts_language_golang; START_TEST @@ -19,13 +19,13 @@ describe("Languages", [&]() { ts_document_free(doc); }); - auto run_tests_for_language = [&](string language, TSParser * (parser_constructor)()) { - describe(language.c_str(), [&]() { + auto run_tests_for_language = [&](string language_name, TSLanguage *language) { + describe(language_name.c_str(), [&]() { before_each([&]() { - ts_document_set_parser(doc, parser_constructor()); + ts_document_set_language(doc, language); }); - for (auto &entry : test_entries_for_language(language)) { + for (auto &entry : test_entries_for_language(language_name)) { it(entry.description.c_str(), [&]() { ts_document_set_input_string(doc, entry.input.c_str()); auto doc_string = ts_document_string(doc); @@ -36,10 +36,10 @@ describe("Languages", [&]() { }); }; - run_tests_for_language("json", ts_parser_json); - run_tests_for_language("arithmetic", ts_parser_arithmetic); - run_tests_for_language("javascript", ts_parser_javascript); - run_tests_for_language("golang", ts_parser_golang); + run_tests_for_language("json", ts_language_json); + run_tests_for_language("arithmetic", ts_language_arithmetic); + run_tests_for_language("javascript", ts_language_javascript); + run_tests_for_language("golang", ts_language_golang); }); END_TEST diff --git a/spec/runtime/node_spec.cc b/spec/runtime/node_spec.cc index b2b1427b..2b74d79d 100644 --- 
a/spec/runtime/node_spec.cc +++ b/spec/runtime/node_spec.cc @@ -1,6 +1,6 @@ #include "runtime/runtime_spec_helper.h" -extern "C" TSParser * ts_parser_json(); +extern "C" TSLanguage * ts_language_json; START_TEST @@ -10,7 +10,7 @@ describe("Node", []() { before_each([&]() { document = ts_document_make(); - ts_document_set_parser(document, ts_parser_json()); + ts_document_set_language(document, ts_language_json); ts_document_set_input_string(document, " [12, 5, 345]"); root = ts_document_root_node(document); diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index 7dae60f2..64a85627 100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -1,15 +1,16 @@ #include "runtime/runtime_spec_helper.h" #include "runtime/helpers/spy_reader.h" -#include "runtime/helpers/dummy_parser.h" +#include "runtime/helpers/dummy_language.h" #include "runtime/tree.h" +#include "runtime/parser.h" #include "tree_sitter/parser.h" TSTree *lex_fn_node_to_return; TSStateId lex_fn_state_received; -TSParser *lex_fn_parser_received; +TSLexer *lex_fn_lexer_received; -TSTree * fake_lex(TSParser *parser, TSStateId state_id) { - lex_fn_parser_received = parser; +TSTree * fake_lex(TSLexer *lexer, TSStateId state_id) { + lex_fn_lexer_received = lexer; lex_fn_state_received = state_id; return lex_fn_node_to_return; } @@ -17,31 +18,29 @@ TSTree * fake_lex(TSParser *parser, TSStateId state_id) { START_TEST describe("LR Parsers", [&]() { - TSParser *parser; + TSParser parser; SpyReader *reader; before_each([&]() { - TSParserConfig config = dummy_parser; - config.lex_fn = fake_lex; - - parser = ts_parser_make(config); + dummy_language->lex_fn = fake_lex; + parser = ts_parser_make(dummy_language); reader = new SpyReader("some structured text", 5); }); after_each([&]() { - ts_parser_free(parser); + ts_parser_destroy(&parser); delete reader; }); describe("when starting at the beginning of the input (edit is NULL)", [&]() { before_each([&]() { - ts_parser_start(parser, 
reader->input, nullptr); + ts_parser_start(&parser, reader->input, nullptr); }); it("runs the lexer with the lex state corresponding to the initial state", [&]() { lex_fn_node_to_return = ts_tree_make_leaf(dummy_sym2, 5, 1, 0); - ts_parser_step(parser); + ts_parser_step(&parser); AssertThat(lex_fn_state_received, Equals(100)); }); @@ -51,12 +50,12 @@ describe("LR Parsers", [&]() { }); it("advances to the state specified in the action", [&]() { - ts_parser_step(parser); - AssertThat(ts_stack_top_state(&parser->stack), Equals(12)); + ts_parser_step(&parser); + AssertThat(ts_stack_top_state(&parser.stack), Equals(12)); }); it("continues parsing (returns NULL)", [&]() { - auto result = ts_parser_step(parser); + auto result = ts_parser_step(&parser); AssertThat(result, Equals((TSTree *)nullptr)); }); }); @@ -67,7 +66,7 @@ describe("LR Parsers", [&]() { }); it("ends the parse, returning an error tree", [&]() { - auto result = ts_parser_step(parser); + auto result = ts_parser_step(&parser); AssertThat(result->symbol, Equals(ts_builtin_sym_error)); }); }); diff --git a/spec/runtime/stack_spec.cc b/spec/runtime/stack_spec.cc index 6d2e3ee3..06c10358 100644 --- a/spec/runtime/stack_spec.cc +++ b/spec/runtime/stack_spec.cc @@ -1,6 +1,6 @@ #include "runtime/runtime_spec_helper.h" #include "runtime/tree.h" -#include "tree_sitter/parser.h" +#include "runtime/stack.h" START_TEST diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index cdc2f695..6e4c8afc 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -163,7 +163,7 @@ class CCodeGenerator { } void parser_export() { - line("EXPORT_PARSER(ts_parser_" + name + ");"); + line("EXPORT_LANGUAGE(ts_language_" + name + ");"); line(); } diff --git a/src/runtime/document.c b/src/runtime/document.c index 02b0172c..f54391d9 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -2,10 +2,11 @@ #include "tree_sitter/parser.h" #include 
"runtime/tree.h" #include "runtime/node.h" +#include "runtime/parser.h" #include struct TSDocument { - TSParser *parser; + TSParser parser; const TSTree *tree; TSInput input; }; @@ -14,24 +15,21 @@ TSDocument *ts_document_make() { TSDocument *document = malloc(sizeof(TSDocument)); *document = (TSDocument) { .input = (TSInput) { .data = NULL, .read_fn = NULL, .seek_fn = NULL }, - .parser = NULL, .tree = NULL }; return document; } void ts_document_free(TSDocument *document) { - if (document->parser) - ts_parser_free(document->parser); + ts_parser_destroy(&document->parser); if (document->input.release_fn) document->input.release_fn(document->input.data); free(document); } -void ts_document_set_parser(TSDocument *document, TSParser *parser) { - if (document->parser) - ts_parser_free(document->parser); - document->parser = parser; +void ts_document_set_language(TSDocument *document, const TSLanguage *language) { + ts_parser_destroy(&document->parser); + document->parser = ts_parser_make(language); } const TSTree *ts_document_tree(const TSDocument *document) { @@ -39,22 +37,21 @@ const TSTree *ts_document_tree(const TSDocument *document) { } const char *ts_document_string(const TSDocument *document) { - return ts_tree_string(document->tree, - ts_parser_config(document->parser).symbol_names); + return ts_tree_string(document->tree, document->parser.language->symbol_names); } void ts_document_set_input(TSDocument *document, TSInput input) { document->input = input; - document->tree = ts_parser_parse(document->parser, document->input, NULL); + document->tree = ts_parser_parse(&document->parser, document->input, NULL); } void ts_document_edit(TSDocument *document, TSInputEdit edit) { - document->tree = ts_parser_parse(document->parser, document->input, &edit); + document->tree = ts_parser_parse(&document->parser, document->input, &edit); } const char *ts_document_symbol_name(const TSDocument *document, const TSTree *tree) { - return 
ts_parser_config(document->parser).symbol_names[tree->symbol]; + return document->parser.language->symbol_names[tree->symbol]; } typedef struct { @@ -97,8 +94,7 @@ void ts_document_set_input_string(TSDocument *document, const char *text) { } TSNode *ts_document_root_node(const TSDocument *document) { - return ts_node_make_root(document->tree, - document->parser->config.symbol_names); + return ts_node_make_root(document->tree, document->parser.language->symbol_names); } TSNode *ts_document_get_node(const TSDocument *document, size_t pos) { diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index bb42b853..cbced29a 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -1,18 +1,7 @@ #include "tree_sitter/parser.h" #include "runtime/tree.h" -TSLexer ts_lexer_make() { - return (TSLexer) { .chunk = NULL, - .debug = 0, - .chunk_start = 0, - .chunk_size = 0, - .position_in_chunk = 0, - .token_start_position = 0, - .token_end_position = 0, - .reached_end = 0 }; -} - -int ts_lexer_advance(TSLexer *lexer) { +static int advance(TSLexer *lexer) { static const char *empty_chunk = ""; if (lexer->position_in_chunk + 1 < lexer->chunk_size) { lexer->position_in_chunk++; @@ -31,10 +20,24 @@ int ts_lexer_advance(TSLexer *lexer) { return 1; } -TSTree *ts_lexer_build_node(TSLexer *lexer, TSSymbol symbol, int is_hidden) { +static TSTree * accept(TSLexer *lexer, TSSymbol symbol, int is_hidden) { size_t current_position = ts_lexer_position(lexer); size_t size = current_position - lexer->token_start_position; size_t offset = lexer->token_start_position - lexer->token_end_position; lexer->token_end_position = current_position; return ts_tree_make_leaf(symbol, size, offset, is_hidden); } + +TSLexer ts_lexer_make() { + return (TSLexer) { .chunk = NULL, + .debug = 0, + .chunk_start = 0, + .chunk_size = 0, + .position_in_chunk = 0, + .token_start_position = 0, + .token_end_position = 0, + .reached_end = 0, + .advance_fn = advance, + .accept_fn = accept, + }; +} diff --git 
a/src/runtime/lexer.h b/src/runtime/lexer.h new file mode 100644 index 00000000..9e941a90 --- /dev/null +++ b/src/runtime/lexer.h @@ -0,0 +1,16 @@ +#ifndef RUNTIME_LEXER_H_ +#define RUNTIME_LEXER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "tree_sitter/parser.h" + +TSLexer ts_lexer_make(); + +#ifdef __cplusplus +} +#endif + +#endif // RUNTIME_LEXER_H_ diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 0c4e7980..c4501c9d 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -2,14 +2,17 @@ #include "tree_sitter/runtime.h" #include "tree_sitter/parser.h" #include "runtime/tree.h" +#include "runtime/lexer.h" +#include "runtime/stack.h" +#include "runtime/parser.h" /* * Private */ -static const TSParseAction *actions_for_state(TSParserConfig config, +static const TSParseAction *actions_for_state(const TSLanguage *language, TSStateId state) { - return config.parse_table + (state * config.symbol_count); + return language->parse_table + (state * language->symbol_count); } static size_t breakdown_stack(TSParser *parser, TSInputEdit *edit) { @@ -37,7 +40,7 @@ static size_t breakdown_stack(TSParser *parser, TSInputEdit *edit) { TSTree *child = children[i]; TSStateId state = ts_stack_top_state(stack); TSStateId next_state = - actions_for_state(parser->config, state)[child->symbol].data.to_state; + actions_for_state(parser->language, state)[child->symbol].data.to_state; ts_stack_push(stack, next_state, child); ts_tree_retain(child); position += ts_tree_total_size(child); @@ -52,14 +55,14 @@ static size_t breakdown_stack(TSParser *parser, TSInputEdit *edit) { static TSSymbol *expected_symbols(TSParser *parser, size_t *count) { *count = 0; const TSParseAction *actions = - actions_for_state(parser->config, ts_stack_top_state(&parser->stack)); - for (size_t i = 0; i < parser->config.symbol_count; i++) + actions_for_state(parser->language, ts_stack_top_state(&parser->stack)); + for (size_t i = 0; i < parser->language->symbol_count; i++) if 
(actions[i].type != TSParseActionTypeError) (*count)++; size_t n = 0; TSSymbol *result = malloc(*count * sizeof(*result)); - for (TSSymbol i = 0; i < parser->config.symbol_count; i++) + for (TSSymbol i = 0; i < parser->language->symbol_count; i++) if (actions[i].type != TSParseActionTypeError) result[n++] = i; @@ -70,22 +73,19 @@ static TSSymbol *expected_symbols(TSParser *parser, size_t *count) { * Public */ -TSParser *ts_parser_make(TSParserConfig config) { - TSParser *result = malloc(sizeof(*result)); - *result = (TSParser) { .lexer = ts_lexer_make(), - .stack = ts_stack_make(), - .debug = 0, - .config = config, }; - return result; +TSParser ts_parser_make(const TSLanguage *language) { + return (TSParser) { .lexer = ts_lexer_make(), + .stack = ts_stack_make(), + .debug = 0, + .language = language, }; } -void ts_parser_free(TSParser *parser) { +void ts_parser_destroy(TSParser *parser) { if (parser->lookahead) ts_tree_release(parser->lookahead); if (parser->next_lookahead) ts_tree_release(parser->next_lookahead); ts_stack_delete(&parser->stack); - free(parser); } void ts_parser_start(TSParser *parser, TSInput input, TSInputEdit *edit) { @@ -118,7 +118,7 @@ void ts_parser_shift_extra(TSParser *parser) { void ts_parser_reduce(TSParser *parser, TSSymbol symbol, size_t child_count) { parser->next_lookahead = parser->lookahead; parser->lookahead = ts_stack_reduce(&parser->stack, symbol, child_count, - parser->config.hidden_symbol_flags, 1); + parser->language->hidden_symbol_flags, 1); } int ts_parser_reduce_extra(TSParser *parser, TSSymbol symbol) { @@ -141,7 +141,7 @@ int ts_parser_handle_error(TSParser *parser) { for (;;) { ts_tree_release(parser->lookahead); size_t position = ts_lexer_position(&parser->lexer); - parser->lookahead = parser->config.lex_fn(parser, ts_lex_state_error); + parser->lookahead = parser->language->lex_fn(&parser->lexer, ts_lex_state_error); int at_end = 0; if (ts_lexer_position(&parser->lexer) == position) @@ -160,10 +160,10 @@ int 
ts_parser_handle_error(TSParser *parser) { size_t i = parser->stack.size - 1 - j; TSStateId stack_state = parser->stack.entries[i].state; TSParseAction action_on_error = - actions_for_state(parser->config, stack_state)[ts_builtin_sym_error]; + actions_for_state(parser->language, stack_state)[ts_builtin_sym_error]; if (action_on_error.type == TSParseActionTypeShift) { TSStateId state_after_error = action_on_error.data.to_state; - if (actions_for_state(parser->config, + if (actions_for_state(parser->language, state_after_error)[parser->lookahead->symbol] .type != TSParseActionTypeError) { ts_stack_shrink(&parser->stack, i + 1); @@ -180,13 +180,13 @@ TSTree *ts_parser_tree_root(TSParser *parser) { size_t node_count = 0; for (size_t i = 0; i < stack->size; i++) { TSTree *node = stack->entries[i].node; - if (!parser->config.hidden_symbol_flags[node->symbol]) + if (!parser->language->hidden_symbol_flags[node->symbol]) node_count++; } if (node_count > 1) return ts_stack_reduce(stack, 2, stack->size, - parser->config.hidden_symbol_flags, 0); + parser->language->hidden_symbol_flags, 0); else return ts_stack_top_node(stack); } @@ -195,8 +195,8 @@ TSParseAction ts_parser_next_action(TSParser *parser) { TSStateId state = ts_stack_top_state(&parser->stack); if (!parser->lookahead) parser->lookahead = - parser->config.lex_fn(parser, parser->config.lex_states[state]); - return actions_for_state(parser->config, state)[parser->lookahead->symbol]; + parser->language->lex_fn(&parser->lexer, parser->language->lex_states[state]); + return actions_for_state(parser->language, state)[parser->lookahead->symbol]; } #define DEBUG_PARSE(...) 
\ @@ -207,7 +207,7 @@ TSParseAction ts_parser_next_action(TSParser *parser) { TSTree *ts_parser_step(TSParser *parser) { TSParseAction action = ts_parser_next_action(parser); DEBUG_PARSE("LOOKAHEAD %s", - parser->config.symbol_names[parser->lookahead->symbol]); + parser->language->symbol_names[parser->lookahead->symbol]); switch (action.type) { case TSParseActionTypeShift: DEBUG_PARSE("SHIFT %d", action.data.to_state); @@ -219,7 +219,7 @@ TSTree *ts_parser_step(TSParser *parser) { return NULL; case TSParseActionTypeReduce: DEBUG_PARSE("REDUCE %s %d", - parser->config.symbol_names[action.data.symbol], + parser->language->symbol_names[action.data.symbol], action.data.child_count); ts_parser_reduce(parser, action.data.symbol, action.data.child_count); return NULL; @@ -255,5 +255,3 @@ const TSTree *ts_parser_parse(TSParser *parser, TSInput input, return tree; } } - -TSParserConfig ts_parser_config(TSParser *parser) { return parser->config; } diff --git a/src/runtime/parser.h b/src/runtime/parser.h new file mode 100644 index 00000000..4be898c2 --- /dev/null +++ b/src/runtime/parser.h @@ -0,0 +1,30 @@ +#ifndef RUNTIME_PARSER_H_ +#define RUNTIME_PARSER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "runtime/stack.h" + +typedef struct { + TSLexer lexer; + TSStack stack; + int debug; + TSTree *lookahead; + TSTree *next_lookahead; + const TSLanguage *language; +} TSParser; + +TSParser ts_parser_make(const TSLanguage *); +void ts_parser_destroy(TSParser *); +const TSTree *ts_parser_parse(TSParser *parser, TSInput input, + TSInputEdit *edit); +void ts_parser_start(TSParser *parser, TSInput input, TSInputEdit *edit); +TSTree *ts_parser_step(TSParser *parser); + +#ifdef __cplusplus +} +#endif + +#endif // RUNTIME_PARSER_H_ diff --git a/src/runtime/stack.c b/src/runtime/stack.c index d5106acb..591a6fa3 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -1,7 +1,6 @@ -#include "tree_sitter/runtime.h" #include "tree_sitter/parser.h" #include "runtime/tree.h" 
-#include +#include "runtime/stack.h" static size_t INITIAL_STACK_SIZE = 100; static TSStateId INITIAL_STATE = 0; diff --git a/src/runtime/stack.h b/src/runtime/stack.h new file mode 100644 index 00000000..aa6388bc --- /dev/null +++ b/src/runtime/stack.h @@ -0,0 +1,34 @@ +#ifndef RUNTIME_STACK_H_ +#define RUNTIME_STACK_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "tree_sitter/parser.h" + +typedef struct { + size_t size; + struct { + TSTree *node; + TSStateId state; + int is_extra; + } *entries; +} TSStack; + +TSStack ts_stack_make(); +void ts_stack_delete(TSStack *); +TSTree *ts_stack_reduce(TSStack *stack, TSSymbol symbol, + size_t immediate_child_count, + const int *hidden_symbol_flags, int gather_extras); +void ts_stack_shrink(TSStack *stack, size_t new_size); +void ts_stack_push(TSStack *stack, TSStateId state, TSTree *node); +TSStateId ts_stack_top_state(const TSStack *stack); +TSTree *ts_stack_top_node(const TSStack *stack); +size_t ts_stack_right_position(const TSStack *stack); + +#ifdef __cplusplus +} +#endif + +#endif // RUNTIME_STACK_H_