diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index c9ff30df..b43dd677 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -11,7 +11,7 @@ extern "C" { #include "tree_sitter/runtime.h" #include "tree_sitter/parser/lexer.h" #include "tree_sitter/parser/stack.h" -#include "tree_sitter/parser/lr_parser.h" +#include "tree_sitter/parser/state_machine.h" #define SYMBOL_NAMES \ static const char *ts_symbol_names[] @@ -73,10 +73,10 @@ ts_lexer_start_token(lexer); SYMBOL_NAMES; static const TSTree * ts_parse(void *data, TSInput input, TSInputEdit *edit) { - ts_lr_parser *parser = (ts_lr_parser *)data; - ts_lr_parser_initialize(parser, input, edit); + TSStateMachine *parser = (TSStateMachine *)data; + ts_state_machine_initialize(parser, input, edit); for (;;) { - const TSTree *tree = ts_lr_parser_parse(parser, ts_symbol_names); + const TSTree *tree = ts_state_machine_parse(parser, ts_symbol_names); if (tree) return tree; } } @@ -85,9 +85,9 @@ static const TSTree * ts_parse(void *data, TSInput input, TSInputEdit *edit) { TSParser constructor_name() { \ return (TSParser) { \ .parse_fn = ts_parse, \ - .free_fn = ts_lr_parser_free, \ + .free_fn = ts_state_machine_free, \ .symbol_names = ts_symbol_names, \ - .data = ts_lr_parser_make( \ + .data = ts_state_machine_make( \ SYMBOL_COUNT, \ (const TSParseAction *)ts_parse_actions, \ ts_lex_states, \ diff --git a/include/tree_sitter/parser/lr_parser.h b/include/tree_sitter/parser/state_machine.h similarity index 72% rename from include/tree_sitter/parser/lr_parser.h rename to include/tree_sitter/parser/state_machine.h index 4e650c2c..2d64c44a 100644 --- a/include/tree_sitter/parser/lr_parser.h +++ b/include/tree_sitter/parser/state_machine.h @@ -51,16 +51,17 @@ typedef struct { const TSStateId *lex_states; TSTree * (* lex_fn)(TSLexer *, TSStateId); } config; -} ts_lr_parser; +} TSStateMachine; -ts_lr_parser * ts_lr_parser_make(size_t symbol_count, - const TSParseAction *parse_table, - const TSStateId *lex_states, - TSTree * (* lex_fn)(TSLexer *, TSStateId), - const int *hidden_symbol_flags); -void ts_lr_parser_free(void *data); -void ts_lr_parser_initialize(ts_lr_parser *parser, TSInput input, TSInputEdit *edit); -TSTree * ts_lr_parser_parse(ts_lr_parser *parser, const char **symbol_names); +TSStateMachine * ts_state_machine_make( + size_t symbol_count, + const TSParseAction *parse_table, + const TSStateId *lex_states, + TSTree * (* lex_fn)(TSLexer *, TSStateId), + const int *hidden_symbol_flags); +void ts_state_machine_free(void *data); +void ts_state_machine_initialize(TSStateMachine *, TSInput, TSInputEdit *); +TSTree * ts_state_machine_parse(TSStateMachine *, const char **symbol_names); #ifdef __cplusplus } diff --git a/spec/runtime/helpers/dummy_parser.h b/spec/runtime/helpers/dummy_parser.h index 087df753..4d51ee2f 100644 --- a/spec/runtime/helpers/dummy_parser.h +++ b/spec/runtime/helpers/dummy_parser.h @@ -5,7 +5,7 @@ extern "C" { #endif -#include "tree_sitter/parser/lr_parser.h" +#include "tree_sitter/parser/state_machine.h" enum { dummy_sym1 = 2, diff --git a/spec/runtime/lr_parser_spec.cc b/spec/runtime/state_machine_spec.cc similarity index 82% rename from spec/runtime/lr_parser_spec.cc rename to spec/runtime/state_machine_spec.cc index 83f82904..d90b3148 100644 --- a/spec/runtime/lr_parser_spec.cc +++ b/spec/runtime/state_machine_spec.cc @@ -1,7 +1,7 @@ #include "runtime/runtime_spec_helper.h" #include "runtime/helpers/spy_reader.h" #include "runtime/helpers/dummy_parser.h" -#include "tree_sitter/parser/lr_parser.h" +#include "tree_sitter/parser/state_machine.h" TSTree *lex_fn_node_to_return; TSStateId lex_fn_state_received; @@ -16,12 +16,12 @@ TSTree * fake_lex(TSLexer *lexer, TSStateId state_id) { START_TEST describe("LR Parsers", [&]() { - ts_lr_parser *parser; + TSStateMachine *parser; SpyReader *reader; before_each([&]() { reader = new SpyReader("some structured text", 5); - parser = ts_lr_parser_make(dummy_parser.symbol_count, + parser = ts_state_machine_make(dummy_parser.symbol_count, (const TSParseAction *)dummy_parser.parse_table, dummy_parser.lex_states, fake_lex, @@ -34,12 +34,12 @@ describe("LR Parsers", [&]() { describe("when starting at the beginning of the input (edit is NULL)", [&]() { before_each([&]() { - ts_lr_parser_initialize(parser, reader->input, nullptr); + ts_state_machine_initialize(parser, reader->input, nullptr); }); it("runs the lexer with the lex state corresponding to the initial state", [&]() { lex_fn_node_to_return = ts_tree_make_leaf(dummy_sym2, 5, 1); - ts_lr_parser_parse(parser, nullptr); + ts_state_machine_parse(parser, nullptr); AssertThat(lex_fn_state_received, Equals(100)); }); @@ -49,12 +49,12 @@ describe("LR Parsers", [&]() { }); it("advances to the state specified in the action", [&]() { - ts_lr_parser_parse(parser, nullptr); + ts_state_machine_parse(parser, nullptr); AssertThat(ts_stack_top_state(&parser->stack), Equals(12)); }); it("continues parsing (returns NULL)", [&]() { - auto result = ts_lr_parser_parse(parser, nullptr); + auto result = ts_state_machine_parse(parser, nullptr); AssertThat(result, Equals((TSTree *)nullptr)); }); }); @@ -65,7 +65,7 @@ describe("LR Parsers", [&]() { }); it("ends the parse, returning an error tree", [&]() { - auto result = ts_lr_parser_parse(parser, nullptr); + auto result = ts_state_machine_parse(parser, nullptr); AssertThat(ts_tree_symbol(result), Equals(ts_builtin_sym_error)); }); }); diff --git a/src/runtime/lr_parser.c b/src/runtime/state_machine.c similarity index 50% rename from src/runtime/lr_parser.c rename to src/runtime/state_machine.c index 2414d49e..642fd6d2 100644 --- a/src/runtime/lr_parser.c +++ b/src/runtime/state_machine.c @@ -1,34 +1,34 @@ -#include "tree_sitter/parser/lr_parser.h" +#include "tree_sitter/parser/state_machine.h" #include "runtime/tree.h" /* * Private */ -static const TSParseAction * actions_for_state(ts_lr_parser *parser, TSStateId state) { - return parser->config.parse_table + (state * parser->config.symbol_count); +static const TSParseAction * actions_for_state(TSStateMachine *machine, TSStateId state) { + return machine->config.parse_table + (state * machine->config.symbol_count); } -void shift(ts_lr_parser *parser, TSStateId parse_state, int is_extra) { - parser->lookahead->is_extra = is_extra; - ts_stack_push(&parser->stack, parse_state, parser->lookahead); - parser->lookahead = parser->next_lookahead; - parser->next_lookahead = NULL; +void shift(TSStateMachine *machine, TSStateId parse_state, int is_extra) { + machine->lookahead->is_extra = is_extra; + ts_stack_push(&machine->stack, parse_state, machine->lookahead); + machine->lookahead = machine->next_lookahead; + machine->next_lookahead = NULL; } -void reduce(ts_lr_parser *parser, TSSymbol symbol, size_t child_count) { - parser->next_lookahead = parser->lookahead; - parser->lookahead = ts_stack_reduce(&parser->stack, +void reduce(TSStateMachine *machine, TSSymbol symbol, size_t child_count) { + machine->next_lookahead = machine->lookahead; + machine->lookahead = ts_stack_reduce(&machine->stack, symbol, child_count, - parser->config.hidden_symbol_flags, + machine->config.hidden_symbol_flags, 1); } -static size_t breakdown_stack(ts_lr_parser *parser, TSInputEdit *edit) { +static size_t breakdown_stack(TSStateMachine *machine, TSInputEdit *edit) { if (!edit) return 0; - TSStack *stack = &parser->stack; + TSStack *stack = &machine->stack; size_t position = 0; for (;;) { @@ -46,7 +46,7 @@ static size_t breakdown_stack(ts_lr_parser *parser, TSInputEdit *edit) { for (size_t i = 0; i < child_count && position < edit->position; i++) { TSTree *child = children[i]; TSStateId state = ts_stack_top_state(stack); - TSStateId next_state = actions_for_state(parser, state)[ts_tree_symbol(child)].data.to_state; + TSStateId next_state = actions_for_state(machine, state)[ts_tree_symbol(child)].data.to_state; ts_stack_push(stack, next_state, child); ts_tree_retain(child); position += ts_tree_total_size(child); @@ -58,42 +58,42 @@ static size_t breakdown_stack(ts_lr_parser *parser, TSInputEdit *edit) { return position; } -TSSymbol * expected_symbols(ts_lr_parser *parser, size_t *count) { +TSSymbol * expected_symbols(TSStateMachine *machine, size_t *count) { *count = 0; - const TSParseAction *actions = actions_for_state(parser, ts_stack_top_state(&parser->stack)); - for (size_t i = 0; i < parser->config.symbol_count; i++) + const TSParseAction *actions = actions_for_state(machine, ts_stack_top_state(&machine->stack)); + for (size_t i = 0; i < machine->config.symbol_count; i++) if (actions[i].type != ts_parse_action_type_error) ++(*count); size_t n = 0; TSSymbol *result = malloc(*count * sizeof(*result)); - for (TSSymbol i = 0; i < parser->config.symbol_count; i++) + for (TSSymbol i = 0; i < machine->config.symbol_count; i++) if (actions[i].type != ts_parse_action_type_error) result[n++] = i; return result; } -int handle_error(ts_lr_parser *parser) { +int handle_error(TSStateMachine *machine) { size_t count = 0; - const TSSymbol *inputs = expected_symbols(parser, &count); - TSTree *error = ts_tree_make_error(ts_lexer_lookahead_char(&parser->lexer), + const TSSymbol *inputs = expected_symbols(machine, &count); + TSTree *error = ts_tree_make_error(ts_lexer_lookahead_char(&machine->lexer), count, inputs, 0, 0); for (;;) { - ts_tree_release(parser->lookahead); - size_t position = ts_lexer_position(&parser->lexer); - parser->lookahead = parser->config.lex_fn(&parser->lexer, ts_lex_state_error); + ts_tree_release(machine->lookahead); + size_t position = ts_lexer_position(&machine->lexer); + machine->lookahead = machine->config.lex_fn(&machine->lexer, ts_lex_state_error); int at_end = 0; - if (ts_lexer_position(&parser->lexer) == position) - at_end = !ts_lexer_advance(&parser->lexer); + if (ts_lexer_position(&machine->lexer) == position) + at_end = !ts_lexer_advance(&machine->lexer); - if (at_end || ts_tree_symbol(parser->lookahead) == ts_builtin_sym_end) { - ts_stack_push(&parser->stack, 0, error); + if (at_end || ts_tree_symbol(machine->lookahead) == ts_builtin_sym_end) { + ts_stack_push(&machine->stack, 0, error); return 0; } @@ -101,15 +101,15 @@ int handle_error(ts_lr_parser *parser) { * Unwind the stack, looking for a state in which this token * may appear after an error. */ - for (size_t j = 0; j < parser->stack.size; j++) { - size_t i = parser->stack.size - 1 - j; - TSStateId stack_state = parser->stack.entries[i].state; - TSParseAction action_on_error = actions_for_state(parser, stack_state)[ts_builtin_sym_error]; + for (size_t j = 0; j < machine->stack.size; j++) { + size_t i = machine->stack.size - 1 - j; + TSStateId stack_state = machine->stack.entries[i].state; + TSParseAction action_on_error = actions_for_state(machine, stack_state)[ts_builtin_sym_error]; if (action_on_error.type == ts_parse_action_type_shift) { TSStateId state_after_error = action_on_error.data.to_state; - if (actions_for_state(parser, state_after_error)[ts_tree_symbol(parser->lookahead)].type != ts_parse_action_type_error) { - ts_stack_shrink(&parser->stack, i + 1); - ts_stack_push(&parser->stack, state_after_error, error); + if (actions_for_state(machine, state_after_error)[ts_tree_symbol(machine->lookahead)].type != ts_parse_action_type_error) { + ts_stack_shrink(&machine->stack, i + 1); + ts_stack_push(&machine->stack, state_after_error, error); return 1; } } @@ -117,8 +117,8 @@ int handle_error(ts_lr_parser *parser) { } } -TSTree * get_tree_root(ts_lr_parser *parser) { - TSStack *stack = &parser->stack; +TSTree * get_tree_root(TSStateMachine *machine) { + TSStack *stack = &machine->stack; TSTree *top_node = ts_stack_top_node(stack); if (stack->size <= 1) return top_node; @@ -134,37 +134,37 @@ TSTree * get_tree_root(ts_lr_parser *parser) { child->is_extra = 0; ts_tree_retain(child); TSStateId state = ts_stack_top_state(stack); - TSStateId next_state = actions_for_state(parser, state)[ts_tree_symbol(child)].data.to_state; + TSStateId next_state = actions_for_state(machine, state)[ts_tree_symbol(child)].data.to_state; ts_stack_push(stack, next_state, child); } TSTree *new_node = ts_stack_reduce(stack, top_node->symbol, stack->size, - parser->config.hidden_symbol_flags, + machine->config.hidden_symbol_flags, 0); ts_tree_release(top_node); return new_node; } -TSParseAction get_next_action(ts_lr_parser *parser) { - TSStateId state = ts_stack_top_state(&parser->stack); - if (!parser->lookahead) - parser->lookahead = parser->config.lex_fn(&parser->lexer, parser->config.lex_states[state]); - return actions_for_state(parser, state)[ts_tree_symbol(parser->lookahead)]; +TSParseAction get_next_action(TSStateMachine *machine) { + TSStateId state = ts_stack_top_state(&machine->stack); + if (!machine->lookahead) + machine->lookahead = machine->config.lex_fn(&machine->lexer, machine->config.lex_states[state]); + return actions_for_state(machine, state)[ts_tree_symbol(machine->lookahead)]; } /* * Public API */ -ts_lr_parser * ts_lr_parser_make(size_t symbol_count, +TSStateMachine * ts_state_machine_make(size_t symbol_count, const TSParseAction *parse_table, const TSStateId *lex_states, TSTree * (* lex_fn)(TSLexer *, TSStateId), const int *hidden_symbol_flags) { - ts_lr_parser *result = malloc(sizeof(ts_lr_parser)); - *result = (ts_lr_parser) { + TSStateMachine *result = malloc(sizeof(TSStateMachine)); + *result = (TSStateMachine) { .lexer = ts_lexer_make(), .stack = ts_stack_make(), .config = { @@ -178,25 +178,25 @@ ts_lr_parser * ts_lr_parser_make(size_t symbol_count, return result; } -void ts_lr_parser_free(void *data) { - ts_lr_parser *parser = (ts_lr_parser *)data; - if (parser->lookahead) ts_tree_release(parser->lookahead); - if (parser->next_lookahead) ts_tree_release(parser->next_lookahead); - ts_stack_delete(&parser->stack); - free(parser); +void ts_state_machine_free(void *data) { + TSStateMachine *machine = (TSStateMachine *)data; + if (machine->lookahead) ts_tree_release(machine->lookahead); + if (machine->next_lookahead) ts_tree_release(machine->next_lookahead); + ts_stack_delete(&machine->stack); + free(machine); } -void ts_lr_parser_initialize(ts_lr_parser *parser, TSInput input, TSInputEdit *edit) { - if (!edit) ts_stack_shrink(&parser->stack, 0); - parser->lookahead = NULL; - parser->next_lookahead = NULL; +void ts_state_machine_initialize(TSStateMachine *machine, TSInput input, TSInputEdit *edit) { + if (!edit) ts_stack_shrink(&machine->stack, 0); + machine->lookahead = NULL; + machine->next_lookahead = NULL; - size_t position = breakdown_stack(parser, edit); + size_t position = breakdown_stack(machine, edit); input.seek_fn(input.data, position); - parser->lexer = ts_lexer_make(); - parser->lexer.input = input; - ts_lexer_advance(&parser->lexer); + machine->lexer = ts_lexer_make(); + machine->lexer.input = input; + ts_lexer_advance(&machine->lexer); } /* #define TS_DEBUG_PARSE */ @@ -208,31 +208,31 @@ void ts_lr_parser_initialize(ts_lr_parser *parser, TSInput input, TSInputEdit *e #define DEBUG_PARSE(...) #endif -TSTree * ts_lr_parser_parse(ts_lr_parser *parser, const char **symbol_names) { - TSParseAction action = get_next_action(parser); - DEBUG_PARSE("LOOKAHEAD %s", symbol_names[ts_tree_symbol(parser->lookahead)]); +TSTree * ts_state_machine_parse(TSStateMachine *machine, const char **symbol_names) { + TSParseAction action = get_next_action(machine); + DEBUG_PARSE("LOOKAHEAD %s", symbol_names[ts_tree_symbol(machine->lookahead)]); switch (action.type) { case ts_parse_action_type_shift: DEBUG_PARSE("SHIFT %d", action.data.to_state); - shift(parser, action.data.to_state, 0); + shift(machine, action.data.to_state, 0); return NULL; case ts_parse_action_type_shift_extra: DEBUG_PARSE("SHIFT EXTRA"); - shift(parser, ts_stack_top_state(&parser->stack), 1); + shift(machine, ts_stack_top_state(&machine->stack), 1); return NULL; case ts_parse_action_type_reduce: DEBUG_PARSE("REDUCE %s %d", symbol_names[action.data.symbol], action.data.child_count); - reduce(parser, action.data.symbol, action.data.child_count); + reduce(machine, action.data.symbol, action.data.child_count); return NULL; case ts_parse_action_type_accept: DEBUG_PARSE("ACCEPT"); - return get_tree_root(parser); + return get_tree_root(machine); case ts_parse_action_type_error: DEBUG_PARSE("ERROR"); - if (handle_error(parser)) + if (handle_error(machine)) return NULL; else - return get_tree_root(parser); + return get_tree_root(machine); default: return NULL; }