diff --git a/examples/parsers/json.c b/examples/parsers/json.c index af4c2a42..a0113878 100644 --- a/examples/parsers/json.c +++ b/examples/parsers/json.c @@ -320,7 +320,6 @@ LEX_FN() { ADVANCE(27); LEX_ERROR(); case ts_lex_state_error: - START_TOKEN(); if (lookahead == 0) ADVANCE(25); if (('\t' <= lookahead && lookahead <= '\n') || diff --git a/spec/runtime/stack_spec.cc b/spec/runtime/stack_spec.cc index 4ea53727..d08e55ce 100644 --- a/spec/runtime/stack_spec.cc +++ b/spec/runtime/stack_spec.cc @@ -5,7 +5,6 @@ START_TEST enum { sym1, sym2, hidden_sym }; -int hidden_symbols[] = { 0, 0, 1 }; describe("stacks", [&]() { TSStack stack; @@ -42,55 +41,6 @@ describe("stacks", [&]() { AssertThat(ts_stack_top_node(&stack), Equals(node1)); }); }); - - describe("reducing a symbol", [&]() { - TSTree **nodes; - - before_each([&]() { - nodes = tree_array({ - ts_tree_make_leaf(sym1, 5, 1, 0), - ts_tree_make_leaf(sym1, 5, 1, 0), - ts_tree_make_leaf(hidden_sym, 5, 1, 0), - ts_tree_make_leaf(sym1, 5, 1, 0), - }); - - for (TSStateId i = 0; i < 4; i++) - ts_stack_push(&stack, 10 + i, nodes[i]); - }); - - after_each([&]() { - for (TSStateId i = 0; i < 4; i++) - ts_tree_release(nodes[i]); - free(nodes); - }); - - it("pops the given number of nodes off the stack", [&]() { - AssertThat(stack.size, Equals(4)); - ts_stack_reduce(&stack, sym2, 3, hidden_symbols); - AssertThat(stack.size, Equals(1)); - }); - - it("returns a node with the given symbol", [&]() { - TSTree *node = ts_stack_reduce(&stack, sym2, 3, hidden_symbols); - AssertThat(node->symbol, Equals(sym2)); - }); - - it("removes any hidden nodes from its regular list of children", [&]() { - TSTree *expected_children[3] = { - stack.entries[1].node, - stack.entries[2].node, - stack.entries[3].node, - }; - - TSTree *node = ts_stack_reduce(&stack, sym2, 3, hidden_symbols); - size_t child_count; - TSTree **children = ts_tree_children(node, &child_count); - - AssertThat(child_count, Equals(3)); - for (size_t i = 0; i < 2; i++) - AssertThat(children[i], Equals(expected_children[i])); - }); - }); }); END_TEST diff --git a/src/runtime/document.c b/src/runtime/document.c index 9d4d9019..2b2a491f 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -26,10 +26,12 @@ void ts_document_free(TSDocument *document) { static void reparse(TSDocument *document, TSInputEdit *edit) { if (document->input.read_fn && document->parser.language) { - const TSTree *tree = ts_parser_parse(&document->parser, document->input, edit); + const TSTree *tree = + ts_parser_parse(&document->parser, document->input, edit); if (document->node) ts_node_release(document->node); - document->node = ts_node_make_root(tree, document->parser.language->symbol_names); + document->node = + ts_node_make_root(tree, document->parser.language->symbol_names); } } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 3c732d49..3d3adc7d 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -6,10 +6,6 @@ #include "runtime/stack.h" #include "runtime/parser.h" -/* - * Private - */ - static const TSParseAction *actions_for_state(const TSLanguage *language, TSStateId state) { return language->parse_table + (state * language->symbol_count); @@ -69,40 +65,7 @@ static TSSymbol *expected_symbols(TSParser *parser, size_t *count) { return result; } -/* - * Public - */ - -TSParser ts_parser_make(const TSLanguage *language) { - return (TSParser) { .lexer = ts_lexer_make(), - .stack = ts_stack_make(), - .debug = 0, - .language = language, }; -} - -void ts_parser_destroy(TSParser *parser) { - if (parser->lookahead) - ts_tree_release(parser->lookahead); - if (parser->next_lookahead) - ts_tree_release(parser->next_lookahead); - ts_stack_delete(&parser->stack); -} - -void ts_parser_start(TSParser *parser, TSInput input, TSInputEdit *edit) { - if (!edit) - ts_stack_shrink(&parser->stack, 0); - parser->lookahead = NULL; - parser->next_lookahead = NULL; - - size_t position = breakdown_stack(parser, edit); - input.seek_fn(input.data, position); - - parser->lexer = ts_lexer_make(); - parser->lexer.input = input; - ts_lexer_advance(&parser->lexer); -} - -void ts_parser_shift(TSParser *parser, TSStateId parse_state) { +static void shift(TSParser *parser, TSStateId parse_state) { if (ts_tree_is_extra(parser->lookahead)) parse_state = ts_stack_top_state(&parser->stack); ts_stack_push(&parser->stack, parse_state, parser->lookahead); @@ -110,21 +73,40 @@ void ts_parser_shift(TSParser *parser, TSStateId parse_state) { parser->next_lookahead = NULL; } -void ts_parser_shift_extra(TSParser *parser) { +static void shift_extra(TSParser *parser) { ts_tree_set_extra(parser->lookahead); - ts_parser_shift(parser, 0); + shift(parser, 0); } -void ts_parser_reduce(TSParser *parser, TSSymbol symbol, size_t child_count) { +static void reduce(TSParser *parser, TSSymbol symbol, size_t child_count) { + TSStack *stack = &parser->stack; parser->next_lookahead = parser->lookahead; - parser->lookahead = ts_stack_reduce(&parser->stack, symbol, child_count, - parser->language->hidden_symbol_flags); + + // Walk down the stack to determine which symbols will be reduced. + // The child node count is known ahead of time, but some children + // may be ubiquitous tokens, which don't count. + for (size_t i = 0; i < child_count; i++) { + if (child_count == stack->size) + break; + TSTree *child = stack->entries[stack->size - 1 - i].node; + if (ts_tree_is_extra(child)) + child_count++; + } + + size_t start_index = stack->size - child_count; + TSTree **children = calloc(child_count, sizeof(TSTree *)); + for (size_t i = 0; i < child_count; i++) + children[i] = stack->entries[start_index + i].node; + + int hidden = parser->language->hidden_symbol_flags[symbol]; + parser->lookahead = ts_tree_make_node(symbol, child_count, children, hidden); + ts_stack_shrink(stack, stack->size - child_count); } -int ts_parser_reduce_extra(TSParser *parser, TSSymbol symbol) { +static int reduce_extra(TSParser *parser, TSSymbol symbol) { TSTree *top_node = ts_stack_top_node(&parser->stack); if (top_node->symbol == symbol && !ts_tree_is_extra(top_node)) { - ts_parser_reduce(parser, symbol, 1); + reduce(parser, symbol, 1); ts_tree_set_extra(parser->lookahead); return 1; } else { @@ -132,7 +114,7 @@ int ts_parser_reduce_extra(TSParser *parser, TSSymbol symbol) { } } -int ts_parser_handle_error(TSParser *parser) { +static int handle_error(TSParser *parser) { size_t count = 0; const TSSymbol *inputs = expected_symbols(parser, &count); TSTree *error = ts_tree_make_error(ts_lexer_lookahead_char(&parser->lexer), @@ -176,19 +158,17 @@ int ts_parser_handle_error(TSParser *parser) { } } -TSTree *ts_parser_tree_root(TSParser *parser) { - TSStack *stack = &parser->stack; - if (stack->size == 0) +static TSTree *get_root(TSParser *parser) { + if (parser->stack.size == 0) return NULL; - TSTree *tree = ts_stack_reduce(stack, ts_builtin_sym_document, - stack->size, parser->language->hidden_symbol_flags); - tree->options = 0; - ts_stack_push(stack, 0, tree); - return tree; + reduce(parser, ts_builtin_sym_document, parser->stack.size); + parser->lookahead->options = 0; + shift(parser, 0); + return parser->stack.entries[0].node; } -TSParseAction ts_parser_next_action(TSParser *parser) { +static TSParseAction next_action(TSParser *parser) { TSStateId state = ts_stack_top_state(&parser->stack); if (!parser->lookahead) parser->lookahead = parser->language->lex_fn( @@ -196,59 +176,82 @@ TSParseAction ts_parser_next_action(TSParser *parser) { return actions_for_state(parser->language, state)[parser->lookahead->symbol]; } +TSParser ts_parser_make(const TSLanguage *language) { + return (TSParser) { .lexer = ts_lexer_make(), + .stack = ts_stack_make(), + .debug = 0, + .language = language, }; +} + +void ts_parser_destroy(TSParser *parser) { + if (parser->lookahead) + ts_tree_release(parser->lookahead); + if (parser->next_lookahead) + ts_tree_release(parser->next_lookahead); + ts_stack_delete(&parser->stack); +} + #define DEBUG_PARSE(...) \ if (parser->debug) { \ fprintf(stderr, "\n" __VA_ARGS__); \ } -TSTree *ts_parser_step(TSParser *parser) { - TSParseAction action = ts_parser_next_action(parser); - DEBUG_PARSE("LOOKAHEAD %s", - parser->language->symbol_names[parser->lookahead->symbol]); - switch (action.type) { - case TSParseActionTypeShift: - DEBUG_PARSE("SHIFT %d", action.data.to_state); - ts_parser_shift(parser, action.data.to_state); - return NULL; - case TSParseActionTypeShiftExtra: - DEBUG_PARSE("SHIFT EXTRA"); - ts_parser_shift_extra(parser); - return NULL; - case TSParseActionTypeReduce: - DEBUG_PARSE("REDUCE %s %d", - parser->language->symbol_names[action.data.symbol], - action.data.child_count); - ts_parser_reduce(parser, action.data.symbol, action.data.child_count); - return NULL; - case TSParseActionTypeReduceExtra: - if (!ts_parser_reduce_extra(parser, action.data.symbol)) - goto error; - DEBUG_PARSE("REDUCE EXTRA"); - return NULL; - case TSParseActionTypeAccept: - DEBUG_PARSE("ACCEPT"); - return ts_parser_tree_root(parser); - case TSParseActionTypeError: - goto error; - default: - return NULL; - } - -error: - DEBUG_PARSE("ERROR"); - if (!ts_parser_handle_error(parser)) - return ts_parser_tree_root(parser); - else - return NULL; -} - const TSTree *ts_parser_parse(TSParser *parser, TSInput input, TSInputEdit *edit) { - ts_parser_start(parser, input, edit); + if (!edit) + ts_stack_shrink(&parser->stack, 0); + + parser->lookahead = NULL; + parser->next_lookahead = NULL; + parser->lexer = ts_lexer_make(); + parser->lexer.input = input; + + input.seek_fn(input.data, breakdown_stack(parser, edit)); + ts_lexer_advance(&parser->lexer); for (;;) { - const TSTree *tree = ts_parser_step(parser); - if (tree) - return tree; + TSParseAction action = next_action(parser); + DEBUG_PARSE("LOOKAHEAD %s", + parser->language->symbol_names[parser->lookahead->symbol]); + + switch (action.type) { + case TSParseActionTypeShift: + DEBUG_PARSE("SHIFT %d", action.data.to_state); + shift(parser, action.data.to_state); + break; + + case TSParseActionTypeShiftExtra: + DEBUG_PARSE("SHIFT EXTRA"); + shift_extra(parser); + break; + + case TSParseActionTypeReduce: + DEBUG_PARSE("REDUCE %s %d", + parser->language->symbol_names[action.data.symbol], + action.data.child_count); + reduce(parser, action.data.symbol, action.data.child_count); + break; + + case TSParseActionTypeReduceExtra: + if (!reduce_extra(parser, action.data.symbol)) { + DEBUG_PARSE("ERROR"); + if (!handle_error(parser)) + return get_root(parser); + } + DEBUG_PARSE("REDUCE EXTRA"); + break; + + case TSParseActionTypeAccept: + DEBUG_PARSE("ACCEPT"); + return get_root(parser); + + case TSParseActionTypeError: + if (!handle_error(parser)) + return get_root(parser); + break; + + default: + return NULL; + } } } diff --git a/src/runtime/parser.h b/src/runtime/parser.h index 4be898c2..5e171e13 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -18,10 +18,7 @@ typedef struct { TSParser ts_parser_make(const TSLanguage *); void ts_parser_destroy(TSParser *); -const TSTree *ts_parser_parse(TSParser *parser, TSInput input, - TSInputEdit *edit); -void ts_parser_start(TSParser *parser, TSInput input, TSInputEdit *edit); -TSTree *ts_parser_step(TSParser *parser); +const TSTree *ts_parser_parse(TSParser *, TSInput, TSInputEdit *); #ifdef __cplusplus } diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 43a20a9d..50f9f763 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -50,28 +50,3 @@ size_t ts_stack_right_position(const TSStack *stack) { } return result; } - -TSTree *ts_stack_reduce(TSStack *stack, TSSymbol symbol, size_t child_count, - const int *hidden_symbol_flags) { - - // First, walk down the stack to determine which symbols will be reduced. - // The child node count is known ahead of time, but some children may be - // ubiquitous tokens, which don't count. - for (size_t i = 0; i < child_count; i++) { - if (child_count == stack->size) - break; - TSTree *child = stack->entries[stack->size - 1 - i].node; - if (ts_tree_is_extra(child)) - child_count++; - } - - size_t start_index = stack->size - child_count; - TSTree **children = calloc(child_count, sizeof(TSTree *)); - for (size_t i = 0; i < child_count; i++) - children[i] = stack->entries[start_index + i].node; - - TSTree *lookahead = ts_tree_make_node(symbol, child_count, children, - hidden_symbol_flags[symbol]); - ts_stack_shrink(stack, stack->size - child_count); - return lookahead; -} diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 668127be..ba523e56 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -18,7 +18,6 @@ typedef struct { TSStack ts_stack_make(); void ts_stack_delete(TSStack *); -TSTree *ts_stack_reduce(TSStack *, TSSymbol, size_t, const int *hidden_symbols); void ts_stack_shrink(TSStack *stack, size_t new_size); void ts_stack_push(TSStack *stack, TSStateId state, TSTree *node); TSStateId ts_stack_top_state(const TSStack *stack);