From 442db56b92b1266a16a896c501efc63169c96da4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 18 Jun 2015 15:04:03 -0700 Subject: [PATCH] Use graph-structured parse stack in parser Not using the splitting feature yet. --- spec/runtime/parse_stack_spec.cc | 26 +++++++--- src/runtime/parse_stack.c | 48 ++++++++++++++---- src/runtime/parse_stack.h | 34 ++++++++++--- src/runtime/parser.c | 86 +++++++++++++------------------- src/runtime/parser.h | 3 +- 5 files changed, 123 insertions(+), 74 deletions(-) diff --git a/spec/runtime/parse_stack_spec.cc b/spec/runtime/parse_stack_spec.cc index b2d4e5ef..226390ed 100644 --- a/spec/runtime/parse_stack_spec.cc +++ b/spec/runtime/parse_stack_spec.cc @@ -72,7 +72,7 @@ describe("ParseStack", [&]() { /* * A0. */ - pop = ts_parse_stack_pop(stack, 0, 2); + pop = ts_parse_stack_pop(stack, 0, 2, false); AssertThat(pop.size, Equals(1)); AssertThat(pop.contents[0].tree_count, Equals(2)); AssertThat(pop.contents[0].trees[0], Equals(trees[1])); @@ -82,7 +82,7 @@ describe("ParseStack", [&]() { /* * . */ - pop = ts_parse_stack_pop(stack, 0, 1); + pop = ts_parse_stack_pop(stack, 0, 1, false); AssertThat(pop.size, Equals(1)); AssertThat(pop.contents[0].tree_count, Equals(1)); AssertThat(pop.contents[0].trees[0], Equals(trees[0])); @@ -95,7 +95,7 @@ describe("ParseStack", [&]() { ts_parse_stack_push(stack, 0, stateC, trees[2]); ts_tree_set_extra(trees[1]); - pop = ts_parse_stack_pop(stack, 0, 2); + pop = ts_parse_stack_pop(stack, 0, 2, false); AssertThat(pop.size, Equals(1)); AssertThat(pop.contents[0].tree_count, Equals(3)); AssertThat(pop.contents[0].trees[0], Equals(trees[0])); @@ -121,7 +121,7 @@ describe("ParseStack", [&]() { * \. */ ts_parse_stack_push(stack, 0, stateD, trees[3]); - ts_parse_stack_pop(stack, 1, 1); + ts_parse_stack_pop(stack, 1, 1, false); AssertThat(ts_parse_stack_head_count(stack), Equals(2)); AssertThat(*ts_parse_stack_head(stack, 0), Equals({trees[3], stateD})); @@ -139,6 +139,20 @@ describe("ParseStack", [&]() { AssertThat(*ts_parse_stack_head(stack, 1), Equals({trees[3], stateF})); }); + it("pops the entire stack when given a negative count", [&]() { + ts_parse_stack_push(stack, 0, stateA, trees[0]); + ts_parse_stack_push(stack, 0, stateB, trees[1]); + ts_parse_stack_push(stack, 0, stateC, trees[2]); + + pop = ts_parse_stack_pop(stack, 0, -1, false); + + AssertThat(pop.size, Equals(1)); + AssertThat(pop.contents[0].tree_count, Equals(3)); + AssertThat(pop.contents[0].trees[0], Equals(trees[0])); + AssertThat(pop.contents[0].trees[1], Equals(trees[1])); + AssertThat(pop.contents[0].trees[2], Equals(trees[2])); + }); + describe("when same state is pushed onto two heads", [&]() { before_each([&]() { /* @@ -154,7 +168,7 @@ describe("ParseStack", [&]() { */ ts_parse_stack_split(stack, 0); ts_parse_stack_push(stack, 0, stateD, trees[3]); - ts_parse_stack_pop(stack, 1, 1); + ts_parse_stack_pop(stack, 1, 1, false); ts_parse_stack_push(stack, 1, stateE, trees[4]); ts_parse_stack_push(stack, 1, stateF, trees[5]); @@ -185,7 +199,7 @@ describe("ParseStack", [&]() { * A0__B1__C2. * \__E4. */ - pop = ts_parse_stack_pop(stack, 0, 2); + pop = ts_parse_stack_pop(stack, 0, 2, false); AssertThat(pop.size, Equals(2)); AssertThat(pop.contents[0].tree_count, Equals(2)); diff --git a/src/runtime/parse_stack.c b/src/runtime/parse_stack.c index 92ad272a..8d85e163 100644 --- a/src/runtime/parse_stack.c +++ b/src/runtime/parse_stack.c @@ -7,6 +7,7 @@ #define MAX_POP_PATH_COUNT 8 #define INITIAL_HEAD_CAPACITY 3 +#define STARTING_TREE_CAPACITY 10 typedef struct ParseStackNode { ParseStackEntry entry; @@ -45,7 +46,17 @@ void ts_parse_stack_delete(ParseStack *this) { * Section: Reading from the stack */ -const ParseStackEntry *ts_parse_stack_head(const ParseStack *this, int head) { +TSStateId ts_parse_stack_top_state(const ParseStack *this, int head) { + ParseStackEntry *entry = ts_parse_stack_head((ParseStack *)this, head); + return entry ? entry->state : 0; +} + +TSTree *ts_parse_stack_top_tree(const ParseStack *this, int head) { + ParseStackEntry *entry = ts_parse_stack_head((ParseStack *)this, head); + return entry ? entry->tree : NULL; +} + +ParseStackEntry *ts_parse_stack_head(ParseStack *this, int head) { assert(head < this->head_count); ParseStackNode *node = this->heads[head]; return node ? &node->entry : NULL; @@ -59,7 +70,7 @@ int ts_parse_stack_entry_next_count(const ParseStackEntry *entry) { return ((const ParseStackNode *)entry)->successor_count; } -const ParseStackEntry *ts_parse_stack_entry_next(const ParseStackEntry *entry, int i) { +ParseStackEntry *ts_parse_stack_entry_next(const ParseStackEntry *entry, int i) { return &((const ParseStackNode *)entry)->successors[i]->entry; } @@ -175,13 +186,14 @@ int ts_parse_stack_split(ParseStack *this, int head_index) { return ts_parse_stack_add_head(this, this->heads[head_index]); } -ParseStackPopResultList ts_parse_stack_pop(ParseStack *this, int head_index, int child_count) { +ParseStackPopResultList ts_parse_stack_pop(ParseStack *this, int head_index, int child_count, bool count_extra) { ParseStackNode *previous_head = this->heads[head_index]; int path_count = 1; + int capacity = (child_count == -1) ? STARTING_TREE_CAPACITY : child_count; size_t tree_counts_by_path[MAX_POP_PATH_COUNT] = {child_count}; - TreeVector trees_by_path[MAX_POP_PATH_COUNT] = {tree_vector_new(child_count)}; ParseStackNode *nodes_by_path[MAX_POP_PATH_COUNT] = {previous_head}; + TreeVector trees_by_path[MAX_POP_PATH_COUNT] = {tree_vector_new(capacity)}; /* * Reduce along every possible path in parallel. Stop when the given number @@ -192,16 +204,15 @@ ParseStackPopResultList ts_parse_stack_pop(ParseStack *this, int head_index, int all_paths_done = true; int current_path_count = path_count; for (int path = 0; path < current_path_count; path++) { - if (trees_by_path[path].size == tree_counts_by_path[path]) + ParseStackNode *node = nodes_by_path[path]; + if (!node || (trees_by_path[path].size == tree_counts_by_path[path])) continue; - else - all_paths_done = false; + all_paths_done = false; /* * Children that are 'extra' do not count towards the total child count. */ - ParseStackNode *node = nodes_by_path[path]; - if (ts_tree_is_extra(node->entry.tree)) + if (ts_tree_is_extra(node->entry.tree) && !count_extra) tree_counts_by_path[path]++; /* @@ -251,3 +262,22 @@ ParseStackPopResultList ts_parse_stack_pop(ParseStack *this, int head_index, int .contents = this->last_pop_results, }; } + +void ts_parse_stack_shrink(ParseStack *this, int head_index, int count) { + ParseStackNode *head = this->heads[head_index]; + ParseStackNode *new_head = head; + for (int i = 0; i < count; i++) { + if (new_head->successor_count == 0) break; + new_head = new_head->successors[0]; + } + stack_node_retain(new_head); + stack_node_release(head); + this->heads[head_index] = new_head; +} + +void ts_parse_stack_clear(ParseStack *this) { + for (int i = 0; i < this->head_count; i++) + stack_node_release(this->heads[i]); + this->head_count = 1; + this->heads[0] = NULL; +} diff --git a/src/runtime/parse_stack.h b/src/runtime/parse_stack.h index d3731443..e31e1596 100644 --- a/src/runtime/parse_stack.h +++ b/src/runtime/parse_stack.h @@ -41,19 +41,31 @@ void ts_parse_stack_delete(ParseStack *); int ts_parse_stack_head_count(const ParseStack *); /* - * Get the tree and state that are at the top of the given stack head. + * Get the state at given head of the stack. If the stack is empty, this + * returns the initial state (0). */ -const ParseStackEntry *ts_parse_stack_head(const ParseStack *, int head); +TSStateId ts_parse_stack_top_state(const ParseStack *, int head); /* - * Get the number of successors for a given parse stack entry. + * Get the tree at given head of the stack. If the stack is empty, this + * returns NULL. + */ +TSTree *ts_parse_stack_top_tree(const ParseStack *, int head); + +/* + * Get the entry at the given head of the stack. + */ +ParseStackEntry *ts_parse_stack_head(ParseStack *, int head); + +/* + * Get the number of successors for the parse stack entry. */ int ts_parse_stack_entry_next_count(const ParseStackEntry *); /* - * Get the nth successor to a given parse stack entry. + * Get the given successor for the parse stack entry. */ -const ParseStackEntry *ts_parse_stack_entry_next(const ParseStackEntry *, int); +ParseStackEntry *ts_parse_stack_entry_next(const ParseStackEntry *, int); /* * Push a (tree, state) pair onto the given head of the stack. Returns @@ -68,7 +80,12 @@ bool ts_parse_stack_push(ParseStack *, int head, TSStateId, TSTree *); * which had previously been merged. It returns a struct that indicates the * index of each revealed head and the trees removed from that head. */ -ParseStackPopResultList ts_parse_stack_pop(ParseStack *, int head, int count); +ParseStackPopResultList ts_parse_stack_pop(ParseStack *, int head, int count, bool count_extra); + +/* + * Remove the given number of entries from the given head of the stack. + */ +void ts_parse_stack_shrink(ParseStack *, int head, int count); /* * Split the given stack head into two heads, so that the stack can be @@ -77,6 +94,11 @@ ParseStackPopResultList ts_parse_stack_pop(ParseStack *, int head, int count); */ int ts_parse_stack_split(ParseStack *, int head); +/* + * Remove all entries from the stack. + */ +void ts_parse_stack_clear(ParseStack *); + #ifdef __cplusplus } #endif diff --git a/src/runtime/parser.c b/src/runtime/parser.c index a0f9f41b..75aa3e9e 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -40,16 +40,17 @@ static TSParseAction get_action(const TSLanguage *language, TSStateId state, static TSLength break_down_left_stack(TSParser *parser, TSInputEdit edit) { ts_stack_shrink(&parser->right_stack, 0); - TSLength prev_size = ts_stack_total_tree_size(&parser->stack); + TSLength prev_size = ts_tree_total_size(ts_parse_stack_top_tree(parser->stack, 0)); parser->total_chars = prev_size.chars + edit.chars_inserted - edit.chars_removed; TSLength left_subtree_end = prev_size; size_t right_subtree_start = parser->total_chars; for (;;) { - TSTree *node = ts_stack_top_node(&parser->stack); - if (!node) + ParseStackEntry *entry = ts_parse_stack_head(parser->stack, 0); + if (!entry) break; + TSTree *node = entry->tree; size_t child_count; TSTree **children = ts_tree_children(node, &child_count); @@ -57,20 +58,20 @@ static TSLength break_down_left_stack(TSParser *parser, TSInputEdit edit) { break; DEBUG("pop_left sym:%s, state:%u", SYM_NAME(node->symbol), - ts_stack_top_state(&parser->stack)); - parser->stack.size--; + ts_parse_stack_top_state(parser->stack, 0)); + ts_parse_stack_shrink(parser->stack, 0, 1); left_subtree_end = ts_length_sub(left_subtree_end, ts_tree_total_size(node)); size_t i = 0; for (; i < child_count && left_subtree_end.chars < edit.position; i++) { TSTree *child = children[i]; - TSStateId state = ts_stack_top_state(&parser->stack); + TSStateId state = ts_parse_stack_top_state(parser->stack, 0); TSParseAction action = get_action(parser->language, state, child->symbol); TSStateId next_state = ts_tree_is_extra(child) ? state : action.data.to_state; DEBUG("push_left sym:%s, state:%u", SYM_NAME(child->symbol), next_state); - ts_stack_push(&parser->stack, next_state, child); + ts_parse_stack_push(parser->stack, 0, next_state, child); left_subtree_end = ts_length_add(left_subtree_end, ts_tree_total_size(child)); } @@ -89,14 +90,14 @@ static TSLength break_down_left_stack(TSParser *parser, TSInputEdit edit) { } DEBUG("reuse_left chars:%lu, state:%u", left_subtree_end.chars, - ts_stack_top_state(&parser->stack)); + ts_parse_stack_top_state(parser->stack, 0)); return left_subtree_end; } static TSTree *break_down_right_stack(TSParser *parser) { TSStack *stack = &parser->right_stack; TSLength current_position = parser->lexer.current_position; - TSStateId state = ts_stack_top_state(&parser->stack); + TSStateId state = ts_parse_stack_top_state(parser->stack, 0); size_t right_subtree_start = parser->total_chars - ts_stack_total_tree_size(stack).chars; @@ -168,7 +169,7 @@ static TSTree *get_next_node(TSParser *parser, TSStateId lex_state) { */ static void shift(TSParser *parser, TSStateId parse_state) { - ts_stack_push(&parser->stack, parse_state, parser->lookahead); + ts_parse_stack_push(parser->stack, 0, parse_state, parser->lookahead); parser->lookahead = NULL; } @@ -178,39 +179,16 @@ static void shift_extra(TSParser *parser, TSStateId state) { } static TSTree * reduce_helper(TSParser *parser, TSSymbol symbol, size_t child_count, bool extra, bool count_extra) { - - /* - * Walk down the stack to determine which symbols will be reduced. - * The child node count is known ahead of time, but some children - * may be ubiquitous tokens, which don't count. - */ - TSStack *stack = &parser->stack; - if (!count_extra) { - for (size_t i = 0; i < child_count; i++) { - if (child_count == stack->size) - break; - TSTree *child = stack->entries[stack->size - 1 - i].node; - if (ts_tree_is_extra(child)) - child_count++; - } - } - - size_t start_index = stack->size - child_count; - TSTree **children = calloc(child_count, sizeof(TSTree *)); - for (size_t i = 0; i < child_count; i++) - children[i] = stack->entries[start_index + i].node; + ParseStackPopResultList pop_results = ts_parse_stack_pop(parser->stack, 0, child_count, count_extra); + TSTree **children = pop_results.contents[0].trees; bool hidden = parser->language->hidden_symbol_flags[symbol]; - TSTree *parent = ts_tree_make_node(symbol, child_count, children, hidden); + TSTree *parent = ts_tree_make_node(symbol, pop_results.contents[0].tree_count, children, hidden); - ts_stack_shrink(stack, start_index); - TSStateId top_state = ts_stack_top_state(stack), state; - if (extra) - state = top_state; - else - state = get_action(parser->language, top_state, symbol).data.to_state; + TSStateId top_state = ts_parse_stack_top_state(parser->stack, 0); + TSStateId state = extra ? top_state : get_action(parser->language, top_state, symbol).data.to_state; - ts_stack_push(stack, state, parent); + ts_parse_stack_push(parser->stack, 0, state, parent); return parent; } @@ -238,7 +216,8 @@ static void reduce_error(TSParser *parser, size_t child_count) { } static int handle_error(TSParser *parser) { - size_t index_before_error = parser->stack.size - 1; + size_t error_token_count = 1; + ParseStackEntry *entry_before_error = ts_parse_stack_head(parser->stack, 0); for (;;) { @@ -246,8 +225,11 @@ static int handle_error(TSParser *parser) { * Unwind the parse stack until a state is found in which an error is * expected and the current lookahead token is expected afterwards. */ - for (size_t i = index_before_error; i + 1 > 0; i--) { - TSStateId stack_state = parser->stack.entries[i].state; + int i = -1; + for (ParseStackEntry *entry = entry_before_error; + entry != NULL; + entry = ts_parse_stack_entry_next(entry, 0), i++) { + TSStateId stack_state = entry->state; TSParseAction action_on_error = get_action( parser->language, stack_state, ts_builtin_sym_error); @@ -257,8 +239,8 @@ static int handle_error(TSParser *parser) { parser->language, state_after_error, parser->lookahead->symbol); if (action_after_error.type != TSParseActionTypeError) { - DEBUG("recover state:%u, count:%lu", state_after_error, parser->stack.size - i); - reduce_error(parser, parser->stack.size - i - 1); + DEBUG("recover state:%u, count:%lu", state_after_error, error_token_count + i); + reduce_error(parser, error_token_count + i); return 1; } } @@ -269,23 +251,23 @@ static int handle_error(TSParser *parser) { * current lookahead token, advance to the next token. */ DEBUG("skip token:%s", SYM_NAME(parser->lookahead->symbol)); - shift(parser, ts_stack_top_state(&parser->stack)); + shift(parser, ts_parse_stack_top_state(parser->stack, 0)); parser->lookahead = get_next_node(parser, ts_lex_state_error); + error_token_count++; /* * If the end of input is reached, exit. */ if (parser->lookahead->symbol == ts_builtin_sym_end) { DEBUG("fail_to_recover"); - reduce_error(parser, parser->stack.size - index_before_error - 1); + reduce_error(parser, error_token_count - 1); return 0; } } } static TSTree *finish(TSParser *parser) { - reduce(parser, ts_builtin_sym_document, parser->stack.size); - return parser->stack.entries[0].node; + return reduce_helper(parser, ts_builtin_sym_document, -1, false, true); } /* @@ -294,12 +276,12 @@ static TSTree *finish(TSParser *parser) { TSParser ts_parser_make() { return (TSParser) { .lexer = ts_lexer_make(), - .stack = ts_stack_make(), + .stack = ts_parse_stack_new(), .right_stack = ts_stack_make() }; } void ts_parser_destroy(TSParser *parser) { - ts_stack_delete(&parser->stack); + ts_parse_stack_delete(parser->stack); ts_stack_delete(&parser->right_stack); if (parser->lookahead) @@ -328,7 +310,7 @@ const TSTree *ts_parser_parse(TSParser *parser, TSInput input, position = break_down_left_stack(parser, *edit); } else { DEBUG("new_parse"); - ts_stack_shrink(&parser->stack, 0); + ts_parse_stack_clear(parser->stack); position = ts_length_zero(); } @@ -337,7 +319,7 @@ const TSTree *ts_parser_parse(TSParser *parser, TSInput input, ts_lexer_reset(&parser->lexer, position); for (;;) { - TSStateId state = ts_stack_top_state(&parser->stack); + TSStateId state = ts_parse_stack_top_state(parser->stack, 0); if (!parser->lookahead) parser->lookahead = get_next_node(parser, parser->language->lex_states[state]); TSParseAction action = get_action(parser->language, state, parser->lookahead->symbol); diff --git a/src/runtime/parser.h b/src/runtime/parser.h index 3b8b6327..1fe25929 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -6,10 +6,11 @@ extern "C" { #endif #include "runtime/stack.h" +#include "runtime/parse_stack.h" typedef struct { TSLexer lexer; - TSStack stack; + ParseStack *stack; TSStack right_stack; size_t total_chars; TSTree *lookahead;