From f483178f83fc3537b740776bceadc34dbaafc456 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 24 Dec 2015 10:21:42 -0800 Subject: [PATCH 1/4] Clean up main parser loop --- src/runtime/parser.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 4b2cbeba..1007309a 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -198,7 +198,11 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { return result; } - return NULL; + ts_lexer_reset(&self->lexer, position); + TSStateId parse_state = ts_stack_top_state(self->stack, head); + TSStateId lex_state = self->language->lex_states[parse_state]; + LOG("lex state:%d", lex_state); + return self->language->lex_fn(&self->lexer, lex_state); } static int ts_parser__split(TSParser *self, int head) { @@ -648,29 +652,22 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { for (;;) { TSTree *lookahead = NULL; - TSLength position = ts_length_zero(), last_position; + TSLength last_position = ts_length_zero(); + TSLength position = ts_length_zero(); self->is_split = ts_stack_head_count(self->stack) > 1; + for (int head = 0; head < ts_stack_head_count(self->stack);) { - StackEntry *entry = ts_stack_head(self->stack, head); last_position = position; - position = entry ? entry->position : ts_length_zero(); + position = ts_stack_top_position(self->stack, head); LOG("process head:%d, head_count:%d, state:%d, pos:%lu", head, ts_stack_head_count(self->stack), ts_stack_top_state(self->stack, head), position.chars); - if (!ts_parser__can_reuse(self, head, lookahead) || - position.chars != last_position.chars) { + if (position.chars != last_position.chars || + !ts_parser__can_reuse(self, head, lookahead)) lookahead = ts_parser__get_next_lookahead(self, head); - if (!lookahead) { - ts_lexer_reset(&self->lexer, position); - TSStateId parse_state = ts_stack_top_state(self->stack, head); - TSStateId lex_state = self->language->lex_states[parse_state]; - LOG("lex state:%d", lex_state); - lookahead = self->language->lex_fn(&self->lexer, lex_state); - } - } LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), ts_tree_total_chars(lookahead)); From 13d8de3eb39cdcf1f657643559ff2567f4c3c127 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 24 Dec 2015 22:04:20 -0800 Subject: [PATCH 2/4] Remove stack head after it is accepted --- src/runtime/parser.c | 81 +++++++++++++++++++++++++------------------- src/runtime/parser.h | 2 +- 2 files changed, 48 insertions(+), 35 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 1007309a..4410de11 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -220,6 +220,11 @@ static void ts_parser__remove_head(TSParser *self, int head) { } static TSTree *ts_parser__select_tree(void *data, TSTree *left, TSTree *right) { + if (!left) + return right; + if (!right) + return left; + TSParser *self = data; int comparison = ts_tree_compare(left, right); if (comparison <= 0) { @@ -492,37 +497,40 @@ static void ts_parser__start(TSParser *self, TSInput input, }; vector_clear(&self->lookahead_states); vector_push(&self->lookahead_states, &lookahead_state); - self->finished_stack_head = -1; + self->finished_tree = NULL; } -static TSTree *ts_parser__finish(TSParser *self, int finished_stack_head) { - Vector pop_results = ts_stack_pop(self->stack, finished_stack_head, -1, true); - StackPopResult *pop_result = vector_get(&pop_results, 0); +static void ts_parser__accept(TSParser *self, int head) { + Vector pop_results = ts_stack_pop(self->stack, head, -1, true); - for (size_t i = 0; i < pop_result->tree_count; i++) { - if (!pop_result->trees[i]->extra) { - TSTree *root = pop_result->trees[i]; - size_t leading_extra_count = i; - size_t trailing_extra_count = pop_result->tree_count - 1 - i; - TSTree **new_children = - malloc((root->child_count + leading_extra_count + trailing_extra_count) * - sizeof(TSTree *)); - memcpy(new_children, pop_result->trees, - leading_extra_count * sizeof(TSTree *)); - memcpy(new_children + leading_extra_count, root->children, - root->child_count * sizeof(TSTree *)); - memcpy(new_children + leading_extra_count + root->child_count, - pop_result->trees + leading_extra_count + 1, - trailing_extra_count * sizeof(TSTree *)); - size_t new_count = - root->child_count + leading_extra_count + trailing_extra_count; - ts_tree_set_children(root, new_count, new_children); - ts_tree_assign_parents(root); - return root; + for (size_t j = 0; j < pop_results.size; j++) { + StackPopResult *pop_result = vector_get(&pop_results, j); + + for (size_t i = 0; i < pop_result->tree_count; i++) { + if (!pop_result->trees[i]->extra) { + TSTree *root = pop_result->trees[i]; + size_t leading_extra_count = i; + size_t trailing_extra_count = pop_result->tree_count - 1 - i; + TSTree **new_children = + malloc((root->child_count + leading_extra_count + trailing_extra_count) * + sizeof(TSTree *)); + memcpy(new_children, pop_result->trees, + leading_extra_count * sizeof(TSTree *)); + memcpy(new_children + leading_extra_count, root->children, + root->child_count * sizeof(TSTree *)); + memcpy(new_children + leading_extra_count + root->child_count, + pop_result->trees + leading_extra_count + 1, + trailing_extra_count * sizeof(TSTree *)); + size_t new_count = + root->child_count + leading_extra_count + trailing_extra_count; + ts_tree_set_children(root, new_count, new_children); + ts_tree_retain(root); + ts_parser__remove_head(self, pop_result->head_index); + self->finished_tree = ts_parser__select_tree(self, self->finished_tree, root); + break; + } } } - - return NULL; } /* @@ -573,9 +581,12 @@ static bool ts_parser__consume_lookahead(TSParser *self, int head, } if (ts_stack_head_count(self->stack) == 1) { - if (!ts_parser__handle_error(self, current_head, lookahead)) - self->finished_stack_head = current_head; - return true; + if (ts_parser__handle_error(self, current_head, lookahead)) { + return true; + } else { + ts_parser__accept(self, current_head); + return false; + } } else { LOG("bail current_head:%d", current_head); ts_parser__remove_head(self, current_head); @@ -614,8 +625,8 @@ static bool ts_parser__consume_lookahead(TSParser *self, int head, case TSParseActionTypeAccept: LOG("accept"); - self->finished_stack_head = current_head; - return true; + ts_parser__accept(self, current_head); + return false; } } } @@ -631,7 +642,7 @@ TSParser ts_parser_make() { .stack = ts_stack_new(), .lookahead_states = vector_new(sizeof(LookaheadState), 4), .reduce_parents = vector_new(sizeof(TSTree *), 4), - .finished_stack_head = -1, + .finished_tree = NULL, }; } @@ -676,7 +687,9 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { head++; } - if (self->finished_stack_head >= 0 && ts_stack_head_count(self->stack) == 1) - return ts_parser__finish(self, self->finished_stack_head); + if (ts_stack_head_count(self->stack) == 0) { + ts_tree_assign_parents(self->finished_tree); + return self->finished_tree; + } } } diff --git a/src/runtime/parser.h b/src/runtime/parser.h index 402223a0..cc537090 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -14,7 +14,7 @@ typedef struct { const TSLanguage *language; Vector lookahead_states; Vector reduce_parents; - int finished_stack_head; + TSTree *finished_tree; bool is_split; } TSParser; From a8f50986e081c0f9a2fd446c104d82600c81b35b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 24 Dec 2015 22:05:54 -0800 Subject: [PATCH 3/4] clang-format --- src/compiler/build_tables/remove_duplicate_states.h | 3 +-- src/runtime/parser.c | 12 +++++++----- src/runtime/tree.c | 4 ++-- src/runtime/tree.h | 3 ++- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/compiler/build_tables/remove_duplicate_states.h b/src/compiler/build_tables/remove_duplicate_states.h index 35b2d822..48734596 100644 --- a/src/compiler/build_tables/remove_duplicate_states.h +++ b/src/compiler/build_tables/remove_duplicate_states.h @@ -8,8 +8,7 @@ namespace tree_sitter { namespace build_tables { template -std::map remove_duplicate_states( - std::vector *states) { +std::map remove_duplicate_states(std::vector *states) { std::map replacements; while (true) { diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 4410de11..9403fbc5 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -185,7 +185,8 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { } if (!ts_parser__can_reuse(self, head, state->reusable_subtree)) { - LOG("breakdown_unreusable sym:%s", SYM_NAME(state->reusable_subtree->symbol)); + LOG("breakdown_unreusable sym:%s", + SYM_NAME(state->reusable_subtree->symbol)); ts_parser__breakdown_reusable_subtree(state); continue; } @@ -511,9 +512,9 @@ static void ts_parser__accept(TSParser *self, int head) { TSTree *root = pop_result->trees[i]; size_t leading_extra_count = i; size_t trailing_extra_count = pop_result->tree_count - 1 - i; - TSTree **new_children = - malloc((root->child_count + leading_extra_count + trailing_extra_count) * - sizeof(TSTree *)); + TSTree **new_children = malloc( + (root->child_count + leading_extra_count + trailing_extra_count) * + sizeof(TSTree *)); memcpy(new_children, pop_result->trees, leading_extra_count * sizeof(TSTree *)); memcpy(new_children + leading_extra_count, root->children, @@ -526,7 +527,8 @@ static void ts_parser__accept(TSParser *self, int head) { ts_tree_set_children(root, new_count, new_children); ts_tree_retain(root); ts_parser__remove_head(self, pop_result->head_index); - self->finished_tree = ts_parser__select_tree(self, self->finished_tree, root); + self->finished_tree = + ts_parser__select_tree(self, self->finished_tree, root); break; } } diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 0f9a279c..1442c28f 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -220,8 +220,8 @@ static size_t ts_tree__write_to_string(const TSTree *self, char *cursor = string; char **writer = (limit > 0) ? &cursor : &string; - bool visible = is_root || (self->visible && - (include_anonymous || self->named)); + bool visible = + is_root || (self->visible && (include_anonymous || self->named)); if (visible && !is_root) cursor += snprintf(*writer, limit, " "); diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 615d6f6e..26a41a27 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -68,7 +68,8 @@ static inline TSLength ts_tree_total_size(const TSTree *self) { } static inline bool ts_tree_is_fragile(const TSTree *tree) { - return tree->fragile_left || tree->fragile_right || ts_tree_total_chars(tree) == 0; + return tree->fragile_left || tree->fragile_right || + ts_tree_total_chars(tree) == 0; } #ifdef __cplusplus From ff3a4baf42e0b15fc4d7b7bc1ba6a2d5303eaf72 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 26 Dec 2015 16:49:23 -0800 Subject: [PATCH 4/4] Ensure parse stack heads are at the same positions --- src/runtime/parser.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 9403fbc5..1985487f 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -662,6 +662,7 @@ void ts_parser_set_debugger(TSParser *self, TSDebugger debugger) { TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { ts_parser__start(self, input, previous_tree); + size_t max_position = 0; for (;;) { TSTree *lookahead = NULL; @@ -671,22 +672,35 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { self->is_split = ts_stack_head_count(self->stack) > 1; for (int head = 0; head < ts_stack_head_count(self->stack);) { - last_position = position; - position = ts_stack_top_position(self->stack, head); + for (;;) { + last_position = position; + position = ts_stack_top_position(self->stack, head); - LOG("process head:%d, head_count:%d, state:%d, pos:%lu", head, - ts_stack_head_count(self->stack), - ts_stack_top_state(self->stack, head), position.chars); + if (position.chars > max_position) { + max_position = position.chars; + head++; + break; + } - if (position.chars != last_position.chars || - !ts_parser__can_reuse(self, head, lookahead)) - lookahead = ts_parser__get_next_lookahead(self, head); + if (position.chars == max_position && head > 0) { + head++; + break; + } - LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), - ts_tree_total_chars(lookahead)); + LOG("process head:%d, head_count:%d, state:%d, pos:%lu", head, + ts_stack_head_count(self->stack), + ts_stack_top_state(self->stack, head), position.chars); - if (ts_parser__consume_lookahead(self, head, lookahead)) - head++; + if (position.chars != last_position.chars || + !ts_parser__can_reuse(self, head, lookahead)) + lookahead = ts_parser__get_next_lookahead(self, head); + + LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol), + ts_tree_total_chars(lookahead)); + + if (!ts_parser__consume_lookahead(self, head, lookahead)) + break; + } } if (ts_stack_head_count(self->stack) == 0) {