From 863cabc82794def3209c8ed1f87a3b09cf42bd46 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 2 Dec 2015 07:53:15 -0800 Subject: [PATCH] Don't include trailing ubiquitous tokens as children when reducing --- spec/fixtures/corpus/c/declarations.txt | 81 +++++++-- spec/fixtures/corpus/golang/declarations.txt | 3 + spec/fixtures/corpus/javascript/literals.txt | 11 +- spec/runtime/helpers/tree_helpers.cc | 19 +- spec/runtime/helpers/tree_helpers.h | 3 +- spec/runtime/parser_spec.cc | 3 +- spec/runtime/stack_spec.cc | 85 +++++---- spec/runtime/tree_spec.cc | 8 +- src/runtime/parser.c | 172 +++++++++++++------ src/runtime/stack.c | 22 ++- src/runtime/stack.h | 8 + src/runtime/tree.c | 18 +- src/runtime/tree.h | 2 +- 13 files changed, 286 insertions(+), 149 deletions(-) diff --git a/spec/fixtures/corpus/c/declarations.txt b/spec/fixtures/corpus/c/declarations.txt index caa9d36e..123e9818 100644 --- a/spec/fixtures/corpus/c/declarations.txt +++ b/spec/fixtures/corpus/c/declarations.txt @@ -44,8 +44,8 @@ int main() { (comment) (declaration (identifier) - (pointer_declarator (function_declarator (identifier) (parameter_declaration (identifier) (identifier)))) - (comment)) + (pointer_declarator (function_declarator (identifier) (parameter_declaration (identifier) (identifier))))) + (comment) (expression_statement (math_expression (identifier) (call_expression (identifier) (number))))))) @@ -54,38 +54,50 @@ int main() { ambiguous expressions ========================================== +/* + * ambiguities + */ + int main() { - // cast vs parenthesized product + // cast a((B *)c); + + // parenthesized product d((e * f)); } --- -(translation_unit (function_definition - (identifier) - (function_declarator (identifier)) - (compound_statement - (comment) - (expression_statement (call_expression - (identifier) - (cast_expression (type_name (identifier) (abstract_pointer_declarator)) (identifier)))) - (expression_statement (call_expression - (identifier) - (math_expression (identifier) (identifier))))))) +(translation_unit + (comment) + (function_definition + (identifier) + (function_declarator (identifier)) + (compound_statement + (comment) + (expression_statement (call_expression + (identifier) + (cast_expression (type_name (identifier) (abstract_pointer_declarator)) (identifier)))) + (comment) + (expression_statement (call_expression + (identifier) + (math_expression (identifier) (identifier))))))) ========================================== function-like macros that produce types ========================================== +// this is a macro GIT_INLINE(int) x = 5; --- -(translation_unit (declaration - (macro_type (identifier) (identifier)) - (identifier) - (initializer (number)))) +(translation_unit + (comment) + (declaration + (macro_type (identifier) (identifier)) + (identifier) + (initializer (number)))) ============================================ 3-way ambiguities (regression) @@ -100,6 +112,9 @@ int main() { */ ABC(d); + /* + * Normal declaration + */ efg hij; } @@ -112,4 +127,34 @@ int main() { (compound_statement (comment) (declaration (identifier) (identifier)) + (comment) (declaration (identifier) (identifier))))) + +========================================= +Comments after for loops with ambiguities +=========================================== + +int main() { + for (a *b = c; d; e) { + aff; + } + + // a-comment + + g; +} + +--- + +(translation_unit (function_definition + (identifier) + (function_declarator (identifier)) + (compound_statement + (for_statement + (declaration (identifier) (pointer_declarator (identifier)) (initializer (identifier))) + (identifier) + (identifier) + (compound_statement + (expression_statement (identifier)))) + (comment) + (expression_statement (identifier))))) diff --git a/spec/fixtures/corpus/golang/declarations.txt b/spec/fixtures/corpus/golang/declarations.txt index aab1fc87..f3eca94d 100644 --- a/spec/fixtures/corpus/golang/declarations.txt +++ b/spec/fixtures/corpus/golang/declarations.txt @@ -40,6 +40,8 @@ package trivial var x = 1 // on variable +// between declarations + func main() { // in function } @@ -49,4 +51,5 @@ func main() { (program (package_directive (package_name)) (var_declaration (var_name) (number) (comment)) + (comment) (func_declaration (var_name) (block_statement (comment)))) diff --git a/spec/fixtures/corpus/javascript/literals.txt b/spec/fixtures/corpus/javascript/literals.txt index e3f333ad..b027d092 100644 --- a/spec/fixtures/corpus/javascript/literals.txt +++ b/spec/fixtures/corpus/javascript/literals.txt @@ -63,9 +63,10 @@ var thing = { // this is a property. // its value is a function. key: function(x /* this is a parameter */) { - - // this is a statement - doStuff(); + // this is one statement + one(); + // this is another statement + two(); } }; @@ -80,8 +81,10 @@ var thing = { (comment) (comment) (pair (identifier) (function_expression - (formal_parameters (identifier) (comment)) + (formal_parameters (identifier)) (comment) (statement_block + (comment) + (expression_statement (function_call (identifier) (arguments))) (comment) (expression_statement (function_call (identifier) (arguments)))))))))) diff --git a/spec/runtime/helpers/tree_helpers.cc b/spec/runtime/helpers/tree_helpers.cc index 2871b695..d3eb4100 100644 --- a/spec/runtime/helpers/tree_helpers.cc +++ b/spec/runtime/helpers/tree_helpers.cc @@ -1,5 +1,9 @@ #include "runtime/helpers/tree_helpers.h" +using std::string; +using std::to_string; +using std::ostream; + static const char *symbol_names[24] = { "ERROR", "END", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", @@ -14,15 +18,20 @@ TSTree ** tree_array(std::vector trees) { return result; } -std::ostream &operator<<(std::ostream &stream, const TSTree *tree) { - return stream << std::string(ts_tree_string(tree, symbol_names, true));; +ostream &operator<<(std::ostream &stream, const TSTree *tree) { + return stream << string(ts_tree_string(tree, symbol_names, true));; } -std::ostream &operator<<(std::ostream &stream, const TSNode node) { - return stream << std::string("{") << (const TSTree *)node.data << - std::string(", ") << std::to_string(ts_node_pos(node).chars) << std::string("}"); +ostream &operator<<(ostream &stream, const TSNode &node) { + return stream << string("{") << (const TSTree *)node.data << + string(", ") << to_string(ts_node_pos(node).chars) << string("}"); } bool operator==(const TSNode &left, const TSNode &right) { return ts_node_eq(left, right); } + +ostream &operator<<(ostream &stream, const TSLength &length) { + return stream << string("{") << to_string(length.chars) << string(", ") << + to_string(length.bytes) << string("}"); +} diff --git a/spec/runtime/helpers/tree_helpers.h b/spec/runtime/helpers/tree_helpers.h index f668411f..6fca9e57 100644 --- a/spec/runtime/helpers/tree_helpers.h +++ b/spec/runtime/helpers/tree_helpers.h @@ -8,7 +8,8 @@ TSTree ** tree_array(std::vector trees); std::ostream &operator<<(std::ostream &stream, const TSTree *tree); -std::ostream &operator<<(std::ostream &stream, const TSNode node); +std::ostream &operator<<(std::ostream &stream, const TSNode &node); +std::ostream &operator<<(std::ostream &stream, const TSLength &length); bool operator==(const TSNode &left, const TSNode &right); #endif // HELPERS_TREE_HELPERS_H_ diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index 36677f0d..8848d064 100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -396,7 +396,8 @@ describe("Parser", [&]() { }); }); - it("updates the document's parse-count", [&]() { + it("updates the document's parse count", [&]() { + ts_document_set_language(doc, ts_language_javascript()); AssertThat(ts_document_parse_count(doc), Equals(0)); set_text("{ x: (b.c) };"); diff --git a/spec/runtime/stack_spec.cc b/spec/runtime/stack_spec.cc index 355ae563..0ec66991 100644 --- a/spec/runtime/stack_spec.cc +++ b/spec/runtime/stack_spec.cc @@ -9,8 +9,7 @@ enum { }; enum { - symbol0 = ts_builtin_sym_start, - symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8 + symbol0, symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8 }; struct TreeSelectionSpy { @@ -19,6 +18,14 @@ struct TreeSelectionSpy { const TSTree *arguments[2]; }; +TSLength operator*(const TSLength &length, size_t factor) { + return {length.bytes * factor, length.chars * factor}; +} + +TSPoint operator*(const TSPoint &point, size_t factor) { + return {0, point.column * factor}; +} + extern "C" TSTree * tree_selection_spy_callback(void *data, TSTree *left, TSTree *right) { TreeSelectionSpy *spy = (TreeSelectionSpy *)data; @@ -30,11 +37,15 @@ TSTree * tree_selection_spy_callback(void *data, TSTree *left, TSTree *right) { START_TEST + describe("Stack", [&]() { Stack *stack; const size_t tree_count = 10; TSTree *trees[tree_count]; TreeSelectionSpy tree_selection_spy{0, NULL, {NULL, NULL}}; + TSLength tree_len = ts_length_make(2, 3); + TSPoint tree_extent = ts_point_make(0, 3); + TSSymbolMetadata metadata = {true, true, true}; before_each([&]() { stack = ts_stack_new({ @@ -42,10 +53,8 @@ describe("Stack", [&]() { tree_selection_spy_callback }); - TSLength len = ts_length_make(2, 2); - TSPoint point = ts_point_make(1, 1); for (size_t i = 0; i < tree_count; i++) - trees[i] = ts_tree_make_leaf(i, len, len, point, point, {}); + trees[i] = ts_tree_make_leaf(i, ts_length_zero(), tree_len, ts_point_zero(), tree_extent, {}); }); after_each([&]() { @@ -64,7 +73,7 @@ describe("Stack", [&]() { */ ts_stack_push(stack, 0, stateA, trees[0]); const StackEntry *entry1 = ts_stack_head(stack, 0); - AssertThat(*entry1, Equals({trees[0], stateA})); + AssertThat(*entry1, Equals({trees[0], stateA, tree_len, tree_extent})); AssertThat(ts_stack_entry_next_count(entry1), Equals(1)); AssertThat(ts_stack_entry_next(entry1, 0), Equals(nullptr)); @@ -73,7 +82,7 @@ describe("Stack", [&]() { */ ts_stack_push(stack, 0, stateB, trees[1]); const StackEntry *entry2 = ts_stack_head(stack, 0); - AssertThat(*entry2, Equals({trees[1], stateB})); + AssertThat(*entry2, Equals({trees[1], stateB, tree_len * 2, tree_extent * 2})); AssertThat(ts_stack_entry_next_count(entry2), Equals(1)); AssertThat(ts_stack_entry_next(entry2, 0), Equals(entry1)); @@ -82,7 +91,7 @@ describe("Stack", [&]() { */ ts_stack_push(stack, 0, stateC, trees[2]); const StackEntry *entry3 = ts_stack_head(stack, 0); - AssertThat(*entry3, Equals({trees[2], stateC})); + AssertThat(*entry3, Equals({trees[2], stateC, tree_len * 3, tree_extent * 2})); AssertThat(ts_stack_entry_next_count(entry3), Equals(1)); AssertThat(ts_stack_entry_next(entry3, 0), Equals(entry2)); }); @@ -108,7 +117,7 @@ describe("Stack", [&]() { AssertThat(pop1.tree_count, Equals(2)); AssertThat(pop1.trees[0], Equals(trees[1])); AssertThat(pop1.trees[1], Equals(trees[2])); - AssertThat(*ts_stack_head(stack, 0), Equals({trees[0], stateA})); + AssertThat(*ts_stack_head(stack, 0), Equals({trees[0], stateA, tree_len, tree_extent})); /* * . @@ -167,8 +176,8 @@ describe("Stack", [&]() { ts_stack_pop(stack, 1, 1, false); AssertThat(ts_stack_head_count(stack), Equals(2)); - AssertThat(*ts_stack_head(stack, 0), Equals({trees[3], stateD})); - AssertThat(*ts_stack_head(stack, 1), Equals({trees[1], stateB})); + AssertThat(*ts_stack_head(stack, 0), Equals({trees[3], stateD, tree_len * 4, tree_extent * 4})); + AssertThat(*ts_stack_head(stack, 1), Equals({trees[1], stateB, tree_len * 2, tree_extent * 2})); /* * A0__B1__C2__D3. @@ -178,8 +187,8 @@ describe("Stack", [&]() { ts_stack_push(stack, 1, stateF, trees[3]); AssertThat(ts_stack_head_count(stack), Equals(2)); - AssertThat(*ts_stack_head(stack, 0), Equals({trees[3], stateD})); - AssertThat(*ts_stack_head(stack, 1), Equals({trees[3], stateF})); + AssertThat(*ts_stack_head(stack, 0), Equals({trees[3], stateD, tree_len * 4, tree_extent * 4})); + AssertThat(*ts_stack_head(stack, 1), Equals({trees[3], stateF, tree_len * 4, tree_extent * 4})); }); }); @@ -198,8 +207,8 @@ describe("Stack", [&]() { ts_stack_push(stack, 1, stateF, trees[5]); AssertThat(ts_stack_head_count(stack), Equals(2)); - AssertThat(*ts_stack_head(stack, 0), Equals({trees[3], stateD})); - AssertThat(*ts_stack_head(stack, 1), Equals({trees[5], stateF})); + AssertThat(*ts_stack_head(stack, 0), Equals({trees[3], stateD, tree_len * 4, tree_extent * 4})); + AssertThat(*ts_stack_head(stack, 1), Equals({trees[5], stateF, tree_len * 4, tree_extent * 4})); }); describe("when the trees are identical", [&]() { @@ -215,10 +224,10 @@ describe("Stack", [&]() { AssertThat(ts_stack_head_count(stack), Equals(1)); const StackEntry *entry1 = ts_stack_head(stack, 0); - AssertThat(*entry1, Equals({trees[6], stateG})); + AssertThat(*entry1, Equals({trees[6], stateG, tree_len * 5, tree_extent * 5})); AssertThat(ts_stack_entry_next_count(entry1), Equals(2)); - AssertThat(*ts_stack_entry_next(entry1, 0), Equals({trees[3], stateD})); - AssertThat(*ts_stack_entry_next(entry1, 1), Equals({trees[5], stateF})); + AssertThat(*ts_stack_entry_next(entry1, 0), Equals({trees[3], stateD, tree_len * 4, tree_extent * 4})); + AssertThat(*ts_stack_entry_next(entry1, 1), Equals({trees[5], stateF, tree_len * 4, tree_extent * 4})); }); }); @@ -242,10 +251,7 @@ describe("Stack", [&]() { AssertThat(tree_selection_spy.call_count, Equals(1)); AssertThat(tree_selection_spy.arguments[0], Equals(trees[6])); AssertThat(tree_selection_spy.arguments[1], Equals(trees[7])); - AssertThat(*ts_stack_head(stack, 0), Equals({ - trees[7], - stateG - })); + AssertThat(*ts_stack_head(stack, 0), Equals({trees[7], stateG, tree_len * 5, tree_extent * 5})); }); }); @@ -271,11 +277,11 @@ describe("Stack", [&]() { AssertThat(ts_stack_head_count(stack), Equals(1)); StackEntry *head = ts_stack_head(stack, 0); - AssertThat(*head, Equals({trees[7], stateH})) + AssertThat(*head, Equals({trees[7], stateH, tree_len * 6, tree_extent * 6})) AssertThat(ts_stack_entry_next_count(head), Equals(1)); StackEntry *next = ts_stack_entry_next(head, 0); - AssertThat(*next, Equals({trees[6], stateG})) + AssertThat(*next, Equals({trees[6], stateG, tree_len * 5, tree_extent * 5})) AssertThat(ts_stack_entry_next_count(next), Equals(2)); }); }); @@ -283,24 +289,31 @@ describe("Stack", [&]() { describe("when the first head is only one node deep", [&]() { it("adds it as an additional successor node to The Null node", [&]() { /* - * .__A0. - * B1.__/ + * .__C5. + * B2.__/ */ ts_stack_clear(stack); ts_stack_split(stack, 0); - ts_stack_push(stack, 0, stateA, trees[0]); - bool merged = ts_stack_push(stack, 1, stateB, trees[1]); + TSTree *parent = ts_tree_make_node(5, 2, tree_array({ trees[2], trees[3] }), metadata); + + ts_stack_push(stack, 0, stateC, parent); + + tree_selection_spy.tree_to_return = parent; + tree_selection_spy.call_count = 0; + + bool merged = ts_stack_push(stack, 1, stateB, trees[2]); AssertThat(merged, IsFalse()); - merged = ts_stack_push(stack, 1, stateA, trees[0]); + merged = ts_stack_push(stack, 1, stateC, trees[3]); AssertThat(merged, IsTrue()); + AssertThat(tree_selection_spy.call_count, Equals(1)); AssertThat(ts_stack_head_count(stack), Equals(1)); StackEntry *head = ts_stack_head(stack, 0); - AssertThat(*head, Equals({trees[0], stateA})); + AssertThat(*head, Equals({parent, stateC, tree_len * 2, tree_extent * 2})); AssertThat(ts_stack_entry_next_count(head), Equals(2)); AssertThat(ts_stack_entry_next(head, 0), Equals(nullptr)); - AssertThat(*ts_stack_entry_next(head, 1), Equals({trees[1], stateB})); + AssertThat(*ts_stack_entry_next(head, 1), Equals({trees[2], stateB, tree_len, tree_extent})); }); }); }); @@ -347,8 +360,8 @@ describe("Stack", [&]() { AssertThat(pop2.trees[1], Equals(trees[6])); AssertThat(ts_stack_head_count(stack), Equals(2)); - AssertThat(*ts_stack_head(stack, 0), Equals({trees[2], stateC})); - AssertThat(*ts_stack_head(stack, 1), Equals({trees[4], stateE})); + AssertThat(*ts_stack_head(stack, 0), Equals({trees[2], stateC, tree_len * 3, tree_extent * 3})); + AssertThat(*ts_stack_head(stack, 1), Equals({trees[4], stateE, tree_len * 3, tree_extent * 3})); }); }); @@ -411,7 +424,7 @@ describe("Stack", [&]() { */ Vector pop = ts_stack_pop(stack, 0, 3, false); AssertThat(ts_stack_head_count(stack), Equals(1)); - AssertThat(*ts_stack_head(stack, 0), Equals({trees[1], stateB})); + AssertThat(*ts_stack_head(stack, 0), Equals({trees[1], stateB, tree_len * 2, tree_extent * 2})); AssertThat(pop.size, Equals(2)); StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); @@ -532,9 +545,9 @@ describe("Stack", [&]() { END_TEST bool operator==(const StackEntry &left, const StackEntry &right) { - return left.state == right.state && ts_tree_eq(left.tree, right.tree); + return left.state == right.state && ts_tree_eq(left.tree, right.tree) && ts_length_eq(left.position, right.position); } std::ostream &operator<<(std::ostream &stream, const StackEntry &entry) { - return stream << "{" << entry.state << ", " << entry.tree << "}"; + return stream << "{" << entry.state << ", " << entry.tree << ", " << entry.position << "}"; } diff --git a/spec/runtime/tree_spec.cc b/spec/runtime/tree_spec.cc index 3b0922f7..7067f418 100644 --- a/spec/runtime/tree_spec.cc +++ b/spec/runtime/tree_spec.cc @@ -27,8 +27,8 @@ static const char *names[] = { describe("Tree", []() { TSTree *tree1, *tree2, *parent1; - TSSymbolMetadata visible = {true, true}; - TSSymbolMetadata invisible = {false, false}; + TSSymbolMetadata visible = {true, true, false}; + TSSymbolMetadata invisible = {false, false, false}; before_each([&]() { tree1 = ts_tree_make_leaf(cat, {2, 1}, {5, 4}, ts_point_zero(), ts_point_zero(), visible); @@ -377,10 +377,6 @@ describe("Tree", []() { END_TEST -ostream &operator<<(ostream &stream, const TSLength &length) { - return stream << "{bytes:" << length.bytes << ", chars:" << length.chars << "}"; -} - bool operator==(TSLength left, TSLength right) { return ts_length_eq(left, right); } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 485fd2f3..3326ec1c 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -26,8 +26,6 @@ typedef struct { TSTree *reusable_subtree; size_t reusable_subtree_pos; - TSLength position; - TSPoint offset_point; } LookaheadState; typedef enum { @@ -90,13 +88,14 @@ static bool ts_parser__can_reuse(TSParser *self, int head, TSTree *subtree) { */ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { LookaheadState *state = vector_get(&self->lookahead_states, head); + TSLength position = ts_stack_top_position(self->stack, head); while (state->reusable_subtree) { - if (state->reusable_subtree_pos > state->position.chars) { + if (state->reusable_subtree_pos > position.chars) { break; } - if (state->reusable_subtree_pos < state->position.chars) { + if (state->reusable_subtree_pos < position.chars) { LOG("past_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol)); ts_parser__pop_reusable_subtree(state); continue; @@ -127,9 +126,9 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { static int ts_parser__split(TSParser *self, int head) { int result = ts_stack_split(self->stack, head); assert(result == (int)self->lookahead_states.size); - LookaheadState head_state = + LookaheadState lookahead_state = *(LookaheadState *)vector_get(&self->lookahead_states, head); - vector_push(&self->lookahead_states, &head_state); + vector_push(&self->lookahead_states, &lookahead_state); return result; } @@ -148,10 +147,6 @@ static TSTree *ts_parser__select_tree(void *data, TSTree *left, TSTree *right) { static ConsumeResult ts_parser__shift(TSParser *self, int head, TSStateId parse_state, TSTree *lookahead) { - LookaheadState *head_state = vector_get(&self->lookahead_states, head); - head_state->position = - ts_length_add(head_state->position, ts_tree_total_size(lookahead)); - head_state->offset_point = ts_point_add(head_state->offset_point, ts_tree_total_size_point(lookahead)); if (ts_stack_push(self->stack, head, parse_state, lookahead)) { LOG("merge head:%d", head); vector_erase(&self->lookahead_states, head); @@ -167,14 +162,16 @@ static bool ts_parser__shift_extra(TSParser *self, int head, TSStateId state, return ts_parser__shift(self, head, state, lookahead); } -static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, +static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, int child_count, bool extra, bool count_extra) { vector_clear(&self->reduce_parents); - TSSymbolMetadata metadata = self->language->symbol_metadata[symbol]; + const TSSymbolMetadata *all_metadata = self->language->symbol_metadata; + TSSymbolMetadata metadata = all_metadata[symbol]; Vector pop_results = ts_stack_pop(self->stack, head, child_count, count_extra); int last_head_index = -1; - int removed_heads = 0; + size_t removed_heads = 0; + size_t revealed_heads = 0; for (size_t i = 0; i < pop_results.size; i++) { StackPopResult *pop_result = vector_get(&pop_results, i); @@ -184,11 +181,13 @@ static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, * tree that was added to that head. */ TSTree *parent = NULL; + size_t trailing_extra_count = 0; for (size_t j = 0; j < i; j++) { StackPopResult *prior_result = vector_get(&pop_results, j); if (pop_result->trees == prior_result->trees) { TSTree **existing_parent = vector_get(&self->reduce_parents, j); parent = *existing_parent; + trailing_extra_count = pop_result->tree_count - parent->child_count; break; } } @@ -196,9 +195,17 @@ static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, /* * Otherwise, create a new parent node for this set of trees. */ - if (!parent) - parent = ts_tree_make_node(symbol, pop_result->tree_count, + if (!parent) { + for (size_t j = pop_result->tree_count - 1; j + 1 > 0; j--) { + if (pop_result->trees[j]->options.extra) { + trailing_extra_count++; + } else + break; + } + + parent = ts_tree_make_node(symbol, pop_result->tree_count - trailing_extra_count, pop_result->trees, metadata); + } vector_push(&self->reduce_parents, &parent); /* @@ -209,6 +216,9 @@ static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, if (pop_result->head_index == last_head_index) { ts_stack_add_alternative(self->stack, new_head, parent); continue; + } else { + revealed_heads++; + last_head_index = pop_result->head_index; } /* @@ -216,9 +226,15 @@ static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, * the lookahead state for this head, for the new head. */ if (i > 0) { + if (symbol == ts_builtin_sym_error) { + ts_stack_remove_head(self->stack, new_head); + free(pop_result->trees); + continue; + } + LOG("split_during_reduce new_head:%d", new_head); - LookaheadState *head_state = vector_get(&self->lookahead_states, head); - vector_push(&self->lookahead_states, head_state); + LookaheadState *lookahead_state = vector_get(&self->lookahead_states, head); + vector_push(&self->lookahead_states, lookahead_state); } /* @@ -247,36 +263,54 @@ static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, * then remove the lookahead state for the head. */ if (ts_stack_push(self->stack, new_head, state, parent)) { + LOG("merge_during_reduce head:%d", new_head); vector_erase(&self->lookahead_states, new_head); removed_heads++; + continue; } - last_head_index = pop_result->head_index; + if (trailing_extra_count > 0) { + for (size_t j = 0; j < trailing_extra_count; j++) { + size_t index = pop_result->tree_count - trailing_extra_count + j; + if (ts_stack_push(self->stack, new_head, state, pop_result->trees[index])) { + vector_erase(&self->lookahead_states, new_head); + removed_heads++; + continue; + } + } + } } - TSTree **last_parent = vector_back(&self->reduce_parents); - return *last_parent; + return removed_heads < revealed_heads; } -static void ts_parser__reduce_fragile(TSParser *self, int head, TSSymbol symbol, +static bool ts_parser__reduce_fragile(TSParser *self, int head, TSSymbol symbol, size_t child_count) { - TSTree *reduced = - ts_parser__reduce(self, head, symbol, child_count, false, false); - ts_tree_set_fragile_left(reduced); - ts_tree_set_fragile_right(reduced); + bool result = ts_parser__reduce(self, head, symbol, child_count, false, false); + if (result) + for (size_t i = 0; i < self->reduce_parents.size; i++) { + TSTree **parent = vector_get(&self->reduce_parents, i); + ts_tree_set_fragile_left(*parent); + ts_tree_set_fragile_right(*parent); + } + return result; } static void ts_parser__reduce_error(TSParser *self, int head, size_t child_count, TSTree *lookahead) { - LookaheadState *head_state = vector_get(&self->lookahead_states, head); - TSTree *reduced = ts_parser__reduce(self, head, ts_builtin_sym_error, - child_count, false, true); - reduced->size = ts_length_add(reduced->size, lookahead->padding); - head_state->position = ts_length_add(head_state->position, lookahead->padding); - head_state->offset_point = ts_point_add(head_state->offset_point, lookahead->padding_point); - lookahead->padding = ts_length_zero(); - ts_tree_set_fragile_left(reduced); - ts_tree_set_fragile_right(reduced); + bool result = ts_parser__reduce(self, head, ts_builtin_sym_error, + child_count, false, true); + if (result) { + TSTree **parent = vector_back(&self->reduce_parents); + StackEntry *stack_entry = ts_stack_head(self->stack, head); + stack_entry->position = ts_length_add(stack_entry->position, lookahead->padding); + stack_entry->position_point = ts_point_add(stack_entry->position_point, lookahead->padding_point); + (*parent)->size = ts_length_add((*parent)->size, lookahead->padding); + lookahead->padding = ts_length_zero(); + lookahead->padding_point = ts_point_zero(); + ts_tree_set_fragile_left(*parent); + ts_tree_set_fragile_right(*parent); + } } static bool ts_parser__handle_error(TSParser *self, int head, TSTree *lookahead) { @@ -346,25 +380,44 @@ static void ts_parser__start(TSParser *self, TSInput input, ts_lexer_reset(&self->lexer, ts_length_zero(), ts_point_zero()); ts_stack_clear(self->stack); - LookaheadState head_state = { - .position = ts_length_zero(), + LookaheadState lookahead_state = { .reusable_subtree = previous_tree, .reusable_subtree_pos = 0, }; vector_clear(&self->lookahead_states); - vector_push(&self->lookahead_states, &head_state); + vector_push(&self->lookahead_states, &lookahead_state); } static TSTree *ts_parser__finish(TSParser *self) { Vector pop_results = ts_stack_pop(self->stack, 0, -1, true); StackPopResult *pop_result = vector_get(&pop_results, 0); - size_t extra_count = pop_result->tree_count - 1; - TSTree *root = pop_result->trees[extra_count]; + for (size_t i = 0; i < pop_result->tree_count; i++) { + if (!pop_result->trees[i]->options.extra) { + TSTree *root = pop_result->trees[i]; + size_t leading_extra_count = i; + size_t trailing_extra_count = pop_result->tree_count - 1 - i; + TSTree **new_children = malloc((root->child_count + leading_extra_count + trailing_extra_count) * sizeof(TSTree *)); + memcpy( + new_children, + pop_result->trees, + leading_extra_count * sizeof(TSTree *)); + memcpy( + new_children + leading_extra_count, + root->children, + root->child_count * sizeof(TSTree *)); + memcpy( + new_children + leading_extra_count + root->child_count, + pop_result->trees + leading_extra_count + 1, + trailing_extra_count * sizeof(TSTree *)); + size_t new_count = root->child_count + leading_extra_count + trailing_extra_count; + ts_tree_set_children(root, new_count, new_children); + ts_tree_assign_parents(root); + return root; + } + } - ts_tree_prepend_children(root, extra_count, pop_result->trees); - ts_tree_assign_parents(root); - return root; + return NULL; } /* @@ -384,13 +437,14 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head, * action, it will always appear *last* in the list of actions. Perform it * on the original stack head and return. */ - while (next_action->type != 0) { + while (next_action) { TSParseAction action = *next_action; next_action++; int current_head; if (next_action->type == 0) { current_head = head; + next_action = NULL; } else { current_head = ts_parser__split(self, head); LOG("split_action from_head:%d, new_head:%d", head, current_head); @@ -427,21 +481,25 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head, case TSParseActionTypeReduce: LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.data.symbol), action.data.child_count); - ts_parser__reduce(self, current_head, action.data.symbol, - action.data.child_count, false, false); + if (!ts_parser__reduce(self, current_head, action.data.symbol, + action.data.child_count, false, false)) + if (!next_action) + return ConsumeResultRemoved; break; case TSParseActionTypeReduceExtra: LOG("reduce_extra sym:%s", SYM_NAME(action.data.symbol)); - ts_parser__reduce(self, current_head, action.data.symbol, 1, true, - false); + ts_parser__reduce(self, current_head, action.data.symbol, 1, + true, false); break; case TSParseActionTypeReduceFragile: LOG("reduce_fragile sym:%s, count:%u", SYM_NAME(action.data.symbol), action.data.child_count); - ts_parser__reduce_fragile(self, current_head, action.data.symbol, - action.data.child_count); + if (!ts_parser__reduce_fragile(self, current_head, action.data.symbol, + action.data.child_count)) + if (!next_action) + return ConsumeResultRemoved; break; case TSParseActionTypeAccept: @@ -484,25 +542,25 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { for (;;) { TSTree *lookahead = NULL; - TSLength position = ts_length_zero(); - TSPoint offset_point = ts_point_zero(); + TSLength last_position = ts_length_zero(); for (int head = 0; head < ts_stack_head_count(self->stack);) { - LookaheadState *state = vector_get(&self->lookahead_states, head); + StackEntry *entry = ts_stack_head(self->stack, head); + TSLength position = entry ? entry->position : ts_length_zero(); + TSPoint position_point = entry ? entry->position_point : ts_point_zero(); LOG("process head:%d, head_count:%d, state:%d, pos:%lu", head, ts_stack_head_count(self->stack), - ts_stack_top_state(self->stack, head), state->position.chars); + ts_stack_top_state(self->stack, head), position.chars); if (!ts_parser__can_reuse(self, head, lookahead) || - !ts_length_eq(state->position, position)) { + !ts_length_eq(position, last_position)) { TSTree *reused_lookahead = ts_parser__get_next_lookahead(self, head); if (ts_parser__can_reuse(self, head, reused_lookahead)) { lookahead = reused_lookahead; } else { - position = state->position; - offset_point = state->offset_point; - ts_lexer_reset(&self->lexer, position, offset_point); + last_position = position; + ts_lexer_reset(&self->lexer, position, position_point); TSStateId parse_state = ts_stack_top_state(self->stack, head); TSStateId lex_state = self->language->lex_states[parse_state]; lookahead = self->language->lex_fn(&self->lexer, lex_state); diff --git a/src/runtime/stack.c b/src/runtime/stack.c index b82ac91b..7530da7c 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -65,6 +65,11 @@ TSStateId ts_stack_top_state(const Stack *self, int head) { return entry ? entry->state : 0; } +TSLength ts_stack_top_position(const Stack *self, int head) { + StackEntry *entry = ts_stack_head((Stack *)self, head); + return entry ? entry->position : ts_length_zero(); +} + TSTree *ts_stack_top_tree(const Stack *self, int head) { StackEntry *entry = ts_stack_head((Stack *)self, head); return entry ? entry->tree : NULL; @@ -120,13 +125,19 @@ static StackNode *stack_node_new(StackNode *next, TSStateId state, TSTree *tree) assert(tree->ref_count > 0); ts_tree_retain(tree); stack_node_retain(next); + TSLength position = ts_tree_total_size(tree); + TSPoint position_point = ts_tree_total_size_point(tree); + if (next) { + position = ts_length_add(next->entry.position, position); + position_point = ts_point_add(next->entry.position_point, position_point); + } *self = (StackNode){ .ref_count = 1, .successor_count = 1, .successors = { next, NULL, NULL }, .entry = { - .state = state, .tree = tree, + .state = state, .tree = tree, .position = position, .position_point = position_point, }, }; return self; @@ -192,10 +203,10 @@ void ts_stack_remove_head(Stack *self, int head_index) { } static bool ts_stack__merge_head(Stack *self, int head_index, TSStateId state, - TSTree *tree) { + TSTree *tree, TSLength position) { for (int i = 0; i < head_index; i++) { StackNode *head = self->heads[i]; - if (head->entry.state == state) { + if (head->entry.state == state && ts_length_eq(head->entry.position, position)) { if (head->entry.tree != tree) { head->entry.tree = self->tree_selection_callback.callback( self->tree_selection_callback.data, head->entry.tree, tree); @@ -215,7 +226,10 @@ static bool ts_stack__merge_head(Stack *self, int head_index, TSStateId state, bool ts_stack_push(Stack *self, int head_index, TSStateId state, TSTree *tree) { assert(head_index < self->head_count); - if (ts_stack__merge_head(self, head_index, state, tree)) + TSLength position = ts_tree_total_size(tree); + if (self->heads[head_index]) + position = ts_length_add(self->heads[head_index]->entry.position, position); + if (ts_stack__merge_head(self, head_index, state, tree, position)) return true; self->heads[head_index] = stack_node_new(self->heads[head_index], state, tree); return false; diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 3b44f351..b2f8969b 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -13,6 +13,8 @@ typedef struct Stack Stack; typedef struct { TSTree *tree; TSStateId state; + TSLength position; + TSPoint position_point; } StackEntry; typedef struct { @@ -53,6 +55,12 @@ TSStateId ts_stack_top_state(const Stack *, int head); */ TSTree *ts_stack_top_tree(const Stack *, int head); +/* + * Get the position of the given head of the stack. If the stack is empty, this + * returns {0, 0}. + */ +TSLength ts_stack_top_position(const Stack *, int head); + /* * Get the entry at the given head of the stack. */ diff --git a/src/runtime/tree.c b/src/runtime/tree.c index b04fde46..2e2a5d89 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -66,8 +66,7 @@ void ts_tree_assign_parents(TSTree *self) { } } -static void ts_tree__set_children(TSTree *self, TSTree **children, - size_t child_count) { +void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) { self->children = children; self->child_count = child_count; self->visible_child_count = 0; @@ -108,7 +107,7 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count, TSTree **children, TSSymbolMetadata metadata) { TSTree *result = ts_tree_make_leaf(symbol, ts_length_zero(), ts_length_zero(), ts_point_zero(), ts_point_zero(), metadata); - ts_tree__set_children(result, children, child_count); + ts_tree_set_children(result, child_count, children); return result; } @@ -266,19 +265,6 @@ char *ts_tree_string(const TSTree *self, const char **symbol_names, return result; } -void ts_tree_prepend_children(TSTree *self, size_t count, TSTree **children) { - if (count == 0) - return; - - size_t new_child_count = count + self->child_count; - TSTree **new_children = realloc(children, new_child_count * sizeof(TSTree *)); - memcpy(new_children + count, self->children, - self->child_count * sizeof(TSTree *)); - free(self->children); - - ts_tree__set_children(self, new_children, new_child_count); -} - static inline long min(long a, long b) { return a <= b ? a : b; } diff --git a/src/runtime/tree.h b/src/runtime/tree.h index b55aa445..28ab3157 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -57,7 +57,7 @@ char *ts_tree_string(const TSTree *tree, const char **names, size_t ts_tree_offset_column(const TSTree *self); TSLength ts_tree_total_size(const TSTree *tree); TSPoint ts_tree_total_size_point(const TSTree *self); -void ts_tree_prepend_children(TSTree *, size_t, TSTree **); +void ts_tree_set_children(TSTree *, size_t, TSTree **); void ts_tree_assign_parents(TSTree *); void ts_tree_edit(TSTree *, TSInputEdit);