diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index 951ad608..7028d955 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -71,8 +71,7 @@ TSNode *ts_document_root_node(const TSDocument *); #define ts_builtin_sym_error 0 #define ts_builtin_sym_end 1 #define ts_builtin_sym_document 2 -#define ts_builtin_sym_ambiguity 3 -#define ts_builtin_sym_start 4 +#define ts_builtin_sym_start 3 #ifdef __cplusplus } diff --git a/spec/fixtures/parsers/arithmetic.c b/spec/fixtures/parsers/arithmetic.c index 08571017..3cc03384 100644 --- a/spec/fixtures/parsers/arithmetic.c +++ b/spec/fixtures/parsers/arithmetic.c @@ -1,7 +1,7 @@ #include "tree_sitter/parser.h" #define STATE_COUNT 32 -#define SYMBOL_COUNT 21 +#define SYMBOL_COUNT 20 enum { sym_expression = ts_builtin_sym_start, @@ -24,7 +24,6 @@ enum { }; static const char *ts_symbol_names[] = { - [ts_builtin_sym_ambiguity] = "AMBIGUITY", [ts_builtin_sym_document] = "DOCUMENT", [sym_expression] = "expression", [sym_sum] = "sum", diff --git a/spec/fixtures/parsers/c.c b/spec/fixtures/parsers/c.c index 1ebc68f5..42fe4d06 100644 --- a/spec/fixtures/parsers/c.c +++ b/spec/fixtures/parsers/c.c @@ -1,7 +1,7 @@ #include "tree_sitter/parser.h" #define STATE_COUNT 267 -#define SYMBOL_COUNT 65 +#define SYMBOL_COUNT 64 enum { sym_program = ts_builtin_sym_start, @@ -68,7 +68,6 @@ enum { }; static const char *ts_symbol_names[] = { - [ts_builtin_sym_ambiguity] = "AMBIGUITY", [ts_builtin_sym_document] = "DOCUMENT", [sym_program] = "program", [sym_function_definition] = "function_definition", diff --git a/spec/fixtures/parsers/golang.c b/spec/fixtures/parsers/golang.c index ede31e6d..cd95b73e 100644 --- a/spec/fixtures/parsers/golang.c +++ b/spec/fixtures/parsers/golang.c @@ -1,7 +1,7 @@ #include "tree_sitter/parser.h" #define STATE_COUNT 431 -#define SYMBOL_COUNT 86 +#define SYMBOL_COUNT 85 enum { sym_program = ts_builtin_sym_start, @@ -89,7 +89,6 @@ enum { }; static const char *ts_symbol_names[] = { - [ts_builtin_sym_ambiguity] = "AMBIGUITY", [ts_builtin_sym_document] = "DOCUMENT", [sym_program] = "program", [sym_package_directive] = "package_directive", diff --git a/spec/fixtures/parsers/javascript.c b/spec/fixtures/parsers/javascript.c index d4ee0e0b..3e9bd2cf 100644 --- a/spec/fixtures/parsers/javascript.c +++ b/spec/fixtures/parsers/javascript.c @@ -1,7 +1,7 @@ #include "tree_sitter/parser.h" #define STATE_COUNT 1564 -#define SYMBOL_COUNT 108 +#define SYMBOL_COUNT 107 enum { sym_program = ts_builtin_sym_start, @@ -111,7 +111,6 @@ enum { }; static const char *ts_symbol_names[] = { - [ts_builtin_sym_ambiguity] = "AMBIGUITY", [ts_builtin_sym_document] = "DOCUMENT", [sym_program] = "program", [sym_statement] = "statement", diff --git a/spec/fixtures/parsers/json.c b/spec/fixtures/parsers/json.c index bb53efd5..a4431586 100644 --- a/spec/fixtures/parsers/json.c +++ b/spec/fixtures/parsers/json.c @@ -1,7 +1,7 @@ #include "tree_sitter/parser.h" #define STATE_COUNT 69 -#define SYMBOL_COUNT 20 +#define SYMBOL_COUNT 19 enum { sym_value = ts_builtin_sym_start, @@ -23,7 +23,6 @@ enum { }; static const char *ts_symbol_names[] = { - [ts_builtin_sym_ambiguity] = "AMBIGUITY", [ts_builtin_sym_document] = "DOCUMENT", [sym_value] = "value", [sym_object] = "object", diff --git a/spec/runtime/languages/c/declarations.txt b/spec/runtime/languages/c/declarations.txt index fa4ea1b3..2938cdbe 100644 --- a/spec/runtime/languages/c/declarations.txt +++ b/spec/runtime/languages/c/declarations.txt @@ -26,16 +26,16 @@ ambiguous declarations ========================================== int main() { + int i; someTypeOrValue * pointerOrMultiplicand(); + float y; } --- (function_definition - (identifier) - (direct_declarator (identifier)) - (AMBIGUITY - (ALTERNATIVE (compound_statement - (declaration (identifier) (declarator (star) (direct_declarator (identifier)))))) - (ALTERNATIVE (compound_statement - (expression_statement (math_expression (identifier) (star) (call_expression (identifier)))))))) + (identifier) (direct_declarator (identifier)) + (compound_statement + (declaration (identifier) (identifier)) + (declaration (identifier) (declarator (star) (direct_declarator (identifier)))) + (declaration (identifier) (identifier)))) diff --git a/spec/runtime/parse_stack_spec.cc b/spec/runtime/parse_stack_spec.cc index 2f1a39dd..f6810718 100644 --- a/spec/runtime/parse_stack_spec.cc +++ b/spec/runtime/parse_stack_spec.cc @@ -12,15 +12,35 @@ enum { symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7 }; +struct TreeSelectionSpy { + int call_count; + TSTree *tree_to_return; + const TSTree *arguments[2]; +}; + +extern "C" +TSTree * tree_selection_spy_callback(void *data, TSTree *left, TSTree *right) { + TreeSelectionSpy *spy = (TreeSelectionSpy *)data; + spy->call_count++; + spy->arguments[0] = left; + spy->arguments[1] = right; + return spy->tree_to_return; +} + START_TEST describe("ParseStack", [&]() { ParseStack *stack; const size_t tree_count = 8; TSTree *trees[tree_count]; + TreeSelectionSpy tree_selection_spy{0, NULL, {NULL, NULL}}; before_each([&]() { - stack = ts_parse_stack_new(); + stack = ts_parse_stack_new({ + &tree_selection_spy, + tree_selection_spy_callback + }); + TSLength len = ts_length_make(2, 2); for (size_t i = 0; i < tree_count; i++) trees[i] = ts_tree_make_leaf(ts_builtin_sym_start + i, len, len, false); @@ -199,7 +219,12 @@ describe("ParseStack", [&]() { }); describe("when the trees are different", [&]() { - it("merges the heads by creating an ambiguity node", [&]() { + before_each([&]() { + tree_selection_spy.tree_to_return = trees[7]; + AssertThat(tree_selection_spy.call_count, Equals(0)); + }); + + it("merges the heads, selecting the tree with the tree selection callback", [&]() { /* * A0__B1__C2__D3__G(6|7) * \__E4__F5____/ @@ -210,12 +235,46 @@ describe("ParseStack", [&]() { AssertThat(merged, IsTrue()); AssertThat(ts_parse_stack_head_count(stack), Equals(1)); + AssertThat(tree_selection_spy.call_count, Equals(1)); + AssertThat(tree_selection_spy.arguments[0], Equals(trees[6])); + AssertThat(tree_selection_spy.arguments[1], Equals(trees[7])); AssertThat(*ts_parse_stack_head(stack, 0), Equals({ - ts_tree_make_ambiguity(trees[6], trees[7]), + trees[7], stateG })); }); }); + + describe("when successor nodes of the merged nodes have the same state", [&]() { + it("recursively merges those successor nodes", [&]() { + /* + * A0__B1__C2__D3__G6__H7. + * \__E4__F5__G6. + */ + bool merged = ts_parse_stack_push(stack, 0, stateG, trees[6]); + AssertThat(merged, IsFalse()); + merged = ts_parse_stack_push(stack, 0, stateH, trees[7]); + AssertThat(merged, IsFalse()); + merged = ts_parse_stack_push(stack, 1, stateG, trees[6]); + AssertThat(merged, IsFalse()); + + /* + * A0__B1__C2__D3__G6__H7. + * \__E4__F5_/ + */ + merged = ts_parse_stack_push(stack, 1, stateH, trees[7]); + AssertThat(merged, IsTrue()); + + AssertThat(ts_parse_stack_head_count(stack), Equals(1)); + ParseStackEntry *head = ts_parse_stack_head(stack, 0); + AssertThat(*head, Equals({trees[7], stateH})) + AssertThat(ts_parse_stack_entry_next_count(head), Equals(1)); + + ParseStackEntry *next = ts_parse_stack_entry_next(head, 0); + AssertThat(*next, Equals({trees[6], stateG})) + AssertThat(ts_parse_stack_entry_next_count(next), Equals(2)); + }); + }); }); describe("popping from a stack head that has been merged", [&]() { diff --git a/spec/runtime/tree_spec.cc b/spec/runtime/tree_spec.cc index 70038cd3..74ee4564 100644 --- a/spec/runtime/tree_spec.cc +++ b/spec/runtime/tree_spec.cc @@ -11,7 +11,6 @@ enum { }; static const char *names[] = { - "AMBIGUITY", "DOCUMENT", "ERROR", "END", diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 6becbd04..935021bf 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -73,7 +73,6 @@ class ParseTableBuilder { parse_table.symbols.insert(rules::ERROR()); parse_table.symbols.insert(rules::DOCUMENT()); - parse_table.symbols.insert(rules::AMBIGUITY()); return { parse_table, nullptr }; } diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index b4a066e4..3da035cc 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -323,8 +323,6 @@ class CCodeGenerator { return "ts_builtin_sym_end"; else if (symbol == rules::DOCUMENT()) return "ts_builtin_sym_document"; - else if (symbol == rules::AMBIGUITY()) - return "ts_builtin_sym_ambiguity"; else return ""; } else { @@ -344,8 +342,6 @@ class CCodeGenerator { return "END"; else if (symbol == rules::DOCUMENT()) return "DOCUMENT"; - else if (symbol == rules::AMBIGUITY()) - return "AMBIGUITY"; else return ""; } else { diff --git a/src/compiler/rules/built_in_symbols.cc b/src/compiler/rules/built_in_symbols.cc index e53d13c6..7a648a3d 100644 --- a/src/compiler/rules/built_in_symbols.cc +++ b/src/compiler/rules/built_in_symbols.cc @@ -7,7 +7,6 @@ Symbol END_OF_INPUT() { return Symbol(-1, SymbolOptionToken); } Symbol ERROR() { return Symbol(-2, SymbolOptionToken); } Symbol START() { return Symbol(-3); } Symbol DOCUMENT() { return Symbol(-4); } -Symbol AMBIGUITY() { return Symbol(-5); } } // namespace rules } // namespace tree_sitter diff --git a/src/compiler/rules/built_in_symbols.h b/src/compiler/rules/built_in_symbols.h index e0784b93..63ad3df4 100644 --- a/src/compiler/rules/built_in_symbols.h +++ b/src/compiler/rules/built_in_symbols.h @@ -10,7 +10,6 @@ Symbol ERROR(); Symbol START(); Symbol END_OF_INPUT(); Symbol DOCUMENT(); -Symbol AMBIGUITY(); } // namespace rules } // namespace tree_sitter diff --git a/src/runtime/parse_stack.c b/src/runtime/parse_stack.c index e28f5c09..0ef85040 100644 --- a/src/runtime/parse_stack.c +++ b/src/runtime/parse_stack.c @@ -21,18 +21,20 @@ struct ParseStack { int head_count; int head_capacity; ParseStackPopResult last_pop_results[MAX_POP_PATH_COUNT]; + TreeSelectionCallback tree_selection_callback; }; /* * Section: Stack lifecycle */ -ParseStack *ts_parse_stack_new() { +ParseStack *ts_parse_stack_new(TreeSelectionCallback tree_selection_callback) { ParseStack *this = malloc(sizeof(ParseStack)); *this = (ParseStack) { .heads = calloc(INITIAL_HEAD_CAPACITY, sizeof(ParseStackNode *)), .head_count = 1, .head_capacity = INITIAL_HEAD_CAPACITY, + .tree_selection_callback = tree_selection_callback, }; return this; } @@ -115,13 +117,27 @@ static ParseStackNode *stack_node_new(ParseStackNode *next, TSStateId state, TST return this; } -static void stack_node_add_successor(ParseStackNode *this, ParseStackNode *successor) { - for (int i = 0; i < this->successor_count; i++) - if (this->successors[i] == successor) +static void ts_parse_stack_add_node_successor(ParseStack *this, ParseStackNode *node, ParseStackNode *new_successor) { + for (int i = 0; i < node->successor_count; i++) { + ParseStackNode *successor = node->successors[i]; + if (successor == new_successor) return; - stack_node_retain(successor); - this->successors[this->successor_count] = successor; - this->successor_count++; + if (successor->entry.state == new_successor->entry.state) { + if (successor->entry.tree != new_successor->entry.tree) + successor->entry.tree = this->tree_selection_callback.callback( + this->tree_selection_callback.data, + successor->entry.tree, + new_successor->entry.tree + ); + for (int j = 0; j < new_successor->successor_count; j++) + ts_parse_stack_add_node_successor(this, successor, new_successor->successors[j]); + return; + } + } + + stack_node_retain(new_successor); + node->successors[node->successor_count] = new_successor; + node->successor_count++; } /* @@ -159,16 +175,16 @@ static bool ts_parse_stack_merge_head(ParseStack *this, int head_index, TSStateI for (int i = 0; i < head_index; i++) { ParseStackNode *head = this->heads[i]; if (head->entry.state == state) { - if (head->entry.tree == tree) { - stack_node_add_successor(head, this->heads[head_index]); - ts_parse_stack_remove_head(this, head_index); - return true; - } else { - head->entry.tree = ts_tree_add_alternative(head->entry.tree, tree); - stack_node_add_successor(head, this->heads[head_index]); - ts_parse_stack_remove_head(this, head_index); - return true; + if (head->entry.tree != tree) { + head->entry.tree = this->tree_selection_callback.callback( + this->tree_selection_callback.data, + head->entry.tree, + tree + ); } + ts_parse_stack_add_node_successor(this, head, this->heads[head_index]); + ts_parse_stack_remove_head(this, head_index); + return true; } } return false; @@ -189,7 +205,11 @@ bool ts_parse_stack_push(ParseStack *this, int head_index, TSStateId state, TSTr void ts_parse_stack_add_alternative(ParseStack *this, int head_index, TSTree *tree) { assert(head_index < this->head_count); ParseStackEntry *entry = &this->heads[head_index]->entry; - entry->tree = ts_tree_add_alternative(entry->tree, tree); + entry->tree = this->tree_selection_callback.callback( + this->tree_selection_callback.data, + entry->tree, + tree + ); } int ts_parse_stack_split(ParseStack *this, int head_index) { diff --git a/src/runtime/parse_stack.h b/src/runtime/parse_stack.h index c536f17d..ddbe10d3 100644 --- a/src/runtime/parse_stack.h +++ b/src/runtime/parse_stack.h @@ -25,10 +25,15 @@ typedef struct { ParseStackPopResult *contents; } ParseStackPopResultList; +typedef struct { + void *data; + TSTree * (*callback)(void *data, TSTree *, TSTree *); +} TreeSelectionCallback; + /* * Create a ParseStack. */ -ParseStack *ts_parse_stack_new(); +ParseStack *ts_parse_stack_new(TreeSelectionCallback); /* * Release any resources reserved by a parse stack. diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 8005c246..0ff5539b 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -303,9 +303,13 @@ static TSTree *finish(TSParser *parser) { * Public */ +TSTree * ts_parser_select_tree(void *data, TSTree *left, TSTree *right) { + return left; +} + TSParser ts_parser_make() { return (TSParser) { .lexer = ts_lexer_make(), - .stack = ts_parse_stack_new(), + .stack = ts_parse_stack_new((TreeSelectionCallback) {NULL, ts_parser_select_tree}), .right_stack = ts_stack_make() }; } diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 0975a965..9ca837a9 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -121,37 +121,6 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count, return result; } -TSTree *ts_tree_make_ambiguity(TSTree *left, TSTree *right) { - TSTree *result = malloc(sizeof(TSTree)); - TSTree **alternatives = malloc(2 * sizeof(TSTree *)); - alternatives[0] = left; - alternatives[1] = right; - ts_tree_retain(left); - ts_tree_retain(right); - *result = (TSTree) { .ref_count = 1, - .symbol = ts_builtin_sym_ambiguity, - .size = alternatives[0]->size, - .padding = alternatives[0]->padding, - .child_count = 2, - .children = alternatives, - .options = 0 }; - return result; -} - -TSTree *ts_tree_add_alternative(TSTree *left, TSTree *right) { - if (left->symbol == ts_builtin_sym_ambiguity) { - size_t index = left->child_count++; - left->children = realloc(left->children, left->child_count * sizeof(TSTree *)); - left->children[index] = right; - ts_tree_retain(right); - return left; - } else { - TSTree *result = ts_tree_make_ambiguity(left, right); - ts_tree_release(left); - return result; - } -} - void ts_tree_retain(TSTree *tree) { assert(tree->ref_count > 0); tree->ref_count++; @@ -253,12 +222,8 @@ static size_t tree_write_to_string(const TSTree *tree, const char **symbol_names } for (size_t i = 0; i < tree->child_count; i++) { - if (tree->symbol == ts_builtin_sym_ambiguity) - cursor += snprintf(*writer, limit, " (ALTERNATIVE"); TSTree *child = tree->children[i]; cursor += tree_write_to_string(child, symbol_names, *writer, limit, 0); - if (tree->symbol == ts_builtin_sym_ambiguity) - cursor += snprintf(*writer, limit, ")"); } if (visible) diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 8ace958e..9b39e710 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -76,7 +76,6 @@ static inline bool ts_tree_is_fragile_right(TSTree *tree) { TSTree *ts_tree_make_leaf(TSSymbol, TSLength, TSLength, bool); TSTree *ts_tree_make_node(TSSymbol, size_t, TSTree **, bool); TSTree *ts_tree_make_error(TSLength size, TSLength padding, char lookahead_char); -TSTree *ts_tree_make_ambiguity(TSTree *, TSTree *); void ts_tree_retain(TSTree *tree); void ts_tree_release(TSTree *tree); bool ts_tree_eq(const TSTree *tree1, const TSTree *tree2); @@ -85,7 +84,6 @@ char *ts_tree_error_string(const TSTree *tree, const char **names); TSTree **ts_tree_children(const TSTree *tree, size_t *count); TSTreeChild *ts_tree_visible_children(const TSTree *tree, size_t *count); TSLength ts_tree_total_size(const TSTree *tree); -TSTree *ts_tree_add_alternative(TSTree *tree, TSTree *alternative); static inline bool ts_tree_is_empty(TSTree *tree) { return ts_tree_total_size(tree).bytes == 0;