From 381f89f8ba250eafc8570d5102aa5a424cfa9143 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 28 May 2015 15:06:39 -0700 Subject: [PATCH] Create ambiguity nodes when joining stack heads --- include/tree_sitter/runtime.h | 3 +- spec/fixtures/parsers/arithmetic.c | 3 +- spec/fixtures/parsers/golang.c | 3 +- spec/fixtures/parsers/javascript.c | 3 +- spec/fixtures/parsers/json.c | 3 +- spec/runtime/parse_stack_spec.cc | 131 ++++++++++-------- .../build_tables/build_parse_table.cc | 1 + src/compiler/generate_code/c_code.cc | 6 +- src/compiler/rules/built_in_symbols.cc | 1 + src/compiler/rules/built_in_symbols.h | 1 + src/runtime/parse_stack.c | 34 +++-- src/runtime/tree.c | 12 ++ src/runtime/tree.h | 1 + 13 files changed, 130 insertions(+), 72 deletions(-) diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index 7028d955..951ad608 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -71,7 +71,8 @@ TSNode *ts_document_root_node(const TSDocument *); #define ts_builtin_sym_error 0 #define ts_builtin_sym_end 1 #define ts_builtin_sym_document 2 -#define ts_builtin_sym_start 3 +#define ts_builtin_sym_ambiguity 3 +#define ts_builtin_sym_start 4 #ifdef __cplusplus } diff --git a/spec/fixtures/parsers/arithmetic.c b/spec/fixtures/parsers/arithmetic.c index 3cc03384..e1978745 100644 --- a/spec/fixtures/parsers/arithmetic.c +++ b/spec/fixtures/parsers/arithmetic.c @@ -1,7 +1,7 @@ #include "tree_sitter/parser.h" #define STATE_COUNT 32 -#define SYMBOL_COUNT 20 +#define SYMBOL_COUNT 21 enum { sym_expression = ts_builtin_sym_start, @@ -24,6 +24,7 @@ enum { }; static const char *ts_symbol_names[] = { + [ts_builtin_sym_ambiguity] = "DOCUMENT", [ts_builtin_sym_document] = "DOCUMENT", [sym_expression] = "expression", [sym_sum] = "sum", diff --git a/spec/fixtures/parsers/golang.c b/spec/fixtures/parsers/golang.c index cd95b73e..439d7fe0 100644 --- a/spec/fixtures/parsers/golang.c +++ b/spec/fixtures/parsers/golang.c @@ -1,7 +1,7 @@ #include "tree_sitter/parser.h" #define STATE_COUNT 431 -#define SYMBOL_COUNT 85 +#define SYMBOL_COUNT 86 enum { sym_program = ts_builtin_sym_start, @@ -89,6 +89,7 @@ enum { }; static const char *ts_symbol_names[] = { + [ts_builtin_sym_ambiguity] = "DOCUMENT", [ts_builtin_sym_document] = "DOCUMENT", [sym_program] = "program", [sym_package_directive] = "package_directive", diff --git a/spec/fixtures/parsers/javascript.c b/spec/fixtures/parsers/javascript.c index 3e9bd2cf..2fe8784c 100644 --- a/spec/fixtures/parsers/javascript.c +++ b/spec/fixtures/parsers/javascript.c @@ -1,7 +1,7 @@ #include "tree_sitter/parser.h" #define STATE_COUNT 1564 -#define SYMBOL_COUNT 107 +#define SYMBOL_COUNT 108 enum { sym_program = ts_builtin_sym_start, @@ -111,6 +111,7 @@ enum { }; static const char *ts_symbol_names[] = { + [ts_builtin_sym_ambiguity] = "DOCUMENT", [ts_builtin_sym_document] = "DOCUMENT", [sym_program] = "program", [sym_statement] = "statement", diff --git a/spec/fixtures/parsers/json.c b/spec/fixtures/parsers/json.c index a4431586..b6c80f90 100644 --- a/spec/fixtures/parsers/json.c +++ b/spec/fixtures/parsers/json.c @@ -1,7 +1,7 @@ #include "tree_sitter/parser.h" #define STATE_COUNT 69 -#define SYMBOL_COUNT 19 +#define SYMBOL_COUNT 20 enum { sym_value = ts_builtin_sym_start, @@ -23,6 +23,7 @@ enum { }; static const char *ts_symbol_names[] = { + [ts_builtin_sym_ambiguity] = "DOCUMENT", [ts_builtin_sym_document] = "DOCUMENT", [sym_value] = "value", [sym_object] = "object", diff --git a/spec/runtime/parse_stack_spec.cc b/spec/runtime/parse_stack_spec.cc index e54c00eb..e0c480bf 100644 --- a/spec/runtime/parse_stack_spec.cc +++ b/spec/runtime/parse_stack_spec.cc @@ -3,9 +3,19 @@ #include "runtime/tree.h" #include "runtime/length.h" -enum { stateA, stateB, stateC, stateD, stateE, stateF, stateG, }; -enum { symbol0, symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, }; -const char *symbol_names[] = { "zero", "one", "two", "three", "four", "five", "six", }; +enum { + stateA, stateB, stateC, stateD, stateE, stateF, stateG, stageH +}; + +enum { + symbol0 = ts_builtin_sym_start, + symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7 +}; + +const char *symbol_names[] = { + "ERROR", "END", "DOCUMENT", "AMBIGUITY", + "zero", "one", "two", "three", "four", "five", "six", "seven", +}; START_TEST @@ -18,7 +28,7 @@ describe("ParseStack", [&]() { stack = ts_parse_stack_new(); TSLength len = ts_length_make(2, 2); for (size_t i = 0; i < tree_count; i++) - trees[i] = ts_tree_make_leaf(i, len, len, false); + trees[i] = ts_tree_make_leaf(ts_builtin_sym_start + i, len, len, false); }); after_each([&]() { @@ -103,6 +113,7 @@ describe("ParseStack", [&]() { }); describe("split(head_index)", [&]() { + int new_index; bool merged; before_each([&]() { @@ -111,18 +122,18 @@ describe("ParseStack", [&]() { ts_parse_stack_shift(stack, 0, stateB, trees[1]); ts_parse_stack_shift(stack, 0, stateC, trees[2]); - int new_index = ts_parse_stack_split(stack, 0); + new_index = ts_parse_stack_split(stack, 0); AssertThat(new_index, Equals(1)); AssertThat(ts_parse_stack_head_count(stack), Equals(2)); - }); - it("creates a new head pointing to the same node as the given head", [&]() { // A0__B1__C2__D3 - // \__E4__F3 + // \______E4__F3 ts_parse_stack_shift(stack, 0, stateD, trees[3]); ts_parse_stack_reduce(stack, 1, stateE, symbol4, 2); merged = ts_parse_stack_shift(stack, 1, stateF, trees[3]); + }); + it("creates a new head pointing to the same node as the given head", [&]() { AssertThat(merged, IsFalse()); AssertThat(ts_parse_stack_head_count(stack), Equals(2)); @@ -137,65 +148,69 @@ describe("ParseStack", [&]() { AssertThat(head2->successor_count, Equals(1)); }); - it("re-joins the heads when the same state and tree are shifted onto both heads", [&]() { - // A0__B1__C2__D3 - // \____E4____/ - ts_parse_stack_shift(stack, 0, stateD, trees[3]); - ts_parse_stack_reduce(stack, 1, stateE, symbol4, 2); - TSTree *tree4 = ts_parse_stack_head(stack, 1)->tree; - merged = ts_parse_stack_shift(stack, 1, stateD, trees[3]); + describe("when the same state and tree are shifted onto both heads", [&]() { + before_each([&]() { + // A0__B1__C2__D3__G5 + // \______E4__F3__/ + merged = ts_parse_stack_shift(stack, 0, stateG, trees[5]); + AssertThat(merged, IsFalse()); + merged = ts_parse_stack_shift(stack, 1, stateG, trees[5]); + AssertThat(merged, IsTrue()); + }); - AssertThat(merged, IsTrue()); - AssertThat(ts_parse_stack_head_count(stack), Equals(1)); + it("re-joins the heads", [&]() { + AssertThat(ts_parse_stack_head_count(stack), Equals(1)); - ParseStackNode *head = ts_parse_stack_head(stack, 0); - AssertThat(head->state, Equals(stateD)); - AssertThat(head->tree, Equals(trees[3])); - AssertThat(head->successor_count, Equals(2)); + ParseStackNode *head = ts_parse_stack_head(stack, 0); + AssertThat(head->state, Equals(stateG)); + AssertThat(head->tree, Equals(trees[5])); + AssertThat(head->successor_count, Equals(2)); - ParseStackNode *successor1 = head->successors[0]; - AssertThat(successor1->state, Equals(stateC)); - AssertThat(successor1->tree, Equals(trees[2])) - AssertThat(successor1->successor_count, Equals(1)); + ParseStackNode *successor1 = head->successors[0]; + AssertThat(successor1->state, Equals(stateD)); + AssertThat(successor1->tree, Equals(trees[3])) + AssertThat(successor1->successor_count, Equals(1)); - ParseStackNode *successor2 = head->successors[1]; - AssertThat(successor2->state, Equals(stateE)); - AssertThat(successor2->tree, Equals(tree4)) - AssertThat(successor2->successor_count, Equals(1)); + ParseStackNode *successor2 = head->successors[1]; + AssertThat(successor2->state, Equals(stateF)); + AssertThat(successor2->tree, Equals(trees[3])) + AssertThat(successor2->successor_count, Equals(1)); + }); }); - it("re-joins the heads when the same state and tree are reduced onto both heads", [&]() { - // A0__B1__C2__F4 - // \__D3__E4 - ts_parse_stack_reduce(stack, 0, stateD, symbol3, 1); - ts_parse_stack_shift(stack, 0, stateE, trees[4]); - ts_parse_stack_shift(stack, 1, stateF, trees[4]); + describe("when both heads are reduced into the same state with the same symbol and yield", [&]() { + before_each([&]() { + // A0__G5 + merged = ts_parse_stack_reduce(stack, 0, stateG, symbol5, 3); + AssertThat(merged, IsFalse()); + merged = ts_parse_stack_reduce(stack, 1, stateG, symbol5, 2); + AssertThat(merged, IsTrue()); + }); - AssertThat(ts_parse_stack_head_count(stack), Equals(2)); - ParseStackNode *head1 = ts_parse_stack_head(stack, 0); - AssertThat(head1->state, Equals(stateE)); - AssertThat(head1->tree, Equals(trees[4])); - AssertThat(head1->successor_count, Equals(1)); + it("re-joins the heads, creating an 'ambiguity' node", [&]() { + AssertThat(ts_parse_stack_head_count(stack), Equals(1)); - ParseStackNode *head2 = ts_parse_stack_head(stack, 1); - AssertThat(head2->state, Equals(stateF)); - AssertThat(head2->tree, Equals(trees[4])); - AssertThat(head2->successor_count, Equals(1)); + ParseStackNode *head = ts_parse_stack_head(stack, 0); + AssertThat(head->state, Equals(stateG)); - // A0__B1__C2__G5 - // \__D3__/ - merged = ts_parse_stack_reduce(stack, 0, stateG, symbol5, 1); - AssertThat(merged, IsFalse()); - merged = ts_parse_stack_reduce(stack, 1, stateG, symbol5, 1); - AssertThat(merged, IsTrue()); - - AssertThat(ts_parse_stack_head_count(stack), Equals(1)); - ParseStackNode *head = ts_parse_stack_head(stack, 0); - AssertThat(head->state, Equals(stateG)); - AssertThat(head->tree, Fulfills(EqualsTree( - ts_tree_make_node(symbol5, 1, tree_array({ trees[4] }), false), - symbol_names))); - AssertThat(head->successor_count, Equals(2)); + AssertThat(head->tree, Fulfills(EqualsTree( + ts_tree_make_ambiguity(2, tree_array({ + ts_tree_make_node(symbol5, 3, tree_array({ + trees[1], + trees[2], + trees[3], + }), false), + ts_tree_make_node(symbol5, 2, tree_array({ + ts_tree_make_node(symbol4, 2, tree_array({ + trees[1], + trees[2], + }), false), + trees[3] + }), false) + })), + symbol_names))); + AssertThat(head->successor_count, Equals(1)); + }); }); }); }); diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 697d2569..a76b11f9 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -69,6 +69,7 @@ class ParseTableBuilder { parse_table.symbols.insert(rules::ERROR()); parse_table.symbols.insert(rules::DOCUMENT()); + parse_table.symbols.insert(rules::AMBIGUITY()); return { parse_table, nullptr }; } diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index 8b5c81be..eb4744c7 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -315,8 +315,12 @@ class CCodeGenerator { return "ts_builtin_sym_error"; else if (symbol == rules::END_OF_INPUT()) return "ts_builtin_sym_end"; - else + else if (symbol == rules::DOCUMENT()) return "ts_builtin_sym_document"; + else if (symbol == rules::AMBIGUITY()) + return "ts_builtin_sym_ambiguity"; + else + return ""; } else { string name = sanitize_name(rule_name(symbol)); if (symbol.is_auxiliary()) diff --git a/src/compiler/rules/built_in_symbols.cc b/src/compiler/rules/built_in_symbols.cc index 7a648a3d..e53d13c6 100644 --- a/src/compiler/rules/built_in_symbols.cc +++ b/src/compiler/rules/built_in_symbols.cc @@ -7,6 +7,7 @@ Symbol END_OF_INPUT() { return Symbol(-1, SymbolOptionToken); } Symbol ERROR() { return Symbol(-2, SymbolOptionToken); } Symbol START() { return Symbol(-3); } Symbol DOCUMENT() { return Symbol(-4); } +Symbol AMBIGUITY() { return Symbol(-5); } } // namespace rules } // namespace tree_sitter diff --git a/src/compiler/rules/built_in_symbols.h b/src/compiler/rules/built_in_symbols.h index 63ad3df4..e0784b93 100644 --- a/src/compiler/rules/built_in_symbols.h +++ b/src/compiler/rules/built_in_symbols.h @@ -10,6 +10,7 @@ Symbol ERROR(); Symbol START(); Symbol END_OF_INPUT(); Symbol DOCUMENT(); +Symbol AMBIGUITY(); } // namespace rules } // namespace tree_sitter diff --git a/src/runtime/parse_stack.c b/src/runtime/parse_stack.c index 29d3c04b..ff40136b 100644 --- a/src/runtime/parse_stack.c +++ b/src/runtime/parse_stack.c @@ -1,6 +1,7 @@ #include "tree_sitter/parser.h" #include "runtime/tree.h" #include "runtime/parse_stack.h" +#include "runtime/length.h" #include static const size_t INITIAL_HEAD_CAPACITY = 3; @@ -90,13 +91,14 @@ bool ts_parse_stack_reduce(ParseStack *this, int head_index, TSStateId state, } TSTree *parent = ts_tree_make_node(symbol, child_count, children, false); - if (parse_stack_merge_head(this, head_index, state, parent)) { - ts_tree_release(parent); - return true; - } stack_node_retain(next_node); stack_node_release(this->heads[head_index]); + this->heads[head_index] = next_node; + + if (parse_stack_merge_head(this, head_index, state, parent)) + return true; + this->heads[head_index] = stack_node_new(next_node, state, parent); return false; } @@ -152,6 +154,9 @@ static bool stack_node_release(ParseStackNode *this) { static void stack_node_add_successor(ParseStackNode *this, ParseStackNode *successor) { stack_node_retain(successor); + for (int i = 0; i < this->successor_count; i++) + if (this->successors[i] == successor) + return; this->successors[this->successor_count] = successor; this->successor_count++; } @@ -159,10 +164,23 @@ static void stack_node_add_successor(ParseStackNode *this, ParseStackNode *succe static bool parse_stack_merge_head(ParseStack *this, int head_index, TSStateId state, TSTree *tree) { for (int i = 0; i < head_index; i++) { ParseStackNode *head = this->heads[i]; - if (head->state == state && ts_tree_eq(head->tree, tree)) { - stack_node_add_successor(head, this->heads[head_index]); - parse_stack_remove_head(this, head_index); - return true; + if (head->state == state) { + if (head->tree == tree) { + stack_node_add_successor(head, this->heads[head_index]); + parse_stack_remove_head(this, head_index); + return true; + } + + if (head->tree->symbol == tree->symbol && + ts_length_eq(head->tree->size, tree->size)) { + TSTree **options = malloc(2 * sizeof(TSTree *)); + options[0] = head->tree; + options[1] = tree; + head->tree = ts_tree_make_ambiguity(2, options); + stack_node_add_successor(head, this->heads[head_index]); + parse_stack_remove_head(this, head_index); + return true; + } } } return false; diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 49f3e676..d8736f88 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -120,6 +120,18 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count, return result; } +TSTree *ts_tree_make_ambiguity(size_t alternative_count, TSTree **alternatives) { + TSTree *result = malloc(sizeof(TSTree)); + *result = (TSTree) { .ref_count = 1, + .symbol = ts_builtin_sym_ambiguity, + .size = alternatives[0]->size, + .padding = alternatives[0]->padding, + .child_count = alternative_count, + .children = alternatives, + .options = 0 }; + return result; +} + void ts_tree_retain(TSTree *tree) { tree->ref_count++; } void ts_tree_release(TSTree *tree) { diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 9b39e710..a63ef4cc 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -76,6 +76,7 @@ static inline bool ts_tree_is_fragile_right(TSTree *tree) { TSTree *ts_tree_make_leaf(TSSymbol, TSLength, TSLength, bool); TSTree *ts_tree_make_node(TSSymbol, size_t, TSTree **, bool); TSTree *ts_tree_make_error(TSLength size, TSLength padding, char lookahead_char); +TSTree *ts_tree_make_ambiguity(size_t, TSTree **); void ts_tree_retain(TSTree *tree); void ts_tree_release(TSTree *tree); bool ts_tree_eq(const TSTree *tree1, const TSTree *tree2);