diff --git a/project.gyp b/project.gyp
index 5c0b0c98..ed97e4c5 100644
--- a/project.gyp
+++ b/project.gyp
@@ -105,6 +105,7 @@
         'src/runtime/document.c',
         'src/runtime/lexer.c',
         'src/runtime/node.c',
+        'src/runtime/parse_stack.c',
         'src/runtime/parser.c',
         'src/runtime/stack.c',
         'src/runtime/string_input.c',
diff --git a/spec/runtime/parse_stack_spec.cc b/spec/runtime/parse_stack_spec.cc
new file mode 100644
index 00000000..834a6030
--- /dev/null
+++ b/spec/runtime/parse_stack_spec.cc
@@ -0,0 +1,211 @@
+#include "runtime/runtime_spec_helper.h"
+#include "runtime/parse_stack.h"
+#include "runtime/tree.h"
+#include "runtime/length.h"
+
+enum { stateA, stateB, stateC, stateD, stateE, stateF, stateG, };
+enum { symbol0, symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, };
+const char *names[] = { "zero", "one", "two", "three", "four", "five", "six", };
+
+START_TEST
+
+describe("ParseStack", [&]() {
+  ParseStack *stack;
+  const size_t tree_count = 6;
+  TSTree *trees[tree_count];
+
+  before_each([&]() {
+    stack = ts_parse_stack_new();
+    TSLength len = ts_length_make(2, 2);
+    for (size_t i = 0; i < tree_count; i++)
+      trees[i] = ts_tree_make_leaf(i, len, len, false);
+  });
+
+  after_each([&]() {
+    ts_parse_stack_delete(stack);
+    for (size_t i = 0; i < tree_count; i++)
+      ts_tree_release(trees[i]);
+  });
+
+  it("starts with a single null head", [&]() {
+    AssertThat(ts_parse_stack_head_count(stack), Equals(1));
+    AssertThat(ts_parse_stack_head(stack, 0), Equals(NULL));
+  });
+
+  describe("shift(head_index, state, tree)", [&]() {
+    it("pushes a node with the given state and tree onto the given head", [&]() {
+      ts_parse_stack_shift(stack, 0, 100, trees[0]);
+      ts_parse_stack_shift(stack, 0, 101, trees[1]);
+      ts_parse_stack_shift(stack, 0, 102, trees[2]);
+
+      ParseStackNode *head = ts_parse_stack_head(stack, 0);
+      AssertThat(head->state, Equals(102));
+      AssertThat(head->tree, Equals(trees[2]));
+      AssertThat(head->successor_count, Equals(1));
+
+      head = head->successors[0];
+      AssertThat(head->state, Equals(101));
+      AssertThat(head->tree, Equals(trees[1]));
+      AssertThat(head->successor_count, Equals(1));
+
+      head = head->successors[0];
+      AssertThat(head->state, Equals(100));
+      AssertThat(head->tree, Equals(trees[0]));
+      AssertThat(head->successor_count, Equals(1));
+
+      head = head->successors[0];
+      AssertThat(head, Equals(NULL));
+    });
+  });
+
+  describe("reduce(head_index, state, symbol, child_count)", [&]() {
+    before_each([&]() {
+      ts_parse_stack_shift(stack, 0, 100, trees[0]);
+      ts_parse_stack_shift(stack, 0, 101, trees[1]);
+      ts_parse_stack_shift(stack, 0, 102, trees[2]);
+    });
+
+    it("replaces the given number of nodes with a single parent node", [&]() {
+      ts_parse_stack_reduce(stack, 0, 103, symbol4, 2);
+
+      ParseStackNode *head = ts_parse_stack_head(stack, 0);
+      AssertThat(head->state, Equals(103));
+      AssertThat(
+        ts_tree_eq(
+          head->tree,
+          ts_tree_make_node(symbol4, 2, tree_array({ trees[1], trees[2] }), false)
+        ),
+        IsTrue());
+      AssertThat(head->successor_count, Equals(1));
+
+      head = head->successors[0];
+      AssertThat(head->state, Equals(100));
+      AssertThat(head->tree, Equals(trees[0]));
+      AssertThat(head->successor_count, Equals(1));
+    });
+
+    describe("when one of the reduced children is an 'extra' tree", [&]() {
+      before_each([&]() {
+        ts_tree_set_extra(trees[1]);
+      });
+
+      it("does not count that child toward the number of children to replace", [&]() {
+        ts_parse_stack_reduce(stack, 0, 103, symbol4, 2);
+
+        ParseStackNode *head = ts_parse_stack_head(stack, 0);
+        AssertThat(head->state, Equals(103));
+        AssertThat(
+          ts_tree_eq(
+            head->tree,
+            ts_tree_make_node(symbol4, 3, tree_array({ trees[0], trees[1], trees[2] }), false)
+          ),
+          IsTrue());
+        AssertThat(head->successor_count, Equals(1));
+
+        head = head->successors[0];
+        AssertThat(head, Equals(NULL));
+      });
+    });
+  });
+
+  describe("split(head_index)", [&]() {
+    bool merged;
+
+    before_each([&]() {
+      // A0__B1__C2
+      ts_parse_stack_shift(stack, 0, stateA, trees[0]);
+      ts_parse_stack_shift(stack, 0, stateB, trees[1]);
+      ts_parse_stack_shift(stack, 0, stateC, trees[2]);
+
+      int new_index = ts_parse_stack_split(stack, 0);
+      AssertThat(new_index, Equals(1));
+      AssertThat(ts_parse_stack_head_count(stack), Equals(2));
+    });
+
+    it("creates a new head pointing to the same node as the given head", [&]() {
+      // A0__B1__C2__D3
+      //   \__E4__F3
+      ts_parse_stack_shift(stack, 0, stateD, trees[3]);
+      ts_parse_stack_reduce(stack, 1, stateE, symbol4, 2);
+      merged = ts_parse_stack_shift(stack, 1, stateF, trees[3]);
+
+      AssertThat(merged, IsFalse());
+      AssertThat(ts_parse_stack_head_count(stack), Equals(2));
+
+      ParseStackNode *head1 = ts_parse_stack_head(stack, 0);
+      AssertThat(head1->state, Equals(stateD));
+      AssertThat(head1->tree, Equals(trees[3]));
+      AssertThat(head1->successor_count, Equals(1));
+
+      ParseStackNode *head2 = ts_parse_stack_head(stack, 1);
+      AssertThat(head2->state, Equals(stateF));
+      AssertThat(head2->tree, Equals(trees[3]));
+      AssertThat(head2->successor_count, Equals(1));
+    });
+
+    it("re-joins the heads when the same state and tree are shifted onto both heads", [&]() {
+      // A0__B1__C2__D3
+      //   \____E4____/
+      ts_parse_stack_shift(stack, 0, stateD, trees[3]);
+      ts_parse_stack_reduce(stack, 1, stateE, symbol4, 2);
+      TSTree *tree4 = ts_parse_stack_head(stack, 1)->tree;
+      merged = ts_parse_stack_shift(stack, 1, stateD, trees[3]);
+
+      AssertThat(merged, IsTrue());
+      AssertThat(ts_parse_stack_head_count(stack), Equals(1));
+
+      ParseStackNode *head = ts_parse_stack_head(stack, 0);
+      AssertThat(head->state, Equals(stateD));
+      AssertThat(head->tree, Equals(trees[3]));
+      AssertThat(head->successor_count, Equals(2));
+
+      ParseStackNode *successor1 = head->successors[0];
+      AssertThat(successor1->state, Equals(stateC));
+      AssertThat(successor1->tree, Equals(trees[2]));
+      AssertThat(successor1->successor_count, Equals(1));
+
+      ParseStackNode *successor2 = head->successors[1];
+      AssertThat(successor2->state, Equals(stateE));
+      AssertThat(successor2->tree, Equals(tree4));
+      AssertThat(successor2->successor_count, Equals(1));
+    });
+
+    it("re-joins the heads when the same state and tree are reduced onto both heads", [&]() {
+      // A0__B1__C2__F4
+      //       \__D3__E4
+      ts_parse_stack_reduce(stack, 0, stateD, symbol3, 1);
+      ts_parse_stack_shift(stack, 0, stateE, trees[4]);
+      ts_parse_stack_shift(stack, 1, stateF, trees[4]);
+
+      AssertThat(ts_parse_stack_head_count(stack), Equals(2));
+      ParseStackNode *head1 = ts_parse_stack_head(stack, 0);
+      AssertThat(head1->state, Equals(stateE));
+      AssertThat(head1->tree, Equals(trees[4]));
+      AssertThat(head1->successor_count, Equals(1));
+
+      ParseStackNode *head2 = ts_parse_stack_head(stack, 1);
+      AssertThat(head2->state, Equals(stateF));
+      AssertThat(head2->tree, Equals(trees[4]));
+      AssertThat(head2->successor_count, Equals(1));
+
+      // A0__B1__C2__G5
+      //       \__D3__/
+      merged = ts_parse_stack_reduce(stack, 0, stateG, symbol5, 1);
+      AssertThat(merged, IsFalse());
+      merged = ts_parse_stack_reduce(stack, 1, stateG, symbol5, 1);
+      AssertThat(merged, IsTrue());
+
+      AssertThat(ts_parse_stack_head_count(stack), Equals(1));
+      ParseStackNode *head = ts_parse_stack_head(stack, 0);
+      AssertThat(head->state, Equals(stateG));
+      AssertThat(
+        ts_tree_eq(
+          head->tree,
+          ts_tree_make_node(symbol5, 1, tree_array({ trees[4] }), false)),
+        IsTrue());
+      AssertThat(head->successor_count, Equals(2));
+    });
+  });
+});
+
+END_TEST
diff --git a/src/runtime/parse_stack.c b/src/runtime/parse_stack.c
new file mode 100644
index 00000000..6120f7bf
--- /dev/null
+++ b/src/runtime/parse_stack.c
@@ -0,0 +1,239 @@
+#include "tree_sitter/parser.h"
+#include "runtime/tree.h"
+#include "runtime/parse_stack.h"
+#include <assert.h>
+#include <stdlib.h>
+
+static const size_t INITIAL_HEAD_CAPACITY = 3;
+
+/*
+ *  A set of stack heads. Each entry of `heads` is the top node of one head;
+ *  a NULL entry is a head with no nodes.
+ */
+struct ParseStack {
+  ParseStackNode **heads;
+  int head_count;
+  int head_capacity;
+};
+
+/*
+ *  Section: Lifecycle
+ */
+
+/*
+ *  Create a stack with a single empty (NULL) head.
+ */
+ParseStack *ts_parse_stack_new(void) {
+  ParseStack *this = malloc(sizeof(ParseStack));
+  *this = (ParseStack) {
+    .heads = calloc(INITIAL_HEAD_CAPACITY, sizeof(ParseStackNode *)),
+    .head_count = 1,
+    .head_capacity = INITIAL_HEAD_CAPACITY,
+  };
+  return this;
+}
+
+/*
+ *  Free the head array and the stack itself.
+ *
+ *  NOTE(review): nodes still referenced by the heads are not released here.
+ *  Releasing them would also release their trees, which nodes never retain,
+ *  so confirm the intended tree ownership before changing this.
+ */
+void ts_parse_stack_delete(ParseStack *this) {
+  free(this->heads);
+  free(this);
+}
+
+/*
+ *  Section: Reading
+ */
+
+/*
+ *  Return the top node of the given head, or NULL if that head is empty.
+ */
+ParseStackNode *ts_parse_stack_head(const ParseStack *this, int head_index) {
+  assert(head_index >= 0 && head_index < this->head_count);
+  return this->heads[head_index];
+}
+
+int ts_parse_stack_head_count(const ParseStack *this) {
+  return this->head_count;
+}
+
+/*
+ *  Section: Updating
+ */
+
+static ParseStackNode *stack_node_new(ParseStackNode *, TSStateId, TSTree *);
+static void stack_node_retain(ParseStackNode *);
+static bool stack_node_release(ParseStackNode *);
+static void stack_node_add_successor(ParseStackNode *, ParseStackNode *);
+static void parse_stack_remove_head(ParseStack *, int);
+
+/*
+ *  Push a node with the given state and tree onto a head.
+ *
+ *  If a head with a lower index already has an equal state and tree, the two
+ *  heads are merged instead: that head gains this head's current top node as
+ *  an additional successor, this head is removed, and true is returned.
+ *  Otherwise a new node is pushed and false is returned.
+ */
+bool ts_parse_stack_shift(ParseStack *this, int head_index, TSStateId state, TSTree *tree) {
+  assert(head_index >= 0 && head_index < this->head_count);
+  for (int i = 0; i < head_index; i++) {
+    ParseStackNode *head = this->heads[i];
+    /* An empty head has no node to merge into. */
+    if (!head)
+      continue;
+    if (head->state == state && ts_tree_eq(head->tree, tree)) {
+      stack_node_add_successor(head, this->heads[head_index]);
+      parse_stack_remove_head(this, head_index);
+      return true;
+    }
+  }
+
+  /* The new node takes over the head's reference to the previous top node. */
+  this->heads[head_index] = stack_node_new(this->heads[head_index], state, tree);
+  return false;
+}
+
+/*
+ *  Replace the top nodes of a head with a single parent node.
+ *
+ *  `child_count` counts only non-extra trees; extra trees met along the way
+ *  are popped too and become children of the parent. Only the first
+ *  successor of each node is followed. If the head holds fewer nodes than
+ *  requested, all of its nodes are popped.
+ *
+ *  Returns true if the head was merged into a lower-indexed head with an
+ *  equal state and tree, false if a new node was pushed.
+ */
+bool ts_parse_stack_reduce(ParseStack *this, int head_index, TSStateId state,
+                           TSSymbol symbol, int child_count) {
+  assert(head_index >= 0 && head_index < this->head_count);
+  ParseStackNode *head = this->heads[head_index];
+
+  /*
+   *  Walk down the stack to determine which nodes will be reduced.
+   *  The child count is known ahead of time, but some children may be
+   *  extra trees, which don't count toward it. Stop at the bottom of the
+   *  stack rather than walking past it.
+   */
+  int node_count = 0;
+  ParseStackNode *next_node = head;
+  for (int remaining = child_count; next_node && remaining > 0;) {
+    if (!ts_tree_is_extra(next_node->tree))
+      remaining--;
+    node_count++;
+    next_node = next_node->successors[0];
+  }
+
+  /* Fill the array back to front so the oldest node's tree comes first. */
+  TSTree **children = malloc(node_count * sizeof(TSTree *));
+  ParseStackNode *node = head;
+  for (int i = node_count - 1; i >= 0; i--) {
+    children[i] = node->tree;
+    node = node->successors[0];
+  }
+
+  TSTree *parent = ts_tree_make_node(symbol, node_count, children, false);
+
+  for (int i = 0; i < head_index; i++) {
+    ParseStackNode *other = this->heads[i];
+    if (!other)
+      continue;
+    if (other->state == state && ts_tree_eq(other->tree, parent)) {
+      /*
+       *  NOTE(review): this links the un-reduced head node as the new
+       *  successor. The node below the popped children (`next_node`)
+       *  looks like the intended one; switching would free the popped
+       *  nodes and release their trees, so confirm tree ownership first.
+       */
+      stack_node_add_successor(other, this->heads[head_index]);
+      ts_tree_release(parent);
+      parse_stack_remove_head(this, head_index);
+      return true;
+    }
+  }
+
+  /* Hold on to next_node while the old head is released. */
+  stack_node_retain(next_node);
+  stack_node_release(this->heads[head_index]);
+  this->heads[head_index] = stack_node_new(next_node, state, parent);
+  return false;
+}
+
+/*
+ *  Add a new head pointing to the same node as the given head, and return
+ *  the new head's index.
+ */
+int ts_parse_stack_split(ParseStack *this, int head_index) {
+  assert(head_index >= 0 && head_index < this->head_count);
+  if (this->head_count == this->head_capacity) {
+    this->head_capacity += 3;
+    this->heads = realloc(this->heads, this->head_capacity * sizeof(ParseStackNode *));
+  }
+  int new_index = this->head_count++;
+  this->heads[new_index] = this->heads[head_index];
+  stack_node_retain(this->heads[new_index]);
+  return new_index;
+}
+
+/*
+ *  Section: Private
+ */
+
+/* Create a node with one successor. Neither `next` nor `tree` is retained here. */
+static ParseStackNode *stack_node_new(ParseStackNode *next, TSStateId state, TSTree *tree) {
+  ParseStackNode *this = malloc(sizeof(ParseStackNode));
+  *this = (ParseStackNode) {
+    .ref_count = 1,
+    .successor_count = 1,
+    .successors = {next, NULL, NULL},
+    .state = state,
+    .tree = tree,
+  };
+  return this;
+}
+
+static void stack_node_retain(ParseStackNode *this) {
+  if (!this) return;
+  assert(this->ref_count != 0);
+  this->ref_count++;
+}
+
+/*
+ *  Drop one reference. When the last reference goes, release the node's
+ *  successors and tree, free the node, and return true.
+ */
+static bool stack_node_release(ParseStackNode *this) {
+  if (!this) return false;
+  assert(this->ref_count != 0);
+  this->ref_count--;
+  if (this->ref_count == 0) {
+    for (int i = 0; i < this->successor_count; i++)
+      stack_node_release(this->successors[i]);
+    ts_tree_release(this->tree);
+    free(this);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+static void stack_node_add_successor(ParseStackNode *this, ParseStackNode *successor) {
+  /* `successors` has a fixed size; never write past its end. */
+  assert(this->successor_count < sizeof(this->successors) / sizeof(this->successors[0]));
+  stack_node_retain(successor);
+  this->successors[this->successor_count] = successor;
+  this->successor_count++;
+}
+
+/* Release the given head's node and close the gap in the head array. */
+static void parse_stack_remove_head(ParseStack *this, int head_index) {
+  stack_node_release(this->heads[head_index]);
+  for (int i = head_index; i < this->head_count - 1; i++)
+    this->heads[i] = this->heads[i + 1];
+  this->head_count--;
+}
diff --git a/src/runtime/parse_stack.h b/src/runtime/parse_stack.h
new file mode 100644
index 00000000..05d498ed
--- /dev/null
+++ b/src/runtime/parse_stack.h
@@ -0,0 +1,39 @@
+#ifndef RUNTIME_PARSE_STACK_H_
+#define RUNTIME_PARSE_STACK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "tree_sitter/parser.h"
+
+typedef struct ParseStack ParseStack;
+
+/*
+ *  One entry of a parse stack. `successors` holds the nodes directly below
+ *  this one; more than one is present after heads have been merged.
+ */
+typedef struct ParseStackNode {
+  TSTree *tree;
+  TSStateId state;
+  struct ParseStackNode *successors[4];
+  short unsigned int successor_count;
+  short unsigned int ref_count;
+} ParseStackNode;
+
+ParseStack *ts_parse_stack_new(void);
+void ts_parse_stack_delete(ParseStack *);
+
+ParseStackNode *ts_parse_stack_head(const ParseStack *, int);
+int ts_parse_stack_head_count(const ParseStack *);
+
+/* shift and reduce return true when the head was merged into an earlier head. */
+bool ts_parse_stack_shift(ParseStack *, int, TSStateId, TSTree *);
+bool ts_parse_stack_reduce(ParseStack *, int, TSStateId, TSSymbol, int);
+int ts_parse_stack_split(ParseStack *, int);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // RUNTIME_PARSE_STACK_H_