Merge pull request #8 from maxbrunsfeld/glr-with-differing-lex-states

Handle ambiguities where each interpretation expects different lexical tokens
This commit is contained in:
Max Brunsfeld 2015-11-20 00:18:08 -08:00
commit 16692be210
10 changed files with 561 additions and 283 deletions

View file

@ -1,15 +1,16 @@
#include "runtime/runtime_spec_helper.h"
#include "runtime/helpers/tree_helpers.h"
#include "runtime/stack.h"
#include "runtime/tree.h"
#include "runtime/length.h"
enum {
stateA, stateB, stateC, stateD, stateE, stateF, stateG, stateH
stateA, stateB, stateC, stateD, stateE, stateF, stateG, stateH, stateI, stateJ
};
enum {
symbol0 = ts_builtin_sym_start,
symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7
symbol1, symbol2, symbol3, symbol4, symbol5, symbol6, symbol7, symbol8
};
struct TreeSelectionSpy {
@ -31,7 +32,7 @@ START_TEST
describe("Stack", [&]() {
Stack *stack;
const size_t tree_count = 8;
const size_t tree_count = 10;
TSTree *trees[tree_count];
TreeSelectionSpy tree_selection_spy{0, NULL, {NULL, NULL}};
@ -43,7 +44,7 @@ describe("Stack", [&]() {
TSLength len = ts_length_make(2, 2);
for (size_t i = 0; i < tree_count; i++)
trees[i] = ts_tree_make_leaf(ts_builtin_sym_start + i, len, len, TSNodeTypeNamed);
trees[i] = ts_tree_make_leaf(i, len, len, TSNodeTypeNamed);
});
after_each([&]() {
@ -87,8 +88,6 @@ describe("Stack", [&]() {
});
describe("popping nodes from the stack", [&]() {
StackPopResultList pop;
before_each([&]() {
/*
* A0__B1__C2.
@ -102,43 +101,47 @@ describe("Stack", [&]() {
/*
* A0.
*/
pop = ts_stack_pop(stack, 0, 2, false);
AssertThat(pop.size, Equals(1));
AssertThat(pop.contents[0].tree_count, Equals(2));
AssertThat(pop.contents[0].trees[0], Equals(trees[1]));
AssertThat(pop.contents[0].trees[1], Equals(trees[2]));
Vector pop = ts_stack_pop(stack, 0, 2, false);
StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0);
AssertThat(pop.size, Equals<size_t>(1));
AssertThat(pop1.tree_count, Equals<size_t>(2));
AssertThat(pop1.trees[0], Equals(trees[1]));
AssertThat(pop1.trees[1], Equals(trees[2]));
AssertThat(*ts_stack_head(stack, 0), Equals<StackEntry>({trees[0], stateA}));
/*
* .
*/
pop = ts_stack_pop(stack, 0, 1, false);
AssertThat(pop.size, Equals(1));
AssertThat(pop.contents[0].tree_count, Equals(1));
AssertThat(pop.contents[0].trees[0], Equals(trees[0]));
pop1 = *(StackPopResult *)vector_get(&pop, 0);
AssertThat(pop.size, Equals<size_t>(1));
AssertThat(pop1.tree_count, Equals<size_t>(1));
AssertThat(pop1.trees[0], Equals(trees[0]));
AssertThat(ts_stack_head(stack, 0), Equals<const StackEntry *>(nullptr));
});
it("does not count 'extra' trees toward the count", [&]() {
ts_tree_set_extra(trees[1]);
pop = ts_stack_pop(stack, 0, 2, false);
AssertThat(pop.size, Equals(1));
AssertThat(pop.contents[0].tree_count, Equals(3));
AssertThat(pop.contents[0].trees[0], Equals(trees[0]));
AssertThat(pop.contents[0].trees[1], Equals(trees[1]));
AssertThat(pop.contents[0].trees[2], Equals(trees[2]));
Vector pop = ts_stack_pop(stack, 0, 2, false);
StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0);
AssertThat(pop.size, Equals<size_t>(1));
AssertThat(pop1.tree_count, Equals<size_t>(3));
AssertThat(pop1.trees[0], Equals(trees[0]));
AssertThat(pop1.trees[1], Equals(trees[1]));
AssertThat(pop1.trees[2], Equals(trees[2]));
AssertThat(ts_stack_head(stack, 0), Equals<const StackEntry *>(nullptr));
});
it("pops the entire stack when given a negative count", [&]() {
pop = ts_stack_pop(stack, 0, -1, false);
Vector pop = ts_stack_pop(stack, 0, -1, false);
AssertThat(pop.size, Equals(1));
AssertThat(pop.contents[0].tree_count, Equals(3));
AssertThat(pop.contents[0].trees[0], Equals(trees[0]));
AssertThat(pop.contents[0].trees[1], Equals(trees[1]));
AssertThat(pop.contents[0].trees[2], Equals(trees[2]));
AssertThat(pop.size, Equals<size_t>(1));
StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0);
AssertThat(pop1.tree_count, Equals<size_t>(3));
AssertThat(pop1.trees[0], Equals(trees[0]));
AssertThat(pop1.trees[1], Equals(trees[1]));
AssertThat(pop1.trees[2], Equals(trees[2]));
});
});
@ -316,6 +319,9 @@ describe("Stack", [&]() {
ts_stack_push(stack, 1, stateE, trees[4]);
ts_stack_push(stack, 1, stateF, trees[5]);
ts_stack_push(stack, 1, stateG, trees[6]);
AssertThat(ts_stack_head_count(stack), Equals(1));
AssertThat(ts_stack_entry_next_count(ts_stack_head(stack, 0)), Equals(2));
});
describe("when there are two paths that lead to two different heads", [&]() {
@ -324,18 +330,18 @@ describe("Stack", [&]() {
* A0__B1__C2.
* \__E4.
*/
StackPopResultList pop = ts_stack_pop(stack, 0, 2, false);
Vector pop = ts_stack_pop(stack, 0, 2, false);
AssertThat(pop.size, Equals(2));
StackPopResult pop1 = pop.contents[0];
AssertThat(pop1.index, Equals(0));
AssertThat(pop1.tree_count, Equals(2));
AssertThat(pop.size, Equals<size_t>(2));
StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0);
AssertThat(pop1.head_index, Equals(0));
AssertThat(pop1.tree_count, Equals<size_t>(2));
AssertThat(pop1.trees[0], Equals(trees[3]));
AssertThat(pop1.trees[1], Equals(trees[6]));
StackPopResult pop2 = pop.contents[1];
AssertThat(pop2.index, Equals(1));
AssertThat(pop2.tree_count, Equals(2));
StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1);
AssertThat(pop2.head_index, Equals(1));
AssertThat(pop2.tree_count, Equals<size_t>(2));
AssertThat(pop2.trees[0], Equals(trees[5]));
AssertThat(pop2.trees[1], Equals(trees[6]));
@ -359,9 +365,9 @@ describe("Stack", [&]() {
* A0__B1__C2__D3__G6.
* \__E4__F5__/
*/
StackPopResultList pop = ts_stack_pop(stack, 0, 1, false);
Vector pop = ts_stack_pop(stack, 0, 1, false);
AssertThat(pop.size, Equals(1));
AssertThat(pop.size, Equals<size_t>(1));
AssertThat(ts_stack_head_count(stack), Equals(1));
});
});
@ -379,19 +385,21 @@ describe("Stack", [&]() {
* A0__B1__C2__D3.
* \__E4__F5.
*/
StackPopResultList pop = ts_stack_pop(stack, 0, 2, false);
Vector pop = ts_stack_pop(stack, 0, 2, false);
AssertThat(ts_stack_head_count(stack), Equals(2));
AssertThat(pop.size, Equals(2));
AssertThat(pop.contents[0].index, Equals(0));
AssertThat(pop.contents[0].tree_count, Equals(2));
AssertThat(pop.contents[0].trees[0], Equals(trees[6]));
AssertThat(pop.contents[0].trees[1], Equals(trees[7]));
AssertThat(pop.size, Equals<size_t>(2));
StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0);
AssertThat(pop1.head_index, Equals(0));
AssertThat(pop1.tree_count, Equals<size_t>(2));
AssertThat(pop1.trees[0], Equals(trees[6]));
AssertThat(pop1.trees[1], Equals(trees[7]));
AssertThat(pop.contents[1].index, Equals(1));
AssertThat(pop.contents[1].tree_count, Equals(2));
AssertThat(pop.contents[1].trees[0], Equals(trees[6]));
AssertThat(pop.contents[1].trees[1], Equals(trees[7]));
StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1);
AssertThat(pop2.head_index, Equals(1));
AssertThat(pop2.tree_count, Equals<size_t>(2));
AssertThat(pop2.trees[0], Equals(trees[6]));
AssertThat(pop2.trees[1], Equals(trees[7]));
});
});
@ -400,17 +408,121 @@ describe("Stack", [&]() {
/*
* A0__B1.
*/
StackPopResultList pop = ts_stack_pop(stack, 0, 3, false);
Vector pop = ts_stack_pop(stack, 0, 3, false);
AssertThat(ts_stack_head_count(stack), Equals(1));
AssertThat(*ts_stack_head(stack, 0), Equals<StackEntry>({trees[1], stateB}));
AssertThat(pop.size, Equals(2));
AssertThat(pop.contents[0].tree_count, Equals(3));
AssertThat(pop.contents[0].index, Equals(0));
AssertThat(pop.contents[0].trees[0], Equals(trees[2]));
AssertThat(pop.contents[1].tree_count, Equals(3));
AssertThat(pop.contents[1].index, Equals(0));
AssertThat(pop.contents[1].trees[0], Equals(trees[4]));
AssertThat(pop.size, Equals<size_t>(2));
StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0);
AssertThat(pop1.tree_count, Equals<size_t>(3));
AssertThat(pop1.head_index, Equals(0));
AssertThat(pop1.trees[0], Equals(trees[2]));
StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1);
AssertThat(pop2.tree_count, Equals<size_t>(3));
AssertThat(pop2.head_index, Equals(0));
AssertThat(pop2.trees[0], Equals(trees[4]));
});
});
});
describe("popping from a stack head that has been 3-way merged", [&]() {
before_each([&]() {
/*
 * Build a stack whose three branches all converge on the same I8 node:
 *
 *  A0__B1__C2__D3__I8__J9.
 *       \__E4__F5__/
 *       \__G6__H7__/
 */
ts_stack_clear(stack);
ts_stack_push(stack, 0, stateA, trees[0]);
ts_stack_push(stack, 0, stateB, trees[1]);
// Fork twice so that heads 0, 1 and 2 all start from B1.
ts_stack_split(stack, 0);
ts_stack_split(stack, 1);
ts_stack_push(stack, 0, stateC, trees[2]);
ts_stack_push(stack, 1, stateE, trees[4]);
ts_stack_push(stack, 2, stateG, trees[6]);
ts_stack_push(stack, 0, stateD, trees[3]);
ts_stack_push(stack, 1, stateF, trees[5]);
ts_stack_push(stack, 2, stateH, trees[7]);
ts_stack_push(stack, 0, stateI, trees[8]);
// Head 1 is pushed twice on purpose: presumably the first push merges head 1
// into head 0 and the remaining head then moves down to index 1, so the
// second push merges what used to be head 2 (TODO confirm against
// ts_stack_push / ts_stack_remove_head). The assertions below check the
// resulting single head and its 3-way join.
ts_stack_push(stack, 1, stateI, trees[8]);
ts_stack_push(stack, 1, stateI, trees[8]);
ts_stack_push(stack, 0, stateJ, trees[9]);
// Exactly one head (J9) remains; its only successor (I8) has three successors.
AssertThat(ts_stack_head_count(stack), Equals(1));
StackEntry *head = ts_stack_head(stack, 0);
AssertThat(ts_stack_entry_next_count(head), Equals(1));
AssertThat(ts_stack_entry_next_count(ts_stack_entry_next(head, 0)), Equals(3));
});
describe("when there is one path that leads to three different heads", [&]() {
it("returns three entries with the same array of trees", [&]() {
/*
* A0__B1__C2__D3.
* \__E4__F5.
* \__G6__H7.
*/
Vector pop = ts_stack_pop(stack, 0, 2, false);
AssertThat(ts_stack_head_count(stack), Equals(3));
AssertThat(pop.size, Equals<size_t>(3));
StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0);
AssertThat(ts_stack_top_tree(stack, 0), Equals(trees[3]));
AssertThat(pop1.head_index, Equals(0));
AssertThat(pop1.tree_count, Equals<size_t>(2));
AssertThat(pop1.trees[0], Equals(trees[8]));
AssertThat(pop1.trees[1], Equals(trees[9]));
StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1);
AssertThat(ts_stack_top_tree(stack, 1), Equals(trees[5]));
AssertThat(pop2.head_index, Equals(1));
AssertThat(pop2.tree_count, Equals<size_t>(2));
AssertThat(pop2.trees, Equals(pop1.trees));
StackPopResult pop3 = *(StackPopResult *)vector_get(&pop, 2);
AssertThat(ts_stack_top_tree(stack, 2), Equals(trees[7]));
AssertThat(pop3.head_index, Equals(2));
AssertThat(pop3.tree_count, Equals<size_t>(2));
AssertThat(pop3.trees, Equals(pop1.trees));
});
});
describe("when there are three different paths that lead to three different heads", [&]() {
it("returns three entries with different arrays of trees", [&]() {
/*
* A0__B1__C2.
* \__E4.
* \__G6.
*/
Vector pop = ts_stack_pop(stack, 0, 3, false);
AssertThat(ts_stack_head_count(stack), Equals(3));
AssertThat(pop.size, Equals<size_t>(3));
StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0);
AssertThat(ts_stack_top_tree(stack, 0), Equals(trees[2]));
AssertThat(pop1.head_index, Equals(0));
AssertThat(pop1.tree_count, Equals<size_t>(3));
AssertThat(pop1.trees[0], Equals(trees[3]));
AssertThat(pop1.trees[1], Equals(trees[8]));
AssertThat(pop1.trees[2], Equals(trees[9]));
StackPopResult pop2 = *(StackPopResult *)vector_get(&pop, 1);
AssertThat(ts_stack_top_tree(stack, 1), Equals(trees[4]));
AssertThat(pop2.head_index, Equals(1));
AssertThat(pop2.tree_count, Equals<size_t>(3));
AssertThat(pop2.trees[0], Equals(trees[5]));
AssertThat(pop2.trees[1], Equals(trees[8]));
AssertThat(pop2.trees[2], Equals(trees[9]));
StackPopResult pop3 = *(StackPopResult *)vector_get(&pop, 2);
AssertThat(ts_stack_top_tree(stack, 2), Equals(trees[6]));
AssertThat(pop3.head_index, Equals(2));
AssertThat(pop3.tree_count, Equals<size_t>(3));
AssertThat(pop3.trees[0], Equals(trees[7]));
AssertThat(pop3.trees[1], Equals(trees[8]));
AssertThat(pop3.trees[2], Equals(trees[9]));
});
});
});

View file

@ -110,6 +110,9 @@ TSLexer ts_lexer_make() {
}
void ts_lexer_reset(TSLexer *self, TSLength position) {
if (ts_length_eq(position, self->current_position))
return;
self->token_start_position = position;
self->token_end_position = position;
self->current_position = position;

View file

@ -6,6 +6,9 @@
#include "runtime/tree.h"
#include "runtime/lexer.h"
#include "runtime/length.h"
#include "runtime/vector.h"
#include <assert.h>
/*
* Debugging
@ -20,6 +23,12 @@
#define SYM_NAME(sym) self->language->symbol_names[sym]
/*
 * Per-head parsing state. One HeadState is kept in the parser's head_states
 * vector for each stack head and is looked up by head index.
 */
typedef struct {
/* Next candidate subtree of the previous tree for reuse; NULL when none remain. */
TSTree *reusable_subtree;
/* Character offset of reusable_subtree; compared against position.chars. */
size_t reusable_subtree_pos;
/*
 * This head's position in the input. It is advanced by the total size of each
 * shifted lookahead and is the position the lexer is reset to before lexing
 * for this head.
 */
TSLength position;
} HeadState;
typedef enum {
ConsumeResultShifted,
ConsumeResultRemoved,
@ -53,14 +62,14 @@ static TSParseAction ts_language__last_action(const TSLanguage *language,
* Replace the head state's reusable_subtree with its first non-fragile descendant.
* Return true if a suitable descendant is found, false otherwise.
*/
static bool ts_parser__breakdown_reusable_subtree(TSParser *self) {
static bool ts_parser__breakdown_reusable_subtree(HeadState *state) {
do {
if (self->reusable_subtree->symbol == ts_builtin_sym_error)
if (state->reusable_subtree->symbol == ts_builtin_sym_error)
return false;
if (self->reusable_subtree->child_count == 0)
if (state->reusable_subtree->child_count == 0)
return false;
self->reusable_subtree = self->reusable_subtree->children[0];
} while (ts_tree_is_fragile(self->reusable_subtree));
state->reusable_subtree = state->reusable_subtree->children[0];
} while (ts_tree_is_fragile(state->reusable_subtree));
return true;
}
@ -68,80 +77,82 @@ static bool ts_parser__breakdown_reusable_subtree(TSParser *self) {
* Replace the head state's reusable_subtree with its largest right neighbor, or
* NULL if no right neighbor exists.
*/
static void ts_parser__pop_reusable_subtree(TSParser *self) {
self->reusable_subtree_pos += ts_tree_total_size(self->reusable_subtree).chars;
static void ts_parser__pop_reusable_subtree(HeadState *state) {
state->reusable_subtree_pos +=
ts_tree_total_size(state->reusable_subtree).chars;
while (self->reusable_subtree) {
TSTree *parent = self->reusable_subtree->context.parent;
size_t next_index = self->reusable_subtree->context.index + 1;
while (state->reusable_subtree) {
TSTree *parent = state->reusable_subtree->context.parent;
size_t next_index = state->reusable_subtree->context.index + 1;
if (parent && parent->child_count > next_index) {
self->reusable_subtree = parent->children[next_index];
state->reusable_subtree = parent->children[next_index];
return;
}
self->reusable_subtree = parent;
state->reusable_subtree = parent;
}
}
/*
 * Decide whether `subtree` may be used as the lookahead for stack head `head`.
 * An error subtree is never reusable. Otherwise the subtree is reusable when
 * the parse action looked up for the head's top state and the subtree's symbol
 * is not an error action.
 */
static bool ts_parser__can_reuse(TSParser *self, int head, TSTree *subtree) {
  if (subtree->symbol != ts_builtin_sym_error) {
    TSStateId top_state = ts_stack_top_state(self->stack, head);
    const TSParseAction *table_action =
      ts_language__actions(self->language, top_state, subtree->symbol);
    return table_action->type != TSParseActionTypeError;
  }

  return false;
}
/*
* Find a reusable subtree for the given head. If there is a reusable subtree
* at the head's current position in the previous tree, return it and advance
* the head's reusable subtree. Otherwise, return NULL so that the caller can
* run the lexer.
*/
static void ts_parser__get_next_lookahead(TSParser *self) {
while (self->reusable_subtree) {
if (self->reusable_subtree_pos > self->lexer.current_position.chars) {
static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) {
HeadState *state = vector_get(&self->head_states, head);
while (state->reusable_subtree) {
if (state->reusable_subtree_pos > state->position.chars) {
break;
}
if (self->reusable_subtree_pos < self->lexer.current_position.chars) {
DEBUG("past_reuse sym:%s", SYM_NAME(self->reusable_subtree->symbol));
ts_parser__pop_reusable_subtree(self);
if (state->reusable_subtree_pos < state->position.chars) {
DEBUG("past_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol));
ts_parser__pop_reusable_subtree(state);
continue;
}
if (ts_tree_has_changes(self->reusable_subtree) ||
ts_tree_is_fragile(self->reusable_subtree) ||
ts_tree_is_extra(self->reusable_subtree)) {
DEBUG("breakdown sym:%s", SYM_NAME(self->reusable_subtree->symbol));
if (!ts_parser__breakdown_reusable_subtree(self))
ts_parser__pop_reusable_subtree(self);
if (ts_tree_has_changes(state->reusable_subtree) ||
ts_tree_is_fragile(state->reusable_subtree) ||
ts_tree_is_extra(state->reusable_subtree) ||
(state->reusable_subtree->child_count > 0 &&
!ts_parser__can_reuse(self, head, state->reusable_subtree))) {
DEBUG("breakdown sym:%s", SYM_NAME(state->reusable_subtree->symbol));
if (!ts_parser__breakdown_reusable_subtree(state))
ts_parser__pop_reusable_subtree(state);
continue;
}
TSStateId top_state = ts_stack_top_state(self->stack, 0);
TSSymbol symbol = self->reusable_subtree->symbol;
if (ts_language__last_action(self->language, top_state, symbol).type ==
TSParseActionTypeError) {
DEBUG("cant_reuse sym:%s", SYM_NAME(self->reusable_subtree->symbol));
ts_parser__pop_reusable_subtree(self);
continue;
}
self->lookahead = self->reusable_subtree;
TSLength size = ts_tree_total_size(self->lookahead);
DEBUG("reuse sym:%s size:%lu extra:%d", SYM_NAME(self->lookahead->symbol),
size.chars, self->lookahead->options.extra);
ts_lexer_reset(&self->lexer,
ts_length_add(self->lexer.current_position, size));
ts_parser__pop_reusable_subtree(self);
return;
TSTree *result = state->reusable_subtree;
TSLength size = ts_tree_total_size(result);
DEBUG("reuse sym:%s size:%lu extra:%d", SYM_NAME(result->symbol),
size.chars, result->options.extra);
ts_parser__pop_reusable_subtree(state);
return result;
}
TSLength position = self->lexer.current_position;
for (size_t i = 0, count = ts_stack_head_count(self->stack); i < count; i++) {
if (i > 0) {
ts_lexer_reset(&self->lexer, position);
ts_tree_release(self->lookahead);
}
return NULL;
}
TSStateId parse_state = ts_stack_top_state(self->stack, i);
TSStateId lex_state = self->language->lex_states[parse_state];
DEBUG("lex state:%d", lex_state);
self->lookahead = self->language->lex_fn(&self->lexer, lex_state);
/*
 * Split stack head `head` and give the new head a copy of the original head's
 * HeadState. Returns the index of the new head.
 *
 * The new stack head is expected to be appended at the index equal to the
 * current number of head states, so that head_states stays aligned with the
 * stack's heads; this is asserted.
 */
static int ts_parser__split(TSParser *self, int head) {
  int result = ts_stack_split(self->stack, head);

  /*
   * Check the sign first and cast explicitly: comparing a negative int with
   * the unsigned vector size would convert it to a large unsigned value.
   */
  assert(result >= 0 && (size_t)result == self->head_states.size);

  /*
   * Copy the state by value before pushing, so the pushed data does not point
   * into the vector that is being grown.
   */
  HeadState head_state = *(HeadState *)vector_get(&self->head_states, head);
  vector_push(&self->head_states, &head_state);
  return result;
}
if (self->lookahead->symbol != ts_builtin_sym_error)
break;
}
/*
 * Remove stack head `head` together with its entry in head_states, so that
 * the per-head states keep the same indices as the stack's heads.
 */
static void ts_parser__remove_head(TSParser *self, int head) {
vector_erase(&self->head_states, head);
ts_stack_remove_head(self->stack, head);
}
/*
@ -150,10 +161,16 @@ static void ts_parser__get_next_lookahead(TSParser *self) {
static ConsumeResult ts_parser__shift(TSParser *self, int head,
TSStateId parse_state) {
if (ts_stack_push(self->stack, head, parse_state, self->lookahead))
HeadState *head_state = vector_get(&self->head_states, head);
head_state->position =
ts_length_add(head_state->position, ts_tree_total_size(self->lookahead));
if (ts_stack_push(self->stack, head, parse_state, self->lookahead)) {
DEBUG("merge head:%d", head);
vector_erase(&self->head_states, head);
return ConsumeResultRemoved;
else
} else {
return ConsumeResultShifted;
}
}
static bool ts_parser__shift_extra(TSParser *self, int head, TSStateId state) {
@ -164,44 +181,91 @@ static bool ts_parser__shift_extra(TSParser *self, int head, TSStateId state) {
static TSTree *ts_parser__reduce(TSParser *self, int head, TSSymbol symbol,
size_t child_count, bool extra,
bool count_extra) {
vector_clear(&self->reduce_parents);
TSNodeType node_type = self->language->node_types[symbol];
StackPopResultList pop_results =
ts_stack_pop(self->stack, head, child_count, count_extra);
Vector pop_results = ts_stack_pop(self->stack, head, child_count, count_extra);
TSTree *parent = NULL;
TSTree **last_children = NULL;
int last_index = -1;
int last_head_index = -1;
int removed_heads = 0;
for (int i = 0; i < pop_results.size; i++) {
StackPopResult pop_result = pop_results.contents[i];
for (size_t i = 0; i < pop_results.size; i++) {
StackPopResult *pop_result = vector_get(&pop_results, i);
if (pop_result.trees != last_children) {
parent = ts_tree_make_node(symbol, pop_result.tree_count,
pop_result.trees, node_type);
}
if (pop_result.index == last_index) {
ts_stack_add_alternative(self->stack, pop_result.index, parent);
} else {
TSStateId top_state = ts_stack_top_state(self->stack, pop_result.index);
TSStateId state;
if (extra) {
ts_tree_set_extra(parent);
state = top_state;
} else {
state = ts_language__last_action(self->language, top_state, symbol)
.data.to_state;
/*
* If the same set of trees led to a previous stack head, reuse the parent
* tree that was added to that head.
*/
TSTree *parent = NULL;
for (size_t j = 0; j < i; j++) {
StackPopResult *prior_result = vector_get(&pop_results, j);
if (pop_result->trees == prior_result->trees) {
TSTree **existing_parent = vector_get(&self->reduce_parents, j);
parent = *existing_parent;
break;
}
ts_stack_push(self->stack, pop_result.index, state, parent);
}
last_index = pop_result.index;
last_children = pop_result.trees;
/*
* Otherwise, create a new parent node for this set of trees.
*/
if (!parent)
parent = ts_tree_make_node(symbol, pop_result->tree_count, pop_result->trees, node_type);
vector_push(&self->reduce_parents, &parent);
/*
* If another path led to the same stack head, add this new parent tree
* as an alternative for that stack head.
*/
int new_head = pop_result->head_index - removed_heads;
if (pop_result->head_index == last_head_index) {
ts_stack_add_alternative(self->stack, new_head, parent);
continue;
}
/*
* If the stack has split in the process of popping, create a duplicate of
* the lookahead state for this head, for the new head.
*/
if (i > 0) {
DEBUG("split_during_reduce new_head:%d", new_head);
HeadState *head_state = vector_get(&self->head_states, head);
vector_push(&self->head_states, head_state);
}
/*
* If the parent node is extra, then do not change the state when pushing
* it. Otherwise, proceed to the state given in the parse table for the
* new parent symbol.
*/
TSStateId state;
TSStateId top_state = ts_stack_top_state(self->stack, new_head);
if (extra) {
ts_tree_set_extra(parent);
state = top_state;
} else {
TSParseAction action = ts_language__last_action(self->language, top_state, symbol);
if (child_count == -1) {
state = 0;
} else {
assert(action.type == TSParseActionTypeShift);
state = action.data.to_state;
}
}
/*
* If the given state already existed at a different head of the stack,
* then remove the lookahead state for the head.
*/
if (ts_stack_push(self->stack, new_head, state, parent)) {
vector_erase(&self->head_states, new_head);
removed_heads++;
}
last_head_index = pop_result->head_index;
}
return parent;
TSTree **last_parent = vector_back(&self->reduce_parents);
return *last_parent;
}
static void ts_parser__reduce_fragile(TSParser *self, int head, TSSymbol symbol,
@ -214,9 +278,12 @@ static void ts_parser__reduce_fragile(TSParser *self, int head, TSSymbol symbol,
static void ts_parser__reduce_error(TSParser *self, int head,
size_t child_count) {
HeadState *head_state = vector_get(&self->head_states, head);
TSTree *reduced = ts_parser__reduce(self, head, ts_builtin_sym_error,
child_count, false, true);
reduced->size = ts_length_add(reduced->size, self->lookahead->padding);
head_state->position =
ts_length_add(head_state->position, self->lookahead->padding);
self->lookahead->padding = ts_length_zero();
ts_tree_set_fragile_left(reduced);
ts_tree_set_fragile_right(reduced);
@ -234,7 +301,7 @@ static bool ts_parser__handle_error(TSParser *self, int head) {
*/
int i = -1;
for (StackEntry *entry = entry_before_error; true;
entry = ts_stack_entry_next(entry, head), i++) {
entry = ts_stack_entry_next(entry, 0), i++) {
TSStateId stack_state = entry ? entry->state : 0;
TSParseAction action_on_error = ts_language__last_action(
self->language, stack_state, ts_builtin_sym_error);
@ -270,7 +337,7 @@ static bool ts_parser__handle_error(TSParser *self, int head) {
*/
if (self->lookahead->symbol == ts_builtin_sym_end) {
DEBUG("fail_to_recover");
ts_parser__reduce_error(self, head, error_token_count - 1);
ts_parser__reduce_error(self, head, -1);
return false;
}
}
@ -288,19 +355,27 @@ static void ts_parser__start(TSParser *self, TSInput input,
ts_lexer_reset(&self->lexer, ts_length_zero());
ts_stack_clear(self->stack);
self->reusable_subtree = previous_tree;
self->reusable_subtree_pos = 0;
HeadState head_state = {
.position = ts_length_zero(),
.reusable_subtree = previous_tree,
.reusable_subtree_pos = 0,
};
vector_clear(&self->head_states);
vector_push(&self->head_states, &head_state);
self->lookahead = NULL;
}
static TSTree *ts_parser__finish(TSParser *self) {
StackPopResult pop_result = ts_stack_pop(self->stack, 0, -1, true).contents[0];
Vector pop_results = ts_stack_pop(self->stack, 0, -1, true);
StackPopResult *pop_result = vector_get(&pop_results, 0);
TSTree **trees = pop_result.trees;
size_t extra_count = pop_result.tree_count - 1;
TSTree **trees = pop_result->trees;
size_t extra_count = pop_result->tree_count - 1;
TSTree *root = trees[extra_count];
ts_tree_prepend_children(root, extra_count, trees);
ts_tree_assign_parents(root);
return root;
}
@ -327,11 +402,9 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head) {
int current_head;
if (next_action->type == 0) {
current_head = head;
DEBUG("action current_head:%d, state:%d", current_head, state);
} else {
current_head = ts_stack_split(self->stack, head);
DEBUG("split_action from_head:%d, current_head:%d, state:%d", head,
current_head, state);
current_head = ts_parser__split(self, head);
DEBUG("split_action from_head:%d, new_head:%d", head, current_head);
}
// TODO: Remove this by making a separate symbol for errors returned from
@ -349,7 +422,7 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head) {
return ConsumeResultFinished;
} else {
DEBUG("bail current_head:%d", current_head);
ts_stack_remove_head(self->stack, current_head);
ts_parser__remove_head(self, current_head);
return ConsumeResultRemoved;
}
@ -390,10 +463,14 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self, int head) {
}
static int ts_tree__compare(TSTree *left, TSTree *right) {
if (left->symbol < right->symbol) return -1;
if (right->symbol < left->symbol) return 1;
if (left->child_count < right->child_count) return -1;
if (right->child_count < left->child_count) return 1;
if (left->symbol < right->symbol)
return -1;
if (right->symbol < left->symbol)
return 1;
if (left->child_count < right->child_count)
return -1;
if (right->child_count < left->child_count)
return 1;
for (size_t i = 0; i < left->child_count; i++) {
TSTree *left_child = left->children[i];
TSTree *right_child = right->children[i];
@ -426,6 +503,8 @@ TSParser ts_parser_make() {
.stack = ts_stack_new((TreeSelectionCallback){
NULL, ts_parser__select_tree,
}),
.head_states = vector_new(sizeof(HeadState), 4),
.reduce_parents = vector_new(sizeof(TSTree *), 4),
.lookahead = NULL,
};
}
@ -448,13 +527,28 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) {
ts_parser__start(self, input, previous_tree);
for (;;) {
ts_parser__get_next_lookahead(self);
DEBUG("lookahead sym:%s, pos:%lu, head_count:%d",
SYM_NAME(self->lookahead->symbol), self->lexer.current_position.chars,
ts_stack_head_count(self->stack));
for (int head = 0; head < ts_stack_head_count(self->stack);) {
HeadState *state = vector_get(&self->head_states, head);
DEBUG("process head:%d, head_count:%d, state:%d, pos:%lu", head,
ts_stack_head_count(self->stack),
ts_stack_top_state(self->stack, head), state->position.chars);
TSTree *reused_lookahead = ts_parser__get_next_lookahead(self, head);
if (reused_lookahead &&
ts_parser__can_reuse(self, head, reused_lookahead)) {
self->lookahead = reused_lookahead;
} else if (!(self->lookahead &&
ts_parser__can_reuse(self, head, self->lookahead))) {
ts_lexer_reset(&self->lexer, state->position);
TSStateId parse_state = ts_stack_top_state(self->stack, head);
TSStateId lex_state = self->language->lex_states[parse_state];
self->lookahead = self->language->lex_fn(&self->lexer, lex_state);
}
DEBUG("lookahead sym:%s, size:%lu", SYM_NAME(self->lookahead->symbol),
ts_tree_total_size(self->lookahead).chars);
switch (ts_parser__consume_lookahead(self, head)) {
case ConsumeResultRemoved:
break;
@ -465,5 +559,7 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) {
return ts_parser__finish(self);
}
}
self->lookahead = NULL;
}
}

View file

@ -6,14 +6,15 @@ extern "C" {
#endif
#include "runtime/stack.h"
#include "runtime/vector.h"
typedef struct {
TSLexer lexer;
Stack *stack;
TSTree *lookahead;
TSTree *reusable_subtree;
size_t reusable_subtree_pos;
const TSLanguage *language;
Vector head_states;
Vector reduce_parents;
} TSParser;
TSParser ts_parser_make();

View file

@ -1,17 +1,17 @@
#include "tree_sitter/parser.h"
#include "runtime/tree.h"
#include "runtime/tree_vector.h"
#include "runtime/vector.h"
#include "runtime/stack.h"
#include "runtime/length.h"
#include <assert.h>
#define MAX_POP_PATH_COUNT 8
#define MAX_SUCCESSOR_COUNT 8
#define INITIAL_HEAD_CAPACITY 3
#define STARTING_TREE_CAPACITY 10
typedef struct StackNode {
StackEntry entry;
struct StackNode *successors[MAX_POP_PATH_COUNT];
struct StackNode *successors[MAX_SUCCESSOR_COUNT];
short unsigned int successor_count;
short unsigned int ref_count;
} StackNode;
@ -20,10 +20,18 @@ struct Stack {
StackNode **heads;
int head_count;
int head_capacity;
StackPopResult last_pop_results[MAX_POP_PATH_COUNT];
Vector pop_results;
Vector pop_paths;
TreeSelectionCallback tree_selection_callback;
};
/*
 * One path followed through the stack while popping. ts_stack_pop starts with
 * a single path and adds another for each additional successor of a node it
 * visits.
 */
typedef struct {
/*
 * Number of trees this path must collect before it is finished. It is
 * incremented for each 'extra' tree visited when extras are not counted.
 */
size_t goal_tree_count;
/* Stack node this path will visit next; a NULL node ends the path. */
StackNode *node;
/* Vector of TSTree * collected along this path so far. */
Vector trees;
/*
 * Set on a path that was created as a copy of another path. While set, the
 * path replaces `trees` with a vector_copy before pushing onto it
 * (presumably because the copied Vector still shares storage with the
 * original - TODO confirm against vector.h).
 */
bool is_shared;
} PopPath;
/*
* Section: Stack lifecycle
*/
@ -35,11 +43,15 @@ Stack *ts_stack_new(TreeSelectionCallback tree_selection_callback) {
.head_count = 1,
.head_capacity = INITIAL_HEAD_CAPACITY,
.tree_selection_callback = tree_selection_callback,
.pop_results = vector_new(sizeof(StackPopResult), 4),
.pop_paths = vector_new(sizeof(PopPath), 4),
};
return self;
}
void ts_stack_delete(Stack *self) {
vector_delete(&self->pop_results);
vector_delete(&self->pop_paths);
free(self->heads);
free(self);
}
@ -164,12 +176,12 @@ static int ts_stack__add_head(Stack *self, StackNode *node) {
return new_index;
}
static int ts_stack__find_or_add_head(Stack *self, StackNode *node) {
static int ts_stack__find_head(Stack *self, StackNode *node) {
for (int i = 0; i < self->head_count; i++)
if (self->heads[i] == node) {
return i;
}
return ts_stack__add_head(self, node);
return -1;
}
void ts_stack_remove_head(Stack *self, int head_index) {
@ -221,16 +233,24 @@ int ts_stack_split(Stack *self, int head_index) {
return ts_stack__add_head(self, self->heads[head_index]);
}
StackPopResultList ts_stack_pop(Stack *self, int head_index, int child_count,
bool count_extra) {
StackNode *previous_head = self->heads[head_index];
const char *symbol_names[] = {
"zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
"ten", "eleven", "twelve"
};
int path_count = 1;
Vector ts_stack_pop(Stack *self, int head_index, int child_count, bool count_extra) {
StackNode *previous_head = self->heads[head_index];
int capacity = (child_count == -1) ? STARTING_TREE_CAPACITY : child_count;
size_t tree_counts_by_path[MAX_POP_PATH_COUNT] = { child_count };
StackNode *nodes_by_path[MAX_POP_PATH_COUNT] = { previous_head };
TreeVector trees_by_path[MAX_POP_PATH_COUNT] = { tree_vector_new(capacity) };
bool is_shared_by_path[MAX_POP_PATH_COUNT] = { false };
PopPath initial_path = {
.goal_tree_count = child_count,
.node = previous_head,
.trees = vector_new(sizeof(TSTree *), capacity),
.is_shared = false,
};
vector_clear(&self->pop_results);
vector_clear(&self->pop_paths);
vector_push(&self->pop_paths, &initial_path);
/*
* Reduce along every possible path in parallel. Stop when the given number
@ -239,71 +259,72 @@ StackPopResultList ts_stack_pop(Stack *self, int head_index, int child_count,
bool all_paths_done = false;
while (!all_paths_done) {
all_paths_done = true;
int current_path_count = path_count;
for (int path = 0; path < current_path_count; path++) {
StackNode *node = nodes_by_path[path];
if (!node || (trees_by_path[path].size == tree_counts_by_path[path]))
for (size_t i = 0; i < self->pop_paths.size; i++) {
PopPath *path = vector_get(&self->pop_paths, i);
StackNode *node = path->node;
if (!node || path->trees.size == path->goal_tree_count)
continue;
all_paths_done = false;
/*
* Children that are 'extra' do not count towards the total child count.
*/
if (ts_tree_is_extra(node->entry.tree) && !count_extra)
tree_counts_by_path[path]++;
path->goal_tree_count++;
/*
* If a node has more than one successor, create new paths for each of
* the additional successors.
*/
if (is_shared_by_path[path]) {
trees_by_path[path] = tree_vector_copy(&trees_by_path[path]);
is_shared_by_path[path] = false;
if (path->is_shared) {
path->trees = vector_copy(&path->trees);
path->is_shared = false;
}
tree_vector_push(&trees_by_path[path], node->entry.tree);
for (int i = 0; i < node->successor_count; i++) {
int next_path;
if (i > 0) {
if (path_count == MAX_POP_PATH_COUNT)
break;
next_path = path_count;
tree_counts_by_path[next_path] = tree_counts_by_path[path];
trees_by_path[next_path] = trees_by_path[path];
is_shared_by_path[next_path] = true;
path_count++;
} else {
next_path = path;
}
ts_tree_retain(node->entry.tree);
vector_push(&path->trees, &node->entry.tree);
nodes_by_path[next_path] = node->successors[i];
path->node = path->node->successors[0];
for (int j = 1; j < node->successor_count; j++) {
PopPath path_copy = *path;
vector_push(&self->pop_paths, &path_copy);
PopPath *next_path = vector_back(&self->pop_paths);
next_path->node = node->successors[j];
next_path->is_shared = true;
}
}
}
for (int path = 0; path < path_count; path++) {
if (!is_shared_by_path[path])
tree_vector_reverse(&trees_by_path[path]);
int index = -1;
if (path == 0) {
stack_node_retain(nodes_by_path[path]);
self->heads[head_index] = nodes_by_path[path];
index = head_index;
for (size_t i = 0; i < self->pop_paths.size; i++) {
PopPath *path = vector_get(&self->pop_paths, i);
if (!path->is_shared)
vector_reverse(&path->trees);
StackPopResult result = {
.trees = path->trees.contents,
.tree_count = path->trees.size,
.head_index = -1,
};
if (i == 0) {
stack_node_retain(path->node);
self->heads[head_index] = path->node;
result.head_index = head_index;
} else {
index = ts_stack__find_or_add_head(self, nodes_by_path[path]);
result.head_index = ts_stack__find_head(self, path->node);
if (result.head_index == -1)
result.head_index = ts_stack__add_head(self, path->node);
}
self->last_pop_results[path] = (StackPopResult){
.index = index,
.tree_count = trees_by_path[path].size,
.trees = trees_by_path[path].contents,
};
vector_push(&self->pop_results, &result);
}
stack_node_release(previous_head);
return (StackPopResultList){
.size = path_count, .contents = self->last_pop_results,
};
return self->pop_results;
}
void ts_stack_shrink(Stack *self, int head_index, int count) {

View file

@ -6,6 +6,7 @@ extern "C" {
#endif
#include "tree_sitter/parser.h"
#include "runtime/vector.h"
typedef struct Stack Stack;
@ -15,16 +16,11 @@ typedef struct {
} StackEntry;
typedef struct {
int index;
int tree_count;
TSTree **trees;
size_t tree_count;
int head_index;
} StackPopResult;
/*
 * A counted view over an array of StackPopResult values, as returned by the
 * StackPopResultList flavour of ts_stack_pop.
 */
typedef struct {
/* Number of entries reachable through `contents`. */
int size;
/* Pointer to the first result. */
StackPopResult *contents;
} StackPopResultList;
typedef struct {
void *data;
TSTree *(*callback)(void *data, TSTree *, TSTree *);
@ -90,7 +86,7 @@ void ts_stack_add_alternative(Stack *, int head, TSTree *);
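* which had previously been merged. It returns a Vector of StackPopResult
* values, one per revealed head, each holding that head's index and the
* trees removed from that head. The returned Vector shares its storage with
* the stack and is overwritten by the next call to ts_stack_pop.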
*/
StackPopResultList ts_stack_pop(Stack *, int head, int count, bool count_extra);
Vector ts_stack_pop(Stack *, int head, int count, bool count_extra);
/*
* Remove the given number of entries from the given head of the stack.

View file

@ -36,6 +36,20 @@ TSTree *ts_tree_make_error(TSLength size, TSLength padding, char lookahead_char)
return result;
}
/*
 * Visit the direct children of `self` in order. For each child whose recorded
 * parent is not already `self`, store `self`, the child's position and the
 * running offset in the child's context, then recurse into that child.
 * Children that already point at `self` are left untouched and are not
 * descended into. The running offset advances by every child's total size,
 * whether or not the child was updated.
 */
void ts_tree_assign_parents(TSTree *self) {
  TSLength position = ts_length_zero();
  size_t index = 0;
  while (index < self->child_count) {
    TSTree *child = self->children[index];
    if (child->context.parent != self) {
      child->context.parent = self;
      child->context.index = index;
      child->context.offset = position;
      ts_tree_assign_parents(child);
    }
    position = ts_length_add(position, ts_tree_total_size(child));
    index++;
  }
}
static void ts_tree__set_children(TSTree *self, TSTree **children,
size_t child_count) {
self->children = children;
@ -44,9 +58,6 @@ static void ts_tree__set_children(TSTree *self, TSTree **children,
for (size_t i = 0; i < child_count; i++) {
TSTree *child = children[i];
ts_tree_retain(child);
child->context.parent = self;
child->context.index = i;
child->context.offset = ts_tree_total_size(self);
if (i == 0) {
self->padding = child->padding;

View file

@ -44,6 +44,7 @@ char *ts_tree_string(const TSTree *tree, const char **names,
bool include_anonymous);
TSLength ts_tree_total_size(const TSTree *tree);
void ts_tree_prepend_children(TSTree *, size_t, TSTree **);
void ts_tree_assign_parents(TSTree *);
void ts_tree_edit(TSTree *, TSInputEdit);
static inline bool ts_tree_is_extra(const TSTree *tree) {

View file

@ -1,55 +0,0 @@
#ifndef RUNTIME_TREE_VECTOR_H_
#define RUNTIME_TREE_VECTOR_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <string.h>
#include "./tree.h"
/* A growable array of TSTree pointers. */
typedef struct {
/* Heap buffer holding the tree pointers. */
TSTree **contents;
/* Number of pointer slots allocated in `contents`. */
size_t capacity;
/* Number of slots currently in use. */
size_t size;
} TreeVector;
static inline TreeVector tree_vector_new(size_t size) {
return (TreeVector){
.contents = malloc(size * sizeof(TSTree *)), .capacity = size, .size = 0,
};
}
/*
 * Append `tree` to the vector, calling ts_tree_retain on it first.
 *
 * When the buffer is full it is grown by four slots. The reallocation result
 * is checked before it replaces the old pointer: on failure the process
 * aborts instead of dropping the only reference to the old buffer and then
 * storing through a NULL pointer.
 */
static inline void tree_vector_push(TreeVector *self, TSTree *tree) {
  if (self->size == self->capacity) {
    size_t grown_capacity = self->capacity + 4;
    TSTree **grown = realloc(self->contents, grown_capacity * sizeof(TSTree *));
    if (grown == NULL) {
      abort();
    }
    self->contents = grown;
    self->capacity = grown_capacity;
  }
  ts_tree_retain(tree);
  self->contents[self->size++] = tree;
}
/*
 * Reverse the order of the stored tree pointers in place by swapping from
 * both ends towards the middle. Vectors with fewer than two entries are left
 * as they are.
 */
static inline void tree_vector_reverse(TreeVector *self) {
  if (self->size < 2) {
    return;
  }
  TSTree **front = self->contents;
  TSTree **back = self->contents + (self->size - 1);
  while (front < back) {
    TSTree *held = *front;
    *front = *back;
    *back = held;
    front++;
    back--;
  }
}
/*
 * Return a new TreeVector with the same capacity and size as `self` and its
 * own buffer holding the same tree pointers. Only the pointers are copied;
 * ts_tree_retain is not called on them.
 *
 * The allocation is checked before it is written to (the process aborts if a
 * non-empty buffer cannot be allocated), and memcpy is skipped when there is
 * nothing to copy so it is never handed a NULL pointer.
 */
static inline TreeVector tree_vector_copy(TreeVector *self) {
  TreeVector copy;
  copy.capacity = self->capacity;
  copy.size = self->size;
  copy.contents = malloc(self->capacity * sizeof(TSTree *));
  if (copy.contents == NULL && self->capacity > 0) {
    abort();
  }
  if (self->size > 0) {
    memcpy(copy.contents, self->contents, self->size * sizeof(TSTree *));
  }
  return copy;
}
#ifdef __cplusplus
}
#endif
#endif // RUNTIME_TREE_VECTOR_H_

92
src/runtime/vector.h Normal file
View file

@ -0,0 +1,92 @@
#ifndef RUNTIME_VECTOR_H_
#define RUNTIME_VECTOR_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <string.h>
#include <stdlib.h>
#include <assert.h>
/*
 * A growable array of fixed-size elements, stored by value in a single heap
 * buffer. Elements are copied in and out with memcpy, so any trivially
 * copyable type can be stored.
 */
typedef struct {
  void *contents;      /* heap buffer with room for `capacity` elements */
  size_t size;         /* number of elements currently stored */
  size_t capacity;     /* number of element slots allocated */
  size_t element_size; /* size in bytes of one element */
} Vector;

/*
 * Create an empty vector of `element_size`-byte elements with room for
 * `capacity` of them. Aborts if a non-empty buffer cannot be allocated.
 */
static inline Vector vector_new(size_t element_size, size_t capacity) {
  Vector result;
  result.contents = malloc(capacity * element_size);
  if (result.contents == NULL && capacity * element_size > 0) {
    abort();
  }
  result.size = 0;
  result.capacity = capacity;
  result.element_size = element_size;
  return result;
}

/*
 * Free the vector's buffer. The vector is left empty with no buffer, so a
 * second vector_delete is harmless and a later vector_push starts over from
 * a fresh allocation.
 */
static inline void vector_delete(Vector *self) {
  free(self->contents);
  self->contents = NULL;
  self->size = 0;
  self->capacity = 0;
}

/*
 * Return a pointer to the element at `index`, which must be less than the
 * current size (checked with assert). The pointer refers into the vector's
 * buffer and stops being valid once a vector_push grows that buffer.
 */
static inline void *vector_get(Vector *self, size_t index) {
  assert(index < self->size);
  return (void *)((char *)self->contents + index * self->element_size);
}

/*
 * Return a pointer to the last element. The vector must not be empty
 * (checked with assert).
 */
static inline void *vector_back(Vector *self) {
  assert(self->size > 0);
  return vector_get(self, self->size - 1);
}

/* Drop every element while keeping the allocated buffer for reuse. */
static inline void vector_clear(Vector *self) {
  self->size = 0;
}

/*
 * Remove the element at `index` (which must be in range, checked with
 * assert), shifting every later element down by one slot. The capacity is
 * unchanged.
 */
static inline void vector_erase(Vector *self, size_t index) {
  assert(index < self->size);
  char *contents = (char *)self->contents;
  memmove(contents + index * self->element_size,
          contents + (index + 1) * self->element_size,
          (self->size - index - 1) * self->element_size);
  self->size--;
}

/*
 * Append a copy of the `element_size` bytes at `entry`.
 *
 * When the buffer is full it is grown by four slots. The reallocation result
 * is checked before it replaces the old pointer; the process aborts on
 * failure rather than losing the old buffer and copying through NULL.
 * `entry` must not point into this vector's own buffer, because growing the
 * buffer may move it before the copy happens.
 */
static inline void vector_push(Vector *self, void *entry) {
  if (self->size == self->capacity) {
    size_t grown_capacity = self->capacity + 4;
    size_t grown_bytes = grown_capacity * self->element_size;
    void *grown = realloc(self->contents, grown_bytes);
    if (grown == NULL && grown_bytes > 0) {
      abort();
    }
    self->contents = grown;
    self->capacity = grown_capacity;
  }
  char *contents = (char *)self->contents;
  memcpy(contents + (self->size * self->element_size), (char *)entry,
         self->element_size);
  self->size++;
}

/*
 * Reverse the order of the elements in place.
 *
 * Elements are exchanged one byte at a time, so no temporary sized by
 * `element_size` is needed: this avoids a variable-length array, which is
 * optional in C11, not part of C++, and undefined for a zero length.
 */
static inline void vector_reverse(Vector *self) {
  char *contents = (char *)self->contents;
  size_t width = self->element_size;
  size_t limit = self->size / 2;
  for (size_t i = 0; i < limit; i++) {
    char *front = contents + i * width;
    char *back = contents + (self->size - 1 - i) * width;
    for (size_t byte = 0; byte < width; byte++) {
      char swap = front[byte];
      front[byte] = back[byte];
      back[byte] = swap;
    }
  }
}

/*
 * Return a vector with the same size, capacity and element size as `self`
 * and its own buffer holding a byte-for-byte copy of the stored elements.
 * Aborts if a non-empty buffer cannot be allocated; memcpy is skipped when
 * there are no bytes to copy so it is never handed a NULL pointer.
 */
static inline Vector vector_copy(Vector *self) {
  Vector copy = *self;
  size_t allocated_bytes = self->capacity * self->element_size;
  size_t used_bytes = self->size * self->element_size;
  copy.contents = malloc(allocated_bytes);
  if (copy.contents == NULL && allocated_bytes > 0) {
    abort();
  }
  if (used_bytes > 0) {
    memcpy(copy.contents, self->contents, used_bytes);
  }
  return copy;
}
#ifdef __cplusplus
}
#endif
#endif // RUNTIME_VECTOR_H_