Reuse nodes based on state matching, not sentential form validity

I think that state matching is the only correct strategy for incremental
node reuse that is compatible with the new error recovery algorithm. It's
also simpler than the sentential-form algorithm. With the compressed parse
tables, state matching shouldn't be too conservative of a test.
This commit is contained in:
Max Brunsfeld 2016-07-17 13:35:43 -07:00
parent 0ee1994078
commit 87ca3cb099
3 changed files with 137 additions and 160 deletions

View file

@ -173,7 +173,7 @@ describe("Document", [&]() {
AssertThat(debugger->messages, Contains("new_parse"));
AssertThat(debugger->messages, Contains("lookahead char:'['"));
AssertThat(debugger->messages, Contains("reduce sym:array, child_count:4, fragile:false"));
AssertThat(debugger->messages, Contains("reduce sym:array, child_count:4"));
AssertThat(debugger->messages, Contains("accept"));
});

View file

@ -231,7 +231,7 @@ describe("Parser", [&]() {
"(identifier) "
"(math_op (number) (member_access (identifier) (identifier))))))");
AssertThat(input->strings_read, Equals(vector<string>({ " abc.d);", "" })));
AssertThat(input->strings_read, Equals(vector<string>({ " + abc.d)", "" })));
});
});

View file

@ -157,8 +157,7 @@ static void ts_parser__pop_reusable_node(ReusableNode *reusable_node) {
}
static bool ts_parser__breakdown_reusable_node(ReusableNode *reusable_node) {
if (reusable_node->tree->symbol == ts_builtin_sym_error ||
reusable_node->tree->child_count == 0) {
if (reusable_node->tree->child_count == 0) {
return false;
} else {
reusable_node->tree = reusable_node->tree->children[0];
@ -166,6 +165,43 @@ static bool ts_parser__breakdown_reusable_node(ReusableNode *reusable_node) {
}
}
static bool ts_parser__breakdown_lookahead(TSParser *self, TSTree **lookahead, TSStateId state, ReusableNode *reusable_node) {
bool result = false;
while (reusable_node->tree->child_count > 0 &&
(reusable_node->tree->parse_state != state ||
reusable_node->tree->fragile_left ||
reusable_node->tree->fragile_right)) {
LOG("state_mismatch sym:%s", SYM_NAME(reusable_node->tree->symbol));
ts_parser__breakdown_reusable_node(reusable_node);
result = true;
}
if (result) {
LOG("lookahead sym:%s", SYM_NAME(reusable_node->tree->symbol));
ts_tree_release(*lookahead);
ts_tree_retain(*lookahead = reusable_node->tree);
}
return result;
}
static void ts_parser__pop_reusable_node_leaf(ReusableNode *reusable_node) {
while (reusable_node->tree->child_count > 0)
reusable_node->tree = reusable_node->tree->children[0];
ts_parser__pop_reusable_node(reusable_node);
}
static bool ts_parser__can_reuse(TSParser *self, TSStateId state, TSTree *tree,
TableEntry *table_entry) {
if (tree->first_leaf.lex_state == self->language->lex_states[state])
return true;
if (!table_entry->is_reusable)
return false;
if (!table_entry->depends_on_lookahead)
return true;
return tree->child_count > 1 && tree->error_size == 0;
}
static bool ts_parser__condense_stack(TSParser *self) {
bool result = false;
for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
@ -194,67 +230,6 @@ static bool ts_parser__condense_stack(TSParser *self) {
return result;
}
static bool ts_parser__can_reuse(TSParser *self, TSStateId state,
TableEntry *table_entry, TSTree *tree) {
if (tree->parse_state != state) {
if (ts_tree_is_fragile(tree)) {
LOG("cant_reuse_fragile sym:%s, size:%lu", SYM_NAME(tree->symbol),
tree->size.chars);
return false;
}
if (!table_entry->is_reusable) {
LOG("cant_reuse_ambiguous sym:%s, size:%lu", SYM_NAME(tree->symbol),
tree->size.chars);
return false;
}
if (table_entry->action_count == 0) {
LOG("cant_reuse_unexpected sym:%s, size:%lu", SYM_NAME(tree->symbol),
tree->size.chars);
return false;
}
TSParseAction action = table_entry->actions[table_entry->action_count - 1];
if (tree->extra != action.extra) {
LOG("cant_reuse_extra sym:%s, size:%lu", SYM_NAME(tree->symbol),
tree->size.chars);
return false;
}
TSStateId lex_state = self->language->lex_states[state];
if (tree->first_leaf.lex_state != lex_state) {
if (tree->child_count > 0) {
TableEntry leaf_entry;
ts_language_table_entry(self->language, state, tree->first_leaf.symbol,
&leaf_entry);
if (!leaf_entry.is_reusable) {
LOG("cant_reuse_first_leaf sym:%s, leaf_sym:%s, size:%lu",
SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol),
tree->size.chars);
return false;
}
if (tree->child_count == 1 && leaf_entry.depends_on_lookahead) {
LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s, size:%lu",
SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol),
tree->size.chars);
return false;
}
} else if (table_entry->depends_on_lookahead) {
LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s, size:%lu",
SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol),
tree->size.chars);
return false;
}
}
}
LOG("reuse sym:%s size:%lu", SYM_NAME(tree->symbol), tree->size.chars);
return true;
}
static TSTree *ts_parser__lex(TSParser *self, TSStateId parse_state,
bool error_mode) {
TSStateId state = self->language->lex_states[parse_state];
@ -283,9 +258,10 @@ static TSTree *ts_parser__lex(TSParser *self, TSStateId parse_state,
if (!result)
return NULL;
result->parse_state = parse_state;
result->first_leaf.lex_state = state;
}
result->first_leaf.lex_state = state;
return result;
}
@ -295,8 +271,7 @@ static void ts_parser__clear_cached_token(TSParser *self) {
}
static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version,
ReusableNode *reusable_node,
bool *is_fresh) {
ReusableNode *reusable_node) {
TSLength position = ts_stack_top_position(self->stack, version);
while (reusable_node->tree) {
@ -313,17 +288,6 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version,
continue;
}
if (reusable_node->tree->symbol == ts_builtin_sym_error) {
LOG("cant_reuse_error sym:%s, size:%lu",
SYM_NAME(reusable_node->tree->symbol),
reusable_node->tree->size.chars);
if (!ts_parser__breakdown_reusable_node(reusable_node)) {
ts_parser__pop_reusable_node(reusable_node);
CHECK(ts_parser__breakdown_top_of_stack(self, version));
}
continue;
}
if (reusable_node->tree->has_changes) {
LOG("cant_reuse_changed tree:%s, size:%lu",
SYM_NAME(reusable_node->tree->symbol),
@ -335,9 +299,19 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version,
continue;
}
if (reusable_node->tree->symbol == ts_builtin_sym_error) {
LOG("cant_reuse_error tree:%s, size:%lu",
SYM_NAME(reusable_node->tree->symbol),
reusable_node->tree->size.chars);
if (!ts_parser__breakdown_reusable_node(reusable_node)) {
ts_parser__pop_reusable_node(reusable_node);
CHECK(ts_parser__breakdown_top_of_stack(self, version));
}
continue;
}
TSTree *result = reusable_node->tree;
ts_tree_retain(result);
*is_fresh = false;
return result;
}
@ -349,7 +323,6 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version,
ts_lexer_reset(&self->lexer, position);
TSStateId parse_state = ts_stack_top_state(self->stack, version);
bool error_mode = parse_state == TS_STATE_ERROR;
*is_fresh = true;
return ts_parser__lex(self, parse_state, error_mode);
error:
@ -514,7 +487,7 @@ static Reduction ts_parser__reduce(TSParser *self, StackVersion version,
}
TSStateId state = ts_stack_top_state(self->stack, slice.version);
if (fragile || self->is_split || initial_version_count > 1) {
if (fragile || self->is_split || pop.slices.size > 1 || initial_version_count > 1) {
parent->fragile_left = true;
parent->fragile_right = true;
parent->parse_state = TS_TREE_STATE_NONE;
@ -699,13 +672,13 @@ static StackIterateAction ts_parser__error_repair_callback(
}
static RepairResult ts_parser__repair_error(TSParser *self, StackSlice slice,
TSTree *lookahead,
TSSymbol lookahead_symbol,
const TSParseAction *actions,
size_t action_count) {
LOG("repair_error");
ErrorRepairSession session = {
.parser = self,
.lookahead_symbol = lookahead->symbol,
.lookahead_symbol = lookahead_symbol,
.found_repair = false,
.trees_above_error = &slice.trees,
.tree_count_above_error = ts_tree_array_essential_count(&slice.trees),
@ -837,6 +810,7 @@ static bool ts_parser__accept(TSParser *self, StackVersion version) {
if (ts_parser__select_tree(self, self->finished_tree, root)) {
ts_tree_release(self->finished_tree);
assert(root->ref_count > 0);
self->finished_tree = root;
} else {
ts_tree_release(root);
@ -852,8 +826,7 @@ error:
return false;
}
static bool ts_parser__handle_error(TSParser *self, StackVersion version,
TSTree *lookahead) {
static bool ts_parser__handle_error(TSParser *self, StackVersion version) {
size_t previous_version_count = ts_stack_version_count(self->stack);
TSStateId state = ts_stack_top_state(self->stack, version);
@ -963,37 +936,59 @@ error:
return false;
}
typedef enum {
ConsumeFailed,
ConsumeInvalid,
ConsumeSucceeded,
} ConsumeResult;
static bool ts_parser__advance(TSParser *self, StackVersion version,
ReusableNode *reusable_node) {
bool validated_lookahead = false;
TSTree *lookahead = ts_parser__get_lookahead(self, version, reusable_node);
CHECK(lookahead);
static ConsumeResult ts_parser__consume_lookahead(TSParser *self,
StackVersion version,
TSTree *lookahead,
bool lookahead_is_fresh) {
for (;;) {
TSStateId state = ts_stack_top_state(self->stack, version);
TableEntry entry;
ts_language_table_entry(self->language, state, lookahead->symbol, &entry);
TableEntry table_entry;
ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol,
&table_entry);
if (!lookahead_is_fresh &&
!ts_parser__can_reuse(self, state, &entry, lookahead))
return ConsumeInvalid;
if (!validated_lookahead) {
if (!ts_parser__can_reuse(self, state, lookahead, &table_entry)) {
if (lookahead == reusable_node->tree)
ts_parser__pop_reusable_node_leaf(reusable_node);
else
ts_parser__clear_cached_token(self);
LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol),
lookahead->size.chars);
ts_tree_release(lookahead);
lookahead = ts_parser__get_lookahead(self, version, reusable_node);
CHECK(lookahead);
continue;
}
validated_lookahead = true;
LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol),
lookahead->size.chars);
}
bool reduction_stopped_at_error = false;
StackVersion last_reduction_version = STACK_VERSION_NONE;
for (size_t i = 0; i < entry.action_count; i++) {
TSParseAction action = entry.actions[i];
for (size_t i = 0; i < table_entry.action_count; i++) {
TSParseAction action = table_entry.actions[i];
switch (action.type) {
case TSParseActionTypeShift: {
if (lookahead->child_count > 0) {
if (ts_parser__breakdown_lookahead(self, &lookahead, state, reusable_node)) {
if (!ts_parser__can_reuse(self, state, lookahead, &table_entry)) {
ts_parser__pop_reusable_node(reusable_node);
ts_tree_release(lookahead);
lookahead = ts_parser__get_lookahead(self, version, reusable_node);
CHECK(lookahead);
}
}
action = *ts_language_last_action(self->language, state,
lookahead->symbol);
}
TSStateId next_state;
if (action.extra) {
next_state = state;
@ -1005,7 +1000,12 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self,
CHECK(ts_parser__shift(self, version, next_state, lookahead,
action.extra));
return ConsumeSucceeded;
if (lookahead == reusable_node->tree)
ts_parser__pop_reusable_node(reusable_node);
ts_tree_release(lookahead);
return true;
}
case TSParseActionTypeReduce: {
@ -1015,14 +1015,14 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self,
if (action.extra) {
LOG("reduce_extra");
} else {
LOG("reduce sym:%s, child_count:%u, fragile:%s",
SYM_NAME(action.symbol), action.child_count,
BOOL_STRING(action.fragile));
LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.symbol),
action.child_count);
}
Reduction reduction =
ts_parser__reduce(self, version, action.symbol, action.child_count,
action.extra, action.fragile, true);
action.extra, (i < table_entry.action_count - 1),
true);
switch (reduction.status) {
case ReduceFailed:
@ -1032,9 +1032,9 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self,
break;
case ReduceStoppedAtError: {
reduction_stopped_at_error = true;
switch (ts_parser__repair_error(self, reduction.slice, lookahead,
entry.actions,
entry.action_count)) {
switch (ts_parser__repair_error(
self, reduction.slice, lookahead->first_leaf.symbol,
table_entry.actions, table_entry.action_count)) {
case RepairFailed:
goto error;
case RepairNoneFound:
@ -1055,58 +1055,35 @@ static ConsumeResult ts_parser__consume_lookahead(TSParser *self,
LOG("accept");
CHECK(ts_parser__accept(self, version));
return ConsumeSucceeded;
ts_tree_release(lookahead);
return true;
}
case TSParseActionTypeRecover: {
while (lookahead->child_count > 0) {
ts_parser__breakdown_reusable_node(reusable_node);
ts_tree_release(lookahead);
lookahead = reusable_node->tree;
ts_tree_retain(lookahead);
}
action =
*ts_language_last_action(self->language, state, lookahead->symbol);
CHECK(ts_parser__recover(self, version, action.to_state, lookahead));
return ConsumeSucceeded;
if (lookahead == reusable_node->tree)
ts_parser__pop_reusable_node(reusable_node);
ts_tree_release(lookahead);
return true;
}
}
}
if (last_reduction_version == STACK_VERSION_NONE)
return ConsumeInvalid;
ts_stack_renumber_version(self->stack, last_reduction_version, version);
LOG_STACK();
}
error:
return ConsumeFailed;
}
static bool ts_parser__advance(TSParser *self, StackVersion version,
ReusableNode *reusable_node) {
TSTree *lookahead = NULL;
for (;;) {
bool lookahead_is_fresh;
CHECK(lookahead = ts_parser__get_lookahead(self, version, reusable_node,
&lookahead_is_fresh));
switch (ts_parser__consume_lookahead(self, version, lookahead,
lookahead_is_fresh)) {
case ConsumeFailed:
goto error;
case ConsumeSucceeded:
if (lookahead == reusable_node->tree)
ts_parser__pop_reusable_node(reusable_node);
ts_tree_release(lookahead);
return true;
case ConsumeInvalid:
break;
}
if (!lookahead_is_fresh) {
if (lookahead == reusable_node->tree) {
if (!ts_parser__breakdown_reusable_node(reusable_node))
ts_parser__pop_reusable_node(reusable_node);
} else {
ts_parser__clear_cached_token(self);
}
ts_tree_release(lookahead);
if (last_reduction_version != STACK_VERSION_NONE) {
ts_stack_renumber_version(self->stack, last_reduction_version, version);
LOG_STACK();
continue;
}
@ -1114,17 +1091,17 @@ static bool ts_parser__advance(TSParser *self, StackVersion version,
case BreakdownFailed:
goto error;
case BreakdownPerformed:
ts_tree_release(lookahead);
continue;
case BreakdownAborted:
break;
}
CHECK(ts_parser__handle_error(self, version, lookahead));
ts_tree_release(lookahead);
CHECK(ts_parser__handle_error(self, version));
if (ts_stack_is_halted(self->stack, version))
if (ts_stack_is_halted(self->stack, version)) {
ts_tree_release(lookahead);
return true;
}
}
error: