Don't reuse nodes within ambiguities

This commit is contained in:
Max Brunsfeld 2018-05-18 18:04:42 -07:00
parent 95fbc23fd6
commit 39c3de3fc8
2 changed files with 91 additions and 90 deletions

View file

@ -57,7 +57,6 @@ struct TSParser {
TokenCache token_cache;
ReusableNode reusable_node;
void *external_scanner_payload;
bool in_ambiguity;
FILE *dot_graph_file;
bool halt_on_error;
unsigned accept_count;
@ -254,6 +253,24 @@ static void ts_parser__restore_external_scanner(TSParser *self, const Subtree *e
}
}
static bool ts_parser__can_reuse_first_leaf(TSParser *self, TSStateId state, const Subtree *tree,
TableEntry *table_entry) {
TSLexMode current_lex_mode = self->language->lex_modes[state];
// If the token was created in a state with the same set of lookaheads, it is reusable.
if (tree->first_leaf.lex_mode.lex_state == current_lex_mode.lex_state &&
tree->first_leaf.lex_mode.external_lex_state == current_lex_mode.external_lex_state &&
(tree->first_leaf.symbol != self->language->keyword_capture_token ||
tree->parse_state == state)) return true;
// Empty tokens are not reusable in states with different lookaheads.
if (tree->size.bytes == 0 && tree->symbol != ts_builtin_sym_end) return false;
// If the current state allows external tokens or other tokens that conflict with this
// token, this token is not reusable.
return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable;
}
static const Subtree *ts_parser__lex(TSParser *self, StackVersion version, TSStateId parse_state) {
Length start_position = ts_stack_position(self->stack, version);
const Subtree *external_token = ts_stack_last_external_token(self->stack, version);
@ -402,20 +419,27 @@ static const Subtree *ts_parser__lex(TSParser *self, StackVersion version, TSSta
return result;
}
static const Subtree *ts_parser__get_cached_token(TSParser *self, size_t byte_index,
const Subtree *last_external_token) {
static const Subtree *ts_parser__get_cached_token(TSParser *self, TSStateId state,
size_t position,
const Subtree *last_external_token,
TableEntry *table_entry) {
TokenCache *cache = &self->token_cache;
if (cache->token &&
cache->byte_index == byte_index &&
ts_subtree_external_scanner_state_eq(cache->last_external_token, last_external_token)) {
return cache->token;
} else {
return NULL;
if (
cache->token && cache->byte_index == position &&
ts_subtree_external_scanner_state_eq(cache->last_external_token, last_external_token)
) {
ts_language_table_entry(self->language, state, cache->token->first_leaf.symbol, table_entry);
if (ts_parser__can_reuse_first_leaf(self, state, cache->token, table_entry)) {
ts_subtree_retain(cache->token);
return cache->token;
}
}
return NULL;
}
static void ts_parser__set_cached_token(TSParser *self, size_t byte_index,
const Subtree *last_external_token, const Subtree *token) {
const Subtree *last_external_token,
const Subtree *token) {
TokenCache *cache = &self->token_cache;
if (token) ts_subtree_retain(token);
if (last_external_token) ts_subtree_retain(last_external_token);
@ -426,47 +450,27 @@ static void ts_parser__set_cached_token(TSParser *self, size_t byte_index,
cache->last_external_token = last_external_token;
}
static bool ts_parser__can_reuse_first_leaf(TSParser *self, TSStateId state, const Subtree *tree,
static const Subtree *ts_parser__reuse_node(TSParser *self, StackVersion version,
TSStateId *state, uint32_t position,
const Subtree *last_external_token,
TableEntry *table_entry) {
TSLexMode current_lex_mode = self->language->lex_modes[state];
// If the token was created in a state with the same set of lookaheads, it is reusable.
if (tree->first_leaf.lex_mode.lex_state == current_lex_mode.lex_state &&
tree->first_leaf.lex_mode.external_lex_state == current_lex_mode.external_lex_state &&
(tree->first_leaf.symbol != self->language->keyword_capture_token ||
tree->parse_state == state)) return true;
// Empty tokens are not reusable in states with different lookaheads.
if (tree->size.bytes == 0 && tree->symbol != ts_builtin_sym_end) return false;
// If the current state allows external tokens or other tokens that conflict with this
// token, this token is not reusable.
return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable;
}
static const Subtree *ts_parser__get_lookahead(TSParser *self, StackVersion version,
TSStateId *state, ReusableNode *reusable_node,
TableEntry *table_entry) {
Length position = ts_stack_position(self->stack, version);
const Subtree *last_external_token = ts_stack_last_external_token(self->stack, version);
const Subtree *result;
while ((result = reusable_node_tree(reusable_node))) {
uint32_t byte_offset = reusable_node_byte_offset(reusable_node);
if (byte_offset > position.bytes) {
while ((result = reusable_node_tree(&self->reusable_node))) {
uint32_t byte_offset = reusable_node_byte_offset(&self->reusable_node);
if (byte_offset > position) {
LOG("before_reusable_node symbol:%s", SYM_NAME(result->symbol));
break;
}
if (byte_offset < position.bytes) {
if (byte_offset < position) {
LOG("past_reusable_node symbol:%s", SYM_NAME(result->symbol));
reusable_node_advance(reusable_node);
reusable_node_advance(&self->reusable_node);
continue;
}
if (!ts_subtree_external_scanner_state_eq(reusable_node->last_external_token, last_external_token)) {
if (!ts_subtree_external_scanner_state_eq(self->reusable_node.last_external_token, last_external_token)) {
LOG("reusable_node_has_different_external_scanner_state symbol:%s", SYM_NAME(result->symbol));
reusable_node_advance(reusable_node);
reusable_node_advance(&self->reusable_node);
continue;
}
@ -479,14 +483,12 @@ static const Subtree *ts_parser__get_lookahead(TSParser *self, StackVersion vers
reason = "is_missing";
} else if (result->fragile_left || result->fragile_right) {
reason = "is_fragile";
} else if (self->in_ambiguity && result->children.size) {
reason = "in_ambiguity";
}
if (reason) {
LOG("cant_reuse_node_%s tree:%s", reason, SYM_NAME(result->symbol));
if (!reusable_node_descend(reusable_node)) {
reusable_node_advance(reusable_node);
if (!reusable_node_descend(&self->reusable_node)) {
reusable_node_advance(&self->reusable_node);
ts_parser__breakdown_top_of_stack(self, version);
*state = ts_stack_state(self->stack, version);
}
@ -500,7 +502,7 @@ static const Subtree *ts_parser__get_lookahead(TSParser *self, StackVersion vers
SYM_NAME(result->symbol),
SYM_NAME(result->first_leaf.symbol)
);
reusable_node_advance_past_leaf(reusable_node);
reusable_node_advance_past_leaf(&self->reusable_node);
break;
}
@ -509,18 +511,7 @@ static const Subtree *ts_parser__get_lookahead(TSParser *self, StackVersion vers
return result;
}
if ((result = ts_parser__get_cached_token(self, position.bytes, last_external_token))) {
ts_language_table_entry(self->language, *state, result->first_leaf.symbol, table_entry);
if (ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) {
ts_subtree_retain(result);
return result;
}
}
result = ts_parser__lex(self, version, *state);
ts_parser__set_cached_token(self, position.bytes, last_external_token, result);
ts_language_table_entry(self->language, *state, result->symbol, table_entry);
return result;
return NULL;
}
static bool ts_parser__select_tree(TSParser *self, const Subtree *left, const Subtree *right) {
@ -654,7 +645,7 @@ static StackSliceArray ts_parser__reduce(TSParser *self, StackVersion version, T
TSStateId state = ts_stack_state(self->stack, slice.version);
TSStateId next_state = ts_language_next_state(self->language, state, symbol);
if (fragile || self->in_ambiguity || pop.size > 1 || initial_version_count > 1) {
if (fragile || pop.size > 1 || initial_version_count > 1) {
parent->fragile_left = true;
parent->fragile_right = true;
parent->parse_state = TS_TREE_STATE_NONE;
@ -712,7 +703,6 @@ static void ts_parser__start(TSParser *self, TSInput input, const Subtree *previ
reusable_node_reset(&self->reusable_node, previous_tree);
self->finished_tree = NULL;
self->accept_count = 0;
self->in_ambiguity = false;
}
static void ts_parser__accept(TSParser *self, StackVersion version, const Subtree *lookahead) {
@ -1107,12 +1097,36 @@ static void ts_parser__recover(TSParser *self, StackVersion version, const Subtr
}
}
static void ts_parser__advance(TSParser *self, StackVersion version, ReusableNode *reusable_node) {
static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_node_reuse) {
TSStateId state = ts_stack_state(self->stack, version);
uint32_t position = ts_stack_position(self->stack, version).bytes;
const Subtree *last_external_token = ts_stack_last_external_token(self->stack, version);
bool did_reuse = true;
const Subtree *lookahead = NULL;
TableEntry table_entry;
const Subtree *lookahead = ts_parser__get_lookahead(
self, version, &state, reusable_node, &table_entry
);
// If possible, reuse a node from the previous syntax tree.
if (allow_node_reuse) {
lookahead = ts_parser__reuse_node(
self, version, &state, position, last_external_token, &table_entry
);
}
// Otherwise, try to reuse the token previously returned by the lexer.
if (!lookahead) {
did_reuse = false;
lookahead = ts_parser__get_cached_token(
self, state, position, last_external_token, &table_entry
);
}
// Otherwise, re-run the lexer.
if (!lookahead) {
lookahead = ts_parser__lex(self, version, state);
ts_parser__set_cached_token(self, position, last_external_token, lookahead);
ts_language_table_entry(self->language, state, lookahead->symbol, &table_entry);
}
for (;;) {
StackVersion last_reduction_version = STACK_VERSION_NONE;
@ -1137,14 +1151,12 @@ static void ts_parser__advance(TSParser *self, StackVersion version, ReusableNod
}
if (lookahead->children.size > 0) {
ts_parser__breakdown_lookahead(self, &lookahead, state, reusable_node);
ts_parser__breakdown_lookahead(self, &lookahead, state, &self->reusable_node);
next_state = ts_language_next_state(self->language, state, lookahead->symbol);
}
ts_parser__shift(self, version, next_state, lookahead, action.params.extra);
if (lookahead == reusable_node_tree(reusable_node)) {
reusable_node_advance(reusable_node);
}
if (did_reuse) reusable_node_advance(&self->reusable_node);
return;
}
@ -1168,13 +1180,12 @@ static void ts_parser__advance(TSParser *self, StackVersion version, ReusableNod
}
case TSParseActionTypeRecover: {
while (lookahead->children.size > 0) {
ts_parser__breakdown_lookahead(self, &lookahead, state, reusable_node);
if (lookahead->children.size > 0) {
ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
}
ts_parser__recover(self, version, lookahead);
if (lookahead == reusable_node_tree(reusable_node)) {
reusable_node_advance(reusable_node);
}
if (did_reuse) reusable_node_advance(&self->reusable_node);
return;
}
}
@ -1355,15 +1366,13 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) {
if (!self->language) return NULL;
ts_parser__start(self, input, old_tree ? old_tree->root : NULL);
StackVersion version = STACK_VERSION_NONE;
uint32_t position = 0, last_position = 0;
ReusableNode reusable_node = reusable_node_new();
reusable_node_assign(&reusable_node, &self->reusable_node);
uint32_t position = 0, last_position = 0, version_count = 0;
do {
for (version = 0; version < ts_stack_version_count(self->stack); version++) {
reusable_node_assign(&reusable_node, &self->reusable_node);
for (StackVersion version = 0;
version_count = ts_stack_version_count(self->stack), version < version_count;
version++) {
bool allow_node_reuse = version_count == 1;
while (ts_stack_is_active(self->stack, version)) {
LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u",
version, ts_stack_version_count(self->stack),
@ -1371,7 +1380,7 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) {
ts_stack_position(self->stack, version).extent.row,
ts_stack_position(self->stack, version).extent.column);
ts_parser__advance(self, version, &reusable_node);
ts_parser__advance(self, version, allow_node_reuse);
LOG_STACK();
position = ts_stack_position(self->stack, version).bytes;
@ -1382,8 +1391,6 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) {
}
}
reusable_node_assign(&self->reusable_node, &reusable_node);
unsigned min_error_cost = ts_parser__condense_stack(self);
if (self->finished_tree && self->finished_tree->error_cost < min_error_cost) {
break;
@ -1391,11 +1398,8 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) {
ts_parser__halt_parse(self);
break;
}
} while (version_count != 0);
self->in_ambiguity = version > 1;
} while (version != 0);
reusable_node_delete(&reusable_node);
ts_stack_clear(self->stack);
ts_parser__set_cached_token(self, 0, NULL, NULL);
ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language);

View file

@ -22,6 +22,7 @@ static inline void reusable_node_reset(ReusableNode *self, const Subtree *tree)
.child_index = 0,
.byte_offset = 0,
}));
self->last_external_token = NULL;
}
static inline const Subtree *reusable_node_tree(ReusableNode *self) {
@ -40,10 +41,6 @@ static inline void reusable_node_delete(ReusableNode *self) {
array_delete(&self->stack);
}
static inline void reusable_node_assign(ReusableNode *self, const ReusableNode *other) {
array_assign(&self->stack, &other->stack);
}
static inline void reusable_node_advance(ReusableNode *self) {
StackEntry last_entry = *array_back(&self->stack);
uint32_t byte_offset = last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree);