Allow lookahead to be broken down further after performing reductions

This commit is contained in:
Max Brunsfeld 2016-07-01 15:08:19 -07:00
parent 0e2bbbd7ee
commit c3a242740b
3 changed files with 164 additions and 98 deletions

View file

@ -49,11 +49,6 @@
// NOTE(review): judging by the name, an upper bound on accumulated error
// cost; the code that compares against it is not visible in this excerpt,
// so confirm the exact semantics at the use site.
static const unsigned ERROR_COST_THRESHOLD = 3;
// Cursor over a previously built tree, used when picking a lookahead.
// Field order matters: the struct is filled with positional initializers
// of the form { tree, 0 }.
typedef struct {
// Candidate node that may be handed back as the next lookahead.
TSTree *tree;
// Character offset paired with `tree`; it is compared against the stack's
// top position (in chars) to decide whether the node is ahead of, behind,
// or exactly at the current parse position.
size_t char_index;
} ReusableNode;
typedef struct {
TSParser *parser;
TSSymbol lookahead_symbol;
@ -112,8 +107,6 @@ static BreakdownResult ts_parser__breakdown_top_of_stack(TSParser *self,
TSStateId state = ts_stack_top_state(self->stack, slice.version);
TSTree *parent = *array_front(&slice.trees);
LOG("breakdown_top_of_stack tree:%s", SYM_NAME(parent->symbol));
for (size_t j = 0; j < parent->child_count; j++) {
TSTree *child = parent->children[j];
pending = child->child_count > 0;
@ -136,6 +129,9 @@ static BreakdownResult ts_parser__breakdown_top_of_stack(TSParser *self,
CHECK(ts_parser__push(self, slice.version, tree, state));
}
LOG("breakdown_top_of_stack tree:%s", SYM_NAME(parent->symbol));
LOG_STACK();
ts_tree_release(parent);
array_delete(&slice.trees);
}
@ -197,41 +193,31 @@ static bool ts_parser__condense_stack(TSParser *self) {
return result;
}
static bool ts_parser__can_reuse(TSParser *self, StackVersion version,
TSTree *tree) {
if (tree->symbol == ts_builtin_sym_error) {
LOG("cant_reuse_error tree:%s", SYM_NAME(tree->symbol));
return false;
}
if (tree->has_changes) {
LOG("cant_reuse_changed tree:%s", SYM_NAME(tree->symbol));
return false;
}
TSStateId state = ts_stack_top_state(self->stack, version);
static bool ts_parser__can_reuse(TSParser *self, TSStateId state,
TableEntry *table_entry, TSTree *tree) {
if (tree->parse_state != state) {
if (ts_tree_is_fragile(tree)) {
LOG("cant_reuse_fragile sym:%s", SYM_NAME(tree->symbol));
LOG("cant_reuse_fragile sym:%s, size:%lu", SYM_NAME(tree->symbol),
tree->size.chars);
return false;
}
TableEntry entry;
ts_language_table_entry(self->language, state, tree->symbol, &entry);
if (!entry.is_reusable) {
LOG("cant_reuse_ambiguous sym:%s", SYM_NAME(tree->symbol));
if (!table_entry->is_reusable) {
LOG("cant_reuse_ambiguous sym:%s, size:%lu", SYM_NAME(tree->symbol),
tree->size.chars);
return false;
}
if (entry.action_count == 0) {
LOG("cant_reuse_unexpected sym:%s", SYM_NAME(tree->symbol));
if (table_entry->action_count == 0) {
LOG("cant_reuse_unexpected sym:%s, size:%lu", SYM_NAME(tree->symbol),
tree->size.chars);
return false;
}
TSParseAction action = entry.actions[entry.action_count - 1];
TSParseAction action = table_entry->actions[table_entry->action_count - 1];
if (tree->extra != action.extra) {
LOG("cant_reuse_extra sym:%s", SYM_NAME(tree->symbol));
LOG("cant_reuse_extra sym:%s, size:%lu", SYM_NAME(tree->symbol),
tree->size.chars);
return false;
}
@ -243,24 +229,28 @@ static bool ts_parser__can_reuse(TSParser *self, StackVersion version,
&leaf_entry);
if (!leaf_entry.is_reusable) {
LOG("cant_reuse_first_leaf sym:%s, leaf_sym:%s",
SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol));
LOG("cant_reuse_first_leaf sym:%s, leaf_sym:%s, size:%lu",
SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol),
tree->size.chars);
return false;
}
if (tree->child_count == 1 && leaf_entry.depends_on_lookahead) {
LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s",
SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol));
LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s, size:%lu",
SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol),
tree->size.chars);
return false;
}
} else if (entry.depends_on_lookahead) {
LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s",
SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol));
} else if (table_entry->depends_on_lookahead) {
LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s, size:%lu",
SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol),
tree->size.chars);
return false;
}
}
}
LOG("reuse sym:%s size:%lu", SYM_NAME(tree->symbol), tree->size.chars);
return true;
}
@ -298,22 +288,45 @@ static TSTree *ts_parser__lex(TSParser *self, TSStateId parse_state,
return result;
}
// Forget the parser's cached token: the slot is emptied and the reference
// the parser was holding on that tree is released.
static void ts_parser__clear_cached_token(TSParser *self) {
  TSTree *stale_token = self->cached_token;
  self->cached_token = NULL;
  ts_tree_release(stale_token);
}
static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version,
ReusableNode *reusable_node) {
ReusableNode *reusable_node,
bool *is_fresh) {
TSLength position = ts_stack_top_position(self->stack, version);
while (reusable_node->tree) {
if (reusable_node->char_index > position.chars) {
LOG("before_reusable sym:%s, pos:%lu",
SYM_NAME(reusable_node->tree->symbol), reusable_node->char_index);
break;
}
if (reusable_node->char_index < position.chars) {
LOG("past_reusable sym:%s", SYM_NAME(reusable_node->tree->symbol));
LOG("past_reusable sym:%s, pos:%lu",
SYM_NAME(reusable_node->tree->symbol), reusable_node->char_index);
ts_parser__pop_reusable_node(reusable_node);
continue;
}
if (!ts_parser__can_reuse(self, version, reusable_node->tree)) {
if (reusable_node->tree->symbol == ts_builtin_sym_error) {
LOG("cant_reuse_error sym:%s, size:%lu",
SYM_NAME(reusable_node->tree->symbol),
reusable_node->tree->size.chars);
if (!ts_parser__breakdown_reusable_node(reusable_node)) {
ts_parser__pop_reusable_node(reusable_node);
CHECK(ts_parser__breakdown_top_of_stack(self, version));
}
continue;
}
if (reusable_node->tree->has_changes) {
LOG("cant_reuse_changed tree:%s, size:%lu",
SYM_NAME(reusable_node->tree->symbol),
reusable_node->tree->size.chars);
if (!ts_parser__breakdown_reusable_node(reusable_node)) {
ts_parser__pop_reusable_node(reusable_node);
CHECK(ts_parser__breakdown_top_of_stack(self, version));
@ -322,17 +335,20 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version,
}
TSTree *result = reusable_node->tree;
TSLength size = ts_tree_total_size(result);
LOG("reuse sym:%s size:%lu extra:%d", SYM_NAME(result->symbol), size.chars,
result->extra);
ts_parser__pop_reusable_node(reusable_node);
ts_tree_retain(result);
*is_fresh = false;
return result;
}
if (self->cached_token && position.chars == self->cached_token_char_index) {
ts_tree_retain(self->cached_token);
return self->cached_token;
}
ts_lexer_reset(&self->lexer, position);
TSStateId parse_state = ts_stack_top_state(self->stack, version);
bool error_mode = parse_state == TS_STATE_ERROR;
*is_fresh = true;
return ts_parser__lex(self, parse_state, error_mode);
error:
@ -784,7 +800,8 @@ static void ts_parser__start(TSParser *self, TSInput input,
ts_lexer_set_input(&self->lexer, input);
ts_stack_clear(self->stack);
self->reusable_node = (ReusableNode){ previous_tree, 0 };
self->cached_token = NULL;
self->finished_tree = NULL;
}
@ -835,8 +852,9 @@ error:
}
static bool ts_parser__handle_error(TSParser *self, StackVersion version,
TSStateId state, TSTree *lookahead) {
TSTree *lookahead) {
size_t previous_version_count = ts_stack_version_count(self->stack);
TSStateId state = ts_stack_top_state(self->stack, version);
unsigned error_cost = ts_stack_error_cost(self->stack, version);
unsigned error_depth = ts_stack_error_depth(self->stack, version) + 1;
@ -944,19 +962,34 @@ error:
return false;
}
static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
TSTree *lookahead) {
// Outcome of trying to consume one lookahead tree on a stack version.
typedef enum {
// A hard failure occurred (the `error:` path); the caller aborts.
ConsumeFailed,
// The lookahead could not be used as-is: either a non-fresh lookahead
// failed the reuse check for the current state, or the parse actions
// produced no reduction to continue from. The caller reacts by breaking
// the lookahead down further or by entering error handling.
ConsumeInvalid,
// The lookahead was shifted, accepted, or handled by a recover action.
ConsumeSucceeded,
} ConsumeResult;
static ConsumeResult ts_parser__consume_lookahead(TSParser *self,
StackVersion version,
TSTree *lookahead,
bool lookahead_is_fresh) {
for (;;) {
TSStateId state = ts_stack_top_state(self->stack, version);
TableEntry entry;
ts_language_table_entry(self->language, state, lookahead->symbol, &entry);
if (!lookahead_is_fresh &&
!ts_parser__can_reuse(self, state, &entry, lookahead))
return ConsumeInvalid;
LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol),
lookahead->size.chars);
bool reduction_stopped_at_error = false;
StackVersion last_reduction_version = STACK_VERSION_NONE;
size_t action_count;
const TSParseAction *actions = ts_language_actions(
self->language, state, lookahead->symbol, &action_count);
for (size_t i = 0; i < action_count; i++) {
TSParseAction action = actions[i];
for (size_t i = 0; i < entry.action_count; i++) {
TSParseAction action = entry.actions[i];
switch (action.type) {
case TSParseActionTypeShift: {
@ -971,7 +1004,7 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
CHECK(ts_parser__shift(self, version, next_state, lookahead,
action.extra));
return true;
return ConsumeSucceeded;
}
case TSParseActionTypeReduce: {
@ -999,7 +1032,8 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
case ReduceStoppedAtError: {
reduction_stopped_at_error = true;
switch (ts_parser__repair_error(self, reduction.slice, lookahead,
actions, action_count)) {
entry.actions,
entry.action_count)) {
case RepairFailed:
goto error;
case RepairNoneFound:
@ -1020,20 +1054,58 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
LOG("accept");
CHECK(ts_parser__accept(self, version));
return true;
return ConsumeSucceeded;
}
case TSParseActionTypeRecover: {
CHECK(ts_parser__recover(self, version, action.to_state, lookahead));
return true;
return ConsumeSucceeded;
}
}
LOG_STACK();
}
if (last_reduction_version != STACK_VERSION_NONE) {
ts_stack_renumber_version(self->stack, last_reduction_version, version);
if (last_reduction_version == STACK_VERSION_NONE)
return ConsumeInvalid;
ts_stack_renumber_version(self->stack, last_reduction_version, version);
LOG_STACK();
}
error:
return ConsumeFailed;
}
static bool ts_parser__advance(TSParser *self, StackVersion version,
ReusableNode *reusable_node) {
TSTree *lookahead = NULL;
for (;;) {
bool lookahead_is_fresh;
CHECK(lookahead = ts_parser__get_lookahead(self, version, reusable_node,
&lookahead_is_fresh));
switch (ts_parser__consume_lookahead(self, version, lookahead,
lookahead_is_fresh)) {
case ConsumeFailed:
goto error;
case ConsumeSucceeded:
if (lookahead == reusable_node->tree)
ts_parser__pop_reusable_node(reusable_node);
ts_tree_release(lookahead);
return true;
case ConsumeInvalid:
break;
}
if (!lookahead_is_fresh) {
if (lookahead == reusable_node->tree) {
if (!ts_parser__breakdown_reusable_node(reusable_node))
ts_parser__pop_reusable_node(reusable_node);
} else {
ts_parser__clear_cached_token(self);
}
ts_tree_release(lookahead);
continue;
}
@ -1041,15 +1113,22 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
case BreakdownFailed:
goto error;
case BreakdownPerformed:
break;
ts_tree_release(lookahead);
continue;
case BreakdownAborted:
CHECK(ts_parser__handle_error(self, version, state, lookahead));
if (ts_stack_is_halted(self->stack, version))
return true;
break;
}
CHECK(ts_parser__handle_error(self, version, lookahead));
ts_tree_release(lookahead);
if (ts_stack_is_halted(self->stack, version))
return true;
}
error:
if (lookahead)
ts_tree_release(lookahead);
return false;
}
@ -1087,16 +1166,14 @@ void ts_parser_destroy(TSParser *self) {
TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *old_tree) {
ts_parser__start(self, input, old_tree);
StackVersion version = 0;
size_t last_position = 0, position = 0;
ReusableNode reusable_node, current_reusable_node = { old_tree, 0 };
for (;;) {
TSTree *lookahead = NULL;
size_t lookahead_position = 0;
StackVersion version = STACK_VERSION_NONE;
size_t position = 0, last_position = 0;
ReusableNode reusable_node;
do {
for (version = 0; version < ts_stack_version_count(self->stack); version++) {
reusable_node = current_reusable_node;
reusable_node = self->reusable_node;
last_position = position;
while (!ts_stack_is_halted(self->stack, version)) {
@ -1111,44 +1188,25 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *old_tree) {
ts_stack_top_position(self->stack, version).rows + 1,
ts_stack_top_position(self->stack, version).columns + 1);
if (!lookahead || (position != lookahead_position) ||
!ts_parser__can_reuse(self, version, lookahead)) {
ts_tree_release(lookahead);
lookahead = ts_parser__get_lookahead(self, version, &reusable_node);
lookahead_position = position;
CHECK(lookahead);
}
LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol),
ts_tree_total_chars(lookahead));
if (!ts_parser__consume_lookahead(self, version, lookahead)) {
ts_tree_release(lookahead);
goto error;
}
CHECK(ts_parser__advance(self, version, &reusable_node));
LOG_STACK();
}
}
current_reusable_node = reusable_node;
self->reusable_node = reusable_node;
if (ts_parser__condense_stack(self)) {
LOG("condense");
LOG_STACK();
}
ts_tree_release(lookahead);
if (version == 0)
break;
else
self->is_split = (version > 1);
}
self->is_split = (version > 1);
} while (version != 0);
LOG("done");
LOG_TREE();
ts_stack_clear(self->stack);
ts_parser__clear_cached_token(self);
ts_tree_assign_parents(self->finished_tree);
return self->finished_tree;