Allow stack versions to be temporarily paused

This way, when detecting an error, we can defer the decision about
whether to bail or recover until all stack versions are processed.
This commit is contained in:
Max Brunsfeld 2018-04-02 09:47:01 -07:00
parent 1109a565fc
commit e59558c83b
4 changed files with 206 additions and 146 deletions

View file

@ -171,6 +171,18 @@ static ErrorComparison parser__compare_versions(Parser *self, ErrorStatus a, Err
return ErrorComparisonNone;
}
// Builds the ErrorStatus snapshot used to compare stack versions.
//
// A paused version is charged one extra ERROR_COST_PER_SKIPPED_TREE on top of
// the stack's recorded error cost and is always reported as being in error;
// otherwise a version is in error only when its current state is ERROR_STATE.
static ErrorStatus parser__version_status(Parser *self, StackVersion version) {
  unsigned total_cost = ts_stack_error_cost(self->stack, version);
  bool paused = ts_stack_is_paused(self->stack, version);
  if (paused) {
    total_cost += ERROR_COST_PER_SKIPPED_TREE;
  }

  // The stack state is consulted only for versions that are not paused.
  bool in_error = paused;
  if (!in_error) {
    in_error = (ts_stack_state(self->stack, version) == ERROR_STATE);
  }

  ErrorStatus status = {
    .cost = total_cost,
    .push_count = ts_stack_push_count(self->stack, version),
    .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
    .is_in_error = in_error,
  };
  return status;
}
static bool parser__better_version_exists(Parser *self, StackVersion version,
bool is_in_error, unsigned cost) {
if (self->finished_tree && self->finished_tree->error_cost <= cost) return true;
@ -185,14 +197,9 @@ static bool parser__better_version_exists(Parser *self, StackVersion version,
for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
if (i == version ||
ts_stack_is_halted(self->stack, i) ||
!ts_stack_is_active(self->stack, i) ||
ts_stack_position(self->stack, i).bytes < position.bytes) continue;
ErrorStatus status_i = {
.cost = ts_stack_error_cost(self->stack, i),
.is_in_error = ts_stack_state(self->stack, i) == ERROR_STATE,
.dynamic_precedence = ts_stack_dynamic_precedence(self->stack, i),
.push_count = ts_stack_push_count(self->stack, i)
};
ErrorStatus status_i = parser__version_status(self, i);
switch (parser__compare_versions(self, status, status_i)) {
case ErrorComparisonTakeRight:
return true;
@ -206,83 +213,6 @@ static bool parser__better_version_exists(Parser *self, StackVersion version,
return false;
}
// Prunes, merges and reorders the stack's versions by pairwise comparison.
//
// Returns the lowest error cost observed among versions that were not in
// ERROR_STATE at the moment they were examined, or UINT_MAX if there was none.
static unsigned parser__condense_stack(Parser *self) {
bool made_changes = false;
unsigned min_error_cost = UINT_MAX;
// The version count is re-read on every iteration because versions are
// removed and merged inside the loop.
for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
// Drop halted versions. i is decremented so that the loop's i++ revisits the
// same index (presumably now holding the version that followed the removed one).
if (ts_stack_is_halted(self->stack, i)) {
ts_stack_remove_version(self->stack, i);
i--;
continue;
}
ErrorStatus status_i = {
.cost = ts_stack_error_cost(self->stack, i),
.push_count = ts_stack_push_count(self->stack, i),
.dynamic_precedence = ts_stack_dynamic_precedence(self->stack, i),
.is_in_error = ts_stack_state(self->stack, i) == ERROR_STATE,
};
if (!status_i.is_in_error && status_i.cost < min_error_cost) {
min_error_cost = status_i.cost;
}
// Compare version i against every version j that precedes it.
for (StackVersion j = 0; j < i; j++) {
ErrorStatus status_j = {
.cost = ts_stack_error_cost(self->stack, j),
.push_count = ts_stack_push_count(self->stack, j),
.dynamic_precedence = ts_stack_dynamic_precedence(self->stack, j),
.is_in_error = ts_stack_state(self->stack, j) == ERROR_STATE,
};
bool can_merge = ts_stack_can_merge(self->stack, j, i);
switch (parser__compare_versions(self, status_j, status_i)) {
// j wins outright: discard i. Setting j = i ends the inner loop, and the
// decremented i makes the outer loop revisit this index.
case ErrorComparisonTakeLeft:
made_changes = true;
ts_stack_remove_version(self->stack, i);
i--;
j = i;
break;
// j is preferred or there is no preference: fold i into j when the two
// can be merged, otherwise keep both.
case ErrorComparisonPreferLeft:
case ErrorComparisonNone:
if (can_merge) {
made_changes = true;
ts_stack_force_merge(self->stack, j, i);
i--;
j = i;
}
break;
// i is preferred: swap the two so the preferred version takes the lower
// index, then merge them if possible.
case ErrorComparisonPreferRight:
made_changes = true;
ts_stack_swap_versions(self->stack, i, j);
if (can_merge) {
ts_stack_force_merge(self->stack, j, i);
i--;
j = i;
}
break;
// i wins outright: discard j. Both indices are decremented so the inner
// loop's j++ continues from the same index.
case ErrorComparisonTakeRight:
made_changes = true;
ts_stack_remove_version(self->stack, j);
i--;
j--;
break;
}
}
}
// Enforce the cap by repeatedly removing the version at index MAX_VERSION_COUNT.
while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
ts_stack_remove_version(self->stack, MAX_VERSION_COUNT);
made_changes = true;
}
// Only log when a removal, merge or swap above set made_changes.
if (made_changes) {
LOG("condense");
LOG_STACK();
}
return min_error_cost;
}
static void parser__restore_external_scanner(Parser *self, Tree *external_token) {
if (external_token) {
self->language->external_scanner.deserialize(
@ -334,9 +264,7 @@ static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta
self->lexer.token_end_position = self->lexer.current_position;
}
if (error_mode && self->lexer.token_end_position.bytes <= current_position.bytes) {
LOG("disregard_empty_token");
} else {
if (!error_mode || self->lexer.token_end_position.bytes > current_position.bytes) {
found_external_token = true;
break;
}
@ -360,7 +288,6 @@ static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta
}
if (!error_mode) {
LOG("retry_in_error_mode");
error_mode = true;
lex_mode = self->language->lex_modes[ERROR_STATE];
valid_external_tokens = ts_language_enabled_external_tokens(
@ -797,7 +724,8 @@ static void parser__accept(Parser *self, StackVersion version,
static bool parser__do_all_potential_reductions(Parser *self, StackVersion starting_version,
TSSymbol lookahead_symbol) {
bool result = false;
for (StackVersion version = starting_version;;) {
for (StackVersion version = starting_version;
ts_stack_version_count(self->stack) < MAX_VERSION_COUNT;) {
uint32_t version_count = ts_stack_version_count(self->stack);
if (version >= version_count) break;
@ -869,24 +797,7 @@ static bool parser__do_all_potential_reductions(Parser *self, StackVersion start
}
static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lookahead_symbol) {
// If enough parse versions have already completed, just halt this version.
if (self->accept_count > MAX_VERSION_COUNT) {
ts_stack_halt(self->stack, version);
LOG("bail_after_too_many_tries");
return;
}
// If there are other in-progress versions that are clearly better than this one,
// just halt this version.
unsigned new_cost = ts_stack_error_cost(self->stack, version) + ERROR_COST_PER_SKIPPED_TREE;
if (parser__better_version_exists(self, version, true, new_cost)) {
ts_stack_halt(self->stack, version);
LOG("bail_on_error");
return;
}
// Perform any reductions that could have happened in this state, regardless of the lookahead.
LOG("handle_error");
uint32_t previous_version_count = ts_stack_version_count(self->stack);
parser__do_all_potential_reductions(self, version, 0);
uint32_t version_count = ts_stack_version_count(self->stack);
@ -923,7 +834,6 @@ static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lo
lookahead_symbol
)) {
LOG("recover_with_missing symbol:%s, state:%u", SYM_NAME(missing_symbol), state_after_missing_symbol);
LOG_STACK();
did_insert_missing_token = true;
break;
}
@ -1023,13 +933,15 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead)
unsigned previous_version_count = ts_stack_version_count(self->stack);
Length position = ts_stack_position(self->stack, version);
StackSummary *summary = ts_stack_get_summary(self->stack, version);
unsigned depth_since_error = ts_stack_depth_since_error(self->stack, version);
for (unsigned i = 0; i < summary->size; i++) {
StackSummaryEntry entry = summary->contents[i];
if (entry.state == ERROR_STATE) continue;
if (entry.position.bytes == position.bytes) continue;
unsigned depth = entry.depth + ts_stack_depth_since_error(self->stack, version);
unsigned depth = entry.depth + depth_since_error;
if (depth > MAX_SUMMARY_DEPTH) break;
unsigned new_cost =
depth * ERROR_COST_PER_SKIPPED_TREE +
@ -1041,21 +953,22 @@ static void parser__recover(Parser *self, StackVersion version, Tree *lookahead)
if (parser__recover_to_state(self, version, depth, entry.state)) {
did_recover = true;
LOG("recover state:%u, depth:%u", entry.state, depth);
LOG_STACK();
break;
}
}
}
for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) {
if (ts_stack_is_halted(self->stack, i)) {
ts_stack_remove_version(self->stack, i--);
} else {
if (ts_stack_is_active(self->stack, i)) {
for (unsigned j = 0; j < i; j++) {
if (ts_stack_can_merge(self->stack, j, i)) {
ts_stack_remove_version(self->stack, i--);
break;
}
}
} else {
ts_stack_remove_version(self->stack, i--);
}
}
@ -1154,21 +1067,14 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re
if (last_reduction_version != STACK_VERSION_NONE) {
ts_stack_renumber_version(self->stack, last_reduction_version, version);
LOG_STACK();
} else if (state == ERROR_STATE) {
ts_stack_push(self->stack, version, lookahead, false, ERROR_STATE);
return;
} else if (!parser__breakdown_top_of_stack(self, version)) {
if (state == ERROR_STATE) {
ts_stack_push(self->stack, version, lookahead, false, ERROR_STATE);
return;
}
parser__handle_error(self, version, lookahead->first_leaf.symbol);
if (ts_stack_is_halted(self->stack, version)) {
ts_tree_release(&self->tree_pool, lookahead);
return;
} else if (lookahead->size.bytes == 0) {
ts_tree_release(&self->tree_pool, lookahead);
state = ts_stack_state(self->stack, version);
lookahead = parser__get_lookahead(self, version, &state, reusable_node, &table_entry);
}
LOG("detect_error");
ts_stack_pause(self->stack, version, lookahead->first_leaf.symbol);
ts_tree_release(&self->tree_pool, lookahead);
return;
}
state = ts_stack_state(self->stack, version);
@ -1176,6 +1082,93 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re
}
}
// Prunes, merges and reorders the stack's versions by pairwise comparison,
// then resumes at most one paused version and discards the remaining paused ones.
//
// Returns the lowest error cost observed among versions that were not in error
// (per parser__version_status) at the moment they were examined, or UINT_MAX
// if there was none.
static unsigned parser__condense_stack(Parser *self) {
bool made_changes = false;
unsigned min_error_cost = UINT_MAX;
// The version count is re-read on every iteration because versions are
// removed and merged inside the loop.
for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
// Drop halted versions. i is decremented so that the loop's i++ revisits the
// same index (presumably now holding the version that followed the removed one).
if (ts_stack_is_halted(self->stack, i)) {
ts_stack_remove_version(self->stack, i);
i--;
continue;
}
ErrorStatus status_i = parser__version_status(self, i);
if (!status_i.is_in_error && status_i.cost < min_error_cost) {
min_error_cost = status_i.cost;
}
// Compare version i against every version j that precedes it.
for (StackVersion j = 0; j < i; j++) {
ErrorStatus status_j = parser__version_status(self, j);
bool can_merge = ts_stack_can_merge(self->stack, j, i);
switch (parser__compare_versions(self, status_j, status_i)) {
// j wins outright: discard i. Setting j = i ends the inner loop, and the
// decremented i makes the outer loop revisit this index.
case ErrorComparisonTakeLeft:
made_changes = true;
ts_stack_remove_version(self->stack, i);
i--;
j = i;
break;
// j is preferred or there is no preference: fold i into j when the two
// can be merged, otherwise keep both.
case ErrorComparisonPreferLeft:
case ErrorComparisonNone:
if (can_merge) {
made_changes = true;
ts_stack_force_merge(self->stack, j, i);
i--;
j = i;
}
break;
// i is preferred: swap the two so the preferred version takes the lower
// index, then merge them if possible.
case ErrorComparisonPreferRight:
made_changes = true;
ts_stack_swap_versions(self->stack, i, j);
if (can_merge) {
ts_stack_force_merge(self->stack, j, i);
i--;
j = i;
}
break;
// i wins outright: discard j. Both indices are decremented so the inner
// loop's j++ continues from the same index.
case ErrorComparisonTakeRight:
made_changes = true;
ts_stack_remove_version(self->stack, j);
i--;
j--;
break;
}
}
}
// Enforce the cap by repeatedly removing the version at index MAX_VERSION_COUNT.
while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
ts_stack_remove_version(self->stack, MAX_VERSION_COUNT);
made_changes = true;
}
// Decide the fate of paused versions now that every version has been compared.
// A paused version is resumed (and sent through error handling) only if no
// unpaused version precedes it; every other paused version is removed.
if (ts_stack_version_count(self->stack) > 0) {
bool has_unpaused_version = false;
// n is captured once and only shrinks on removal, so any versions that
// parser__handle_error may append are not visited by this loop.
for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
if (ts_stack_is_paused(self->stack, i)) {
if (!has_unpaused_version) {
LOG("resume version:%u", i);
// ts_stack_resume returns the lookahead symbol recorded when the version
// was paused; it is handed on to the error handler.
TSSymbol lookahead_symbol = ts_stack_resume(self->stack, i);
parser__handle_error(self, i, lookahead_symbol);
has_unpaused_version = true;
} else {
// Keep i and n in step with the shortened version list.
ts_stack_remove_version(self->stack, i);
i--;
n--;
}
} else {
has_unpaused_version = true;
}
}
}
// Note: the resume/removal pass above does not set made_changes, so on its
// own it does not trigger this logging.
if (made_changes) {
LOG("condense");
LOG_STACK();
}
return min_error_cost;
}
bool parser_init(Parser *self) {
ts_lexer_init(&self->lexer);
array_init(&self->reduce_actions);
@ -1219,13 +1212,7 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree, bool halt_on_err
for (version = 0; version < ts_stack_version_count(self->stack); version++) {
reusable_node = self->reusable_node;
while (!ts_stack_is_halted(self->stack, version)) {
position = ts_stack_position(self->stack, version).bytes;
if (position > last_position || (version > 0 && position == last_position)) {
last_position = position;
break;
}
while (ts_stack_is_active(self->stack, version)) {
LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u",
version, ts_stack_version_count(self->stack),
ts_stack_state(self->stack, version),
@ -1234,6 +1221,12 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree, bool halt_on_err
parser__advance(self, version, &reusable_node);
LOG_STACK();
position = ts_stack_position(self->stack, version).bytes;
if (position > last_position || (version > 0 && position == last_position)) {
last_position = position;
break;
}
}
}

View file

@ -49,12 +49,19 @@ typedef struct {
typedef Array(StackNode *) StackNodeArray;
// Status of a single stack head (version).
typedef enum {
// Assigned to newly created heads and restored by ts_stack_resume.
StackStatusActive,
// Assigned by ts_stack_pause, which also records a lookahead symbol on the head.
StackStatusPaused,
// Assigned by ts_stack_halt.
StackStatusHalted,
} StackStatus;
typedef struct {
StackNode *node;
Tree *last_external_token;
uint32_t push_count;
bool is_halted;
StackSummary *summary;
uint32_t push_count;
TSSymbol lookahead_when_paused;
StackStatus status;
} StackHead;
struct Stack {
@ -215,7 +222,8 @@ static StackVersion ts_stack__add_version(Stack *self, StackVersion original_ver
.node = node,
.push_count = self->heads.contents[original_version].push_count,
.last_external_token = last_external_token,
.is_halted = false,
.status = StackStatusActive,
.lookahead_when_paused = 0,
};
array_push(&self->heads, head);
stack_node_retain(node);
@ -581,7 +589,8 @@ bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version
StackHead *head1 = &self->heads.contents[version1];
StackHead *head2 = &self->heads.contents[version2];
return
!head1->is_halted && !head2->is_halted &&
head1->status == StackStatusActive &&
head2->status == StackStatusActive &&
head1->node->state == head2->node->state &&
head1->node->position.bytes == head2->node->position.bytes &&
head1->node->depth == head2->node->depth &&
@ -598,11 +607,34 @@ void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion versi
}
void ts_stack_halt(Stack *self, StackVersion version) {
array_get(&self->heads, version)->is_halted = true;
array_get(&self->heads, version)->status = StackStatusHalted;
}
bool ts_stack_is_halted(Stack *self, StackVersion version) {
return array_get(&self->heads, version)->is_halted;
// Marks `version` as paused and remembers the lookahead symbol it was paused
// on, so that ts_stack_resume can hand it back later.
void ts_stack_pause(Stack *self, StackVersion version, TSSymbol lookahead) {
  StackHead *target = array_get(&self->heads, version);
  target->lookahead_when_paused = lookahead;
  target->status = StackStatusPaused;
}
// Reports whether the head for `version` currently has status Active.
bool ts_stack_is_active(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusActive;
}
// Reports whether the head for `version` currently has status Halted.
bool ts_stack_is_halted(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusHalted;
}
// Reports whether the head for `version` currently has status Paused.
bool ts_stack_is_paused(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusPaused;
}
// Reactivates a paused version and returns the lookahead symbol that was
// recorded when it was paused. The stored symbol is reset to 0.
// The version must currently be paused (checked with assert).
TSSymbol ts_stack_resume(Stack *self, StackVersion version) {
  StackHead *paused_head = array_get(&self->heads, version);
  assert(paused_head->status == StackStatusPaused);

  TSSymbol saved_lookahead = paused_head->lookahead_when_paused;
  paused_head->lookahead_when_paused = 0;
  paused_head->status = StackStatusActive;
  return saved_lookahead;
}
void ts_stack_clear(Stack *self) {
@ -614,7 +646,8 @@ void ts_stack_clear(Stack *self) {
array_push(&self->heads, ((StackHead){
.node = self->base_node,
.last_external_token = NULL,
.is_halted = false,
.status = StackStatusActive,
.lookahead_when_paused = 0,
}));
}
@ -631,13 +664,18 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
array_clear(&self->iterators);
for (uint32_t i = 0; i < self->heads.size; i++) {
if (ts_stack_is_halted(self, i)) continue;
StackHead *head = &self->heads.contents[i];
if (head->status == StackStatusHalted) continue;
fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i);
fprintf(
f,
"node_head_%u -> node_%p [label=%u, fontcolor=blue, weight=10000, "
"labeltooltip=\"push_count: %u\ndepth: %u", i, head->node, i, head->push_count, head->node->depth
fprintf(f, "node_head_%u -> node_%p [", i, head->node);
if (head->status == StackStatusPaused) {
fprintf(f, "color=red ");
}
fprintf(f,
"label=%u, fontcolor=blue, weight=10000, labeltooltip=\"push_count: %u\ndepth: %u",
i, head->push_count, head->node->depth
);
if (head->last_external_token) {

View file

@ -102,9 +102,17 @@ bool ts_stack_can_merge(Stack *, StackVersion, StackVersion);
void ts_stack_force_merge(Stack *, StackVersion, StackVersion);
TSSymbol ts_stack_resume(Stack *, StackVersion);
void ts_stack_pause(Stack *, StackVersion, TSSymbol);
void ts_stack_halt(Stack *, StackVersion);
bool ts_stack_is_halted(Stack *, StackVersion);
bool ts_stack_is_active(const Stack *, StackVersion);
bool ts_stack_is_paused(const Stack *, StackVersion);
bool ts_stack_is_halted(const Stack *, StackVersion);
void ts_stack_renumber_version(Stack *, StackVersion, StackVersion);

View file

@ -141,3 +141,24 @@ int y = 5;
(translation_unit
(declaration (primitive_type) (ERROR (identifier)) (identifier))
(declaration (primitive_type) (init_declarator (identifier) (number_literal))))
==========================================
Declarations with missing variable names
==========================================
int a() {
struct x = 1;
int = 2;
}
---
(translation_unit
(function_definition
(primitive_type)
(function_declarator (identifier) (parameter_list))
(compound_statement
(struct_specifier (type_identifier))
(ERROR (number_literal))
(primitive_type)
(ERROR (number_literal)))))