Reuse fragile tokens that came from the current lex state

This commit is contained in:
Max Brunsfeld 2015-12-21 16:04:11 -08:00
parent 386b124866
commit 2bcd2e4d00
5 changed files with 54 additions and 35 deletions

View file

@ -47,6 +47,7 @@ typedef struct TSLexer {
size_t lookahead_size;
int32_t lookahead;
TSStateId starting_state;
TSInput input;
TSDebugger debugger;

View file

@ -45,6 +45,7 @@ static void ts_lexer__start(TSLexer *self, TSStateId lex_state) {
LOG("start_lex state:%d, pos:%lu", lex_state, self->current_position.chars);
LOG_LOOKAHEAD();
self->starting_state = lex_state;
if (!self->chunk)
ts_lexer__get_chunk(self);
if (!self->lookahead_size)
@ -101,7 +102,9 @@ static TSTree *ts_lexer__accept(TSLexer *self, TSSymbol symbol,
result = ts_tree_make_leaf(symbol, padding, size, metadata);
}
result->options.fragile_left = fragile;
if (fragile)
result->context.lex_state = self->starting_state;
return result;
}

View file

@ -84,19 +84,26 @@ static void ts_parser__breakdown_top_of_stack(TSParser *self, int head) {
} while (last_child->child_count > 0);
}
static void ts_parser__pop_reusable_subtree(LookaheadState *state);
/*
* Replace the parser's reusable_subtree with its first non-fragile descendant.
* The loop steps down to children[0] at least once and keeps descending for as
* long as ts_tree_is_fragile() reports the new subtree as fragile.
*
* NOTE(review): this span is a diff hunk whose +/- markers are missing, so two
* variants of the function appear interleaved and the text is not compilable
* as shown:
*   - the `bool` variant returns false when it reaches an error node or a
*     leaf (child_count == 0), and true once the loop finishes;
*   - the `void` variant calls ts_parser__pop_reusable_subtree() in those two
*     cases and returns no value.
* The hunk header (19 -> 26 lines) suggests the `void` variant is the
* post-commit one; confirm against the repository. For that variant the
* original sentence "Return true if a suitable descendant is found, false
* otherwise" no longer applies.
*/
static bool ts_parser__breakdown_reusable_subtree(LookaheadState *state) {
static void ts_parser__breakdown_reusable_subtree(LookaheadState *state) {
do {
/* `bool` variant: error nodes and leaves cannot be broken down further. */
if (state->reusable_subtree->symbol == ts_builtin_sym_error)
return false;
if (state->reusable_subtree->child_count == 0)
return false;
/* `void` variant: same two stop conditions, but the subtree is popped here. */
if (state->reusable_subtree->symbol == ts_builtin_sym_error) {
ts_parser__pop_reusable_subtree(state);
return;
}
if (state->reusable_subtree->child_count == 0) {
ts_parser__pop_reusable_subtree(state);
return;
}
/* Descend into the first child and re-test it for fragility. */
state->reusable_subtree = state->reusable_subtree->children[0];
} while (ts_tree_is_fragile(state->reusable_subtree));
return true;
}
/*
@ -118,13 +125,30 @@ static void ts_parser__pop_reusable_subtree(LookaheadState *state) {
}
/*
* Decide whether `subtree` may be reused as the lookahead for the stack head
* `head`.
*
* NOTE(review): this span is a diff hunk whose +/- markers are missing; the
* single compound condition and the split-out checks below look like the
* pre- and post-commit forms of the same tests, and the text is not
* compilable as shown. Confirm against the repository which lines survive.
*
* Checks visible here, each of which yields false:
*   - `subtree` is NULL, is the builtin error symbol, or is fragile;
*   - the subtree's context.lex_state is not TSTREE_LEX_STATE_INDEPENDENT and
*     differs from language->lex_states[] for the head's top parse state;
*   - the parse action for (state, subtree->symbol) has type
*     TSParseActionTypeError or has can_hide_split set;
*   - the subtree is extra but the action is not marked extra.
*/
static bool ts_parser__can_reuse(TSParser *self, int head, TSTree *subtree) {
if (!subtree || subtree->symbol == ts_builtin_sym_error ||
ts_tree_is_fragile(subtree))
if (!subtree)
return false;
if (subtree->symbol == ts_builtin_sym_error)
return false;
if (ts_tree_is_fragile(subtree))
return false;
TSStateId state = ts_stack_top_state(self->stack, head);
/* A lex-state-dependent subtree is only accepted under the same lex state. */
if (subtree->context.lex_state != TSTREE_LEX_STATE_INDEPENDENT) {
TSStateId lex_state = self->language->lex_states[state];
if (subtree->context.lex_state != lex_state)
return false;
}
const TSParseAction *action =
ts_language_actions(self->language, state, subtree->symbol);
return action->type != TSParseActionTypeError && !action->can_hide_split;
if (action->type == TSParseActionTypeError || action->can_hide_split)
return false;
if (ts_tree_is_extra(subtree) && !action->extra)
return false;
return true;
}
/*
@ -142,33 +166,25 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) {
}
if (state->reusable_subtree_pos < position.chars) {
LOG("past_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol));
LOG("past_reusable sym:%s", SYM_NAME(state->reusable_subtree->symbol));
ts_parser__pop_reusable_subtree(state);
continue;
}
bool can_reuse = true;
if (ts_tree_has_changes(state->reusable_subtree)) {
if (state->is_verifying) {
ts_parser__breakdown_top_of_stack(self, head);
state->is_verifying = false;
}
LOG("breakdown_changed sym:%s", SYM_NAME(state->reusable_subtree->symbol));
can_reuse = false;
} else if (ts_tree_is_extra(state->reusable_subtree)) {
LOG("breakdown_extra sym:%s", SYM_NAME(state->reusable_subtree->symbol));
can_reuse = false;
} else if (!ts_parser__can_reuse(self, head, state->reusable_subtree)) {
LOG("breakdown_non_reusable sym:%s",
SYM_NAME(state->reusable_subtree->symbol));
can_reuse = false;
ts_parser__breakdown_reusable_subtree(state);
continue;
}
if (!can_reuse) {
if (!ts_parser__breakdown_reusable_subtree(state)) {
LOG("dont_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol));
ts_parser__pop_reusable_subtree(state);
}
if (!ts_parser__can_reuse(self, head, state->reusable_subtree)) {
LOG("breakdown_unreusable sym:%s", SYM_NAME(state->reusable_subtree->symbol));
ts_parser__breakdown_reusable_subtree(state);
continue;
}
@ -217,8 +233,6 @@ static TSTree *ts_parser__select_tree(void *data, TSTree *left, TSTree *right) {
static bool ts_parser__shift(TSParser *self, int head, TSStateId parse_state,
TSTree *lookahead) {
if (self->language->symbol_metadata[lookahead->symbol].extra)
ts_tree_set_fragile(lookahead);
if (ts_stack_push(self->stack, head, parse_state, lookahead)) {
LOG("merge head:%d", head);
vector_erase(&self->lookahead_states, head);
@ -643,10 +657,8 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) {
if (!ts_parser__can_reuse(self, head, lookahead) ||
position.chars != last_position.chars) {
TSTree *reused_lookahead = ts_parser__get_next_lookahead(self, head);
if (ts_parser__can_reuse(self, head, reused_lookahead)) {
lookahead = reused_lookahead;
} else {
lookahead = ts_parser__get_next_lookahead(self, head);
if (!lookahead) {
ts_lexer_reset(&self->lexer, position);
TSStateId parse_state = ts_stack_top_state(self->stack, head);
TSStateId lex_state = self->language->lex_states[parse_state];

View file

@ -18,10 +18,9 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength padding, TSLength size,
.named_child_count = 0,
.children = NULL,
.padding = padding,
.options =
{
.visible = metadata.visible, .named = metadata.named,
},
.options.visible = metadata.visible,
.options.named = metadata.named,
.context.lex_state = TSTREE_LEX_STATE_INDEPENDENT,
};
if (sym == ts_builtin_sym_error) {
@ -92,6 +91,7 @@ void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) {
}
if (child_count > 0) {
self->context.lex_state = children[0]->context.lex_state;
if (children[0]->options.fragile_left)
self->options.fragile_left = true;
if (children[child_count - 1]->options.fragile_right)

View file

@ -9,11 +9,14 @@ extern "C" {
#include "tree_sitter/parser.h"
#include "runtime/length.h"
/*
 * Sentinel stored in a tree's context.lex_state when the tree is not tied to
 * any particular lex state; ts_parser__can_reuse() skips its lex-state
 * comparison for trees carrying this value. It is the largest value an
 * unsigned short can hold (presumably the underlying type of TSStateId --
 * confirm in tree_sitter/parser.h).
 *
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand wherever it is used, e.g. `sizeof TSTREE_LEX_STATE_INDEPENDENT`.
 */
#define TSTREE_LEX_STATE_INDEPENDENT ((unsigned short)(-1))
struct TSTree {
struct {
struct TSTree *parent;
size_t index;
TSLength offset;
TSStateId lex_state;
} context;
size_t child_count;
size_t visible_child_count;