Reuse fragile tokens that came from the current lex state
This commit is contained in:
parent
386b124866
commit
2bcd2e4d00
5 changed files with 54 additions and 35 deletions
|
|
@ -47,6 +47,7 @@ typedef struct TSLexer {
|
|||
|
||||
size_t lookahead_size;
|
||||
int32_t lookahead;
|
||||
TSStateId starting_state;
|
||||
|
||||
TSInput input;
|
||||
TSDebugger debugger;
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ static void ts_lexer__start(TSLexer *self, TSStateId lex_state) {
|
|||
LOG("start_lex state:%d, pos:%lu", lex_state, self->current_position.chars);
|
||||
LOG_LOOKAHEAD();
|
||||
|
||||
self->starting_state = lex_state;
|
||||
if (!self->chunk)
|
||||
ts_lexer__get_chunk(self);
|
||||
if (!self->lookahead_size)
|
||||
|
|
@ -101,7 +102,9 @@ static TSTree *ts_lexer__accept(TSLexer *self, TSSymbol symbol,
|
|||
result = ts_tree_make_leaf(symbol, padding, size, metadata);
|
||||
}
|
||||
|
||||
result->options.fragile_left = fragile;
|
||||
if (fragile)
|
||||
result->context.lex_state = self->starting_state;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -84,19 +84,26 @@ static void ts_parser__breakdown_top_of_stack(TSParser *self, int head) {
|
|||
} while (last_child->child_count > 0);
|
||||
}
|
||||
|
||||
static void ts_parser__pop_reusable_subtree(LookaheadState *state);
|
||||
|
||||
/*
|
||||
* Replace the parser's reusable_subtree with its first non-fragile descendant.
|
||||
* Return true if a suitable descendant is found, false otherwise.
|
||||
*/
|
||||
static bool ts_parser__breakdown_reusable_subtree(LookaheadState *state) {
|
||||
static void ts_parser__breakdown_reusable_subtree(LookaheadState *state) {
|
||||
do {
|
||||
if (state->reusable_subtree->symbol == ts_builtin_sym_error)
|
||||
return false;
|
||||
if (state->reusable_subtree->child_count == 0)
|
||||
return false;
|
||||
if (state->reusable_subtree->symbol == ts_builtin_sym_error) {
|
||||
ts_parser__pop_reusable_subtree(state);
|
||||
return;
|
||||
}
|
||||
|
||||
if (state->reusable_subtree->child_count == 0) {
|
||||
ts_parser__pop_reusable_subtree(state);
|
||||
return;
|
||||
}
|
||||
|
||||
state->reusable_subtree = state->reusable_subtree->children[0];
|
||||
} while (ts_tree_is_fragile(state->reusable_subtree));
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -118,13 +125,30 @@ static void ts_parser__pop_reusable_subtree(LookaheadState *state) {
|
|||
}
|
||||
|
||||
static bool ts_parser__can_reuse(TSParser *self, int head, TSTree *subtree) {
|
||||
if (!subtree || subtree->symbol == ts_builtin_sym_error ||
|
||||
ts_tree_is_fragile(subtree))
|
||||
if (!subtree)
|
||||
return false;
|
||||
if (subtree->symbol == ts_builtin_sym_error)
|
||||
return false;
|
||||
if (ts_tree_is_fragile(subtree))
|
||||
return false;
|
||||
|
||||
TSStateId state = ts_stack_top_state(self->stack, head);
|
||||
|
||||
if (subtree->context.lex_state != TSTREE_LEX_STATE_INDEPENDENT) {
|
||||
TSStateId lex_state = self->language->lex_states[state];
|
||||
if (subtree->context.lex_state != lex_state)
|
||||
return false;
|
||||
}
|
||||
|
||||
const TSParseAction *action =
|
||||
ts_language_actions(self->language, state, subtree->symbol);
|
||||
return action->type != TSParseActionTypeError && !action->can_hide_split;
|
||||
if (action->type == TSParseActionTypeError || action->can_hide_split)
|
||||
return false;
|
||||
|
||||
if (ts_tree_is_extra(subtree) && !action->extra)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -142,33 +166,25 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) {
|
|||
}
|
||||
|
||||
if (state->reusable_subtree_pos < position.chars) {
|
||||
LOG("past_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol));
|
||||
LOG("past_reusable sym:%s", SYM_NAME(state->reusable_subtree->symbol));
|
||||
ts_parser__pop_reusable_subtree(state);
|
||||
continue;
|
||||
}
|
||||
|
||||
bool can_reuse = true;
|
||||
if (ts_tree_has_changes(state->reusable_subtree)) {
|
||||
if (state->is_verifying) {
|
||||
ts_parser__breakdown_top_of_stack(self, head);
|
||||
state->is_verifying = false;
|
||||
}
|
||||
|
||||
LOG("breakdown_changed sym:%s", SYM_NAME(state->reusable_subtree->symbol));
|
||||
can_reuse = false;
|
||||
} else if (ts_tree_is_extra(state->reusable_subtree)) {
|
||||
LOG("breakdown_extra sym:%s", SYM_NAME(state->reusable_subtree->symbol));
|
||||
can_reuse = false;
|
||||
} else if (!ts_parser__can_reuse(self, head, state->reusable_subtree)) {
|
||||
LOG("breakdown_non_reusable sym:%s",
|
||||
SYM_NAME(state->reusable_subtree->symbol));
|
||||
can_reuse = false;
|
||||
ts_parser__breakdown_reusable_subtree(state);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!can_reuse) {
|
||||
if (!ts_parser__breakdown_reusable_subtree(state)) {
|
||||
LOG("dont_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol));
|
||||
ts_parser__pop_reusable_subtree(state);
|
||||
}
|
||||
if (!ts_parser__can_reuse(self, head, state->reusable_subtree)) {
|
||||
LOG("breakdown_unreusable sym:%s", SYM_NAME(state->reusable_subtree->symbol));
|
||||
ts_parser__breakdown_reusable_subtree(state);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -217,8 +233,6 @@ static TSTree *ts_parser__select_tree(void *data, TSTree *left, TSTree *right) {
|
|||
|
||||
static bool ts_parser__shift(TSParser *self, int head, TSStateId parse_state,
|
||||
TSTree *lookahead) {
|
||||
if (self->language->symbol_metadata[lookahead->symbol].extra)
|
||||
ts_tree_set_fragile(lookahead);
|
||||
if (ts_stack_push(self->stack, head, parse_state, lookahead)) {
|
||||
LOG("merge head:%d", head);
|
||||
vector_erase(&self->lookahead_states, head);
|
||||
|
|
@ -643,10 +657,8 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) {
|
|||
|
||||
if (!ts_parser__can_reuse(self, head, lookahead) ||
|
||||
position.chars != last_position.chars) {
|
||||
TSTree *reused_lookahead = ts_parser__get_next_lookahead(self, head);
|
||||
if (ts_parser__can_reuse(self, head, reused_lookahead)) {
|
||||
lookahead = reused_lookahead;
|
||||
} else {
|
||||
lookahead = ts_parser__get_next_lookahead(self, head);
|
||||
if (!lookahead) {
|
||||
ts_lexer_reset(&self->lexer, position);
|
||||
TSStateId parse_state = ts_stack_top_state(self->stack, head);
|
||||
TSStateId lex_state = self->language->lex_states[parse_state];
|
||||
|
|
|
|||
|
|
@ -18,10 +18,9 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength padding, TSLength size,
|
|||
.named_child_count = 0,
|
||||
.children = NULL,
|
||||
.padding = padding,
|
||||
.options =
|
||||
{
|
||||
.visible = metadata.visible, .named = metadata.named,
|
||||
},
|
||||
.options.visible = metadata.visible,
|
||||
.options.named = metadata.named,
|
||||
.context.lex_state = TSTREE_LEX_STATE_INDEPENDENT,
|
||||
};
|
||||
|
||||
if (sym == ts_builtin_sym_error) {
|
||||
|
|
@ -92,6 +91,7 @@ void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) {
|
|||
}
|
||||
|
||||
if (child_count > 0) {
|
||||
self->context.lex_state = children[0]->context.lex_state;
|
||||
if (children[0]->options.fragile_left)
|
||||
self->options.fragile_left = true;
|
||||
if (children[child_count - 1]->options.fragile_right)
|
||||
|
|
|
|||
|
|
@ -9,11 +9,14 @@ extern "C" {
|
|||
#include "tree_sitter/parser.h"
|
||||
#include "runtime/length.h"
|
||||
|
||||
/*
 * Sentinel stored in a tree's context.lex_state: leaves start out with this
 * value, and the reuse check skips its lex-state comparison when it sees it.
 * The value is the all-ones unsigned short. The expansion is fully
 * parenthesized so it always behaves as a single operand (e.g. under sizeof
 * or next to postfix operators).
 * NOTE(review): presumably matches the width of TSStateId -- confirm.
 */
#define TSTREE_LEX_STATE_INDEPENDENT ((unsigned short)(-1))
|
||||
|
||||
struct TSTree {
|
||||
struct {
|
||||
struct TSTree *parent;
|
||||
size_t index;
|
||||
TSLength offset;
|
||||
TSStateId lex_state;
|
||||
} context;
|
||||
size_t child_count;
|
||||
size_t visible_child_count;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue