Refine logic for deciding when tokens need to be re-lexed
* While generating the lex table, note which tokens can match the same string. A token needs to be re-lexed when it has possible homonyms in the current state.
* Also note which tokens can match substrings of other tokens. A token needs to be re-lexed when there are viable tokens that could match longer strings in the current state and the next token has been edited.
* Remove the logic for marking tokens as fragile on creation.
* Move the reusability/non-reusability of symbols off of individual actions and onto the entire entry for the state & symbol.
This commit is contained in:
parent
45f7cee0c8
commit
38c144b4a3
19 changed files with 337 additions and 257 deletions
|
|
@ -6,38 +6,30 @@ static const TSParseAction ERROR_SHIFT_EXTRA = {
|
|||
.type = TSParseActionTypeShift, .extra = true,
|
||||
};
|
||||
|
||||
const TSParseAction *ts_language_actions(const TSLanguage *self, TSStateId state,
|
||||
TSSymbol symbol, size_t *count) {
|
||||
void ts_language_table_entry(const TSLanguage *self, TSStateId state,
|
||||
TSSymbol symbol, TableEntry *result) {
|
||||
if (state == ts_parse_state_error) {
|
||||
*count = 1;
|
||||
if (symbol == ts_builtin_sym_error)
|
||||
return &ERROR_SHIFT_EXTRA;
|
||||
else if (self->recovery_actions[symbol].type == TSParseActionTypeError)
|
||||
return &ERROR_SHIFT_EXTRA;
|
||||
result->action_count = 1;
|
||||
result->is_reusable = false;
|
||||
result->depends_on_lookahead = false;
|
||||
if (symbol == ts_builtin_sym_error ||
|
||||
self->recovery_actions[symbol].type == TSParseActionTypeError)
|
||||
result->actions = &ERROR_SHIFT_EXTRA;
|
||||
else
|
||||
return &self->recovery_actions[symbol];
|
||||
result->actions = &self->recovery_actions[symbol];
|
||||
return;
|
||||
}
|
||||
|
||||
size_t action_index = 0;
|
||||
if (symbol != ts_builtin_sym_error)
|
||||
action_index = self->parse_table[state * self->symbol_count + symbol];
|
||||
size_t action_index =
|
||||
(symbol != ts_builtin_sym_error)
|
||||
? self->parse_table[state * self->symbol_count + symbol]
|
||||
: 0;
|
||||
|
||||
*count = self->parse_actions[action_index].count;
|
||||
const TSParseActionEntry *entry = self->parse_actions + action_index + 1;
|
||||
return (const TSParseAction *)entry;
|
||||
}
|
||||
|
||||
TSParseAction ts_language_last_action(const TSLanguage *self, TSStateId state,
|
||||
TSSymbol sym) {
|
||||
size_t count;
|
||||
const TSParseAction *actions = ts_language_actions(self, state, sym, &count);
|
||||
return actions[count - 1];
|
||||
}
|
||||
|
||||
bool ts_language_has_action(const TSLanguage *self, TSStateId state,
|
||||
TSSymbol symbol) {
|
||||
TSParseAction action = ts_language_last_action(self, state, symbol);
|
||||
return action.type != TSParseActionTypeError;
|
||||
const TSParseActionEntry *entry = &self->parse_actions[action_index];
|
||||
result->action_count = entry->count;
|
||||
result->is_reusable = entry->reusable;
|
||||
result->depends_on_lookahead = entry->depends_on_lookahead;
|
||||
result->actions = (const TSParseAction *)(entry + 1);
|
||||
}
|
||||
|
||||
size_t ts_language_symbol_count(const TSLanguage *language) {
|
||||
|
|
|
|||
|
|
@ -8,13 +8,48 @@ extern "C" {
|
|||
#include "tree_sitter/parser.h"
|
||||
#include "runtime/tree.h"
|
||||
|
||||
typedef struct {
|
||||
const TSParseAction *actions;
|
||||
size_t action_count;
|
||||
bool is_reusable;
|
||||
bool depends_on_lookahead;
|
||||
} TableEntry;
|
||||
|
||||
void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol,
|
||||
TableEntry *);
|
||||
|
||||
bool ts_language_symbol_is_in_progress(const TSLanguage *, TSStateId, TSSymbol);
|
||||
|
||||
const TSParseAction *ts_language_actions(const TSLanguage *, TSStateId,
|
||||
TSSymbol, size_t *);
|
||||
TSParseAction ts_language_last_action(const TSLanguage *, TSStateId, TSSymbol);
|
||||
static inline const TSParseAction *ts_language_actions(const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol,
|
||||
size_t *count) {
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self, state, symbol, &entry);
|
||||
*count = entry.action_count;
|
||||
return entry.actions;
|
||||
}
|
||||
|
||||
bool ts_language_has_action(const TSLanguage *, TSStateId, TSSymbol);
|
||||
static inline TSParseAction ts_language_last_action(const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol) {
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self, state, symbol, &entry);
|
||||
return entry.actions[entry.action_count - 1];
|
||||
}
|
||||
|
||||
static inline bool ts_language_has_action(const TSLanguage *self,
|
||||
TSStateId state, TSSymbol symbol) {
|
||||
TSParseAction action = ts_language_last_action(self, state, symbol);
|
||||
return action.type != TSParseActionTypeError;
|
||||
}
|
||||
|
||||
static inline bool ts_language_is_reusable(const TSLanguage *self,
|
||||
TSStateId state, TSSymbol symbol) {
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self, state, symbol, &entry);
|
||||
return entry.is_reusable;
|
||||
}
|
||||
|
||||
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
|
||||
|
||||
|
|
|
|||
|
|
@ -137,7 +137,6 @@ void ts_lexer_start(TSLexer *self, TSStateId lex_state) {
|
|||
self->starting_state = lex_state;
|
||||
self->token_start_position = self->current_position;
|
||||
self->result_follows_error = false;
|
||||
self->result_is_fragile = false;
|
||||
self->result_symbol = 0;
|
||||
self->first_unexpected_character = 0;
|
||||
|
||||
|
|
@ -156,13 +155,11 @@ void ts_lexer_finish(TSLexer *self, TSLexerResult *result) {
|
|||
result->size =
|
||||
ts_length_sub(self->error_end_position, self->token_start_position);
|
||||
result->first_unexpected_character = self->first_unexpected_character;
|
||||
result->is_fragile = true;
|
||||
ts_lexer_reset(self, self->error_end_position);
|
||||
} else {
|
||||
result->symbol = self->result_symbol;
|
||||
result->size =
|
||||
ts_length_sub(self->current_position, self->token_start_position);
|
||||
result->is_fragile = self->result_is_fragile;
|
||||
self->token_end_position = self->current_position;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ typedef struct {
|
|||
TSSymbol symbol;
|
||||
TSLength padding;
|
||||
TSLength size;
|
||||
bool is_fragile;
|
||||
int32_t first_unexpected_character;
|
||||
} TSLexerResult;
|
||||
|
||||
|
|
|
|||
|
|
@ -211,28 +211,64 @@ static bool ts_parser__can_reuse(TSParser *self, StackVersion version,
|
|||
if (tree->symbol == ts_builtin_sym_error)
|
||||
return false;
|
||||
|
||||
if (ts_tree_is_fragile(tree) &&
|
||||
tree->parse_state != ts_stack_top_state(self->stack, version))
|
||||
return false;
|
||||
TSStateId state = ts_stack_top_state(self->stack, version);
|
||||
if (tree->parse_state != state) {
|
||||
if (ts_tree_is_fragile(tree)) {
|
||||
LOG_ACTION("cant_reuse_fragile tree:%s", SYM_NAME(tree->symbol));
|
||||
return false;
|
||||
}
|
||||
|
||||
TSStateId top_state = ts_stack_top_state(self->stack, version);
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self->language, state, tree->symbol, &entry);
|
||||
|
||||
if (tree->lex_state != TS_TREE_STATE_INDEPENDENT &&
|
||||
tree->lex_state != ts_language_lex_state(self->language, top_state))
|
||||
return false;
|
||||
if (!entry.is_reusable) {
|
||||
LOG_ACTION("cant_reuse tree:%s", SYM_NAME(tree->symbol));
|
||||
return false;
|
||||
}
|
||||
|
||||
const TSParseAction action =
|
||||
ts_language_last_action(self->language, top_state, tree->symbol);
|
||||
if (action.type == TSParseActionTypeError || action.can_hide_split)
|
||||
return false;
|
||||
TSParseAction action = entry.actions[entry.action_count - 1];
|
||||
if (action.type == TSParseActionTypeError) {
|
||||
LOG_ACTION("cant_reuse_unexpected tree:%s", SYM_NAME(tree->symbol));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (tree->extra && !action.extra)
|
||||
return false;
|
||||
if (tree->extra != action.extra) {
|
||||
LOG_ACTION("cant_reuse_extra tree:%s", SYM_NAME(tree->symbol));
|
||||
return false;
|
||||
}
|
||||
|
||||
TSStateId lex_state = ts_language_lex_state(self->language, state);
|
||||
if (tree->first_leaf.lex_state != lex_state) {
|
||||
if (tree->child_count > 0) {
|
||||
TableEntry leaf_entry;
|
||||
ts_language_table_entry(self->language, state, tree->first_leaf.symbol,
|
||||
&leaf_entry);
|
||||
|
||||
if (!leaf_entry.is_reusable) {
|
||||
LOG_ACTION("cant_reuse_first_leaf tree:%s, leaf:%s",
|
||||
SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (tree->child_count == 1 && leaf_entry.depends_on_lookahead) {
|
||||
LOG_ACTION("cant_reuse_lookahead_dependent tree:%s, leaf:%s", SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol));
|
||||
return false;
|
||||
}
|
||||
} else if (entry.depends_on_lookahead) {
|
||||
LOG_ACTION("cant_reuse_lookahead_dependent tree:%s", SYM_NAME(tree->symbol));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static TSTree *ts_parser__lex(TSParser *self, TSStateId state, bool error_mode) {
|
||||
static TSTree *ts_parser__lex(TSParser *self, TSStateId parse_state,
|
||||
bool error_mode) {
|
||||
TSStateId state = error_mode ? 0 : self->language->lex_states[parse_state];
|
||||
LOG("lex state:%d", state);
|
||||
|
||||
TSLength position = self->lexer.current_position;
|
||||
|
||||
ts_lexer_start(&self->lexer, state);
|
||||
|
|
@ -247,6 +283,7 @@ static TSTree *ts_parser__lex(TSParser *self, TSStateId state, bool error_mode)
|
|||
|
||||
TSTree *result;
|
||||
if (lex_result.symbol == ts_builtin_sym_error) {
|
||||
LOG("accept_error_token");
|
||||
result = ts_tree_make_error(lex_result.size, lex_result.padding,
|
||||
lex_result.first_unexpected_character);
|
||||
} else {
|
||||
|
|
@ -254,14 +291,12 @@ static TSTree *ts_parser__lex(TSParser *self, TSStateId state, bool error_mode)
|
|||
result = ts_tree_make_leaf(
|
||||
lex_result.symbol, lex_result.padding, lex_result.size,
|
||||
ts_language_symbol_metadata(self->language, lex_result.symbol));
|
||||
if (!result)
|
||||
return NULL;
|
||||
result->parse_state = parse_state;
|
||||
result->first_leaf.lex_state = state;
|
||||
}
|
||||
|
||||
if (!result)
|
||||
return NULL;
|
||||
|
||||
if (lex_result.is_fragile)
|
||||
result->lex_state = state;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -284,21 +319,20 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version,
|
|||
if (reusable_node->tree->child_count == 0)
|
||||
ts_parser__breakdown_top_of_stack(self, version);
|
||||
|
||||
LOG("breakdown_changed sym:%s", SYM_NAME(reusable_node->tree->symbol));
|
||||
LOG_ACTION("breakdown_changed sym:%s", SYM_NAME(reusable_node->tree->symbol));
|
||||
ts_parser__breakdown_reusable_node(reusable_node);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!ts_parser__can_reuse(self, version, reusable_node->tree)) {
|
||||
LOG("breakdown_unreusable sym:%s", SYM_NAME(reusable_node->tree->symbol));
|
||||
ts_parser__breakdown_reusable_node(reusable_node);
|
||||
continue;
|
||||
}
|
||||
|
||||
TSTree *result = reusable_node->tree;
|
||||
TSLength size = ts_tree_total_size(result);
|
||||
LOG("reuse sym:%s size:%lu extra:%d", SYM_NAME(result->symbol), size.chars,
|
||||
result->extra);
|
||||
LOG_ACTION("reuse sym:%s size:%lu extra:%d", SYM_NAME(result->symbol),
|
||||
size.chars, result->extra);
|
||||
ts_parser__pop_reusable_node(reusable_node);
|
||||
ts_tree_retain(result);
|
||||
return result;
|
||||
|
|
@ -307,9 +341,7 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version,
|
|||
ts_lexer_reset(&self->lexer, position);
|
||||
TSStateId parse_state = ts_stack_top_state(self->stack, version);
|
||||
bool error_mode = parse_state == ts_parse_state_error;
|
||||
TSStateId lex_state = error_mode ? 0 : self->language->lex_states[parse_state];
|
||||
LOG("lex state:%d", lex_state);
|
||||
return ts_parser__lex(self, lex_state, error_mode);
|
||||
return ts_parser__lex(self, parse_state, error_mode);
|
||||
}
|
||||
|
||||
static bool ts_parser__select_tree(TSParser *self, TSTree *left, TSTree *right) {
|
||||
|
|
@ -487,7 +519,7 @@ static Reduction ts_parser__reduce(TSParser *self, StackVersion version,
|
|||
return (Reduction){ ReduceSucceeded, pop.slices.contents[0] };
|
||||
|
||||
error:
|
||||
return (Reduction){ ReduceFailed };
|
||||
return (Reduction){ ReduceFailed, {} };
|
||||
}
|
||||
|
||||
static inline const TSParseAction *ts_parser__reductions_after_sequence(
|
||||
|
|
|
|||
|
|
@ -28,15 +28,13 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength padding, TSLength size,
|
|||
.padding = padding,
|
||||
.visible = metadata.visible,
|
||||
.named = metadata.named,
|
||||
.lex_state = TS_TREE_STATE_INDEPENDENT,
|
||||
.parse_state = TS_TREE_STATE_INDEPENDENT,
|
||||
.first_leaf =
|
||||
{
|
||||
.symbol = sym, .lex_state = TS_TREE_STATE_INDEPENDENT,
|
||||
},
|
||||
};
|
||||
|
||||
if (sym == ts_builtin_sym_error) {
|
||||
result->fragile_left = true;
|
||||
result->fragile_right = true;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -81,6 +79,8 @@ TSTree *ts_tree_make_error(TSLength size, TSLength padding, char lookahead_char)
|
|||
if (!result)
|
||||
return NULL;
|
||||
|
||||
result->fragile_left = true;
|
||||
result->fragile_right = true;
|
||||
result->lookahead_char = lookahead_char;
|
||||
return result;
|
||||
}
|
||||
|
|
@ -174,7 +174,7 @@ void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) {
|
|||
}
|
||||
|
||||
if (child_count > 0) {
|
||||
self->lex_state = children[0]->lex_state;
|
||||
self->first_leaf = children[0]->first_leaf;
|
||||
if (children[0]->fragile_left)
|
||||
self->fragile_left = true;
|
||||
if (children[child_count - 1]->fragile_right)
|
||||
|
|
@ -206,9 +206,16 @@ TSTree *ts_tree_make_error_node(TreeArray *children) {
|
|||
}
|
||||
}
|
||||
|
||||
return ts_tree_make_node(
|
||||
TSTree *result = ts_tree_make_node(
|
||||
ts_builtin_sym_error, children->size, children->contents,
|
||||
(TSSymbolMetadata){.extra = false, .visible = true, .named = true });
|
||||
|
||||
if (!result)
|
||||
return NULL;
|
||||
|
||||
result->fragile_left = true;
|
||||
result->fragile_right = true;
|
||||
return result;
|
||||
}
|
||||
|
||||
void ts_tree_retain(TSTree *self) {
|
||||
|
|
|
|||
|
|
@ -32,10 +32,14 @@ typedef struct TSTree {
|
|||
TSLength size;
|
||||
|
||||
TSSymbol symbol;
|
||||
TSStateId lex_state;
|
||||
TSStateId parse_state;
|
||||
size_t error_size;
|
||||
|
||||
struct {
|
||||
TSSymbol symbol;
|
||||
TSStateId lex_state;
|
||||
} first_leaf;
|
||||
|
||||
unsigned short ref_count;
|
||||
bool visible : 1;
|
||||
bool named : 1;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue