Start work toward maintaining external scanner's state during incremental parses
This commit is contained in:
parent
2b3da512a4
commit
e6c82ead2c
9 changed files with 131 additions and 70 deletions
|
|
@ -89,7 +89,6 @@ const TSLanguage *load_language(const string &source_filename,
|
|||
compiler_name,
|
||||
"-shared",
|
||||
"-fPIC",
|
||||
"-g",
|
||||
"-I", header_dir.c_str(),
|
||||
"-o", lib_filename.c_str(),
|
||||
"-x", "c",
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
#include "helpers/tree_helpers.h"
|
||||
#include "helpers/point_helpers.h"
|
||||
#include "helpers/spy_logger.h"
|
||||
#include "helpers/stderr_logger.h"
|
||||
#include "helpers/spy_input.h"
|
||||
#include "helpers/load_language.h"
|
||||
|
||||
|
|
@ -112,7 +113,7 @@ describe("Document", [&]() {
|
|||
assert_node_string_equals(
|
||||
new_root,
|
||||
"(object (pair (string) (array (null) (number))))");
|
||||
AssertThat(spy_input->strings_read, Equals(vector<string>({" [null, 2"})));
|
||||
AssertThat(spy_input->strings_read, Equals(vector<string>({" [null, 2", ""})));
|
||||
});
|
||||
|
||||
it("reads from the new input correctly when the old input was blank", [&]() {
|
||||
|
|
|
|||
|
|
@ -253,7 +253,7 @@ describe("Parser", [&]() {
|
|||
"(identifier) "
|
||||
"(math_op (number) (member_access (identifier) (identifier))))))");
|
||||
|
||||
AssertThat(input->strings_read, Equals(vector<string>({ " + abc.d)" })));
|
||||
AssertThat(input->strings_read, Equals(vector<string>({ " + abc.d)", "" })));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -277,7 +277,7 @@ describe("Parser", [&]() {
|
|||
"(number) "
|
||||
"(math_op (number) (math_op (number) (identifier)))))))");
|
||||
|
||||
AssertThat(input->strings_read, Equals(vector<string>({ "123 || 5 +" })));
|
||||
AssertThat(input->strings_read, Equals(vector<string>({ "123 || 5 +", "" })));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -415,16 +415,20 @@ describe("Parser", [&]() {
|
|||
string text = dedent(R"PYTHON(
|
||||
if a:
|
||||
print b
|
||||
|
||||
return c
|
||||
)PYTHON");
|
||||
|
||||
set_text(text);
|
||||
|
||||
assert_root_node("(module "
|
||||
"(if_statement (identifier) "
|
||||
"(print_statement (identifier))) "
|
||||
"(return_statement (expression_list (identifier))))");
|
||||
|
||||
replace_text(text.find("return"), 0, " ");
|
||||
assert_root_node("(module "
|
||||
"(if_statement (identifier) "
|
||||
"(print_statement (identifier)) "
|
||||
"(return_statement (expression_list (identifier)))))");
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -12,8 +12,8 @@ class Symbol : public Rule {
|
|||
typedef int Index;
|
||||
|
||||
// Category of a Symbol. Each enumerator is declared exactly once (a repeated
// `Terminal` entry is a redefinition and does not compile). The enumerators
// take the values External = 0, Terminal = 1, NonTerminal = 2.
// NOTE(review): External is placed first, which appears consistent with
// ts_language_is_symbol_external treating the lowest symbol ids as external
// tokens — confirm the intended ordering against the symbol-sorting code.
typedef enum {
  External,
  Terminal,
  NonTerminal,
} Type;
|
||||
|
||||
|
|
|
|||
|
|
@ -19,6 +19,10 @@ void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry
|
|||
|
||||
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
|
||||
|
||||
// Reports whether `symbol` lies in the id range 1 .. external_token_count
// (inclusive) of the given language. Symbol 0 is never reported as external.
static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
  if (symbol <= 0) {
    return false;
  }
  // Upper bound kept in its `count + 1` form so the comparison is evaluated
  // exactly as before.
  return symbol < self->external_token_count + 1;
}
|
||||
|
||||
static inline const TSParseAction *ts_language_actions(const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol,
|
||||
|
|
@ -52,7 +56,11 @@ static inline TSStateId ts_language_next_state(const TSLanguage *self,
|
|||
static inline const bool *
|
||||
ts_language_enabled_external_tokens(const TSLanguage *self,
|
||||
unsigned external_scanner_state) {
|
||||
return self->external_token_lists + self->external_token_count * external_scanner_state;
|
||||
if (external_scanner_state == 0) {
|
||||
return NULL;
|
||||
} else {
|
||||
return self->external_token_lists + self->external_token_count * external_scanner_state;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
|
|
@ -145,7 +145,6 @@ static bool parser__breakdown_lookahead(Parser *self, Tree **lookahead,
|
|||
}
|
||||
|
||||
if (result) {
|
||||
LOG("lookahead sym:%s", SYM_NAME(reusable_node->tree->symbol));
|
||||
ts_tree_release(*lookahead);
|
||||
ts_tree_retain(*lookahead = reusable_node->tree);
|
||||
}
|
||||
|
|
@ -161,7 +160,11 @@ static void parser__pop_reusable_node_leaf(ReusableNode *reusable_node) {
|
|||
|
||||
static bool parser__can_reuse(Parser *self, TSStateId state, Tree *tree,
|
||||
TableEntry *table_entry) {
|
||||
if (tree->first_leaf.lex_state == self->language->lex_modes[state].lex_state)
|
||||
TSLexMode current_lex_mode = self->language->lex_modes[state];
|
||||
if (ts_language_is_symbol_external(self->language, tree->first_leaf.symbol)) return false;
|
||||
if (tree->size.bytes == 0) return false;
|
||||
if (tree->first_leaf.lex_mode.lex_state == current_lex_mode.lex_state &&
|
||||
tree->first_leaf.lex_mode.external_tokens == current_lex_mode.external_tokens)
|
||||
return true;
|
||||
if (!table_entry->is_reusable)
|
||||
return false;
|
||||
|
|
@ -208,58 +211,92 @@ static bool parser__condense_stack(Parser *self) {
|
|||
return result;
|
||||
}
|
||||
|
||||
static bool parser__try_lex(Parser *self, TSLexMode lex_mode) {
|
||||
Length start_position = self->lexer.current_position;
|
||||
ts_lexer_start(&self->lexer);
|
||||
|
||||
if (lex_mode.external_tokens) {
|
||||
const bool *external_tokens = ts_language_enabled_external_tokens(
|
||||
self->language,
|
||||
lex_mode.external_tokens
|
||||
);
|
||||
|
||||
LOG("lex external:%d, pos:%u",
|
||||
lex_mode.external_tokens,
|
||||
self->lexer.current_position.chars
|
||||
);
|
||||
|
||||
if (self->language->external_scanner.scan(
|
||||
self->external_scanner_payload,
|
||||
&self->lexer.data,
|
||||
external_tokens
|
||||
)) {
|
||||
self->lexer.data.result_symbol = self->language->external_token_symbol_map[self->lexer.data.result_symbol];
|
||||
return true;
|
||||
} else {
|
||||
ts_lexer_reset(&self->lexer, start_position);
|
||||
ts_lexer_start(&self->lexer);
|
||||
// Stack-iteration callback handed to ts_stack_iterate by
// parser__restore_external_scanner; `payload` is the Parser.
//
// - If at least one tree has been collected (`tree_count > 0`), the tree
//   returned by array_back(trees) is examined; when it is a leaf
//   (`child_count == 0`) that carries external token state, that state is
//   passed to the external scanner's `deserialize` and iteration stops.
// - Otherwise, if `is_done` is set, the external scanner is `reset` and
//   iteration stops.
// - In every other case control falls through to the statements at the end of
//   the function.
//
// `state` and `is_pending` are not read here.
static StackIterateAction parser__restore_external_scanner_callback(
|
||||
void *payload, TSStateId state, TreeArray *trees, uint32_t tree_count,
|
||||
bool is_done, bool is_pending) {
|
||||
Parser *self = payload;
|
||||
if (tree_count > 0) {
|
||||
Tree *tree = *array_back(trees);
|
||||
if (tree->has_external_token_state && tree->child_count == 0) {
|
||||
self->language->external_scanner.deserialize(self->external_scanner_payload, tree->external_token_state);
|
||||
return StackIterateStop;
|
||||
}
|
||||
} else if (is_done) {
|
||||
self->language->external_scanner.reset(self->external_scanner_payload);
|
||||
return StackIterateStop;
|
||||
}
|
||||
|
||||
// NOTE(review): the LOG call and the lex_fn return below read `lex_mode`,
// which is not declared anywhere in this function, and they sit ahead of the
// StackIterateNone return, which they make unreachable. They look like
// leftovers from a different routine — confirm and remove.
LOG("lex state:%d, pos:%u", lex_mode.lex_state, self->lexer.current_position.chars);
|
||||
return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state);
|
||||
// Intended default result: keep iterating without collecting a slice here.
// NOTE(review): meaning of StackIterateNone is inferred from its name — confirm.
return StackIterateNone;
|
||||
}
|
||||
|
||||
static Tree *parser__lex(Parser *self, TSStateId parse_state) {
|
||||
TSLexMode lex_mode = self->language->lex_modes[parse_state];
|
||||
TSStateId start_state = lex_mode.lex_state;
|
||||
Length start_position = self->lexer.current_position;
|
||||
// Walks the given stack version with parser__restore_external_scanner_callback
// (which deserializes or resets the external scanner), then inspects the trees
// of the first slice that the walk produced. Every tree from index 1 onward
// that is flagged `has_external_tokens` is printed to stdout. The slice's tree
// array is deleted before returning. Nothing is done when the walk yields no
// slices.
//
// NOTE(review): index 0 is skipped — presumably that is the tree whose state
// the callback already restored; confirm.
// NOTE(review): the string returned by ts_tree_string is not released here —
// confirm who owns it.
static void parser__restore_external_scanner(Parser *self, StackVersion version) {
  StackPopResult iteration = ts_stack_iterate(
    self->stack, version, parser__restore_external_scanner_callback, self);
  if (iteration.slices.size < 1) {
    return;
  }

  StackSlice first_slice = iteration.slices.contents[0];
  size_t index = 1;
  while (index < first_slice.trees.size) {
    Tree *candidate = first_slice.trees.contents[index];
    if (candidate->has_external_tokens) {
      printf("RE-SCANNING TREE: %s\n", ts_tree_string(candidate, self->language, true));
    }
    index++;
  }

  ts_tree_array_delete(&first_slice.trees);
}
|
||||
|
||||
static Tree *parser__lex(Parser *self, StackVersion version) {
|
||||
TSStateId parse_state = ts_stack_top_state(self->stack, version);
|
||||
Length start_position = ts_stack_top_position(self->stack, version);
|
||||
TSLexMode lex_mode = self->language->lex_modes[parse_state];
|
||||
const bool *external_tokens = ts_language_enabled_external_tokens(
|
||||
self->language,
|
||||
lex_mode.external_tokens
|
||||
);
|
||||
|
||||
bool found_external_token = false;
|
||||
bool found_error = false;
|
||||
bool skipped_error = false;
|
||||
int32_t first_error_character = 0;
|
||||
Length error_start_position, error_end_position;
|
||||
ts_lexer_reset(&self->lexer, start_position);
|
||||
|
||||
for (;;) {
|
||||
Length current_position = self->lexer.current_position;
|
||||
|
||||
if (external_tokens) {
|
||||
LOG("lex_external state:%d, row:%u, column:%u", lex_mode.external_tokens,
|
||||
current_position.extent.row, current_position.extent.column);
|
||||
parser__restore_external_scanner(self, version);
|
||||
ts_lexer_start(&self->lexer);
|
||||
if (self->language->external_scanner.scan(self->external_scanner_payload,
|
||||
&self->lexer.data, external_tokens)) {
|
||||
found_external_token = true;
|
||||
break;
|
||||
}
|
||||
ts_lexer_reset(&self->lexer, current_position);
|
||||
}
|
||||
|
||||
LOG("lex_internal state:%d, row:%u, column:%u", lex_mode.lex_state,
|
||||
current_position.extent.row, current_position.extent.column);
|
||||
ts_lexer_start(&self->lexer);
|
||||
if (self->language->lex_fn(&self->lexer.data, lex_mode.lex_state)) {
|
||||
break;
|
||||
}
|
||||
|
||||
while (!parser__try_lex(self, lex_mode)) {
|
||||
if (!found_error) {
|
||||
LOG("retry_in_error_mode");
|
||||
found_error = true;
|
||||
lex_mode = self->language->lex_modes[ERROR_STATE];
|
||||
external_tokens = ts_language_enabled_external_tokens(
|
||||
self->language,
|
||||
lex_mode.external_tokens
|
||||
);
|
||||
ts_lexer_reset(&self->lexer, start_position);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!skipped_error) {
|
||||
LOG("skip_unrecognized_character");
|
||||
skipped_error = true;
|
||||
error_start_position = self->lexer.token_start_position;
|
||||
first_error_character = self->lexer.data.lookahead;
|
||||
}
|
||||
|
|
@ -272,7 +309,6 @@ static Tree *parser__lex(Parser *self, TSStateId parse_state) {
|
|||
self->lexer.data.advance(&self->lexer, false);
|
||||
}
|
||||
|
||||
skipped_error = true;
|
||||
error_end_position = self->lexer.current_position;
|
||||
}
|
||||
|
||||
|
|
@ -284,14 +320,26 @@ static Tree *parser__lex(Parser *self, TSStateId parse_state) {
|
|||
result = ts_tree_make_error(size, padding, first_error_character);
|
||||
} else {
|
||||
TSSymbol symbol = self->lexer.data.result_symbol;
|
||||
if (found_external_token) symbol = self->language->external_token_symbol_map[symbol];
|
||||
|
||||
Length padding = length_sub(self->lexer.token_start_position, start_position);
|
||||
Length size = length_sub(self->lexer.current_position, self->lexer.token_start_position);
|
||||
TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, symbol);
|
||||
result = ts_tree_make_leaf(symbol, padding, size, metadata);
|
||||
|
||||
if (found_external_token) {
|
||||
result->has_external_tokens = true;
|
||||
if (self->language->external_scanner.serialize(self->external_scanner_payload, result->external_token_state)) {
|
||||
result->has_external_token_state = true;
|
||||
self->last_external_token = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result->parse_state = parse_state;
|
||||
result->first_leaf.lex_state = start_state;
|
||||
result->first_leaf.lex_mode = lex_mode;
|
||||
|
||||
LOG("lexed_lookahead sym:%s, size:%u", SYM_NAME(result->symbol), result->size.bytes);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -301,19 +349,18 @@ static void parser__clear_cached_token(Parser *self) {
|
|||
}
|
||||
|
||||
static Tree *parser__get_lookahead(Parser *self, StackVersion version,
|
||||
ReusableNode *reusable_node) {
|
||||
ReusableNode *reusable_node,
|
||||
bool *is_fresh) {
|
||||
Length position = ts_stack_top_position(self->stack, version);
|
||||
|
||||
while (reusable_node->tree) {
|
||||
if (reusable_node->byte_index > position.bytes) {
|
||||
LOG("before_reusable sym:%s, pos:%u",
|
||||
SYM_NAME(reusable_node->tree->symbol), reusable_node->byte_index);
|
||||
LOG("before_reusable_node sym:%s", SYM_NAME(reusable_node->tree->symbol));
|
||||
break;
|
||||
}
|
||||
|
||||
if (reusable_node->byte_index < position.bytes) {
|
||||
LOG("past_reusable sym:%s, pos:%u",
|
||||
SYM_NAME(reusable_node->tree->symbol), reusable_node->byte_index);
|
||||
LOG("past_reusable sym:%s", SYM_NAME(reusable_node->tree->symbol));
|
||||
parser__pop_reusable_node(reusable_node);
|
||||
continue;
|
||||
}
|
||||
|
|
@ -350,9 +397,8 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version,
|
|||
return self->cached_token;
|
||||
}
|
||||
|
||||
ts_lexer_reset(&self->lexer, position);
|
||||
TSStateId parse_state = ts_stack_top_state(self->stack, version);
|
||||
return parser__lex(self, parse_state);
|
||||
*is_fresh = true;
|
||||
return parser__lex(self, version);
|
||||
}
|
||||
|
||||
static bool parser__select_tree(Parser *self, Tree *left, Tree *right) {
|
||||
|
|
@ -977,30 +1023,29 @@ static void parser__recover(Parser *self, StackVersion version, TSStateId state,
|
|||
static void parser__advance(Parser *self, StackVersion version,
|
||||
ReusableNode *reusable_node) {
|
||||
bool validated_lookahead = false;
|
||||
Tree *lookahead = parser__get_lookahead(self, version, reusable_node);
|
||||
Tree *lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
|
||||
|
||||
for (;;) {
|
||||
TSStateId state = ts_stack_top_state(self->stack, version);
|
||||
|
||||
TableEntry table_entry;
|
||||
ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol,
|
||||
&table_entry);
|
||||
ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol, &table_entry);
|
||||
|
||||
if (!validated_lookahead) {
|
||||
if (!parser__can_reuse(self, state, lookahead, &table_entry)) {
|
||||
if (lookahead == reusable_node->tree)
|
||||
if (lookahead == reusable_node->tree) {
|
||||
parser__pop_reusable_node_leaf(reusable_node);
|
||||
else
|
||||
} else {
|
||||
parser__clear_cached_token(self);
|
||||
}
|
||||
|
||||
ts_tree_release(lookahead);
|
||||
lookahead = parser__get_lookahead(self, version, reusable_node);
|
||||
lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
|
||||
continue;
|
||||
}
|
||||
|
||||
validated_lookahead = true;
|
||||
LOG("lookahead sym:%s, size:%u", SYM_NAME(lookahead->symbol),
|
||||
lookahead->size.bytes);
|
||||
LOG("reused_lookahead sym:%s, size:%u", SYM_NAME(lookahead->symbol), lookahead->size.bytes);
|
||||
}
|
||||
|
||||
bool reduction_stopped_at_error = false;
|
||||
|
|
@ -1023,12 +1068,11 @@ static void parser__advance(Parser *self, StackVersion version,
|
|||
}
|
||||
|
||||
if (lookahead->child_count > 0) {
|
||||
if (parser__breakdown_lookahead(self, &lookahead, state,
|
||||
reusable_node)) {
|
||||
if (parser__breakdown_lookahead(self, &lookahead, state, reusable_node)) {
|
||||
if (!parser__can_reuse(self, state, lookahead, &table_entry)) {
|
||||
parser__pop_reusable_node(reusable_node);
|
||||
ts_tree_release(lookahead);
|
||||
lookahead = parser__get_lookahead(self, version, reusable_node);
|
||||
lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1175,8 +1219,8 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree) {
|
|||
LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u",
|
||||
version, ts_stack_version_count(self->stack),
|
||||
ts_stack_top_state(self->stack, version),
|
||||
ts_stack_top_position(self->stack, version).extent.row + 1,
|
||||
ts_stack_top_position(self->stack, version).extent.column + 1);
|
||||
ts_stack_top_position(self->stack, version).extent.row,
|
||||
ts_stack_top_position(self->stack, version).extent.column);
|
||||
|
||||
parser__advance(self, version, &reusable_node);
|
||||
LOG_STACK();
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ typedef struct {
|
|||
TreePath tree_path1;
|
||||
TreePath tree_path2;
|
||||
void *external_scanner_payload;
|
||||
Tree *last_external_token;
|
||||
} Parser;
|
||||
|
||||
bool parser_init(Parser *);
|
||||
|
|
|
|||
|
|
@ -25,10 +25,7 @@ Tree *ts_tree_make_leaf(TSSymbol sym, Length padding, Length size,
|
|||
.visible = metadata.visible,
|
||||
.named = metadata.named,
|
||||
.has_changes = false,
|
||||
.first_leaf = {
|
||||
.symbol = sym,
|
||||
.lex_state = 0
|
||||
}
|
||||
.first_leaf.symbol = sym,
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
|
@ -111,6 +108,8 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
|
|||
self->named_child_count = 0;
|
||||
self->visible_child_count = 0;
|
||||
self->error_cost = 0;
|
||||
self->has_external_tokens = false;
|
||||
self->has_external_token_state = false;
|
||||
|
||||
for (uint32_t i = 0; i < child_count; i++) {
|
||||
Tree *child = children[i];
|
||||
|
|
@ -133,6 +132,9 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
|
|||
self->named_child_count += child->named_child_count;
|
||||
}
|
||||
|
||||
if (child->has_external_tokens) self->has_external_tokens = true;
|
||||
if (child->has_external_token_state) self->has_external_token_state = true;
|
||||
|
||||
if (child->symbol == ts_builtin_sym_error) {
|
||||
self->fragile_left = self->fragile_right = true;
|
||||
self->parse_state = TS_TREE_STATE_NONE;
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ typedef struct Tree {
|
|||
|
||||
struct {
|
||||
TSSymbol symbol;
|
||||
TSStateId lex_state;
|
||||
TSLexMode lex_mode;
|
||||
} first_leaf;
|
||||
|
||||
unsigned short ref_count;
|
||||
|
|
@ -51,6 +51,8 @@ typedef struct Tree {
|
|||
bool fragile_left : 1;
|
||||
bool fragile_right : 1;
|
||||
bool has_changes : 1;
|
||||
bool has_external_tokens : 1;
|
||||
bool has_external_token_state : 1;
|
||||
} Tree;
|
||||
|
||||
typedef struct {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue