Start work toward maintaining external scanner's state during incremental parses

This commit is contained in:
Max Brunsfeld 2016-12-20 17:06:20 -08:00
parent 2b3da512a4
commit e6c82ead2c
9 changed files with 131 additions and 70 deletions

View file

@ -89,7 +89,6 @@ const TSLanguage *load_language(const string &source_filename,
compiler_name,
"-shared",
"-fPIC",
"-g",
"-I", header_dir.c_str(),
"-o", lib_filename.c_str(),
"-x", "c",

View file

@ -5,6 +5,7 @@
#include "helpers/tree_helpers.h"
#include "helpers/point_helpers.h"
#include "helpers/spy_logger.h"
#include "helpers/stderr_logger.h"
#include "helpers/spy_input.h"
#include "helpers/load_language.h"
@ -112,7 +113,7 @@ describe("Document", [&]() {
assert_node_string_equals(
new_root,
"(object (pair (string) (array (null) (number))))");
AssertThat(spy_input->strings_read, Equals(vector<string>({" [null, 2"})));
AssertThat(spy_input->strings_read, Equals(vector<string>({" [null, 2", ""})));
});
it("reads from the new input correctly when the old input was blank", [&]() {

View file

@ -253,7 +253,7 @@ describe("Parser", [&]() {
"(identifier) "
"(math_op (number) (member_access (identifier) (identifier))))))");
AssertThat(input->strings_read, Equals(vector<string>({ " + abc.d)" })));
AssertThat(input->strings_read, Equals(vector<string>({ " + abc.d)", "" })));
});
});
@ -277,7 +277,7 @@ describe("Parser", [&]() {
"(number) "
"(math_op (number) (math_op (number) (identifier)))))))");
AssertThat(input->strings_read, Equals(vector<string>({ "123 || 5 +" })));
AssertThat(input->strings_read, Equals(vector<string>({ "123 || 5 +", "" })));
});
});
@ -415,16 +415,20 @@ describe("Parser", [&]() {
string text = dedent(R"PYTHON(
if a:
print b
return c
)PYTHON");
set_text(text);
assert_root_node("(module "
"(if_statement (identifier) "
"(print_statement (identifier))) "
"(return_statement (expression_list (identifier))))");
replace_text(text.find("return"), 0, " ");
assert_root_node("(module "
"(if_statement (identifier) "
"(print_statement (identifier)) "
"(return_statement (expression_list (identifier)))))");
});
});

View file

@ -12,8 +12,8 @@ class Symbol : public Rule {
typedef int Index;
typedef enum {
Terminal,
External,
Terminal,
NonTerminal,
} Type;

View file

@ -19,6 +19,10 @@ void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
// Reports whether `symbol` lies strictly between 0 and
// `self->external_token_count + 1`.
// NOTE(review): presumably external tokens occupy the symbol ids directly
// after 0 — confirm against the symbol numbering used by the generator.
static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
  // Symbol 0 (and anything not above it) is never treated as external.
  if (!(0 < symbol)) return false;
  return symbol < self->external_token_count + 1;
}
static inline const TSParseAction *ts_language_actions(const TSLanguage *self,
TSStateId state,
TSSymbol symbol,
@ -52,7 +56,11 @@ static inline TSStateId ts_language_next_state(const TSLanguage *self,
static inline const bool *
ts_language_enabled_external_tokens(const TSLanguage *self,
unsigned external_scanner_state) {
return self->external_token_lists + self->external_token_count * external_scanner_state;
if (external_scanner_state == 0) {
return NULL;
} else {
return self->external_token_lists + self->external_token_count * external_scanner_state;
}
}
#ifdef __cplusplus

View file

@ -145,7 +145,6 @@ static bool parser__breakdown_lookahead(Parser *self, Tree **lookahead,
}
if (result) {
LOG("lookahead sym:%s", SYM_NAME(reusable_node->tree->symbol));
ts_tree_release(*lookahead);
ts_tree_retain(*lookahead = reusable_node->tree);
}
@ -161,7 +160,11 @@ static void parser__pop_reusable_node_leaf(ReusableNode *reusable_node) {
static bool parser__can_reuse(Parser *self, TSStateId state, Tree *tree,
TableEntry *table_entry) {
if (tree->first_leaf.lex_state == self->language->lex_modes[state].lex_state)
TSLexMode current_lex_mode = self->language->lex_modes[state];
if (ts_language_is_symbol_external(self->language, tree->first_leaf.symbol)) return false;
if (tree->size.bytes == 0) return false;
if (tree->first_leaf.lex_mode.lex_state == current_lex_mode.lex_state &&
tree->first_leaf.lex_mode.external_tokens == current_lex_mode.external_tokens)
return true;
if (!table_entry->is_reusable)
return false;
@ -208,58 +211,92 @@ static bool parser__condense_stack(Parser *self) {
return result;
}
static bool parser__try_lex(Parser *self, TSLexMode lex_mode) {
Length start_position = self->lexer.current_position;
ts_lexer_start(&self->lexer);
if (lex_mode.external_tokens) {
const bool *external_tokens = ts_language_enabled_external_tokens(
self->language,
lex_mode.external_tokens
);
LOG("lex external:%d, pos:%u",
lex_mode.external_tokens,
self->lexer.current_position.chars
);
if (self->language->external_scanner.scan(
self->external_scanner_payload,
&self->lexer.data,
external_tokens
)) {
self->lexer.data.result_symbol = self->language->external_token_symbol_map[self->lexer.data.result_symbol];
return true;
} else {
ts_lexer_reset(&self->lexer, start_position);
ts_lexer_start(&self->lexer);
static StackIterateAction parser__restore_external_scanner_callback(
void *payload, TSStateId state, TreeArray *trees, uint32_t tree_count,
bool is_done, bool is_pending) {
Parser *self = payload;
if (tree_count > 0) {
Tree *tree = *array_back(trees);
if (tree->has_external_token_state && tree->child_count == 0) {
self->language->external_scanner.deserialize(self->external_scanner_payload, tree->external_token_state);
return StackIterateStop;
}
} else if (is_done) {
self->language->external_scanner.reset(self->external_scanner_payload);
return StackIterateStop;
}
LOG("lex state:%d, pos:%u", lex_mode.lex_state, self->lexer.current_position.chars);
return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state);
return StackIterateNone;
}
static Tree *parser__lex(Parser *self, TSStateId parse_state) {
TSLexMode lex_mode = self->language->lex_modes[parse_state];
TSStateId start_state = lex_mode.lex_state;
Length start_position = self->lexer.current_position;
// Walks the given stack version with parser__restore_external_scanner_callback
// (passing the parser as the callback payload). If the iteration returns at
// least one slice, every tree after the first one in the first slice that is
// flagged `has_external_tokens` is printed to stdout, and that slice's tree
// array is then deleted.
// NOTE(review): the string returned by ts_tree_string is not released here —
// confirm whether the caller owns it.
static void parser__restore_external_scanner(Parser *self, StackVersion version) {
  StackPopResult iteration = ts_stack_iterate(
    self->stack, version, parser__restore_external_scanner_callback, self);

  // Nothing was returned by the iteration, so there is nothing to inspect or free.
  if (iteration.slices.size < 1) return;

  StackSlice first_slice = iteration.slices.contents[0];

  // Index 0 is deliberately skipped, matching the original loop bounds.
  size_t index = 1;
  while (index < first_slice.trees.size) {
    Tree *candidate = first_slice.trees.contents[index];
    index++;
    if (!candidate->has_external_tokens) continue;
    printf("RE-SCANNING TREE: %s\n", ts_tree_string(candidate, self->language, true));
  }

  ts_tree_array_delete(&first_slice.trees);
}
static Tree *parser__lex(Parser *self, StackVersion version) {
TSStateId parse_state = ts_stack_top_state(self->stack, version);
Length start_position = ts_stack_top_position(self->stack, version);
TSLexMode lex_mode = self->language->lex_modes[parse_state];
const bool *external_tokens = ts_language_enabled_external_tokens(
self->language,
lex_mode.external_tokens
);
bool found_external_token = false;
bool found_error = false;
bool skipped_error = false;
int32_t first_error_character = 0;
Length error_start_position, error_end_position;
ts_lexer_reset(&self->lexer, start_position);
for (;;) {
Length current_position = self->lexer.current_position;
if (external_tokens) {
LOG("lex_external state:%d, row:%u, column:%u", lex_mode.external_tokens,
current_position.extent.row, current_position.extent.column);
parser__restore_external_scanner(self, version);
ts_lexer_start(&self->lexer);
if (self->language->external_scanner.scan(self->external_scanner_payload,
&self->lexer.data, external_tokens)) {
found_external_token = true;
break;
}
ts_lexer_reset(&self->lexer, current_position);
}
LOG("lex_internal state:%d, row:%u, column:%u", lex_mode.lex_state,
current_position.extent.row, current_position.extent.column);
ts_lexer_start(&self->lexer);
if (self->language->lex_fn(&self->lexer.data, lex_mode.lex_state)) {
break;
}
while (!parser__try_lex(self, lex_mode)) {
if (!found_error) {
LOG("retry_in_error_mode");
found_error = true;
lex_mode = self->language->lex_modes[ERROR_STATE];
external_tokens = ts_language_enabled_external_tokens(
self->language,
lex_mode.external_tokens
);
ts_lexer_reset(&self->lexer, start_position);
continue;
}
if (!skipped_error) {
LOG("skip_unrecognized_character");
skipped_error = true;
error_start_position = self->lexer.token_start_position;
first_error_character = self->lexer.data.lookahead;
}
@ -272,7 +309,6 @@ static Tree *parser__lex(Parser *self, TSStateId parse_state) {
self->lexer.data.advance(&self->lexer, false);
}
skipped_error = true;
error_end_position = self->lexer.current_position;
}
@ -284,14 +320,26 @@ static Tree *parser__lex(Parser *self, TSStateId parse_state) {
result = ts_tree_make_error(size, padding, first_error_character);
} else {
TSSymbol symbol = self->lexer.data.result_symbol;
if (found_external_token) symbol = self->language->external_token_symbol_map[symbol];
Length padding = length_sub(self->lexer.token_start_position, start_position);
Length size = length_sub(self->lexer.current_position, self->lexer.token_start_position);
TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, symbol);
result = ts_tree_make_leaf(symbol, padding, size, metadata);
if (found_external_token) {
result->has_external_tokens = true;
if (self->language->external_scanner.serialize(self->external_scanner_payload, result->external_token_state)) {
result->has_external_token_state = true;
self->last_external_token = result;
}
}
}
result->parse_state = parse_state;
result->first_leaf.lex_state = start_state;
result->first_leaf.lex_mode = lex_mode;
LOG("lexed_lookahead sym:%s, size:%u", SYM_NAME(result->symbol), result->size.bytes);
return result;
}
@ -301,19 +349,18 @@ static void parser__clear_cached_token(Parser *self) {
}
static Tree *parser__get_lookahead(Parser *self, StackVersion version,
ReusableNode *reusable_node) {
ReusableNode *reusable_node,
bool *is_fresh) {
Length position = ts_stack_top_position(self->stack, version);
while (reusable_node->tree) {
if (reusable_node->byte_index > position.bytes) {
LOG("before_reusable sym:%s, pos:%u",
SYM_NAME(reusable_node->tree->symbol), reusable_node->byte_index);
LOG("before_reusable_node sym:%s", SYM_NAME(reusable_node->tree->symbol));
break;
}
if (reusable_node->byte_index < position.bytes) {
LOG("past_reusable sym:%s, pos:%u",
SYM_NAME(reusable_node->tree->symbol), reusable_node->byte_index);
LOG("past_reusable sym:%s", SYM_NAME(reusable_node->tree->symbol));
parser__pop_reusable_node(reusable_node);
continue;
}
@ -350,9 +397,8 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version,
return self->cached_token;
}
ts_lexer_reset(&self->lexer, position);
TSStateId parse_state = ts_stack_top_state(self->stack, version);
return parser__lex(self, parse_state);
*is_fresh = true;
return parser__lex(self, version);
}
static bool parser__select_tree(Parser *self, Tree *left, Tree *right) {
@ -977,30 +1023,29 @@ static void parser__recover(Parser *self, StackVersion version, TSStateId state,
static void parser__advance(Parser *self, StackVersion version,
ReusableNode *reusable_node) {
bool validated_lookahead = false;
Tree *lookahead = parser__get_lookahead(self, version, reusable_node);
Tree *lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
for (;;) {
TSStateId state = ts_stack_top_state(self->stack, version);
TableEntry table_entry;
ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol,
&table_entry);
ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol, &table_entry);
if (!validated_lookahead) {
if (!parser__can_reuse(self, state, lookahead, &table_entry)) {
if (lookahead == reusable_node->tree)
if (lookahead == reusable_node->tree) {
parser__pop_reusable_node_leaf(reusable_node);
else
} else {
parser__clear_cached_token(self);
}
ts_tree_release(lookahead);
lookahead = parser__get_lookahead(self, version, reusable_node);
lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
continue;
}
validated_lookahead = true;
LOG("lookahead sym:%s, size:%u", SYM_NAME(lookahead->symbol),
lookahead->size.bytes);
LOG("reused_lookahead sym:%s, size:%u", SYM_NAME(lookahead->symbol), lookahead->size.bytes);
}
bool reduction_stopped_at_error = false;
@ -1023,12 +1068,11 @@ static void parser__advance(Parser *self, StackVersion version,
}
if (lookahead->child_count > 0) {
if (parser__breakdown_lookahead(self, &lookahead, state,
reusable_node)) {
if (parser__breakdown_lookahead(self, &lookahead, state, reusable_node)) {
if (!parser__can_reuse(self, state, lookahead, &table_entry)) {
parser__pop_reusable_node(reusable_node);
ts_tree_release(lookahead);
lookahead = parser__get_lookahead(self, version, reusable_node);
lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
}
}
@ -1175,8 +1219,8 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree) {
LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u",
version, ts_stack_version_count(self->stack),
ts_stack_top_state(self->stack, version),
ts_stack_top_position(self->stack, version).extent.row + 1,
ts_stack_top_position(self->stack, version).extent.column + 1);
ts_stack_top_position(self->stack, version).extent.row,
ts_stack_top_position(self->stack, version).extent.column);
parser__advance(self, version, &reusable_node);
LOG_STACK();

View file

@ -30,6 +30,7 @@ typedef struct {
TreePath tree_path1;
TreePath tree_path2;
void *external_scanner_payload;
Tree *last_external_token;
} Parser;
bool parser_init(Parser *);

View file

@ -25,10 +25,7 @@ Tree *ts_tree_make_leaf(TSSymbol sym, Length padding, Length size,
.visible = metadata.visible,
.named = metadata.named,
.has_changes = false,
.first_leaf = {
.symbol = sym,
.lex_state = 0
}
.first_leaf.symbol = sym,
};
return result;
}
@ -111,6 +108,8 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
self->named_child_count = 0;
self->visible_child_count = 0;
self->error_cost = 0;
self->has_external_tokens = false;
self->has_external_token_state = false;
for (uint32_t i = 0; i < child_count; i++) {
Tree *child = children[i];
@ -133,6 +132,9 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
self->named_child_count += child->named_child_count;
}
if (child->has_external_tokens) self->has_external_tokens = true;
if (child->has_external_token_state) self->has_external_token_state = true;
if (child->symbol == ts_builtin_sym_error) {
self->fragile_left = self->fragile_right = true;
self->parse_state = TS_TREE_STATE_NONE;

View file

@ -41,7 +41,7 @@ typedef struct Tree {
struct {
TSSymbol symbol;
TSStateId lex_state;
TSLexMode lex_mode;
} first_leaf;
unsigned short ref_count;
@ -51,6 +51,8 @@ typedef struct Tree {
bool fragile_left : 1;
bool fragile_right : 1;
bool has_changes : 1;
bool has_external_tokens : 1;
bool has_external_token_state : 1;
} Tree;
typedef struct {