Handle memory errors occurring in wasm scanners
* In WASM, use a custom, simple malloc implementation that lets us expicitly reset the heap with a new start location. * When a WASM call traps or errors, propagate that as a parse failure. * Reset the WASM heap after every parse. Co-authored-by: Conrad <conrad@zed.dev>
This commit is contained in:
parent
b19d85aca7
commit
7a9b3076ef
9 changed files with 1795 additions and 2384 deletions
|
|
@ -110,6 +110,7 @@ struct TSParser {
|
|||
Subtree old_tree;
|
||||
TSRangeArray included_range_differences;
|
||||
unsigned included_range_difference_index;
|
||||
bool has_scanner_error;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -337,6 +338,22 @@ static bool ts_parser__better_version_exists(
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool ts_parser__call_main_lex_fn(TSParser *self, TSLexMode lex_mode) {
|
||||
if (ts_language_is_wasm(self->language)) {
|
||||
return ts_wasm_store_call_lex_main(self->wasm_store, lex_mode.lex_state);
|
||||
} else {
|
||||
return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state);
|
||||
}
|
||||
}
|
||||
|
||||
static bool ts_parser__call_keyword_lex_fn(TSParser *self, TSLexMode lex_mode) {
|
||||
if (ts_language_is_wasm(self->language)) {
|
||||
return ts_wasm_store_call_lex_keyword(self->wasm_store, 0);
|
||||
} else {
|
||||
return self->language->keyword_lex_fn(&self->lexer.data, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void ts_parser__external_scanner_create(
|
||||
TSParser *self
|
||||
) {
|
||||
|
|
@ -345,6 +362,9 @@ static void ts_parser__external_scanner_create(
|
|||
self->external_scanner_payload = (void *)(uintptr_t)ts_wasm_store_call_scanner_create(
|
||||
self->wasm_store
|
||||
);
|
||||
if (ts_wasm_store_has_error(self->wasm_store)) {
|
||||
self->has_scanner_error = true;
|
||||
}
|
||||
} else if (self->language->external_scanner.create) {
|
||||
self->external_scanner_payload = self->language->external_scanner.create();
|
||||
}
|
||||
|
|
@ -354,21 +374,17 @@ static void ts_parser__external_scanner_create(
|
|||
static void ts_parser__external_scanner_destroy(
|
||||
TSParser *self
|
||||
) {
|
||||
if (self->language && self->external_scanner_payload) {
|
||||
if (ts_language_is_wasm(self->language)) {
|
||||
if (self->wasm_store) {
|
||||
ts_wasm_store_call_scanner_destroy(
|
||||
self->wasm_store,
|
||||
(uintptr_t)self->external_scanner_payload
|
||||
);
|
||||
}
|
||||
} else if (self->language->external_scanner.destroy) {
|
||||
self->language->external_scanner.destroy(
|
||||
self->external_scanner_payload
|
||||
);
|
||||
}
|
||||
self->external_scanner_payload = NULL;
|
||||
if (
|
||||
self->language &&
|
||||
self->external_scanner_payload &&
|
||||
self->language->external_scanner.destroy &&
|
||||
!ts_language_is_wasm(self->language)
|
||||
) {
|
||||
self->language->external_scanner.destroy(
|
||||
self->external_scanner_payload
|
||||
);
|
||||
}
|
||||
self->external_scanner_payload = NULL;
|
||||
}
|
||||
|
||||
static unsigned ts_parser__external_scanner_serialize(
|
||||
|
|
@ -406,6 +422,9 @@ static void ts_parser__external_scanner_deserialize(
|
|||
data,
|
||||
length
|
||||
);
|
||||
if (ts_wasm_store_has_error(self->wasm_store)) {
|
||||
self->has_scanner_error = true;
|
||||
}
|
||||
} else {
|
||||
self->language->external_scanner.deserialize(
|
||||
self->external_scanner_payload,
|
||||
|
|
@ -419,13 +438,16 @@ static bool ts_parser__external_scanner_scan(
|
|||
TSParser *self,
|
||||
TSStateId external_lex_state
|
||||
) {
|
||||
|
||||
if (ts_language_is_wasm(self->language)) {
|
||||
return ts_wasm_store_call_scanner_scan(
|
||||
bool result = ts_wasm_store_call_scanner_scan(
|
||||
self->wasm_store,
|
||||
(uintptr_t)self->external_scanner_payload,
|
||||
external_lex_state * self->language->external_token_count
|
||||
);
|
||||
if (ts_wasm_store_has_error(self->wasm_store)) {
|
||||
self->has_scanner_error = true;
|
||||
}
|
||||
return result;
|
||||
} else {
|
||||
const bool *valid_external_tokens = ts_language_enabled_external_tokens(
|
||||
self->language,
|
||||
|
|
@ -514,6 +536,7 @@ static Subtree ts_parser__lex(
|
|||
ts_lexer_start(&self->lexer);
|
||||
ts_parser__external_scanner_deserialize(self, external_token);
|
||||
found_token = ts_parser__external_scanner_scan(self, lex_mode.external_lex_state);
|
||||
if (self->has_scanner_error) return NULL_SUBTREE;
|
||||
ts_lexer_finish(&self->lexer, &lookahead_end_byte);
|
||||
|
||||
if (found_token) {
|
||||
|
|
@ -564,11 +587,7 @@ static Subtree ts_parser__lex(
|
|||
current_position.extent.column
|
||||
);
|
||||
ts_lexer_start(&self->lexer);
|
||||
if (ts_language_is_wasm(self->language)) {
|
||||
found_token = ts_wasm_store_call_lex_main(self->wasm_store, lex_mode.lex_state);
|
||||
} else {
|
||||
found_token = self->language->lex_fn(&self->lexer.data, lex_mode.lex_state);
|
||||
}
|
||||
found_token = ts_parser__call_main_lex_fn(self, lex_mode);
|
||||
ts_lexer_finish(&self->lexer, &lookahead_end_byte);
|
||||
if (found_token) break;
|
||||
|
||||
|
|
@ -626,11 +645,7 @@ static Subtree ts_parser__lex(
|
|||
ts_lexer_reset(&self->lexer, self->lexer.token_start_position);
|
||||
ts_lexer_start(&self->lexer);
|
||||
|
||||
if (ts_language_is_wasm(self->language)) {
|
||||
is_keyword = ts_wasm_store_call_lex_keyword(self->wasm_store, 0);
|
||||
} else {
|
||||
is_keyword = self->language->keyword_lex_fn(&self->lexer.data, 0);
|
||||
}
|
||||
is_keyword = ts_parser__call_keyword_lex_fn(self, lex_mode);
|
||||
|
||||
if (
|
||||
is_keyword &&
|
||||
|
|
@ -1527,6 +1542,7 @@ static bool ts_parser__advance(
|
|||
if (needs_lex) {
|
||||
needs_lex = false;
|
||||
lookahead = ts_parser__lex(self, version, state);
|
||||
if (self->has_scanner_error) return false;
|
||||
|
||||
if (lookahead.ptr) {
|
||||
ts_parser__set_cached_token(self, position, last_external_token, lookahead);
|
||||
|
|
@ -1830,6 +1846,9 @@ TSParser *ts_parser_new(void) {
|
|||
self->dot_graph_file = NULL;
|
||||
self->cancellation_flag = NULL;
|
||||
self->timeout_duration = 0;
|
||||
self->language = NULL;
|
||||
self->has_scanner_error = false;
|
||||
self->external_scanner_payload = NULL;
|
||||
self->end_clock = clock_null();
|
||||
self->operation_count = 0;
|
||||
self->old_tree = NULL_SUBTREE;
|
||||
|
|
@ -1965,6 +1984,7 @@ void ts_parser_reset(TSParser *self) {
|
|||
self->finished_tree = NULL_SUBTREE;
|
||||
}
|
||||
self->accept_count = 0;
|
||||
self->has_scanner_error = false;
|
||||
}
|
||||
|
||||
TSTree *ts_parser_parse(
|
||||
|
|
@ -1972,18 +1992,15 @@ TSTree *ts_parser_parse(
|
|||
const TSTree *old_tree,
|
||||
TSInput input
|
||||
) {
|
||||
TSTree *result = NULL;
|
||||
if (!self->language || !input.read) return NULL;
|
||||
|
||||
if (ts_language_is_wasm(self->language)) {
|
||||
if (self->wasm_store) {
|
||||
ts_wasm_store_start(self->wasm_store, &self->lexer.data, self->language);
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
if (!self->wasm_store) return NULL;
|
||||
ts_wasm_store_start(self->wasm_store, &self->lexer.data, self->language);
|
||||
}
|
||||
|
||||
ts_lexer_set_input(&self->lexer, input);
|
||||
|
||||
array_clear(&self->included_range_differences);
|
||||
self->included_range_difference_index = 0;
|
||||
|
||||
|
|
@ -2035,7 +2052,11 @@ TSTree *ts_parser_parse(
|
|||
ts_stack_position(self->stack, version).extent.column
|
||||
);
|
||||
|
||||
if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL;
|
||||
if (!ts_parser__advance(self, version, allow_node_reuse)) {
|
||||
if (self->has_scanner_error) goto exit;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
LOG_STACK();
|
||||
|
||||
position = ts_stack_position(self->stack, version).bytes;
|
||||
|
|
@ -2074,13 +2095,15 @@ TSTree *ts_parser_parse(
|
|||
LOG("done");
|
||||
LOG_TREE(self->finished_tree);
|
||||
|
||||
TSTree *result = ts_tree_new(
|
||||
result = ts_tree_new(
|
||||
self->finished_tree,
|
||||
self->language,
|
||||
self->lexer.included_ranges,
|
||||
self->lexer.included_range_count
|
||||
);
|
||||
self->finished_tree = NULL_SUBTREE;
|
||||
|
||||
exit:
|
||||
ts_parser_reset(self);
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue