Handle memory errors occurring in wasm scanners

* In WASM, use a custom, simple malloc implementation that lets us
  explicitly reset the heap with a new start location.
* When a WASM call traps or errors, propagate that as a parse failure.
* Reset the WASM heap after every parse.

Co-authored-by: Conrad <conrad@zed.dev>
This commit is contained in:
Max Brunsfeld 2024-03-13 23:34:17 -07:00
parent b19d85aca7
commit 7a9b3076ef
9 changed files with 1795 additions and 2384 deletions

View file

@ -81,7 +81,6 @@ fn record_alloc(ptr: *mut c_void) {
}
fn record_dealloc(ptr: *mut c_void) {
assert!(!ptr.is_null(), "Zero pointer deallocation!");
RECORDER.with(|recorder| {
if recorder.enabled.load(SeqCst) {
recorder

View file

@ -29,7 +29,71 @@ fn test_wasm_stdlib_symbols() {
}
#[test]
fn test_load_wasm_language() {
fn test_load_wasm_ruby_language() {
allocations::record(|| {
let mut store = WasmStore::new(ENGINE.clone()).unwrap();
let mut parser = Parser::new();
let wasm = fs::read(WASM_DIR.join("tree-sitter-ruby.wasm")).unwrap();
let language = store.load_language("ruby", &wasm).unwrap();
parser.set_wasm_store(store).unwrap();
parser.set_language(&language).unwrap();
let tree = parser.parse("class A; end", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(program (class name: (constant)))"
);
});
}
#[test]
fn test_load_wasm_html_language() {
    // Loads the HTML grammar from its wasm build and checks that a small
    // document with nested elements parses to the expected tree.
    allocations::record(|| {
        let mut wasm_store = WasmStore::new(ENGINE.clone()).unwrap();
        let mut parser = Parser::new();

        let wasm_bytes = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap();
        let html = wasm_store.load_language("html", &wasm_bytes).unwrap();

        // The parser takes ownership of the store before the language is selected.
        parser.set_wasm_store(wasm_store).unwrap();
        parser.set_language(&html).unwrap();

        let source = "<div><span></span><p></p></div>";
        let expected = "(document (element (start_tag (tag_name)) (element (start_tag (tag_name)) (end_tag (tag_name))) (element (start_tag (tag_name)) (end_tag (tag_name))) (end_tag (tag_name))))";

        let tree = parser.parse(source, None).unwrap();
        assert_eq!(tree.root_node().to_sexp(), expected);
    });
}
#[test]
fn test_load_wasm_rust_language() {
    // Loads the Rust grammar from its wasm build and parses a minimal
    // function definition.
    allocations::record(|| {
        let mut wasm_store = WasmStore::new(ENGINE.clone()).unwrap();
        let mut parser = Parser::new();

        let wasm_bytes = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
        let rust = wasm_store.load_language("rust", &wasm_bytes).unwrap();

        // The parser takes ownership of the store before the language is selected.
        parser.set_wasm_store(wasm_store).unwrap();
        parser.set_language(&rust).unwrap();

        let expected = "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))";
        let tree = parser.parse("fn main() {}", None).unwrap();
        assert_eq!(tree.root_node().to_sexp(), expected);
    });
}
#[test]
fn test_load_wasm_javascript_language() {
    // Loads the JavaScript grammar from its wasm build and parses two
    // declarations separated only by a newline.
    allocations::record(|| {
        let mut wasm_store = WasmStore::new(ENGINE.clone()).unwrap();
        let mut parser = Parser::new();

        let wasm_bytes = fs::read(WASM_DIR.join("tree-sitter-javascript.wasm")).unwrap();
        let javascript = wasm_store.load_language("javascript", &wasm_bytes).unwrap();

        // The parser takes ownership of the store before the language is selected.
        parser.set_wasm_store(wasm_store).unwrap();
        parser.set_language(&javascript).unwrap();

        let source = "const a = b\nconst c = d";
        let expected = "(program (lexical_declaration (variable_declarator name: (identifier) value: (identifier))) (lexical_declaration (variable_declarator name: (identifier) value: (identifier))))";

        let tree = parser.parse(source, None).unwrap();
        assert_eq!(tree.root_node().to_sexp(), expected);
    });
}
#[test]
fn test_load_multiple_wasm_languages() {
allocations::record(|| {
let mut store = WasmStore::new(ENGINE.clone()).unwrap();
let mut parser = Parser::new();
@ -158,3 +222,28 @@ fn test_load_wasm_errors() {
);
});
}
#[test]
fn test_wasm_oom() {
    // A parse with an enormous tag name is expected to fail (return `None`),
    // and the same parser must then still handle a normal-sized document.
    allocations::record(|| {
        let mut wasm_store = WasmStore::new(ENGINE.clone()).unwrap();
        let mut parser = Parser::new();

        let wasm_bytes = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap();
        let html = wasm_store.load_language("html", &wasm_bytes).unwrap();
        parser.set_wasm_store(wasm_store).unwrap();
        parser.set_language(&html).unwrap();

        // First parse: a tag name several megabytes long.
        let huge_tag = "a-b".repeat(2 * 1024 * 1024);
        let huge_doc = format!("<{huge_tag}>hello world</{huge_tag}>");
        assert!(parser.parse(&huge_doc, None).is_none());

        // Second parse: re-select the language, then parse a small document.
        let small_tag = "a-b".repeat(20);
        let small_doc = format!("<{small_tag}>hello world</{small_tag}>");
        parser.set_language(&html).unwrap();

        let tree = parser.parse(&small_doc, None).unwrap();
        let expected = "(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))";
        assert_eq!(tree.root_node().to_sexp(), expected);
    });
}

View file

@ -62,6 +62,7 @@ pub fn compile_language_to_wasm(
"__cxa_atexit",
"abort",
"emscripten_notify_memory_growth",
"tree_sitter_debug_message",
"proc_exit",
];

View file

@ -110,6 +110,7 @@ struct TSParser {
Subtree old_tree;
TSRangeArray included_range_differences;
unsigned included_range_difference_index;
bool has_scanner_error;
};
typedef struct {
@ -337,6 +338,22 @@ static bool ts_parser__better_version_exists(
return false;
}
// Run the current language's main lex function for the given lex mode,
// going through the wasm store when the language is wasm-backed and calling
// the native function pointer otherwise. Returns the lex function's result.
static bool ts_parser__call_main_lex_fn(TSParser *self, TSLexMode lex_mode) {
  if (!ts_language_is_wasm(self->language)) {
    return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state);
  }
  return ts_wasm_store_call_lex_main(self->wasm_store, lex_mode.lex_state);
}
// Run the current language's keyword lex function, going through the wasm
// store when the language is wasm-backed and calling the native function
// pointer otherwise. Both paths always start from lex state 0; `lex_mode`
// is accepted but not consulted.
static bool ts_parser__call_keyword_lex_fn(TSParser *self, TSLexMode lex_mode) {
  if (!ts_language_is_wasm(self->language)) {
    return self->language->keyword_lex_fn(&self->lexer.data, 0);
  }
  return ts_wasm_store_call_lex_keyword(self->wasm_store, 0);
}
static void ts_parser__external_scanner_create(
TSParser *self
) {
@ -345,6 +362,9 @@ static void ts_parser__external_scanner_create(
self->external_scanner_payload = (void *)(uintptr_t)ts_wasm_store_call_scanner_create(
self->wasm_store
);
if (ts_wasm_store_has_error(self->wasm_store)) {
self->has_scanner_error = true;
}
} else if (self->language->external_scanner.create) {
self->external_scanner_payload = self->language->external_scanner.create();
}
@ -354,21 +374,17 @@ static void ts_parser__external_scanner_create(
static void ts_parser__external_scanner_destroy(
TSParser *self
) {
if (self->language && self->external_scanner_payload) {
if (ts_language_is_wasm(self->language)) {
if (self->wasm_store) {
ts_wasm_store_call_scanner_destroy(
self->wasm_store,
(uintptr_t)self->external_scanner_payload
);
}
} else if (self->language->external_scanner.destroy) {
self->language->external_scanner.destroy(
self->external_scanner_payload
);
}
self->external_scanner_payload = NULL;
if (
self->language &&
self->external_scanner_payload &&
self->language->external_scanner.destroy &&
!ts_language_is_wasm(self->language)
) {
self->language->external_scanner.destroy(
self->external_scanner_payload
);
}
self->external_scanner_payload = NULL;
}
static unsigned ts_parser__external_scanner_serialize(
@ -406,6 +422,9 @@ static void ts_parser__external_scanner_deserialize(
data,
length
);
if (ts_wasm_store_has_error(self->wasm_store)) {
self->has_scanner_error = true;
}
} else {
self->language->external_scanner.deserialize(
self->external_scanner_payload,
@ -419,13 +438,16 @@ static bool ts_parser__external_scanner_scan(
TSParser *self,
TSStateId external_lex_state
) {
if (ts_language_is_wasm(self->language)) {
return ts_wasm_store_call_scanner_scan(
bool result = ts_wasm_store_call_scanner_scan(
self->wasm_store,
(uintptr_t)self->external_scanner_payload,
external_lex_state * self->language->external_token_count
);
if (ts_wasm_store_has_error(self->wasm_store)) {
self->has_scanner_error = true;
}
return result;
} else {
const bool *valid_external_tokens = ts_language_enabled_external_tokens(
self->language,
@ -514,6 +536,7 @@ static Subtree ts_parser__lex(
ts_lexer_start(&self->lexer);
ts_parser__external_scanner_deserialize(self, external_token);
found_token = ts_parser__external_scanner_scan(self, lex_mode.external_lex_state);
if (self->has_scanner_error) return NULL_SUBTREE;
ts_lexer_finish(&self->lexer, &lookahead_end_byte);
if (found_token) {
@ -564,11 +587,7 @@ static Subtree ts_parser__lex(
current_position.extent.column
);
ts_lexer_start(&self->lexer);
if (ts_language_is_wasm(self->language)) {
found_token = ts_wasm_store_call_lex_main(self->wasm_store, lex_mode.lex_state);
} else {
found_token = self->language->lex_fn(&self->lexer.data, lex_mode.lex_state);
}
found_token = ts_parser__call_main_lex_fn(self, lex_mode);
ts_lexer_finish(&self->lexer, &lookahead_end_byte);
if (found_token) break;
@ -626,11 +645,7 @@ static Subtree ts_parser__lex(
ts_lexer_reset(&self->lexer, self->lexer.token_start_position);
ts_lexer_start(&self->lexer);
if (ts_language_is_wasm(self->language)) {
is_keyword = ts_wasm_store_call_lex_keyword(self->wasm_store, 0);
} else {
is_keyword = self->language->keyword_lex_fn(&self->lexer.data, 0);
}
is_keyword = ts_parser__call_keyword_lex_fn(self, lex_mode);
if (
is_keyword &&
@ -1527,6 +1542,7 @@ static bool ts_parser__advance(
if (needs_lex) {
needs_lex = false;
lookahead = ts_parser__lex(self, version, state);
if (self->has_scanner_error) return false;
if (lookahead.ptr) {
ts_parser__set_cached_token(self, position, last_external_token, lookahead);
@ -1830,6 +1846,9 @@ TSParser *ts_parser_new(void) {
self->dot_graph_file = NULL;
self->cancellation_flag = NULL;
self->timeout_duration = 0;
self->language = NULL;
self->has_scanner_error = false;
self->external_scanner_payload = NULL;
self->end_clock = clock_null();
self->operation_count = 0;
self->old_tree = NULL_SUBTREE;
@ -1965,6 +1984,7 @@ void ts_parser_reset(TSParser *self) {
self->finished_tree = NULL_SUBTREE;
}
self->accept_count = 0;
self->has_scanner_error = false;
}
TSTree *ts_parser_parse(
@ -1972,18 +1992,15 @@ TSTree *ts_parser_parse(
const TSTree *old_tree,
TSInput input
) {
TSTree *result = NULL;
if (!self->language || !input.read) return NULL;
if (ts_language_is_wasm(self->language)) {
if (self->wasm_store) {
ts_wasm_store_start(self->wasm_store, &self->lexer.data, self->language);
} else {
return NULL;
}
if (!self->wasm_store) return NULL;
ts_wasm_store_start(self->wasm_store, &self->lexer.data, self->language);
}
ts_lexer_set_input(&self->lexer, input);
array_clear(&self->included_range_differences);
self->included_range_difference_index = 0;
@ -2035,7 +2052,11 @@ TSTree *ts_parser_parse(
ts_stack_position(self->stack, version).extent.column
);
if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL;
if (!ts_parser__advance(self, version, allow_node_reuse)) {
if (self->has_scanner_error) goto exit;
return NULL;
}
LOG_STACK();
position = ts_stack_position(self->stack, version).bytes;
@ -2074,13 +2095,15 @@ TSTree *ts_parser_parse(
LOG("done");
LOG_TREE(self->finished_tree);
TSTree *result = ts_tree_new(
result = ts_tree_new(
self->finished_tree,
self->language,
self->lexer.included_ranges,
self->lexer.included_range_count
);
self->finished_tree = NULL_SUBTREE;
exit:
ts_parser_reset(self);
return result;
}

109
lib/src/wasm/stdlib.c Normal file
View file

@ -0,0 +1,109 @@
// This file implements a very simple allocator for external scanners running
// in WASM. Allocation is just bumping a static pointer and growing the heap
// as needed, and freeing is mostly a noop. But in the special case of freeing
// the last-allocated pointer, we'll reuse that pointer again.
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
extern void tree_sitter_debug_message(const char *, size_t);
#define PAGESIZE 0x10000
#define MAX_HEAP_SIZE (4 * 1024 * 1024)
// Bookkeeping header for one allocation. `malloc` hands out the address of
// `data`, and `region_for_ptr` steps back by one `Region` from that address
// to find this header again.
typedef struct {
// Payload size in bytes, exactly as it was passed to `malloc`.
size_t size;
// Marks where the payload begins; declared with zero length, so the payload
// bytes themselves are whatever follows the header in memory.
char data[0];
} Region;
static Region *heap_end = NULL;
static Region *heap_start = NULL;
static Region *next = NULL;
// Map a payload pointer (as returned by malloc) back to the Region header
// that sits directly in front of it.
static inline Region *region_for_ptr(void *ptr) {
  Region *payload_start = (Region *)ptr;
  return payload_start - 1;
}
// Compute where the next region header would start if `self` held a payload
// of `len` bytes: the end of that payload, rounded up to a multiple of 4.
static inline Region *region_after(Region *self, size_t len) {
  char *payload_end = self->data + len;
  uintptr_t rounded = (uintptr_t)(payload_end + 3) & ~0x3;
  return (Region *)(char *)rounded;
}
// Return the address one past the end of linear memory 0: its current size
// in pages multiplied by the page size.
static void *get_heap_end() {
  size_t page_count = __builtin_wasm_memory_size(0);
  return (void *)(page_count * PAGESIZE);
}
// Grow linear memory 0 by enough whole pages to hold `size` more bytes.
// Returns 1 on success and 0 when the grow request is refused.
static int grow_heap(size_t size) {
  size_t pages_needed = (size - 1) / PAGESIZE + 1;
  size_t grow_result = __builtin_wasm_memory_grow(0, pages_needed);
  if (grow_result == SIZE_MAX) {
    return 0;
  }
  return 1;
}
// Clear out the heap, and move it to the given address.
void reset_heap(void *new_heap_start) {
heap_start = new_heap_start;
next = new_heap_start;
heap_end = get_heap_end();
}
// Allocate `size` bytes by bumping the `next` pointer, growing linear memory
// when the new region would not fit.
//
// Returns a pointer to the payload, or NULL when the request would push the
// heap past MAX_HEAP_SIZE or when memory cannot be grown. On failure no
// allocator state is modified.
void *malloc(size_t size) {
  // The whole heap is capped at MAX_HEAP_SIZE, so a larger request can never
  // succeed. Rejecting it up front also keeps the address arithmetic below
  // from wrapping around for huge sizes, which would otherwise let the
  // bounds checks pass with a bogus `region_end`.
  if (size > MAX_HEAP_SIZE) return NULL;

  Region *region_end = region_after(next, size);

  if (region_end > heap_end) {
    if ((char *)region_end - (char *)heap_start > MAX_HEAP_SIZE) {
      return NULL;
    }

    // Grow by the actual shortfall rather than by `size`: the region also
    // needs room for its header and alignment padding, so `size` bytes alone
    // can leave the tail of the region outside of memory.
    size_t shortfall = (size_t)((char *)region_end - (char *)heap_end);
    if (!grow_heap(shortfall)) return NULL;
    heap_end = get_heap_end();
  }

  void *result = &next->data;
  next->size = size;
  next = region_end;

  return result;
}
// Release an allocation. Only the most recently allocated region is actually
// reclaimed (by rewinding `next` onto it); freeing anything else, or NULL,
// does nothing.
void free(void *ptr) {
  if (ptr != NULL) {
    Region *freed = region_for_ptr(ptr);
    Region *freed_end = region_after(freed, freed->size);
    if (freed_end == next) next = freed;
  }
}
// Allocate zero-initialized storage for `count` elements of `size` bytes.
//
// Returns NULL when `count * size` does not fit in a size_t or when the
// underlying malloc fails; memory is only cleared on success.
void *calloc(size_t count, size_t size) {
  // Reject requests whose byte count would overflow, instead of silently
  // allocating a much smaller block than the caller asked for.
  if (size != 0 && count > SIZE_MAX / size) return NULL;

  size_t total = count * size;
  void *result = malloc(total);
  if (result != NULL) {
    memset(result, 0, total);
  }
  return result;
}
// Resize an allocation to `new_size` bytes.
//
// A NULL `ptr` behaves like malloc. When `ptr` is the most recently
// allocated region it is resized in place and the same pointer is returned.
// Otherwise a new region is allocated and the old contents are copied over.
// Returns NULL on failure, in which case the original allocation is left
// intact.
void *realloc(void *ptr, size_t new_size) {
  if (ptr == NULL) {
    return malloc(new_size);
  }

  Region *region = region_for_ptr(ptr);
  Region *region_end = region_after(region, region->size);

  // When reallocating the last allocated region, return
  // the same pointer, and skip copying the data.
  if (region_end == next) {
    next = region;
    void *resized = malloc(new_size);
    // A failed malloc leaves the region header untouched, so undoing the
    // rewind is enough to keep the caller's original block allocated.
    if (resized == NULL) {
      next = region_end;
    }
    return resized;
  }

  void *result = malloc(new_size);
  if (result == NULL) return NULL;

  // Copy only what fits: shrinking must not write past the new region.
  size_t bytes_to_copy = region->size < new_size ? region->size : new_size;
  memcpy(result, &region->data, bytes_to_copy);
  return result;
}

File diff suppressed because it is too large Load diff

View file

@ -15,14 +15,14 @@
#include "./wasm_store.h"
#include "./wasm/wasm-stdlib.h"
#define array_len(a) (sizeof(a) / sizeof(a[0]))
// The following symbols from the C and C++ standard libraries are available
// for external scanners to use.
const char *STDLIB_SYMBOLS[] = {
#include "./stdlib-symbols.txt"
};
#define STDLIB_SYMBOL_COUNT (sizeof(STDLIB_SYMBOLS) / sizeof(STDLIB_SYMBOLS[0]))
// The contents of the `dylink.0` custom section of a wasm module,
// as specified by the current WebAssembly dynamic linking ABI proposal.
typedef struct {
@ -70,6 +70,18 @@ typedef struct {
int32_t scanner_scan_fn_index;
} LanguageWasmInstance;
// Function indices recorded while a store is being created. `reset_heap` is
// taken from the stdlib module's export of that name; every other field
// receives the index of a host callback defined for the matching import
// name, and is later wrapped by `get_builtin_extern` when a module asks for
// that import.
typedef struct {
uint32_t reset_heap;
uint32_t proc_exit;
uint32_t abort;
uint32_t assert_fail;
uint32_t notify_memory_growth;
uint32_t debug_message;
uint32_t at_exit;
uint32_t args_get;
uint32_t args_sizes_get;
} BuiltinFunctionIndices;
// TSWasmStore - A struct that allows a given `Parser` to use wasm-backed
// languages. This struct is mutable, and can only be used by one parser at a
// time.
@ -82,11 +94,15 @@ struct TSWasmStore {
LanguageWasmInstance *current_instance;
Array(LanguageWasmInstance) language_instances;
uint32_t current_memory_offset;
uint32_t current_memory_size;
uint32_t current_function_table_offset;
uint16_t *fn_indices;
uint32_t *stdlib_fn_indices;
BuiltinFunctionIndices builtin_fn_indices;
wasmtime_global_t stack_pointer_global;
wasm_globaltype_t *const_i32_type;
wasm_globaltype_t *var_i32_type;
bool has_error;
uint32_t lexer_address;
uint32_t serialization_buffer_address;
};
typedef Array(char) StringData;
@ -147,29 +163,8 @@ typedef struct {
static volatile uint32_t NEXT_LANGUAGE_ID;
// Linear memory layout:
// [ <-- stack | built-in data | heap --> | static data ]
#define STACK_SIZE (64 * 1024)
#define HEAP_SIZE (1024 * 1024)
#define INITIAL_MEMORY_SIZE (4 * 1024 * 1024 / MEMORY_PAGE_SIZE)
#define MAX_MEMORY_SIZE 32768
#define SERIALIZATION_BUFFER_ADDRESS (STACK_SIZE)
#define LEXER_ADDRESS (SERIALIZATION_BUFFER_ADDRESS + TREE_SITTER_SERIALIZATION_BUFFER_SIZE)
#define HEAP_START_ADDRESS (LEXER_ADDRESS + sizeof(LexerInWasmMemory))
#define DATA_START_ADDRESS (HEAP_START_ADDRESS + HEAP_SIZE)
enum FunctionIx {
NULL_IX = 0,
PROC_EXIT_IX,
ABORT_IX,
ASSERT_FAIL_IX,
NOTIFY_MEMORY_GROWTH_IX,
AT_EXIT_IX,
LEXER_ADVANCE_IX,
LEXER_MARK_END_IX,
LEXER_GET_COLUMN_IX,
LEXER_IS_AT_INCLUDED_RANGE_START_IX,
LEXER_EOF_IX,
};
// [ <-- stack | stdlib statics | lexer | serialization_buffer | language statics --> | heap --> ]
#define MAX_MEMORY_SIZE (128 * 1024 * 1024 / MEMORY_PAGE_SIZE)
/************************
* WasmDylinkMemoryInfo
@ -247,14 +242,13 @@ static bool wasm_dylink_info__parse(
* Native callbacks exposed to wasm modules
*******************************************/
static wasm_trap_t *callback__exit(
static wasm_trap_t *callback__abort(
void *env,
wasmtime_caller_t* caller,
wasmtime_val_raw_t *args_and_results,
size_t args_and_results_len
) {
fprintf(stderr, "wasm module called exit");
abort();
return wasmtime_trap_new("wasm module called abort", 24);
}
static wasm_trap_t *callback__notify_memory_growth(
@ -263,11 +257,26 @@ static wasm_trap_t *callback__notify_memory_growth(
wasmtime_val_raw_t *args_and_results,
size_t args_and_results_len
) {
fprintf(stderr, "wasm module called exit");
abort();
return NULL;
}
static wasm_trap_t *callback__at_exit(
// Host callback behind the `tree_sitter_debug_message` import. It expects
// two i32 arguments: the address of a NUL-terminated string in the store's
// linear memory, and an integer value. Both are printed to stdout on one
// line prefixed with "DEBUG:". Never traps.
static wasm_trap_t *callback__debug_message(
  void *env,
  wasmtime_caller_t* caller,
  wasmtime_val_raw_t *args_and_results,
  size_t args_and_results_len
) {
  TSWasmStore *store = env;
  wasmtime_context_t *context = wasmtime_caller_context(caller);
  assert(args_and_results_len == 2);

  uint32_t value = args_and_results[1].i32;
  uint32_t string_address = args_and_results[0].i32;
  uint8_t *memory = wasmtime_memory_data(context, &store->memory);
  uint8_t *message = &memory[string_address];

  printf("DEBUG: %s %u\n", message, value);
  return NULL;
}
static wasm_trap_t *callback__noop(
void *env,
wasmtime_caller_t* caller,
wasmtime_val_raw_t *args_and_results,
@ -291,7 +300,7 @@ static wasm_trap_t *callback__lexer_advance(
lexer->advance(lexer, skip);
uint8_t *memory = wasmtime_memory_data(context, &store->memory);
memcpy(&memory[LEXER_ADDRESS], &lexer->lookahead, sizeof(lexer->lookahead));
memcpy(&memory[store->lexer_address], &lexer->lookahead, sizeof(lexer->lookahead));
return NULL;
}
@ -347,12 +356,11 @@ static wasm_trap_t *callback__lexer_eof(
}
typedef struct {
uint32_t *storage_location;
wasmtime_func_unchecked_callback_t callback;
wasm_functype_t *type;
} FunctionDefinition;
#define array_len(a) (sizeof(a) / sizeof(a[0]))
static void *copy(const void *data, size_t size) {
void *result = ts_malloc(size);
memcpy(result, data, size);
@ -427,17 +435,6 @@ static inline wasm_functype_t* wasm_functype_new_4_0(
return wasm_functype_new(&params, &results);
}
static wasmtime_extern_t get_builtin_func_extern(
wasmtime_context_t *context,
wasmtime_table_t *table,
unsigned index
) {
wasmtime_val_t val;
bool exists = wasmtime_table_get(context, table, index, &val);
assert(exists);
return (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_FUNC, .of.func = val.of.funcref};
}
#define format(output, ...) \
do { \
size_t message_length = snprintf((char *)NULL, 0, __VA_ARGS__); \
@ -463,6 +460,19 @@ void language_id_delete(WasmLanguageId *self) {
}
}
// Build a function-kind extern that refers to the function with the given
// index, using the store id carried by `table`.
static wasmtime_extern_t get_builtin_extern(
  wasmtime_table_t *table,
  unsigned index
) {
  wasmtime_func_t func = {
    .store_id = table->store_id,
    .index = index
  };
  wasmtime_extern_t result = {.kind = WASMTIME_EXTERN_FUNC, .of.func = func};
  return result;
}
static bool ts_wasm_store__provide_builtin_import(
TSWasmStore *self,
const wasm_name_t *import_name,
@ -484,18 +494,8 @@ static bool ts_wasm_store__provide_builtin_import(
error = wasmtime_global_new(context, self->const_i32_type, &value, &global);
assert(!error);
*import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_GLOBAL, .of.global = global};
} else if (name_eq(import_name, "__heap_base")) {
wasmtime_val_t value = WASM_I32_VAL(HEAP_START_ADDRESS);
wasmtime_global_t global;
error = wasmtime_global_new(context, self->var_i32_type, &value, &global);
assert(!error);
*import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_GLOBAL, .of.global = global};
} else if (name_eq(import_name, "__stack_pointer")) {
wasmtime_val_t value = WASM_I32_VAL(STACK_SIZE);
wasmtime_global_t global;
error = wasmtime_global_new(context, self->var_i32_type, &value, &global);
assert(!error);
*import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_GLOBAL, .of.global = global};
*import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_GLOBAL, .of.global = self->stack_pointer_global};
} else if (name_eq(import_name, "__indirect_function_table")) {
*import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_TABLE, .of.table = self->function_table};
} else if (name_eq(import_name, "memory")) {
@ -504,15 +504,21 @@ static bool ts_wasm_store__provide_builtin_import(
// Builtin functions
else if (name_eq(import_name, "__assert_fail")) {
*import = get_builtin_func_extern(context, &self->function_table, ASSERT_FAIL_IX);
*import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.assert_fail);
} else if (name_eq(import_name, "__cxa_atexit")) {
*import = get_builtin_func_extern(context, &self->function_table, AT_EXIT_IX);
*import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.at_exit);
} else if (name_eq(import_name, "args_get")) {
*import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.args_get);
} else if (name_eq(import_name, "args_sizes_get")) {
*import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.args_sizes_get);
} else if (name_eq(import_name, "abort")) {
*import = get_builtin_func_extern(context, &self->function_table, ABORT_IX);
*import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.abort);
} else if (name_eq(import_name, "proc_exit")) {
*import = get_builtin_func_extern(context, &self->function_table, PROC_EXIT_IX);
*import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.proc_exit);
} else if (name_eq(import_name, "emscripten_notify_memory_growth")) {
*import = get_builtin_func_extern(context, &self->function_table, NOTIFY_MEMORY_GROWTH_IX);
*import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.notify_memory_growth);
} else if (name_eq(import_name, "tree_sitter_debug_message")) {
*import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.debug_message);
} else {
return false;
}
@ -528,7 +534,8 @@ static bool ts_wasm_store__call_module_initializer(
) {
if (
name_eq(export_name, "_initialize") ||
name_eq(export_name, "__wasm_apply_data_relocs")
name_eq(export_name, "__wasm_apply_data_relocs") ||
name_eq(export_name, "__wasm_call_ctors")
) {
wasmtime_context_t *context = wasmtime_store_context(self->store);
wasmtime_func_t initialization_func = export->of.func;
@ -551,7 +558,7 @@ TSWasmStore *ts_wasm_store_new(TSWasmEngine *engine, TSWasmError *wasm_error) {
wasmtime_extern_t *imports = NULL;
// Initialize store's memory
wasm_limits_t memory_limits = {.min = INITIAL_MEMORY_SIZE, .max = MAX_MEMORY_SIZE};
wasm_limits_t memory_limits = {.min = 4, .max = MAX_MEMORY_SIZE};
wasm_memorytype_t *memory_type = wasm_memorytype_new(&memory_limits);
wasmtime_memory_t memory;
error = wasmtime_memory_new(context, memory_type, &memory);
@ -567,40 +574,11 @@ TSWasmStore *ts_wasm_store_new(TSWasmEngine *engine, TSWasmError *wasm_error) {
}
wasm_memorytype_delete(memory_type);
// Initialize lexer struct with function pointers in wasm memory.
uint8_t *memory_data = wasmtime_memory_data(context, &memory);
LexerInWasmMemory lexer = {
.lookahead = 0,
.result_symbol = 0,
.advance = LEXER_ADVANCE_IX,
.mark_end = LEXER_MARK_END_IX,
.get_column = LEXER_GET_COLUMN_IX,
.is_at_included_range_start = LEXER_IS_AT_INCLUDED_RANGE_START_IX,
.eof = LEXER_EOF_IX,
};
memcpy(&memory_data[LEXER_ADDRESS], &lexer, sizeof(lexer));
// Define builtin functions.
FunctionDefinition definitions[] = {
[NULL_IX] = {NULL, NULL},
[PROC_EXIT_IX] = {callback__exit, wasm_functype_new_1_0(wasm_valtype_new_i32())},
[ABORT_IX] = {callback__exit, wasm_functype_new_0_0()},
[ASSERT_FAIL_IX] = {callback__exit, wasm_functype_new_4_0(wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32())},
[NOTIFY_MEMORY_GROWTH_IX] = {callback__notify_memory_growth, wasm_functype_new_1_0(wasm_valtype_new_i32())},
[AT_EXIT_IX] = {callback__at_exit, wasm_functype_new_3_1(wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32())},
[LEXER_ADVANCE_IX] = {callback__lexer_advance, wasm_functype_new_2_0(wasm_valtype_new_i32(), wasm_valtype_new_i32())},
[LEXER_MARK_END_IX] = {callback__lexer_mark_end, wasm_functype_new_1_0(wasm_valtype_new_i32())},
[LEXER_GET_COLUMN_IX] = {callback__lexer_get_column, wasm_functype_new_1_1(wasm_valtype_new_i32(), wasm_valtype_new_i32())},
[LEXER_IS_AT_INCLUDED_RANGE_START_IX] = {callback__lexer_is_at_included_range_start, wasm_functype_new_1_1(wasm_valtype_new_i32(), wasm_valtype_new_i32())},
[LEXER_EOF_IX] = {callback__lexer_eof, wasm_functype_new_1_1(wasm_valtype_new_i32(), wasm_valtype_new_i32())},
};
unsigned definitions_len = array_len(definitions);
// Add builtin functions to the store's function table.
wasmtime_table_t function_table;
wasm_limits_t table_limits = {.min = definitions_len, .max = wasm_limits_max_default};
// Initialize store's function table
wasm_limits_t table_limits = {.min = 1, .max = wasm_limits_max_default};
wasm_tabletype_t *table_type = wasm_tabletype_new(wasm_valtype_new(WASM_FUNCREF), &table_limits);
wasmtime_val_t initializer = {.kind = WASMTIME_FUNCREF};
wasmtime_table_t function_table;
error = wasmtime_table_new(context, table_type, &initializer, &function_table);
if (error) {
wasmtime_error_message(error, &message);
@ -614,48 +592,124 @@ TSWasmStore *ts_wasm_store_new(TSWasmEngine *engine, TSWasmError *wasm_error) {
}
wasm_tabletype_delete(table_type);
uint32_t prev_size;
error = wasmtime_table_grow(context, &function_table, definitions_len, &initializer, &prev_size);
if (error) {
wasmtime_error_message(error, &message);
wasm_error->kind = TSWasmErrorKindAllocate;
format(
&wasm_error->message,
"failed to grow wasm table to initial size: %.*s",
(int)message.size, message.data
);
goto error;
}
unsigned stdlib_symbols_len = array_len(STDLIB_SYMBOLS);
for (unsigned i = 1; i < definitions_len; i++) {
FunctionDefinition *definition = &definitions[i];
wasmtime_func_t func;
wasmtime_func_new_unchecked(context, definition->type, definition->callback, self, NULL, &func);
wasmtime_val_t func_val = {.kind = WASMTIME_FUNCREF, .of.funcref = func};
error = wasmtime_table_set(context, &function_table, i, &func_val);
assert(!error);
wasm_functype_delete(definition->type);
}
// Define globals for the stack and heap start addresses.
wasm_globaltype_t *const_i32_type = wasm_globaltype_new(wasm_valtype_new_i32(), WASM_CONST);
wasm_globaltype_t *var_i32_type = wasm_globaltype_new(wasm_valtype_new_i32(), WASM_VAR);
wasmtime_val_t stack_pointer_value = WASM_I32_VAL(0);
wasmtime_global_t stack_pointer_global;
error = wasmtime_global_new(context, var_i32_type, &stack_pointer_value, &stack_pointer_global);
assert(!error);
*self = (TSWasmStore) {
.store = store,
.engine = engine,
.store = store,
.memory = memory,
.language_instances = array_new(),
.function_table = function_table,
.language_instances = array_new(),
.stdlib_fn_indices = ts_calloc(stdlib_symbols_len, sizeof(uint32_t)),
.stack_pointer_global = stack_pointer_global,
.current_memory_offset = 0,
.fn_indices = ts_calloc(STDLIB_SYMBOL_COUNT, sizeof(uint16_t)),
.current_memory_size = 64 * MEMORY_PAGE_SIZE,
.current_function_table_offset = definitions_len,
.const_i32_type = wasm_globaltype_new(wasm_valtype_new_i32(), WASM_CONST),
.var_i32_type = wasm_globaltype_new(wasm_valtype_new_i32(), WASM_VAR),
.current_function_table_offset = 0,
.const_i32_type = const_i32_type,
.var_i32_type = var_i32_type,
};
WasmDylinkInfo dylink_info;
if (!wasm_dylink_info__parse(STDLIB_WASM, STDLIB_WASM_LEN, &dylink_info)) {
wasm_error->kind = TSWasmErrorKindParse;
format(&wasm_error->message, "failed to parse wasm stdlib");
goto error;
// Define lexer callback functions.
LexerInWasmMemory lexer = {
.lookahead = 0,
.result_symbol = 0,
};
FunctionDefinition lexer_definitions[] = {
{
(uint32_t *)&lexer.advance,
callback__lexer_advance,
wasm_functype_new_2_0(wasm_valtype_new_i32(), wasm_valtype_new_i32())
},
{
(uint32_t *)&lexer.mark_end,
callback__lexer_mark_end,
wasm_functype_new_1_0(wasm_valtype_new_i32())
},
{
(uint32_t *)&lexer.get_column,
callback__lexer_get_column,
wasm_functype_new_1_1(wasm_valtype_new_i32(), wasm_valtype_new_i32())
},
{
(uint32_t *)&lexer.is_at_included_range_start,
callback__lexer_is_at_included_range_start,
wasm_functype_new_1_1(wasm_valtype_new_i32(), wasm_valtype_new_i32())
},
{
(uint32_t *)&lexer.eof,
callback__lexer_eof,
wasm_functype_new_1_1(wasm_valtype_new_i32(), wasm_valtype_new_i32())
},
};
// Define builtin functions used by scanners.
FunctionDefinition builtin_definitions[] = {
{
&self->builtin_fn_indices.proc_exit,
callback__abort,
wasm_functype_new_1_0(wasm_valtype_new_i32())
},
{
&self->builtin_fn_indices.abort,
callback__abort,
wasm_functype_new_0_0()
},
{
&self->builtin_fn_indices.assert_fail,
callback__abort,
wasm_functype_new_4_0(wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32())
},
{
&self->builtin_fn_indices.notify_memory_growth,
callback__notify_memory_growth,
wasm_functype_new_1_0(wasm_valtype_new_i32())
},
{
&self->builtin_fn_indices.debug_message,
callback__debug_message,
wasm_functype_new_2_0(wasm_valtype_new_i32(), wasm_valtype_new_i32())
},
{
&self->builtin_fn_indices.at_exit,
callback__noop,
wasm_functype_new_3_1(wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32())
},
{
&self->builtin_fn_indices.args_get,
callback__noop,
wasm_functype_new_2_1(wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32())
},
{
&self->builtin_fn_indices.args_sizes_get,
callback__noop,
wasm_functype_new_2_1(wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32())
},
};
// Create wasm functions.
unsigned builtin_definitions_len = array_len(builtin_definitions);
unsigned lexer_definitions_len = array_len(lexer_definitions);
for (unsigned i = 0; i < builtin_definitions_len; i++) {
FunctionDefinition *definition = &builtin_definitions[i];
wasmtime_func_t func;
wasmtime_func_new_unchecked(context, definition->type, definition->callback, self, NULL, &func);
*definition->storage_location = func.index;
wasm_functype_delete(definition->type);
}
for (unsigned i = 0; i < lexer_definitions_len; i++) {
FunctionDefinition *definition = &lexer_definitions[i];
wasmtime_func_t func;
wasmtime_func_new_unchecked(context, definition->type, definition->callback, self, NULL, &func);
*definition->storage_location = func.index;
wasm_functype_delete(definition->type);
}
wasmtime_module_t *stdlib_module;
@ -715,11 +769,8 @@ TSWasmStore *ts_wasm_store_new(TSWasmEngine *engine, TSWasmError *wasm_error) {
}
wasm_importtype_vec_delete(&import_types);
self->current_memory_offset = DATA_START_ADDRESS + dylink_info.memory_size;
self->current_function_table_offset += dylink_info.table_size;
for (unsigned i = 0; i < STDLIB_SYMBOL_COUNT; i++) {
self->fn_indices[i] = UINT16_MAX;
for (unsigned i = 0; i < stdlib_symbols_len; i++) {
self->stdlib_fn_indices[i] = UINT32_MAX;
}
// Process the stdlib module's exports.
@ -734,6 +785,12 @@ TSWasmStore *ts_wasm_store_new(TSWasmEngine *engine, TSWasmError *wasm_error) {
bool exists = wasmtime_instance_export_nth(context, &instance, i, &export_name, &name_len, &export);
assert(exists);
if (export.kind == WASMTIME_EXTERN_GLOBAL) {
if (name_eq(name, "__stack_pointer")) {
self->stack_pointer_global = export.of.global;
}
}
if (export.kind == WASMTIME_EXTERN_FUNC) {
if (ts_wasm_store__call_module_initializer(self, name, &export, &trap)) {
if (trap) {
@ -749,17 +806,31 @@ TSWasmStore *ts_wasm_store_new(TSWasmEngine *engine, TSWasmError *wasm_error) {
continue;
}
for (unsigned j = 0; j < array_len(STDLIB_SYMBOLS); j++) {
if (name_eq(name, "reset_heap")) {
self->builtin_fn_indices.reset_heap = export.of.func.index;
continue;
}
for (unsigned j = 0; j < stdlib_symbols_len; j++) {
if (name_eq(name, STDLIB_SYMBOLS[j])) {
self->fn_indices[j] = export.of.func.index;
self->stdlib_fn_indices[j] = export.of.func.index;
break;
}
}
}
}
for (unsigned i = 0; i < STDLIB_SYMBOL_COUNT; i++) {
if (self->fn_indices[i] == UINT16_MAX) {
if (self->builtin_fn_indices.reset_heap == UINT32_MAX) {
wasm_error->kind = TSWasmErrorKindInstantiate;
format(
&wasm_error->message,
"missing malloc reset function in wasm stdlib"
);
goto error;
}
for (unsigned i = 0; i < stdlib_symbols_len; i++) {
if (self->stdlib_fn_indices[i] == UINT32_MAX) {
wasm_error->kind = TSWasmErrorKindInstantiate;
format(
&wasm_error->message,
@ -771,6 +842,48 @@ TSWasmStore *ts_wasm_store_new(TSWasmEngine *engine, TSWasmError *wasm_error) {
}
wasm_exporttype_vec_delete(&export_types);
// Add lexer callback functions to the function table. Replace the func indices in the lexer
// struct with the function table indices which serve as function pointer addresses.
uint32_t prev_size;
error = wasmtime_table_grow(context, &function_table, lexer_definitions_len, &initializer, &prev_size);
if (error) {
wasmtime_error_message(error, &message);
wasm_error->kind = TSWasmErrorKindAllocate;
format(
&wasm_error->message,
"failed to grow wasm table to initial size: %.*s",
(int)message.size, message.data
);
goto error;
}
uint32_t table_index = prev_size;
for (unsigned i = 0; i < lexer_definitions_len; i++) {
FunctionDefinition *definition = &lexer_definitions[i];
wasmtime_func_t func = {function_table.store_id, *definition->storage_location};
wasmtime_val_t func_val = {.kind = WASMTIME_FUNCREF, .of.funcref = func};
error = wasmtime_table_set(context, &function_table, table_index, &func_val);
assert(!error);
*(int32_t *)(definition->storage_location) = table_index;
table_index++;
}
self->current_function_table_offset = table_index;
self->lexer_address = 2 * MEMORY_PAGE_SIZE;
self->serialization_buffer_address = self->lexer_address + sizeof(LexerInWasmMemory);
self->current_memory_offset = self->serialization_buffer_address + TREE_SITTER_SERIALIZATION_BUFFER_SIZE;
uint64_t prev_memory_size;
wasmtime_memory_grow(
context,
&memory,
(self->current_memory_offset - (2 * MEMORY_PAGE_SIZE) - 1) / MEMORY_PAGE_SIZE + 1,
&prev_memory_size
);
uint8_t *memory_data = wasmtime_memory_data(context, &memory);
memcpy(&memory_data[self->lexer_address], &lexer, sizeof(lexer));
return self;
error:
@ -786,7 +899,7 @@ error:
void ts_wasm_store_delete(TSWasmStore *self) {
if (!self) return;
ts_free(self->fn_indices);
ts_free(self->stdlib_fn_indices);
wasm_globaltype_delete(self->const_i32_type);
wasm_globaltype_delete(self->var_i32_type);
wasmtime_store_delete(self->store);
@ -837,9 +950,10 @@ static bool ts_wasm_store__instantiate(
// Grow the memory to make room for the new data.
uint32_t needed_memory_size = self->current_memory_offset + dylink_info->memory_size;
if (needed_memory_size > self->current_memory_size) {
uint32_t current_memory_size = wasmtime_memory_data_size(context, &self->memory);
if (needed_memory_size > current_memory_size) {
uint32_t pages_to_grow = (
needed_memory_size - self->current_memory_size + MEMORY_PAGE_SIZE - 1) /
needed_memory_size - current_memory_size + MEMORY_PAGE_SIZE - 1) /
MEMORY_PAGE_SIZE;
uint64_t prev_memory_size;
error = wasmtime_memory_grow(context, &self->memory, pages_to_grow, &prev_memory_size);
@ -847,7 +961,6 @@ static bool ts_wasm_store__instantiate(
format(error_message, "invalid memory size %u", dylink_info->memory_size);
goto error;
}
self->current_memory_size += pages_to_grow * MEMORY_PAGE_SIZE;
}
// Construct the language function name as string.
@ -875,7 +988,7 @@ static bool ts_wasm_store__instantiate(
bool defined_in_stdlib = false;
for (unsigned j = 0; j < array_len(STDLIB_SYMBOLS); j++) {
if (name_eq(import_name, STDLIB_SYMBOLS[j])) {
uint16_t address = self->fn_indices[j];
uint16_t address = self->stdlib_fn_indices[j];
imports[i] = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_FUNC, .of.func = {store_id, address}};
defined_in_stdlib = true;
break;
@ -1326,17 +1439,37 @@ bool ts_wasm_store_add_language(
return true;
}
// Reset the wasm stdlib's heap by calling its exported `reset_heap` function,
// passing the store's current memory offset as the single i32 argument.
//
// A wasmtime error or a trap from this call is treated as an invariant
// violation and is checked with `assert`.
void ts_wasm_store_reset_heap(TSWasmStore *self) {
  wasmtime_func_t reset_fn = {
    self->function_table.store_id,
    self->builtin_fn_indices.reset_heap
  };
  wasmtime_val_t heap_start = {
    .kind = WASMTIME_I32,
    .of.i32 = self->current_memory_offset,
  };
  wasm_trap_t *trap = NULL;

  // One argument in, no results out.
  wasmtime_error_t *error = wasmtime_func_call(
    wasmtime_store_context(self->store),
    &reset_fn,
    &heap_start, 1,
    NULL, 0,
    &trap
  );
  assert(!error);
  assert(!trap);
}
// Prepare the store for use with the given lexer and language.
//
// Adds the language to the store via `ts_wasm_store_add_language`; if that
// fails, returns false without touching any other state. Otherwise records the
// lexer and the language's instance as current, clears the error flag, resets
// the wasm heap, and returns true.
bool ts_wasm_store_start(TSWasmStore *self, TSLexer *lexer, const TSLanguage *language) {
  uint32_t instance_index;
  bool language_added = ts_wasm_store_add_language(self, language, &instance_index);
  if (!language_added) {
    return false;
  }

  self->has_error = false;
  self->current_lexer = lexer;
  self->current_instance = &self->language_instances.contents[instance_index];
  ts_wasm_store_reset_heap(self);
  return true;
}
void ts_wasm_store_stop(TSWasmStore *self) {
void ts_wasm_store_reset(TSWasmStore *self) {
self->current_lexer = NULL;
self->current_instance = NULL;
self->has_error = false;
ts_wasm_store_reset_heap(self);
}
static void ts_wasm_store__call(
@ -1354,17 +1487,26 @@ static void ts_wasm_store__call(
wasm_trap_t *trap = NULL;
wasmtime_error_t *error = wasmtime_func_call_unchecked(context, &func, args_and_results, args_and_results_len, &trap);
assert(!error);
if (trap) {
wasm_message_t message;
wasm_trap_message(trap, &message);
fprintf(
stderr,
"trap when calling wasm lexing function %u: %.*s\n",
function_index,
(int)message.size, message.data
);
abort();
if (error) {
// wasm_message_t message;
// wasmtime_error_message(error, &message);
// fprintf(
// stderr,
// "error in wasm module: %.*s\n",
// (int)message.size, message.data
// );
wasmtime_error_delete(error);
self->has_error = true;
} else if (trap) {
// wasm_message_t message;
// wasm_trap_message(trap, &message);
// fprintf(
// stderr,
// "trap in wasm module: %.*s\n",
// (int)message.size, message.data
// );
wasm_trap_delete(trap);
self->has_error = true;
}
}
@ -1372,21 +1514,22 @@ static bool ts_wasm_store__call_lex_function(TSWasmStore *self, unsigned functio
wasmtime_context_t *context = wasmtime_store_context(self->store);
uint8_t *memory_data = wasmtime_memory_data(context, &self->memory);
memcpy(
&memory_data[LEXER_ADDRESS],
&memory_data[self->lexer_address],
&self->current_lexer->lookahead,
sizeof(self->current_lexer->lookahead)
);
wasmtime_val_raw_t args[2] = {
{.i32 = LEXER_ADDRESS},
{.i32 = self->lexer_address},
{.i32 = state},
};
ts_wasm_store__call(self, function_index, args, 2);
if (self->has_error) return false;
bool result = args[0].i32;
memcpy(
&self->current_lexer->lookahead,
&memory_data[LEXER_ADDRESS],
&memory_data[self->lexer_address],
sizeof(self->current_lexer->lookahead) + sizeof(self->current_lexer->result_symbol)
);
return result;
@ -1411,12 +1554,15 @@ bool ts_wasm_store_call_lex_keyword(TSWasmStore *self, TSStateId state) {
// Call the current language instance's scanner-create function and return the
// i32 value it produces (presumably the new scanner's address in wasm memory;
// confirm against the caller). Returns 0 if the call left the store's error
// flag set.
uint32_t ts_wasm_store_call_scanner_create(TSWasmStore *self) {
  // Single slot shared by the (unused) argument and the returned value.
  wasmtime_val_raw_t call_slots[1] = {{.i32 = 0}};
  ts_wasm_store__call(
    self,
    self->current_instance->scanner_create_fn_index,
    call_slots,
    1
  );
  return self->has_error ? 0 : call_slots[0].i32;
}
void ts_wasm_store_call_scanner_destroy(TSWasmStore *self, uint32_t scanner_address) {
wasmtime_val_raw_t args[1] = {{.i32 = scanner_address}};
ts_wasm_store__call(self, self->current_instance->scanner_destroy_fn_index, args, 1);
if (self->current_instance) {
wasmtime_val_raw_t args[1] = {{.i32 = scanner_address}};
ts_wasm_store__call(self, self->current_instance->scanner_destroy_fn_index, args, 1);
}
}
bool ts_wasm_store_call_scanner_scan(
@ -1428,7 +1574,7 @@ bool ts_wasm_store_call_scanner_scan(
uint8_t *memory_data = wasmtime_memory_data(context, &self->memory);
memcpy(
&memory_data[LEXER_ADDRESS],
&memory_data[self->lexer_address],
&self->current_lexer->lookahead,
sizeof(self->current_lexer->lookahead)
);
@ -1438,14 +1584,15 @@ bool ts_wasm_store_call_scanner_scan(
(valid_tokens_ix * sizeof(bool));
wasmtime_val_raw_t args[3] = {
{.i32 = scanner_address},
{.i32 = LEXER_ADDRESS},
{.i32 = self->lexer_address},
{.i32 = valid_tokens_address}
};
ts_wasm_store__call(self, self->current_instance->scanner_scan_fn_index, args, 3);
if (self->has_error) return false;
memcpy(
&self->current_lexer->lookahead,
&memory_data[LEXER_ADDRESS],
&memory_data[self->lexer_address],
sizeof(self->current_lexer->lookahead) + sizeof(self->current_lexer->result_symbol)
);
return args[0].i32;
@ -1461,15 +1608,17 @@ uint32_t ts_wasm_store_call_scanner_serialize(
wasmtime_val_raw_t args[2] = {
{.i32 = scanner_address},
{.i32 = SERIALIZATION_BUFFER_ADDRESS},
{.i32 = self->serialization_buffer_address},
};
ts_wasm_store__call(self, self->current_instance->scanner_serialize_fn_index, args, 2);
if (self->has_error) return 0;
uint32_t length = args[0].i32;
if (length > 0) {
memcpy(
((Lexer *)self->current_lexer)->debug_buffer,
&memory_data[SERIALIZATION_BUFFER_ADDRESS],
&memory_data[self->serialization_buffer_address],
length
);
}
@ -1487,7 +1636,7 @@ void ts_wasm_store_call_scanner_deserialize(
if (length > 0) {
memcpy(
&memory_data[SERIALIZATION_BUFFER_ADDRESS],
&memory_data[self->serialization_buffer_address],
buffer,
length
);
@ -1495,12 +1644,16 @@ void ts_wasm_store_call_scanner_deserialize(
wasmtime_val_raw_t args[3] = {
{.i32 = scanner_address},
{.i32 = SERIALIZATION_BUFFER_ADDRESS},
{.i32 = self->serialization_buffer_address},
{.i32 = length},
};
ts_wasm_store__call(self, self->current_instance->scanner_deserialize_fn_index, args, 3);
}
// Report the store's error flag: true if a wasm call made through this store
// has recorded an error or trap since the flag was last cleared.
bool ts_wasm_store_has_error(const TSWasmStore *self) {
return self->has_error;
}
// Return true when the language's `lex_fn` is the wasm sentinel lex function.
// NOTE(review): presumably this sentinel is installed only on languages loaded
// through a wasm store — confirm where `lex_fn` is assigned.
bool ts_language_is_wasm(const TSLanguage *self) {
return self->lex_fn == ts_wasm_store__sentinel_lex_fn;
}
@ -1569,7 +1722,7 @@ bool ts_wasm_store_start(
return false;
}
void ts_wasm_store_stop(TSWasmStore *self) {
void ts_wasm_store_reset(TSWasmStore *self) {
(void)self;
}
@ -1632,6 +1785,11 @@ void ts_wasm_store_call_scanner_deserialize(
(void)length;
}
// Variant of `ts_wasm_store_has_error` that ignores the store and always
// reports no error.
// NOTE(review): presumably the fallback compiled when wasm support is
// disabled — confirm against the enclosing preprocessor guard.
bool ts_wasm_store_has_error(const TSWasmStore *self) {
(void)self;
return false;
}
bool ts_language_is_wasm(const TSLanguage *self) {
(void)self;
return false;

View file

@ -9,7 +9,8 @@ extern "C" {
#include "./parser.h"
bool ts_wasm_store_start(TSWasmStore *, TSLexer *, const TSLanguage *);
void ts_wasm_store_stop(TSWasmStore *);
void ts_wasm_store_reset(TSWasmStore *);
bool ts_wasm_store_has_error(const TSWasmStore *);
bool ts_wasm_store_call_lex_main(TSWasmStore *, TSStateId);
bool ts_wasm_store_call_lex_keyword(TSWasmStore *, TSStateId);

View file

@ -2,27 +2,33 @@
set -e
# Remove quotes, add leading underscores, remove newlines, remove trailing comma.
# Remove quotes and commas
EXPORTED_FUNCTIONS=$( \
cat lib/src/wasm/stdlib-symbols.txt | \
sed -e 's/"//g' | \
sed -e 's/^/_/g' | \
tr -d '\n"' | \
sed -e 's/,$//' \
tr -d ',"' \
)
emcc \
-o stdlib.wasm \
-Os \
--no-entry \
-s MAIN_MODULE=2 \
-s "EXPORTED_FUNCTIONS=${EXPORTED_FUNCTIONS}" \
-s 'ALLOW_MEMORY_GROWTH' \
-s 'TOTAL_MEMORY=4MB' \
-fvisibility=hidden \
-fno-exceptions \
-xc \
/dev/null
EXPORT_FLAGS=""
for function in ${EXPORTED_FUNCTIONS}; do
EXPORT_FLAGS+=" -Wl,--export=${function}"
done
target/wasi-sdk-21.0/bin/clang-17 \
-o stdlib.wasm \
-Os \
-fPIC \
-Wl,--no-entry \
-Wl,--stack-first \
-Wl,-z -Wl,stack-size=65536 \
-Wl,--import-undefined \
-Wl,--import-memory \
-Wl,--import-table \
-Wl,--strip-debug \
-Wl,--export=reset_heap \
-Wl,--export=__wasm_call_ctors \
-Wl,--export=__stack_pointer \
${EXPORT_FLAGS} \
lib/src/wasm/stdlib.c
xxd -C -i stdlib.wasm > lib/src/wasm/wasm-stdlib.h
mv stdlib.wasm target/