From 6fd7a1e44e45ed7562d5f92d02404c2582d12e85 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 26 Nov 2023 10:41:33 -0800 Subject: [PATCH] Return informative error when load_language fails --- cli/loader/src/lib.rs | 2 +- cli/src/tests/wasm_language_test.rs | 42 ++++++++-- lib/binding_rust/bindings.rs | 95 +++++++++++----------- lib/binding_rust/wasm_language.rs | 67 +++++++++++++-- lib/include/tree_sitter/api.h | 10 ++- lib/src/wasm.c | 122 +++++++++++++++++++++------- lib/src/wasm/wasm.h | 2 +- script/generate-bindings | 14 ---- 8 files changed, 246 insertions(+), 108 deletions(-) diff --git a/cli/loader/src/lib.rs b/cli/loader/src/lib.rs index 79ea706a..5a40e524 100644 --- a/cli/loader/src/lib.rs +++ b/cli/loader/src/lib.rs @@ -385,7 +385,7 @@ impl Loader { } let wasm_bytes = fs::read(&library_path)?; - return Ok(wasm_store.load_language(name, &wasm_bytes)); + return Ok(wasm_store.load_language(name, &wasm_bytes)?); } { diff --git a/cli/src/tests/wasm_language_test.rs b/cli/src/tests/wasm_language_test.rs index 3bbc6940..7cd3a7e8 100644 --- a/cli/src/tests/wasm_language_test.rs +++ b/cli/src/tests/wasm_language_test.rs @@ -1,14 +1,14 @@ use crate::tests::helpers::fixtures::WASM_DIR; use lazy_static::lazy_static; use std::fs; -use tree_sitter::{wasmtime::Engine, Parser, WasmStore}; +use tree_sitter::{wasmtime::Engine, Parser, WasmError, WasmErrorKind, WasmStore}; lazy_static! 
{ static ref ENGINE: Engine = Engine::default(); } #[test] -fn test_wasm_store() { +fn test_load_wasm_language() { let mut store = WasmStore::new(ENGINE.clone()); let mut parser = Parser::new(); @@ -17,10 +17,10 @@ fn test_wasm_store() { let wasm_rb = fs::read(&WASM_DIR.join(format!("tree-sitter-ruby.wasm"))).unwrap(); let wasm_typescript = fs::read(&WASM_DIR.join(format!("tree-sitter-typescript.wasm"))).unwrap(); - let language_rust = store.load_language("rust", &wasm_rs); - let language_cpp = store.load_language("cpp", &wasm_cpp); - let language_ruby = store.load_language("ruby", &wasm_rb); - let language_typescript = store.load_language("typescript", &wasm_typescript); + let language_rust = store.load_language("rust", &wasm_rs).unwrap(); + let language_cpp = store.load_language("cpp", &wasm_cpp).unwrap(); + let language_ruby = store.load_language("ruby", &wasm_rb).unwrap(); + let language_typescript = store.load_language("typescript", &wasm_typescript).unwrap(); parser.set_wasm_store(store).unwrap(); let mut parser2 = Parser::new(); @@ -60,3 +60,33 @@ fn test_wasm_store() { } } } + +#[test] +fn test_load_wasm_errors() { + let mut store = WasmStore::new(ENGINE.clone()); + let wasm = fs::read(&WASM_DIR.join(format!("tree-sitter-rust.wasm"))).unwrap(); + + let bad_wasm = &wasm[1..]; + assert_eq!( + store.load_language("rust", &bad_wasm).unwrap_err(), + WasmError { + kind: WasmErrorKind::Parse, + message: "failed to parse dylink section of wasm module".into(), + } + ); + + assert_eq!( + store.load_language("not_rust", &wasm).unwrap_err(), + WasmError { + kind: WasmErrorKind::Instantiate, + message: "module did not contain language function: tree_sitter_not_rust".into(), + } + ); + + let mut bad_wasm = wasm.clone(); + bad_wasm[300..500].iter_mut().for_each(|b| *b = 0); + assert_eq!( + store.load_language("rust", &bad_wasm).unwrap_err().kind, + WasmErrorKind::Compile, + ); +} diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 
112161ee..ef3ba30b 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -1,4 +1,4 @@ -/* automatically generated by rust-bindgen 0.66.1 */ +/* automatically generated by rust-bindgen 0.69.1 */ pub const TREE_SITTER_LANGUAGE_VERSION: u32 = 14; pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: u32 = 13; @@ -715,53 +715,6 @@ extern "C" { #[doc = " Get the ABI version number for this language. This version number is used\n to ensure that languages were generated by a compatible version of\n Tree-sitter.\n\n See also [`ts_parser_set_language`]."] pub fn ts_language_version(self_: *const TSLanguage) -> u32; } -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct wasm_engine_t { - _unused: [u8; 0], -} -pub type TSWasmEngine = wasm_engine_t; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSWasmStore { - _unused: [u8; 0], -} -extern "C" { - #[doc = " Create a Wasm store."] - pub fn ts_wasm_store_new(engine: *mut TSWasmEngine) -> *mut TSWasmStore; -} -extern "C" { - #[doc = " Free the memory associated with the given Wasm store."] - pub fn ts_wasm_store_delete(arg1: *mut TSWasmStore); -} -extern "C" { - #[doc = " Create a language from a buffer of Wasm. The resulting language behaves"] - #[doc = " like any other Tree-sitter language, except that in order to use it with"] - #[doc = " a parser, that parser must have a Wasm store. Note that the language"] - #[doc = " can be used with any Wasm store, it doesn't need to be the same store that"] - #[doc = " was used to originally load it."] - pub fn ts_wasm_store_load_language( - arg1: *mut TSWasmStore, - name: *const ::std::os::raw::c_char, - wasm: *const ::std::os::raw::c_char, - wasm_len: u32, - ) -> *const TSLanguage; -} -extern "C" { - #[doc = " Check if the language came from a Wasm module. 
If so, then in order to use"] - #[doc = " this langauge with a Parser, that parser must have a Wasm store assigned."] - pub fn ts_language_is_wasm(arg1: *const TSLanguage) -> bool; -} -extern "C" { - #[doc = " Assign the given Wasm store to the parser. A parser must have a Wasm store"] - #[doc = " in order to use Wasm languages."] - pub fn ts_parser_set_wasm_store(arg1: *mut TSParser, arg2: *mut TSWasmStore); -} -extern "C" { - #[doc = " Remove the parser's current Wasm store and return it. This returns NULL if"] - #[doc = " the parser doesn't have a Wasm store."] - pub fn ts_parser_take_wasm_store(arg1: *mut TSParser) -> *mut TSWasmStore; -} extern "C" { #[doc = " Get the next parse state. Combine this with lookahead iterators to generate\n completion suggestions or valid symbols in error nodes. Use\n [`ts_node_grammar_symbol`] for valid symbols."] pub fn ts_language_next_state( @@ -814,6 +767,52 @@ extern "C" { self_: *const TSLookaheadIterator, ) -> *const ::std::os::raw::c_char; } +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct wasm_engine_t { + _unused: [u8; 0], +} +pub type TSWasmEngine = wasm_engine_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSWasmStore { + _unused: [u8; 0], +} +pub const TSWasmErrorParse: TSWasmError = 0; +pub const TSWasmErrorCompile: TSWasmError = 1; +pub const TSWasmErrorInstantiate: TSWasmError = 2; +pub type TSWasmError = ::std::os::raw::c_uint; +extern "C" { + #[doc = " Create a Wasm store."] + pub fn ts_wasm_store_new(engine: *mut TSWasmEngine) -> *mut TSWasmStore; +} +extern "C" { + #[doc = " Free the memory associated with the given Wasm store."] + pub fn ts_wasm_store_delete(arg1: *mut TSWasmStore); +} +extern "C" { + #[doc = " Create a language from a buffer of Wasm. The resulting language behaves\n like any other Tree-sitter language, except that in order to use it with\n a parser, that parser must have a Wasm store. 
Note that the language\n can be used with any Wasm store, it doesn't need to be the same store that\n was used to originally load it."] + pub fn ts_wasm_store_load_language( + arg1: *mut TSWasmStore, + name: *const ::std::os::raw::c_char, + wasm: *const ::std::os::raw::c_char, + wasm_len: u32, + error: *mut TSWasmError, + message: *mut *mut ::std::os::raw::c_char, + ) -> *const TSLanguage; +} +extern "C" { + #[doc = " Check if the language came from a Wasm module. If so, then in order to use\n this langauge with a Parser, that parser must have a Wasm store assigned."] + pub fn ts_language_is_wasm(arg1: *const TSLanguage) -> bool; +} +extern "C" { + #[doc = " Assign the given Wasm store to the parser. A parser must have a Wasm store\n in order to use Wasm languages."] + pub fn ts_parser_set_wasm_store(arg1: *mut TSParser, arg2: *mut TSWasmStore); +} +extern "C" { + #[doc = " Remove the parser's current Wasm store and return it. This returns NULL if\n the parser doesn't have a Wasm store."] + pub fn ts_parser_take_wasm_store(arg1: *mut TSParser) -> *mut TSWasmStore; +} extern "C" { #[doc = " Set the allocation functions used by the library.\n\n By default, Tree-sitter uses the standard libc allocation functions,\n but aborts the process when an allocation fails. This function lets\n you supply alternative allocation functions at runtime.\n\n If you pass `NULL` for any parameter, Tree-sitter will switch back to\n its default implementation of that function.\n\n If you call this function after the library has already been used, then\n you must ensure that either:\n 1. All the existing objects have been freed.\n 2. 
The new allocator shares its state with the old one, so it is capable\n of freeing memory that was allocated by the old allocator."] pub fn ts_set_allocator( diff --git a/lib/binding_rust/wasm_language.rs b/lib/binding_rust/wasm_language.rs index 7b719c1e..f0cc4f81 100644 --- a/lib/binding_rust/wasm_language.rs +++ b/lib/binding_rust/wasm_language.rs @@ -1,5 +1,11 @@ use crate::{ffi, Language, LanguageError, Parser}; -use std::{ffi::CString, mem, os::raw::c_char}; +use std::{ + error, + ffi::{CStr, CString}, + fmt, + mem::{self, MaybeUninit}, + os::raw::c_char, +}; pub use wasmtime; #[cfg(feature = "wasm")] @@ -18,6 +24,20 @@ pub struct wasm_engine_t { pub struct WasmStore(*mut ffi::TSWasmStore); +#[derive(Debug, PartialEq, Eq)] +pub struct WasmError { + pub kind: WasmErrorKind, + pub message: String, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum WasmErrorKind { + Parse, + Compile, + Instantiate, + Other, +} + impl WasmStore { pub fn new(engine: wasmtime::Engine) -> Self { let engine = Box::new(wasm_engine_t { engine }); @@ -26,16 +46,37 @@ impl WasmStore { }) } - pub fn load_language(&mut self, name: &str, bytes: &[u8]) -> Language { + pub fn load_language(&mut self, name: &str, bytes: &[u8]) -> Result<Language, WasmError> { let name = CString::new(name).unwrap(); - Language(unsafe { - ffi::ts_wasm_store_load_language( + unsafe { + let mut error = MaybeUninit::<ffi::TSWasmError>::uninit(); + let mut message = MaybeUninit::<*mut c_char>::uninit(); + let language = ffi::ts_wasm_store_load_language( self.0, name.as_ptr(), bytes.as_ptr() as *const c_char, bytes.len() as u32, - ) - }) + error.as_mut_ptr(), + message.as_mut_ptr(), + ); + + if language.is_null() { + let raw_message = message.assume_init(); // SAFETY: the C side writes both out-params before returning NULL + let message = CStr::from_ptr(raw_message).to_owned(); // copy into Rust-owned memory instead of adopting the C buffer + (crate::FREE_FN)(raw_message.cast()); // the buffer was allocated by the C library, so release it with the library's allocator + Err(WasmError { + kind: match error.assume_init() { + ffi::TSWasmErrorParse => WasmErrorKind::Parse, + ffi::TSWasmErrorCompile => WasmErrorKind::Compile, + ffi::TSWasmErrorInstantiate => WasmErrorKind::Instantiate, + _ => WasmErrorKind::Other, + }, +
message: message.into_string().unwrap(), + }) + } else { + Ok(Language(language)) + } + } } } @@ -67,3 +108,17 @@ impl Drop for WasmStore { unsafe { ffi::ts_wasm_store_delete(self.0) }; } } + +impl fmt::Display for WasmError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let kind = match self.kind { + WasmErrorKind::Parse => "Failed to parse wasm", + WasmErrorKind::Compile => "Failed to compile wasm", + WasmErrorKind::Instantiate => "Failed to instantiate wasm module", + WasmErrorKind::Other => "Unknown error", + }; + write!(f, "{kind} {}", self.message) + } +} + +impl error::Error for WasmError {} diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 7c770832..3129d5c3 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -1149,6 +1149,12 @@ const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator typedef struct wasm_engine_t TSWasmEngine; typedef struct TSWasmStore TSWasmStore; +typedef enum { + TSWasmErrorParse, + TSWasmErrorCompile, + TSWasmErrorInstantiate, +} TSWasmError; + /** * Create a Wasm store. */ @@ -1170,7 +1176,9 @@ const TSLanguage *ts_wasm_store_load_language( TSWasmStore *, const char *name, const char *wasm, - uint32_t wasm_len + uint32_t wasm_len, + TSWasmError *error, + char **message ); /** diff --git a/lib/src/wasm.c b/lib/src/wasm.c index be19ed70..81581ec6 100644 --- a/lib/src/wasm.c +++ b/lib/src/wasm.c @@ -692,31 +692,39 @@ void ts_wasm_store_delete(TSWasmStore *self) { ts_free(self); } +#define format(output, ...) 
\ + do { \ + size_t message_length = snprintf((char *)NULL, 0, __VA_ARGS__); \ + *output = ts_malloc(message_length + 1); \ + snprintf(*output, message_length + 1, __VA_ARGS__); \ + } while (0) + static bool ts_wasm_store__instantiate( TSWasmStore *self, wasmtime_module_t *module, const char *language_name, const WasmDylinkInfo *dylink_info, wasmtime_instance_t *result, - int32_t *language_address + int32_t *language_address, + char **error_message ) { - wasmtime_context_t *context = wasmtime_store_context(self->store); wasmtime_error_t *error = NULL; wasm_trap_t *trap = NULL; + wasm_message_t message = WASM_EMPTY_VEC; + char *language_function_name = NULL; // Grow the function table to make room for the new functions. + wasmtime_context_t *context = wasmtime_store_context(self->store); wasmtime_val_t initializer = {.kind = WASMTIME_FUNCREF}; uint32_t prev_size; error = wasmtime_table_grow(context, &self->function_table, dylink_info->table_size, &initializer, &prev_size); - assert(!error); + if (error) { + format(error_message, "invalid function table size %u", dylink_info->table_size); + goto error; + } // Construct the language function name as string. 
- unsigned prefix_len = strlen("tree_sitter_"); - size_t name_len = strlen(language_name); - char *language_function_name = ts_malloc(prefix_len + name_len + 1); - memcpy(&language_function_name[0], "tree_sitter_", prefix_len); - memcpy(&language_function_name[prefix_len], language_name, name_len); - language_function_name[prefix_len + name_len] = '\0'; + format(&language_function_name, "tree_sitter_%s", language_name); const uint64_t store_id = self->function_table.store_id; @@ -730,6 +738,7 @@ static bool ts_wasm_store__instantiate( const wasm_importtype_t *import_type = import_types.data[i]; const wasm_name_t *import_name = wasm_importtype_name(import_type); if (import_name->size == 0) { + format(error_message, "empty import name"); goto error; } @@ -748,7 +757,11 @@ static bool ts_wasm_store__instantiate( } if (!defined_in_stdlib) { - printf("unexpected import '%.*s'\n", (int)import_name->size, import_name->data); + format( + error_message, + "invalid import '%.*s'\n", + (int)import_name->size, import_name->data + ); goto error; } } @@ -757,15 +770,21 @@ static bool ts_wasm_store__instantiate( error = wasmtime_instance_new(context, module, imports, import_types.size, &instance, &trap); wasm_importtype_vec_delete(&import_types); if (error) { - wasm_message_t message; wasmtime_error_message(error, &message); - printf("error instantiating wasm module: %s\n", message.data); + format( + error_message, + "error instantiating wasm module: %.*s\n", + (int)message.size, message.data + ); goto error; } if (trap) { - wasm_message_t message; wasm_trap_message(trap, &message); - printf("error instantiating wasm module: %s\n", message.data); + format( + error_message, + "trap when instantiating wasm module: %.*s\n", + (int)message.size, message.data + ); goto error; } @@ -781,6 +800,7 @@ static bool ts_wasm_store__instantiate( wasm_exporttype_t *export_type = export_types.data[i]; const wasm_name_t *name = wasm_exporttype_name(export_type); + size_t name_len; char 
*export_name; wasmtime_extern_t export = {.kind = WASM_EXTERN_GLOBAL}; bool exists = wasmtime_instance_export_nth(context, &instance, i, &export_name, &name_len, &export); @@ -792,9 +812,12 @@ static bool ts_wasm_store__instantiate( error = wasmtime_func_call(context, &apply_relocation_func, NULL, 0, NULL, 0, &trap); assert(!error); if (trap) { - wasm_message_t message; wasm_trap_message(trap, &message); - printf("error calling relocation function: %s\n", message.data); + format( + error_message, + "trap when calling data relocation function: %.*s\n", + (int)message.size, message.data + ); goto error; } } @@ -808,7 +831,11 @@ static bool ts_wasm_store__instantiate( wasm_exporttype_vec_delete(&export_types); if (!found_language) { - printf("failed to find function %s\n", language_function_name); + format( + error_message, + "module did not contain language function: %s", + language_function_name + ); goto error; } @@ -818,19 +845,34 @@ static bool ts_wasm_store__instantiate( error = wasmtime_func_call(context, &language_func, NULL, 0, &language_address_val, 1, &trap); assert(!error); if (trap) { - wasm_message_t message; wasm_trap_message(trap, &message); - printf("error calling language function: %s\n", message.data); + format( + error_message, + "trapped when calling language function: %s: %.*s\n", + language_function_name, (int)message.size, message.data + ); goto error; } - assert(language_address_val.kind == WASMTIME_I32); + if (language_address_val.kind != WASMTIME_I32) { + format( + error_message, + "language function did not return an integer: %s\n", + language_function_name + ); + goto error; + } + + ts_free(language_function_name); *result = instance; *language_address = language_address_val.of.i32; return true; error: - ts_free(language_function_name); + if (language_function_name) ts_free(language_function_name); + if (message.size) wasm_byte_vec_delete(&message); + if (error) wasmtime_error_delete(error); + if (trap) wasm_trap_delete(trap); return 
false; } @@ -842,22 +884,29 @@ const TSLanguage *ts_wasm_store_load_language( TSWasmStore *self, const char *language_name, const char *wasm, - uint32_t wasm_len + uint32_t wasm_len, + TSWasmError *wasm_error, + char **error_message ) { WasmDylinkInfo dylink_info; + wasmtime_module_t *module = NULL; + wasmtime_error_t *error = NULL; + if (!wasm_dylink_info__parse((const unsigned char *)wasm, wasm_len, &dylink_info)) { - printf("failed to parse wasm dylink info\n"); - return NULL; + *wasm_error = TSWasmErrorParse; + format(error_message, "failed to parse dylink section of wasm module"); + goto error; } // Compile the wasm code. - wasmtime_module_t *module; - wasmtime_error_t *error = wasmtime_module_new(self->engine, (const uint8_t *)wasm, wasm_len, &module); + error = wasmtime_module_new(self->engine, (const uint8_t *)wasm, wasm_len, &module); if (error) { wasm_message_t message; wasmtime_error_message(error, &message); - printf("failed to load wasm language: %s", message.data); - return NULL; + *wasm_error = TSWasmErrorCompile; + format(error_message, "error compiling wasm module: %.*s", (int)message.size, message.data); + wasm_byte_vec_delete(&message); + goto error; } // Instantiate the module in this store. @@ -869,8 +918,12 @@ const TSLanguage *ts_wasm_store_load_language( language_name, &dylink_info, &instance, - &language_address - )) return NULL; + &language_address, + error_message + )) { + *wasm_error = TSWasmErrorInstantiate; + goto error; + } // Copy all of the static data out of the language object in wasm memory, // constructing a native language object. @@ -1062,6 +1115,10 @@ const TSLanguage *ts_wasm_store_load_language( })); return language; +error: + if (error) wasmtime_error_delete(error); // the compile-failure path arrives here still owning its error object + if (module) wasmtime_module_delete(module); + return NULL; } bool ts_wasm_store_add_language( @@ -1085,6 +1142,7 @@ bool ts_wasm_store_add_language( // If the language module has not been instantiated in this store, then add // it to this store.
if (!exists) { + char *message = NULL; wasmtime_instance_t instance; int32_t language_address; if (!ts_wasm_store__instantiate( @@ -1093,8 +1151,10 @@ bool ts_wasm_store_add_language( language_module->name, &language_module->dylink_info, &instance, - &language_address + &language_address, + &message )) { + ts_free(message); return false; } diff --git a/lib/src/wasm/wasm.h b/lib/src/wasm/wasm.h index b008328d..3e1e90cc 100644 --- a/lib/src/wasm/wasm.h +++ b/lib/src/wasm/wasm.h @@ -711,4 +711,4 @@ static inline void* wasm_val_ptr(const wasm_val_t* val) { } // extern "C" #endif -#endif // #ifdef WASM_H \ No newline at end of file +#endif // #ifdef WASM_H diff --git a/script/generate-bindings b/script/generate-bindings index 0e5c8ca5..659337c9 100755 --- a/script/generate-bindings +++ b/script/generate-bindings @@ -37,21 +37,7 @@ bindgen \ --blocklist-type '^__.*' \ --no-prepend-enum-name \ --no-copy "$no_copy" \ - --blocklist-function ts_tree_print_dot_graph \ - --size_t-is-usize \ $header_path \ -- \ -D TREE_SITTER_FEATURE_WASM \ > $output_path - -echo "" >> $output_path - -defines=( - TREE_SITTER_LANGUAGE_VERSION - TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION -) - -for define in ${defines[@]}; do - define_value=$(egrep "#define $define (.*)" $header_path | cut -d' ' -f3) - echo "pub const $define: usize = $define_value;" >> $output_path -done