diff --git a/Cargo.lock b/Cargo.lock
index 092bb3d2..440e163d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -306,7 +306,7 @@ dependencies = [
  "itertools",
  "log",
  "smallvec",
- "wasmparser",
+ "wasmparser 0.118.1",
  "wasmtime-types",
 ]
 
@@ -1274,6 +1274,7 @@ dependencies = [
  "tree-sitter-tests-proc-macro",
  "unindent",
  "walkdir",
+ "wasmparser 0.121.0",
  "webbrowser",
  "which",
 ]
@@ -1487,6 +1488,17 @@ dependencies = [
  "semver",
 ]
 
+[[package]]
+name = "wasmparser"
+version = "0.121.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "953cf6a7606ab31382cb1caa5ae403e77ba70c7f8e12eeda167e7040d42bfda8"
+dependencies = [
+ "bitflags 2.4.1",
+ "indexmap",
+ "semver",
+]
+
 [[package]]
 name = "wasmtime"
 version = "16.0.0"
@@ -1506,7 +1518,7 @@ dependencies = [
  "serde_derive",
  "serde_json",
  "target-lexicon",
- "wasmparser",
+ "wasmparser 0.118.1",
  "wasmtime-cranelift",
  "wasmtime-environ",
  "wasmtime-jit",
@@ -1562,7 +1574,7 @@ dependencies = [
  "object",
  "target-lexicon",
  "thiserror",
- "wasmparser",
+ "wasmparser 0.118.1",
  "wasmtime-cranelift-shared",
  "wasmtime-environ",
  "wasmtime-versioned-export-macros",
@@ -1598,7 +1610,7 @@ dependencies = [
  "serde_derive",
  "target-lexicon",
  "thiserror",
- "wasmparser",
+ "wasmparser 0.118.1",
  "wasmtime-types",
 ]
 
@@ -1668,7 +1680,7 @@ dependencies = [
  "serde",
  "serde_derive",
  "thiserror",
- "wasmparser",
+ "wasmparser 0.118.1",
 ]
 
 [[package]]
diff --git a/cli/Cargo.toml b/cli/Cargo.toml
index 97c6a129..5735b0bf 100644
--- a/cli/Cargo.toml
+++ b/cli/Cargo.toml
@@ -47,6 +47,7 @@ serde_derive = "1.0"
 smallbitvec = "2.5.1"
 tiny_http = "0.12.0"
 walkdir = "2.3.3"
+wasmparser = "0.121"
 webbrowser = "0.8.10"
 which = "4.4.0"
 
diff --git a/cli/src/tests/wasm_language_test.rs b/cli/src/tests/wasm_language_test.rs
index 9b0c8f3c..4efa6e53 100644
--- a/cli/src/tests/wasm_language_test.rs
+++ b/cli/src/tests/wasm_language_test.rs
@@ -9,6 +9,25 @@ lazy_static! {
     static ref ENGINE: Engine = Engine::default();
 }
 
+#[test]
+fn test_wasm_stdlib_symbols() {
+    let symbols = tree_sitter::wasm_stdlib_symbols().collect::<Vec<_>>();
+    assert_eq!(
+        symbols,
+        {
+            let mut symbols = symbols.clone();
+            symbols.sort();
+            symbols
+        },
+        "symbols aren't sorted"
+    );
+
+    assert!(symbols.contains(&"malloc"));
+    assert!(symbols.contains(&"free"));
+    assert!(symbols.contains(&"memset"));
+    assert!(symbols.contains(&"memcpy"));
+}
+
 #[test]
 fn test_load_wasm_language() {
     allocations::record(|| {
diff --git a/cli/src/wasm.rs b/cli/src/wasm.rs
index 5b3b093f..f5e24165 100644
--- a/cli/src/wasm.rs
+++ b/cli/src/wasm.rs
@@ -1,7 +1,9 @@
 use super::generate::parse_grammar::GrammarJSON;
-use anyhow::{Context, Result};
+use anyhow::{anyhow, Context, Result};
 use std::{fs, path::Path};
+use tree_sitter::wasm_stdlib_symbols;
 use tree_sitter_loader::Loader;
+use wasmparser::Parser;
 
 pub fn load_language_wasm_file(language_dir: &Path) -> Result<(String, Vec<u8>)> {
     let grammar_name = get_grammar_name(&language_dir)
@@ -46,5 +48,46 @@ pub fn compile_language_to_wasm(
         &output_filename,
         force_docker,
     )?;
+
+    // Exit with an error if the external scanner uses symbols from the
+    // C or C++ standard libraries that aren't available to wasm parsers.
+    let stdlib_symbols: Vec<_> = wasm_stdlib_symbols().collect();
+    let builtin_symbols = [
+        "__indirect_function_table",
+        "__memory_base",
+        "__table_base",
+        "memory",
+    ];
+
+    let mut missing_symbols = Vec::new();
+    let wasm_bytes = fs::read(&output_filename)?;
+    let parser = Parser::new(0);
+    for payload in parser.parse_all(&wasm_bytes) {
+        if let wasmparser::Payload::ImportSection(imports) = payload? {
+            for import in imports {
+                let import = import?.name;
+                if !builtin_symbols.contains(&import) && !stdlib_symbols.contains(&import) {
+                    missing_symbols.push(import);
+                }
+            }
+        }
+    }
+
+    if !missing_symbols.is_empty() {
+        return Err(anyhow!(
+            concat!(
+                "This external scanner uses a symbol that isn't available to wasm parsers.\n",
+                "\n",
+                "Missing symbols:\n",
+                " {}\n",
+                "\n",
+                "Available symbols:\n",
+                " {}",
+            ),
+            missing_symbols.join("\n "),
+            stdlib_symbols.join("\n ")
+        ));
+    }
+
     Ok(())
 }
diff --git a/lib/binding_rust/build.rs b/lib/binding_rust/build.rs
index 31dd5fdc..e1176ae7 100644
--- a/lib/binding_rust/build.rs
+++ b/lib/binding_rust/build.rs
@@ -2,6 +2,8 @@ use std::path::{Path, PathBuf};
 use std::{env, fs};
 
 fn main() {
+    let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
+
     println!("cargo:rerun-if-env-changed=TREE_SITTER_STATIC_ANALYSIS");
     if env::var("TREE_SITTER_STATIC_ANALYSIS").is_ok() {
         if let (Some(clang_path), Some(scan_build_path)) = (which("clang"), which("scan-build")) {
@@ -18,7 +20,13 @@ fn main() {
     }
 
     #[cfg(feature = "bindgen")]
-    generate_bindings();
+    generate_bindings(&out_dir);
+
+    fs::copy(
+        "src/wasm/stdlib-symbols.txt",
+        out_dir.join("stdlib-symbols.txt"),
+    )
+    .unwrap();
 
     let mut config = cc::Build::new();
 
@@ -47,7 +55,7 @@ fn main() {
 }
 
 #[cfg(feature = "bindgen")]
-fn generate_bindings() {
+fn generate_bindings(out_dir: &Path) {
     const HEADER_PATH: &str = "include/tree_sitter/api.h";
 
     println!("cargo:rerun-if-changed={}", HEADER_PATH);
@@ -77,9 +85,7 @@ fn generate_bindings() {
         .generate()
         .expect("Failed to generate bindings");
 
-    let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
     let bindings_rs = out_dir.join("bindings.rs");
-
     bindings.write_to_file(&bindings_rs).expect(&*format!(
         "Failed to write bindings into path: {bindings_rs:?}"
     ));
diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs
index 30579e35..35b4a345 100644
--- a/lib/binding_rust/lib.rs
+++ b/lib/binding_rust/lib.rs
@@ -2706,6 +2706,20 @@ impl fmt::Display for QueryError {
     }
 }
 
+/// Returns the names of the standard-library symbols that are available to
+/// wasm parsers, one per line of `stdlib-symbols.txt`.
+///
+/// The list is embedded at compile time from the copy of that file placed in
+/// `OUT_DIR`; surrounding `"` and `,` characters are stripped from each line.
+pub fn wasm_stdlib_symbols() -> impl Iterator<Item = &'static str> {
+    const WASM_STDLIB_SYMBOLS: &'static str =
+        include_str!(concat!(env!("OUT_DIR"), "/stdlib-symbols.txt"));
+
+    WASM_STDLIB_SYMBOLS
+        .lines()
+        .map(|s| s.trim_matches(|c| c == '"' || c == ','))
+}
+
 extern "C" {
     fn free(ptr: *mut c_void);
 }
diff --git a/lib/binding_rust/wasm_language.rs b/lib/binding_rust/wasm_language.rs
index 75fc6e8c..89b16d7f 100644
--- a/lib/binding_rust/wasm_language.rs
+++ b/lib/binding_rust/wasm_language.rs
@@ -8,10 +8,10 @@ use std::{
 };
 pub use wasmtime;
 
-#[cfg(feature = "wasm")]
-pub fn test() {
-    // Force Cargo to include wasmtime-c-api as a dependency of this crate,
-    // even though it is only used by the C code.
+// Force Cargo to include wasmtime-c-api as a dependency of this crate,
+// even though it is only used by the C code.
+#[allow(unused)]
+fn _use_wasmtime() {
     wasmtime_c_api::wasm_engine_new();
 }
 