From fe3cfff38566819a039592c8e26c6fa55fd94ce7 Mon Sep 17 00:00:00 2001 From: Will Lillis Date: Fri, 16 Jan 2026 03:49:20 -0500 Subject: [PATCH] feat(loader): use `wasm-opt` to optimize wasm files after compilation Similar to wasi-sdk's `clang`, if `wasm-opt` cannot be found, binaryen is downloaded, unzipped, and cached for future use. --- crates/loader/src/loader.rs | 298 +++++++++++++++++++++++++----------- 1 file changed, 212 insertions(+), 86 deletions(-) diff --git a/crates/loader/src/loader.rs b/crates/loader/src/loader.rs index 11c8b673..9a756bfd 100644 --- a/crates/loader/src/loader.rs +++ b/crates/loader/src/loader.rs @@ -43,6 +43,40 @@ static GRAMMAR_NAME_REGEX: LazyLock = LazyLock::new(|| Regex::new(r#""name":\s*"(.*?)""#).unwrap()); const WASI_SDK_VERSION: &str = include_str!("../wasi-sdk-version").trim_ascii(); +const BINARYEN_VERSION: &str = include_str!("../binaryen-version").trim_ascii(); + +#[cfg(all(target_os = "macos", target_arch = "aarch64"))] +const ARCH_OS: Result<&str, LoaderError> = Ok("arm64-macos"); +#[cfg(all(target_os = "macos", target_arch = "x86_64"))] +const ARCH_OS: Result<&str, LoaderError> = Ok("x86_64-macos"); +#[cfg(all( + target_os = "macos", + not(any(target_arch = "aarch64", target_arch = "x86_64")) +))] +const ARCH_OS: Result<&str, LoaderError> = Err(LoaderError::WasiSDKPlatform); + +#[cfg(all(target_os = "windows", target_arch = "aarch64"))] +const ARCH_OS: Result<&str, LoaderError> = Ok("arm64-windows"); +#[cfg(all(target_os = "windows", target_arch = "x86_64"))] +const ARCH_OS: Result<&str, LoaderError> = Ok("x86_64-windows"); +#[cfg(all( + target_os = "windows", + not(any(target_arch = "aarch64", target_arch = "x86_64")) +))] +const ARCH_OS: Result<&str, LoaderError> = Err(LoaderError::WasiSDKPlatform); + +#[cfg(all(target_os = "linux", target_arch = "aarch64"))] +const ARCH_OS: Result<&str, LoaderError> = Ok("arm64-linux"); +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +const ARCH_OS: Result<&str, 
LoaderError> = Ok("x86_64-linux"); +#[cfg(all( + target_os = "linux", + not(any(target_arch = "aarch64", target_arch = "x86_64")) +))] +const ARCH_OS: Result<&str, LoaderError> = Err(LoaderError::WasiSDKPlatform); + +#[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))] +const ARCH_OS: Result<&str, LoaderError> = Err(LoaderError::WasiSDKPlatform); pub type LoaderResult = Result; @@ -92,10 +126,10 @@ pub enum LoaderError { Time(#[from] SystemTimeError), #[error("Unknown scope '{0}'")] UnknownScope(String), - #[error("Failed to download wasi-sdk from {0}")] - WasiSDKDownload(String), + #[error("Failed to download {tool} from {url}")] + WasmToolDownload { tool: &'static str, url: String }, #[error(transparent)] - WasiSDKClang(#[from] WasiSDKClangError), + WasmTool(#[from] WasmToolError), #[error("Unsupported platform for wasi-sdk")] WasiSDKPlatform, #[cfg(feature = "wasm")] @@ -103,8 +137,12 @@ pub enum LoaderError { Wasm(#[from] WasmError), #[error("Failed to run wasi-sdk clang -- {0}")] WasmCompiler(std::io::Error), + #[error("Failed to run wasm-opt -- {0}")] + WasmOptimizer(std::io::Error), #[error("wasi-sdk clang command failed: {0}")] WasmCompilation(String), + #[error("wasm-opt command failed: {0}")] + WasmOptimization(String), } #[derive(Debug, Error)] @@ -223,22 +261,25 @@ impl std::fmt::Display for ScannerSymbolError { } #[derive(Debug, Error)] -pub struct WasiSDKClangError { - pub wasi_sdk_dir: String, +pub struct WasmToolError { + pub exe: &'static str, + pub toolchain: &'static str, + pub tool_dir: String, pub possible_executables: Vec<&'static str>, pub download: bool, } -impl std::fmt::Display for WasiSDKClangError { +impl std::fmt::Display for WasmToolError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { if self.download { write!( f, - "Failed to find clang executable in downloaded wasi-sdk at '{}'.", - self.wasi_sdk_dir + "Failed to find {} executable in downloaded {} at '{}'.", + self.exe, 
self.toolchain, self.tool_dir )?; } else { - write!(f, "TREE_SITTER_WASI_SDK_PATH is set to '{}', but no clang executable found in 'bin/' directory.", self.wasi_sdk_dir)?; + let toolchain_upper = self.toolchain.replace('-', "_").to_ascii_uppercase(); + write!(f, "TREE_SITTER_{toolchain_upper}_PATH is set to '{}', but no {} executable found in 'bin/' directory.", self.tool_dir, self.exe)?; } let possible_exes = self.possible_executables.join(", "); @@ -1332,12 +1373,13 @@ impl Loader { scanner_filename: Option<&Path>, output_path: &Path, ) -> LoaderResult<()> { - let clang_executable = self.ensure_wasi_sdk_exists()?; + let clang_exe = self.ensure_wasi_sdk_exists()?; + let output_path = output_path.to_str().unwrap(); - let mut command = Command::new(&clang_executable); + let mut command = Command::new(&clang_exe); command.current_dir(src_path).args([ "-o", - output_path.to_str().unwrap(), + output_path, "-fPIC", "-shared", if self.debug_build { "-g" } else { "-Os" }, @@ -1356,11 +1398,24 @@ impl Loader { command.arg(scanner_filename); } - let output = command.output().map_err(LoaderError::WasmCompiler)?; + let compile_output = command.output().map_err(LoaderError::WasmCompiler)?; - if !output.status.success() { + if !compile_output.status.success() { return Err(LoaderError::WasmCompilation( - String::from_utf8_lossy(&output.stderr).to_string(), + String::from_utf8_lossy(&compile_output.stderr).to_string(), + )); + } + + let wasm_opt_exe = self.ensure_binaryen_exists()?; + + let opt_output = Command::new(&wasm_opt_exe) + .args([output_path, "-Os", "-o", output_path]) + .output() + .map_err(LoaderError::WasmOptimizer)?; + + if !opt_output.status.success() { + return Err(LoaderError::WasmOptimization( + String::from_utf8_lossy(&opt_output.stderr).to_string(), )); } @@ -1392,7 +1447,7 @@ impl Loader { Ok(()) } - /// This ensures that the wasi-sdk is available, downloading and extracting it if necessary, + /// This ensures that wasi-sdk is available, downloading and extracting 
it if necessary, /// and returns the path to the `clang` executable. /// /// If `TREE_SITTER_WASI_SDK_PATH` is set, it will use that path to look for the clang executable. @@ -1407,63 +1462,16 @@ impl Loader { vec!["clang", "wasm32-unknown-wasi-clang", "wasm32-wasi-clang"] }; - if let Ok(wasi_sdk_path) = std::env::var("TREE_SITTER_WASI_SDK_PATH") { - let wasi_sdk_dir = PathBuf::from(wasi_sdk_path); - - for exe in &possible_executables { - let clang_exe = wasi_sdk_dir.join("bin").join(exe); - if clang_exe.exists() { - return Ok(clang_exe); - } - } - - return Err(LoaderError::WasiSDKClang(WasiSDKClangError { - wasi_sdk_dir: wasi_sdk_dir.to_string_lossy().to_string(), - possible_executables, - download: false, - })); + if let Some(path) = self.get_existing_tool( + "clang", + "wasi-sdk", + &possible_executables, + "TREE_SITTER_WASI_SDK_PATH", + )? { + return Ok(path); } - let cache_dir = etcetera::choose_base_strategy()? - .cache_dir() - .join("tree-sitter"); - fs::create_dir_all(&cache_dir) - .map_err(|e| LoaderError::IO(IoError::new(e, Some(cache_dir.as_path()))))?; - - let wasi_sdk_dir = cache_dir.join("wasi-sdk"); - - for exe in &possible_executables { - let clang_exe = wasi_sdk_dir.join("bin").join(exe); - if clang_exe.exists() { - return Ok(clang_exe); - } - } - - fs::create_dir_all(&wasi_sdk_dir) - .map_err(|e| LoaderError::IO(IoError::new(e, Some(wasi_sdk_dir.as_path()))))?; - - let arch_os = if cfg!(target_os = "macos") { - if cfg!(target_arch = "aarch64") { - "arm64-macos" - } else { - "x86_64-macos" - } - } else if cfg!(target_os = "windows") { - if cfg!(target_arch = "aarch64") { - "arm64-windows" - } else { - "x86_64-windows" - } - } else if cfg!(target_os = "linux") { - if cfg!(target_arch = "aarch64") { - "arm64-linux" - } else { - "x86_64-linux" - } - } else { - return Err(LoaderError::WasiSDKPlatform); - }; - + let arch_os = ARCH_OS?; let sdk_filename = format!("wasi-sdk-{WASI_SDK_VERSION}-{arch_os}.tar.gz"); let wasi_sdk_major_version = 
WASI_SDK_VERSION .trim_end_matches(char::is_numeric) // trim minor version... @@ -1471,39 +1479,157 @@ impl Loader { let sdk_url = format!( "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-{wasi_sdk_major_version}/{sdk_filename}", ); + self.download_tool( + "clang", + "wasi-sdk", + &sdk_filename, + &sdk_url, + &possible_executables, + ) + } - info!("Downloading wasi-sdk from {sdk_url}..."); - let temp_tar_path = cache_dir.join(sdk_filename); + /// This ensures that binaryen is available, downloading and extracting it if necessary, + /// and returns the path to the `wasm-opt` executable. + /// + /// If `TREE_SITTER_BINARYEN_PATH` is set, it will use that path to look for the wasm-opt executable. + fn ensure_binaryen_exists(&self) -> LoaderResult { + let possible_executables = if cfg!(windows) { + vec![ + "wasm-opt.exe", + "wasm32-unknown-wasm-opt.exe", + "wasm32-wasm-opt.exe", + ] + } else { + vec!["wasm-opt", "wasm32-unknown-wasm-opt", "wasm32-wasm-opt"] + }; + if let Some(path) = self.get_existing_tool( + "wasm-opt", + "binaryen", + &possible_executables, + "TREE_SITTER_BINARYEN_PATH", + )? 
{ + return Ok(path); + } + + let arch_os = ARCH_OS?.replace("arm64-linux", "aarch64-linux"); + let binaryen_filename = format!("binaryen-version_{BINARYEN_VERSION}-{arch_os}.tar.gz"); + let binaryen_url = format!( + "https://github.com/WebAssembly/binaryen/releases/download/version_{BINARYEN_VERSION}/{binaryen_filename}" + ); + self.download_tool( + "wasm-opt", + "binaryen", + &binaryen_filename, + &binaryen_url, + &possible_executables, + ) + } + + fn get_existing_tool( + &self, + tool_name: &'static str, + toolchain: &'static str, + possible_exes: &[&'static str], + env_var: &str, + ) -> LoaderResult> { + if let Ok(tool_path) = std::env::var(env_var) { + let tool_dir = PathBuf::from(tool_path); + + for exe in possible_exes { + let tool_exe = tool_dir.join("bin").join(exe); + if tool_exe.exists() { + return Ok(Some(tool_exe)); + } + } + + Err(LoaderError::WasmTool(WasmToolError { + exe: tool_name, + toolchain, + tool_dir: tool_dir.to_string_lossy().to_string(), + possible_executables: possible_exes.to_vec(), + download: false, + }))?; + } + + let cache_dir = etcetera::choose_base_strategy()? + .cache_dir() + .join("tree-sitter"); + fs::create_dir_all(&cache_dir).map_err(|error| { + LoaderError::IO(IoError { + error, + path: Some(cache_dir.to_string_lossy().to_string()), + }) + })?; + + let tool_dir = cache_dir.join(tool_name); + + for exe in possible_exes { + let tool_exe = tool_dir.join("bin").join(exe); + if tool_exe.exists() { + return Ok(Some(tool_exe)); + } + } + + Ok(None) + } + + fn download_tool( + &self, + tool_name: &'static str, + toolchain: &'static str, + filename: &str, + url: &str, + possible_exes: &[&'static str], + ) -> LoaderResult { + let cache_dir = etcetera::choose_base_strategy()? 
+ .cache_dir() + .join("tree-sitter"); + let tool_dir = cache_dir.join(tool_name); + + fs::create_dir_all(&tool_dir).map_err(|error| { + LoaderError::IO(IoError { + error, + path: Some(tool_dir.to_string_lossy().to_string()), + }) + })?; + + info!("Downloading {tool_name} from {url}..."); + let temp_tar_path = cache_dir.join(filename); let status = Command::new("curl") .arg("-f") .arg("-L") .arg("-o") .arg(&temp_tar_path) - .arg(&sdk_url) + .arg(url) .status() - .map_err(|e| LoaderError::Curl(sdk_url.clone(), e))?; + .map_err(|e| LoaderError::Curl(url.to_string(), e))?; if !status.success() { - return Err(LoaderError::WasiSDKDownload(sdk_url)); + Err(LoaderError::WasmToolDownload { + tool: tool_name, + url: url.to_string(), + })?; } - info!("Extracting wasi-sdk to {}...", wasi_sdk_dir.display()); - self.extract_tar_gz_with_strip(&temp_tar_path, &wasi_sdk_dir)?; + info!("Extracting {tool_name} to {}...", tool_dir.display()); + self.extract_tar_gz_with_strip(&temp_tar_path, &tool_dir)?; fs::remove_file(temp_tar_path).ok(); - for exe in &possible_executables { - let clang_exe = wasi_sdk_dir.join("bin").join(exe); - if clang_exe.exists() { - return Ok(clang_exe); + for exe in possible_exes { + let tool_exe = tool_dir.join("bin").join(exe); + if tool_exe.exists() { + return Ok(tool_exe); } } - Err(LoaderError::WasiSDKClang(WasiSDKClangError { - wasi_sdk_dir: wasi_sdk_dir.to_string_lossy().to_string(), - possible_executables, + Err(LoaderError::WasmTool(WasmToolError { + exe: tool_name, + toolchain, + tool_dir: tool_dir.to_string_lossy().to_string(), + possible_executables: possible_exes.to_vec(), download: true, - })) + }))? } #[must_use]