From ca7ff033db8acc1d07f8fd559a5f78705ddb1a03 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 27 Apr 2025 19:42:34 -0700 Subject: [PATCH] Fix wasm build issues introduced by switching to wasi-sdk (#4407) * Don't shell out for extracting tar.gz files * Avoid wasi-sdk adding dependency on libc.so * Clippy * Fix -nostdlib flag --- Cargo.lock | 31 +++++++++-- Cargo.toml | 2 + cli/loader/Cargo.toml | 2 + cli/loader/src/lib.rs | 126 +++++++++++++++++++++--------------------- docs/src/cli/build.md | 5 -- xtask/src/generate.rs | 3 - xtask/src/main.rs | 3 - 7 files changed, 93 insertions(+), 79 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b64bf82c..d7e76291 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -615,9 +615,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.35" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" +checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" dependencies = [ "crc32fast", "miniz_oxide", @@ -1201,9 +1201,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.3" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924" +checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" dependencies = [ "adler2", ] @@ -1786,6 +1786,17 @@ dependencies = [ "syn", ] +[[package]] +name = "tar" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "target-lexicon" version = "0.13.1" @@ -2069,6 +2080,7 @@ dependencies = [ "anyhow", "cc", "etcetera", + "flate2", "fs4", "indoc", "libloading", @@ -2078,6 +2090,7 @@ dependencies = [ "semver", "serde", "serde_json", + "tar", "tempfile", "tree-sitter", "tree-sitter-highlight", @@ -2907,6 +2920,16 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +[[package]] +name = "xattr" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d65cbf2f12c15564212d48f4e3dfb87923d25d611f2aed18f4cb23f0413d89e" +dependencies = [ + "libc", + "rustix 1.0.2", +] + [[package]] name = "xtask" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index bf216832..3eeedeb2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -116,6 +116,7 @@ ctrlc = { version = "3.4.6", features = ["termination"] } dialoguer = { version = "0.11.0", features = ["fuzzy-select"] } etcetera = "0.8.0" filetime = "0.2.25" +flate2 = "1.0.28" fs4 = "0.12.0" git2 = "0.20.1" glob = "0.3.2" @@ -140,6 +141,7 @@ serde_json = { version = "1.0.140", features = ["preserve_order"] } similar = "2.7.0" smallbitvec = "2.6.0" streaming-iterator = "0.1.9" +tar = "0.4.40" tempfile = "3.19.1" thiserror = "2.0.12" tiny_http = "0.12.0" diff --git a/cli/loader/Cargo.toml b/cli/loader/Cargo.toml index 75253d23..aa40a0b9 100644 --- a/cli/loader/Cargo.toml +++ b/cli/loader/Cargo.toml @@ -27,6 +27,7 @@ default = ["tree-sitter-highlight", "tree-sitter-tags"] anyhow.workspace = true cc.workspace = true etcetera.workspace = true +flate2.workspace = true fs4.workspace = true indoc.workspace = true libloading.workspace = true @@ -36,6 +37,7 @@ regex.workspace = true semver.workspace = true serde.workspace = true serde_json.workspace = true +tar.workspace = true tempfile.workspace = true url.workspace = true ureq = "3.0.11" diff --git a/cli/loader/src/lib.rs b/cli/loader/src/lib.rs index 551caa97..58cb3f41 100644 --- a/cli/loader/src/lib.rs +++ b/cli/loader/src/lib.rs @@ -8,7 +8,7 @@ use std::sync::Mutex; use std::{ collections::HashMap, env, fs, - io::{BufRead, BufReader, Write}, + io::{BufRead, BufReader, Write as _}, mem, path::{Path, PathBuf}, process::Command, @@ -20,6 +20,7 @@ use std::{ use anyhow::Error; use anyhow::{anyhow, Context, Result}; use etcetera::BaseStrategy as _; +use flate2::read::GzDecoder; use fs4::fs_std::FileExt; use indoc::indoc; use libloading::{Library, Symbol}; @@ -976,31 +977,34 @@ impl Loader { let output_name = "output.wasm"; let mut command = Command::new(&clang_executable); - command.current_dir(src_path); - command.args([ + command.current_dir(src_path).args([ "-o", output_name, "-fPIC", "-shared", "-Os", format!("-Wl,--export=tree_sitter_{language_name}").as_str(), + "-Wl,--allow-undefined", + "-Wl,--no-entry", + "-nostdlib", "-fno-exceptions", "-fvisibility=hidden", "-I", ".", + "parser.c", ]); if let Some(scanner_filename) = scanner_filename { command.arg(scanner_filename); } - command.arg("parser.c"); - let output = command - .output() - .with_context(|| format!("Failed to run wasi-sdk clang command: {:?}", command))?; + let output = command.output().context("Failed to run wasi-sdk clang")?; if !output.status.success() { - return Err(anyhow!("wasi-sdk clang command failed")); + return Err(anyhow!( + "wasi-sdk clang command failed: {}", + String::from_utf8_lossy(&output.stderr) + )); } fs::rename(src_path.join(output_name), output_path) @@ -1009,13 +1013,43 @@ impl Loader { Ok(()) } + /// Extracts a tar.gz archive, stripping the first path component. + /// + /// Similar to `tar -xzf --strip-components=1` + fn extract_tar_gz_with_strip( + &self, + archive_path: &Path, + destination: &Path, + ) -> Result<(), Error> { + let archive_file = fs::File::open(archive_path).context("Failed to open archive")?; + let mut archive = tar::Archive::new(GzDecoder::new(archive_file)); + for entry in archive + .entries() + .with_context(|| "Failed to read archive entries")? + { + let mut entry = entry?; + let path = entry.path()?; + let Some(first_component) = path.components().next() else { + continue; + }; + let dest_path = destination.join(path.strip_prefix(first_component).unwrap()); + if let Some(parent) = dest_path.parent() { + fs::create_dir_all(parent).with_context(|| { + format!("Failed to create directory at {}", parent.display()) + })?; + } + entry + .unpack(&dest_path) + .with_context(|| format!("Failed to extract file to {}", dest_path.display()))?; + } + Ok(()) + } + fn ensure_wasi_sdk_exists(&self) -> Result { let cache_dir = etcetera::choose_base_strategy()? .cache_dir() .join("tree-sitter"); - if !cache_dir.exists() { - fs::create_dir_all(&cache_dir)?; - } + fs::create_dir_all(&cache_dir)?; let wasi_sdk_dir = cache_dir.join("wasi-sdk"); let clang_exe = if cfg!(windows) { @@ -1023,36 +1057,37 @@ impl Loader { } else { wasi_sdk_dir.join("bin").join("clang") }; + if clang_exe.exists() { return Ok(clang_exe); } - if !wasi_sdk_dir.exists() { - fs::create_dir_all(&wasi_sdk_dir)?; - } + fs::create_dir_all(&wasi_sdk_dir)?; - let sdk_filename = if cfg!(target_os = "macos") { + let arch_os = if cfg!(target_os = "macos") { if cfg!(target_arch = "aarch64") { - "wasi-sdk-25.0-arm64-macos.tar.gz" + "arm64-macos" } else { - "wasi-sdk-25.0-x86_64-macos.tar.gz" + "x86_64-macos" } } else if cfg!(target_os = "windows") { - "wasi-sdk-25.0-x86_64-windows.tar.gz" + "x86_64-windows" } else if cfg!(target_os = "linux") { if cfg!(target_arch = "aarch64") { - "wasi-sdk-25.0-arm64-linux.tar.gz" + "arm64-linux" } else { - "wasi-sdk-25.0-x86_64-linux.tar.gz" + "x86_64-linux" } } else { return Err(anyhow!("Unsupported platform for wasi-sdk")); }; - let base_url = "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-25"; - let sdk_url = format!("{}/{}", base_url, sdk_filename); - eprintln!("Downloading wasi-sdk from {}...", sdk_url); + let sdk_filename = format!("wasi-sdk-25.0-{arch_os}.tar.gz"); + let sdk_url = format!( + "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-25/{sdk_filename}", + ); + eprintln!("Downloading wasi-sdk from {sdk_url}..."); let temp_tar_path = cache_dir.join(sdk_filename); let mut temp_file = fs::File::create(&temp_tar_path).with_context(|| { format!( @@ -1063,7 +1098,7 @@ impl Loader { let response = ureq::get(&sdk_url) .call() - .with_context(|| format!("Failed to download wasi-sdk from {}", sdk_url))?; + .with_context(|| format!("Failed to download wasi-sdk from {sdk_url}"))?; if !response.status().is_success() { return Err(anyhow::anyhow!( "Failed to download wasi-sdk from {}", @@ -1077,50 +1112,13 @@ impl Loader { .flush() .context("Failed to flush downloaded file")?; eprintln!("Extracting wasi-sdk to {}...", wasi_sdk_dir.display()); - - #[cfg(unix)] - { - let status = Command::new("tar") - .args([ - "-xzf", - temp_tar_path.to_str().unwrap(), - "-C", - wasi_sdk_dir.to_str().unwrap(), - "--strip-components=1", - ]) - .status() - .context("Failed to extract wasi-sdk archive with tar")?; - - if !status.success() { - return Err(anyhow!("Failed to extract wasi-sdk archive with tar")); - } - } - - #[cfg(windows)] - { - // On Windows, use PowerShell to extract the tar.gz file directly - let ps_command = format!( - "cd '{}'; tar -xzf '{}' --strip-components=1", - wasi_sdk_dir.to_str().unwrap(), - temp_tar_path.to_str().unwrap() - ); - - let status = Command::new("powershell") - .args(["-Command", &ps_command]) - .status() - .context("Failed to extract wasi-sdk archive with PowerShell")?; - - if !status.success() { - return Err(anyhow!( - "Failed to extract wasi-sdk archive with PowerShell" - )); - } - } + self.extract_tar_gz_with_strip(&temp_tar_path, &wasi_sdk_dir) + .context("Failed to extract wasi-sdk archive")?; fs::remove_file(temp_tar_path).ok(); if !clang_exe.exists() { return Err(anyhow!( - "Failed to extract wasi-sdk correctly. Clang executable not found at {}", + "Failed to extract wasi-sdk correctly. Clang executable not found at expected location: {}", clang_exe.display() )); } diff --git a/docs/src/cli/build.md b/docs/src/cli/build.md index 180e7f92..dfa5f9af 100644 --- a/docs/src/cli/build.md +++ b/docs/src/cli/build.md @@ -20,11 +20,6 @@ will attempt to build the parser in the current working directory. Compile the parser as a WASM module. -### `-d/--docker` - -Use Docker or Podman to supply Emscripten. This removes the need to install Emscripten on your machine locally. -Note that this flag is only available when compiling to WASM. - ### `-o/--output` Specify where to output the shared object file (native or WASM). This flag accepts either an absolute path or a relative diff --git a/xtask/src/generate.rs b/xtask/src/generate.rs index d7fb8ba1..c0317f95 100644 --- a/xtask/src/generate.rs +++ b/xtask/src/generate.rs @@ -42,9 +42,6 @@ pub fn run_fixtures(args: &GenerateFixtures) -> Result<()> { &format!("target/release/tree-sitter-{grammar_name}.wasm"), grammar_dir.to_str().unwrap(), ]); - if args.docker { - cmd.arg("--docker"); - } bail_on_err( &cmd.spawn()?.wait_with_output()?, &format!("Failed to regenerate {grammar_name} parser to wasm"), diff --git a/xtask/src/main.rs b/xtask/src/main.rs index ec8bc927..375571c0 100644 --- a/xtask/src/main.rs +++ b/xtask/src/main.rs @@ -123,9 +123,6 @@ struct GenerateFixtures { /// Generates the parser to WASM #[arg(long, short)] wasm: bool, - /// Run emscripten via docker even if it is installed locally. - #[arg(long, short, requires = "wasm")] - docker: bool, } #[derive(Args)]