Fix wasm build issues introduced by switching to wasi-sdk (#4407)

* Don't shell out for extracting tar.gz files

* Avoid wasi-sdk adding dependency on libc.so

* Clippy

* Fix -nostdlib flag
This commit is contained in:
Max Brunsfeld 2025-04-27 19:42:34 -07:00 committed by GitHub
parent d4d8ed32b3
commit ca7ff033db
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 93 additions and 79 deletions

31
Cargo.lock generated
View file

@ -615,9 +615,9 @@ dependencies = [
[[package]]
name = "flate2"
version = "1.0.35"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c"
checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece"
dependencies = [
"crc32fast",
"miniz_oxide",
@ -1201,9 +1201,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.8.3"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924"
checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a"
dependencies = [
"adler2",
]
@ -1786,6 +1786,17 @@ dependencies = [
"syn",
]
[[package]]
name = "tar"
version = "0.4.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a"
dependencies = [
"filetime",
"libc",
"xattr",
]
[[package]]
name = "target-lexicon"
version = "0.13.1"
@ -2069,6 +2080,7 @@ dependencies = [
"anyhow",
"cc",
"etcetera",
"flate2",
"fs4",
"indoc",
"libloading",
@ -2078,6 +2090,7 @@ dependencies = [
"semver",
"serde",
"serde_json",
"tar",
"tempfile",
"tree-sitter",
"tree-sitter-highlight",
@ -2907,6 +2920,16 @@ version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
[[package]]
name = "xattr"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d65cbf2f12c15564212d48f4e3dfb87923d25d611f2aed18f4cb23f0413d89e"
dependencies = [
"libc",
"rustix 1.0.2",
]
[[package]]
name = "xtask"
version = "0.1.0"

View file

@ -116,6 +116,7 @@ ctrlc = { version = "3.4.6", features = ["termination"] }
dialoguer = { version = "0.11.0", features = ["fuzzy-select"] }
etcetera = "0.8.0"
filetime = "0.2.25"
flate2 = "1.0.28"
fs4 = "0.12.0"
git2 = "0.20.1"
glob = "0.3.2"
@ -140,6 +141,7 @@ serde_json = { version = "1.0.140", features = ["preserve_order"] }
similar = "2.7.0"
smallbitvec = "2.6.0"
streaming-iterator = "0.1.9"
tar = "0.4.40"
tempfile = "3.19.1"
thiserror = "2.0.12"
tiny_http = "0.12.0"

View file

@ -27,6 +27,7 @@ default = ["tree-sitter-highlight", "tree-sitter-tags"]
anyhow.workspace = true
cc.workspace = true
etcetera.workspace = true
flate2.workspace = true
fs4.workspace = true
indoc.workspace = true
libloading.workspace = true
@ -36,6 +37,7 @@ regex.workspace = true
semver.workspace = true
serde.workspace = true
serde_json.workspace = true
tar.workspace = true
tempfile.workspace = true
url.workspace = true
ureq = "3.0.11"

View file

@ -8,7 +8,7 @@ use std::sync::Mutex;
use std::{
collections::HashMap,
env, fs,
io::{BufRead, BufReader, Write},
io::{BufRead, BufReader, Write as _},
mem,
path::{Path, PathBuf},
process::Command,
@ -20,6 +20,7 @@ use std::{
use anyhow::Error;
use anyhow::{anyhow, Context, Result};
use etcetera::BaseStrategy as _;
use flate2::read::GzDecoder;
use fs4::fs_std::FileExt;
use indoc::indoc;
use libloading::{Library, Symbol};
@ -976,31 +977,34 @@ impl Loader {
let output_name = "output.wasm";
let mut command = Command::new(&clang_executable);
command.current_dir(src_path);
command.args([
command.current_dir(src_path).args([
"-o",
output_name,
"-fPIC",
"-shared",
"-Os",
format!("-Wl,--export=tree_sitter_{language_name}").as_str(),
"-Wl,--allow-undefined",
"-Wl,--no-entry",
"-nostdlib",
"-fno-exceptions",
"-fvisibility=hidden",
"-I",
".",
"parser.c",
]);
if let Some(scanner_filename) = scanner_filename {
command.arg(scanner_filename);
}
command.arg("parser.c");
let output = command
.output()
.with_context(|| format!("Failed to run wasi-sdk clang command: {:?}", command))?;
let output = command.output().context("Failed to run wasi-sdk clang")?;
if !output.status.success() {
return Err(anyhow!("wasi-sdk clang command failed"));
return Err(anyhow!(
"wasi-sdk clang command failed: {}",
String::from_utf8_lossy(&output.stderr)
));
}
fs::rename(src_path.join(output_name), output_path)
@ -1009,13 +1013,43 @@ impl Loader {
Ok(())
}
/// Extracts a tar.gz archive, stripping the first path component.
///
/// Similar to `tar -xzf <archive> --strip-components=1`
fn extract_tar_gz_with_strip(
&self,
archive_path: &Path,
destination: &Path,
) -> Result<(), Error> {
let archive_file = fs::File::open(archive_path).context("Failed to open archive")?;
let mut archive = tar::Archive::new(GzDecoder::new(archive_file));
for entry in archive
.entries()
.with_context(|| "Failed to read archive entries")?
{
let mut entry = entry?;
let path = entry.path()?;
let Some(first_component) = path.components().next() else {
continue;
};
let dest_path = destination.join(path.strip_prefix(first_component).unwrap());
if let Some(parent) = dest_path.parent() {
fs::create_dir_all(parent).with_context(|| {
format!("Failed to create directory at {}", parent.display())
})?;
}
entry
.unpack(&dest_path)
.with_context(|| format!("Failed to extract file to {}", dest_path.display()))?;
}
Ok(())
}
fn ensure_wasi_sdk_exists(&self) -> Result<PathBuf, Error> {
let cache_dir = etcetera::choose_base_strategy()?
.cache_dir()
.join("tree-sitter");
if !cache_dir.exists() {
fs::create_dir_all(&cache_dir)?;
}
fs::create_dir_all(&cache_dir)?;
let wasi_sdk_dir = cache_dir.join("wasi-sdk");
let clang_exe = if cfg!(windows) {
@ -1023,36 +1057,37 @@ impl Loader {
} else {
wasi_sdk_dir.join("bin").join("clang")
};
if clang_exe.exists() {
return Ok(clang_exe);
}
if !wasi_sdk_dir.exists() {
fs::create_dir_all(&wasi_sdk_dir)?;
}
fs::create_dir_all(&wasi_sdk_dir)?;
let sdk_filename = if cfg!(target_os = "macos") {
let arch_os = if cfg!(target_os = "macos") {
if cfg!(target_arch = "aarch64") {
"wasi-sdk-25.0-arm64-macos.tar.gz"
"arm64-macos"
} else {
"wasi-sdk-25.0-x86_64-macos.tar.gz"
"x86_64-macos"
}
} else if cfg!(target_os = "windows") {
"wasi-sdk-25.0-x86_64-windows.tar.gz"
"x86_64-windows"
} else if cfg!(target_os = "linux") {
if cfg!(target_arch = "aarch64") {
"wasi-sdk-25.0-arm64-linux.tar.gz"
"arm64-linux"
} else {
"wasi-sdk-25.0-x86_64-linux.tar.gz"
"x86_64-linux"
}
} else {
return Err(anyhow!("Unsupported platform for wasi-sdk"));
};
let base_url = "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-25";
let sdk_url = format!("{}/{}", base_url, sdk_filename);
eprintln!("Downloading wasi-sdk from {}...", sdk_url);
let sdk_filename = format!("wasi-sdk-25.0-{arch_os}.tar.gz");
let sdk_url = format!(
"https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-25/{sdk_filename}",
);
eprintln!("Downloading wasi-sdk from {sdk_url}...");
let temp_tar_path = cache_dir.join(sdk_filename);
let mut temp_file = fs::File::create(&temp_tar_path).with_context(|| {
format!(
@ -1063,7 +1098,7 @@ impl Loader {
let response = ureq::get(&sdk_url)
.call()
.with_context(|| format!("Failed to download wasi-sdk from {}", sdk_url))?;
.with_context(|| format!("Failed to download wasi-sdk from {sdk_url}"))?;
if !response.status().is_success() {
return Err(anyhow::anyhow!(
"Failed to download wasi-sdk from {}",
@ -1077,50 +1112,13 @@ impl Loader {
.flush()
.context("Failed to flush downloaded file")?;
eprintln!("Extracting wasi-sdk to {}...", wasi_sdk_dir.display());
#[cfg(unix)]
{
let status = Command::new("tar")
.args([
"-xzf",
temp_tar_path.to_str().unwrap(),
"-C",
wasi_sdk_dir.to_str().unwrap(),
"--strip-components=1",
])
.status()
.context("Failed to extract wasi-sdk archive with tar")?;
if !status.success() {
return Err(anyhow!("Failed to extract wasi-sdk archive with tar"));
}
}
#[cfg(windows)]
{
// On Windows, use PowerShell to extract the tar.gz file directly
let ps_command = format!(
"cd '{}'; tar -xzf '{}' --strip-components=1",
wasi_sdk_dir.to_str().unwrap(),
temp_tar_path.to_str().unwrap()
);
let status = Command::new("powershell")
.args(["-Command", &ps_command])
.status()
.context("Failed to extract wasi-sdk archive with PowerShell")?;
if !status.success() {
return Err(anyhow!(
"Failed to extract wasi-sdk archive with PowerShell"
));
}
}
self.extract_tar_gz_with_strip(&temp_tar_path, &wasi_sdk_dir)
.context("Failed to extract wasi-sdk archive")?;
fs::remove_file(temp_tar_path).ok();
if !clang_exe.exists() {
return Err(anyhow!(
"Failed to extract wasi-sdk correctly. Clang executable not found at {}",
"Failed to extract wasi-sdk correctly. Clang executable not found at expected location: {}",
clang_exe.display()
));
}

View file

@ -20,11 +20,6 @@ will attempt to build the parser in the current working directory.
Compile the parser as a WASM module.
### `-d/--docker`
Use Docker or Podman to supply Emscripten. This removes the need to install Emscripten on your machine locally.
Note that this flag is only available when compiling to WASM.
### `-o/--output`
Specify where to output the shared object file (native or WASM). This flag accepts either an absolute path or a relative

View file

@ -42,9 +42,6 @@ pub fn run_fixtures(args: &GenerateFixtures) -> Result<()> {
&format!("target/release/tree-sitter-{grammar_name}.wasm"),
grammar_dir.to_str().unwrap(),
]);
if args.docker {
cmd.arg("--docker");
}
bail_on_err(
&cmd.spawn()?.wait_with_output()?,
&format!("Failed to regenerate {grammar_name} parser to wasm"),

View file

@ -123,9 +123,6 @@ struct GenerateFixtures {
/// Generates the parser to WASM
#[arg(long, short)]
wasm: bool,
/// Run emscripten via docker even if it is installed locally.
#[arg(long, short, requires = "wasm")]
docker: bool,
}
#[derive(Args)]