From d4d8ed32b324a846d6bbe6078393d7b3a2975519 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 24 Apr 2025 16:00:09 -0700 Subject: [PATCH] cli: Compile parsers to wasm using `wasi-sdk`, not emscripten (#4393) * Build wasm modules using wasi-sdk, not emscripten * Log a warning if --docker is passed --- Cargo.lock | 1 + cli/loader/Cargo.toml | 1 + cli/loader/src/lib.rs | 249 +++++++++++++++++++++++------------------- cli/src/main.rs | 8 +- cli/src/wasm.rs | 2 - 5 files changed, 141 insertions(+), 120 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ddc2ef52..b64bf82c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2082,6 +2082,7 @@ dependencies = [ "tree-sitter", "tree-sitter-highlight", "tree-sitter-tags", + "ureq", "url", ] diff --git a/cli/loader/Cargo.toml b/cli/loader/Cargo.toml index e42274d9..75253d23 100644 --- a/cli/loader/Cargo.toml +++ b/cli/loader/Cargo.toml @@ -38,6 +38,7 @@ serde.workspace = true serde_json.workspace = true tempfile.workspace = true url.workspace = true +ureq = "3.0.11" tree-sitter = { workspace = true } tree-sitter-highlight = { workspace = true, optional = true } diff --git a/cli/loader/src/lib.rs b/cli/loader/src/lib.rs index 06237dc3..551caa97 100644 --- a/cli/loader/src/lib.rs +++ b/cli/loader/src/lib.rs @@ -7,10 +7,8 @@ use std::ops::Range; use std::sync::Mutex; use std::{ collections::HashMap, - env, - ffi::{OsStr, OsString}, - fs, - io::{BufRead, BufReader}, + env, fs, + io::{BufRead, BufReader, Write}, mem, path::{Path, PathBuf}, process::Command, @@ -26,7 +24,6 @@ use fs4::fs_std::FileExt; use indoc::indoc; use libloading::{Library, Symbol}; use once_cell::unsync::OnceCell; -use path_slash::PathBufExt as _; use regex::{Regex, RegexBuilder}; use semver::Version; use serde::{Deserialize, Deserializer, Serialize}; @@ -44,8 +41,6 @@ use url::Url; static GRAMMAR_NAME_REGEX: LazyLock = LazyLock::new(|| Regex::new(r#""name":\s*"(.*?)""#).unwrap()); -pub const EMSCRIPTEN_TAG: &str = concat!("docker.io/emscripten/emsdk:", env!("EMSCRIPTEN_VERSION")); - #[derive(Default, Deserialize, Serialize)] pub struct Config { #[serde(default)] @@ -735,7 +730,6 @@ impl Loader { .as_ref() .and_then(|p| p.strip_prefix(config.src_path).ok()), &output_path, - false, )?; } @@ -973,117 +967,23 @@ impl Loader { pub fn compile_parser_to_wasm( &self, language_name: &str, - root_path: Option<&Path>, + _root_path: Option<&Path>, src_path: &Path, scanner_filename: Option<&Path>, output_path: &Path, - force_docker: bool, ) -> Result<(), Error> { - #[derive(PartialEq, Eq)] - enum EmccSource { - Native, - Docker, - Podman, - } - - let root_path = root_path.unwrap_or(src_path); - let emcc_name = if cfg!(windows) { "emcc.bat" } else { "emcc" }; - - // Order of preference: emscripten > docker > podman > error - let source = if !force_docker && Command::new(emcc_name).output().is_ok() { - EmccSource::Native - } else if Command::new("docker") - .output() - .is_ok_and(|out| out.status.success()) - { - EmccSource::Docker - } else if Command::new("podman") - .arg("--version") - .output() - .is_ok_and(|out| out.status.success()) - { - EmccSource::Podman - } else { - return Err(anyhow!( - "You must have either emcc, docker, or podman on your PATH to run this command" - )); - }; - - let mut command = match source { - EmccSource::Native => { - let mut command = Command::new(emcc_name); - command.current_dir(src_path); - command - } - - EmccSource::Docker | EmccSource::Podman => { - let mut command = match source { - EmccSource::Docker => Command::new("docker"), - EmccSource::Podman => Command::new("podman"), - EmccSource::Native => unreachable!(), - }; - command.args(["run", "--rm"]); - - // The working directory is the directory containing the parser itself - let workdir = if root_path == src_path { - PathBuf::from("/src") - } else { - let mut path = PathBuf::from("/src"); - path.push(src_path.strip_prefix(root_path).unwrap()); - path - }; - command.args(["--workdir", &workdir.to_slash_lossy()]); - - // Mount the root directory as a volume, which is the repo root - let mut volume_string = OsString::from(&root_path); - volume_string.push(":/src:Z"); - command.args([OsStr::new("--volume"), &volume_string]); - - // In case `docker` is an alias to `podman`, ensure that podman - // mounts the current directory as writable by the container - // user which has the same uid as the host user. Setting the - // podman-specific variable is more reliable than attempting to - // detect whether `docker` is an alias for `podman`. - // see https://docs.podman.io/en/latest/markdown/podman-run.1.html#userns-mode - command.env("PODMAN_USERNS", "keep-id"); - - // Get the current user id so that files created in the docker container will have - // the same owner. - #[cfg(unix)] - { - #[link(name = "c")] - extern "C" { - fn getuid() -> u32; - } - // don't need to set user for podman since PODMAN_USERNS=keep-id is already set - if source == EmccSource::Docker { - let user_id = unsafe { getuid() }; - command.args(["--user", &user_id.to_string()]); - } - }; - - // Run `emcc` in a container using the `emscripten-slim` image - command.args([EMSCRIPTEN_TAG, "emcc"]); - command - } - }; + let clang_executable = self.ensure_wasi_sdk_exists()?; let output_name = "output.wasm"; - + let mut command = Command::new(&clang_executable); + command.current_dir(src_path); command.args([ "-o", output_name, + "-fPIC", + "-shared", "-Os", - "-s", - "WASM=1", - "-s", - "SIDE_MODULE=2", - "-s", - "TOTAL_MEMORY=33554432", - "-s", - "NODEJS_CATCH_EXIT=0", - "-s", - &format!("EXPORTED_FUNCTIONS=[\"_tree_sitter_{language_name}\"]"), + format!("-Wl,--export=tree_sitter_{language_name}").as_str(), "-fno-exceptions", "-fvisibility=hidden", "-I", @@ -1095,12 +995,12 @@ impl Loader { } command.arg("parser.c"); - let status = command - .spawn() - .with_context(|| "Failed to run emcc command")? - .wait()?; - if !status.success() { - return Err(anyhow!("emcc command failed")); + let output = command + .output() + .with_context(|| format!("Failed to run wasi-sdk clang command: {:?}", command))?; + + if !output.status.success() { + return Err(anyhow!("wasi-sdk clang command failed")); } fs::rename(src_path.join(output_name), output_path) @@ -1109,6 +1009,125 @@ impl Loader { Ok(()) } + fn ensure_wasi_sdk_exists(&self) -> Result { + let cache_dir = etcetera::choose_base_strategy()? + .cache_dir() + .join("tree-sitter"); + if !cache_dir.exists() { + fs::create_dir_all(&cache_dir)?; + } + + let wasi_sdk_dir = cache_dir.join("wasi-sdk"); + let clang_exe = if cfg!(windows) { + wasi_sdk_dir.join("bin").join("clang.exe") + } else { + wasi_sdk_dir.join("bin").join("clang") + }; + if clang_exe.exists() { + return Ok(clang_exe); + } + + if !wasi_sdk_dir.exists() { + fs::create_dir_all(&wasi_sdk_dir)?; + } + + let sdk_filename = if cfg!(target_os = "macos") { + if cfg!(target_arch = "aarch64") { + "wasi-sdk-25.0-arm64-macos.tar.gz" + } else { + "wasi-sdk-25.0-x86_64-macos.tar.gz" + } + } else if cfg!(target_os = "windows") { + "wasi-sdk-25.0-x86_64-windows.tar.gz" + } else if cfg!(target_os = "linux") { + if cfg!(target_arch = "aarch64") { + "wasi-sdk-25.0-arm64-linux.tar.gz" + } else { + "wasi-sdk-25.0-x86_64-linux.tar.gz" + } + } else { + return Err(anyhow!("Unsupported platform for wasi-sdk")); + }; + + let base_url = "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-25"; + let sdk_url = format!("{}/{}", base_url, sdk_filename); + eprintln!("Downloading wasi-sdk from {}...", sdk_url); + + let temp_tar_path = cache_dir.join(sdk_filename); + let mut temp_file = fs::File::create(&temp_tar_path).with_context(|| { + format!( + "Failed to create temporary file at {}", + temp_tar_path.display() + ) + })?; + + let response = ureq::get(&sdk_url) + .call() + .with_context(|| format!("Failed to download wasi-sdk from {}", sdk_url))?; + if !response.status().is_success() { + return Err(anyhow::anyhow!( + "Failed to download wasi-sdk from {}", + sdk_url + )); + } + + std::io::copy(&mut response.into_body().into_reader(), &mut temp_file) + .context("Failed to write to temporary file")?; + temp_file + .flush() + .context("Failed to flush downloaded file")?; + eprintln!("Extracting wasi-sdk to {}...", wasi_sdk_dir.display()); + + #[cfg(unix)] + { + let status = Command::new("tar") + .args([ + "-xzf", + temp_tar_path.to_str().unwrap(), + "-C", + wasi_sdk_dir.to_str().unwrap(), + "--strip-components=1", + ]) + .status() + .context("Failed to extract wasi-sdk archive with tar")?; + + if !status.success() { + return Err(anyhow!("Failed to extract wasi-sdk archive with tar")); + } + } + + #[cfg(windows)] + { + // On Windows, use PowerShell to extract the tar.gz file directly + let ps_command = format!( + "cd '{}'; tar -xzf '{}' --strip-components=1", + wasi_sdk_dir.to_str().unwrap(), + temp_tar_path.to_str().unwrap() + ); + + let status = Command::new("powershell") + .args(["-Command", &ps_command]) + .status() + .context("Failed to extract wasi-sdk archive with PowerShell")?; + + if !status.success() { + return Err(anyhow!( + "Failed to extract wasi-sdk archive with PowerShell" + )); + } + } + + fs::remove_file(temp_tar_path).ok(); + if !clang_exe.exists() { + return Err(anyhow!( + "Failed to extract wasi-sdk correctly. Clang executable not found at {}", + clang_exe.display() + )); + } + + Ok(clang_exe) + } + #[must_use] #[cfg(feature = "tree-sitter-highlight")] pub fn highlight_config_for_injection_string<'a>( diff --git a/cli/src/main.rs b/cli/src/main.rs index f6c8be2f..eadb68f9 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -138,8 +138,7 @@ struct Build { /// Build a WASM module instead of a dynamic library #[arg(short, long)] pub wasm: bool, - /// Run emscripten via docker even if it is installed locally (only if building a WASM module - /// with --wasm) + /// No longer used. #[arg(short, long)] pub docker: bool, /// The path to output the compiled file @@ -792,6 +791,10 @@ impl Build { fn run(self, mut loader: loader::Loader, current_dir: &Path) -> Result<()> { let grammar_path = current_dir.join(self.path.as_deref().unwrap_or_default()); + if self.docker { + eprintln!("Warning: --docker flag is no longer used, and will be removed in a future release."); + } + if self.wasm { let output_path = self.output.map(|path| current_dir.join(path)); let root_path = get_root_path(&grammar_path.join("tree-sitter.json"))?; @@ -801,7 +804,6 @@ impl Build { &grammar_path, current_dir, output_path, - self.docker, )?; } else { let output_path = if let Some(ref path) = self.output { diff --git a/cli/src/wasm.rs b/cli/src/wasm.rs index eef6d08b..92adb65a 100644 --- a/cli/src/wasm.rs +++ b/cli/src/wasm.rs @@ -44,7 +44,6 @@ pub fn compile_language_to_wasm( language_dir: &Path, output_dir: &Path, output_file: Option, - force_docker: bool, ) -> Result<()> { let grammar_name = get_grammar_name(language_dir)?; let output_filename = @@ -59,7 +58,6 @@ pub fn compile_language_to_wasm( .as_ref() .and_then(|p| Some(Path::new(p.file_name()?))), &output_filename, - force_docker, )?; // Exit with an error if the external scanner uses symbols from the