Integrate WASM compilation into the CLI's Loader
This commit is contained in:
parent
042e6f9d57
commit
d47713ee4a
15 changed files with 310 additions and 151 deletions
|
|
@ -10,6 +10,9 @@ keywords = ["incremental", "parsing"]
|
|||
categories = ["command-line-utilities", "parsing"]
|
||||
repository = "https://github.com/tree-sitter/tree-sitter"
|
||||
|
||||
[features]
|
||||
wasm = ["tree-sitter/wasm"]
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
cc = "^1.0.58"
|
||||
|
|
@ -18,6 +21,7 @@ libloading = "0.7"
|
|||
once_cell = "1.7"
|
||||
regex = "1"
|
||||
serde = { version = "1.0.130", features = ["derive"] }
|
||||
which = "4.1.0"
|
||||
|
||||
[dependencies.serde_json]
|
||||
version = "1.0"
|
||||
|
|
|
|||
|
|
@ -3,4 +3,10 @@ fn main() {
|
|||
"cargo:rustc-env=BUILD_TARGET={}",
|
||||
std::env::var("TARGET").unwrap()
|
||||
);
|
||||
|
||||
let emscripten_version = std::fs::read_to_string("emscripten-version").unwrap();
|
||||
println!(
|
||||
"cargo:rustc-env={}={}",
|
||||
"EMSCRIPTEN_VERSION", emscripten_version,
|
||||
);
|
||||
}
|
||||
|
|
|
|||
1
cli/loader/emscripten-version
Normal file
1
cli/loader/emscripten-version
Normal file
|
|
@ -0,0 +1 @@
|
|||
3.1.25
|
||||
|
|
@ -4,6 +4,7 @@ use once_cell::unsync::OnceCell;
|
|||
use regex::{Regex, RegexBuilder};
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::{OsStr, OsString};
|
||||
use std::io::BufReader;
|
||||
use std::ops::Range;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
|
@ -14,6 +15,9 @@ use std::{fs, mem};
|
|||
use tree_sitter::{Language, QueryError, QueryErrorKind};
|
||||
use tree_sitter_highlight::HighlightConfiguration;
|
||||
use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
|
||||
use which::which;
|
||||
|
||||
pub const EMSCRIPTEN_TAG: &'static str = concat!("emscripten/emsdk:", env!("EMSCRIPTEN_VERSION"));
|
||||
|
||||
#[derive(Default, Deserialize, Serialize)]
|
||||
pub struct Config {
|
||||
|
|
@ -101,6 +105,9 @@ pub struct Loader {
|
|||
highlight_names: Box<Mutex<Vec<String>>>,
|
||||
use_all_highlight_names: bool,
|
||||
debug_build: bool,
|
||||
|
||||
#[cfg(feature = "wasm")]
|
||||
wasm_store: Mutex<Option<tree_sitter::WasmStore>>,
|
||||
}
|
||||
|
||||
unsafe impl Send for Loader {}
|
||||
|
|
@ -123,6 +130,9 @@ impl Loader {
|
|||
highlight_names: Box::new(Mutex::new(Vec::new())),
|
||||
use_all_highlight_names: true,
|
||||
debug_build: false,
|
||||
|
||||
#[cfg(feature = "wasm")]
|
||||
wasm_store: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -310,8 +320,6 @@ impl Loader {
|
|||
|
||||
pub fn load_language_at_path(&self, src_path: &Path, header_path: &Path) -> Result<Language> {
|
||||
let grammar_path = src_path.join("grammar.json");
|
||||
let parser_path = src_path.join("parser.c");
|
||||
let mut scanner_path = src_path.join("scanner.c");
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct GrammarJSON {
|
||||
|
|
@ -322,125 +330,252 @@ impl Loader {
|
|||
let grammar_json: GrammarJSON = serde_json::from_reader(BufReader::new(&mut grammar_file))
|
||||
.with_context(|| "Failed to parse grammar.json")?;
|
||||
|
||||
let scanner_path = if scanner_path.exists() {
|
||||
Some(scanner_path)
|
||||
} else {
|
||||
scanner_path.set_extension("cc");
|
||||
if scanner_path.exists() {
|
||||
Some(scanner_path)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
self.load_language_from_sources(
|
||||
&grammar_json.name,
|
||||
&header_path,
|
||||
&parser_path,
|
||||
&scanner_path,
|
||||
)
|
||||
self.load_language_at_path_with_name(src_path, &header_path, &grammar_json.name)
|
||||
}
|
||||
|
||||
pub fn load_language_from_sources(
|
||||
pub fn load_language_at_path_with_name(
|
||||
&self,
|
||||
name: &str,
|
||||
src_path: &Path,
|
||||
header_path: &Path,
|
||||
parser_path: &Path,
|
||||
scanner_path: &Option<PathBuf>,
|
||||
name: &str,
|
||||
) -> Result<Language> {
|
||||
let mut lib_name = name.to_string();
|
||||
let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(name));
|
||||
if self.debug_build {
|
||||
lib_name.push_str(".debug._");
|
||||
}
|
||||
|
||||
let mut library_path = self.parser_lib_path.join(lib_name);
|
||||
library_path.set_extension(DYLIB_EXTENSION);
|
||||
fs::create_dir_all(&self.parser_lib_path)?;
|
||||
|
||||
let parser_path = src_path.join("parser.c");
|
||||
let mut scanner_path = None;
|
||||
let mut try_scanner_path = src_path.join("scanner.c");
|
||||
for extension in ["c", "cc", "cpp"] {
|
||||
try_scanner_path.set_extension(extension);
|
||||
if try_scanner_path.exists() {
|
||||
scanner_path = Some(try_scanner_path);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let recompile = needs_recompile(&library_path, &parser_path, &scanner_path)
|
||||
.with_context(|| "Failed to compare source and binary timestamps")?;
|
||||
|
||||
if recompile {
|
||||
fs::create_dir_all(&self.parser_lib_path)?;
|
||||
let mut config = cc::Build::new();
|
||||
config
|
||||
.cpp(true)
|
||||
.opt_level(2)
|
||||
.cargo_metadata(false)
|
||||
.target(BUILD_TARGET)
|
||||
.host(BUILD_TARGET);
|
||||
let compiler = config.get_compiler();
|
||||
let mut command = Command::new(compiler.path());
|
||||
for (key, value) in compiler.env() {
|
||||
command.env(key, value);
|
||||
if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() {
|
||||
library_path.set_extension("wasm");
|
||||
|
||||
eprintln!("library_path: {:?}", &library_path);
|
||||
|
||||
if recompile {
|
||||
self.compile_parser_to_wasm(
|
||||
name,
|
||||
src_path,
|
||||
scanner_path
|
||||
.as_ref()
|
||||
.and_then(|p| p.strip_prefix(&src_path).ok()),
|
||||
&library_path,
|
||||
false,
|
||||
)?;
|
||||
}
|
||||
|
||||
if cfg!(windows) {
|
||||
command.args(&["/nologo", "/LD", "/I"]).arg(header_path);
|
||||
if self.debug_build {
|
||||
command.arg("/Od");
|
||||
let wasm_bytes = fs::read(&library_path)?;
|
||||
|
||||
Ok(wasm_store.load_language(name, &wasm_bytes))
|
||||
} else {
|
||||
library_path.set_extension(DYLIB_EXTENSION);
|
||||
|
||||
if recompile {
|
||||
self.compile_parser_to_dylib(
|
||||
header_path,
|
||||
&parser_path,
|
||||
&scanner_path,
|
||||
&library_path,
|
||||
)?;
|
||||
}
|
||||
|
||||
let library = unsafe { Library::new(&library_path) }
|
||||
.with_context(|| format!("Error opening dynamic library {:?}", &library_path))?;
|
||||
let language = unsafe {
|
||||
let language_fn: Symbol<unsafe extern "C" fn() -> Language> = library
|
||||
.get(language_fn_name.as_bytes())
|
||||
.with_context(|| format!("Failed to load symbol {}", language_fn_name))?;
|
||||
language_fn()
|
||||
};
|
||||
mem::forget(library);
|
||||
Ok(language)
|
||||
}
|
||||
}
|
||||
|
||||
fn compile_parser_to_dylib(
|
||||
&self,
|
||||
header_path: &Path,
|
||||
parser_path: &Path,
|
||||
scanner_path: &Option<PathBuf>,
|
||||
output_path: &PathBuf,
|
||||
) -> Result<(), Error> {
|
||||
let mut config = cc::Build::new();
|
||||
config
|
||||
.cpp(true)
|
||||
.opt_level(2)
|
||||
.cargo_metadata(false)
|
||||
.target(BUILD_TARGET)
|
||||
.host(BUILD_TARGET);
|
||||
let compiler = config.get_compiler();
|
||||
let mut command = Command::new(compiler.path());
|
||||
for (key, value) in compiler.env() {
|
||||
command.env(key, value);
|
||||
}
|
||||
if cfg!(windows) {
|
||||
command.args(&["/nologo", "/LD", "/I"]).arg(header_path);
|
||||
if self.debug_build {
|
||||
command.arg("/Od");
|
||||
} else {
|
||||
command.arg("/O2");
|
||||
}
|
||||
command.arg(parser_path);
|
||||
if let Some(scanner_path) = scanner_path.as_ref() {
|
||||
command.arg(scanner_path);
|
||||
}
|
||||
command
|
||||
.arg("/link")
|
||||
.arg(format!("/out:{}", output_path.to_str().unwrap()));
|
||||
} else {
|
||||
command
|
||||
.arg("-shared")
|
||||
.arg("-fPIC")
|
||||
.arg("-fno-exceptions")
|
||||
.arg("-g")
|
||||
.arg("-I")
|
||||
.arg(header_path)
|
||||
.arg("-o")
|
||||
.arg(output_path);
|
||||
|
||||
if self.debug_build {
|
||||
command.arg("-O0");
|
||||
} else {
|
||||
command.arg("-O2");
|
||||
}
|
||||
|
||||
// For conditional compilation of external scanner code when
|
||||
// used internally by `tree-siteer parse` and other sub commands.
|
||||
command.arg("-DTREE_SITTER_INTERNAL_BUILD");
|
||||
|
||||
if let Some(scanner_path) = scanner_path.as_ref() {
|
||||
if scanner_path.extension() == Some("c".as_ref()) {
|
||||
command.arg("-xc").arg("-std=c99").arg(scanner_path);
|
||||
} else {
|
||||
command.arg("/O2");
|
||||
}
|
||||
command.arg(parser_path);
|
||||
if let Some(scanner_path) = scanner_path.as_ref() {
|
||||
command.arg(scanner_path);
|
||||
}
|
||||
command
|
||||
.arg("/link")
|
||||
.arg(format!("/out:{}", library_path.to_str().unwrap()));
|
||||
} else {
|
||||
command
|
||||
.arg("-shared")
|
||||
.arg("-fPIC")
|
||||
.arg("-fno-exceptions")
|
||||
.arg("-g")
|
||||
.arg("-I")
|
||||
.arg(header_path)
|
||||
.arg("-o")
|
||||
.arg(&library_path);
|
||||
}
|
||||
command.arg("-xc").arg(parser_path);
|
||||
}
|
||||
let output = command
|
||||
.output()
|
||||
.with_context(|| "Failed to execute C compiler")?;
|
||||
if !output.status.success() {
|
||||
return Err(anyhow!(
|
||||
"Parser compilation failed.\nStdout: {}\nStderr: {}",
|
||||
String::from_utf8_lossy(&output.stdout),
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
if self.debug_build {
|
||||
command.arg("-O0");
|
||||
} else {
|
||||
command.arg("-O2");
|
||||
}
|
||||
fn compile_parser_to_wasm(
|
||||
&self,
|
||||
language_name: &str,
|
||||
src_path: &Path,
|
||||
scanner_filename: Option<&Path>,
|
||||
output_path: &PathBuf,
|
||||
force_docker: bool,
|
||||
) -> Result<(), Error> {
|
||||
let emcc_bin = if cfg!(windows) { "emcc.bat" } else { "emcc" };
|
||||
let emcc_path = which(emcc_bin)
|
||||
.ok()
|
||||
.and_then(|p| Command::new(&p).output().and(Ok(p)).ok());
|
||||
|
||||
// For conditional compilation of external scanner code when
|
||||
// used internally by `tree-siteer parse` and other sub commands.
|
||||
command.arg("-DTREE_SITTER_INTERNAL_BUILD");
|
||||
let mut command;
|
||||
if emcc_path.is_some() && !force_docker {
|
||||
command = Command::new(emcc_path.unwrap());
|
||||
command.current_dir(&src_path);
|
||||
} else if Command::new("docker").output().is_ok() {
|
||||
command = Command::new("docker");
|
||||
command.args(&["run", "--rm"]);
|
||||
|
||||
if let Some(scanner_path) = scanner_path.as_ref() {
|
||||
if scanner_path.extension() == Some("c".as_ref()) {
|
||||
command.arg("-xc").arg("-std=c99").arg(scanner_path);
|
||||
} else {
|
||||
command.arg(scanner_path);
|
||||
}
|
||||
}
|
||||
command.arg("-xc").arg(parser_path);
|
||||
// Mount the parser directory as a volume
|
||||
command.args(&["--workdir", "/src"]);
|
||||
|
||||
let mut volume_string = OsString::from(&src_path);
|
||||
volume_string.push(":/src:Z");
|
||||
command.args(&[OsStr::new("--volume"), &volume_string]);
|
||||
|
||||
// Get the current user id so that files created in the docker container will have
|
||||
// the same owner.
|
||||
if cfg!(unix) {
|
||||
let user_id_output = Command::new("id")
|
||||
.arg("-u")
|
||||
.output()
|
||||
.with_context(|| "Failed to get get current user id")?;
|
||||
let user_id = String::from_utf8_lossy(&user_id_output.stdout);
|
||||
let user_id = user_id.trim();
|
||||
command.args(&["--user", user_id]);
|
||||
}
|
||||
|
||||
let output = command
|
||||
.output()
|
||||
.with_context(|| "Failed to execute C compiler")?;
|
||||
if !output.status.success() {
|
||||
return Err(anyhow!(
|
||||
"Parser compilation failed.\nStdout: {}\nStderr: {}",
|
||||
String::from_utf8_lossy(&output.stdout),
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
));
|
||||
}
|
||||
// Run `emcc` in a container using the `emscripten-slim` image
|
||||
command.args(&[EMSCRIPTEN_TAG, "emcc"]);
|
||||
} else {
|
||||
return Err(anyhow!(
|
||||
"You must have either emcc or docker on your PATH to run this command"
|
||||
));
|
||||
}
|
||||
|
||||
let library = unsafe { Library::new(&library_path) }
|
||||
.with_context(|| format!("Error opening dynamic library {:?}", &library_path))?;
|
||||
let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(name));
|
||||
let language = unsafe {
|
||||
let language_fn: Symbol<unsafe extern "C" fn() -> Language> = library
|
||||
.get(language_fn_name.as_bytes())
|
||||
.with_context(|| format!("Failed to load symbol {}", language_fn_name))?;
|
||||
language_fn()
|
||||
};
|
||||
mem::forget(library);
|
||||
Ok(language)
|
||||
let output_name = "output.wasm";
|
||||
|
||||
command.args(&[
|
||||
"-o",
|
||||
output_name,
|
||||
"-Os",
|
||||
"-s",
|
||||
"WASM=1",
|
||||
"-s",
|
||||
"SIDE_MODULE=1",
|
||||
"-s",
|
||||
"TOTAL_MEMORY=33554432",
|
||||
"-s",
|
||||
"NODEJS_CATCH_EXIT=0",
|
||||
"-s",
|
||||
&format!("EXPORTED_FUNCTIONS=[\"_tree_sitter_{}\"]", language_name),
|
||||
"-fno-exceptions",
|
||||
"-I",
|
||||
".",
|
||||
]);
|
||||
|
||||
if let Some(scanner_filename) = scanner_filename {
|
||||
if scanner_filename
|
||||
.extension()
|
||||
.and_then(|ext| ext.to_str())
|
||||
.map_or(false, |ext| ["cc", "cpp"].contains(&ext))
|
||||
{
|
||||
command.arg("-xc++");
|
||||
}
|
||||
command.arg(&scanner_filename);
|
||||
}
|
||||
|
||||
command.arg("parser.c");
|
||||
|
||||
let output = command.output().context("Failed to run emcc command")?;
|
||||
if !output.status.success() {
|
||||
return Err(anyhow!(
|
||||
"emcc command failed - {}",
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
));
|
||||
}
|
||||
|
||||
fs::rename(&src_path.join(output_name), &output_path)
|
||||
.context("failed to rename wasm output file")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn highlight_config_for_injection_string<'a>(
|
||||
|
|
@ -660,6 +795,11 @@ impl Loader {
|
|||
pub fn use_debug_build(&mut self, flag: bool) {
|
||||
self.debug_build = flag;
|
||||
}
|
||||
|
||||
#[cfg(feature = "wasm")]
|
||||
pub fn use_wasm(&mut self, engine: tree_sitter::wasmtime::Engine) {
|
||||
*self.wasm_store.lock().unwrap() = Some(tree_sitter::WasmStore::new(engine))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> LanguageConfiguration<'a> {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue