Integrate WASM compilation into the CLI's Loader

This commit is contained in:
Max Brunsfeld 2022-09-06 22:41:52 -07:00
parent 042e6f9d57
commit d47713ee4a
15 changed files with 310 additions and 151 deletions

View file

@ -36,7 +36,7 @@ jobs:
- name: Read Emscripten version
run: |
printf 'EMSCRIPTEN_VERSION=%s\n' "$(cat cli/emscripten-version)" >> $GITHUB_ENV
printf 'EMSCRIPTEN_VERSION=%s\n' "$(cat cli/loader/emscripten-version)" >> $GITHUB_ENV
- name: Cache artifacts
id: cache

1
Cargo.lock generated
View file

@ -1514,6 +1514,7 @@ dependencies = [
"tree-sitter",
"tree-sitter-highlight",
"tree-sitter-tags",
"which",
]
[[package]]

View file

@ -56,6 +56,7 @@ path = "../highlight"
[dependencies.tree-sitter-loader]
version = "0.20"
path = "loader"
features = ["wasm"]
[dependencies.tree-sitter-tags]
version = "0.20"

View file

@ -15,12 +15,6 @@ fn main() {
"cargo:rustc-env={}={}",
"RUST_BINDING_VERSION", rust_binding_version,
);
let emscripten_version = fs::read_to_string("emscripten-version").unwrap();
println!(
"cargo:rustc-env={}={}",
"EMSCRIPTEN_VERSION", emscripten_version,
);
}
fn web_playground_files_present() -> bool {

View file

@ -10,6 +10,9 @@ keywords = ["incremental", "parsing"]
categories = ["command-line-utilities", "parsing"]
repository = "https://github.com/tree-sitter/tree-sitter"
[features]
wasm = ["tree-sitter/wasm"]
[dependencies]
anyhow = "1.0"
cc = "^1.0.58"
@ -18,6 +21,7 @@ libloading = "0.7"
once_cell = "1.7"
regex = "1"
serde = { version = "1.0.130", features = ["derive"] }
which = "4.1.0"
[dependencies.serde_json]
version = "1.0"

View file

@ -3,4 +3,10 @@ fn main() {
"cargo:rustc-env=BUILD_TARGET={}",
std::env::var("TARGET").unwrap()
);
let emscripten_version = std::fs::read_to_string("emscripten-version").unwrap();
println!(
"cargo:rustc-env={}={}",
"EMSCRIPTEN_VERSION", emscripten_version,
);
}

View file

@ -4,6 +4,7 @@ use once_cell::unsync::OnceCell;
use regex::{Regex, RegexBuilder};
use serde::{Deserialize, Deserializer, Serialize};
use std::collections::HashMap;
use std::ffi::{OsStr, OsString};
use std::io::BufReader;
use std::ops::Range;
use std::path::{Path, PathBuf};
@ -14,6 +15,9 @@ use std::{fs, mem};
use tree_sitter::{Language, QueryError, QueryErrorKind};
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
use which::which;
pub const EMSCRIPTEN_TAG: &'static str = concat!("emscripten/emsdk:", env!("EMSCRIPTEN_VERSION"));
#[derive(Default, Deserialize, Serialize)]
pub struct Config {
@ -101,6 +105,9 @@ pub struct Loader {
highlight_names: Box<Mutex<Vec<String>>>,
use_all_highlight_names: bool,
debug_build: bool,
#[cfg(feature = "wasm")]
wasm_store: Mutex<Option<tree_sitter::WasmStore>>,
}
unsafe impl Send for Loader {}
@ -123,6 +130,9 @@ impl Loader {
highlight_names: Box::new(Mutex::new(Vec::new())),
use_all_highlight_names: true,
debug_build: false,
#[cfg(feature = "wasm")]
wasm_store: Default::default(),
}
}
@ -310,8 +320,6 @@ impl Loader {
pub fn load_language_at_path(&self, src_path: &Path, header_path: &Path) -> Result<Language> {
let grammar_path = src_path.join("grammar.json");
let parser_path = src_path.join("parser.c");
let mut scanner_path = src_path.join("scanner.c");
#[derive(Deserialize)]
struct GrammarJSON {
@ -322,125 +330,252 @@ impl Loader {
let grammar_json: GrammarJSON = serde_json::from_reader(BufReader::new(&mut grammar_file))
.with_context(|| "Failed to parse grammar.json")?;
let scanner_path = if scanner_path.exists() {
Some(scanner_path)
} else {
scanner_path.set_extension("cc");
if scanner_path.exists() {
Some(scanner_path)
} else {
None
}
};
self.load_language_from_sources(
&grammar_json.name,
&header_path,
&parser_path,
&scanner_path,
)
self.load_language_at_path_with_name(src_path, &header_path, &grammar_json.name)
}
pub fn load_language_from_sources(
pub fn load_language_at_path_with_name(
&self,
name: &str,
src_path: &Path,
header_path: &Path,
parser_path: &Path,
scanner_path: &Option<PathBuf>,
name: &str,
) -> Result<Language> {
let mut lib_name = name.to_string();
let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(name));
if self.debug_build {
lib_name.push_str(".debug._");
}
let mut library_path = self.parser_lib_path.join(lib_name);
library_path.set_extension(DYLIB_EXTENSION);
fs::create_dir_all(&self.parser_lib_path)?;
let parser_path = src_path.join("parser.c");
let mut scanner_path = None;
let mut try_scanner_path = src_path.join("scanner.c");
for extension in ["c", "cc", "cpp"] {
try_scanner_path.set_extension(extension);
if try_scanner_path.exists() {
scanner_path = Some(try_scanner_path);
break;
}
}
let recompile = needs_recompile(&library_path, &parser_path, &scanner_path)
.with_context(|| "Failed to compare source and binary timestamps")?;
if recompile {
fs::create_dir_all(&self.parser_lib_path)?;
let mut config = cc::Build::new();
config
.cpp(true)
.opt_level(2)
.cargo_metadata(false)
.target(BUILD_TARGET)
.host(BUILD_TARGET);
let compiler = config.get_compiler();
let mut command = Command::new(compiler.path());
for (key, value) in compiler.env() {
command.env(key, value);
if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() {
library_path.set_extension("wasm");
eprintln!("library_path: {:?}", &library_path);
if recompile {
self.compile_parser_to_wasm(
name,
src_path,
scanner_path
.as_ref()
.and_then(|p| p.strip_prefix(&src_path).ok()),
&library_path,
false,
)?;
}
if cfg!(windows) {
command.args(&["/nologo", "/LD", "/I"]).arg(header_path);
if self.debug_build {
command.arg("/Od");
let wasm_bytes = fs::read(&library_path)?;
Ok(wasm_store.load_language(name, &wasm_bytes))
} else {
library_path.set_extension(DYLIB_EXTENSION);
if recompile {
self.compile_parser_to_dylib(
header_path,
&parser_path,
&scanner_path,
&library_path,
)?;
}
let library = unsafe { Library::new(&library_path) }
.with_context(|| format!("Error opening dynamic library {:?}", &library_path))?;
let language = unsafe {
let language_fn: Symbol<unsafe extern "C" fn() -> Language> = library
.get(language_fn_name.as_bytes())
.with_context(|| format!("Failed to load symbol {}", language_fn_name))?;
language_fn()
};
mem::forget(library);
Ok(language)
}
}
fn compile_parser_to_dylib(
&self,
header_path: &Path,
parser_path: &Path,
scanner_path: &Option<PathBuf>,
output_path: &PathBuf,
) -> Result<(), Error> {
let mut config = cc::Build::new();
config
.cpp(true)
.opt_level(2)
.cargo_metadata(false)
.target(BUILD_TARGET)
.host(BUILD_TARGET);
let compiler = config.get_compiler();
let mut command = Command::new(compiler.path());
for (key, value) in compiler.env() {
command.env(key, value);
}
if cfg!(windows) {
command.args(&["/nologo", "/LD", "/I"]).arg(header_path);
if self.debug_build {
command.arg("/Od");
} else {
command.arg("/O2");
}
command.arg(parser_path);
if let Some(scanner_path) = scanner_path.as_ref() {
command.arg(scanner_path);
}
command
.arg("/link")
.arg(format!("/out:{}", output_path.to_str().unwrap()));
} else {
command
.arg("-shared")
.arg("-fPIC")
.arg("-fno-exceptions")
.arg("-g")
.arg("-I")
.arg(header_path)
.arg("-o")
.arg(output_path);
if self.debug_build {
command.arg("-O0");
} else {
command.arg("-O2");
}
// For conditional compilation of external scanner code when
// used internally by `tree-sitter parse` and other sub commands.
command.arg("-DTREE_SITTER_INTERNAL_BUILD");
if let Some(scanner_path) = scanner_path.as_ref() {
if scanner_path.extension() == Some("c".as_ref()) {
command.arg("-xc").arg("-std=c99").arg(scanner_path);
} else {
command.arg("/O2");
}
command.arg(parser_path);
if let Some(scanner_path) = scanner_path.as_ref() {
command.arg(scanner_path);
}
command
.arg("/link")
.arg(format!("/out:{}", library_path.to_str().unwrap()));
} else {
command
.arg("-shared")
.arg("-fPIC")
.arg("-fno-exceptions")
.arg("-g")
.arg("-I")
.arg(header_path)
.arg("-o")
.arg(&library_path);
}
command.arg("-xc").arg(parser_path);
}
let output = command
.output()
.with_context(|| "Failed to execute C compiler")?;
if !output.status.success() {
return Err(anyhow!(
"Parser compilation failed.\nStdout: {}\nStderr: {}",
String::from_utf8_lossy(&output.stdout),
String::from_utf8_lossy(&output.stderr)
));
}
Ok(())
}
if self.debug_build {
command.arg("-O0");
} else {
command.arg("-O2");
}
fn compile_parser_to_wasm(
&self,
language_name: &str,
src_path: &Path,
scanner_filename: Option<&Path>,
output_path: &PathBuf,
force_docker: bool,
) -> Result<(), Error> {
let emcc_bin = if cfg!(windows) { "emcc.bat" } else { "emcc" };
let emcc_path = which(emcc_bin)
.ok()
.and_then(|p| Command::new(&p).output().and(Ok(p)).ok());
// For conditional compilation of external scanner code when
// used internally by `tree-sitter parse` and other sub commands.
command.arg("-DTREE_SITTER_INTERNAL_BUILD");
let mut command;
if emcc_path.is_some() && !force_docker {
command = Command::new(emcc_path.unwrap());
command.current_dir(&src_path);
} else if Command::new("docker").output().is_ok() {
command = Command::new("docker");
command.args(&["run", "--rm"]);
if let Some(scanner_path) = scanner_path.as_ref() {
if scanner_path.extension() == Some("c".as_ref()) {
command.arg("-xc").arg("-std=c99").arg(scanner_path);
} else {
command.arg(scanner_path);
}
}
command.arg("-xc").arg(parser_path);
// Mount the parser directory as a volume
command.args(&["--workdir", "/src"]);
let mut volume_string = OsString::from(&src_path);
volume_string.push(":/src:Z");
command.args(&[OsStr::new("--volume"), &volume_string]);
// Get the current user id so that files created in the docker container will have
// the same owner.
if cfg!(unix) {
let user_id_output = Command::new("id")
.arg("-u")
.output()
.with_context(|| "Failed to get get current user id")?;
let user_id = String::from_utf8_lossy(&user_id_output.stdout);
let user_id = user_id.trim();
command.args(&["--user", user_id]);
}
let output = command
.output()
.with_context(|| "Failed to execute C compiler")?;
if !output.status.success() {
return Err(anyhow!(
"Parser compilation failed.\nStdout: {}\nStderr: {}",
String::from_utf8_lossy(&output.stdout),
String::from_utf8_lossy(&output.stderr)
));
}
// Run `emcc` in a container using the `emscripten/emsdk` image
command.args(&[EMSCRIPTEN_TAG, "emcc"]);
} else {
return Err(anyhow!(
"You must have either emcc or docker on your PATH to run this command"
));
}
let library = unsafe { Library::new(&library_path) }
.with_context(|| format!("Error opening dynamic library {:?}", &library_path))?;
let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(name));
let language = unsafe {
let language_fn: Symbol<unsafe extern "C" fn() -> Language> = library
.get(language_fn_name.as_bytes())
.with_context(|| format!("Failed to load symbol {}", language_fn_name))?;
language_fn()
};
mem::forget(library);
Ok(language)
let output_name = "output.wasm";
command.args(&[
"-o",
output_name,
"-Os",
"-s",
"WASM=1",
"-s",
"SIDE_MODULE=1",
"-s",
"TOTAL_MEMORY=33554432",
"-s",
"NODEJS_CATCH_EXIT=0",
"-s",
&format!("EXPORTED_FUNCTIONS=[\"_tree_sitter_{}\"]", language_name),
"-fno-exceptions",
"-I",
".",
]);
if let Some(scanner_filename) = scanner_filename {
if scanner_filename
.extension()
.and_then(|ext| ext.to_str())
.map_or(false, |ext| ["cc", "cpp"].contains(&ext))
{
command.arg("-xc++");
}
command.arg(&scanner_filename);
}
command.arg("parser.c");
let output = command.output().context("Failed to run emcc command")?;
if !output.status.success() {
return Err(anyhow!(
"emcc command failed - {}",
String::from_utf8_lossy(&output.stderr)
));
}
fs::rename(&src_path.join(output_name), &output_path)
.context("failed to rename wasm output file")?;
Ok(())
}
pub fn highlight_config_for_injection_string<'a>(
@ -660,6 +795,11 @@ impl Loader {
/// Records whether parsers should be built in debug mode.
///
/// The value is stored in the loader's `debug_build` field.
pub fn use_debug_build(&mut self, flag: bool) {
self.debug_build = flag;
}
/// Installs a WebAssembly store on this loader.
///
/// A new `tree_sitter::WasmStore` is created from `engine` and placed in the
/// loader's `wasm_store` slot, replacing any store that was there before.
///
/// # Panics
///
/// Panics if the `wasm_store` mutex is poisoned.
#[cfg(feature = "wasm")]
pub fn use_wasm(&mut self, engine: tree_sitter::wasmtime::Engine) {
// Build the store before taking the lock, matching the evaluation order of
// a plain `*guard = value` assignment.
let store = tree_sitter::WasmStore::new(engine);
let mut slot = self.wasm_store.lock().unwrap();
*slot = Some(store);
}
}
impl<'a> LanguageConfiguration<'a> {

View file

@ -3,6 +3,7 @@ use clap::{App, AppSettings, Arg, SubCommand};
use glob::glob;
use std::path::Path;
use std::{env, fs, u64};
use tree_sitter::{Parser, WasmStore};
use tree_sitter_cli::{
generate, highlight, logger, parse, playground, query, tags, test, test_highlight, test_tags,
util, wasm,
@ -359,6 +360,7 @@ fn run() -> Result<()> {
.values_of("edits")
.map_or(Vec::new(), |e| e.collect());
let cancellation_flag = util::cancel_on_stdin();
let mut parser = Parser::new();
if debug {
// For augmenting debug logging in external scanners
@ -367,6 +369,14 @@ fn run() -> Result<()> {
loader.use_debug_build(debug_build);
if wasm {
let engine = tree_sitter::wasmtime::Engine::default();
parser
.set_wasm_store(WasmStore::new(engine.clone()))
.unwrap();
loader.use_wasm(engine);
}
let timeout = matches
.value_of("timeout")
.map_or(0, |t| u64::from_str_radix(t, 10).unwrap());
@ -381,23 +391,17 @@ fn run() -> Result<()> {
let should_track_stats = matches.is_present("stat");
let mut stats = parse::Stats::default();
let mut wasm_language = None;
if wasm {
let (language_name, wasm_file) = wasm::load_language_wasm_file(&current_dir)?;
let engine = tree_sitter::wasmtime::Engine::default();
let mut context = tree_sitter::WasmStore::new(engine);
wasm_language = Some(context.load_language(&language_name, &wasm_file));
std::mem::forget(context);
}
for path in paths {
let path = Path::new(&path);
let language =
loader.select_language(path, &current_dir, matches.value_of("scope"))?;
parser
.set_language(language)
.context("incompatible language")?;
let this_file_errored = parse::parse_file_at_path(
language,
&mut parser,
path,
&edits,
max_path_length,
@ -539,7 +543,11 @@ fn run() -> Result<()> {
("build-wasm", Some(matches)) => {
let grammar_path = current_dir.join(matches.value_of("path").unwrap_or(""));
wasm::compile_language_to_wasm(&grammar_path, matches.is_present("docker"))?;
wasm::compile_language_to_wasm(
&grammar_path,
&current_dir,
matches.is_present("docker"),
)?;
}
("playground", Some(matches)) => {

View file

@ -5,7 +5,7 @@ use std::path::Path;
use std::sync::atomic::AtomicUsize;
use std::time::Instant;
use std::{fmt, fs, usize};
use tree_sitter::{InputEdit, Language, LogType, Parser, Point, Tree};
use tree_sitter::{InputEdit, LogType, Parser, Point, Tree};
#[derive(Debug)]
pub struct Edit {
@ -31,7 +31,7 @@ impl fmt::Display for Stats {
}
pub fn parse_file_at_path(
language: Language,
parser: &mut Parser,
path: &Path,
edits: &Vec<&str>,
max_path_length: usize,
@ -44,8 +44,6 @@ pub fn parse_file_at_path(
cancellation_flag: Option<&AtomicUsize>,
) -> Result<bool> {
let mut _log_session = None;
let mut parser = Parser::new();
parser.set_language(language)?;
let mut source_code =
fs::read(path).with_context(|| format!("Error reading source file {:?}", path))?;
@ -58,7 +56,7 @@ pub fn parse_file_at_path(
// Render an HTML graph if `--debug-graph` was passed
if debug_graph {
_log_session = Some(util::log_graphs(&mut parser, "log.html")?);
_log_session = Some(util::log_graphs(parser, "log.html")?);
}
// Log to stderr if `--debug` was passed
else if debug {

View file

@ -9,7 +9,7 @@ use tree_sitter_tags::TagsConfiguration;
include!("./dirs.rs");
lazy_static! {
static ref TEST_LOADER: Loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone());
static ref TEST_LOADER: Loader = Loader::with_parser_lib_path(SCRATCH_DIR.join("lib"));
}
pub fn test_loader<'a>() -> &'a Loader {
@ -63,29 +63,28 @@ pub fn get_tags_config(language_name: &str) -> TagsConfiguration {
TagsConfiguration::new(language, &tags_query, &locals_query).unwrap()
}
pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language {
let parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", name));
if !fs::read_to_string(&parser_c_path)
.map(|content| content == parser_code)
.unwrap_or(false)
{
fs::write(&parser_c_path, parser_code).unwrap();
pub fn get_test_language(
name: &str,
parser_code: &str,
scanner_src_path: Option<&Path>,
) -> Language {
let src_dir = SCRATCH_DIR.join("src").join(name);
fs::create_dir_all(&src_dir).unwrap();
let parser_path = src_dir.join("parser.c");
if !fs::read_to_string(&parser_path).map_or(false, |content| content == parser_code) {
fs::write(&parser_path, parser_code).unwrap();
}
let scanner_path = path.and_then(|p| {
let result = p.join("scanner.c");
if result.exists() {
Some(result)
} else {
None
if let Some(scanner_src_path) = scanner_src_path {
let scanner_code = fs::read_to_string(&scanner_src_path).unwrap();
let scanner_path = src_dir.join("scanner.c");
if !fs::read_to_string(&scanner_path).map_or(false, |content| content == scanner_code) {
fs::write(&scanner_path, scanner_code).unwrap();
}
});
}
TEST_LOADER
.load_language_from_sources(name, &HEADER_DIR, &parser_c_path, &scanner_path)
.load_language_at_path_with_name(&src_dir, &HEADER_DIR, name)
.unwrap()
}
pub fn get_test_grammar(name: &str) -> (String, Option<PathBuf>) {
let dir = fixtures_dir().join("test_grammars").join(name);
let grammar = fs::read_to_string(&dir.join("grammar.json")).unwrap();
(grammar, Some(dir))
}

View file

@ -2,13 +2,15 @@ use super::helpers::{
allocations,
edits::invert_edit,
edits::ReadRecorder,
fixtures::{get_language, get_test_grammar, get_test_language},
fixtures::{get_language, get_test_language},
};
use crate::{
generate::generate_parser_for_grammar,
parse::{perform_edit, Edit},
tests::helpers::fixtures::fixtures_dir,
};
use std::{
fs,
sync::atomic::{AtomicUsize, Ordering},
thread, time,
};
@ -421,7 +423,11 @@ fn test_parsing_empty_file_with_reused_tree() {
#[test]
fn test_parsing_after_editing_tree_that_depends_on_column_values() {
let (grammar, path) = get_test_grammar("uses_current_column");
let dir = fixtures_dir()
.join("test_grammars")
.join("uses_current_column");
let grammar = fs::read_to_string(&dir.join("grammar.json")).unwrap();
let scanner_path = dir.join("scanner.c");
let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar).unwrap();
let mut parser = Parser::new();
@ -429,7 +435,7 @@ fn test_parsing_after_editing_tree_that_depends_on_column_values() {
.set_language(get_test_language(
&grammar_name,
&parser_code,
path.as_ref().map(AsRef::as_ref),
Some(&scanner_path),
))
.unwrap();

View file

@ -6,10 +6,9 @@ use std::{
path::Path,
process::Command,
};
use tree_sitter_loader::EMSCRIPTEN_TAG;
use which::which;
const EMSCRIPTEN_TAG: &'static str = concat!("emscripten/emsdk:", env!("EMSCRIPTEN_VERSION"));
pub fn load_language_wasm_file(language_dir: &Path) -> Result<(String, Vec<u8>)> {
let grammar_name = get_grammar_name(&language_dir)
.with_context(|| "Failed to get wasm filename")
@ -34,9 +33,13 @@ pub fn get_grammar_name(language_dir: &Path) -> Result<String> {
Ok(grammar.name)
}
pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Result<()> {
pub fn compile_language_to_wasm(
language_dir: &Path,
output_dir: &Path,
force_docker: bool,
) -> Result<()> {
let grammar_name = get_grammar_name(&language_dir)?;
let output_filename = format!("tree-sitter-{}.wasm", grammar_name);
let output_filename = output_dir.join(&format!("tree-sitter-{}.wasm", grammar_name));
let emcc_bin = if cfg!(windows) { "emcc.bat" } else { "emcc" };
let emcc_path = which(emcc_bin)
@ -86,9 +89,8 @@ pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Resu
));
}
command.arg("-o").arg(&output_filename);
command.args(&[
"-o",
&output_filename,
"-Os",
"-s",
"WASM=1",

View file

@ -33,7 +33,7 @@ web_dir=lib/binding_web
emscripten_flags="-O3"
minify_js=1
force_docker=0
emscripen_version=$(cat "$(dirname "$0")"/../cli/emscripten-version)
emscripen_version=$(cat "$(dirname "$0")"/../cli/loader/emscripten-version)
while [[ $# > 0 ]]; do
case "$1" in

View file

@ -2,7 +2,7 @@
set -e
EMSCRIPTEN_VERSION=$(cat "$(dirname "$0")/../cli/emscripten-version")
EMSCRIPTEN_VERSION=$(cat "$(dirname "$0")/../cli/loader/emscripten-version")
mkdir -p target
EMSDK_DIR="./target/emsdk"