Merge pull request #1864 from tree-sitter/wasm-language

Add optional WASM feature to the native library, allowing it to run wasm-compiled parsers via wasmtime
This commit is contained in:
Max Brunsfeld 2023-11-28 12:08:47 -08:00 committed by GitHub
commit 034f0d0280
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
64 changed files with 9642 additions and 517 deletions

View file

@ -20,6 +20,10 @@ doc = false
name = "benchmark"
harness = false
[features]
# default = ["wasm"]
wasm = ["tree-sitter/wasm", "tree-sitter-loader/wasm"]
[dependencies]
ansi_term = "0.12.1"
anyhow = "1.0.72"
@ -38,8 +42,8 @@ regex = "1.9.1"
regex-syntax = "0.7.4"
rustc-hash = "1.1.0"
semver = "1.0.18"
# Due to https://github.com/serde-rs/serde/issues/2538
serde = { version = "1.0, < 1.0.172", features = ["derive"] }
serde = "1.0.188"
serde_derive = "1.0"
smallbitvec = "2.5.1"
tiny_http = "0.12.0"
walkdir = "2.3.3"

View file

@ -16,12 +16,6 @@ fn main() {
"cargo:rustc-env={}={}",
"RUST_BINDING_VERSION", rust_binding_version,
);
let emscripten_version = fs::read_to_string("emscripten-version").unwrap();
println!(
"cargo:rustc-env={}={}",
"EMSCRIPTEN_VERSION", emscripten_version,
);
}
fn web_playground_files_present() -> bool {

View file

@ -14,8 +14,7 @@ rust-version.workspace = true
[dependencies]
anyhow = "1.0"
dirs = "3.0"
# Due to https://github.com/serde-rs/serde/issues/2538
serde = { version = "1.0, < 1.0.172", features = ["derive"] }
serde = { version = "1.0", features = ["derive"] }
[dependencies.serde_json]
version = "1.0"

View file

@ -11,6 +11,9 @@ categories = ["command-line-utilities", "parsing"]
repository = "https://github.com/tree-sitter/tree-sitter"
rust-version.workspace = true
[features]
wasm = ["tree-sitter/wasm"]
[dependencies]
anyhow = "1.0"
cc = "^1.0.58"
@ -18,8 +21,8 @@ dirs = "3.0"
libloading = "0.7"
once_cell = "1.7"
regex = "1"
# Due to https://github.com/serde-rs/serde/issues/2538
serde = { version = "1.0, < 1.0.172", features = ["derive"] }
serde = { version = "1.0", features = ["derive"] }
which = "4.1.0"
[dependencies.serde_json]
version = "1.0"

View file

@ -3,4 +3,10 @@ fn main() {
"cargo:rustc-env=BUILD_TARGET={}",
std::env::var("TARGET").unwrap()
);
let emscripten_version = std::fs::read_to_string("emscripten-version").unwrap();
println!(
"cargo:rustc-env={}={}",
"EMSCRIPTEN_VERSION", emscripten_version,
);
}

View file

@ -6,6 +6,7 @@ use once_cell::unsync::OnceCell;
use regex::{Regex, RegexBuilder};
use serde::{Deserialize, Deserializer, Serialize};
use std::collections::HashMap;
use std::ffi::{OsStr, OsString};
use std::io::BufReader;
use std::ops::Range;
use std::path::{Path, PathBuf};
@ -16,6 +17,9 @@ use std::{env, fs, mem};
use tree_sitter::{Language, QueryError, QueryErrorKind};
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
use which::which;
/// Docker image reference (`emscripten/emsdk:<version>`) used when `emcc` is run in a
/// container. The version suffix comes from the `EMSCRIPTEN_VERSION` environment variable,
/// which must be set at compile time.
pub const EMSCRIPTEN_TAG: &'static str = concat!("emscripten/emsdk:", env!("EMSCRIPTEN_VERSION"));
#[derive(Default, Deserialize, Serialize)]
pub struct Config {
@ -105,6 +109,9 @@ pub struct Loader {
highlight_names: Box<Mutex<Vec<String>>>,
use_all_highlight_names: bool,
debug_build: bool,
#[cfg(feature = "wasm")]
wasm_store: Mutex<Option<tree_sitter::WasmStore>>,
}
unsafe impl Send for Loader {}
@ -132,6 +139,9 @@ impl Loader {
highlight_names: Box::new(Mutex::new(Vec::new())),
use_all_highlight_names: true,
debug_build: false,
#[cfg(feature = "wasm")]
wasm_store: Default::default(),
}
}
@ -319,8 +329,6 @@ impl Loader {
pub fn load_language_at_path(&self, src_path: &Path, header_path: &Path) -> Result<Language> {
let grammar_path = src_path.join("grammar.json");
let parser_path = src_path.join("parser.c");
let mut scanner_path = src_path.join("scanner.c");
#[derive(Deserialize)]
struct GrammarJSON {
@ -331,116 +339,156 @@ impl Loader {
let grammar_json: GrammarJSON = serde_json::from_reader(BufReader::new(&mut grammar_file))
.with_context(|| "Failed to parse grammar.json")?;
let scanner_path = if scanner_path.exists() {
Some(scanner_path)
} else {
scanner_path.set_extension("cc");
if scanner_path.exists() {
Some(scanner_path)
} else {
None
}
};
self.load_language_from_sources(
&grammar_json.name,
header_path,
&parser_path,
scanner_path.as_deref(),
)
self.load_language_at_path_with_name(src_path, &header_path, &grammar_json.name)
}
pub fn load_language_from_sources(
pub fn load_language_at_path_with_name(
&self,
name: &str,
src_path: &Path,
header_path: &Path,
parser_path: &Path,
scanner_path: Option<&Path>,
name: &str,
) -> Result<Language> {
let mut lib_name = name.to_string();
let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(name));
if self.debug_build {
lib_name.push_str(".debug._");
}
fs::create_dir_all(&self.parser_lib_path)?;
let mut library_path = self.parser_lib_path.join(lib_name);
library_path.set_extension(DYLIB_EXTENSION);
let recompile = needs_recompile(&library_path, parser_path, scanner_path)
let parser_path = src_path.join("parser.c");
let scanner_path = self.get_scanner_path(&src_path);
#[cfg(feature = "wasm")]
if self.wasm_store.lock().unwrap().is_some() {
library_path.set_extension("wasm");
}
let recompile = needs_recompile(&library_path, &parser_path, scanner_path.as_deref())
.with_context(|| "Failed to compare source and binary timestamps")?;
if recompile {
fs::create_dir_all(&self.parser_lib_path)?;
let mut config = cc::Build::new();
config
.cpp(true)
.opt_level(2)
.cargo_metadata(false)
.target(BUILD_TARGET)
.host(BUILD_TARGET)
.flag_if_supported("-Werror=implicit-function-declaration");
let compiler = config.get_compiler();
let mut command = Command::new(compiler.path());
for (key, value) in compiler.env() {
command.env(key, value);
#[cfg(feature = "wasm")]
if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() {
if recompile {
self.compile_parser_to_wasm(
name,
src_path,
scanner_path
.as_ref()
.and_then(|p| p.strip_prefix(&src_path).ok()),
&library_path,
false,
)?;
}
if compiler.is_like_msvc() {
command.args(["/nologo", "/LD", "/I"]).arg(header_path);
if self.debug_build {
command.arg("/Od");
let wasm_bytes = fs::read(&library_path)?;
return Ok(wasm_store.load_language(name, &wasm_bytes)?);
}
{
if recompile {
self.compile_parser_to_dylib(
header_path,
&parser_path,
&scanner_path,
&library_path,
)?;
}
let library = unsafe { Library::new(&library_path) }
.with_context(|| format!("Error opening dynamic library {:?}", &library_path))?;
let language = unsafe {
let language_fn: Symbol<unsafe extern "C" fn() -> Language> = library
.get(language_fn_name.as_bytes())
.with_context(|| format!("Failed to load symbol {}", language_fn_name))?;
language_fn()
};
mem::forget(library);
return Ok(language);
}
}
fn compile_parser_to_dylib(
&self,
header_path: &Path,
parser_path: &Path,
scanner_path: &Option<PathBuf>,
library_path: &PathBuf,
) -> Result<(), Error> {
let mut config = cc::Build::new();
config
.cpp(true)
.opt_level(2)
.cargo_metadata(false)
.target(BUILD_TARGET)
.host(BUILD_TARGET)
.flag_if_supported("-Werror=implicit-function-declaration");
let compiler = config.get_compiler();
let mut command = Command::new(compiler.path());
for (key, value) in compiler.env() {
command.env(key, value);
}
if compiler.is_like_msvc() {
command.args(&["/nologo", "/LD", "/I"]).arg(header_path);
if self.debug_build {
command.arg("/Od");
} else {
command.arg("/O2");
}
command.arg(parser_path);
if let Some(scanner_path) = scanner_path.as_ref() {
command.arg(scanner_path);
}
command
.arg("/link")
.arg(format!("/out:{}", library_path.to_str().unwrap()));
} else {
command
.arg("-shared")
.arg("-fno-exceptions")
.arg("-g")
.arg("-I")
.arg(header_path)
.arg("-o")
.arg(&library_path);
if !cfg!(windows) {
command.arg("-fPIC");
}
if self.debug_build {
command.arg("-O0");
} else {
command.arg("-O2");
}
// For conditional compilation of external scanner code when
// used internally by `tree-sitter parse` and other sub commands.
command.arg("-DTREE_SITTER_INTERNAL_BUILD");
if let Some(scanner_path) = scanner_path.as_ref() {
if scanner_path.extension() == Some("c".as_ref()) {
command.arg("-xc").arg("-std=c99").arg(scanner_path);
} else {
command.arg("/O2");
}
command.arg(parser_path);
if let Some(scanner_path) = scanner_path.as_ref() {
command.arg(scanner_path);
}
command
.arg("/link")
.arg(format!("/out:{}", library_path.to_str().unwrap()));
} else {
command
.arg("-shared")
.arg("-fno-exceptions")
.arg("-g")
.arg("-I")
.arg(header_path)
.arg("-o")
.arg(&library_path);
if !cfg!(windows) {
command.arg("-fPIC");
}
if self.debug_build {
command.arg("-O0");
} else {
command.arg("-O2");
}
// For conditional compilation of external scanner code when
// used internally by `tree-sitter parse` and other sub commands.
command.arg("-DTREE_SITTER_INTERNAL_BUILD");
if let Some(scanner_path) = scanner_path.as_ref() {
if scanner_path.extension() == Some("c".as_ref()) {
command.arg("-xc").arg("-std=c99").arg(scanner_path);
} else {
command.arg(scanner_path);
}
}
command.arg("-xc").arg(parser_path);
}
command.arg("-xc").arg(parser_path);
}
let output = command
.output()
.with_context(|| "Failed to execute C compiler")?;
if !output.status.success() {
return Err(anyhow!(
"Parser compilation failed.\nStdout: {}\nStderr: {}",
String::from_utf8_lossy(&output.stdout),
String::from_utf8_lossy(&output.stderr)
));
}
let output = command
.output()
.with_context(|| "Failed to execute C compiler")?;
if !output.status.success() {
return Err(anyhow!(
"Parser compilation failed.\nStdout: {}\nStderr: {}",
String::from_utf8_lossy(&output.stdout),
String::from_utf8_lossy(&output.stderr)
));
}
#[cfg(any(target_os = "macos", target_os = "linux"))]
@ -473,17 +521,103 @@ impl Loader {
}
}
let library = unsafe { Library::new(&library_path) }
.with_context(|| format!("Error opening dynamic library {:?}", &library_path))?;
let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(name));
let language = unsafe {
let language_fn: Symbol<unsafe extern "C" fn() -> Language> = library
.get(language_fn_name.as_bytes())
.with_context(|| format!("Failed to load symbol {}", language_fn_name))?;
language_fn()
};
mem::forget(library);
Ok(language)
Ok(())
}
/// Compiles the parser found in `src_path` to a WebAssembly side module with `emcc`.
///
/// A local `emcc` is used when it can be found on the `PATH`, can be spawned, and
/// `force_docker` is `false`. Otherwise `emcc` is run through `docker` using the
/// [`EMSCRIPTEN_TAG`] image, with `src_path` mounted at `/src` as the working directory.
///
/// * `language_name` - grammar name; `_tree_sitter_<language_name>` is the exported function.
/// * `src_path` - directory containing `parser.c`; the compiler runs with this directory
///   as its working directory.
/// * `scanner_filename` - optional external scanner source, passed to `emcc` unchanged
///   (so it should be resolvable from `src_path`). `.cc` / `.cpp` files are compiled as C++.
/// * `output_path` - destination the generated wasm file is renamed to.
/// * `force_docker` - skip any local `emcc` and always use docker.
///
/// # Errors
///
/// Returns an error if neither a usable `emcc` nor `docker` is available, if the current
/// user id cannot be queried (unix + docker only), if the compiler cannot be run or exits
/// unsuccessfully, or if the output file cannot be renamed to `output_path`.
pub fn compile_parser_to_wasm(
    &self,
    language_name: &str,
    src_path: &Path,
    scanner_filename: Option<&Path>,
    output_path: &PathBuf,
    force_docker: bool,
) -> Result<(), Error> {
    let emcc_bin = if cfg!(windows) { "emcc.bat" } else { "emcc" };

    // Only treat `emcc` as available if it is on the PATH *and* can actually be spawned.
    let emcc_path = which(emcc_bin)
        .ok()
        .and_then(|p| Command::new(&p).output().and(Ok(p)).ok());

    let mut command = match emcc_path {
        Some(emcc_path) if !force_docker => {
            let mut command = Command::new(emcc_path);
            command.current_dir(src_path);
            command
        }
        _ if Command::new("docker").output().is_ok() => {
            let mut command = Command::new("docker");
            command.args(&["run", "--rm"]);

            // Mount the parser directory as a volume
            command.args(&["--workdir", "/src"]);
            let mut volume_string = OsString::from(src_path);
            volume_string.push(":/src:Z");
            command.args(&[OsStr::new("--volume"), &volume_string]);

            // Get the current user id so that files created in the docker container will have
            // the same owner.
            if cfg!(unix) {
                let user_id_output = Command::new("id")
                    .arg("-u")
                    .output()
                    .with_context(|| "Failed to get current user id")?;
                let user_id = String::from_utf8_lossy(&user_id_output.stdout);
                let user_id = user_id.trim();
                command.args(&["--user", user_id]);
            }

            // Run `emcc` in a container using the pinned `emscripten/emsdk` image
            command.args(&[EMSCRIPTEN_TAG, "emcc"]);
            command
        }
        // Docker was explicitly requested, so a missing local `emcc` is not the problem.
        _ if force_docker => {
            return Err(anyhow!(
                "You must have docker on your PATH to run this command with --docker"
            ));
        }
        _ => {
            return Err(anyhow!(
                "You must have either emcc or docker on your PATH to run this command"
            ));
        }
    };

    // The compiler writes to a fixed name inside `src_path` (its working directory, or the
    // mounted `/src` volume under docker); the file is moved to `output_path` afterwards.
    let output_name = "output.wasm";

    command.args(&[
        "-o",
        output_name,
        "-Os",
        "-s",
        "WASM=1",
        "-s",
        "SIDE_MODULE=1",
        "-s",
        "TOTAL_MEMORY=33554432",
        "-s",
        "NODEJS_CATCH_EXIT=0",
        "-s",
        &format!("EXPORTED_FUNCTIONS=[\"_tree_sitter_{}\"]", language_name),
        "-fno-exceptions",
        "-fvisibility=hidden",
        "-I",
        ".",
    ]);

    if let Some(scanner_filename) = scanner_filename {
        let is_cpp_scanner = scanner_filename
            .extension()
            .and_then(|ext| ext.to_str())
            .map_or(false, |ext| ["cc", "cpp"].contains(&ext));
        if is_cpp_scanner {
            command.arg("-xc++");
        }
        command.arg(scanner_filename);
    }

    command.arg("parser.c");

    let output = command.output().context("Failed to run emcc command")?;
    if !output.status.success() {
        return Err(anyhow!(
            "emcc command failed - {}",
            String::from_utf8_lossy(&output.stderr)
        ));
    }

    fs::rename(src_path.join(output_name), output_path)
        .context("failed to rename wasm output file")?;

    Ok(())
}
pub fn highlight_config_for_injection_string<'a>(
@ -731,6 +865,22 @@ impl Loader {
pub fn use_debug_build(&mut self, flag: bool) {
self.debug_build = flag;
}
/// Switches this loader to wasm mode by installing a fresh `WasmStore` built from `engine`.
///
/// Panics if the store cannot be created or if the `wasm_store` mutex is poisoned.
#[cfg(feature = "wasm")]
pub fn use_wasm(&mut self, engine: tree_sitter::wasmtime::Engine) {
    let store = tree_sitter::WasmStore::new(engine).unwrap();
    let mut slot = self.wasm_store.lock().unwrap();
    *slot = Some(store);
}
/// Looks for an external scanner source file in `src_path`.
///
/// Checks `scanner.c`, `scanner.cc` and `scanner.cpp`, in that order, and returns the
/// first path that exists, or `None` when none of them do.
pub fn get_scanner_path(&self, src_path: &Path) -> Option<PathBuf> {
    ["c", "cc", "cpp"]
        .iter()
        .map(|extension| src_path.join("scanner").with_extension(extension))
        .find(|candidate| candidate.exists())
}
}
impl<'a> LanguageConfiguration<'a> {

View file

@ -4,14 +4,16 @@ use glob::glob;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::{env, fs, u64};
use tree_sitter::{ffi, Point};
use tree_sitter_cli::parse::{ParseFileOptions, ParseOutput};
use tree_sitter::{ffi, Parser, Point};
use tree_sitter_cli::{
generate, highlight, logger, parse, playground, query, tags, test, test_highlight, test_tags,
util, wasm,
generate, highlight, logger,
parse::{self, ParseFileOptions, ParseOutput},
playground, query, tags, test, test_highlight, test_tags, util, wasm,
};
use tree_sitter_config::Config;
use tree_sitter_highlight::Highlighter;
use tree_sitter_loader as loader;
use tree_sitter_tags::TagsContext;
const BUILD_VERSION: &'static str = env!("CARGO_PKG_VERSION");
const BUILD_SHA: Option<&'static str> = option_env!("BUILD_SHA");
@ -79,6 +81,9 @@ fn run() -> Result<()> {
.long("quiet")
.short("q");
let wasm_arg = Arg::with_name("wasm")
.long("wasm")
.help("compile parsers to wasm instead of native dynamic libraries");
let apply_all_captures_arg = Arg::with_name("apply-all-captures")
.help("Apply all captures to highlights")
.long("apply-all-captures");
@ -151,6 +156,7 @@ fn run() -> Result<()> {
.arg(&debug_arg)
.arg(&debug_build_arg)
.arg(&debug_graph_arg)
.arg(&wasm_arg)
.arg(Arg::with_name("output-dot").long("dot"))
.arg(Arg::with_name("output-xml").long("xml").short("x"))
.arg(
@ -242,6 +248,7 @@ fn run() -> Result<()> {
.arg(&debug_arg)
.arg(&debug_build_arg)
.arg(&debug_graph_arg)
.arg(&wasm_arg)
.arg(&apply_all_captures_arg),
)
.subcommand(
@ -392,10 +399,23 @@ fn run() -> Result<()> {
loader.use_debug_build(debug_build);
let mut parser = Parser::new();
#[cfg(feature = "wasm")]
if matches.is_present("wasm") {
let engine = tree_sitter::wasmtime::Engine::default();
parser
.set_wasm_store(tree_sitter::WasmStore::new(engine.clone()).unwrap())
.unwrap();
loader.use_wasm(engine);
}
let languages = loader.languages_at_path(&current_dir)?;
let language = languages
.first()
.ok_or_else(|| anyhow!("No language found"))?;
parser.set_language(*language)?;
let test_dir = current_dir.join("test");
// Run the corpus tests. Look for them at two paths: `test/corpus` and `corpus`.
@ -405,7 +425,7 @@ fn run() -> Result<()> {
}
if test_corpus_dir.is_dir() {
test::run_tests_at_path(
*language,
&mut parser,
&test_corpus_dir,
debug,
debug_graph,
@ -420,12 +440,22 @@ fn run() -> Result<()> {
// Run the syntax highlighting tests.
let test_highlight_dir = test_dir.join("highlight");
if test_highlight_dir.is_dir() {
test_highlight::test_highlights(&loader, &test_highlight_dir, apply_all_captures)?;
let mut highlighter = Highlighter::new();
highlighter.parser = parser;
test_highlight::test_highlights(
&loader,
&mut highlighter,
&test_highlight_dir,
apply_all_captures,
)?;
parser = highlighter.parser;
}
let test_tag_dir = test_dir.join("tags");
if test_tag_dir.is_dir() {
test_tags::test_tags(&loader, &test_tag_dir)?;
let mut tags_context = TagsContext::new();
tags_context.parser = parser;
test_tags::test_tags(&loader, &mut tags_context, &test_tag_dir)?;
}
}
@ -459,6 +489,7 @@ fn run() -> Result<()> {
.values_of("edits")
.map_or(Vec::new(), |e| e.collect());
let cancellation_flag = util::cancel_on_signal();
let mut parser = Parser::new();
if debug {
// For augmenting debug logging in external scanners
@ -467,6 +498,15 @@ fn run() -> Result<()> {
loader.use_debug_build(debug_build);
#[cfg(feature = "wasm")]
if matches.is_present("wasm") {
let engine = tree_sitter::wasmtime::Engine::default();
parser
.set_wasm_store(tree_sitter::WasmStore::new(engine.clone()).unwrap())
.unwrap();
loader.use_wasm(engine);
}
let timeout = matches
.value_of("timeout")
.map_or(0, |t| u64::from_str_radix(t, 10).unwrap());
@ -483,8 +523,12 @@ fn run() -> Result<()> {
for path in paths {
let path = Path::new(&path);
let language =
loader.select_language(path, &current_dir, matches.value_of("scope"))?;
parser
.set_language(language)
.context("incompatible language")?;
let opts = ParseFileOptions {
language,
@ -500,7 +544,7 @@ fn run() -> Result<()> {
encoding,
};
let this_file_errored = parse::parse_file_at_path(opts)?;
let this_file_errored = parse::parse_file_at_path(&mut parser, opts)?;
if should_track_stats {
stats.total_parses += 1;
@ -694,7 +738,12 @@ fn run() -> Result<()> {
("build-wasm", Some(matches)) => {
let grammar_path = current_dir.join(matches.value_of("path").unwrap_or(""));
wasm::compile_language_to_wasm(&grammar_path, matches.is_present("docker"))?;
wasm::compile_language_to_wasm(
&loader,
&grammar_path,
&current_dir,
matches.is_present("docker"),
)?;
}
("playground", Some(matches)) => {

View file

@ -52,9 +52,8 @@ pub struct ParseFileOptions<'a> {
pub encoding: Option<u32>,
}
pub fn parse_file_at_path(opts: ParseFileOptions) -> Result<bool> {
pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result<bool> {
let mut _log_session = None;
let mut parser = Parser::new();
parser.set_language(opts.language)?;
let mut source_code = fs::read(opts.path)
.with_context(|| format!("Error reading source file {:?}", opts.path))?;
@ -68,7 +67,7 @@ pub fn parse_file_at_path(opts: ParseFileOptions) -> Result<bool> {
// Render an HTML graph if `--debug-graph` was passed
if opts.debug_graph {
_log_session = Some(util::log_graphs(&mut parser, "log.html")?);
_log_session = Some(util::log_graphs(parser, "log.html")?);
}
// Log to stderr if `--debug` was passed
else if opts.debug {
@ -105,6 +104,8 @@ pub fn parse_file_at_path(opts: ParseFileOptions) -> Result<bool> {
_ => parser.parse(&source_code, None),
};
parser.stop_printing_dot_graphs();
let stdout = io::stdout();
let mut stdout = stdout.lock();

View file

@ -45,15 +45,7 @@ fn get_main_html(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> {
pub fn serve(grammar_path: &Path, open_in_browser: bool) -> Result<()> {
let server = get_server()?;
let grammar_name = wasm::get_grammar_name(&grammar_path.join("src"))
.with_context(|| "Failed to get wasm filename")?;
let wasm_filename = format!("tree-sitter-{}.wasm", grammar_name);
let language_wasm = fs::read(grammar_path.join(&wasm_filename)).with_context(|| {
format!(
"Failed to read {}. Run `tree-sitter build-wasm` first.",
wasm_filename
)
})?;
let (grammar_name, language_wasm) = wasm::load_language_wasm_file(&grammar_path).unwrap();
let url = format!("http://{}", server.server_addr());
println!("Started playground on: {}", url);
if open_in_browser {

View file

@ -57,7 +57,7 @@ impl Default for TestEntry {
}
pub fn run_tests_at_path(
language: Language,
parser: &mut Parser,
path: &Path,
debug: bool,
debug_graph: bool,
@ -66,11 +66,9 @@ pub fn run_tests_at_path(
) -> Result<()> {
let test_entry = parse_tests(path)?;
let mut _log_session = None;
let mut parser = Parser::new();
parser.set_language(language)?;
if debug_graph {
_log_session = Some(util::log_graphs(&mut parser, "log.html")?);
_log_session = Some(util::log_graphs(parser, "log.html")?);
} else if debug {
parser.set_logger(Some(Box::new(|log_type, message| {
if log_type == LogType::Lex {
@ -83,7 +81,7 @@ pub fn run_tests_at_path(
let mut failures = Vec::new();
let mut corrected_entries = Vec::new();
run_tests(
&mut parser,
parser,
test_entry,
filter,
0,
@ -92,6 +90,8 @@ pub fn run_tests_at_path(
&mut corrected_entries,
)?;
parser.stop_printing_dot_graphs();
if failures.len() > 0 {
println!("");
@ -721,7 +721,7 @@ code
---
; Line start comment
(a
(a
; ignore this
(b)
; also ignore this

View file

@ -38,19 +38,24 @@ impl std::fmt::Display for Failure {
}
}
pub fn test_highlights(loader: &Loader, directory: &Path, apply_all_captures: bool) -> Result<()> {
pub fn test_highlights(
loader: &Loader,
highlighter: &mut Highlighter,
directory: &Path,
apply_all_captures: bool,
) -> Result<()> {
println!("syntax highlighting:");
test_highlights_indented(loader, directory, apply_all_captures, 2)
test_highlights_indented(loader, highlighter, directory, apply_all_captures, 2)
}
fn test_highlights_indented(
loader: &Loader,
highlighter: &mut Highlighter,
directory: &Path,
apply_all_captures: bool,
indent_level: usize,
) -> Result<()> {
let mut failed = false;
let mut highlighter = Highlighter::new();
for highlight_test_file in fs::read_dir(directory)? {
let highlight_test_file = highlight_test_file?;
@ -65,6 +70,7 @@ fn test_highlights_indented(
println!("{}:", test_file_name.into_string().unwrap());
if let Err(_) = test_highlights_indented(
loader,
highlighter,
&test_file_path,
apply_all_captures,
indent_level + 1,
@ -80,7 +86,7 @@ fn test_highlights_indented(
.ok_or_else(|| anyhow!("No highlighting config found for {:?}", test_file_path))?;
match test_highlight(
&loader,
&mut highlighter,
highlighter,
highlight_config,
fs::read(&test_file_path)?.as_slice(),
) {

View file

@ -38,9 +38,8 @@ impl std::fmt::Display for Failure {
}
}
pub fn test_tags(loader: &Loader, directory: &Path) -> Result<()> {
pub fn test_tags(loader: &Loader, tags_context: &mut TagsContext, directory: &Path) -> Result<()> {
let mut failed = false;
let mut tags_context = TagsContext::new();
println!("tags:");
for tag_test_file in fs::read_dir(directory)? {
@ -54,7 +53,7 @@ pub fn test_tags(loader: &Loader, directory: &Path) -> Result<()> {
.tags_config(language)?
.ok_or_else(|| anyhow!("No tags config found for {:?}", test_file_path))?;
match test_tag(
&mut tags_context,
tags_context,
tags_config,
fs::read(&test_file_path)?.as_slice(),
) {

View file

@ -8,6 +8,7 @@ lazy_static! {
fs::create_dir_all(&result).unwrap();
result
};
pub static ref WASM_DIR: PathBuf = ROOT_DIR.join("target").join("release");
pub static ref SCRATCH_DIR: PathBuf = {
// https://doc.rust-lang.org/reference/conditional-compilation.html
let vendor = if cfg!(target_vendor = "apple") {

View file

@ -72,31 +72,28 @@ pub fn get_tags_config(language_name: &str) -> TagsConfiguration {
}
pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language {
let parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", name));
if !fs::read_to_string(&parser_c_path)
.map(|content| content == parser_code)
.unwrap_or(false)
{
fs::write(&parser_c_path, parser_code).unwrap();
let src_dir = SCRATCH_DIR.join("src").join(name);
fs::create_dir_all(&src_dir).unwrap();
let parser_path = src_dir.join("parser.c");
if !fs::read_to_string(&parser_path).map_or(false, |content| content == parser_code) {
fs::write(&parser_path, parser_code).unwrap();
}
let scanner_path = path.and_then(|p| {
let result = p.join("scanner.c");
if result.exists() {
Some(result)
} else {
None
if let Some(path) = path {
let scanner_path = path.join("scanner.c");
if scanner_path.exists() {
let scanner_code = fs::read_to_string(&scanner_path).unwrap();
let scanner_copy_path = src_dir.join("scanner.c");
if !fs::read_to_string(&scanner_copy_path)
.map_or(false, |content| content == scanner_code)
{
fs::write(&scanner_copy_path, scanner_code).unwrap();
}
}
});
}
TEST_LOADER
.load_language_from_sources(name, &HEADER_DIR, &parser_c_path, scanner_path.as_deref())
.load_language_at_path_with_name(&src_dir, &HEADER_DIR, name)
.unwrap()
}
pub fn get_test_grammar(name: &str) -> (String, Option<PathBuf>) {
let dir = fixtures_dir().join("test_grammars").join(name);
let grammar = fs::read_to_string(&dir.join("grammar.json")).expect(&format!(
"Can't find grammar.json for test grammar {}",
name
));
(grammar, Some(dir))
}

View file

@ -14,3 +14,6 @@ mod test_highlight_test;
mod test_tags_test;
mod text_provider_test;
mod tree_test;
#[cfg(feature = "wasm")]
mod wasm_language_test;

View file

@ -2,13 +2,15 @@ use super::helpers::{
allocations,
edits::invert_edit,
edits::ReadRecorder,
fixtures::{get_language, get_test_grammar, get_test_language},
fixtures::{get_language, get_test_language},
};
use crate::{
generate::generate_parser_for_grammar,
parse::{perform_edit, Edit},
tests::helpers::fixtures::fixtures_dir,
};
use std::{
fs,
sync::atomic::{AtomicUsize, Ordering},
thread, time,
};
@ -427,16 +429,15 @@ fn test_parsing_empty_file_with_reused_tree() {
#[test]
fn test_parsing_after_editing_tree_that_depends_on_column_values() {
let (grammar, path) = get_test_grammar("uses_current_column");
let dir = fixtures_dir()
.join("test_grammars")
.join("uses_current_column");
let grammar = fs::read_to_string(&dir.join("grammar.json")).unwrap();
let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar).unwrap();
let mut parser = Parser::new();
parser
.set_language(get_test_language(
&grammar_name,
&parser_code,
path.as_ref().map(AsRef::as_ref),
))
.set_language(get_test_language(&grammar_name, &parser_code, Some(&dir)))
.unwrap();
let mut code = b"

View file

@ -0,0 +1,92 @@
use crate::tests::helpers::fixtures::WASM_DIR;
use lazy_static::lazy_static;
use std::fs;
use tree_sitter::{wasmtime::Engine, Parser, WasmError, WasmErrorKind, WasmStore};
lazy_static! {
// Single wasmtime engine shared by the tests in this file; each test clones it
// when constructing a `WasmStore`.
static ref ENGINE: Engine = Engine::default();
}
/// Loads four wasm-compiled grammars into one store and checks that they parse correctly,
/// both with the parser that owns the loading store and with a parser that has a separate
/// store created from the same engine.
#[test]
fn test_load_wasm_language() {
    let read_wasm =
        |name: &str| fs::read(WASM_DIR.join(format!("tree-sitter-{}.wasm", name))).unwrap();

    let mut store = WasmStore::new(ENGINE.clone()).unwrap();
    let mut parser = Parser::new();

    let wasm_cpp = read_wasm("cpp");
    let wasm_rs = read_wasm("rust");
    let wasm_rb = read_wasm("ruby");
    let wasm_typescript = read_wasm("typescript");

    let language_rust = store.load_language("rust", &wasm_rs).unwrap();
    let language_cpp = store.load_language("cpp", &wasm_cpp).unwrap();
    let language_ruby = store.load_language("ruby", &wasm_rb).unwrap();
    let language_typescript = store.load_language("typescript", &wasm_typescript).unwrap();
    parser.set_wasm_store(store).unwrap();

    // A second parser whose store did not load the languages itself.
    let mut parser2 = Parser::new();
    parser2
        .set_wasm_store(WasmStore::new(ENGINE.clone()).unwrap())
        .unwrap();

    // (language, source text, expected S-expression)
    let cases = [
        (
            language_cpp,
            "A<B> c = d();",
            "(translation_unit (declaration type: (template_type name: (type_identifier) arguments: (template_argument_list (type_descriptor type: (type_identifier)))) declarator: (init_declarator declarator: (identifier) value: (call_expression function: (identifier) arguments: (argument_list)))))",
        ),
        (
            language_rust,
            "const A: B = c();",
            "(source_file (const_item name: (identifier) type: (type_identifier) value: (call_expression function: (identifier) arguments: (arguments))))",
        ),
        (
            language_ruby,
            "class A; end",
            "(program (class name: (constant)))",
        ),
        (
            language_typescript,
            "class A {}",
            "(program (class_declaration name: (type_identifier) body: (class_body)))",
        ),
    ];

    for mut parser in [parser, parser2] {
        // Go through every language twice so that switching back to a language is covered.
        for _ in 0..2 {
            for &(language, source, expected) in &cases {
                parser.set_language(language).unwrap();
                let tree = parser.parse(source, None).unwrap();
                assert_eq!(tree.root_node().to_sexp(), expected);
            }
        }
    }
}
/// Checks the error reported for each way loading a wasm language is expected to fail:
/// unparseable bytes, a wrong language name, and a module with a corrupted body.
#[test]
fn test_load_wasm_errors() {
    let mut store = WasmStore::new(ENGINE.clone()).unwrap();
    let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();

    // Dropping the first byte makes the module unparseable.
    let truncated = &wasm[1..];
    let parse_error = store.load_language("rust", truncated).unwrap_err();
    assert_eq!(
        parse_error,
        WasmError {
            kind: WasmErrorKind::Parse,
            message: "failed to parse dylink section of wasm module".into(),
        }
    );

    // A valid module requested under a name it does not export.
    let missing_symbol_error = store.load_language("not_rust", &wasm).unwrap_err();
    assert_eq!(
        missing_symbol_error,
        WasmError {
            kind: WasmErrorKind::Instantiate,
            message: "module did not contain language function: tree_sitter_not_rust".into(),
        }
    );

    // Zero out a range of bytes in the middle of the module.
    let mut zeroed = wasm.clone();
    for byte in &mut zeroed[300..500] {
        *byte = 0;
    }
    let compile_error = store.load_language("rust", &zeroed).unwrap_err();
    assert_eq!(compile_error.kind, WasmErrorKind::Compile);
}

View file

@ -1,15 +1,24 @@
use super::generate::parse_grammar::GrammarJSON;
use anyhow::{anyhow, Context, Result};
use path_slash::PathExt as _;
use std::ffi::{OsStr, OsString};
use std::fs;
use std::path::Path;
use std::process::Command;
use which::which;
use anyhow::{Context, Result};
use std::{fs, path::Path};
use tree_sitter_loader::Loader;
const EMSCRIPTEN_TAG: &'static str = concat!("emscripten/emsdk:", env!("EMSCRIPTEN_VERSION"));
/// Reads the compiled `tree-sitter-<name>.wasm` file for the grammar in `language_dir`.
///
/// The grammar name is obtained from `get_grammar_name`, and the wasm file is looked up
/// directly inside `language_dir`. Returns the grammar name together with the file's bytes.
///
/// # Errors
///
/// Returns an error if the grammar name cannot be determined or if the wasm file cannot
/// be read (for example because `tree-sitter build-wasm` has not been run yet).
pub fn load_language_wasm_file(language_dir: &Path) -> Result<(String, Vec<u8>)> {
    // Propagate the failure instead of panicking: callers already receive a `Result`.
    let grammar_name =
        get_grammar_name(language_dir).with_context(|| "Failed to get wasm filename")?;
    let wasm_filename = format!("tree-sitter-{}.wasm", grammar_name);
    let contents = fs::read(language_dir.join(&wasm_filename)).with_context(|| {
        format!(
            "Failed to read {}. Run `tree-sitter build-wasm` first.",
            wasm_filename
        )
    })?;
    Ok((grammar_name, contents))
}
pub fn get_grammar_name(src_dir: &Path) -> Result<String> {
pub fn get_grammar_name(language_dir: &Path) -> Result<String> {
let src_dir = language_dir.join("src");
let grammar_json_path = src_dir.join("grammar.json");
let grammar_json = fs::read_to_string(&grammar_json_path)
.with_context(|| format!("Failed to read grammar file {:?}", grammar_json_path))?;
@ -18,118 +27,24 @@ pub fn get_grammar_name(src_dir: &Path) -> Result<String> {
Ok(grammar.name)
}
pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Result<()> {
let src_dir = language_dir.join("src");
let grammar_name = get_grammar_name(&src_dir)?;
let output_filename = format!("tree-sitter-{}.wasm", grammar_name);
let emcc_bin = if cfg!(windows) { "emcc.bat" } else { "emcc" };
let emcc_path = which(emcc_bin)
.ok()
.and_then(|p| Command::new(&p).output().and(Ok(p)).ok());
let mut command;
if !force_docker && emcc_path.is_some() {
command = Command::new(emcc_path.unwrap());
command.current_dir(&language_dir);
} else if Command::new("docker").output().is_ok() {
command = Command::new("docker");
command.args(&["run", "--rm"]);
// Mount the parser directory as a volume
let mut volume_string;
if let (Some(parent), Some(filename)) = (language_dir.parent(), language_dir.file_name()) {
volume_string = OsString::from(parent);
volume_string.push(":/src:Z");
command.arg("--workdir");
command.arg(Path::new("/src").join(filename).to_slash_lossy().as_ref());
} else {
volume_string = OsString::from(language_dir);
volume_string.push(":/src:Z");
command.args(&["--workdir", "/src"]);
}
command.args(&[OsStr::new("--volume"), &volume_string]);
// Get the current user id so that files created in the docker container will have
// the same owner.
if cfg!(unix) {
let user_id_output = Command::new("id")
.arg("-u")
.output()
.with_context(|| "Failed to get get current user id")?;
let user_id = String::from_utf8_lossy(&user_id_output.stdout);
let user_id = user_id.trim();
command.args(&["--user", user_id]);
}
// Run `emcc` in a container using the `emscripten-slim` image
command.args(&[EMSCRIPTEN_TAG, "emcc"]);
} else {
if force_docker {
return Err(anyhow!(
"You must have docker on your PATH to run this command with --docker"
));
}
return Err(anyhow!(
"You must have either emcc or docker on your PATH to run this command"
));
}
command.args(&[
"-o",
pub fn compile_language_to_wasm(
loader: &Loader,
language_dir: &Path,
output_dir: &Path,
force_docker: bool,
) -> Result<()> {
let grammar_name = get_grammar_name(&language_dir)?;
let output_filename = output_dir.join(&format!("tree-sitter-{}.wasm", grammar_name));
let src_path = language_dir.join("src");
let scanner_path = loader.get_scanner_path(&src_path);
loader.compile_parser_to_wasm(
&grammar_name,
&src_path,
scanner_path
.as_ref()
.and_then(|p| Some(Path::new(p.file_name()?))),
&output_filename,
"-Os",
"-s",
"WASM=1",
"-s",
"SIDE_MODULE=1",
"-s",
"TOTAL_MEMORY=33554432",
"-s",
"NODEJS_CATCH_EXIT=0",
"-s",
"NODEJS_CATCH_REJECTION=0",
"-s",
&format!("EXPORTED_FUNCTIONS=[\"_tree_sitter_{}\"]", grammar_name),
"-fno-exceptions",
"-I",
"src",
]);
let src = Path::new("src");
let parser_c_path = src.join("parser.c");
let scanner_c_path = src.join("scanner.c");
let scanner_cc_path = src.join("scanner.cc");
let scanner_cpp_path = src.join("scanner.cpp");
if language_dir.join(&scanner_cc_path).exists() {
command
.arg("-xc++")
.arg(scanner_cc_path.to_slash_lossy().as_ref());
} else if language_dir.join(&scanner_cpp_path).exists() {
command
.arg("-xc++")
.arg(scanner_cpp_path.to_slash_lossy().as_ref());
} else if language_dir.join(&scanner_c_path).exists() {
command.arg(scanner_c_path.to_slash_lossy().as_ref());
}
command.arg(parser_c_path.to_slash_lossy().as_ref());
let output = command
.output()
.with_context(|| "Failed to run emcc command")?;
if !output.status.success() {
return Err(anyhow!(
"emcc command failed - {}",
String::from_utf8_lossy(&output.stderr)
));
}
// Move the created `.wasm` file into the current working directory.
fs::rename(&language_dir.join(&output_filename), &output_filename)
.with_context(|| format!("Couldn't find output file {:?}", output_filename))?;
force_docker,
)?;
Ok(())
}