Merge pull request #1864 from tree-sitter/wasm-language

Add optional WASM feature to the native library, allowing it to run wasm-compiled parsers via wasmtime
This commit is contained in:
Max Brunsfeld 2023-11-28 12:08:47 -08:00 committed by GitHub
commit 034f0d0280
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
64 changed files with 9642 additions and 517 deletions

View file

@ -4,14 +4,16 @@ use glob::glob;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::{env, fs, u64};
use tree_sitter::{ffi, Point};
use tree_sitter_cli::parse::{ParseFileOptions, ParseOutput};
use tree_sitter::{ffi, Parser, Point};
use tree_sitter_cli::{
generate, highlight, logger, parse, playground, query, tags, test, test_highlight, test_tags,
util, wasm,
generate, highlight, logger,
parse::{self, ParseFileOptions, ParseOutput},
playground, query, tags, test, test_highlight, test_tags, util, wasm,
};
use tree_sitter_config::Config;
use tree_sitter_highlight::Highlighter;
use tree_sitter_loader as loader;
use tree_sitter_tags::TagsContext;
const BUILD_VERSION: &'static str = env!("CARGO_PKG_VERSION");
const BUILD_SHA: Option<&'static str> = option_env!("BUILD_SHA");
@ -79,6 +81,9 @@ fn run() -> Result<()> {
.long("quiet")
.short("q");
let wasm_arg = Arg::with_name("wasm")
.long("wasm")
.help("compile parsers to wasm instead of native dynamic libraries");
let apply_all_captures_arg = Arg::with_name("apply-all-captures")
.help("Apply all captures to highlights")
.long("apply-all-captures");
@ -151,6 +156,7 @@ fn run() -> Result<()> {
.arg(&debug_arg)
.arg(&debug_build_arg)
.arg(&debug_graph_arg)
.arg(&wasm_arg)
.arg(Arg::with_name("output-dot").long("dot"))
.arg(Arg::with_name("output-xml").long("xml").short("x"))
.arg(
@ -242,6 +248,7 @@ fn run() -> Result<()> {
.arg(&debug_arg)
.arg(&debug_build_arg)
.arg(&debug_graph_arg)
.arg(&wasm_arg)
.arg(&apply_all_captures_arg),
)
.subcommand(
@ -392,10 +399,23 @@ fn run() -> Result<()> {
loader.use_debug_build(debug_build);
let mut parser = Parser::new();
#[cfg(feature = "wasm")]
if matches.is_present("wasm") {
let engine = tree_sitter::wasmtime::Engine::default();
parser
.set_wasm_store(tree_sitter::WasmStore::new(engine.clone()).unwrap())
.unwrap();
loader.use_wasm(engine);
}
let languages = loader.languages_at_path(&current_dir)?;
let language = languages
.first()
.ok_or_else(|| anyhow!("No language found"))?;
parser.set_language(*language)?;
let test_dir = current_dir.join("test");
// Run the corpus tests. Look for them at two paths: `test/corpus` and `corpus`.
@ -405,7 +425,7 @@ fn run() -> Result<()> {
}
if test_corpus_dir.is_dir() {
test::run_tests_at_path(
*language,
&mut parser,
&test_corpus_dir,
debug,
debug_graph,
@ -420,12 +440,22 @@ fn run() -> Result<()> {
// Run the syntax highlighting tests.
let test_highlight_dir = test_dir.join("highlight");
if test_highlight_dir.is_dir() {
test_highlight::test_highlights(&loader, &test_highlight_dir, apply_all_captures)?;
let mut highlighter = Highlighter::new();
highlighter.parser = parser;
test_highlight::test_highlights(
&loader,
&mut highlighter,
&test_highlight_dir,
apply_all_captures,
)?;
parser = highlighter.parser;
}
let test_tag_dir = test_dir.join("tags");
if test_tag_dir.is_dir() {
test_tags::test_tags(&loader, &test_tag_dir)?;
let mut tags_context = TagsContext::new();
tags_context.parser = parser;
test_tags::test_tags(&loader, &mut tags_context, &test_tag_dir)?;
}
}
@ -459,6 +489,7 @@ fn run() -> Result<()> {
.values_of("edits")
.map_or(Vec::new(), |e| e.collect());
let cancellation_flag = util::cancel_on_signal();
let mut parser = Parser::new();
if debug {
// For augmenting debug logging in external scanners
@ -467,6 +498,15 @@ fn run() -> Result<()> {
loader.use_debug_build(debug_build);
#[cfg(feature = "wasm")]
if matches.is_present("wasm") {
let engine = tree_sitter::wasmtime::Engine::default();
parser
.set_wasm_store(tree_sitter::WasmStore::new(engine.clone()).unwrap())
.unwrap();
loader.use_wasm(engine);
}
let timeout = matches
.value_of("timeout")
.map_or(0, |t| u64::from_str_radix(t, 10).unwrap());
@ -483,8 +523,12 @@ fn run() -> Result<()> {
for path in paths {
let path = Path::new(&path);
let language =
loader.select_language(path, &current_dir, matches.value_of("scope"))?;
parser
.set_language(language)
.context("incompatible language")?;
let opts = ParseFileOptions {
language,
@ -500,7 +544,7 @@ fn run() -> Result<()> {
encoding,
};
let this_file_errored = parse::parse_file_at_path(opts)?;
let this_file_errored = parse::parse_file_at_path(&mut parser, opts)?;
if should_track_stats {
stats.total_parses += 1;
@ -694,7 +738,12 @@ fn run() -> Result<()> {
("build-wasm", Some(matches)) => {
let grammar_path = current_dir.join(matches.value_of("path").unwrap_or(""));
wasm::compile_language_to_wasm(&grammar_path, matches.is_present("docker"))?;
wasm::compile_language_to_wasm(
&loader,
&grammar_path,
&current_dir,
matches.is_present("docker"),
)?;
}
("playground", Some(matches)) => {

View file

@ -52,9 +52,8 @@ pub struct ParseFileOptions<'a> {
pub encoding: Option<u32>,
}
pub fn parse_file_at_path(opts: ParseFileOptions) -> Result<bool> {
pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result<bool> {
let mut _log_session = None;
let mut parser = Parser::new();
parser.set_language(opts.language)?;
let mut source_code = fs::read(opts.path)
.with_context(|| format!("Error reading source file {:?}", opts.path))?;
@ -68,7 +67,7 @@ pub fn parse_file_at_path(opts: ParseFileOptions) -> Result<bool> {
// Render an HTML graph if `--debug-graph` was passed
if opts.debug_graph {
_log_session = Some(util::log_graphs(&mut parser, "log.html")?);
_log_session = Some(util::log_graphs(parser, "log.html")?);
}
// Log to stderr if `--debug` was passed
else if opts.debug {
@ -105,6 +104,8 @@ pub fn parse_file_at_path(opts: ParseFileOptions) -> Result<bool> {
_ => parser.parse(&source_code, None),
};
parser.stop_printing_dot_graphs();
let stdout = io::stdout();
let mut stdout = stdout.lock();

View file

@ -45,15 +45,7 @@ fn get_main_html(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> {
pub fn serve(grammar_path: &Path, open_in_browser: bool) -> Result<()> {
let server = get_server()?;
let grammar_name = wasm::get_grammar_name(&grammar_path.join("src"))
.with_context(|| "Failed to get wasm filename")?;
let wasm_filename = format!("tree-sitter-{}.wasm", grammar_name);
let language_wasm = fs::read(grammar_path.join(&wasm_filename)).with_context(|| {
format!(
"Failed to read {}. Run `tree-sitter build-wasm` first.",
wasm_filename
)
})?;
let (grammar_name, language_wasm) = wasm::load_language_wasm_file(&grammar_path).unwrap();
let url = format!("http://{}", server.server_addr());
println!("Started playground on: {}", url);
if open_in_browser {

View file

@ -57,7 +57,7 @@ impl Default for TestEntry {
}
pub fn run_tests_at_path(
language: Language,
parser: &mut Parser,
path: &Path,
debug: bool,
debug_graph: bool,
@ -66,11 +66,9 @@ pub fn run_tests_at_path(
) -> Result<()> {
let test_entry = parse_tests(path)?;
let mut _log_session = None;
let mut parser = Parser::new();
parser.set_language(language)?;
if debug_graph {
_log_session = Some(util::log_graphs(&mut parser, "log.html")?);
_log_session = Some(util::log_graphs(parser, "log.html")?);
} else if debug {
parser.set_logger(Some(Box::new(|log_type, message| {
if log_type == LogType::Lex {
@ -83,7 +81,7 @@ pub fn run_tests_at_path(
let mut failures = Vec::new();
let mut corrected_entries = Vec::new();
run_tests(
&mut parser,
parser,
test_entry,
filter,
0,
@ -92,6 +90,8 @@ pub fn run_tests_at_path(
&mut corrected_entries,
)?;
parser.stop_printing_dot_graphs();
if failures.len() > 0 {
println!("");
@ -721,7 +721,7 @@ code
---
; Line start comment
(a
(a
; ignore this
(b)
; also ignore this

View file

@ -38,19 +38,24 @@ impl std::fmt::Display for Failure {
}
}
pub fn test_highlights(loader: &Loader, directory: &Path, apply_all_captures: bool) -> Result<()> {
pub fn test_highlights(
loader: &Loader,
highlighter: &mut Highlighter,
directory: &Path,
apply_all_captures: bool,
) -> Result<()> {
println!("syntax highlighting:");
test_highlights_indented(loader, directory, apply_all_captures, 2)
test_highlights_indented(loader, highlighter, directory, apply_all_captures, 2)
}
fn test_highlights_indented(
loader: &Loader,
highlighter: &mut Highlighter,
directory: &Path,
apply_all_captures: bool,
indent_level: usize,
) -> Result<()> {
let mut failed = false;
let mut highlighter = Highlighter::new();
for highlight_test_file in fs::read_dir(directory)? {
let highlight_test_file = highlight_test_file?;
@ -65,6 +70,7 @@ fn test_highlights_indented(
println!("{}:", test_file_name.into_string().unwrap());
if let Err(_) = test_highlights_indented(
loader,
highlighter,
&test_file_path,
apply_all_captures,
indent_level + 1,
@ -80,7 +86,7 @@ fn test_highlights_indented(
.ok_or_else(|| anyhow!("No highlighting config found for {:?}", test_file_path))?;
match test_highlight(
&loader,
&mut highlighter,
highlighter,
highlight_config,
fs::read(&test_file_path)?.as_slice(),
) {

View file

@ -38,9 +38,8 @@ impl std::fmt::Display for Failure {
}
}
pub fn test_tags(loader: &Loader, directory: &Path) -> Result<()> {
pub fn test_tags(loader: &Loader, tags_context: &mut TagsContext, directory: &Path) -> Result<()> {
let mut failed = false;
let mut tags_context = TagsContext::new();
println!("tags:");
for tag_test_file in fs::read_dir(directory)? {
@ -54,7 +53,7 @@ pub fn test_tags(loader: &Loader, directory: &Path) -> Result<()> {
.tags_config(language)?
.ok_or_else(|| anyhow!("No tags config found for {:?}", test_file_path))?;
match test_tag(
&mut tags_context,
tags_context,
tags_config,
fs::read(&test_file_path)?.as_slice(),
) {

View file

@ -8,6 +8,7 @@ lazy_static! {
fs::create_dir_all(&result).unwrap();
result
};
pub static ref WASM_DIR: PathBuf = ROOT_DIR.join("target").join("release");
pub static ref SCRATCH_DIR: PathBuf = {
// https://doc.rust-lang.org/reference/conditional-compilation.html
let vendor = if cfg!(target_vendor = "apple") {

View file

@ -72,31 +72,28 @@ pub fn get_tags_config(language_name: &str) -> TagsConfiguration {
}
pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language {
let parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", name));
if !fs::read_to_string(&parser_c_path)
.map(|content| content == parser_code)
.unwrap_or(false)
{
fs::write(&parser_c_path, parser_code).unwrap();
let src_dir = SCRATCH_DIR.join("src").join(name);
fs::create_dir_all(&src_dir).unwrap();
let parser_path = src_dir.join("parser.c");
if !fs::read_to_string(&parser_path).map_or(false, |content| content == parser_code) {
fs::write(&parser_path, parser_code).unwrap();
}
let scanner_path = path.and_then(|p| {
let result = p.join("scanner.c");
if result.exists() {
Some(result)
} else {
None
if let Some(path) = path {
let scanner_path = path.join("scanner.c");
if scanner_path.exists() {
let scanner_code = fs::read_to_string(&scanner_path).unwrap();
let scanner_copy_path = src_dir.join("scanner.c");
if !fs::read_to_string(&scanner_copy_path)
.map_or(false, |content| content == scanner_code)
{
fs::write(&scanner_copy_path, scanner_code).unwrap();
}
}
});
}
TEST_LOADER
.load_language_from_sources(name, &HEADER_DIR, &parser_c_path, scanner_path.as_deref())
.load_language_at_path_with_name(&src_dir, &HEADER_DIR, name)
.unwrap()
}
pub fn get_test_grammar(name: &str) -> (String, Option<PathBuf>) {
let dir = fixtures_dir().join("test_grammars").join(name);
let grammar = fs::read_to_string(&dir.join("grammar.json")).expect(&format!(
"Can't find grammar.json for test grammar {}",
name
));
(grammar, Some(dir))
}

View file

@ -14,3 +14,6 @@ mod test_highlight_test;
mod test_tags_test;
mod text_provider_test;
mod tree_test;
#[cfg(feature = "wasm")]
mod wasm_language_test;

View file

@ -2,13 +2,15 @@ use super::helpers::{
allocations,
edits::invert_edit,
edits::ReadRecorder,
fixtures::{get_language, get_test_grammar, get_test_language},
fixtures::{get_language, get_test_language},
};
use crate::{
generate::generate_parser_for_grammar,
parse::{perform_edit, Edit},
tests::helpers::fixtures::fixtures_dir,
};
use std::{
fs,
sync::atomic::{AtomicUsize, Ordering},
thread, time,
};
@ -427,16 +429,15 @@ fn test_parsing_empty_file_with_reused_tree() {
#[test]
fn test_parsing_after_editing_tree_that_depends_on_column_values() {
let (grammar, path) = get_test_grammar("uses_current_column");
let dir = fixtures_dir()
.join("test_grammars")
.join("uses_current_column");
let grammar = fs::read_to_string(&dir.join("grammar.json")).unwrap();
let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar).unwrap();
let mut parser = Parser::new();
parser
.set_language(get_test_language(
&grammar_name,
&parser_code,
path.as_ref().map(AsRef::as_ref),
))
.set_language(get_test_language(&grammar_name, &parser_code, Some(&dir)))
.unwrap();
let mut code = b"

View file

@ -0,0 +1,92 @@
use crate::tests::helpers::fixtures::WASM_DIR;
use lazy_static::lazy_static;
use std::fs;
use tree_sitter::{wasmtime::Engine, Parser, WasmError, WasmErrorKind, WasmStore};
lazy_static! {
static ref ENGINE: Engine = Engine::default();
}
#[test]
fn test_load_wasm_language() {
let mut store = WasmStore::new(ENGINE.clone()).unwrap();
let mut parser = Parser::new();
let wasm_cpp = fs::read(&WASM_DIR.join(format!("tree-sitter-cpp.wasm"))).unwrap();
let wasm_rs = fs::read(&WASM_DIR.join(format!("tree-sitter-rust.wasm"))).unwrap();
let wasm_rb = fs::read(&WASM_DIR.join(format!("tree-sitter-ruby.wasm"))).unwrap();
let wasm_typescript = fs::read(&WASM_DIR.join(format!("tree-sitter-typescript.wasm"))).unwrap();
let language_rust = store.load_language("rust", &wasm_rs).unwrap();
let language_cpp = store.load_language("cpp", &wasm_cpp).unwrap();
let language_ruby = store.load_language("ruby", &wasm_rb).unwrap();
let language_typescript = store.load_language("typescript", &wasm_typescript).unwrap();
parser.set_wasm_store(store).unwrap();
let mut parser2 = Parser::new();
parser2
.set_wasm_store(WasmStore::new(ENGINE.clone()).unwrap())
.unwrap();
for mut parser in [parser, parser2] {
for _ in 0..2 {
parser.set_language(language_cpp).unwrap();
let tree = parser.parse("A<B> c = d();", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(translation_unit (declaration type: (template_type name: (type_identifier) arguments: (template_argument_list (type_descriptor type: (type_identifier)))) declarator: (init_declarator declarator: (identifier) value: (call_expression function: (identifier) arguments: (argument_list)))))"
);
parser.set_language(language_rust).unwrap();
let tree = parser.parse("const A: B = c();", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(source_file (const_item name: (identifier) type: (type_identifier) value: (call_expression function: (identifier) arguments: (arguments))))"
);
parser.set_language(language_ruby).unwrap();
let tree = parser.parse("class A; end", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(program (class name: (constant)))"
);
parser.set_language(language_typescript).unwrap();
let tree = parser.parse("class A {}", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(program (class_declaration name: (type_identifier) body: (class_body)))"
);
}
}
}
#[test]
fn test_load_wasm_errors() {
let mut store = WasmStore::new(ENGINE.clone()).unwrap();
let wasm = fs::read(&WASM_DIR.join(format!("tree-sitter-rust.wasm"))).unwrap();
let bad_wasm = &wasm[1..];
assert_eq!(
store.load_language("rust", &bad_wasm).unwrap_err(),
WasmError {
kind: WasmErrorKind::Parse,
message: "failed to parse dylink section of wasm module".into(),
}
);
assert_eq!(
store.load_language("not_rust", &wasm).unwrap_err(),
WasmError {
kind: WasmErrorKind::Instantiate,
message: "module did not contain language function: tree_sitter_not_rust".into(),
}
);
let mut bad_wasm = wasm.clone();
bad_wasm[300..500].iter_mut().for_each(|b| *b = 0);
assert_eq!(
store.load_language("rust", &bad_wasm).unwrap_err().kind,
WasmErrorKind::Compile,
);
}

View file

@ -1,15 +1,24 @@
use super::generate::parse_grammar::GrammarJSON;
use anyhow::{anyhow, Context, Result};
use path_slash::PathExt as _;
use std::ffi::{OsStr, OsString};
use std::fs;
use std::path::Path;
use std::process::Command;
use which::which;
use anyhow::{Context, Result};
use std::{fs, path::Path};
use tree_sitter_loader::Loader;
const EMSCRIPTEN_TAG: &'static str = concat!("emscripten/emsdk:", env!("EMSCRIPTEN_VERSION"));
pub fn load_language_wasm_file(language_dir: &Path) -> Result<(String, Vec<u8>)> {
let grammar_name = get_grammar_name(&language_dir)
.with_context(|| "Failed to get wasm filename")
.unwrap();
let wasm_filename = format!("tree-sitter-{}.wasm", grammar_name);
let contents = fs::read(language_dir.join(&wasm_filename)).with_context(|| {
format!(
"Failed to read {}. Run `tree-sitter build-wasm` first.",
wasm_filename
)
})?;
Ok((grammar_name, contents))
}
pub fn get_grammar_name(src_dir: &Path) -> Result<String> {
pub fn get_grammar_name(language_dir: &Path) -> Result<String> {
let src_dir = language_dir.join("src");
let grammar_json_path = src_dir.join("grammar.json");
let grammar_json = fs::read_to_string(&grammar_json_path)
.with_context(|| format!("Failed to read grammar file {:?}", grammar_json_path))?;
@ -18,118 +27,24 @@ pub fn get_grammar_name(src_dir: &Path) -> Result<String> {
Ok(grammar.name)
}
pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Result<()> {
let src_dir = language_dir.join("src");
let grammar_name = get_grammar_name(&src_dir)?;
let output_filename = format!("tree-sitter-{}.wasm", grammar_name);
let emcc_bin = if cfg!(windows) { "emcc.bat" } else { "emcc" };
let emcc_path = which(emcc_bin)
.ok()
.and_then(|p| Command::new(&p).output().and(Ok(p)).ok());
let mut command;
if !force_docker && emcc_path.is_some() {
command = Command::new(emcc_path.unwrap());
command.current_dir(&language_dir);
} else if Command::new("docker").output().is_ok() {
command = Command::new("docker");
command.args(&["run", "--rm"]);
// Mount the parser directory as a volume
let mut volume_string;
if let (Some(parent), Some(filename)) = (language_dir.parent(), language_dir.file_name()) {
volume_string = OsString::from(parent);
volume_string.push(":/src:Z");
command.arg("--workdir");
command.arg(Path::new("/src").join(filename).to_slash_lossy().as_ref());
} else {
volume_string = OsString::from(language_dir);
volume_string.push(":/src:Z");
command.args(&["--workdir", "/src"]);
}
command.args(&[OsStr::new("--volume"), &volume_string]);
// Get the current user id so that files created in the docker container will have
// the same owner.
if cfg!(unix) {
let user_id_output = Command::new("id")
.arg("-u")
.output()
.with_context(|| "Failed to get get current user id")?;
let user_id = String::from_utf8_lossy(&user_id_output.stdout);
let user_id = user_id.trim();
command.args(&["--user", user_id]);
}
// Run `emcc` in a container using the `emscripten-slim` image
command.args(&[EMSCRIPTEN_TAG, "emcc"]);
} else {
if force_docker {
return Err(anyhow!(
"You must have docker on your PATH to run this command with --docker"
));
}
return Err(anyhow!(
"You must have either emcc or docker on your PATH to run this command"
));
}
command.args(&[
"-o",
pub fn compile_language_to_wasm(
loader: &Loader,
language_dir: &Path,
output_dir: &Path,
force_docker: bool,
) -> Result<()> {
let grammar_name = get_grammar_name(&language_dir)?;
let output_filename = output_dir.join(&format!("tree-sitter-{}.wasm", grammar_name));
let src_path = language_dir.join("src");
let scanner_path = loader.get_scanner_path(&src_path);
loader.compile_parser_to_wasm(
&grammar_name,
&src_path,
scanner_path
.as_ref()
.and_then(|p| Some(Path::new(p.file_name()?))),
&output_filename,
"-Os",
"-s",
"WASM=1",
"-s",
"SIDE_MODULE=1",
"-s",
"TOTAL_MEMORY=33554432",
"-s",
"NODEJS_CATCH_EXIT=0",
"-s",
"NODEJS_CATCH_REJECTION=0",
"-s",
&format!("EXPORTED_FUNCTIONS=[\"_tree_sitter_{}\"]", grammar_name),
"-fno-exceptions",
"-I",
"src",
]);
let src = Path::new("src");
let parser_c_path = src.join("parser.c");
let scanner_c_path = src.join("scanner.c");
let scanner_cc_path = src.join("scanner.cc");
let scanner_cpp_path = src.join("scanner.cpp");
if language_dir.join(&scanner_cc_path).exists() {
command
.arg("-xc++")
.arg(scanner_cc_path.to_slash_lossy().as_ref());
} else if language_dir.join(&scanner_cpp_path).exists() {
command
.arg("-xc++")
.arg(scanner_cpp_path.to_slash_lossy().as_ref());
} else if language_dir.join(&scanner_c_path).exists() {
command.arg(scanner_c_path.to_slash_lossy().as_ref());
}
command.arg(parser_c_path.to_slash_lossy().as_ref());
let output = command
.output()
.with_context(|| "Failed to run emcc command")?;
if !output.status.success() {
return Err(anyhow!(
"emcc command failed - {}",
String::from_utf8_lossy(&output.stderr)
));
}
// Move the created `.wasm` file into the current working directory.
fs::rename(&language_dir.join(&output_filename), &output_filename)
.with_context(|| format!("Couldn't find output file {:?}", output_filename))?;
force_docker,
)?;
Ok(())
}