Return an error from build-wasm if scanner uses unavailable functions

This commit is contained in:
Max Brunsfeld 2024-02-01 22:18:04 -08:00
parent f383b02741
commit e054de4191
7 changed files with 104 additions and 14 deletions

22
Cargo.lock generated
View file

@ -306,7 +306,7 @@ dependencies = [
"itertools",
"log",
"smallvec",
"wasmparser",
"wasmparser 0.118.1",
"wasmtime-types",
]
@ -1274,6 +1274,7 @@ dependencies = [
"tree-sitter-tests-proc-macro",
"unindent",
"walkdir",
"wasmparser 0.121.0",
"webbrowser",
"which",
]
@ -1487,6 +1488,17 @@ dependencies = [
"semver",
]
[[package]]
name = "wasmparser"
version = "0.121.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "953cf6a7606ab31382cb1caa5ae403e77ba70c7f8e12eeda167e7040d42bfda8"
dependencies = [
"bitflags 2.4.1",
"indexmap",
"semver",
]
[[package]]
name = "wasmtime"
version = "16.0.0"
@ -1506,7 +1518,7 @@ dependencies = [
"serde_derive",
"serde_json",
"target-lexicon",
"wasmparser",
"wasmparser 0.118.1",
"wasmtime-cranelift",
"wasmtime-environ",
"wasmtime-jit",
@ -1562,7 +1574,7 @@ dependencies = [
"object",
"target-lexicon",
"thiserror",
"wasmparser",
"wasmparser 0.118.1",
"wasmtime-cranelift-shared",
"wasmtime-environ",
"wasmtime-versioned-export-macros",
@ -1598,7 +1610,7 @@ dependencies = [
"serde_derive",
"target-lexicon",
"thiserror",
"wasmparser",
"wasmparser 0.118.1",
"wasmtime-types",
]
@ -1668,7 +1680,7 @@ dependencies = [
"serde",
"serde_derive",
"thiserror",
"wasmparser",
"wasmparser 0.118.1",
]
[[package]]

View file

@ -47,6 +47,7 @@ serde_derive = "1.0"
smallbitvec = "2.5.1"
tiny_http = "0.12.0"
walkdir = "2.3.3"
wasmparser = "0.121"
webbrowser = "0.8.10"
which = "4.4.0"

View file

@ -9,6 +9,25 @@ lazy_static! {
static ref ENGINE: Engine = Engine::default();
}
// Verifies that the list of wasm stdlib symbols is in sorted order and that
// it includes the basic allocation and memory routines.
#[test]
fn test_wasm_stdlib_symbols() {
    let listed: Vec<_> = tree_sitter::wasm_stdlib_symbols().collect();

    // Sorting a copy and comparing it to the original detects any
    // out-of-order entry.
    let mut in_order = listed.clone();
    in_order.sort();
    assert_eq!(listed, in_order, "symbols aren't sorted");

    for required in ["malloc", "free", "memset", "memcpy"] {
        assert!(listed.contains(&required));
    }
}
#[test]
fn test_load_wasm_language() {
allocations::record(|| {

View file

@ -1,7 +1,9 @@
use super::generate::parse_grammar::GrammarJSON;
use anyhow::{Context, Result};
use anyhow::{anyhow, Context, Result};
use std::{fs, path::Path};
use tree_sitter::wasm_stdlib_symbols;
use tree_sitter_loader::Loader;
use wasmparser::Parser;
pub fn load_language_wasm_file(language_dir: &Path) -> Result<(String, Vec<u8>)> {
let grammar_name = get_grammar_name(&language_dir)
@ -46,5 +48,46 @@ pub fn compile_language_to_wasm(
&output_filename,
force_docker,
)?;
// Exit with an error if the external scanner uses symbols from the
// C or C++ standard libraries that aren't available to wasm parsers.
let stdlib_symbols: Vec<_> = wasm_stdlib_symbols().collect();
let builtin_symbols = [
"__indirect_function_table",
"__memory_base",
"__table_base",
"memory",
];
let mut missing_symbols = Vec::new();
let wasm_bytes = fs::read(&output_filename)?;
let parser = Parser::new(0);
for payload in parser.parse_all(&wasm_bytes) {
if let wasmparser::Payload::ImportSection(imports) = payload? {
for import in imports {
let import = import?.name;
if !builtin_symbols.contains(&import) && !stdlib_symbols.contains(&import) {
missing_symbols.push(import);
}
}
}
}
if !missing_symbols.is_empty() {
Err(anyhow!(
concat!(
"This external scanner uses a symbol that isn't available to wasm parsers.\n",
"\n",
"Missing symbols:\n",
" {}\n",
"\n",
"Available symbols:\n",
" {}",
),
missing_symbols.join("\n "),
stdlib_symbols.join("\n ")
))?;
}
Ok(())
}

View file

@ -2,6 +2,8 @@ use std::path::{Path, PathBuf};
use std::{env, fs};
fn main() {
let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
println!("cargo:rerun-if-env-changed=TREE_SITTER_STATIC_ANALYSIS");
if env::var("TREE_SITTER_STATIC_ANALYSIS").is_ok() {
if let (Some(clang_path), Some(scan_build_path)) = (which("clang"), which("scan-build")) {
@ -18,7 +20,13 @@ fn main() {
}
#[cfg(feature = "bindgen")]
generate_bindings();
generate_bindings(&out_dir);
fs::copy(
"src/wasm/stdlib-symbols.txt",
out_dir.join("stdlib-symbols.txt"),
)
.unwrap();
let mut config = cc::Build::new();
@ -47,7 +55,7 @@ fn main() {
}
#[cfg(feature = "bindgen")]
fn generate_bindings() {
fn generate_bindings(out_dir: &PathBuf) {
const HEADER_PATH: &str = "include/tree_sitter/api.h";
println!("cargo:rerun-if-changed={}", HEADER_PATH);
@ -77,9 +85,7 @@ fn generate_bindings() {
.generate()
.expect("Failed to generate bindings");
let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
let bindings_rs = out_dir.join("bindings.rs");
bindings.write_to_file(&bindings_rs).expect(&*format!(
"Failed to write bindings into path: {bindings_rs:?}"
));

View file

@ -2706,6 +2706,15 @@ impl fmt::Display for QueryError {
}
}
/// Returns an iterator over the symbol names listed in `stdlib-symbols.txt`,
/// a file embedded at compile time from the build's `OUT_DIR`.
///
/// One item is yielded per line of the file, with any leading or trailing
/// `"` and `,` characters stripped.
pub fn wasm_stdlib_symbols() -> impl Iterator<Item = &'static str> {
    const SYMBOL_LIST: &str = include_str!(concat!(env!("OUT_DIR"), "/stdlib-symbols.txt"));

    SYMBOL_LIST
        .lines()
        .map(|line| line.trim_matches(|ch: char| matches!(ch, '"' | ',')))
}
extern "C" {
fn free(ptr: *mut c_void);
}

View file

@ -8,10 +8,10 @@ use std::{
};
pub use wasmtime;
#[cfg(feature = "wasm")]
pub fn test() {
// Force Cargo to include wasmtime-c-api as a dependency of this crate,
// even though it is only used by the C code.
// Force Cargo to include wasmtime-c-api as a dependency of this crate,
// even though it is only used by the C code.
//
// The body only needs to reference a `wasmtime_c_api` symbol from Rust.
// NOTE(review): `#[allow(unused)]` presumably silences the dead-code warning
// because nothing in this crate calls the function — confirm before removing.
#[allow(unused)]
fn _use_wasmtime() {
wasmtime_c_api::wasm_engine_new();
}