tree-sitter/cli/src/tests/wasm_language_test.rs
Nia 32d784ec8f fix(wasm): fix alias map size computation
This fixes a crash where parsing with certain languages can lead to a crash due to how the alias map was allocated and laid out in wasm memory

(cherry picked from commit f09dc3cf46)
2025-09-22 17:19:21 -04:00

299 lines
12 KiB
Rust

use std::fs;
use streaming_iterator::StreamingIterator;
use tree_sitter::{Parser, Query, QueryCursor, WasmError, WasmErrorKind, WasmStore};
use crate::tests::helpers::{
allocations,
fixtures::{get_test_fixture_language_wasm, ENGINE, WASM_DIR},
};
#[test]
fn test_wasm_stdlib_symbols() {
let symbols = tree_sitter::wasm_stdlib_symbols().collect::<Vec<_>>();
assert_eq!(
symbols,
{
let mut symbols = symbols.clone();
symbols.sort_unstable();
symbols
},
"symbols aren't sorted"
);
assert!(symbols.contains(&"malloc"));
assert!(symbols.contains(&"free"));
assert!(symbols.contains(&"memset"));
assert!(symbols.contains(&"memcpy"));
}
#[test]
fn test_load_wasm_ruby_language() {
allocations::record(|| {
let mut store = WasmStore::new(&ENGINE).unwrap();
let mut parser = Parser::new();
let wasm = fs::read(WASM_DIR.join("tree-sitter-ruby.wasm")).unwrap();
let language = store.load_language("ruby", &wasm).unwrap();
parser.set_wasm_store(store).unwrap();
parser.set_language(&language).unwrap();
let tree = parser.parse("class A; end", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(program (class name: (constant)))"
);
});
}
#[test]
fn test_load_wasm_html_language() {
allocations::record(|| {
let mut store = WasmStore::new(&ENGINE).unwrap();
let mut parser = Parser::new();
let wasm = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap();
let language = store.load_language("html", &wasm).unwrap();
parser.set_wasm_store(store).unwrap();
parser.set_language(&language).unwrap();
let tree = parser
.parse("<div><span></span><p></p></div>", None)
.unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(document (element (start_tag (tag_name)) (element (start_tag (tag_name)) (end_tag (tag_name))) (element (start_tag (tag_name)) (end_tag (tag_name))) (end_tag (tag_name))))"
);
});
}
#[test]
fn test_load_wasm_rust_language() {
allocations::record(|| {
let mut store = WasmStore::new(&ENGINE).unwrap();
let mut parser = Parser::new();
let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
let language = store.load_language("rust", &wasm).unwrap();
parser.set_wasm_store(store).unwrap();
parser.set_language(&language).unwrap();
let tree = parser.parse("fn main() {}", None).unwrap();
assert_eq!(tree.root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))");
});
}
#[test]
fn test_load_wasm_javascript_language() {
allocations::record(|| {
let mut store = WasmStore::new(&ENGINE).unwrap();
let mut parser = Parser::new();
let wasm = fs::read(WASM_DIR.join("tree-sitter-javascript.wasm")).unwrap();
let language = store.load_language("javascript", &wasm).unwrap();
parser.set_wasm_store(store).unwrap();
parser.set_language(&language).unwrap();
let tree = parser.parse("const a = b\nconst c = d", None).unwrap();
assert_eq!(tree.root_node().to_sexp(), "(program (lexical_declaration (variable_declarator name: (identifier) value: (identifier))) (lexical_declaration (variable_declarator name: (identifier) value: (identifier))))");
});
}
#[test]
fn test_load_wasm_python_language() {
allocations::record(|| {
let mut store = WasmStore::new(&ENGINE).unwrap();
let mut parser = Parser::new();
let wasm = fs::read(WASM_DIR.join("tree-sitter-python.wasm")).unwrap();
let language = store.load_language("python", &wasm).unwrap();
parser.set_wasm_store(store).unwrap();
parser.set_language(&language).unwrap();
let tree = parser.parse("a = b\nc = d", None).unwrap();
assert_eq!(tree.root_node().to_sexp(), "(module (expression_statement (assignment left: (identifier) right: (identifier))) (expression_statement (assignment left: (identifier) right: (identifier))))");
});
}
#[test]
fn test_load_fixture_language_wasm() {
allocations::record(|| {
let store = WasmStore::new(&ENGINE).unwrap();
let mut parser = Parser::new();
let language = get_test_fixture_language_wasm("epsilon_external_tokens");
parser.set_wasm_store(store).unwrap();
parser.set_language(&language).unwrap();
let tree = parser.parse("hello", None).unwrap();
assert_eq!(tree.root_node().to_sexp(), "(document (zero_width))");
});
}
#[test]
fn test_load_multiple_wasm_languages() {
allocations::record(|| {
let mut store = WasmStore::new(&ENGINE).unwrap();
let mut parser = Parser::new();
let wasm_cpp = fs::read(WASM_DIR.join("tree-sitter-cpp.wasm")).unwrap();
let wasm_rs = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
let wasm_rb = fs::read(WASM_DIR.join("tree-sitter-ruby.wasm")).unwrap();
let wasm_typescript = fs::read(WASM_DIR.join("tree-sitter-typescript.wasm")).unwrap();
let language_rust = store.load_language("rust", &wasm_rs).unwrap();
let language_cpp = store.load_language("cpp", &wasm_cpp).unwrap();
let language_ruby = store.load_language("ruby", &wasm_rb).unwrap();
let language_typescript = store.load_language("typescript", &wasm_typescript).unwrap();
parser.set_wasm_store(store).unwrap();
let mut parser2 = Parser::new();
parser2
.set_wasm_store(WasmStore::new(&ENGINE).unwrap())
.unwrap();
let mut query_cursor = QueryCursor::new();
// First, parse with the store that originally loaded the languages.
// Then parse with a new parser and wasm store, so that the languages
// are added one-by-one, in between parses.
for mut parser in [parser, parser2] {
for _ in 0..2 {
let query_rust = Query::new(&language_rust, "(const_item) @foo").unwrap();
let query_typescript =
Query::new(&language_typescript, "(class_declaration) @foo").unwrap();
parser.set_language(&language_cpp).unwrap();
let tree = parser.parse("A<B> c = d();", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(translation_unit (declaration type: (template_type name: (type_identifier) arguments: (template_argument_list (type_descriptor type: (type_identifier)))) declarator: (init_declarator declarator: (identifier) value: (call_expression function: (identifier) arguments: (argument_list)))))"
);
parser.set_language(&language_rust).unwrap();
let source = "const A: B = c();";
let tree = parser.parse(source, None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(source_file (const_item name: (identifier) type: (type_identifier) value: (call_expression function: (identifier) arguments: (arguments))))"
);
assert_eq!(
query_cursor
.matches(&query_rust, tree.root_node(), source.as_bytes())
.count(),
1
);
parser.set_language(&language_ruby).unwrap();
let tree = parser.parse("class A; end", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(program (class name: (constant)))"
);
parser.set_language(&language_typescript).unwrap();
let tree = parser.parse("class A {}", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(program (class_declaration name: (type_identifier) body: (class_body)))"
);
assert_eq!(
query_cursor
.matches(&query_typescript, tree.root_node(), source.as_bytes())
.count(),
1
);
}
}
});
}
#[test]
fn test_load_and_reload_wasm_language() {
allocations::record(|| {
let mut store = WasmStore::new(&ENGINE).unwrap();
let wasm_rust = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
let wasm_typescript = fs::read(WASM_DIR.join("tree-sitter-typescript.wasm")).unwrap();
let language_rust = store.load_language("rust", &wasm_rust).unwrap();
let language_typescript = store.load_language("typescript", &wasm_typescript).unwrap();
assert_eq!(store.language_count(), 2);
// When a language is dropped, stores can release their instances of that language.
drop(language_rust);
assert_eq!(store.language_count(), 1);
let language_rust = store.load_language("rust", &wasm_rust).unwrap();
assert_eq!(store.language_count(), 2);
drop(language_rust);
drop(language_typescript);
assert_eq!(store.language_count(), 0);
});
}
#[test]
fn test_reset_wasm_store() {
allocations::record(|| {
let mut language_store = WasmStore::new(&ENGINE).unwrap();
let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
let language = language_store.load_language("rust", &wasm).unwrap();
let mut parser = Parser::new();
let parser_store = WasmStore::new(&ENGINE).unwrap();
parser.set_wasm_store(parser_store).unwrap();
parser.set_language(&language).unwrap();
let tree = parser.parse("fn main() {}", None).unwrap();
assert_eq!(tree.root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))");
let parser_store = WasmStore::new(&ENGINE).unwrap();
parser.set_wasm_store(parser_store).unwrap();
let tree = parser.parse("fn main() {}", None).unwrap();
assert_eq!(tree.root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))");
});
}
#[test]
fn test_load_wasm_errors() {
allocations::record(|| {
let mut store = WasmStore::new(&ENGINE).unwrap();
let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
let bad_wasm = &wasm[1..];
assert_eq!(
store.load_language("rust", bad_wasm).unwrap_err(),
WasmError {
kind: WasmErrorKind::Parse,
message: "failed to parse dylink section of wasm module".into(),
}
);
assert_eq!(
store.load_language("not_rust", &wasm).unwrap_err(),
WasmError {
kind: WasmErrorKind::Instantiate,
message: "module did not contain language function: tree_sitter_not_rust".into(),
}
);
let mut bad_wasm = wasm.clone();
bad_wasm[300..500].iter_mut().for_each(|b| *b = 0);
assert_eq!(
store.load_language("rust", &bad_wasm).unwrap_err().kind,
WasmErrorKind::Compile,
);
});
}
#[test]
fn test_wasm_oom() {
allocations::record(|| {
let mut store = WasmStore::new(&ENGINE).unwrap();
let mut parser = Parser::new();
let wasm = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap();
let language = store.load_language("html", &wasm).unwrap();
parser.set_wasm_store(store).unwrap();
parser.set_language(&language).unwrap();
let tag_name = "a-b".repeat(2 * 1024 * 1024);
let code = format!("<{tag_name}>hello world</{tag_name}>");
assert!(parser.parse(&code, None).is_none());
let tag_name = "a-b".repeat(20);
let code = format!("<{tag_name}>hello world</{tag_name}>");
parser.set_language(&language).unwrap();
let tree = parser.parse(&code, None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))"
);
});
}