Merge pull request #3181 from tree-sitter/handle-wasm-oom

When loading languages via WASM, gracefully handle memory errors and leaks in external scanners
This commit is contained in:
Max Brunsfeld 2024-03-18 13:15:06 -07:00 committed by GitHub
commit 09b18fad5b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 1895 additions and 2463 deletions

View file

@ -81,7 +81,6 @@ fn record_alloc(ptr: *mut c_void) {
}
fn record_dealloc(ptr: *mut c_void) {
assert!(!ptr.is_null(), "Zero pointer deallocation!");
RECORDER.with(|recorder| {
if recorder.enabled.load(SeqCst) {
recorder

View file

@ -29,7 +29,71 @@ fn test_wasm_stdlib_symbols() {
}
/// Loads the Ruby grammar from its wasm build and checks that a minimal
/// class definition parses into the expected syntax tree.
#[test]
fn test_load_wasm_ruby_language() {
    // The whole scenario runs inside the closure handed to `allocations::record`
    // (presumably so that allocations made during the test are tracked — confirm
    // against the `allocations` helper module).
    allocations::record(|| {
        let mut store = WasmStore::new(ENGINE.clone()).unwrap();
        let mut parser = Parser::new();

        // Read the compiled grammar and register it with the store under the name "ruby".
        let wasm = fs::read(WASM_DIR.join("tree-sitter-ruby.wasm")).unwrap();
        let language = store.load_language("ruby", &wasm).unwrap();

        // The store is moved into the parser before the language is selected.
        parser.set_wasm_store(store).unwrap();
        parser.set_language(&language).unwrap();

        let tree = parser.parse("class A; end", None).unwrap();
        assert_eq!(
            tree.root_node().to_sexp(),
            "(program (class name: (constant)))"
        );
    });
}
/// Loads the HTML grammar from its wasm build and verifies the tree produced
/// for a `div` containing an empty `span` and an empty `p`.
#[test]
fn test_load_wasm_html_language() {
    allocations::record(|| {
        let mut wasm_store = WasmStore::new(ENGINE.clone()).unwrap();
        let mut html_parser = Parser::new();

        // Register the compiled grammar with the store under the name "html".
        let grammar_bytes = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap();
        let html = wasm_store.load_language("html", &grammar_bytes).unwrap();

        // Hand the store over to the parser, then select the language.
        html_parser.set_wasm_store(wasm_store).unwrap();
        html_parser.set_language(&html).unwrap();

        let source = "<div><span></span><p></p></div>";
        let expected_sexp = "(document (element (start_tag (tag_name)) (element (start_tag (tag_name)) (end_tag (tag_name))) (element (start_tag (tag_name)) (end_tag (tag_name))) (end_tag (tag_name))))";

        let syntax_tree = html_parser.parse(source, None).unwrap();
        assert_eq!(syntax_tree.root_node().to_sexp(), expected_sexp);
    });
}
/// Loads the Rust grammar from its wasm build and verifies the tree produced
/// for an empty `main` function.
#[test]
fn test_load_wasm_rust_language() {
    allocations::record(|| {
        let mut wasm_store = WasmStore::new(ENGINE.clone()).unwrap();
        let mut rust_parser = Parser::new();

        // Register the compiled grammar with the store under the name "rust".
        let grammar_bytes = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
        let rust = wasm_store.load_language("rust", &grammar_bytes).unwrap();

        // Hand the store over to the parser, then select the language.
        rust_parser.set_wasm_store(wasm_store).unwrap();
        rust_parser.set_language(&rust).unwrap();

        let expected_sexp = "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))";

        let syntax_tree = rust_parser.parse("fn main() {}", None).unwrap();
        assert_eq!(syntax_tree.root_node().to_sexp(), expected_sexp);
    });
}
/// Loads the JavaScript grammar from its wasm build and verifies the tree
/// produced for two `const` declarations separated only by a newline.
#[test]
fn test_load_wasm_javascript_language() {
    allocations::record(|| {
        let mut wasm_store = WasmStore::new(ENGINE.clone()).unwrap();
        let mut js_parser = Parser::new();

        // Register the compiled grammar with the store under the name "javascript".
        let grammar_bytes = fs::read(WASM_DIR.join("tree-sitter-javascript.wasm")).unwrap();
        let javascript = wasm_store
            .load_language("javascript", &grammar_bytes)
            .unwrap();

        // Hand the store over to the parser, then select the language.
        js_parser.set_wasm_store(wasm_store).unwrap();
        js_parser.set_language(&javascript).unwrap();

        let source = "const a = b\nconst c = d";
        let expected_sexp = "(program (lexical_declaration (variable_declarator name: (identifier) value: (identifier))) (lexical_declaration (variable_declarator name: (identifier) value: (identifier))))";

        let syntax_tree = js_parser.parse(source, None).unwrap();
        assert_eq!(syntax_tree.root_node().to_sexp(), expected_sexp);
    });
}
#[test]
fn test_load_multiple_wasm_languages() {
allocations::record(|| {
let mut store = WasmStore::new(ENGINE.clone()).unwrap();
let mut parser = Parser::new();
@ -51,6 +115,9 @@ fn test_load_wasm_language() {
.unwrap();
let mut query_cursor = QueryCursor::new();
// First, parse with the store that originally loaded the languages.
// Then parse with a new parser and wasm store, so that the languages
// are added one-by-one, in between parses.
for mut parser in [parser, parser2] {
for _ in 0..2 {
let query_rust = Query::new(&language_rust, "(const_item) @foo").unwrap();
@ -158,3 +225,28 @@ fn test_load_wasm_errors() {
);
});
}
/// Parses an HTML document with an enormous tag name and expects the parse to
/// fail, then checks that the same parser still parses a small document
/// correctly afterwards.
#[test]
fn test_wasm_oom() {
    allocations::record(|| {
        let mut wasm_store = WasmStore::new(ENGINE.clone()).unwrap();
        let mut html_parser = Parser::new();
        let grammar_bytes = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap();
        let html = wasm_store.load_language("html", &grammar_bytes).unwrap();
        html_parser.set_wasm_store(wasm_store).unwrap();
        html_parser.set_language(&html).unwrap();

        // Builds a single element whose opening and closing tags use `tag_name`.
        let wrap_in_element =
            |tag_name: &str| format!("<{tag_name}>hello world</{tag_name}>");

        // A tag name of 2 * 1024 * 1024 repetitions of "a-b": parsing must return `None`.
        let oversized_tag = "a-b".repeat(2 * 1024 * 1024);
        let oversized_code = wrap_in_element(&oversized_tag);
        assert!(html_parser.parse(&oversized_code, None).is_none());

        // A short tag name: after the failed parse the language is set again
        // (NOTE(review): presumably required to recover the parser — confirm)
        // and parsing must succeed.
        let short_tag = "a-b".repeat(20);
        let short_code = wrap_in_element(&short_tag);
        html_parser.set_language(&html).unwrap();

        let syntax_tree = html_parser.parse(&short_code, None).unwrap();
        let expected_sexp =
            "(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))";
        assert_eq!(syntax_tree.root_node().to_sexp(), expected_sexp);
    });
}

View file

@ -67,6 +67,7 @@ pub fn compile_language_to_wasm(
"__cxa_atexit",
"abort",
"emscripten_notify_memory_growth",
"tree_sitter_debug_message",
"proc_exit",
];