Handle memory errors occurring in wasm scanners

* In WASM, use a custom, simple malloc implementation that lets us
  explicitly reset the heap with a new start location.
* When a WASM call traps or errors, propagate that as a parse failure.
* Reset the WASM heap after every parse.

Co-authored-by: Conrad <conrad@zed.dev>
This commit is contained in:
Max Brunsfeld 2024-03-13 23:34:17 -07:00
parent b19d85aca7
commit 7a9b3076ef
9 changed files with 1795 additions and 2384 deletions

View file

@ -81,7 +81,6 @@ fn record_alloc(ptr: *mut c_void) {
}
fn record_dealloc(ptr: *mut c_void) {
assert!(!ptr.is_null(), "Zero pointer deallocation!");
RECORDER.with(|recorder| {
if recorder.enabled.load(SeqCst) {
recorder

View file

@ -29,7 +29,71 @@ fn test_wasm_stdlib_symbols() {
}
// Loads the Ruby grammar from `tree-sitter-ruby.wasm` into a new `WasmStore`,
// attaches the store and language to a parser, parses a one-line class
// definition, and checks the S-expression of the resulting tree.
// NOTE(review): the two consecutive `fn` lines below look like the old and new
// names of the same test as rendered by the surrounding diff — confirm that
// only `test_load_wasm_ruby_language` exists in the real file.
#[test]
fn test_load_wasm_language() {
fn test_load_wasm_ruby_language() {
// Everything runs inside `allocations::record`.
allocations::record(|| {
let mut store = WasmStore::new(ENGINE.clone()).unwrap();
let mut parser = Parser::new();
let wasm = fs::read(WASM_DIR.join("tree-sitter-ruby.wasm")).unwrap();
// The language is loaded from the store before the store is passed to the parser.
let language = store.load_language("ruby", &wasm).unwrap();
parser.set_wasm_store(store).unwrap();
parser.set_language(&language).unwrap();
let tree = parser.parse("class A; end", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(program (class name: (constant)))"
);
});
}
/// Loads the HTML grammar from `tree-sitter-html.wasm` into a fresh wasm store
/// and checks the S-expression produced for a small nested document.
#[test]
fn test_load_wasm_html_language() {
    allocations::record(|| {
        let mut wasm_store = WasmStore::new(ENGINE.clone()).unwrap();
        let mut html_parser = Parser::new();

        let grammar_path = WASM_DIR.join("tree-sitter-html.wasm");
        let grammar_bytes = fs::read(grammar_path).unwrap();
        let html = wasm_store.load_language("html", &grammar_bytes).unwrap();

        // The store is handed to the parser by value, so the language is
        // loaded from it beforehand.
        html_parser.set_wasm_store(wasm_store).unwrap();
        html_parser.set_language(&html).unwrap();

        let source = "<div><span></span><p></p></div>";
        let expected = "(document (element (start_tag (tag_name)) (element (start_tag (tag_name)) (end_tag (tag_name))) (element (start_tag (tag_name)) (end_tag (tag_name))) (end_tag (tag_name))))";

        let tree = html_parser.parse(source, None).unwrap();
        assert_eq!(tree.root_node().to_sexp(), expected);
    });
}
/// Loads the Rust grammar from `tree-sitter-rust.wasm` into a fresh wasm store
/// and checks the S-expression produced for an empty `main` function.
#[test]
fn test_load_wasm_rust_language() {
    allocations::record(|| {
        let mut wasm_store = WasmStore::new(ENGINE.clone()).unwrap();
        let mut rust_parser = Parser::new();

        let grammar_bytes = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
        let rust = wasm_store.load_language("rust", &grammar_bytes).unwrap();

        // The store is handed to the parser by value, so the language is
        // loaded from it beforehand.
        rust_parser.set_wasm_store(wasm_store).unwrap();
        rust_parser.set_language(&rust).unwrap();

        let expected = "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))";
        let tree = rust_parser.parse("fn main() {}", None).unwrap();
        assert_eq!(tree.root_node().to_sexp(), expected);
    });
}
/// Loads the JavaScript grammar from `tree-sitter-javascript.wasm` into a fresh
/// wasm store and checks the S-expression produced for two `const`
/// declarations separated by a newline.
#[test]
fn test_load_wasm_javascript_language() {
    allocations::record(|| {
        let mut wasm_store = WasmStore::new(ENGINE.clone()).unwrap();
        let mut js_parser = Parser::new();

        let grammar_bytes = fs::read(WASM_DIR.join("tree-sitter-javascript.wasm")).unwrap();
        let javascript = wasm_store
            .load_language("javascript", &grammar_bytes)
            .unwrap();

        // The store is handed to the parser by value, so the language is
        // loaded from it beforehand.
        js_parser.set_wasm_store(wasm_store).unwrap();
        js_parser.set_language(&javascript).unwrap();

        let source = "const a = b\nconst c = d";
        let expected = "(program (lexical_declaration (variable_declarator name: (identifier) value: (identifier))) (lexical_declaration (variable_declarator name: (identifier) value: (identifier))))";

        let tree = js_parser.parse(source, None).unwrap();
        assert_eq!(tree.root_node().to_sexp(), expected);
    });
}
#[test]
fn test_load_multiple_wasm_languages() {
allocations::record(|| {
let mut store = WasmStore::new(ENGINE.clone()).unwrap();
let mut parser = Parser::new();
@ -158,3 +222,28 @@ fn test_load_wasm_errors() {
);
});
}
/// Checks that a failed parse leaves the parser usable with a wasm language.
///
/// First parses an HTML element whose tag name is `"a-b"` repeated
/// `2 * 1024 * 1024` times and expects `parse` to return `None` (judging by the
/// test name, this input is meant to exhaust memory on the wasm side). Then
/// sets the language again and verifies that a short document of the same
/// shape parses to the expected tree.
#[test]
fn test_wasm_oom() {
    /// Wraps the text `hello world` in an element with the given tag name.
    fn element_with_tag(tag_name: &str) -> String {
        format!("<{tag_name}>hello world</{tag_name}>")
    }

    allocations::record(|| {
        let mut wasm_store = WasmStore::new(ENGINE.clone()).unwrap();
        let mut html_parser = Parser::new();

        let grammar_bytes = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap();
        let html = wasm_store.load_language("html", &grammar_bytes).unwrap();
        html_parser.set_wasm_store(wasm_store).unwrap();
        html_parser.set_language(&html).unwrap();

        // Oversized tag name: the parse is expected to fail and yield no tree.
        let huge_document = element_with_tag(&"a-b".repeat(2 * 1024 * 1024));
        assert!(html_parser.parse(&huge_document, None).is_none());

        // Small tag name: after setting the language again, parsing succeeds.
        let small_document = element_with_tag(&"a-b".repeat(20));
        html_parser.set_language(&html).unwrap();
        let tree = html_parser.parse(&small_document, None).unwrap();

        let expected = "(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))";
        assert_eq!(tree.root_node().to_sexp(), expected);
    });
}