From edff362bd084163e144b22368e4eb43ffba07205 Mon Sep 17 00:00:00 2001 From: Trim21 Date: Mon, 5 Jan 2026 00:32:39 +0800 Subject: [PATCH] fix(rust): implement c malloc with rust alloc for wasm32 --- crates/language/Cargo.toml | 3 - crates/language/src/language.rs | 1 - crates/language/src/lib.rs | 7 ++ crates/language/src/stdlib.rs | 132 ++++++++++++++++++++++++++++ crates/language/wasm/src/stdlib.c | 140 ++---------------------------- 5 files changed, 144 insertions(+), 139 deletions(-) create mode 100644 crates/language/src/lib.rs create mode 100644 crates/language/src/stdlib.rs diff --git a/crates/language/Cargo.toml b/crates/language/Cargo.toml index b6f5cdf8..66529d89 100644 --- a/crates/language/Cargo.toml +++ b/crates/language/Cargo.toml @@ -18,6 +18,3 @@ links = "tree-sitter-language" [lints] workspace = true - -[lib] -path = "src/language.rs" diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index cb46b8a9..269c3806 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -1,4 +1,3 @@ -#![no_std] /// `LanguageFn` wraps a C function that returns a pointer to a tree-sitter grammar. #[repr(transparent)] #[derive(Clone, Copy)] diff --git a/crates/language/src/lib.rs b/crates/language/src/lib.rs new file mode 100644 index 00000000..a8b84cf3 --- /dev/null +++ b/crates/language/src/lib.rs @@ -0,0 +1,7 @@ +mod language; +pub use language::*; + +#[cfg(all(target_arch = "wasm32", target_os = "unknown"))] +mod stdlib; +#[cfg(all(target_arch = "wasm32", target_os = "unknown"))] +pub use stdlib::*; diff --git a/crates/language/src/stdlib.rs b/crates/language/src/stdlib.rs new file mode 100644 index 00000000..dad9dd4e --- /dev/null +++ b/crates/language/src/stdlib.rs @@ -0,0 +1,132 @@ +#![cfg(all(target_arch = "wasm32", target_os = "unknown"))] + +use std::alloc::Layout; +use std::mem::{align_of, size_of}; +use std::ptr; + +// C `malloc` must return a pointer suitably aligned for any object type. 
+// We use a conservative alignment that is at least pointer-sized and typically 16.
+
+const MALLOC_ALIGN: usize = {
+    let a = align_of::<usize>();
+    if a > align_of::<u128>() {
+        a
+    } else {
+        align_of::<u128>()
+    }
+};
+
+/// Rounds `value` up to the next multiple of `align`, which must be a power of two.
+const fn align_up(value: usize, align: usize) -> usize {
+    (value + (align - 1)) & !(align - 1)
+}
+
+// Every block starts with a header that stores the payload size. The header is padded
+// so that (raw_ptr + HEADER_SIZE) is still MALLOC_ALIGN-aligned.
+const HEADER_SIZE: usize = align_up(size_of::<usize>(), MALLOC_ALIGN);
+// Alignment requested from the Rust allocator for the whole block (header + payload).
+const HEADER_ALIGN: usize = MALLOC_ALIGN;
+
+// Layout of a whole block (header + `size` payload bytes). `None` when the total
+// overflows `usize` or `Layout` rejects it (too large once rounded up to the alignment).
+fn block_layout(size: usize) -> Option<Layout> {
+    let total = size.checked_add(HEADER_SIZE)?;
+    Layout::from_size_align(total, HEADER_ALIGN).ok()
+}
+
+/// C `malloc`: returns `size` usable bytes aligned to `MALLOC_ALIGN`, or null when
+/// `size` is 0 or the allocation cannot be satisfied.
+#[unsafe(no_mangle)]
+pub extern "C" fn malloc(size: usize) -> *mut u8 {
+    if size == 0 {
+        return ptr::null_mut();
+    }
+    let Some(layout) = block_layout(size) else {
+        return ptr::null_mut();
+    };
+
+    // SAFETY: `layout` has a non-zero size, and the header write stays inside the
+    // freshly allocated block, whose alignment is at least that of `usize`.
+    unsafe {
+        let raw_ptr = std::alloc::alloc(layout);
+        if raw_ptr.is_null() {
+            return ptr::null_mut();
+        }
+        // Record the payload size so `free`/`realloc` can rebuild the layout.
+        raw_ptr.cast::<usize>().write(size);
+        raw_ptr.add(HEADER_SIZE)
+    }
+}
+
+/// C `calloc`: zero-filled allocation of `nitems * size` bytes. Returns null when the
+/// product is 0 or overflows, or when the allocation fails.
+#[unsafe(no_mangle)]
+pub extern "C" fn calloc(nitems: usize, size: usize) -> *mut u8 {
+    let Some(total) = nitems.checked_mul(size) else {
+        return ptr::null_mut();
+    };
+    let ptr = malloc(total);
+    if !ptr.is_null() {
+        // SAFETY: `malloc` returned a live block with `total` writable bytes.
+        unsafe { ptr::write_bytes(ptr, 0, total) };
+    }
+    ptr
+}
+
+/// C `free`: releases a block obtained from `malloc`/`calloc`/`realloc`. Null is a no-op.
+#[unsafe(no_mangle)]
+pub extern "C" fn free(ptr: *mut u8) {
+    if ptr.is_null() {
+        return;
+    }
+
+    // SAFETY: the C contract requires `ptr` to come from this allocator, so the size
+    // header sits `HEADER_SIZE` bytes before it.
+    unsafe {
+        let header_ptr = ptr.sub(HEADER_SIZE);
+        let size = header_ptr.cast::<usize>().read();
+        // Only sizes that form a valid layout are ever stored, so a failure here means
+        // the header was overwritten; leak the block instead of passing a bogus layout.
+        if let Some(layout) = block_layout(size) {
+            std::alloc::dealloc(header_ptr, layout);
+        }
+    }
+}
+
+/// C `realloc`: resizes a block, keeping its contents. A null `ptr` behaves like
+/// `malloc`, and `size == 0` frees the block and returns null. On failure null is
+/// returned and the original block is left untouched.
+#[unsafe(no_mangle)]
+pub extern "C" fn realloc(ptr: *mut u8, size: usize) -> *mut u8 {
+    if ptr.is_null() {
+        return malloc(size);
+    }
+
+    if size == 0 {
+        free(ptr);
+        return ptr::null_mut();
+    }
+
+    // `std::alloc::realloc` requires the new size, rounded up to the alignment, to fit
+    // in `isize`. Building the new layout first rejects requests that violate this.
+    let Some(new_layout) = block_layout(size) else {
+        return ptr::null_mut();
+    };
+
+    // SAFETY: the C contract requires `ptr` to come from this allocator, so the header
+    // precedes it and `old_layout` matches the layout the block was allocated with.
+    unsafe {
+        let header_ptr = ptr.sub(HEADER_SIZE);
+        let old_size = header_ptr.cast::<usize>().read();
+        let Some(old_layout) = block_layout(old_size) else {
+            return ptr::null_mut();
+        };
+        let new_header_ptr = std::alloc::realloc(header_ptr, old_layout, new_layout.size());
+        if new_header_ptr.is_null() {
+            return ptr::null_mut();
+        }
+
+        new_header_ptr.cast::<usize>().write(size);
+        new_header_ptr.add(HEADER_SIZE)
+    }
+}
diff --git a/crates/language/wasm/src/stdlib.c b/crates/language/wasm/src/stdlib.c
index f50e1da9..be9c22d2 100644
--- a/crates/language/wasm/src/stdlib.c
+++ b/crates/language/wasm/src/stdlib.c
@@ -1,138 +1,8 @@
-// This file implements a very simple allocator for external scanners running
-// in Wasm. Allocation is just bumping a static pointer and growing the heap
-// as needed, and freeing is just adding the freed region to a free list.
-// When additional memory is allocated, the free list is searched first.
-// If there is not a suitable region in the free list, the heap is
-// grown as necessary, and the allocation is made at the end of the heap.
-// When the heap is reset, all allocated memory is considered freed.
-
 #include <stddef.h>
-#include <stdint.h>
-#include <string.h>
 
-extern void tree_sitter_debug_message(const char *, size_t);
+__attribute__((noreturn)) void abort(void) { __builtin_trap(); }
 
-#define PAGESIZE 0x10000
-#define MAX_HEAP_SIZE (4 * 1024 * 1024)
-
-typedef struct {
-  size_t size;
-  struct Region *next;
-  char data[0];
-} Region;
-
-static Region *heap_end = NULL;
-static Region *heap_start = NULL;
-static Region *next = NULL;
-static Region *free_list = NULL;
-
-// Get the region metadata for the given heap pointer.
-static inline Region *region_for_ptr(void *ptr) {
-  return ((Region *)ptr) - 1;
-}
-
-// Get the location of the next region after the given region,
-// if the given region had the given size.
-static inline Region *region_after(Region *self, size_t len) {
-  char *address = self->data + len;
-  char *aligned = (char *)((uintptr_t)(address + 3) & ~0x3);
-  return (Region *)aligned;
-}
-
-static void *get_heap_end() {
-  return (void *)(__builtin_wasm_memory_size(0) * PAGESIZE);
-}
-
-static int grow_heap(size_t size) {
-  size_t new_page_count = ((size - 1) / PAGESIZE) + 1;
-  return __builtin_wasm_memory_grow(0, new_page_count) != SIZE_MAX;
-}
-
-// Clear out the heap, and move it to the given address.
-void reset_heap(void *new_heap_start) {
-  heap_start = new_heap_start;
-  next = new_heap_start;
-  heap_end = get_heap_end();
-  free_list = NULL;
-}
-
-void *malloc(size_t size) {
-  if (size == 0) return NULL;
-
-  Region *prev = NULL;
-  Region *curr = free_list;
-  while (curr != NULL) {
-    if (curr->size >= size) {
-      if (prev == NULL) {
-        free_list = curr->next;
-      } else {
-        prev->next = curr->next;
-      }
-      return &curr->data;
-    }
-    prev = curr;
-    curr = curr->next;
-  }
-
-  Region *region_end = region_after(next, size);
-
-  if (region_end > heap_end) {
-    if ((char *)region_end - (char *)heap_start > MAX_HEAP_SIZE) {
-      return NULL;
-    }
-    if (!grow_heap(size)) return NULL;
-    heap_end = get_heap_end();
-  }
-
-  void *result = &next->data;
-  next->size = size;
-  next = region_end;
-
-  return result;
-}
-
-void free(void *ptr) {
-  if (ptr == NULL) return;
-
-  Region *region = region_for_ptr(ptr);
-  Region *region_end = region_after(region, region->size);
-
-  // When freeing the last allocated pointer, re-use that
-  // pointer for the next allocation.
-  if (region_end == next) {
-    next = region;
-  } else {
-    region->next = free_list;
-    free_list = region;
-  }
-}
-
-void *calloc(size_t count, size_t size) {
-  void *result = malloc(count * size);
-  memset(result, 0, count * size);
-  return result;
-}
-
-void *realloc(void *ptr, size_t new_size) {
-  if (ptr == NULL) {
-    return malloc(new_size);
-  }
-
-  Region *region = region_for_ptr(ptr);
-  Region *region_end = region_after(region, region->size);
-
-  // When reallocating the last allocated region, return
-  // the same pointer, and skip copying the data.
-  if (region_end == next) {
-    next = region;
-    return malloc(new_size);
-  }
-
-  void *result = malloc(new_size);
-  memcpy(result, &region->data, region->size);
-  return result;
-}
-
-__attribute__((noreturn)) void abort(void) {
-  __builtin_trap();
-}
+extern void *malloc(size_t size);
+extern void free(void *ptr);
+extern void *realloc(void *ptr, size_t size);
+extern void *calloc(size_t nitems, size_t size);