feat: support compiling to wasm32-unknown-unknown

This commit is contained in:
Amaan Qureshi 2025-09-13 05:36:32 -04:00 committed by Amaan Qureshi
parent 46ea65c89b
commit 22553b3372
20 changed files with 787 additions and 12 deletions

View file

@ -389,9 +389,48 @@ pub fn generate_grammar_files(
generate_file(path, LIB_RS_TEMPLATE, language_name, &generate_opts)
})?;
missing_path(path.join("build.rs"), |path| {
generate_file(path, BUILD_RS_TEMPLATE, language_name, &generate_opts)
})?;
missing_path_else(
path.join("build.rs"),
allow_update,
|path| generate_file(path, BUILD_RS_TEMPLATE, language_name, &generate_opts),
|path| {
let replacement = indoc!{r#"
c_config.flag("-utf-8");
if std::env::var("TARGET").unwrap() == "wasm32-unknown-unknown" {
let Ok(wasm_headers) = std::env::var("DEP_TREE_SITTER_LANGUAGE_WASM_HEADERS") else {
panic!("Environment variable DEP_TREE_SITTER_LANGUAGE_WASM_HEADERS must be set by the language crate");
};
let Ok(wasm_src) =
std::env::var("DEP_TREE_SITTER_LANGUAGE_WASM_SRC").map(std::path::PathBuf::from)
else {
panic!("Environment variable DEP_TREE_SITTER_LANGUAGE_WASM_SRC must be set by the language crate");
};
c_config.include(&wasm_headers);
c_config.files([
wasm_src.join("stdio.c"),
wasm_src.join("stdlib.c"),
wasm_src.join("string.c"),
]);
}
"#};
let indented_replacement = replacement
.lines()
.map(|line| if line.is_empty() { line.to_string() } else { format!(" {line}") })
.collect::<Vec<_>>()
.join("\n");
let mut contents = fs::read_to_string(path)?;
if !contents.contains("wasm32-unknown-unknown") {
contents = contents.replace(r#" c_config.flag("-utf-8");"#, &indented_replacement);
}
write_file(path, contents)?;
Ok(())
},
)?;
missing_path_else(
repo_path.join("Cargo.toml"),

View file

@ -7,6 +7,24 @@ fn main() {
#[cfg(target_env = "msvc")]
c_config.flag("-utf-8");
if std::env::var("TARGET").unwrap() == "wasm32-unknown-unknown" {
let Ok(wasm_headers) = std::env::var("DEP_TREE_SITTER_LANGUAGE_WASM_HEADERS") else {
panic!("Environment variable DEP_TREE_SITTER_LANGUAGE_WASM_HEADERS must be set by the language crate");
};
let Ok(wasm_src) =
std::env::var("DEP_TREE_SITTER_LANGUAGE_WASM_SRC").map(std::path::PathBuf::from)
else {
panic!("Environment variable DEP_TREE_SITTER_LANGUAGE_WASM_SRC must be set by the language crate");
};
c_config.include(&wasm_headers);
c_config.files([
wasm_src.join("stdio.c"),
wasm_src.join("stdlib.c"),
wasm_src.join("string.c"),
]);
}
let parser_path = src_dir.join("parser.c");
c_config.file(&parser_path);
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());

View file

@ -4,7 +4,7 @@ description = "The tree-sitter Language type, used by the library and by languag
version = "0.1.4"
authors.workspace = true
edition.workspace = true
rust-version = "1.76"
rust-version = "1.77"
readme = "README.md"
homepage.workspace = true
repository.workspace = true
@ -13,6 +13,9 @@ license.workspace = true
keywords.workspace = true
categories = ["api-bindings", "development-tools::ffi", "parsing"]
build = "build.rs"
links = "tree-sitter-language"
[lints]
workspace = true

13
crates/language/build.rs Normal file
View file

@ -0,0 +1,13 @@
/// Build script for the language crate.
///
/// When the `TARGET` environment variable starts with `wasm32-unknown`, emits
/// `cargo::metadata` lines carrying the locations of the bundled `wasm/include`
/// and `wasm/src` directories (resolved against `CARGO_MANIFEST_DIR`). For any
/// other target, or when `TARGET` is unset, nothing is printed.
///
/// # Panics
///
/// Panics if a Wasm target is selected and `CARGO_MANIFEST_DIR` is not set.
fn main() {
    let target = std::env::var("TARGET").unwrap_or_default();
    if !target.starts_with("wasm32-unknown") {
        return;
    }

    let manifest_dir = std::path::PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap());
    let [wasm_headers, wasm_src] = ["wasm/include", "wasm/src"].map(|rel| manifest_dir.join(rel));

    println!("cargo::metadata=wasm-headers={}", wasm_headers.display());
    println!("cargo::metadata=wasm-src={}", wasm_src.display());
}

View file

@ -0,0 +1,14 @@
#ifndef TREE_SITTER_WASM_ASSERT_H_
#define TREE_SITTER_WASM_ASSERT_H_
#ifdef NDEBUG
#define assert(e) ((void)0)
#else
// Failure handler used by the `assert` macro: traps unconditionally and never
// returns. The arguments describe the failed assertion (expression text, file,
// line, enclosing function) but are not reported anywhere.
//
// The definition is `static inline` because it lives in a header: with external
// linkage, every translation unit that includes this header would emit its own
// `__assert_fail` symbol and the final link would fail with duplicate
// definitions.
static inline __attribute__((noreturn)) void __assert_fail(const char *assertion, const char *file, unsigned line, const char *function) {
  (void)assertion;
  (void)file;
  (void)line;
  (void)function;
  __builtin_trap();
}
#define assert(expression) \
((expression) ? (void)0 : __assert_fail(#expression, __FILE__, __LINE__, __func__))
#endif
#endif // TREE_SITTER_WASM_ASSERT_H_

View file

@ -0,0 +1,8 @@
#ifndef TREE_SITTER_WASM_CTYPE_H_
#define TREE_SITTER_WASM_CTYPE_H_
// Reports whether `c` is a printable ASCII character, i.e. lies in the
// inclusive range 0x20 (space) through 0x7E ('~'). Returns 1 if so, else 0.
static inline int isprint(int c) {
  if (c < 0x20) {
    return 0;
  }
  return c <= 0x7E;
}
#endif // TREE_SITTER_WASM_CTYPE_H_

View file

@ -0,0 +1,12 @@
#ifndef TREE_SITTER_WASM_ENDIAN_H_
#define TREE_SITTER_WASM_ENDIAN_H_
// Byte-order conversion helpers. The big-endian variants byte-swap and the
// little-endian variants are the identity, i.e. the host is treated as
// little-endian.

// Big-endian to host order: reverse the bytes.
#define be16toh(x) __builtin_bswap16(x)
#define be32toh(x) __builtin_bswap32(x)
#define be64toh(x) __builtin_bswap64(x)
// Little-endian to host order: already in host order, so no conversion.
#define le16toh(x) (x)
#define le32toh(x) (x)
#define le64toh(x) (x)
#endif // TREE_SITTER_WASM_ENDIAN_H_

View file

@ -0,0 +1,8 @@
#ifndef TREE_SITTER_WASM_INTTYPES_H_
#define TREE_SITTER_WASM_INTTYPES_H_
// https://github.com/llvm/llvm-project/blob/0c3cf200f5b918fb5c1114e9f1764c2d54d1779b/libc/include/llvm-libc-macros/inttypes-macros.h#L209
#define PRId32 "d"
#endif // TREE_SITTER_WASM_INTTYPES_H_

View file

@ -0,0 +1,40 @@
#ifndef TREE_SITTER_WASM_STDINT_H_
#define TREE_SITTER_WASM_STDINT_H_
// https://github.com/llvm/llvm-project/blob/0c3cf200f5b918fb5c1114e9f1764c2d54d1779b/clang/test/Preprocessor/init.c#L1672
// Fixed-width signed integer types.
typedef signed char int8_t;
typedef short int16_t;
typedef int int32_t;
typedef long long int int64_t;
// Fixed-width unsigned integer types.
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef long long unsigned int uint64_t;
// Size and pointer-sized integer types; both are `unsigned long` here.
typedef long unsigned int size_t;
typedef long unsigned int uintptr_t;
// Limits of the unsigned 16- and 32-bit types.
#define UINT16_MAX 65535
#define UINT32_MAX 4294967295U
// SIZE_MAX is chosen by target pointer width; it is left undefined when
// neither __wasm32__ nor __wasm64__ is defined.
#if defined(__wasm32__)
#define SIZE_MAX 4294967295UL
#elif defined(__wasm64__)
#define SIZE_MAX 18446744073709551615UL
#endif
#endif // TREE_SITTER_WASM_STDINT_H_

View file

@ -0,0 +1,36 @@
#ifndef TREE_SITTER_WASM_STDIO_H_
#define TREE_SITTER_WASM_STDIO_H_
#include <stdbool.h>
#include <stdint.h>
typedef struct FILE FILE;
typedef __builtin_va_list va_list;
#define va_start(ap, last) __builtin_va_start(ap, last)
#define va_end(ap) __builtin_va_end(ap)
#define va_arg(ap, type) __builtin_va_arg(ap, type)
#define stdout ((FILE *)0)
#define stderr ((FILE *)1)
#define stdin ((FILE *)2)
int fclose(FILE *stream);
FILE *fdopen(int fd, const char *mode);
int fputc(int c, FILE *stream);
int fputs(const char *restrict s, FILE *restrict stream);
size_t fwrite(const void *restrict buffer, size_t size, size_t nmemb, FILE *restrict stream);
int fprintf(FILE *restrict stream, const char *restrict format, ...);
int snprintf(char *restrict buffer, size_t buffsz, const char *restrict format, ...);
int vsnprintf(char *restrict buffer, size_t buffsz, const char *restrict format, va_list vlist);
#endif // TREE_SITTER_WASM_STDIO_H_

View file

@ -0,0 +1,15 @@
#ifndef TREE_SITTER_WASM_STDLIB_H_
#define TREE_SITTER_WASM_STDLIB_H_
#include <stdint.h>
#define NULL ((void*)0)
void* malloc(size_t);
void* calloc(size_t, size_t);
void free(void*);
void* realloc(void*, size_t);
__attribute__((noreturn)) void abort(void);
#endif // TREE_SITTER_WASM_STDLIB_H_

View file

@ -0,0 +1,16 @@
#ifndef TREE_SITTER_WASM_STRING_H_
#define TREE_SITTER_WASM_STRING_H_
#include <stdint.h>
int memcmp(const void *lhs, const void *rhs, size_t count);
void *memcpy(void *restrict dst, const void *restrict src, size_t size);
void *memmove(void *dst, const void *src, size_t count);
void *memset(void *dst, int value, size_t count);
int strncmp(const char *left, const char *right, size_t n);
#endif // TREE_SITTER_WASM_STRING_H_

View file

@ -0,0 +1,168 @@
#ifndef TREE_SITTER_WASM_WCTYPE_H_
#define TREE_SITTER_WASM_WCTYPE_H_
typedef int wint_t;
// Reports whether `wch` is an ASCII letter (`a`-`z` or `A`-`Z`). No other
// code points are classified as alphabetic.
//
// Returns 1 for a match and 0 otherwise. The return type is `int`, as in the
// standard C prototype, so this header does not depend on <stdbool.h> having
// been included first.
static inline int iswalpha(wint_t wch) {
  return (wch >= L'a' && wch <= L'z') || (wch >= L'A' && wch <= L'Z');
}
// Reports whether `wch` is an ASCII decimal digit (`0`-`9`).
//
// Returns 1 for a match and 0 otherwise. The return type is `int`, as in the
// standard C prototype, so this header does not depend on <stdbool.h> having
// been included first.
static inline int iswdigit(wint_t wch) {
  return wch >= L'0' && wch <= L'9';
}
// Reports whether `wch` is an ASCII letter (`a`-`z`, `A`-`Z`) or an ASCII
// decimal digit (`0`-`9`).
//
// Returns 1 for a match and 0 otherwise. The return type is `int`, as in the
// standard C prototype, so this header does not depend on <stdbool.h> having
// been included first.
static inline int iswalnum(wint_t wch) {
  return (wch >= L'a' && wch <= L'z') ||
         (wch >= L'A' && wch <= L'Z') ||
         (wch >= L'0' && wch <= L'9');
}
// Reports whether `wch` is one of the six ASCII whitespace characters:
// space, `\t`, `\n`, `\v`, `\f` or `\r`.
//
// Returns 1 for a match and 0 otherwise. The return type is `int`, as in the
// standard C prototype, so this header does not depend on <stdbool.h> having
// been included first.
static inline int iswspace(wint_t wch) {
  // `\t`, `\n`, `\v`, `\f`, `\r` are the contiguous code points 0x09..0x0D.
  return wch == L' ' || (wch >= L'\t' && wch <= L'\r');
}
#endif // TREE_SITTER_WASM_WCTYPE_H_

View file

@ -0,0 +1,304 @@
#include <stdio.h>
// Flag characters parsed from a printf-style conversion specification.
// Each field records whether the corresponding flag character was present.
typedef struct {
bool left_justify; // -
bool zero_pad; // 0
bool show_sign; // +
bool space_prefix; // ' '
bool alternate_form; // #
} format_flags_t;
// Parses the flags, field width and precision of a conversion specification.
//
// `format` points just past the introducing '%'. On return:
//   - `*flags` records which of the `-`, `0`, `+`, ` ` and `#` flags were seen,
//   - `*width` holds the decimal field width (0 when absent),
//   - `*precision` holds the decimal precision after a '.', or -1 when there is
//     no '.' (a bare '.' yields 0).
//
// Returns a pointer to the first character after the parsed portion, which is
// where the conversion character is expected.
static const char* parse_format_spec(
  const char *format,
  int *width,
  int *precision,
  format_flags_t *flags
) {
  const char *cursor = format;

  flags->left_justify = false;
  flags->zero_pad = false;
  flags->show_sign = false;
  flags->space_prefix = false;
  flags->alternate_form = false;

  // Flags: any run of the five flag characters, in any order.
  for (;;) {
    char ch = *cursor;
    if (ch == '-') {
      flags->left_justify = true;
    } else if (ch == '0') {
      flags->zero_pad = true;
    } else if (ch == '+') {
      flags->show_sign = true;
    } else if (ch == ' ') {
      flags->space_prefix = true;
    } else if (ch == '#') {
      flags->alternate_form = true;
    } else {
      break;
    }
    cursor++;
  }

  // Field width: a run of decimal digits.
  int parsed_width = 0;
  for (; *cursor >= '0' && *cursor <= '9'; cursor++) {
    parsed_width = (parsed_width * 10) + (*cursor - '0');
  }
  *width = parsed_width;

  // Precision: '.' followed by an optional run of decimal digits.
  int parsed_precision = -1;
  if (*cursor == '.') {
    cursor++;
    parsed_precision = 0;
    for (; *cursor >= '0' && *cursor <= '9'; cursor++) {
      parsed_precision = (parsed_precision * 10) + (*cursor - '0');
    }
  }
  *precision = parsed_precision;

  return cursor;
}
// Converts `value` to a NUL-terminated string in `buffer` and returns the
// number of characters written (excluding the terminator).
//
//   - `base` must be in 2..16; for any other base an empty string is written
//     and 0 is returned.
//   - When `is_signed` is true, `value` is negative and `base` is 10, a leading
//     '-' is emitted followed by the magnitude. In every other case the bits of
//     `value` are formatted as an unsigned 64-bit quantity.
//   - `uppercase` selects `A`-`F` instead of `a`-`f` for digits above 9.
//
// `buffer` must have room for the digits, an optional sign and the terminator
// (at most 66 bytes, reached only for base 2).
static int int_to_str(
  long long value,
  char *buffer,
  int base,
  bool is_signed,
  bool uppercase
) {
  if (base < 2 || base > 16) {
    // Leave the caller with a valid empty string rather than stale bytes.
    buffer[0] = '\0';
    return 0;
  }

  const char *digits = uppercase ? "0123456789ABCDEF" : "0123456789abcdef";
  // A 64-bit value needs up to 64 digits in base 2.
  char temp[64];
  int i = 0, len = 0;

  if (value == 0) {
    buffer[0] = '0';
    buffer[1] = '\0';
    return 1;
  }

  bool is_negative = is_signed && value < 0 && base == 10;
  unsigned long long uval = (unsigned long long)value;
  if (is_negative) {
    // Negate in the unsigned domain: `-value` overflows for LLONG_MIN.
    uval = 0ULL - uval;
  }

  while (uval > 0) {
    temp[i++] = digits[uval % (unsigned)base];
    uval /= (unsigned)base;
  }

  if (is_negative) {
    buffer[len++] = '-';
  }

  // Digits were produced least-significant first; emit them reversed.
  while (i > 0) {
    buffer[len++] = temp[--i];
  }

  buffer[len] = '\0';
  return len;
}
// Formats `ptr` into `buffer` as "0x" followed by its value in lowercase
// hexadecimal, NUL-terminated. Returns the number of characters written,
// including the "0x" prefix and excluding the terminator.
static int ptr_to_str(void *ptr, char *buffer) {
  char *cursor = buffer;
  *cursor++ = '0';
  *cursor++ = 'x';
  return 2 + int_to_str((uintptr_t)ptr, cursor, 16, 0, 0);
}
// Returns the number of bytes in `str` before its terminating NUL.
size_t strlen(const char *str) {
  size_t length = 0;
  while (str[length] != '\0') {
    length++;
  }
  return length;
}
// Copies at most `n` bytes of the string `src` into `dest` and returns `dest`.
//
// Follows the standard C contract:
//   - exactly `n` bytes of `dest` are written, never more;
//   - if `src` is shorter than `n`, the remainder of `dest` is filled with NULs;
//   - if `src` is `n` bytes or longer, `dest` is NOT NUL-terminated.
char *strncpy(char *dest, const char *src, size_t n) {
  size_t i = 0;

  // Copy the string's characters, up to the limit.
  for (; i < n && src[i] != '\0'; i++) {
    dest[i] = src[i];
  }

  // Zero-fill whatever is left of the `n`-byte destination window.
  for (; i < n; i++) {
    dest[i] = '\0';
  }

  return dest;
}
// Appends the first `len` bytes of `str` to `buffer` at `*pos`, padded out to
// a field of `width` characters.
//
//   - Without the left-justify flag the padding goes first and uses '0' when
//     the zero-pad flag is set, otherwise ' '.
//   - With the left-justify flag the padding follows the text and is always ' '.
//
// Writing stops once `*pos` reaches `buffer_size - 1`, so one byte always
// remains for the terminator; `buffer_size` must therefore be non-zero.
// `*pos` is advanced past everything written. Returns the number of bytes
// actually stored.
//
// `len` is passed explicitly so that a truncated (precision-limited) string can
// be emitted straight from its source without an intermediate copy.
static int write_formatted_to_buffer(
  char *buffer,
  size_t buffer_size,
  size_t *pos,
  const char *str,
  int len,
  int width,
  const format_flags_t *flags
) {
  int written = 0;
  int pad_len = (width > len) ? (width - len) : 0;
  int zero_pad = flags->zero_pad && !flags->left_justify;

  if (!flags->left_justify && pad_len > 0) {
    char pad_char = zero_pad ? '0' : ' ';
    for (int i = 0; i < pad_len && *pos < buffer_size - 1; i++) {
      buffer[(*pos)++] = pad_char;
      written++;
    }
  }

  for (int i = 0; i < len && *pos < buffer_size - 1; i++) {
    buffer[(*pos)++] = str[i];
    written++;
  }

  if (flags->left_justify && pad_len > 0) {
    for (int i = 0; i < pad_len && *pos < buffer_size - 1; i++) {
      buffer[(*pos)++] = ' ';
      written++;
    }
  }

  return written;
}

// Minimal `vsnprintf`: formats `format` with `args` into `buffer`, writing at
// most `buffsz - 1` characters followed by a NUL terminator.
//
// Supported conversions: `%%`, `%s`, `%d`, `%i`, `%u`, `%x`, `%X`, `%p`, `%c`
// and `%zu`, each with optional flags and field width. A precision is honoured
// for `%s` only (it limits the number of characters taken from the string).
// An unrecognised conversion character is echoed as '%' followed by that
// character, and consumes no argument.
//
// Returns the number of characters the full output would contain (which may
// exceed what fit in `buffer`), or -1 when `buffer` is null, `buffsz` is 0 or
// `format` is null.
static int vsnprintf_impl(char *buffer, size_t buffsz, const char *format, va_list args) {
  if (!buffer || buffsz == 0 || !format) return -1;

  size_t pos = 0;
  int total_chars = 0;
  const char *p = format;

  while (*p) {
    if (*p == '%') {
      p++;
      if (*p == '%') {
        if (pos < buffsz - 1) buffer[pos++] = '%';
        total_chars++;
        p++;
        continue;
      }

      int width, precision;
      format_flags_t flags;
      p = parse_format_spec(p, &width, &precision, &flags);

      // Scratch space for the numeric/character conversions. Strings are
      // never copied here: they are emitted directly from the argument, so an
      // arbitrarily large precision cannot overflow this buffer.
      char temp_buf[64];
      const char *output_str = temp_buf;
      int output_len = -1;  // -1 means "measure output_str with strlen"

      switch (*p) {
        case 's': {
          const char *str = va_arg(args, const char*);
          if (!str) str = "(null)";
          int str_len = (int)strlen(str);
          output_str = str;
          output_len = (precision >= 0 && str_len > precision) ? precision : str_len;
          break;
        }
        case 'd':
        case 'i': {
          int value = va_arg(args, int);
          int_to_str(value, temp_buf, 10, true, false);
          break;
        }
        case 'u': {
          unsigned int value = va_arg(args, unsigned int);
          int_to_str(value, temp_buf, 10, false, false);
          break;
        }
        case 'x': {
          unsigned int value = va_arg(args, unsigned int);
          int_to_str(value, temp_buf, 16, false, false);
          break;
        }
        case 'X': {
          unsigned int value = va_arg(args, unsigned int);
          int_to_str(value, temp_buf, 16, false, true);
          break;
        }
        case 'p': {
          void *ptr = va_arg(args, void*);
          ptr_to_str(ptr, temp_buf);
          break;
        }
        case 'c': {
          int c = va_arg(args, int);
          temp_buf[0] = (char)c;
          temp_buf[1] = '\0';
          break;
        }
        case 'z': {
          if (*(p + 1) == 'u') {
            size_t value = va_arg(args, size_t);
            int_to_str(value, temp_buf, 10, false, false);
            p++;
          } else {
            temp_buf[0] = 'z';
            temp_buf[1] = '\0';
          }
          break;
        }
        default:
          // Unknown conversion: echo it. When the format ends here `*p` is the
          // terminator, so only the '%' is produced.
          temp_buf[0] = '%';
          temp_buf[1] = *p;
          temp_buf[2] = '\0';
          break;
      }

      if (output_len < 0) output_len = (int)strlen(output_str);
      int formatted_len = (width > output_len) ? width : output_len;
      total_chars += formatted_len;

      if (pos < buffsz - 1) {
        write_formatted_to_buffer(buffer, buffsz, &pos, output_str, output_len, width, &flags);
      }

      // The format ended inside a conversion specification: stop here rather
      // than stepping past the terminator and reading beyond the string.
      if (*p == '\0') break;
    } else {
      if (pos < buffsz - 1) buffer[pos++] = *p;
      total_chars++;
    }
    p++;
  }

  if (buffsz > 0) buffer[pos < buffsz ? pos : buffsz - 1] = '\0';

  return total_chars;
}
// Formats the variadic arguments according to `format` into `buffer`, which
// holds `buffsz` bytes, by delegating to `vsnprintf_impl`.
//
// Returns -1 without touching the arguments when `buffer` is null, `buffsz` is
// 0 or `format` is null; otherwise returns whatever `vsnprintf_impl` returns.
int snprintf(char *restrict buffer, size_t buffsz, const char *restrict format, ...) {
  if (!(buffer && buffsz != 0 && format)) {
    return -1;
  }

  va_list varargs;
  va_start(varargs, format);
  const int written = vsnprintf_impl(buffer, buffsz, format, varargs);
  va_end(varargs);

  return written;
}
// Public `vsnprintf` entry point: forwards all arguments unchanged to
// `vsnprintf_impl` and returns its result.
int vsnprintf(char *restrict buffer, size_t buffsz, const char *restrict format, va_list vlist) {
return vsnprintf_impl(buffer, buffsz, format, vlist);
}
// Stub: ignores `stream` and always returns 0.
int fclose(FILE *stream) {
return 0;
}
// Stub: ignores `fd` and `mode`, opens nothing, and always returns a null
// pointer.
FILE* fdopen(int fd, const char *mode) {
return 0;
}
// Stub: discards the character (nothing is written to `stream`) and reports
// success.
//
// Returns the character as an `unsigned char` converted to `int`, as the C
// standard specifies for a successful `fputc`. Returning `c` unconverted would
// make a negative input such as -1 indistinguishable from an `EOF` failure.
int fputc(int c, FILE *stream) {
  (void)stream;
  return (unsigned char)c;
}
// Stub: ignores `str` and `stream`, writes nothing, and always returns 0.
int fputs(const char *restrict str, FILE *restrict stream) {
return 0;
}
// Stub: discards the data (nothing is written to `stream`) and reports that
// every element was written.
//
// Returns the number of ELEMENTS, i.e. `nmemb`, as the C standard specifies,
// not the byte count `size * nmemb`. Callers that compare the result against
// `nmemb` to detect short writes would otherwise see a mismatch whenever
// `size` is not 1. Returns 0 when `size` is 0, also per the standard.
size_t fwrite(const void *restrict buffer, size_t size, size_t nmemb, FILE *restrict stream) {
  (void)buffer;
  (void)stream;
  return size == 0 ? 0 : nmemb;
}
// Stub: ignores `stream`, `format` and the variadic arguments, produces no
// output, and always returns 0.
int fprintf(FILE *restrict stream, const char *restrict format, ...) {
return 0;
}

View file

@ -6,10 +6,7 @@
// grown as necessary, and the allocation is made at the end of the heap.
// When the heap is reset, all allocated memory is considered freed.
#ifdef TREE_SITTER_FEATURE_WASM
#include <stdio.h>
#include <unistd.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
@ -136,4 +133,6 @@ void *realloc(void *ptr, size_t new_size) {
return result;
}
#endif
// Terminates execution by invoking the compiler's trap builtin; never returns.
__attribute__((noreturn)) void abort(void) {
__builtin_trap();
}

View file

@ -0,0 +1,60 @@
#include <string.h>
// Compares the first `count` bytes of `lhs` and `rhs` as unsigned chars.
//
// Returns 0 when all bytes are equal (including when `count` is 0); otherwise
// returns the difference between the first pair of differing bytes, which is
// negative when `lhs` sorts first and positive when `rhs` does.
int memcmp(const void *lhs, const void *rhs, size_t count) {
  const unsigned char *left = lhs;
  const unsigned char *right = rhs;

  for (size_t i = 0; i < count; i++) {
    int diff = left[i] - right[i];
    if (diff != 0) {
      return diff;
    }
  }

  return 0;
}
// Copies `size` bytes from `src` to `dst`, one byte at a time from the lowest
// address upward, and returns `dst`. The `restrict` qualifiers state that the
// two regions must not overlap.
void *memcpy(void *restrict dst, const void *restrict src, size_t size) {
unsigned char *d = dst;
const unsigned char *s = src;
while (size--) {
*d++ = *s++;
}
return dst;
}
// Copies `count` bytes from `src` to `dst` and returns `dst`. The regions may
// overlap: the copy direction is chosen so that source bytes are read before
// they are overwritten.
void *memmove(void *dst, const void *src, size_t count) {
unsigned char *d = dst;
const unsigned char *s = src;
if (d < s) {
// Destination starts below the source: copy forward.
while (count--) {
*d++ = *s++;
}
} else if (d > s) {
// Destination starts above the source: copy backward from the end.
d += count;
s += count;
while (count--) {
*(--d) = *(--s);
}
}
// When d == s there is nothing to do.
return dst;
}
// Sets each of the first `count` bytes of `dst` to `value` converted to
// `unsigned char`, and returns `dst`.
void *memset(void *dst, int value, size_t count) {
unsigned char *p = dst;
while (count--) {
*p++ = (unsigned char)value;
}
return dst;
}
// Compares at most `n` characters of the strings `left` and `right`, stopping
// early at a NUL terminator.
//
// Returns 0 when the compared portions are equal (including when `n` is 0);
// otherwise returns the difference between the first pair of differing
// characters, taken as unsigned chars.
int strncmp(const char *left, const char *right, size_t n) {
  for (size_t i = 0; i < n; i++) {
    unsigned char l = (unsigned char)left[i];
    unsigned char r = (unsigned char)right[i];
    if (l != r) {
      return l - r;
    }
    if (l == '\0') {
      return 0;
    }
  }
  return 0;
}

View file

@ -374,7 +374,7 @@ pub fn run_wasm_stdlib() -> Result<()> {
"-Wl,--export=reset_heap",
])
.args(&export_flags)
.arg("lib/src/wasm/stdlib.c")
.arg("crates/language/wasm/src/stdlib.c")
.output()?;
bail_on_err(&output, "Failed to compile the Tree-sitter Wasm stdlib")?;

View file

@ -4,7 +4,7 @@ version.workspace = true
description = "Rust bindings to the Tree-sitter parsing library"
authors.workspace = true
edition.workspace = true
rust-version = "1.76"
rust-version = "1.77"
readme = "binding_rust/README.md"
homepage.workspace = true
repository.workspace = true

View file

@ -2,6 +2,7 @@ use std::{env, fs, path::PathBuf};
fn main() {
let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
let target = env::var("TARGET").unwrap();
#[cfg(feature = "bindgen")]
generate_bindings(&out_dir);
@ -26,6 +27,11 @@ fn main() {
let include_path = manifest_path.join("include");
let src_path = manifest_path.join("src");
let wasm_path = src_path.join("wasm");
if target.starts_with("wasm32-unknown") {
configure_wasm_build(&mut config);
}
for entry in fs::read_dir(&src_path).unwrap() {
let entry = entry.unwrap();
let path = src_path.join(entry.file_name());
@ -50,6 +56,22 @@ fn main() {
println!("cargo:include={}", include_path.display());
}
/// Points `config` at the Wasm libc shim advertised by the language crate.
///
/// Reads the header directory from `DEP_TREE_SITTER_LANGUAGE_WASM_HEADERS` and
/// the source directory from `DEP_TREE_SITTER_LANGUAGE_WASM_SRC`, adds the
/// former as an include path, and adds `stdio.c`, `stdlib.c` and `string.c`
/// from the latter to the list of files to compile.
///
/// # Panics
///
/// Panics if either environment variable cannot be read.
fn configure_wasm_build(config: &mut cc::Build) {
    let wasm_headers = match env::var("DEP_TREE_SITTER_LANGUAGE_WASM_HEADERS") {
        Ok(dir) => dir,
        Err(_) => panic!("Environment variable DEP_TREE_SITTER_LANGUAGE_WASM_HEADERS must be set by the language crate"),
    };
    let wasm_src = match env::var("DEP_TREE_SITTER_LANGUAGE_WASM_SRC") {
        Ok(dir) => PathBuf::from(dir),
        Err(_) => panic!("Environment variable DEP_TREE_SITTER_LANGUAGE_WASM_SRC must be set by the language crate"),
    };

    config.include(&wasm_headers);
    config.files(["stdio.c", "stdlib.c", "string.c"].map(|name| wasm_src.join(name)));
}
#[cfg(feature = "bindgen")]
fn generate_bindings(out_dir: &std::path::Path) {
use std::str::FromStr;

View file

@ -146,7 +146,7 @@ void ts_tree_print_dot_graph(const TSTree *self, int fd) {
fclose(file);
}
#elif !defined(__wasi__) // WASI doesn't support dup
#elif !defined(__wasm__) // Wasm doesn't support dup
#include <unistd.h>