From eedbec8f24966a64fe413c24fb5d52185c01e54a Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Thu, 11 Sep 2025 18:41:29 -0400 Subject: [PATCH] feat: remove the need of an external JS runtime for processing grammars --- Cargo.lock | 237 +++++++++++++++- crates/cli/Cargo.toml | 2 + crates/cli/package.nix | 5 + crates/cli/src/main.rs | 13 + crates/generate/Cargo.toml | 13 +- crates/generate/src/dsl.js | 24 +- crates/generate/src/generate.rs | 18 ++ crates/generate/src/quickjs.rs | 463 ++++++++++++++++++++++++++++++++ 8 files changed, 760 insertions(+), 15 deletions(-) create mode 100644 crates/generate/src/quickjs.rs diff --git a/Cargo.lock b/Cargo.lock index d42f2e23..0476d424 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -103,6 +103,29 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16" +[[package]] +name = "bindgen" +version = "0.69.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" +dependencies = [ + "bitflags 2.9.4", + "cexpr", + "clang-sys", + "itertools 0.12.1", + "lazy_static", + "lazycell", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash 1.1.0", + "shlex", + "syn", + "which", +] + [[package]] name = "bindgen" version = "0.72.1" @@ -118,7 +141,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 2.1.1", "shlex", "syn", ] @@ -318,6 +341,15 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "core-foundation" version = "0.10.1" @@ -391,7 +423,7 @@ dependencies = [ "log", "pulley-interpreter", "regalloc2", - "rustc-hash", + "rustc-hash 2.1.1", "serde", "smallvec", "target-lexicon", @@ -636,6 +668,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fd99930f64d146689264c637b5af2f0233a933bef0d8570e2526bf9e083192d" +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "foldhash" version = "0.1.5" @@ -845,6 +883,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.1.0" @@ -909,6 +953,15 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -985,6 +1038,18 @@ dependencies = [ "libc", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "leb128fmt" version = "0.1.0" @@ -1210,12 +1275,46 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e91099d4268b0e11973f036e885d652fb0b21fedcf69738c627f94db6a44f42" +[[package]] +name = "pathdiff" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" + [[package]] name = "percent-encoding" version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -1272,6 +1371,15 @@ dependencies = [ "syn", ] +[[package]] +name = "proc-macro-crate" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" +dependencies = [ + "toml_edit", +] + [[package]] name = "proc-macro2" version = "1.0.101" @@ -1365,7 +1473,7 @@ dependencies = [ "bumpalo", "hashbrown", "log", - "rustc-hash", + "rustc-hash 2.1.1", "smallvec", ] @@ -1398,6 +1506,12 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +[[package]] +name = "relative-path" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" + [[package]] name = "rgb" version = "0.8.52" @@ -1407,6 +1521,62 @@ dependencies = [ "bytemuck", ] +[[package]] +name = "rquickjs" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5227859c4dfc83f428e58f9569bf439e628c8d139020e7faff437e6f5abaa0" +dependencies = [ + "rquickjs-core", + "rquickjs-macro", +] + +[[package]] +name = "rquickjs-core" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e82e0ca83028ad5b533b53b96c395bbaab905a5774de4aaf1004eeacafa3d85d" +dependencies = [ + "phf", + "relative-path", + "rquickjs-sys", +] + +[[package]] +name = "rquickjs-macro" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4d2eccd988a924a470a76fbd81a191b22d1f5f4f4619cf5662a8c1ab4ca1db7" +dependencies = [ + "convert_case", + "fnv", + "ident_case", + "indexmap", + "phf_generator", + "phf_shared", + "proc-macro-crate", + "proc-macro2", + "quote", + "rquickjs-core", + "syn", +] + +[[package]] +name = "rquickjs-sys" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fed0097b0b4fbb2a87f6dd3b995a7c64ca56de30007eb7e867dfdfc78324ba5" +dependencies = [ + "bindgen 0.69.5", + "cc", +] + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc-hash" version = "2.1.1" @@ -1526,6 +1696,12 @@ version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "smallbitvec" version = "2.6.0" @@ -1686,6 +1862,23 @@ dependencies = [ "zerovec", ] +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + [[package]] name = "topological-sort" version = "0.2.2" @@ -1727,7 +1920,7 @@ dependencies = [ name = "tree-sitter" version = "0.26.0" dependencies = [ - "bindgen", + "bindgen 0.72.1", "cc", "regex", "regex-syntax", @@ -1764,7 +1957,7 @@ dependencies = [ "rand", "regex", "regex-syntax", - "rustc-hash", + "rustc-hash 2.1.1", "semver", "serde", "serde_derive", @@ -1809,13 +2002,16 @@ dependencies = [ "indexmap", "indoc", "log", + "pathdiff", "regex", "regex-syntax", - "rustc-hash", + "rquickjs", + "rustc-hash 2.1.1", "semver", "serde", "serde_json", "smallbitvec", + "tempfile", "thiserror 2.0.16", "topological-sort", "tree-sitter", @@ -1886,6 +2082,12 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "unicode-width" version = "0.2.1" @@ -2263,6 +2465,18 @@ dependencies = [ "web-sys", ] +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix 0.38.44", +] + [[package]] name = "widestring" version = "1.2.0" @@ -2540,6 +2754,15 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" +[[package]] +name = "winnow" +version = "0.7.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +dependencies = [ + "memchr", +] + [[package]] name = "wit-bindgen" version = "0.45.0" @@ -2558,7 +2781,7 @@ version = "0.1.0" dependencies = [ "anstyle", "anyhow", - "bindgen", + "bindgen 0.72.1", "cc", "clap", "indoc", diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 02de411b..741e8b42 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -30,7 +30,9 @@ name = "benchmark" harness = false [features] +default = ["qjs-rt"] wasm = ["tree-sitter/wasm", "tree-sitter-loader/wasm"] +qjs-rt = ["tree-sitter-generate/qjs-rt"] [dependencies] ansi_colours.workspace = true diff --git a/crates/cli/package.nix b/crates/cli/package.nix index ceab7bc7..eea05e12 100644 --- a/crates/cli/package.nix +++ b/crates/cli/package.nix @@ -3,6 +3,8 @@ src, rustPlatform, version, + clang, + libclang, cmake, pkg-config, nodejs_22, @@ -21,6 +23,7 @@ rustPlatform.buildRustPackage { cargoBuildFlags = [ "--all-features" ]; nativeBuildInputs = [ + clang cmake pkg-config nodejs_22 @@ -29,6 +32,8 @@ rustPlatform.buildRustPackage { cargoLock.lockFile = ../../Cargo.lock; + env.LIBCLANG_PATH = "${libclang.lib}/lib"; + preBuild = '' rm -rf test/fixtures mkdir -p test/fixtures diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index dab1806a..960c1f87 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -143,6 +143,7 @@ struct Generate { #[arg(long)] pub json: bool, /// The name or path of the JavaScript runtime to use for generating parsers + #[cfg(not(feature = "qjs-rt"))] #[arg( long, value_name = "EXECUTABLE", @@ -150,6 +151,17 @@ struct Generate { default_value = "node" )] pub js_runtime: Option, + + #[cfg(feature = "qjs-rt")] + #[arg( + long, + value_name = "EXECUTABLE", + env = "TREE_SITTER_JS_RUNTIME", + default_value = "node" + )] + /// The name or path of the JavaScript runtime to use for generating parsers, specify `native` + /// to use the native `QuickJS` runtime + pub js_runtime: Option, } #[derive(Args)] @@ -868,6 +880,7 @@ impl Generate { // TODO: migrate to `warn!` once https://github.com/tree-sitter/tree-sitter/pull/4604 is merged eprintln!("Warning: --build is deprecated, use --stage=lib instead"); } + if let Err(err) = tree_sitter_generate::generate_parser_in_directory( current_dir, self.output.as_deref(), diff --git a/crates/generate/Cargo.toml b/crates/generate/Cargo.toml index 20147948..1a296229 100644 --- a/crates/generate/Cargo.toml +++ b/crates/generate/Cargo.toml @@ -20,8 +20,9 @@ path = "src/generate.rs" workspace = true [features] -default = ["load"] +default = ["qjs-rt"] load = ["dep:semver", "dep:url"] +qjs-rt = ["load", "rquickjs", "pathdiff"] [dependencies] anyhow.workspace = true @@ -29,8 +30,15 @@ heck.workspace = true indexmap.workspace = true indoc.workspace = true log.workspace = true +pathdiff = { version = "0.2.3", optional = true } regex.workspace = true regex-syntax.workspace = true +rquickjs = { version = "0.9.0", optional = true, features = [ + "bindgen", + "loader", + "macro", + "phf", +] } rustc-hash.workspace = true semver = { workspace = true, optional = true } serde.workspace = true @@ -43,3 +51,6 @@ tree-sitter.workspace = true [target.'cfg(windows)'.dependencies] url = { workspace = true, optional = true } + +[dev-dependencies] +tempfile.workspace = true diff --git a/crates/generate/src/dsl.js b/crates/generate/src/dsl.js index faaace05..f522fd7f 100644 --- a/crates/generate/src/dsl.js +++ b/crates/generate/src/dsl.js @@ -70,7 +70,7 @@ function prec(number, rule) { }; } -prec.left = function(number, rule) { +prec.left = function (number, rule) { if (rule == null) { rule = number; number = 0; @@ -92,7 +92,7 @@ prec.left = function(number, rule) { }; } -prec.right = function(number, rule) { +prec.right = function (number, rule) { if (rule == null) { rule = number; number = 0; @@ -114,7 +114,7 @@ prec.right = function(number, rule) { }; } -prec.dynamic = function(number, rule) { +prec.dynamic = function (number, rule) { checkPrecedence(number); checkArguments( arguments, @@ -184,7 +184,7 @@ function token(value) { }; } -token.immediate = function(value) { +token.immediate = function (value) { checkArguments(arguments, arguments.length, token.immediate, 'token.immediate', '', 'literal'); return { type: "IMMEDIATE_TOKEN", @@ -517,6 +517,7 @@ function checkPrecedence(value) { } function getEnv(name) { + if (globalThis.native) return globalThis.__ts_grammar_path; if (globalThis.process) return process.env[name]; // Node/Bun if (globalThis.Deno) return Deno.env.get(name); // Deno throw Error("Unsupported JS runtime"); @@ -537,14 +538,23 @@ globalThis.grammar = grammar; globalThis.field = field; globalThis.RustRegex = RustRegex; -const result = await import(getEnv("TREE_SITTER_GRAMMAR_PATH")); +const grammarPath = getEnv("TREE_SITTER_GRAMMAR_PATH"); +let result = await import(grammarPath); +let grammarObj = result.default?.grammar ?? result.grammar; + +if (globalThis.native && !grammarObj) { + grammarObj = module.exports.grammar; +} + const object = { "$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/grammar.schema.json", - ...(result.default?.grammar ?? result.grammar) + ...grammarObj, }; const output = JSON.stringify(object); -if (globalThis.process) { // Node/Bun +if (globalThis.native) { + globalThis.output = output; +} else if (globalThis.process) { // Node/Bun process.stdout.write(output); } else if (globalThis.Deno) { // Deno Deno.stdout.writeSync(new TextEncoder().encode(output)); diff --git a/crates/generate/src/generate.rs b/crates/generate/src/generate.rs index 983f0e1a..4583ab9a 100644 --- a/crates/generate/src/generate.rs +++ b/crates/generate/src/generate.rs @@ -25,6 +25,8 @@ mod nfa; mod node_types; pub mod parse_grammar; mod prepare_grammar; +#[cfg(feature = "qjs-rt")] +mod quickjs; mod render; mod rules; mod tables; @@ -150,6 +152,9 @@ pub enum JSError { Semver(String), #[error("Failed to serialze grammar JSON -- {0}")] Serialzation(String), + #[cfg(feature = "qjs-rt")] + #[error("QuickJS error: {0}")] + QuickJS(String), } #[cfg(feature = "load")] @@ -173,7 +178,15 @@ impl From for JSError { } } +#[cfg(feature = "qjs-rt")] +impl From for JSError { + fn from(value: rquickjs::Error) -> Self { + Self::QuickJS(value.to_string()) + } +} + #[cfg(feature = "load")] +#[allow(clippy::too_many_arguments)] pub fn generate_parser_in_directory( repo_path: T, out_path: Option, @@ -420,6 +433,11 @@ fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> JSResu .expect("Failed to convert path to URL") .to_string(); + #[cfg(feature = "qjs-rt")] + if js_runtime == Some("native") { + return quickjs::execute_native_runtime(&grammar_path); + } + let js_runtime = js_runtime.unwrap_or("node"); let mut js_command = Command::new(js_runtime); diff --git a/crates/generate/src/quickjs.rs b/crates/generate/src/quickjs.rs new file mode 100644 index 00000000..51703de5 --- /dev/null +++ b/crates/generate/src/quickjs.rs @@ -0,0 +1,463 @@ +use std::{ + collections::HashMap, + path::{Path, PathBuf}, + sync::{LazyLock, Mutex}, +}; + +use rquickjs::{ + loader::{FileResolver, ScriptLoader}, + Context, Ctx, Function, Module, Object, Runtime, Type, Value, +}; + +use super::{JSError, JSResult}; + +const DSL: &[u8] = include_bytes!("dsl.js"); + +trait JSResultExt { + fn or_js_error(self, ctx: &Ctx) -> JSResult; +} + +impl JSResultExt for Result { + fn or_js_error(self, ctx: &Ctx) -> JSResult { + match self { + Ok(v) => Ok(v), + Err(rquickjs::Error::Exception) => Err(format_js_exception(ctx.catch())), + Err(e) => Err(JSError::QuickJS(e.to_string())), + } + } +} + +fn format_js_exception(v: Value) -> JSError { + let Some(exception) = v.into_exception() else { + return JSError::QuickJS("Expected a JS exception".to_string()); + }; + + let error_obj = exception.as_object(); + let mut parts = Vec::new(); + + for (key, label) in [("message", "Message"), ("stack", "Stack"), ("name", "Type")] { + if let Ok(value) = error_obj.get::<_, String>(key) { + parts.push(format!("{label}: {value}")); + } + } + + if parts.is_empty() { + JSError::QuickJS(exception.to_string()) + } else { + JSError::QuickJS(parts.join("\n")) + } +} + +static FILE_CACHE: LazyLock>> = + LazyLock::new(|| Mutex::new(HashMap::new())); + +#[rquickjs::function] +fn load_file(path: String) -> rquickjs::Result { + { + let cache = FILE_CACHE.lock().unwrap(); + if let Some(cached) = cache.get(&path) { + return Ok(cached.clone()); + } + } + + let content = std::fs::read_to_string(&path).map_err(|e| { + rquickjs::Error::new_from_js_message("IOError", "FileReadError", e.to_string()) + })?; + + { + let mut cache = FILE_CACHE.lock().unwrap(); + cache.insert(path, content.clone()); + } + + Ok(content) +} + +#[rquickjs::class] +#[derive(rquickjs::class::Trace, rquickjs::JsLifetime, Default)] +pub struct Console {} + +impl Console { + fn format_args(args: &[Value<'_>]) -> String { + args.iter() + .map(|v| match v.type_of() { + Type::Bool => v.as_bool().unwrap().to_string(), + Type::Int => v.as_int().unwrap().to_string(), + Type::Float => v.as_float().unwrap().to_string(), + Type::String => v + .as_string() + .unwrap() + .to_string() + .unwrap_or_else(|_| String::new()), + Type::Null => "null".to_string(), + Type::Undefined => "undefined".to_string(), + Type::Uninitialized => "uninitialized".to_string(), + Type::Module => "module".to_string(), + Type::BigInt => v.get::().unwrap_or_else(|_| "BigInt".to_string()), + Type::Unknown => "unknown".to_string(), + Type::Symbol + | Type::Object + | Type::Array + | Type::Function + | Type::Constructor + | Type::Promise + | Type::Exception => "[object Object]".to_string(), + }) + .collect::>() + .join(" ") + } +} + +#[rquickjs::methods] +impl Console { + #[qjs(constructor)] + pub const fn new() -> Self { + Console {} + } + + #[allow(clippy::needless_pass_by_value)] + pub fn log(&self, args: rquickjs::function::Rest>) -> rquickjs::Result<()> { + println!("{}", Self::format_args(&args)); + Ok(()) + } + + #[allow(clippy::needless_pass_by_value)] + pub fn warn(&self, args: rquickjs::function::Rest>) -> rquickjs::Result<()> { + eprintln!("Warning: {}", Self::format_args(&args)); + Ok(()) + } + + #[allow(clippy::needless_pass_by_value)] + pub fn error(&self, args: rquickjs::function::Rest>) -> rquickjs::Result<()> { + eprintln!("Error: {}", Self::format_args(&args)); + Ok(()) + } +} + +fn resolve_module_path(base_path: &Path, module_path: &str) -> rquickjs::Result { + let candidates = if module_path.starts_with("./") || module_path.starts_with("../") { + let target = base_path.join(module_path); + vec![ + target.with_extension("js"), + target.with_extension("json"), + target.clone(), + ] + } else { + let local_target = base_path.join(module_path); + let node_modules_target = Path::new("node_modules").join(module_path); + + vec![ + local_target.with_extension("js"), + local_target.with_extension("json"), + local_target.clone(), + node_modules_target.with_extension("js"), + node_modules_target.with_extension("json"), + node_modules_target, + ] + }; + + for candidate in candidates { + if let Ok(resolved) = try_resolve_path(&candidate) { + return Ok(resolved); + } + } + + Err(rquickjs::Error::new_from_js_message( + "Error", + "ModuleNotFound", + format!("Module not found: {module_path}"), + )) +} + +fn try_resolve_path(path: &Path) -> rquickjs::Result { + let metadata = std::fs::metadata(path).map_err(|_| { + rquickjs::Error::new_from_js_message( + "Error", + "FileNotFound", + format!("Path not found: {}", path.display()), + ) + })?; + + if metadata.is_file() { + return Ok(path.to_path_buf()); + } + + if metadata.is_dir() { + let index_path = path.join("index.js"); + if index_path.exists() { + return Ok(index_path); + } + } + + Err(rquickjs::Error::new_from_js_message( + "Error", + "ResolutionFailed", + format!("Cannot resolve: {}", path.display()), + )) +} + +#[allow(clippy::needless_pass_by_value)] +fn require_from_module<'a>( + ctx: Ctx<'a>, + module_path: String, + from_module: &str, +) -> rquickjs::Result> { + let current_module = PathBuf::from(from_module); + let current_dir = if current_module.is_file() { + current_module.parent().unwrap_or(Path::new(".")) + } else { + current_module.as_path() + }; + + let resolved_path = resolve_module_path(current_dir, &module_path)?; + + let contents = load_file(resolved_path.to_string_lossy().to_string())?; + + load_module_from_content(&ctx, &resolved_path, &contents) +} + +fn load_module_from_content<'a>( + ctx: &Ctx<'a>, + path: &Path, + contents: &str, +) -> rquickjs::Result> { + if path.extension().is_some_and(|ext| ext == "json") { + return ctx.eval::(format!("JSON.parse({contents:?})")); + } + + let exports = Object::new(ctx.clone())?; + let module_obj = Object::new(ctx.clone())?; + module_obj.set("exports", exports.clone())?; + + let filename = path.to_string_lossy().to_string(); + let dirname = path + .parent() + .map_or_else(|| ".".to_string(), |p| p.to_string_lossy().to_string()); + + // Require function specific to *this* module + let module_path = filename.clone(); + let require = Function::new( + ctx.clone(), + move |ctx_inner: Ctx<'a>, target_path: String| -> rquickjs::Result> { + require_from_module(ctx_inner, target_path, &module_path) + }, + )?; + + let wrapper = + format!("(function(exports, require, module, __filename, __dirname) {{ {contents} }})"); + + let module_func = ctx.eval::(wrapper)?; + module_func.call::<_, Value>((exports, require, module_obj.clone(), filename, dirname))?; + + module_obj.get("exports") +} + +pub fn execute_native_runtime( + #[cfg(windows)] grammar_path: &str, + #[cfg(not(windows))] grammar_path: &Path, +) -> JSResult { + #[cfg(not(windows))] + let grammar_path = grammar_path.to_string_lossy(); + + let runtime = Runtime::new()?; + + runtime.set_memory_limit(64 * 1024 * 1024); // 64MB + runtime.set_max_stack_size(256 * 1024); // 256KB + + let context = Context::full(&runtime)?; + + let resolver = FileResolver::default() + .with_path("./") + .with_pattern("{}.mjs"); + let loader = ScriptLoader::default().with_extension("mjs"); + runtime.set_loader(resolver, loader); + + let cwd = std::env::current_dir()?; + let relative_path = pathdiff::diff_paths(&*grammar_path, &cwd) + .map(|p| p.to_string_lossy().to_string()) + .ok_or_else(|| JSError::IO("Failed to get relative path".to_string()))?; + + context.with(|ctx| -> JSResult { + let globals = ctx.globals(); + + globals.set("native", true).or_js_error(&ctx)?; + globals + .set("__ts_grammar_path", relative_path) + .or_js_error(&ctx)?; + + let console = rquickjs::Class::instance(ctx.clone(), Console::new()).or_js_error(&ctx)?; + globals.set("console", console).or_js_error(&ctx)?; + + let process = Object::new(ctx.clone()).or_js_error(&ctx)?; + let env = Object::new(ctx.clone()).or_js_error(&ctx)?; + for (key, value) in std::env::vars() { + env.set(key, value).or_js_error(&ctx)?; + } + process.set("env", env).or_js_error(&ctx)?; + globals.set("process", process).or_js_error(&ctx)?; + + let module = Object::new(ctx.clone()).or_js_error(&ctx)?; + module + .set("exports", Object::new(ctx.clone()).or_js_error(&ctx)?) + .or_js_error(&ctx)?; + globals.set("module", module).or_js_error(&ctx)?; + + let grammar_path_string = grammar_path.to_string(); + let main_require = Function::new( + ctx.clone(), + move |ctx_inner, target_path: String| -> rquickjs::Result { + require_from_module(ctx_inner, target_path, &grammar_path_string) + }, + )?; + globals.set("require", main_require).or_js_error(&ctx)?; + + let promise = Module::evaluate(ctx.clone(), "dsl", DSL).or_js_error(&ctx)?; + promise.finish::<()>().or_js_error(&ctx)?; + + let grammar_json = ctx + .eval::("globalThis.output") + .map(|s| s.to_string()) + .or_js_error(&ctx)? + .or_js_error(&ctx)?; + + let parsed = serde_json::from_str::(&grammar_json)?; + Ok(serde_json::to_string_pretty(&parsed)?) + }) +} + +#[cfg(test)] +mod tests { + use std::{ + fs, + sync::{Arc, Mutex, OnceLock}, + }; + use tempfile::TempDir; + + use super::*; + + static TEST_MUTEX: OnceLock>> = OnceLock::new(); + + fn with_test_lock(test: F) -> R + where + F: FnOnce() -> R, + { + let _guard = TEST_MUTEX.get_or_init(|| Arc::new(Mutex::new(()))).lock(); + let result = test(); + cleanup_runtime_state(); + result + } + + fn cleanup_runtime_state() { + FILE_CACHE.lock().unwrap().clear(); + } + + #[test] + fn test_basic_grammar_execution() { + with_test_lock(|| { + let temp_dir = TempDir::new().unwrap(); + std::env::set_current_dir(temp_dir.path()).unwrap(); + + let grammar_path = temp_dir.path().join("grammar.js"); + fs::write( + &grammar_path, + r" + module.exports = grammar({ + name: 'test', + rules: { source_file: $ => 'hello' } + }); + ", + ) + .unwrap(); + + let json = execute_native_runtime(&grammar_path).expect("Failed to execute grammar"); + assert!(json.contains("\"name\": \"test\"")); + assert!(json.contains("\"hello\"")); + }); + } + + #[test] + fn test_module_imports() { + with_test_lock(|| { + let temp_dir = TempDir::new().unwrap(); + std::env::set_current_dir(temp_dir.path()).unwrap(); + + fs::write( + temp_dir.path().join("common.js"), + r" + module.exports = { identifier: $ => /[a-zA-Z_][a-zA-Z0-9_]*/ }; + ", + ) + .unwrap(); + + fs::write( + temp_dir.path().join("grammar.js"), + r" + const common = require('./common'); + module.exports = grammar({ + name: 'test_import', + rules: { source_file: common.identifier } + }); + ", + ) + .unwrap(); + + let json = execute_native_runtime(&temp_dir.path().join("grammar.js")) + .expect("Failed to execute grammar with imports"); + assert!(json.contains("\"name\": \"test_import\"")); + }); + } + + #[test] + fn test_json_module_loading() { + with_test_lock(|| { + let temp_dir = TempDir::new().unwrap(); + std::env::set_current_dir(temp_dir.path()).unwrap(); + + fs::write( + temp_dir.path().join("package.json"), + r#"{"version": "1.0.0"}"#, + ) + .unwrap(); + fs::write( + temp_dir.path().join("grammar.js"), + r" + const pkg = require('./package.json'); + module.exports = grammar({ + name: 'json_test', + rules: { + source_file: $ => 'version_' + pkg.version.replace(/\./g, '_') + } + }); + ", + ) + .unwrap(); + + let json = execute_native_runtime(&temp_dir.path().join("grammar.js")) + .expect("Failed to execute grammar with JSON import"); + assert!(json.contains("version_1_0_0")); + }); + } + + #[test] + fn test_resource_limits() { + with_test_lock(|| { + let temp_dir = TempDir::new().unwrap(); + std::env::set_current_dir(temp_dir.path()).unwrap(); + + fs::write( + temp_dir.path().join("grammar.js"), + r" + const huge = new Array(10000000).fill('x'.repeat(1000)); + module.exports = grammar({ + name: 'resource_test', + rules: { source_file: $ => 'test' } + }); + ", + ) + .unwrap(); + + let result = execute_native_runtime(&temp_dir.path().join("grammar.js")); + assert!(result.is_err()); + assert!(matches!(result.unwrap_err(), JSError::QuickJS(_))); + }); + } +}