use std::{ env, fs, io::Write, path::{Path, PathBuf}, process::{Command, Stdio}, }; use anyhow::{anyhow, Context, Result}; use build_tables::build_tables; use grammars::InputGrammar; use lazy_static::lazy_static; use parse_grammar::parse_grammar; use prepare_grammar::prepare_grammar; use regex::{Regex, RegexBuilder}; use render::render_c_code; use semver::Version; mod build_tables; mod dedup; mod grammar_files; mod grammars; mod nfa; mod node_types; pub mod parse_grammar; mod prepare_grammar; mod render; mod rules; mod tables; lazy_static! { static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*") .multi_line(true) .build() .unwrap(); } struct GeneratedParser { c_code: String, node_types_json: String, } pub const ALLOC_HEADER: &str = include_str!("../../src/templates/alloc.h"); pub const ARRAY_HEADER: &str = include_str!("../../src/templates/array.h"); pub fn generate_parser_in_directory( repo_path: &Path, grammar_path: Option<&str>, abi_version: usize, report_symbol_name: Option<&str>, js_runtime: Option<&str>, ) -> Result<()> { let mut repo_path = repo_path.to_owned(); let mut grammar_path = grammar_path; // Populate a new empty grammar directory. if let Some(path) = grammar_path { let path = PathBuf::from(path); if !path .try_exists() .with_context(|| "Some error with specified path")? { fs::create_dir_all(&path)?; grammar_path = None; repo_path = path; } } let grammar_path = grammar_path .map(PathBuf::from) .unwrap_or_else(|| repo_path.join("grammar.js")); // Read the grammar file. let grammar_json = load_grammar_file(&grammar_path, js_runtime)?; let src_path = repo_path.join("src"); let header_path = src_path.join("tree_sitter"); // Ensure that the output directories exist. fs::create_dir_all(&src_path)?; fs::create_dir_all(&header_path)?; if grammar_path.file_name().unwrap() != "grammar.json" { fs::write(src_path.join("grammar.json"), &grammar_json) .with_context(|| format!("Failed to write grammar.json to {src_path:?}"))?; } // Parse and preprocess the grammar. let input_grammar = parse_grammar(&grammar_json)?; // Generate the parser and related files. let GeneratedParser { c_code, node_types_json, } = generate_parser_for_grammar_with_opts(&input_grammar, abi_version, report_symbol_name)?; write_file(&src_path.join("parser.c"), c_code)?; write_file(&src_path.join("node-types.json"), node_types_json)?; write_file(&header_path.join("alloc.h"), ALLOC_HEADER)?; write_file(&header_path.join("array.h"), ARRAY_HEADER)?; write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?; Ok(()) } pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> { let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n"); let input_grammar = parse_grammar(&grammar_json)?; let parser = generate_parser_for_grammar_with_opts(&input_grammar, tree_sitter::LANGUAGE_VERSION, None)?; Ok((input_grammar.name, parser.c_code)) } fn generate_parser_for_grammar_with_opts( input_grammar: &InputGrammar, abi_version: usize, report_symbol_name: Option<&str>, ) -> Result { let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = prepare_grammar(input_grammar)?; let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?; let node_types_json = node_types::generate_node_types_json( &syntax_grammar, &lexical_grammar, &simple_aliases, &variable_info, ); let tables = build_tables( &syntax_grammar, &lexical_grammar, &simple_aliases, &variable_info, &inlines, report_symbol_name, )?; let c_code = render_c_code( &input_grammar.name, tables, syntax_grammar, lexical_grammar, simple_aliases, abi_version, ); Ok(GeneratedParser { c_code, node_types_json: serde_json::to_string_pretty(&node_types_json).unwrap(), }) } pub fn load_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result { if grammar_path.is_dir() { return Err(anyhow!( "Path to a grammar file with `.js` or `.json` extension is required" )); } match grammar_path.extension().and_then(|e| e.to_str()) { Some("js") => Ok(load_js_grammar_file(grammar_path, js_runtime) .with_context(|| "Failed to load grammar.js")?), Some("json") => { Ok(fs::read_to_string(grammar_path).with_context(|| "Failed to load grammar.json")?) } _ => Err(anyhow!("Unknown grammar file extension: {grammar_path:?}",)), } } fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result { let grammar_path = fs::canonicalize(grammar_path)?; #[cfg(windows)] let grammar_path = url::Url::from_file_path(grammar_path) .expect("Failed to convert path to URL") .to_string(); let js_runtime = js_runtime.unwrap_or("node"); let mut js_command = Command::new(js_runtime); match js_runtime { "node" => { js_command.args(["--input-type=module", "-"]); } "bun" => { js_command.arg("-"); } "deno" => { js_command.args(["run", "--allow-all", "-"]); } _ => {} } let mut js_process = js_command .env("TREE_SITTER_GRAMMAR_PATH", grammar_path) .stdin(Stdio::piped()) .stdout(Stdio::piped()) .spawn() .with_context(|| format!("Failed to run `{js_runtime}`"))?; let mut js_stdin = js_process .stdin .take() .with_context(|| format!("Failed to open stdin for {js_runtime}"))?; let cli_version = Version::parse(env!("CARGO_PKG_VERSION")) .with_context(|| "Could not parse this package's version as semver.")?; write!( js_stdin, "globalThis.TREE_SITTER_CLI_VERSION_MAJOR = {}; globalThis.TREE_SITTER_CLI_VERSION_MINOR = {}; globalThis.TREE_SITTER_CLI_VERSION_PATCH = {};", cli_version.major, cli_version.minor, cli_version.patch, ) .with_context(|| format!("Failed to write tree-sitter version to {js_runtime}'s stdin"))?; js_stdin .write(include_bytes!("./dsl.js")) .with_context(|| format!("Failed to write grammar dsl to {js_runtime}'s stdin"))?; drop(js_stdin); let output = js_process .wait_with_output() .with_context(|| format!("Failed to read output from {js_runtime}"))?; match output.status.code() { None => panic!("{js_runtime} process was killed"), Some(0) => { let stdout = String::from_utf8(output.stdout) .with_context(|| format!("Got invalid UTF8 from {js_runtime}"))?; let mut grammar_json = &stdout[..]; if let Some(pos) = stdout.rfind('\n') { // If there's a newline, split the last line from the rest of the output let node_output = &stdout[..pos]; grammar_json = &stdout[pos + 1..]; let mut stdout = std::io::stdout().lock(); stdout.write_all(node_output.as_bytes())?; stdout.write_all(b"\n")?; stdout.flush()?; } Ok(serde_json::to_string_pretty( &serde_json::from_str::(grammar_json) .with_context(|| "Failed to parse grammar JSON")?, ) .with_context(|| "Failed to serialize grammar JSON")? + "\n") } Some(code) => Err(anyhow!("{js_runtime} process exited with status {code}")), } } pub fn write_file(path: &Path, body: impl AsRef<[u8]>) -> Result<()> { fs::write(path, body) .with_context(|| format!("Failed to write {:?}", path.file_name().unwrap())) }