From acfeed006afcbfd6e9a789c3a994c5a6b4e9e88f Mon Sep 17 00:00:00 2001 From: Will Lillis Date: Sat, 26 Jul 2025 20:06:42 -0400 Subject: [PATCH] feat(cli): allow users to specify dynamic libraries directly for parse, query, test, and fuzz subcommands --- crates/cli/src/main.rs | 169 +++++++++++++++++++++++++++++++----- crates/loader/src/loader.rs | 24 +++-- 2 files changed, 164 insertions(+), 29 deletions(-) diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index 1a04cabd..983f8f20 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -185,6 +185,13 @@ struct Parse { /// The path to the tree-sitter grammar directory #[arg(long, short = 'p')] pub grammar_path: Option, + /// The path to the parser's dynamic library + #[arg(long, short = 'l')] + pub lib_path: Option, + /// If `--lib_path` is used, the name of the language used to extract the + /// library's language function + #[arg(long)] + pub lang_name: Option, /// Select a language by the scope instead of a file extension #[arg(long)] pub scope: Option, @@ -276,6 +283,13 @@ struct Test { /// The path to the tree-sitter grammar directory #[arg(long, short = 'p')] pub grammar_path: Option, + /// The path to the parser's dynamic library + #[arg(long, short = 'l')] + pub lib_path: Option, + /// If `--lib_path` is used, the name of the language used to extract the + /// library's language function + #[arg(long)] + pub lang_name: Option, /// Update all syntax trees in corpus files with current parser output #[arg(long, short)] pub update: bool, @@ -335,6 +349,13 @@ struct Fuzz { /// The path to the tree-sitter grammar directory #[arg(long, short = 'p')] pub grammar_path: Option, + /// The path to the parser's dynamic library + #[arg(long)] + pub lib_path: Option, + /// If `--lib_path` is used, the name of the language used to extract the + /// library's language function + #[arg(long)] + pub lang_name: Option, /// Maximum number of edits to perform per fuzz test #[arg(long)] pub edits: Option, @@ 
-367,6 +388,13 @@ struct Query { /// The path to the tree-sitter grammar directory #[arg(long, short = 'p')] pub grammar_path: Option, + /// The path to the parser's dynamic library + #[arg(long, short = 'l')] + pub lib_path: Option, + /// If `--lib_path` is used, the name of the language used to extract the + /// library's language function + #[arg(long)] + pub lang_name: Option, /// Measure execution time #[arg(long, short)] pub time: bool, @@ -1007,6 +1035,11 @@ impl Parse { has_error |= !parse_result.successful; }; + if self.lib_path.is_none() && self.lang_name.is_some() { + eprintln!("Warning: --lang-name` specified without --lib-path. This argument will be ignored."); + } + let lib_info = get_lib_info(self.lib_path.as_ref(), self.lang_name.as_ref()); + let input = get_input( self.paths_file.as_deref(), self.paths, @@ -1024,7 +1057,7 @@ impl Parse { for path in &paths { let path = Path::new(&path); let language = loader - .select_language(path, current_dir, self.scope.as_deref()) + .select_language(path, current_dir, self.scope.as_deref(), lib_info) .with_context(|| { anyhow!("Failed to load langauge for path \"{}\"", path.display()) })?; @@ -1048,16 +1081,29 @@ impl Parse { } => { let path = get_tmp_source_file(&contents)?; let languages = loader.languages_at_path(current_dir)?; - let language = languages - .iter() - .find(|(_, n)| language_names.contains(&Box::from(n.as_str()))) - .or_else(|| languages.first()) - .map(|(l, _)| l.clone()) - .ok_or_else(|| anyhow!("No language found"))?; + + let language = if let Some(ref lib_path) = self.lib_path { + let lib_info = get_lib_info(self.lib_path.as_ref(), self.lang_name.as_ref()); + &loader + .select_language(lib_path, current_dir, None, lib_info) + .with_context(|| { + anyhow!( + "Failed to load language for path \"{}\"", + lib_path.display() + ) + })? 
+ } else { + &languages + .iter() + .find(|(_, n)| language_names.contains(&Box::from(n.as_str()))) + .or_else(|| languages.first()) + .map(|(l, _)| l.clone()) + .ok_or_else(|| anyhow!("No language found"))? + }; parse::parse_file_at_path( &mut parser, - &language, + language, &path, &name, name.chars().count(), @@ -1073,7 +1119,7 @@ impl Parse { let path = get_tmp_source_file(&contents)?; let name = "stdin"; - let language = loader.select_language(&path, current_dir, None)?; + let language = loader.select_language(&path, current_dir, None, lib_info)?; parse::parse_file_at_path( &mut parser, @@ -1123,11 +1169,26 @@ impl Test { loader.use_wasm(&engine); } + if self.lib_path.is_none() && self.lang_name.is_some() { + eprintln!("Warning: --lang-name` specified without --lib-path. This argument will be ignored."); + } let languages = loader.languages_at_path(current_dir)?; - let language = &languages - .first() - .ok_or_else(|| anyhow!("No language found"))? - .0; + let language = if let Some(ref lib_path) = self.lib_path { + let lib_info = get_lib_info(self.lib_path.as_ref(), self.lang_name.as_ref()); + &loader + .select_language(lib_path, current_dir, None, lib_info) + .with_context(|| { + anyhow!( + "Failed to load language for path \"{}\"", + lib_path.display() + ) + })? + } else { + &languages + .first() + .ok_or_else(|| anyhow!("No language found"))? + .0 + }; parser.set_language(language)?; let test_dir = current_dir.join("test"); @@ -1253,10 +1314,29 @@ impl Fuzz { loader.sanitize_build(true); loader.force_rebuild(self.rebuild); + if self.lib_path.is_none() && self.lang_name.is_some() { + eprintln!("Warning: --lang-name` specified without --lib-path. 
This argument will be ignored."); + } let languages = loader.languages_at_path(current_dir)?; - let (language, language_name) = &languages - .first() - .ok_or_else(|| anyhow!("No language found"))?; + let (language, language_name) = if let Some(ref lib_path) = self.lib_path { + let lib_info = get_lib_info(self.lib_path.as_ref(), self.lang_name.as_ref()) + .with_context(|| anyhow!("No language name found for {}", lib_path.display()))?; + &( + loader + .select_language(lib_path, current_dir, None, Some(lib_info)) + .with_context(|| { + anyhow!( + "Failed to load language for path \"{}\"", + lib_path.display() + ) + })?, + lib_info.1.to_string(), + ) + } else { + languages + .first() + .ok_or_else(|| anyhow!("No language found"))? + }; let mut fuzz_options = FuzzOptions { skipped: self.skip, @@ -1302,6 +1382,13 @@ impl Query { let cancellation_flag = util::cancel_on_signal(); + if self.lib_path.is_none() && self.lang_name.is_some() { + eprintln!( + "Warning: --lang-name specified without --lib-path. This argument will be ignored." + ); + } + let lib_info = get_lib_info(self.lib_path.as_ref(), self.lang_name.as_ref()); + let input = get_input( self.paths_file.as_deref(), self.paths, @@ -1315,6 +1402,7 @@ impl Query { Path::new(&paths[0]), current_dir, self.scope.as_deref(), + lib_info, )?; for path in paths { @@ -1340,14 +1428,26 @@ impl Query { } => { let path = get_tmp_source_file(&contents)?; let languages = loader.languages_at_path(current_dir)?; - let language = languages - .iter() - .find(|(_, n)| language_names.contains(&Box::from(n.as_str()))) - .or_else(|| languages.first()) - .map(|(l, _)| l.clone()) - .ok_or_else(|| anyhow!("No language found"))?; + let language = if let Some(ref lib_path) = self.lib_path { + let lib_info = get_lib_info(self.lib_path.as_ref(), self.lang_name.as_ref()); + &loader + .select_language(lib_path, current_dir, None, lib_info) + .with_context(|| { + anyhow!( + "Failed to load language for path \"{}\"", + lib_path.display() + ) + })? 
/// Extracts the shared library path and the language name from the
/// user-provided arguments, if present.
///
/// * `lib_path` - path to the parser's dynamic library, if one was given.
/// * `language_name` - explicit language name, if one was given. When present
///   it always takes precedence over a name derived from `lib_path`.
///
/// Returns `Some((path, name))` when a library path was supplied and a name is
/// available, either the explicit one or the library's file stem (e.g. `foo`
/// for `foo.so`). Returns `None` when no library path was supplied, or when no
/// explicit name was given and the path has no file stem or the stem is not
/// valid UTF-8.
fn get_lib_info<'a>(
    lib_path: Option<&'a PathBuf>,
    language_name: Option<&'a String>,
) -> Option<(&'a Path, &'a str)> {
    // Without a library path there is nothing to resolve, whatever the name.
    let lib_path = lib_path?;

    language_name
        .map(String::as_str)
        // No explicit name: try to derive one from the library's file stem.
        .or_else(|| lib_path.file_stem().and_then(|stem| stem.to_str()))
        .map(|name| (lib_path.as_path(), name))
}
Language>>(language_fn_name.as_bytes()) - .with_context(|| format!("Failed to load symbol {language_fn_name}"))?; + .get:: Language>>(function_name.as_bytes()) + .with_context(|| { + format!( + "Failed to load symbol {function_name} from {}", + path.display() + ) + })?; language_fn() }; mem::forget(library); @@ -1410,8 +1419,13 @@ impl Loader { path: &Path, current_dir: &Path, scope: Option<&str>, + // path to dynamic library, name of language + lib_info: Option<(&Path, &str)>, ) -> Result { - if let Some(scope) = scope { + if let Some((lib_path, language_name)) = lib_info { + let language_fn_name = format!("tree_sitter_{}", language_name.replace('-', "_")); + Self::load_language(lib_path, &language_fn_name) + } else if let Some(scope) = scope { if let Some(config) = self .language_configuration_for_scope(scope) .with_context(|| format!("Failed to load language for scope '{scope}'"))?