From 4342efd57e60019c1c6587981240e766301af69b Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Fri, 16 Feb 2024 14:42:19 -0500 Subject: [PATCH] feat: allow specifying an external scanner's files --- cli/benches/benchmark.rs | 2 +- cli/loader/src/lib.rs | 80 +++++++++++++++++++-------- cli/src/tests/helpers/fixtures.rs | 18 +++++- docs/section-4-syntax-highlighting.md | 5 ++ 4 files changed, 79 insertions(+), 26 deletions(-) diff --git a/cli/benches/benchmark.rs b/cli/benches/benchmark.rs index eccd1310..f7700dd7 100644 --- a/cli/benches/benchmark.rs +++ b/cli/benches/benchmark.rs @@ -212,7 +212,7 @@ fn parse(path: &Path, max_path_length: usize, mut action: impl FnMut(&[u8])) -> fn get_language(path: &Path) -> Language { let src_dir = GRAMMARS_DIR.join(path).join("src"); TEST_LOADER - .load_language_at_path(&src_dir, &[&src_dir]) + .load_language_at_path(&src_dir, &[&src_dir], None) .with_context(|| format!("Failed to load language at path {src_dir:?}")) .unwrap() } diff --git a/cli/loader/src/lib.rs b/cli/loader/src/lib.rs index f040e603..f785de29 100644 --- a/cli/loader/src/lib.rs +++ b/cli/loader/src/lib.rs @@ -105,7 +105,7 @@ pub struct LanguageConfiguration<'a> { pub struct Loader { parser_lib_path: PathBuf, - languages_by_id: Vec<(PathBuf, OnceCell)>, + languages_by_id: Vec<(PathBuf, OnceCell, Option>)>, language_configurations: Vec>, language_configuration_ids_by_file_type: HashMap>, language_configuration_in_current_path: Option, @@ -347,11 +347,11 @@ impl Loader { } fn language_for_id(&self, id: usize) -> Result { - let (path, language) = &self.languages_by_id[id]; + let (path, language, externals) = &self.languages_by_id[id]; language .get_or_try_init(|| { let src_path = path.join("src"); - self.load_language_at_path(&src_path, &[&src_path]) + self.load_language_at_path(&src_path, &[&src_path], externals.as_deref()) }) .cloned() } @@ -360,6 +360,7 @@ impl Loader { &self, src_path: &Path, header_paths: &[&Path], + external_files: Option<&[PathBuf]>, ) -> 
Result { let grammar_path = src_path.join("grammar.json"); @@ -372,7 +373,12 @@ impl Loader { let grammar_json: GrammarJSON = serde_json::from_reader(BufReader::new(&mut grammar_file)) .with_context(|| "Failed to parse grammar.json")?; - self.load_language_at_path_with_name(src_path, header_paths, &grammar_json.name) + self.load_language_at_path_with_name( + src_path, + header_paths, + &grammar_json.name, + external_files, + ) } pub fn load_language_at_path_with_name( @@ -380,6 +386,7 @@ impl Loader { src_path: &Path, header_paths: &[&Path], name: &str, + external_files: Option<&[PathBuf]>, ) -> Result { let mut lib_name = name.to_string(); let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(name)); @@ -395,12 +402,26 @@ impl Loader { let parser_path = src_path.join("parser.c"); let scanner_path = self.get_scanner_path(src_path); + let paths_to_check = if let Some(external_files) = external_files { + let mut files = if let Some(scanner_path) = scanner_path.as_ref() { + vec![parser_path.clone(), scanner_path.to_path_buf()] + } else { + vec![parser_path.clone()] + }; + for path in external_files { + files.push(src_path.join(path)); + } + files + } else { + Vec::new() + }; + #[cfg(feature = "wasm")] if self.wasm_store.lock().unwrap().is_some() { library_path.set_extension("wasm"); } - let mut recompile = needs_recompile(&library_path, &parser_path, scanner_path.as_deref()) + let mut recompile = needs_recompile(&library_path, &paths_to_check) .with_context(|| "Failed to compare source and binary timestamps")?; #[cfg(feature = "wasm")] @@ -808,7 +829,7 @@ impl Loader { parser_path: &Path, set_current_path_config: bool, ) -> Result<&[LanguageConfiguration]> { - #[derive(Default, Deserialize)] + #[derive(Deserialize, Clone, Default)] #[serde(untagged)] enum PathsJSON { #[default] @@ -848,6 +869,8 @@ impl Loader { locals: PathsJSON, #[serde(default)] tags: PathsJSON, + #[serde(default, rename = "external-files")] + external_files: PathsJSON, } 
#[derive(Deserialize)] @@ -883,7 +906,7 @@ impl Loader { // Determine if a previous language configuration in this package.json file // already uses the same language. let mut language_id = None; - for (id, (path, _)) in + for (id, (path, _, _)) in self.languages_by_id.iter().enumerate().skip(language_count) { if language_path == *path { @@ -892,10 +915,29 @@ impl Loader { } // If not, add a new language path to the list. - let language_id = language_id.unwrap_or_else(|| { - self.languages_by_id.push((language_path, OnceCell::new())); + let language_id = if let Some(language_id) = language_id { + language_id + } else { + self.languages_by_id.push(( + language_path, + OnceCell::new(), + config_json.external_files.clone().into_vec().map(|files| { + files.into_iter() + .map(|path| { + let path = parser_path.join(path); + // prevent p being above/outside of parser_path + + if path.starts_with(parser_path) { + Ok(path) + } else { + Err(anyhow!("External file path {path:?} is outside of parser directory {parser_path:?}")) + } + }) + .collect::>>() + }).transpose()?, + )); self.languages_by_id.len() - 1 - }); + }; let configuration = LanguageConfiguration { root_path: parser_path.to_path_buf(), @@ -972,7 +1014,7 @@ impl Loader { self.language_configurations .push(unsafe { mem::transmute(configuration) }); self.languages_by_id - .push((parser_path.to_owned(), OnceCell::new())); + .push((parser_path.to_owned(), OnceCell::new(), None)); } Ok(&self.language_configurations[initial_language_configuration_count..]) @@ -1254,20 +1296,14 @@ impl<'a> LanguageConfiguration<'a> { } } -fn needs_recompile( - lib_path: &Path, - parser_c_path: &Path, - scanner_path: Option<&Path>, -) -> Result { +fn needs_recompile(lib_path: &Path, paths_to_check: &[PathBuf]) -> Result { if !lib_path.exists() { return Ok(true); } - let lib_mtime = mtime(lib_path)?; - if mtime(parser_c_path)? > lib_mtime { - return Ok(true); - } - if let Some(scanner_path) = scanner_path { - if mtime(scanner_path)? 
> lib_mtime { + let lib_mtime = + mtime(lib_path).with_context(|| format!("Failed to read mtime of {lib_path:?}"))?; + for path in paths_to_check { + if mtime(path)? > lib_mtime { return Ok(true); } } diff --git a/cli/src/tests/helpers/fixtures.rs b/cli/src/tests/helpers/fixtures.rs index 69eb7c8a..f274801d 100644 --- a/cli/src/tests/helpers/fixtures.rs +++ b/cli/src/tests/helpers/fixtures.rs @@ -36,6 +36,7 @@ pub fn get_language(name: &str) -> Language { .load_language_at_path( &GRAMMARS_DIR.join(name).join("src"), &[&HEADER_DIR, &GRAMMARS_DIR.join(name).join("src")], + None, ) .unwrap() } @@ -86,7 +87,7 @@ pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> fs::write(&parser_path, parser_code).unwrap(); } - if let Some(path) = path { + let scanner_path = if let Some(path) = path { let scanner_path = path.join("scanner.c"); if scanner_path.exists() { let scanner_code = fs::read_to_string(&scanner_path).unwrap(); @@ -96,8 +97,13 @@ pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> { fs::write(&scanner_copy_path, scanner_code).unwrap(); } + Some(scanner_copy_path) + } else { + None } - } + } else { + None + }; let header_path = src_dir.join("tree_sitter"); fs::create_dir_all(&header_path).unwrap(); @@ -110,7 +116,13 @@ pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> }) .unwrap(); + let paths_to_check = if let Some(scanner_path) = &scanner_path { + vec![parser_path.clone(), scanner_path.to_path_buf()] + } else { + vec![parser_path.clone()] + }; + TEST_LOADER - .load_language_at_path_with_name(&src_dir, &[&HEADER_DIR], name) + .load_language_at_path_with_name(&src_dir, &[&HEADER_DIR], name, Some(&paths_to_check)) .unwrap() } diff --git a/docs/section-4-syntax-highlighting.md b/docs/section-4-syntax-highlighting.md index 8fd73cf6..818172fd 100644 --- a/docs/section-4-syntax-highlighting.md +++ b/docs/section-4-syntax-highlighting.md @@ -92,6 +92,11 @@ These keys specify basic 
information about the parser: * `path` (optional) - A relative path from the directory containing `package.json` to another directory containing the `src/` folder, which contains the actual generated parser. The default value is `"."` (so that `src/` is in the same folder as `package.json`), and this very rarely needs to be overridden. +* `external-files` (optional) - A list of relative paths from the root directory of a +parser to files that should be checked for modifications when deciding whether to recompile the parser. +This is useful during development so that changes to files other than `scanner.c` +are picked up by the CLI. + ### Language Detection These keys help to decide whether the language applies to a given file: