diff --git a/Cargo.lock b/Cargo.lock index 7e4ccfa0..606f1e95 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -859,8 +859,9 @@ dependencies = [ [[package]] name = "tree-sitter-highlight" -version = "0.20.1" +version = "0.20.2" dependencies = [ + "lazy_static", "regex", "thiserror", "tree-sitter", diff --git a/cli/src/main.rs b/cli/src/main.rs index 18e50aad..7d2f6a17 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -239,6 +239,11 @@ fn run() -> Result<()> { .long("html") .short("H"), ) + .arg( + Arg::with_name("check") + .help("Check that highlighting captures conform strictly to standards") + .long("check"), + ) .arg(&scope_arg) .arg(&time_arg) .arg(&quiet_arg) @@ -543,6 +548,7 @@ fn run() -> Result<()> { let time = matches.is_present("time"); let quiet = matches.is_present("quiet"); let html_mode = quiet || matches.is_present("html"); + let should_check = matches.is_present("check"); let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; if html_mode && !quiet { @@ -573,6 +579,25 @@ fn run() -> Result<()> { }; if let Some(highlight_config) = language_config.highlight_config(language)? 
{ + if should_check { + let names = highlight_config.nonconformant_capture_names(); + if names.is_empty() { + eprintln!("All highlight captures conform to standards."); + } else { + eprintln!( + "Non-standard highlight {} detected:", + if names.len() > 1 { + "captures" + } else { + "capture" + } + ); + for name in names { + eprintln!("* {}", name); + } + } + } + let source = fs::read(path)?; if html_mode { highlight::html( diff --git a/highlight/Cargo.toml b/highlight/Cargo.toml index e85ced8e..84c748b6 100644 --- a/highlight/Cargo.toml +++ b/highlight/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-highlight" description = "Library for performing syntax highlighting with Tree-sitter" -version = "0.20.1" +version = "0.20.2" authors = [ "Max Brunsfeld ", "Tim Clem ", @@ -18,6 +18,7 @@ rust-version.workspace = true crate-type = ["lib", "staticlib"] [dependencies] +lazy_static = "1.2.0" regex = "1" thiserror = "1.0" diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index 8a79c624..ce3c3e06 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -2,6 +2,8 @@ pub mod c_lib; pub mod util; pub use c_lib as c; +use lazy_static::lazy_static; +use std::collections::HashSet; use std::sync::atomic::{AtomicUsize, Ordering}; use std::{iter, mem, ops, str, usize}; use thiserror::Error; @@ -14,6 +16,42 @@ const CANCELLATION_CHECK_INTERVAL: usize = 100; const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024; const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000; +lazy_static! 
{
+    static ref STANDARD_CAPTURE_NAMES: HashSet<&'static str> = vec![
+        "attribute",
+        "carriage-return",
+        "comment",
+        "constant",
+        "constant.builtin",
+        "constructor",
+        "constructor.builtin",
+        "embedded",
+        "escape",
+        "function",
+        "function.builtin",
+        "keyword",
+        "number",
+        "module",
+        "operator",
+        "property",
+        "property.builtin",
+        "punctuation",
+        "punctuation.bracket",
+        "punctuation.delimiter",
+        "punctuation.special",
+        "string",
+        "string.special",
+        "tag",
+        "type",
+        "type.builtin",
+        "variable",
+        "variable.builtin",
+        "variable.parameter",
+    ]
+    .into_iter()
+    .collect();
+}
+
 /// Indicates which highlight should be applied to a region of source code.
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub struct Highlight(pub usize);
@@ -321,6 +359,18 @@ impl HighlightConfiguration {
             best_index.map(Highlight)
         }));
     }
+
+    /// Returns the capture names of this configuration that are non-standard.
+    ///
+    /// A name is reported when it is neither listed in `STANDARD_CAPTURE_NAMES` nor starts
+    /// with an underscore. Underscore-prefixed names denote 'private' captures used as part
+    /// of capture internals, so they are exempt from the check.
+    pub fn nonconformant_capture_names(&self) -> Vec<&String> {
+        self.names()
+            .iter()
+            .filter(|&n| !n.starts_with('_') && !STANDARD_CAPTURE_NAMES.contains(n.as_str()))
+            .collect()
+    }
 }
 
 impl<'a> HighlightIterLayer<'a> {