Add --check flag to tree-sitter highlight.
Recently I've been pulling a lot of grammars into GitHub's highlighting backend, replacing legacy language support with tree-sitter highlighting queries. Our backend systems have a standard set of highlight captures we expect, very similar to the standard tagging captures we expect. Though end-user applications are free to choose whatever tagging nomenclature they want, I think it's nice to include a checking stage that will help us ensure that we know whether a capture might be recognized or not. It will also help us figure out where we need to expand our standard set of captures (see #1539).
This commit is contained in:
parent
d30e9c9d71
commit
cb58bc593f
4 changed files with 78 additions and 2 deletions
3
Cargo.lock
generated
3
Cargo.lock
generated
|
|
@ -859,8 +859,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tree-sitter-highlight"
|
||||
version = "0.20.1"
|
||||
version = "0.20.2"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"regex",
|
||||
"thiserror",
|
||||
"tree-sitter",
|
||||
|
|
|
|||
|
|
@ -239,6 +239,11 @@ fn run() -> Result<()> {
|
|||
.long("html")
|
||||
.short("H"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("check")
|
||||
.help("Check that highlighting captures conform strictly to standards")
|
||||
.long("check"),
|
||||
)
|
||||
.arg(&scope_arg)
|
||||
.arg(&time_arg)
|
||||
.arg(&quiet_arg)
|
||||
|
|
@ -543,6 +548,7 @@ fn run() -> Result<()> {
|
|||
let time = matches.is_present("time");
|
||||
let quiet = matches.is_present("quiet");
|
||||
let html_mode = quiet || matches.is_present("html");
|
||||
let should_check = matches.is_present("check");
|
||||
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
|
||||
|
||||
if html_mode && !quiet {
|
||||
|
|
@ -573,6 +579,25 @@ fn run() -> Result<()> {
|
|||
};
|
||||
|
||||
if let Some(highlight_config) = language_config.highlight_config(language)? {
|
||||
if should_check {
|
||||
let names = highlight_config.nonconformant_capture_names();
|
||||
if names.is_empty() {
|
||||
eprintln!("All highlight captures conform to standards.");
|
||||
} else {
|
||||
eprintln!(
|
||||
"Non-standard highlight {} detected:",
|
||||
if names.len() > 1 {
|
||||
"captures"
|
||||
} else {
|
||||
"capture"
|
||||
}
|
||||
);
|
||||
for name in names {
|
||||
eprintln!("* {}", name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let source = fs::read(path)?;
|
||||
if html_mode {
|
||||
highlight::html(
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
name = "tree-sitter-highlight"
|
||||
description = "Library for performing syntax highlighting with Tree-sitter"
|
||||
version = "0.20.1"
|
||||
version = "0.20.2"
|
||||
authors = [
|
||||
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
|
||||
"Tim Clem <timothy.clem@gmail.com>",
|
||||
|
|
@ -18,6 +18,7 @@ rust-version.workspace = true
|
|||
crate-type = ["lib", "staticlib"]
|
||||
|
||||
[dependencies]
|
||||
lazy_static = "1.2.0"
|
||||
regex = "1"
|
||||
thiserror = "1.0"
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@ pub mod c_lib;
|
|||
pub mod util;
|
||||
pub use c_lib as c;
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
use std::collections::HashSet;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::{iter, mem, ops, str, usize};
|
||||
use thiserror::Error;
|
||||
|
|
@ -14,6 +16,42 @@ const CANCELLATION_CHECK_INTERVAL: usize = 100;
|
|||
const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024;
|
||||
const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000;
|
||||
|
||||
lazy_static! {
    /// The set of highlight capture names considered standard.
    ///
    /// `HighlightConfiguration::nonconformant_capture_names` consults this set to decide
    /// which of a configuration's capture names to report.
    static ref STANDARD_CAPTURE_NAMES: HashSet<&'static str> = [
        "attribute",
        "carriage-return",
        "comment",
        "constant",
        "constant.builtin",
        "constructor",
        "constructor.builtin",
        "embedded",
        "escape",
        "function",
        "function.builtin",
        "keyword",
        "number",
        "module",
        "operator",
        "property",
        "property.builtin",
        "punctuation",
        "punctuation.bracket",
        "punctuation.delimiter",
        "punctuation.special",
        "string",
        "string.special",
        "tag",
        "type",
        "type.builtin",
        "variable",
        "variable.builtin",
        "variable.parameter",
    ]
    .iter()
    .copied()
    .collect();
}
|
||||
|
||||
/// Indicates which highlight should be applied to a region of source code.
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
pub struct Highlight(pub usize);
|
||||
|
|
@ -321,6 +359,17 @@ impl HighlightConfiguration {
|
|||
best_index.map(Highlight)
|
||||
}));
|
||||
}
|
||||
|
||||
// Return the list of this configuration's capture names that are neither present in the
|
||||
// list of predefined 'canonical' names nor start with an underscore (denoting 'private' captures
|
||||
// used as part of capture internals).
|
||||
pub fn nonconformant_capture_names(&self) -> Vec<&String> {
|
||||
return self
|
||||
.names()
|
||||
.iter()
|
||||
.filter(|&n| !STANDARD_CAPTURE_NAMES.contains(n.as_str()))
|
||||
.collect();
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> HighlightIterLayer<'a> {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue