Add --check flag to tree-sitter highlight.

Recently I've been pulling a lot of grammars into GitHub's highlighting backend,
replacing legacy language support with tree-sitter highlighting queries.
Our backend systems have a standard set of highlight captures we expect, very
similar to the standard tagging captures we expect. Though end-user applications
are free to choose whatever tagging nomenclature they want, I think it's nice to
include a checking stage that will help us ensure that we know whether a capture
might be recognized or not. It will also help us figure out where we need to
expand our standard set of captures (see #1539).
This commit is contained in:
Patrick Thomson 2023-06-22 09:18:53 -04:00 committed by Amaan Qureshi
parent d30e9c9d71
commit cb58bc593f
No known key found for this signature in database
GPG key ID: E67890ADC4227273
4 changed files with 78 additions and 2 deletions

View file

@ -2,6 +2,8 @@ pub mod c_lib;
pub mod util;
pub use c_lib as c;
use lazy_static::lazy_static;
use std::collections::HashSet;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{iter, mem, ops, str, usize};
use thiserror::Error;
@ -14,6 +16,42 @@ const CANCELLATION_CHECK_INTERVAL: usize = 100;
// 10 * 1024 = 10 KiB if the unit is bytes. Presumably the capacity reserved up front for the
// HTML output buffer — TODO confirm against the use site.
const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024;
// Presumably the number of line entries reserved up front for the per-line buffer — TODO confirm
// against the use site.
const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000;
lazy_static! {
    /// The predefined ("standard") highlight capture names.
    ///
    /// Capture names are compared against this set to decide whether they are standard;
    /// anything that is not a member is treated as non-standard by the check.
    static ref STANDARD_CAPTURE_NAMES: HashSet<&'static str> = [
        "attribute",
        "carriage-return",
        "comment",
        "constant",
        "constant.builtin",
        "constructor",
        "constructor.builtin",
        "embedded",
        "escape",
        "function",
        "function.builtin",
        "keyword",
        "number",
        "module",
        "operator",
        "property",
        "property.builtin",
        "punctuation",
        "punctuation.bracket",
        "punctuation.delimiter",
        "punctuation.special",
        "string",
        "string.special",
        "tag",
        "type",
        "type.builtin",
        "variable",
        "variable.builtin",
        "variable.parameter",
    ]
    .iter()
    .copied()
    .collect();
}
/// Indicates which highlight should be applied to a region of source code.
///
/// This is a newtype around a `usize`. Presumably the value is an index into the list of
/// recognized highlight names supplied to the configuration — confirm against the
/// configuration code.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Highlight(pub usize);
@ -321,6 +359,17 @@ impl HighlightConfiguration {
best_index.map(Highlight)
}));
}
/// Returns this configuration's capture names that do not conform to the standard set.
///
/// A name is reported only when it is neither present in the list of predefined 'canonical'
/// names (`STANDARD_CAPTURE_NAMES`) nor prefixed with an underscore. Underscore-prefixed names
/// denote 'private' captures used as part of capture internals, so they are never reported.
///
/// The returned references borrow from this configuration's own list of names and keep the
/// order in which `names()` yields them.
pub fn nonconformant_capture_names(&self) -> Vec<&String> {
    self.names()
        .iter()
        // Skip private (underscore-prefixed) captures first, then anything in the standard set.
        .filter(|&n| !(n.starts_with('_') || STANDARD_CAPTURE_NAMES.contains(n.as_str())))
        .collect()
}
}
impl<'a> HighlightIterLayer<'a> {