Merge pull request #2412 from amaanq/apply-all-captures

feat: add an `--apply-all-captures` argument to highlight & test
This commit is contained in:
Amaan Qureshi 2023-08-17 23:38:18 -04:00 committed by GitHub
commit 35a6052fbc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 301 additions and 42 deletions

View file

@ -484,6 +484,7 @@ impl Loader {
pub fn highlight_config_for_injection_string<'a>(
&'a self,
string: &str,
apply_all_captures: bool,
) -> Option<&'a HighlightConfiguration> {
match self.language_configuration_for_injection_string(string) {
Err(e) => {
@ -494,17 +495,19 @@ impl Loader {
None
}
Ok(None) => None,
Ok(Some((language, configuration))) => match configuration.highlight_config(language) {
Err(e) => {
eprintln!(
"Failed to load property sheet for injection string '{}': {}",
string, e
);
None
Ok(Some((language, configuration))) => {
match configuration.highlight_config(language, apply_all_captures, None) {
Err(e) => {
eprintln!(
"Failed to load property sheet for injection string '{}': {}",
string, e
);
None
}
Ok(None) => None,
Ok(Some(config)) => Some(config),
}
Ok(None) => None,
Ok(Some(config)) => Some(config),
},
}
}
}
@ -701,16 +704,65 @@ impl Loader {
}
impl<'a> LanguageConfiguration<'a> {
pub fn highlight_config(&self, language: Language) -> Result<Option<&HighlightConfiguration>> {
pub fn highlight_config(
&self,
language: Language,
apply_all_captures: bool,
paths: Option<&[String]>,
) -> Result<Option<&HighlightConfiguration>> {
// When the caller supplies explicit query paths, bucket them by file-name suffix so
// each kind of query (highlights / injections / locals) can be overridden separately.
// With no explicit paths every bucket is `None`, and the closure below falls back to
// the file names stored on `self`.
let (highlights_filenames, injections_filenames, locals_filenames) = match paths {
    Some(paths) => {
        // Collect (as owned strings) every supplied path that ends with `suffix`.
        let matching = |suffix: &str| -> Vec<String> {
            paths
                .iter()
                .filter(|p| p.ends_with(suffix))
                .cloned()
                .collect()
        };
        (
            Some(matching("highlights.scm")),
            // Bug fix: this bucket is later handed to `read_queries` as the
            // "injections.scm" source, so it must select injection queries. It
            // previously filtered on "tags.scm", which meant explicitly supplied
            // injection queries were never picked up.
            Some(matching("injections.scm")),
            Some(matching("locals.scm")),
        )
    }
    None => (None, None, None),
};
return self
.highlight_config
.get_or_try_init(|| {
let (highlights_query, highlight_ranges) =
self.read_queries(&self.highlights_filenames, "highlights.scm")?;
let (injections_query, injection_ranges) =
self.read_queries(&self.injections_filenames, "injections.scm")?;
let (locals_query, locals_ranges) =
self.read_queries(&self.locals_filenames, "locals.scm")?;
let (highlights_query, highlight_ranges) = self.read_queries(
if highlights_filenames.is_some() {
&highlights_filenames
} else {
&self.highlights_filenames
},
"highlights.scm",
)?;
let (injections_query, injection_ranges) = self.read_queries(
if injections_filenames.is_some() {
&injections_filenames
} else {
&self.injections_filenames
},
"injections.scm",
)?;
let (locals_query, locals_ranges) = self.read_queries(
if locals_filenames.is_some() {
&locals_filenames
} else {
&self.locals_filenames
},
"locals.scm",
)?;
if highlights_query.is_empty() {
Ok(None)
@ -720,6 +772,7 @@ impl<'a> LanguageConfiguration<'a> {
&highlights_query,
&injections_query,
&locals_query,
apply_all_captures,
)
.map_err(|error| match error.kind {
QueryErrorKind::Language => Error::from(error),

View file

@ -348,7 +348,7 @@ pub fn ansi(
let mut highlighter = Highlighter::new();
let events = highlighter.highlight(config, source, cancellation_flag, |string| {
loader.highlight_config_for_injection_string(string)
loader.highlight_config_for_injection_string(string, config.apply_all_captures)
})?;
let mut style_stack = vec![theme.default_style().ansi];
@ -394,7 +394,7 @@ pub fn html(
let mut highlighter = Highlighter::new();
let events = highlighter.highlight(config, source, cancellation_flag, |string| {
loader.highlight_config_for_injection_string(string)
loader.highlight_config_for_injection_string(string, config.apply_all_captures)
})?;
let mut renderer = HtmlRenderer::new();

View file

@ -1,6 +1,7 @@
use anyhow::{anyhow, Context, Error, Result};
use clap::{App, AppSettings, Arg, SubCommand};
use glob::glob;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::{env, fs, u64};
use tree_sitter::{ffi, Point};
@ -78,6 +79,10 @@ fn run() -> Result<()> {
.long("quiet")
.short("q");
let apply_all_captures_arg = Arg::with_name("apply-all-captures")
.help("Apply all captures to highlights")
.long("apply-all-captures");
let matches = App::new("tree-sitter")
.author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
.about("Generates and tests parsers")
@ -236,7 +241,8 @@ fn run() -> Result<()> {
)
.arg(&debug_arg)
.arg(&debug_build_arg)
.arg(&debug_graph_arg),
.arg(&debug_graph_arg)
.arg(&apply_all_captures_arg),
)
.subcommand(
SubCommand::with_name("highlight")
@ -252,11 +258,26 @@ fn run() -> Result<()> {
.help("Check that highlighting captures conform strictly to standards")
.long("check"),
)
.arg(
Arg::with_name("captures-path")
.help("Path to a file with captures")
.long("captures-path")
.takes_value(true),
)
.arg(
Arg::with_name("query-paths")
.help("Paths to files with queries")
.long("query-paths")
.takes_value(true)
.multiple(true)
.number_of_values(1),
)
.arg(&scope_arg)
.arg(&time_arg)
.arg(&quiet_arg)
.arg(&paths_file_arg)
.arg(&paths_arg),
.arg(&paths_arg)
.arg(&apply_all_captures_arg),
)
.subcommand(
SubCommand::with_name("build-wasm")
@ -362,6 +383,7 @@ fn run() -> Result<()> {
let debug_build = matches.is_present("debug-build");
let update = matches.is_present("update");
let filter = matches.value_of("filter");
let apply_all_captures = matches.is_present("apply-all-captures");
if debug {
// For augmenting debug logging in external scanners
@ -398,7 +420,7 @@ fn run() -> Result<()> {
// Run the syntax highlighting tests.
let test_highlight_dir = test_dir.join("highlight");
if test_highlight_dir.is_dir() {
test_highlight::test_highlights(&loader, &test_highlight_dir)?;
test_highlight::test_highlights(&loader, &test_highlight_dir, apply_all_captures)?;
}
let test_tag_dir = test_dir.join("tags");
@ -562,6 +584,7 @@ fn run() -> Result<()> {
let html_mode = quiet || matches.is_present("html");
let should_check = matches.is_present("check");
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
let apply_all_captures = matches.is_present("apply-all-captures");
if html_mode && !quiet {
println!("{}", highlight::HTML_HEADER);
@ -577,6 +600,15 @@ fn run() -> Result<()> {
}
}
// Owned copies of every `--query-paths` value; stays `None` when `values_of`
// returns `None`. `Option::map` replaces the former `map_or(None, |e| Some(..))`,
// and the values are collected once instead of through an intermediate `Vec`.
let query_paths = matches
    .values_of("query-paths")
    .map(|values| values.map(str::to_string).collect::<Vec<_>>());
for path in paths {
let path = Path::new(&path);
let (language, language_config) = match lang {
@ -590,9 +622,28 @@ fn run() -> Result<()> {
},
};
if let Some(highlight_config) = language_config.highlight_config(language)? {
if let Some(highlight_config) = language_config.highlight_config(
language,
apply_all_captures,
query_paths.as_deref(),
)? {
if should_check {
let names = highlight_config.nonconformant_capture_names();
let names = if let Some(path) = matches.value_of("captures-path") {
let path = Path::new(path);
let file = fs::read_to_string(path)?;
let capture_names = file
.lines()
.filter_map(|line| {
if line.trim().is_empty() || line.trim().starts_with(';') {
return None;
}
line.split(';').next().map(|s| s.trim().trim_matches('"'))
})
.collect::<HashSet<_>>();
highlight_config.nonconformant_capture_names(&capture_names)
} else {
highlight_config.nonconformant_capture_names(&HashSet::new())
};
if names.is_empty() {
eprintln!("All highlight captures conform to standards.");
} else {

View file

@ -38,12 +38,17 @@ impl std::fmt::Display for Failure {
}
}
pub fn test_highlights(loader: &Loader, directory: &Path) -> Result<()> {
pub fn test_highlights(loader: &Loader, directory: &Path, apply_all_captures: bool) -> Result<()> {
println!("syntax highlighting:");
test_highlights_indented(loader, directory, 2)
test_highlights_indented(loader, directory, apply_all_captures, 2)
}
fn test_highlights_indented(loader: &Loader, directory: &Path, indent_level: usize) -> Result<()> {
fn test_highlights_indented(
loader: &Loader,
directory: &Path,
apply_all_captures: bool,
indent_level: usize,
) -> Result<()> {
let mut failed = false;
let mut highlighter = Highlighter::new();
@ -58,7 +63,12 @@ fn test_highlights_indented(loader: &Loader, directory: &Path, indent_level: usi
);
if test_file_path.is_dir() && !test_file_path.read_dir()?.next().is_none() {
println!("{}:", test_file_name.into_string().unwrap());
if let Err(_) = test_highlights_indented(loader, &test_file_path, indent_level + 1) {
if let Err(_) = test_highlights_indented(
loader,
&test_file_path,
apply_all_captures,
indent_level + 1,
) {
failed = true;
}
} else {
@ -66,7 +76,7 @@ fn test_highlights_indented(loader: &Loader, directory: &Path, indent_level: usi
.language_configuration_for_file_name(&test_file_path)?
.ok_or_else(|| anyhow!("No language found for path {:?}", test_file_path))?;
let highlight_config = language_config
.highlight_config(language)?
.highlight_config(language, apply_all_captures, None)?
.ok_or_else(|| anyhow!("No highlighting config found for {:?}", test_file_path))?;
match test_highlight(
&loader,
@ -111,7 +121,7 @@ pub fn iterate_assertions(
// Iterate through all of the highlighting assertions, checking each one against the
// actual highlights.
let mut i = 0;
let mut actual_highlights = Vec::<&String>::new();
let mut actual_highlights = Vec::new();
for Assertion {
position,
negative,
@ -202,7 +212,7 @@ pub fn get_highlight_positions(
let source = String::from_utf8_lossy(source);
let mut char_indices = source.char_indices();
for event in highlighter.highlight(highlight_config, source.as_bytes(), None, |string| {
loader.highlight_config_for_injection_string(string)
loader.highlight_config_for_injection_string(string, highlight_config.apply_all_captures)
})? {
match event? {
HighlightEvent::HighlightStart(h) => highlight_stack.push(h),

View file

@ -55,6 +55,7 @@ pub fn get_highlight_config(
&highlights_query,
&injections_query,
&locals_query,
false,
)
.unwrap();
result.configure(&highlight_names);

View file

@ -522,6 +522,7 @@ fn test_highlighting_via_c_api() {
highlights_query.len() as u32,
injections_query.len() as u32,
locals_query.len() as u32,
false,
);
let html_scope = c_string("text.html.basic");
@ -541,6 +542,7 @@ fn test_highlighting_via_c_api() {
highlights_query.len() as u32,
injections_query.len() as u32,
0,
false,
);
let buffer = c::ts_highlight_buffer_new();
@ -587,6 +589,65 @@ fn test_highlighting_via_c_api() {
c::ts_highlight_buffer_delete(buffer);
}
// Highlights a small Rust snippet with a configuration built with
// `apply_all_captures` set to `true` (the final argument to
// `HighlightConfiguration::new`) and checks the highlight name reported for every token.
//
// The query deliberately lists the broad `(identifier) @variable` pattern *before* the
// more specific `@function` and `@variable.parameter` patterns. The expected output
// below still reports the specific names for `main`, `a` and `b`, i.e. the later
// capture for a node is the one that is expected to be reported.
#[test]
fn test_highlighting_with_all_captures_applied() {
// NOTE(review): `-> {` has no return type, so this is not valid Rust; the test only
// inspects highlight names for the tokens listed below.
let source = "fn main(a: u32, b: u32) -> { let c = a + b; }";
let language = get_language("rust");
let highlights_query = indoc::indoc! {"
[
\"fn\"
\"let\"
] @keyword
(identifier) @variable
(function_item name: (identifier) @function)
(parameter pattern: (identifier) @variable.parameter)
(primitive_type) @type.builtin
\"=\" @operator
[ \"->\" \":\" \";\" ] @punctuation.delimiter
[ \"{\" \"}\" \"(\" \")\" ] @punctuation.bracket
"};
// Only a highlights query is supplied; the injections and locals queries are empty.
let mut rust_highlight_reverse =
HighlightConfiguration::new(language, &highlights_query, "", "", true).unwrap();
rust_highlight_reverse.configure(&HIGHLIGHT_NAMES);
// Each entry is (token text, highlight names applied to it); the outer array holds a
// single element because the source is a single line.
assert_eq!(
&to_token_vector(&source, &rust_highlight_reverse).unwrap(),
&[[
("fn", vec!["keyword"]),
(" ", vec![]),
("main", vec!["function"]),
("(", vec!["punctuation.bracket"]),
("a", vec!["variable.parameter"]),
(":", vec!["punctuation.delimiter"]),
(" ", vec![]),
("u32", vec!["type.builtin"]),
(", ", vec![]),
("b", vec!["variable.parameter"]),
(":", vec!["punctuation.delimiter"]),
(" ", vec![]),
("u32", vec!["type.builtin"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("->", vec!["punctuation.delimiter"]),
(" ", vec![]),
("{", vec!["punctuation.bracket"]),
(" ", vec![]),
("let", vec!["keyword"]),
(" ", vec![]),
("c", vec!["variable"]),
(" ", vec![]),
("=", vec!["operator"]),
(" ", vec![]),
("a", vec!["variable"]),
(" + ", vec![]),
("b", vec!["variable"]),
(";", vec!["punctuation.delimiter"]),
(" ", vec![]),
("}", vec!["punctuation.bracket"])
]],
);
}
#[test]
fn test_decode_utf8_lossy() {
use tree_sitter::LossyUtf8;

View file

@ -2842,6 +2842,14 @@ fn test_query_captures_with_text_conditions() {
((identifier) @function.builtin
(#eq? @function.builtin "require"))
((identifier) @variable.builtin
(#any-of? @variable.builtin
"arguments"
"module"
"console"
"window"
"document"))
((identifier) @variable
(#not-match? @variable "^(lambda|load)$"))
"#,
@ -2855,6 +2863,9 @@ fn test_query_captures_with_text_conditions() {
lambda
const ab = require('./ab');
new Cd(EF);
document;
module;
console;
";
let mut parser = Parser::new();
@ -2876,6 +2887,12 @@ fn test_query_captures_with_text_conditions() {
("constant", "EF"),
("constructor", "EF"),
("variable", "EF"),
("variable.builtin", "document"),
("variable", "document"),
("variable.builtin", "module"),
("variable", "module"),
("variable.builtin", "console"),
("variable", "console"),
],
);
});

View file

@ -48,7 +48,8 @@ TSHighlightError ts_highlighter_add_language(
const char *locals_query,
uint32_t highlight_query_len,
uint32_t injection_query_len,
uint32_t locals_query_len
uint32_t locals_query_len,
bool apply_all_captures
);
// Compute syntax highlighting for a given document. You must first

View file

@ -70,6 +70,7 @@ pub extern "C" fn ts_highlighter_add_language(
highlight_query_len: u32,
injection_query_len: u32,
locals_query_len: u32,
apply_all_captures: bool,
) -> ErrorCode {
let f = move || {
let this = unwrap_mut_ptr(this);
@ -109,9 +110,14 @@ pub extern "C" fn ts_highlighter_add_language(
""
};
let mut config =
HighlightConfiguration::new(language, highlight_query, injection_query, locals_query)
.or(Err(ErrorCode::InvalidQuery))?;
let mut config = HighlightConfiguration::new(
language,
highlight_query,
injection_query,
locals_query,
apply_all_captures,
)
.or(Err(ErrorCode::InvalidQuery))?;
config.configure(&this.highlight_names.as_slice());
this.languages.insert(scope_name, (injection_regex, config));

View file

@ -103,6 +103,7 @@ pub enum HighlightEvent {
pub struct HighlightConfiguration {
pub language: Language,
pub query: Query,
pub apply_all_captures: bool,
combined_injections_query: Option<Query>,
locals_pattern_index: usize,
highlights_pattern_index: usize,
@ -160,6 +161,7 @@ where
iter_count: usize,
next_event: Option<HighlightEvent>,
last_highlight_range: Option<(usize, usize, usize)>,
apply_all_captures: bool,
}
struct HighlightIterLayer<'a> {
@ -215,9 +217,10 @@ impl Highlighter {
cancellation_flag,
highlighter: self,
iter_count: 0,
layers: layers,
layers,
next_event: None,
last_highlight_range: None,
apply_all_captures: config.apply_all_captures,
};
result.sort_layers();
Ok(result)
@ -244,6 +247,7 @@ impl HighlightConfiguration {
highlights_query: &str,
injection_query: &str,
locals_query: &str,
apply_all_captures: bool,
) -> Result<Self, QueryError> {
// Concatenate the query strings, keeping track of the start offset of each section.
let mut query_source = String::new();
@ -324,6 +328,7 @@ impl HighlightConfiguration {
Ok(HighlightConfiguration {
language,
query,
apply_all_captures,
combined_injections_query,
locals_pattern_index,
highlights_pattern_index,
@ -385,12 +390,16 @@ impl HighlightConfiguration {
// Return the list of this configuration's capture names that are neither present in the
// list of predefined 'canonical' names nor start with an underscore (denoting 'private' captures
// used as part of capture internals).
pub fn nonconformant_capture_names(&self) -> Vec<&String> {
return self
.names()
pub fn nonconformant_capture_names(&self, capture_names: &HashSet<&str>) -> Vec<&String> {
let capture_names = if capture_names.is_empty() {
&*STANDARD_CAPTURE_NAMES
} else {
&capture_names
};
self.names()
.iter()
.filter(|&n| !(n.starts_with('_') || STANDARD_CAPTURE_NAMES.contains(n.as_str())))
.collect();
.filter(|&n| !(n.starts_with('_') || capture_names.contains(n.as_str())))
.collect()
}
}
@ -929,7 +938,13 @@ where
while let Some((next_match, next_capture_index)) = layer.captures.peek() {
let next_capture = next_match.captures[*next_capture_index];
if next_capture.node == capture.node {
layer.captures.next();
if self.apply_all_captures {
match_.remove();
capture = next_capture;
match_ = layer.captures.next().unwrap().0;
} else {
layer.captures.next();
}
} else {
break;
}

View file

@ -251,6 +251,7 @@ enum TextPredicate {
CaptureEqString(u32, String, bool),
CaptureEqCapture(u32, u32, bool),
CaptureMatchString(u32, regex::bytes::Regex, bool),
CaptureAnyString(u32, Vec<String>, bool),
}
// TODO: Remove this struct at at some point. If `core::str::lossy::Utf8Lossy`
@ -1811,6 +1812,38 @@ impl Query {
operator_name == "is?",
)),
"any-of?" | "not-any-of?" => {
if p.len() < 2 {
return Err(predicate_error(row, format!(
"Wrong number of arguments to #any-of? predicate. Expected at least 1, got {}.",
p.len() - 1
)));
}
if p[1].type_ != type_capture {
return Err(predicate_error(row, format!(
"First argument to #any-of? predicate must be a capture name. Got literal \"{}\".",
string_values[p[1].value_id as usize],
)));
}
let is_positive = operator_name == "any-of?";
let mut values = Vec::new();
for arg in &p[2..] {
if arg.type_ == type_capture {
return Err(predicate_error(row, format!(
"Arguments to #any-of? predicate must be literals. Got capture @{}.",
result.capture_names[arg.value_id as usize],
)));
}
values.push(string_values[arg.value_id as usize].clone());
}
text_predicates.push(TextPredicate::CaptureAnyString(
p[1].value_id,
values,
is_positive,
));
}
_ => general_predicates.push(QueryPredicate {
operator: operator_name.clone().into_boxed_str(),
args: p[1..]
@ -2265,6 +2298,17 @@ impl<'tree> QueryMatch<'_, 'tree> {
None => true,
}
}
TextPredicate::CaptureAnyString(i, v, is_positive) => {
let node = self.nodes_for_capture_index(*i).next();
match node {
Some(node) => {
let mut text = text_provider.text(node);
let text = node_text1.get_text(&mut text);
v.iter().any(|s| text == s.as_bytes()) == *is_positive
}
None => true,
}
}
})
}
}