From f4903578f8becc499c1243baa344d727eea392e5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 18 Sep 2019 17:35:47 -0700 Subject: [PATCH] Start reimplementing highlight crate with tree queries --- cli/src/error.rs | 24 +- cli/src/highlight.rs | 317 +++-- cli/src/loader.rs | 71 +- cli/src/main.rs | 19 +- cli/src/tests/helpers/fixtures.rs | 46 +- cli/src/tests/highlight_test.rs | 516 ++++--- highlight/include/tree_sitter/highlight.h | 47 +- highlight/src/c_lib.rs | 192 ++- highlight/src/lib.rs | 1514 ++++++++------------- 9 files changed, 1259 insertions(+), 1487 deletions(-) diff --git a/cli/src/error.rs b/cli/src/error.rs index 968486f4..324ad8b1 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -1,6 +1,6 @@ use std::fmt::Write; use std::io; -use tree_sitter_highlight::PropertySheetError; +use tree_sitter::QueryError; #[derive(Debug)] pub struct Error(pub Vec); @@ -50,6 +50,18 @@ impl Error { } } +impl<'a> From for Error { + fn from(error: QueryError) -> Self { + Error::new(format!("{:?}", error)) + } +} + +impl<'a> From for Error { + fn from(error: tree_sitter_highlight::Error) -> Self { + Error::new(format!("{:?}", error)) + } +} + impl From for Error { fn from(error: serde_json::Error) -> Self { Error::new(error.to_string()) @@ -79,13 +91,3 @@ impl From for Error { Error::new(error) } } - -impl From for Error { - fn from(error: PropertySheetError) -> Self { - match error { - PropertySheetError::InvalidFormat(e) => Self::from(e), - PropertySheetError::InvalidRegex(e) => Self::regex(&e.to_string()), - PropertySheetError::InvalidJSON(e) => Self::from(e), - } - } -} diff --git a/cli/src/highlight.rs b/cli/src/highlight.rs index dff8fd2c..d92d642e 100644 --- a/cli/src/highlight.rs +++ b/cli/src/highlight.rs @@ -1,6 +1,6 @@ use crate::error::Result; use crate::loader::Loader; -use ansi_term::{Color, Style}; +use ansi_term::Color; use lazy_static::lazy_static; use serde::ser::SerializeMap; use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -9,18 +9,52 @@ use std::collections::HashMap; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use std::time::Instant; -use std::{fmt, fs, io, path, thread}; -use tree_sitter::{Language, PropertySheet}; -use tree_sitter_highlight::{highlight, highlight_html, Highlight, HighlightEvent, Properties}; +use std::{fs, io, path, str, thread, usize}; +use tree_sitter_highlight::{ + HighlightConfiguration, HighlightContext, HighlightEvent, Highlighter, HtmlRenderer, +}; + +pub const HTML_HEADER: &'static str = " + + + Tree-sitter Highlighting + + + +"; + +pub const HTML_FOOTER: &'static str = " + +"; lazy_static! { static ref CSS_STYLES_BY_COLOR_ID: Vec = serde_json::from_str(include_str!("../vendor/xterm-colors.json")).unwrap(); } +#[derive(Debug, Default)] +pub struct Style { + pub ansi: ansi_term::Style, + pub css: Option, +} + +#[derive(Debug)] pub struct Theme { - ansi_styles: Vec>, - css_styles: Vec>, + pub highlighter: Highlighter, + styles: Vec - - -"; - -pub const HTML_FOOTER: &'static str = " - -"; - pub fn html( loader: &Loader, theme: &Theme, source: &[u8], - language: Language, - property_sheet: &PropertySheet, + config: &HighlightConfiguration, + print_time: bool, ) -> Result<()> { use std::io::Write; + let stdout = io::stdout(); let mut stdout = stdout.lock(); - write!(&mut stdout, "\n")?; - + let time = Instant::now(); let cancellation_flag = cancel_on_stdin(); - let lines = highlight_html( + let mut context = HighlightContext::new(); + + let events = theme.highlighter.highlight( + &mut context, + config, source, - language, - property_sheet, - Some(cancellation_flag.as_ref()), - |s| language_for_injection_string(loader, s), - |highlight| { - if let Some(css_style) = theme.css_style(highlight) { - css_style - } else { - "" - } - }, - ) - .map_err(|e| e.to_string())?; - for (i, line) in lines.into_iter().enumerate() { + Some(&cancellation_flag), + |string| language_for_injection_string(loader, theme, string), + )?; + + let mut renderer = HtmlRenderer::new(); + renderer.render(events, source, &move |highlight| { + if let Some(css_style) = &theme.styles[highlight.0].css { + css_style.as_bytes() + } else { + "".as_bytes() + } + })?; + + for (i, line) in renderer.lines().enumerate() { write!( &mut stdout, "\n", @@ -380,14 +360,21 @@ pub fn html( line )?; } + write!(&mut stdout, "
{}{}
\n")?; + + if print_time { + eprintln!("Time: {}ms", time.elapsed().as_millis()); + } + Ok(()) } fn language_for_injection_string<'a>( loader: &'a Loader, + theme: &Theme, string: &str, -) -> Option<(Language, &'a PropertySheet)> { +) -> Option<&'a HighlightConfiguration> { match loader.language_configuration_for_injection_string(string) { Err(e) => { eprintln!( @@ -399,7 +386,7 @@ fn language_for_injection_string<'a>( } Ok(None) => None, Ok(Some((language, configuration))) => { - match configuration.highlight_property_sheet(language) { + match configuration.highlight_config(&theme.highlighter, language) { Err(e) => { eprintln!( "Failed to load property sheet for injection string '{}': {}", @@ -409,7 +396,7 @@ fn language_for_injection_string<'a>( None } Ok(None) => None, - Ok(Some(sheet)) => Some((language, sheet)), + Ok(Some(config)) => Some(config), } } } diff --git a/cli/src/loader.rs b/cli/src/loader.rs index 237718bb..328b8063 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -9,8 +9,8 @@ use std::path::{Path, PathBuf}; use std::process::Command; use std::time::SystemTime; use std::{fs, mem}; -use tree_sitter::{Language, PropertySheet}; -use tree_sitter_highlight::{load_property_sheet, Properties}; +use tree_sitter::Language; +use tree_sitter_highlight::{HighlightConfiguration, Highlighter}; #[cfg(unix)] const DYLIB_EXTENSION: &'static str = "so"; @@ -27,9 +27,9 @@ pub struct LanguageConfiguration { pub _first_line_regex: Option, pub injection_regex: Option, pub file_types: Vec, - pub highlight_property_sheet_path: Option, + pub root_path: PathBuf, language_id: usize, - highlight_property_sheet: OnceCell>>, + highlight_config: OnceCell>, } pub struct Loader { @@ -134,7 +134,6 @@ impl Loader { if configuration_ids.len() == 1 { configuration = &self.language_configurations[configuration_ids[0]]; } - // If multiple language configurations match, then determine which // one to use by applying the configurations' content regexes. else { @@ -151,7 +150,6 @@ impl Loader { if let Some(mat) = content_regex.find(&file_contents) { score = (mat.end() - mat.start()) as isize; } - // If the content regex does not match, then *penalize* this // language configuration, so that language configurations // without content regexes are preferred over those with @@ -394,6 +392,7 @@ impl Loader { }); let configuration = LanguageConfiguration { + root_path: parser_path.to_path_buf(), scope: config_json.scope, language_id, file_types: config_json.file_types.unwrap_or(Vec::new()), @@ -406,10 +405,7 @@ impl Loader { injection_regex: config_json .injection_regex .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), - highlight_property_sheet_path: config_json - .highlights - .map(|h| parser_path.join(h)), - highlight_property_sheet: OnceCell::new(), + highlight_config: OnceCell::new(), }; for file_type in &configuration.file_types { @@ -428,14 +424,14 @@ impl Loader { && parser_path.join("src").join("grammar.json").exists() { self.language_configurations.push(LanguageConfiguration { + root_path: parser_path.to_owned(), language_id: self.languages_by_id.len(), scope: None, content_regex: None, injection_regex: None, file_types: Vec::new(), _first_line_regex: None, - highlight_property_sheet_path: None, - highlight_property_sheet: OnceCell::new(), + highlight_config: OnceCell::new(), }); self.languages_by_id .push((parser_path.to_owned(), OnceCell::new())); @@ -446,30 +442,41 @@ impl Loader { } impl LanguageConfiguration { - pub fn highlight_property_sheet( + pub fn highlight_config( &self, + highlighter: &Highlighter, language: Language, - ) -> Result>> { - self.highlight_property_sheet + ) -> Result> { + self.highlight_config .get_or_try_init(|| { - if let Some(path) = &self.highlight_property_sheet_path { - let sheet_json = fs::read_to_string(path).map_err(Error::wrap(|| { - format!( - "Failed to read property sheet {:?}", - path.file_name().unwrap() - ) - }))?; - let sheet = - load_property_sheet(language, &sheet_json).map_err(Error::wrap(|| { - format!( - "Failed to parse property sheet {:?}", - path.file_name().unwrap() - ) - }))?; - Ok(Some(sheet)) - } else { - Ok(None) + let queries_path = self.root_path.join("queries"); + + let highlights_path = queries_path.join("highlights.scm"); + let injections_path = queries_path.join("injections.scm"); + let locals_path = queries_path.join("locals.scm"); + + if !highlights_path.exists() { + return Ok(None); } + + let highlights_query = fs::read_to_string(highlights_path)?; + let injections_query = if injections_path.exists() { + fs::read_to_string(injections_path)? + } else { + String::new() + }; + let locals_query = if locals_path.exists() { + fs::read_to_string(locals_path)? + } else { + String::new() + }; + + Ok(Some(highlighter.load_configuration( + language, + &highlights_query, + &injections_query, + &locals_query, + )?)) }) .map(Option::as_ref) } diff --git a/cli/src/main.rs b/cli/src/main.rs index 8de7ed67..25ffe5f7 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -110,7 +110,8 @@ fn run() -> error::Result<()> { ) .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("html").long("html").short("h")) - .arg(Arg::with_name("time").long("time").short("t")), + .arg(Arg::with_name("time").long("time").short("t")) + .arg(Arg::with_name("q").short("q")), ) .subcommand( SubCommand::with_name("build-wasm") @@ -260,15 +261,18 @@ fn run() -> error::Result<()> { }, }; - if let Some(sheet) = language_config.highlight_property_sheet(language)? { - let source = fs::read(path)?; + let source = fs::read(path)?; + + if let Some(highlight_config) = + language_config.highlight_config(&config.theme.highlighter, language)? + { if html_mode { - highlight::html(&loader, &config.theme, &source, language, sheet)?; + highlight::html(&loader, &config.theme, &source, highlight_config, time)?; } else { - highlight::ansi(&loader, &config.theme, &source, language, sheet, time)?; + highlight::ansi(&loader, &config.theme, &source, highlight_config, time)?; } } else { - return Error::err(format!("No syntax highlighting property sheet specified")); + return Error::err(format!("No syntax highlighting query found")); } } } else if let Some(matches) = matches.subcommand_matches("build-wasm") { @@ -280,10 +284,9 @@ fn run() -> error::Result<()> { loader.find_all_languages(&config.parser_directories)?; for (configuration, language_path) in loader.get_all_language_configurations() { println!( - "scope: {}\nparser: {:?}\nproperties: {:?}\nfile_types: {:?}\ncontent_regex: {:?}\ninjection_regex: {:?}\n", + "scope: {}\nparser: {:?}\nfile_types: {:?}\ncontent_regex: {:?}\ninjection_regex: {:?}\n", configuration.scope.as_ref().unwrap_or(&String::new()), language_path, - configuration.highlight_property_sheet_path, configuration.file_types, configuration.content_regex, configuration.injection_regex, diff --git a/cli/src/tests/helpers/fixtures.rs b/cli/src/tests/helpers/fixtures.rs index 4389797e..af1df2bf 100644 --- a/cli/src/tests/helpers/fixtures.rs +++ b/cli/src/tests/helpers/fixtures.rs @@ -2,8 +2,8 @@ use crate::loader::Loader; use lazy_static::lazy_static; use std::fs; use std::path::{Path, PathBuf}; -use tree_sitter::{Language, PropertySheet}; -use tree_sitter_highlight::{load_property_sheet, Properties}; +use tree_sitter::Language; +use tree_sitter_highlight::{HighlightConfiguration, Highlighter}; include!("./dirs.rs"); @@ -21,18 +21,42 @@ pub fn get_language(name: &str) -> Language { .unwrap() } -pub fn get_property_sheet_json(language_name: &str, sheet_name: &str) -> String { - let path = GRAMMARS_DIR - .join(language_name) - .join("src") - .join(sheet_name); - fs::read_to_string(path).unwrap() +pub fn get_highlight_query_sources(language_name: &str) -> (String, String, String) { + let queries_path = GRAMMARS_DIR.join(language_name).join("queries"); + let highlights_path = queries_path.join("highlights.scm"); + let injections_path = queries_path.join("injections.scm"); + let locals_path = queries_path.join("locals.scm"); + + let highlights_query = fs::read_to_string(highlights_path).unwrap(); + let injections_query = if injections_path.exists() { + fs::read_to_string(injections_path).unwrap() + } else { + String::new() + }; + let locals_query = if locals_path.exists() { + fs::read_to_string(locals_path).unwrap() + } else { + String::new() + }; + + (highlights_query, injections_query, locals_query) } -pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet { - let json = get_property_sheet_json(language_name, sheet_name); +pub fn get_highlight_config( + language_name: &str, + highlighter: &Highlighter, +) -> HighlightConfiguration { let language = get_language(language_name); - load_property_sheet(language, &json).unwrap() + let (highlights_query, injections_query, locals_query) = + get_highlight_query_sources(language_name); + highlighter + .load_configuration( + language, + &highlights_query, + &injections_query, + &locals_query, + ) + .unwrap() } pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language { diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index 34b545ff..a996d2d1 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -1,32 +1,85 @@ -use super::helpers::fixtures::{get_language, get_property_sheet, get_property_sheet_json}; +use super::helpers::fixtures::{get_highlight_config, get_highlight_query_sources, get_language}; use lazy_static::lazy_static; use std::ffi::CString; - use std::sync::atomic::{AtomicUsize, Ordering}; use std::{ptr, slice, str}; -use tree_sitter::{Language, PropertySheet}; use tree_sitter_highlight::{ - c, highlight, highlight_html, Error, Highlight, HighlightEvent, Properties, + c, Error, HighlightConfiguration, HighlightContext, HighlightEvent, Highlighter, HtmlRenderer, }; lazy_static! { - static ref JS_SHEET: PropertySheet = - get_property_sheet("javascript", "highlights.json"); - static ref HTML_SHEET: PropertySheet = - get_property_sheet("html", "highlights.json"); - static ref EJS_SHEET: PropertySheet = - get_property_sheet("embedded-template", "highlights-ejs.json"); - static ref RUST_SHEET: PropertySheet = - get_property_sheet("rust", "highlights.json"); - static ref SCOPE_CLASS_STRINGS: Vec = { - let mut result = Vec::new(); - let mut i = 0; - while let Some(highlight) = Highlight::from_usize(i) { - result.push(format!("class={:?}", highlight)); - i += 1; - } - result - }; + static ref JS_HIGHLIGHT: HighlightConfiguration = + get_highlight_config("javascript", &HIGHLIGHTER); + static ref HTML_HIGHLIGHT: HighlightConfiguration = get_highlight_config("html", &HIGHLIGHTER); + static ref EJS_HIGHLIGHT: HighlightConfiguration = + get_highlight_config("embedded-template", &HIGHLIGHTER); + static ref RUST_HIGHLIGHT: HighlightConfiguration = get_highlight_config("rust", &HIGHLIGHTER); + static ref HIGHLIGHTER: Highlighter = Highlighter::new( + [ + "attribute", + "constructor", + "function.builtin", + "function", + "embedded", + "keyword", + "operator", + "property.builtin", + "property", + "punctuation", + "punctuation.bracket", + "punctuation.delimiter", + "punctuation.special", + "string", + "tag", + "type.builtin", + "type", + "variable.builtin", + "variable.parameter", + "variable", + ] + .iter() + .cloned() + .map(String::from) + .collect() + ); + static ref HTML_ATTRS: Vec = HIGHLIGHTER + .highlight_names + .iter() + .map(|s| format!("class={}", s)) + .collect(); +} + +#[test] +fn test_highlighting_javascript() { + let source = "const a = function(b) { return b + c; }"; + assert_eq!( + &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), + &[vec![ + ("const", vec!["keyword"]), + (" ", vec![]), + ("a", vec!["function"]), + (" ", vec![]), + ("=", vec!["operator"]), + (" ", vec![]), + ("function", vec!["keyword"]), + ("(", vec!["punctuation.bracket"]), + ("b", vec!["variable.parameter"]), + (")", vec!["punctuation.bracket"]), + (" ", vec![]), + ("{", vec!["punctuation.bracket"]), + (" ", vec![]), + ("return", vec!["keyword"]), + (" ", vec![]), + ("b", vec!["variable.parameter"]), + (" ", vec![]), + ("+", vec!["operator"]), + (" ", vec![]), + ("c", vec!["variable"]), + (";", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("}", vec!["punctuation.bracket"]), + ]] + ); } #[test] @@ -34,57 +87,68 @@ fn test_highlighting_injected_html_in_javascript() { let source = vec!["const s = html `
${a < b}
`;"].join("\n"); assert_eq!( - &to_token_vector(&source, get_language("javascript"), &JS_SHEET).unwrap(), + &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), &[vec![ - ("const", vec![Highlight::Keyword]), + ("const", vec!["keyword"]), (" ", vec![]), - ("s", vec![Highlight::Variable]), + ("s", vec!["variable"]), (" ", vec![]), - ("=", vec![Highlight::Operator]), + ("=", vec!["operator"]), (" ", vec![]), - ("html", vec![Highlight::Function]), + ("html", vec!["function"]), (" ", vec![]), - ("`<", vec![Highlight::String]), - ("div", vec![Highlight::String, Highlight::Tag]), - (">", vec![Highlight::String]), - ( - "${", - vec![ - Highlight::String, - Highlight::Embedded, - Highlight::PunctuationSpecial - ] - ), - ( - "a", - vec![Highlight::String, Highlight::Embedded, Highlight::Variable] - ), - (" ", vec![Highlight::String, Highlight::Embedded]), - ( - "<", - vec![Highlight::String, Highlight::Embedded, Highlight::Operator] - ), - (" ", vec![Highlight::String, Highlight::Embedded]), - ( - "b", - vec![Highlight::String, Highlight::Embedded, Highlight::Variable] - ), - ( - "}", - vec![ - Highlight::String, - Highlight::Embedded, - Highlight::PunctuationSpecial - ] - ), - ("`", vec![Highlight::String]), - (";", vec![Highlight::PunctuationDelimiter]), + ("`", vec!["string"]), + ("<", vec!["string", "punctuation.bracket"]), + ("div", vec!["string", "tag"]), + (">", vec!["string", "punctuation.bracket"]), + ("${", vec!["string", "embedded", "punctuation.special"]), + ("a", vec!["string", "embedded", "variable"]), + (" ", vec!["string", "embedded"]), + ("<", vec!["string", "embedded", "operator"]), + (" ", vec!["string", "embedded"]), + ("b", vec!["string", "embedded", "variable"]), + ("}", vec!["string", "embedded", "punctuation.special"]), + ("", vec!["string", "punctuation.bracket"]), + ("`", vec!["string"]), + (";", vec!["punctuation.delimiter"]), ]] ); } +#[test] +fn test_highlighting_injected_javascript_in_html_mini() { + let source = ""; + + eprintln!("HTML {:?}", HTML_HIGHLIGHT.language); + eprintln!("JavaScript {:?}", JS_HIGHLIGHT.language); + + assert_eq!( + &to_token_vector(source, &HTML_HIGHLIGHT).unwrap(), + &[vec![ + ("<", vec!["punctuation.bracket"]), + ("script", vec!["tag"]), + (">", vec!["punctuation.bracket"]), + ("const", vec!["keyword"]), + (" ", vec![]), + ("x", vec!["variable"]), + (" ", vec![]), + ("=", vec!["operator"]), + (" ", vec![]), + ("new", vec!["keyword"]), + (" ", vec![]), + ("Thing", vec!["constructor"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), + (";", vec!["punctuation.delimiter"]), + ("", vec!["punctuation.bracket"]), + ],] + ); +} + #[test] fn test_highlighting_injected_javascript_in_html() { let source = vec![ @@ -97,38 +161,44 @@ fn test_highlighting_injected_javascript_in_html() { .join("\n"); assert_eq!( - &to_token_vector(&source, get_language("html"), &HTML_SHEET).unwrap(), + &to_token_vector(&source, &HTML_HIGHLIGHT).unwrap(), &[ - vec![("<", vec![]), ("body", vec![Highlight::Tag]), (">", vec![]),], vec![ - (" <", vec![]), - ("script", vec![Highlight::Tag]), - (">", vec![]), + ("<", vec!["punctuation.bracket"]), + ("body", vec!["tag"]), + (">", vec!["punctuation.bracket"]), + ], + vec![ + (" ", vec![]), + ("<", vec!["punctuation.bracket"]), + ("script", vec!["tag"]), + (">", vec!["punctuation.bracket"]), ], vec![ (" ", vec![]), - ("const", vec![Highlight::Keyword]), + ("const", vec!["keyword"]), (" ", vec![]), - ("x", vec![Highlight::Variable]), + ("x", vec!["variable"]), (" ", vec![]), - ("=", vec![Highlight::Operator]), + ("=", vec!["operator"]), (" ", vec![]), - ("new", vec![Highlight::Keyword]), + ("new", vec!["keyword"]), (" ", vec![]), - ("Thing", vec![Highlight::Constructor]), - ("(", vec![Highlight::PunctuationBracket]), - (")", vec![Highlight::PunctuationBracket]), - (";", vec![Highlight::PunctuationDelimiter]), + ("Thing", vec!["constructor"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), + (";", vec!["punctuation.delimiter"]), ], vec![ - (" ", vec![]), + (" ", vec![]), + ("", vec!["punctuation.bracket"]), ], vec![ - ("", vec![]), + ("", vec!["punctuation.bracket"]), ], ] ); @@ -147,7 +217,7 @@ fn test_highlighting_multiline_nodes_to_html() { .join("\n"); assert_eq!( - &to_html(&source, get_language("javascript"), &JS_SHEET,).unwrap(), + &to_html(&source, &JS_HIGHLIGHT).unwrap(), &[ "const SOMETHING = `\n".to_string(), " one ${\n".to_string(), @@ -169,51 +239,51 @@ fn test_highlighting_with_local_variable_tracking() { .join("\n"); assert_eq!( - &to_token_vector(&source, get_language("javascript"), &JS_SHEET).unwrap(), + &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), &[ vec![ - ("module", vec![Highlight::VariableBuiltin]), - (".", vec![Highlight::PunctuationDelimiter]), - ("exports", vec![Highlight::Property]), + ("module", vec!["variable.builtin"]), + (".", vec!["punctuation.delimiter"]), + ("exports", vec!["function"]), (" ", vec![]), - ("=", vec![Highlight::Operator]), + ("=", vec!["operator"]), (" ", vec![]), - ("function", vec![Highlight::Keyword]), + ("function", vec!["keyword"]), (" ", vec![]), - ("a", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), - ("b", vec![Highlight::VariableParameter]), - (")", vec![Highlight::PunctuationBracket]), + ("a", vec!["function"]), + ("(", vec!["punctuation.bracket"]), + ("b", vec!["variable.parameter"]), + (")", vec!["punctuation.bracket"]), (" ", vec![]), - ("{", vec![Highlight::PunctuationBracket]) + ("{", vec!["punctuation.bracket"]) ], vec![ (" ", vec![]), - ("const", vec![Highlight::Keyword]), + ("const", vec!["keyword"]), (" ", vec![]), - ("module", vec![Highlight::Variable]), + ("module", vec!["variable"]), (" ", vec![]), - ("=", vec![Highlight::Operator]), + ("=", vec!["operator"]), (" ", vec![]), - ("c", vec![Highlight::Variable]), - (";", vec![Highlight::PunctuationDelimiter]) + ("c", vec!["variable"]), + (";", vec!["punctuation.delimiter"]) ], vec![ (" ", vec![]), - ("console", vec![Highlight::VariableBuiltin]), - (".", vec![Highlight::PunctuationDelimiter]), - ("log", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), + ("console", vec!["variable.builtin"]), + (".", vec!["punctuation.delimiter"]), + ("log", vec!["function"]), + ("(", vec!["punctuation.bracket"]), // Not a builtin, because `module` was defined as a variable above. - ("module", vec![Highlight::Variable]), - (",", vec![Highlight::PunctuationDelimiter]), + ("module", vec!["variable"]), + (",", vec!["punctuation.delimiter"]), (" ", vec![]), // A parameter, because `b` was defined as a parameter above. - ("b", vec![Highlight::VariableParameter]), - (")", vec![Highlight::PunctuationBracket]), - (";", vec![Highlight::PunctuationDelimiter]), + ("b", vec!["variable.parameter"]), + (")", vec!["punctuation.bracket"]), + (";", vec!["punctuation.delimiter"]), ], - vec![("}", vec![Highlight::PunctuationBracket])] + vec![("}", vec!["punctuation.bracket"])] ], ); } @@ -234,17 +304,17 @@ fn test_highlighting_empty_lines() { .join("\n"); assert_eq!( - &to_html(&source, get_language("javascript"), &JS_SHEET,).unwrap(), + &to_html(&source, &JS_HIGHLIGHT,).unwrap(), &[ - "class A {\n".to_string(), + "class A {\n".to_string(), "\n".to_string(), - " b(c) {\n".to_string(), + " b(c) {\n".to_string(), "\n".to_string(), - " d(e)\n".to_string(), + " d(e)\n".to_string(), "\n".to_string(), - " }\n".to_string(), + " }\n".to_string(), "\n".to_string(), - "}\n".to_string(), + "}\n".to_string(), ] ); } @@ -254,20 +324,20 @@ fn test_highlighting_ejs() { let source = vec!["
<% foo() %>
"].join("\n"); assert_eq!( - &to_token_vector(&source, get_language("embedded-template"), &EJS_SHEET).unwrap(), + &to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(), &[[ ("<", vec![]), - ("div", vec![Highlight::Tag]), + ("div", vec!["tag"]), (">", vec![]), - ("<%", vec![Highlight::Keyword]), + ("<%", vec!["keyword"]), (" ", vec![]), - ("foo", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), - (")", vec![Highlight::PunctuationBracket]), + ("foo", vec!["function"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), (" ", vec![]), - ("%>", vec![Highlight::Keyword]), + ("%>", vec!["keyword"]), ("", vec![]) ]], ); @@ -278,33 +348,33 @@ fn test_highlighting_with_content_children_included() { let source = vec!["assert!(", " a.b.c() < D::e::()", ");"].join("\n"); assert_eq!( - &to_token_vector(&source, get_language("rust"), &RUST_SHEET).unwrap(), + &to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(), &[ vec![ - ("assert", vec![Highlight::Function]), - ("!", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), + ("assert", vec!["function"]), + ("!", vec!["function"]), + ("(", vec!["punctuation.bracket"]), ], vec![ (" a", vec![]), - (".", vec![Highlight::PunctuationDelimiter]), - ("b", vec![Highlight::Property]), - (".", vec![Highlight::PunctuationDelimiter]), - ("c", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), - (")", vec![Highlight::PunctuationBracket]), + (".", vec!["punctuation.delimiter"]), + ("b", vec!["property"]), + (".", vec!["punctuation.delimiter"]), + ("c", vec!["function"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), (" < ", vec![]), - ("D", vec![Highlight::Type]), - ("::", vec![Highlight::PunctuationDelimiter]), - ("e", vec![Highlight::Function]), - ("::", vec![Highlight::PunctuationDelimiter]), - ("<", vec![Highlight::PunctuationBracket]), - ("F", vec![Highlight::Type]), - (">", vec![Highlight::PunctuationBracket]), - ("(", vec![Highlight::PunctuationBracket]), - (")", vec![Highlight::PunctuationBracket]), + ("D", vec!["type"]), + ("::", vec!["punctuation.delimiter"]), + ("e", vec!["function"]), + ("::", vec!["punctuation.delimiter"]), + ("<", vec!["punctuation.bracket"]), + ("F", vec!["type"]), + (">", vec!["punctuation.bracket"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), ], - vec![(")", vec![Highlight::PunctuationBracket]), (";", vec![]),] + vec![(")", vec!["punctuation.bracket"]), (";", vec![]),] ], ); } @@ -327,18 +397,20 @@ fn test_highlighting_cancellation() { // Constructing the highlighter, which eagerly parses the outer document, // should not fail. - let highlighter = highlight( - source.as_bytes(), - get_language("html"), - &HTML_SHEET, - Some(&cancellation_flag), - injection_callback, - ) - .unwrap(); + let mut context = HighlightContext::new(); + let events = HIGHLIGHTER + .highlight( + &mut context, + &HTML_HIGHLIGHT, + source.as_bytes(), + Some(&cancellation_flag), + injection_callback, + ) + .unwrap(); // Iterating the scopes should not panic. It should return an error // once the cancellation is detected. - for event in highlighter { + for event in events { if let Err(e) = event { assert_eq!(e, Error::Cancelled); return; @@ -349,49 +421,68 @@ fn test_highlighting_cancellation() { #[test] fn test_highlighting_via_c_api() { - let js_lang = get_language("javascript"); - let html_lang = get_language("html"); - let js_sheet = get_property_sheet_json("javascript", "highlights.json"); - let js_sheet = c_string(&js_sheet); - let html_sheet = get_property_sheet_json("html", "highlights.json"); - let html_sheet = c_string(&html_sheet); + let highlights = vec![ + "class=tag\0", + "class=function\0", + "class=string\0", + "class=keyword\0", + ]; + let highlight_names = highlights + .iter() + .map(|h| h["class=".len()..].as_ptr() as *const i8) + .collect::>(); + let highlight_attrs = highlights + .iter() + .map(|h| h.as_bytes().as_ptr() as *const i8) + .collect::>(); + let highlighter = c::ts_highlighter_new( + &highlight_names[0] as *const *const i8, + &highlight_attrs[0] as *const *const i8, + highlights.len() as u32, + ); - let class_tag = c_string("class=tag"); - let class_function = c_string("class=function"); - let class_string = c_string("class=string"); - let class_keyword = c_string("class=keyword"); - - let js_scope_name = c_string("source.js"); - let html_scope_name = c_string("text.html.basic"); - let injection_regex = c_string("^(javascript|js)$"); let source_code = c_string(""); - let attribute_strings = &mut [ptr::null(); Highlight::Unknown as usize + 1]; - attribute_strings[Highlight::Tag as usize] = class_tag.as_ptr(); - attribute_strings[Highlight::String as usize] = class_string.as_ptr(); - attribute_strings[Highlight::Keyword as usize] = class_keyword.as_ptr(); - attribute_strings[Highlight::Function as usize] = class_function.as_ptr(); + let js_scope = c_string("source.js"); + let js_injection_regex = c_string("^javascript"); + let language = get_language("javascript"); + let (highlights_query, injections_query, locals_query) = + get_highlight_query_sources("javascript"); + c::ts_highlighter_add_language( + highlighter, + js_scope.as_ptr(), + js_injection_regex.as_ptr(), + language, + highlights_query.as_ptr() as *const i8, + injections_query.as_ptr() as *const i8, + locals_query.as_ptr() as *const i8, + highlights_query.len() as u32, + injections_query.len() as u32, + locals_query.len() as u32, + ); + + let html_scope = c_string("text.html.basic"); + let html_injection_regex = c_string("^html"); + let language = get_language("html"); + let (highlights_query, injections_query, locals_query) = get_highlight_query_sources("html"); + c::ts_highlighter_add_language( + highlighter, + html_scope.as_ptr(), + html_injection_regex.as_ptr(), + language, + highlights_query.as_ptr() as *const i8, + injections_query.as_ptr() as *const i8, + locals_query.as_ptr() as *const i8, + highlights_query.len() as u32, + injections_query.len() as u32, + locals_query.len() as u32, + ); - let highlighter = c::ts_highlighter_new(attribute_strings.as_ptr()); let buffer = c::ts_highlight_buffer_new(); - c::ts_highlighter_add_language( - highlighter, - html_scope_name.as_ptr(), - html_lang, - html_sheet.as_ptr(), - ptr::null_mut(), - ); - c::ts_highlighter_add_language( - highlighter, - js_scope_name.as_ptr(), - js_lang, - js_sheet.as_ptr(), - injection_regex.as_ptr(), - ); c::ts_highlighter_highlight( highlighter, - html_scope_name.as_ptr(), + html_scope.as_ptr(), source_code.as_ptr(), source_code.as_bytes().len() as u32, buffer, @@ -452,50 +543,57 @@ fn c_string(s: &str) -> CString { CString::new(s.as_bytes().to_vec()).unwrap() } -fn test_language_for_injection_string<'a>( - string: &str, -) -> Option<(Language, &'a PropertySheet)> { +fn test_language_for_injection_string<'a>(string: &str) -> Option<&'a HighlightConfiguration> { match string { - "javascript" => Some((get_language("javascript"), &JS_SHEET)), - "html" => Some((get_language("html"), &HTML_SHEET)), - "rust" => Some((get_language("rust"), &RUST_SHEET)), + "javascript" => Some(&JS_HIGHLIGHT), + "html" => Some(&HTML_HIGHLIGHT), + "rust" => Some(&RUST_HIGHLIGHT), _ => None, } } fn to_html<'a>( src: &'a str, - language: Language, - property_sheet: &'a PropertySheet, + language_config: &'a HighlightConfiguration, ) -> Result, Error> { - highlight_html( - src.as_bytes(), - language, - property_sheet, + let src = src.as_bytes(); + let mut renderer = HtmlRenderer::new(); + let mut context = HighlightContext::new(); + let events = HIGHLIGHTER.highlight( + &mut context, + language_config, + src, None, &test_language_for_injection_string, - &|highlight| SCOPE_CLASS_STRINGS[highlight as usize].as_str(), - ) + )?; + + renderer + .render(events, src, &|highlight| HTML_ATTRS[highlight.0].as_bytes()) + .unwrap(); + Ok(renderer.lines().map(|s| s.to_string()).collect()) } fn to_token_vector<'a>( src: &'a str, - language: Language, - property_sheet: &'a PropertySheet, -) -> Result)>>, Error> { + language_config: &'a HighlightConfiguration, +) -> Result)>>, Error> { let src = src.as_bytes(); + let mut context = HighlightContext::new(); let mut lines = Vec::new(); let mut highlights = Vec::new(); let mut line = Vec::new(); - for event in highlight( + let events = HIGHLIGHTER.highlight( + &mut context, + language_config, src, - language, - property_sheet, None, &test_language_for_injection_string, - )? { + )?; + for event in events { match event? { - HighlightEvent::HighlightStart(s) => highlights.push(s), + HighlightEvent::HighlightStart(s) => { + highlights.push(HIGHLIGHTER.highlight_names[s.0].as_str()) + } HighlightEvent::HighlightEnd => { highlights.pop(); } diff --git a/highlight/include/tree_sitter/highlight.h b/highlight/include/tree_sitter/highlight.h index 8e879b5e..fb44f7a0 100644 --- a/highlight/include/tree_sitter/highlight.h +++ b/highlight/include/tree_sitter/highlight.h @@ -14,47 +14,15 @@ typedef enum { TSHighlightInvalidLanguage, } TSHighlightError; -// The list of scopes which can be styled for syntax highlighting. -// When constructing a `TSHighlighter`, you need to construct an -// `attribute_strings` array whose elements correspond to these values. -enum TSHighlightValue { - TSHighlightValueAttribute, - TSHighlightValueComment, - TSHighlightValueConstant, - TSHighlightValueConstantBuiltin, - TSHighlightValueConstructor, - TSHighlightValueConstructorBuiltin, - TSHighlightValueEmbedded, - TSHighlightValueEscape, - TSHighlightValueFunction, - TSHighlightValueFunctionBuiltin, - TSHighlightValueKeyword, - TSHighlightValueNumber, - TSHighlightValueOperator, - TSHighlightValueProperty, - TSHighlightValuePropertyBuiltin, - TSHighlightValuePunctuation, - TSHighlightValuePunctuationBracket, - TSHighlightValuePunctuationDelimiter, - TSHighlightValuePunctuationSpecial, - TSHighlightValueString, - TSHighlightValueStringSpecial, - TSHighlightValueTag, - TSHighlightValueType, - TSHighlightValueTypeBuiltin, - TSHighlightValueVariable, - TSHighlightValueVariableBuiltin, - TSHighlightValueVariableParameter, - TSHighlightValueUnknown, -}; - typedef struct TSHighlighter TSHighlighter; typedef struct TSHighlightBuffer TSHighlightBuffer; // Construct a `TSHighlighter` by providing a list of strings containing // the HTML attributes that should be applied for each highlight value. TSHighlighter *ts_highlighter_new( - const char **attribute_strings + const char **highlight_names, + const char **attribute_strings, + uint32_t highlight_count ); // Delete a syntax highlighter. @@ -70,9 +38,14 @@ void ts_highlighter_delete(TSHighlighter *); int ts_highlighter_add_language( TSHighlighter *self, const char *scope_name, + const char *injection_regex, const TSLanguage *language, - const char *property_sheet_json, - const char *injection_regex + const char *highlight_query, + const char *injection_query, + const char *locals_query, + uint32_t highlight_query_len, + uint32_t injection_query_len, + uint32_t locals_query_len ); // Compute syntax highlighting for a given document. You must first diff --git a/highlight/src/c_lib.rs b/highlight/src/c_lib.rs index 063ab990..57eed04c 100644 --- a/highlight/src/c_lib.rs +++ b/highlight/src/c_lib.rs @@ -1,25 +1,23 @@ -use super::{load_property_sheet, Error, Highlight, Highlighter, HtmlRenderer, Properties}; +use super::{Error, HighlightConfiguration, HighlightContext, Highlighter, HtmlRenderer}; use regex::Regex; use std::collections::HashMap; use std::ffi::CStr; use std::os::raw::c_char; use std::process::abort; use std::sync::atomic::AtomicUsize; -use std::{fmt, slice}; -use tree_sitter::{Language, PropertySheet}; - -struct LanguageConfiguration { - language: Language, - property_sheet: PropertySheet, - injection_regex: Option, -} +use std::{fmt, slice, str}; +use tree_sitter::Language; pub struct TSHighlighter { - languages: HashMap, + languages: HashMap, HighlightConfiguration)>, attribute_strings: Vec<&'static [u8]>, + highlighter: Highlighter, } -pub struct TSHighlightBuffer(HtmlRenderer); +pub struct TSHighlightBuffer { + context: HighlightContext, + renderer: HtmlRenderer, +} #[repr(C)] pub enum ErrorCode { @@ -27,33 +25,113 @@ pub enum ErrorCode { UnknownScope, Timeout, InvalidLanguage, + InvalidUtf8, + InvalidRegex, + InvalidQuery, } #[no_mangle] pub extern "C" fn ts_highlighter_new( + highlight_names: *const *const c_char, attribute_strings: *const *const c_char, + highlight_count: u32, ) -> *mut TSHighlighter { + let highlight_names = + unsafe { slice::from_raw_parts(highlight_names, highlight_count as usize) }; let attribute_strings = - unsafe { slice::from_raw_parts(attribute_strings, Highlight::Unknown as usize + 1) }; + unsafe { slice::from_raw_parts(attribute_strings, highlight_count as usize) }; + let highlight_names = highlight_names + .into_iter() + .map(|s| unsafe { CStr::from_ptr(*s).to_string_lossy().to_string() }) + .collect(); let attribute_strings = attribute_strings .into_iter() - .map(|s| { - if s.is_null() { - &[] - } else { - unsafe { CStr::from_ptr(*s).to_bytes() } - } - }) + .map(|s| unsafe { CStr::from_ptr(*s).to_bytes() }) .collect(); + let highlighter = Highlighter::new(highlight_names); Box::into_raw(Box::new(TSHighlighter { languages: HashMap::new(), attribute_strings, + highlighter, })) } +#[no_mangle] +pub extern "C" fn ts_highlighter_add_language( + this: *mut TSHighlighter, + scope_name: *const c_char, + injection_regex: *const c_char, + language: Language, + highlight_query: *const c_char, + injection_query: *const c_char, + locals_query: *const c_char, + highlight_query_len: u32, + injection_query_len: u32, + locals_query_len: u32, +) -> ErrorCode { + let f = move || { + let this = unwrap_mut_ptr(this); + let scope_name = unsafe { CStr::from_ptr(scope_name) }; + let scope_name = scope_name + .to_str() + .or(Err(ErrorCode::InvalidUtf8))? + .to_string(); + let injection_regex = if injection_regex.is_null() { + None + } else { + let pattern = unsafe { CStr::from_ptr(injection_regex) }; + let pattern = pattern.to_str().or(Err(ErrorCode::InvalidUtf8))?; + Some(Regex::new(pattern).or(Err(ErrorCode::InvalidRegex))?) + }; + + let highlight_query = unsafe { + slice::from_raw_parts(highlight_query as *const u8, highlight_query_len as usize) + }; + let highlight_query = str::from_utf8(highlight_query).or(Err(ErrorCode::InvalidUtf8))?; + + let injection_query = if injection_query_len > 0 { + let query = unsafe { + slice::from_raw_parts(injection_query as *const u8, injection_query_len as usize) + }; + str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))? + } else { + "" + }; + + let locals_query = if locals_query_len > 0 { + let query = unsafe { + slice::from_raw_parts(locals_query as *const u8, locals_query_len as usize) + }; + str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))? + } else { + "" + }; + + this.languages.insert( + scope_name, + ( + injection_regex, + this.highlighter + .load_configuration(language, highlight_query, injection_query, locals_query) + .or(Err(ErrorCode::InvalidQuery))?, + ), + ); + + Ok(()) + }; + + match f() { + Ok(()) => ErrorCode::Ok, + Err(e) => e, + } +} + #[no_mangle] pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer { - Box::into_raw(Box::new(TSHighlightBuffer(HtmlRenderer::new()))) + Box::into_raw(Box::new(TSHighlightBuffer { + context: HighlightContext::new(), + renderer: HtmlRenderer::new(), + })) } #[no_mangle] @@ -69,59 +147,25 @@ pub extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) { #[no_mangle] pub extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 { let this = unwrap_ptr(this); - this.0.html.as_slice().as_ptr() + this.renderer.html.as_slice().as_ptr() } #[no_mangle] pub extern "C" fn ts_highlight_buffer_line_offsets(this: *const TSHighlightBuffer) -> *const u32 { let this = unwrap_ptr(this); - this.0.line_offsets.as_slice().as_ptr() + this.renderer.line_offsets.as_slice().as_ptr() } #[no_mangle] pub extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 { let this = unwrap_ptr(this); - this.0.html.len() as u32 + this.renderer.html.len() as u32 } #[no_mangle] pub extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 { let this = unwrap_ptr(this); - this.0.line_offsets.len() as u32 -} - -#[no_mangle] -pub extern "C" fn ts_highlighter_add_language( - this: *mut TSHighlighter, - scope_name: *const c_char, - language: Language, - property_sheet_json: *const c_char, - injection_regex: *const c_char, -) -> ErrorCode { - let this = unwrap_mut_ptr(this); - let scope_name = unsafe { CStr::from_ptr(scope_name) }; - let scope_name = unwrap(scope_name.to_str()).to_string(); - let property_sheet_json = unsafe { CStr::from_ptr(property_sheet_json) }; - let property_sheet_json = unwrap(property_sheet_json.to_str()); - - let property_sheet = unwrap(load_property_sheet(language, property_sheet_json)); - let injection_regex = if injection_regex.is_null() { - None - } else { - let pattern = unsafe { CStr::from_ptr(injection_regex) }; - Some(unwrap(Regex::new(unwrap(pattern.to_str())))) - }; - - this.languages.insert( - scope_name, - LanguageConfiguration { - language, - property_sheet, - injection_regex, - }, - ); - - ErrorCode::Ok + this.renderer.line_offsets.len() as u32 } #[no_mangle] @@ -150,36 +194,36 @@ impl TSHighlighter { output: &mut TSHighlightBuffer, cancellation_flag: Option<&AtomicUsize>, ) -> ErrorCode { - let configuration = self.languages.get(scope_name); - if configuration.is_none() { + let entry = self.languages.get(scope_name); + if entry.is_none() { return ErrorCode::UnknownScope; } - let configuration = configuration.unwrap(); + let (_, configuration) = entry.unwrap(); let languages = &self.languages; - let highlighter = Highlighter::new( + let highlights = self.highlighter.highlight( + &mut output.context, + configuration, source_code, - configuration.language, - &configuration.property_sheet, - |injection_string| { - languages.values().find_map(|conf| { - conf.injection_regex.as_ref().and_then(|regex| { + cancellation_flag, + move |injection_string| { + languages.values().find_map(|(injection_regex, config)| { + injection_regex.as_ref().and_then(|regex| { if regex.is_match(injection_string) { - Some((conf.language, &conf.property_sheet)) + Some(config) } else { None } }) }) }, - cancellation_flag, ); - if let Ok(highlighter) = highlighter { - output.0.reset(); - let result = output.0.render(highlighter, source_code, &|s| { - self.attribute_strings[s as usize] - }); + if let Ok(highlights) = highlights { + output.renderer.reset(); + let result = output + .renderer + .render(highlights, source_code, &|s| self.attribute_strings[s.0]); match result { Err(Error::Cancelled) => { return ErrorCode::Timeout; diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index 477a640d..a362dab0 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -1,16 +1,18 @@ pub mod c_lib; pub mod util; - pub use c_lib as c; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use serde_derive::*; -use std::mem::transmute; + use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{cmp, fmt, str, usize}; -use tree_sitter::{Language, Node, Parser, Point, PropertySheet, Range, Tree, TreePropertyCursor}; +use std::{iter, mem, ops, str, usize}; +use tree_sitter::{ + Language, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, Range, Tree, +}; const CANCELLATION_CHECK_INTERVAL: usize = 100; +#[derive(Copy, Clone, Debug)] +pub struct Highlight(pub usize); + #[derive(Debug, PartialEq, Eq)] pub enum Error { Cancelled, @@ -19,104 +21,10 @@ pub enum Error { } #[derive(Debug)] -enum TreeStep { - Child { - index: isize, - kinds: Option>, - }, - Children { - kinds: Option>, - }, - Next { - kinds: Option>, - }, -} - -#[derive(Debug)] -enum InjectionLanguage { - Literal(String), - TreePath(Vec), -} - -#[derive(Debug)] -struct Injection { - language: InjectionLanguage, - content: Vec, - includes_children: bool, -} - -#[derive(Debug)] -pub struct Properties { - highlight: Option, - highlight_nonlocal: Option, - injections: Vec, - local_scope: Option, - local_definition: bool, - local_reference: bool, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[repr(u16)] -pub enum Highlight { - Attribute, - Comment, - Constant, - ConstantBuiltin, - Constructor, - ConstructorBuiltin, - Embedded, - Escape, - Function, - FunctionBuiltin, - Keyword, - Number, - Operator, - Property, - PropertyBuiltin, - Punctuation, - PunctuationBracket, - PunctuationDelimiter, - PunctuationSpecial, - String, - StringSpecial, - Tag, - Type, - TypeBuiltin, - Variable, - VariableBuiltin, - VariableParameter, - Unknown, -} - -#[derive(Debug)] -struct Scope<'a> { +struct LocalScope<'a> { inherits: bool, - local_defs: Vec<(&'a str, Highlight)>, -} - -struct Layer<'a> { - _tree: Tree, - cursor: TreePropertyCursor<'a, Properties>, - ranges: Vec, - at_node_end: bool, - depth: usize, - opaque: bool, - scope_stack: Vec>, - local_highlight: Option, -} - -struct Highlighter<'a, T> -where - T: Fn(&str) -> Option<(Language, &'a PropertySheet)>, -{ - injection_callback: T, - source: &'a [u8], - source_offset: usize, - parser: Parser, - layers: Vec>, - max_opaque_layer_depth: usize, - operation_count: usize, - cancellation_flag: Option<&'a AtomicUsize>, + range: ops::Range, + local_defs: Vec<(&'a str, Option)>, } #[derive(Copy, Clone, Debug)] @@ -126,439 +34,256 @@ pub enum HighlightEvent { HighlightEnd, } -#[derive(Debug, Deserialize)] -#[serde(untagged)] -enum TreePathArgJSON { - TreePath(TreePathJSON), - Number(isize), - String(String), +pub struct HighlightConfiguration { + pub language: Language, + pub query: Query, + locals_pattern_index: usize, + highlights_pattern_index: usize, + highlight_indices: Vec>, + non_local_variable_patterns: Vec, + injection_site_capture_index: Option, + injection_content_capture_index: Option, + injection_language_capture_index: Option, + local_scope_capture_index: Option, + local_def_capture_index: Option, + local_ref_capture_index: Option, } -#[derive(Debug, Deserialize)] -#[serde(tag = "name")] -enum TreePathJSON { - #[serde(rename = "this")] - This, - #[serde(rename = "child")] - Child { args: Vec }, - #[serde(rename = "next")] - Next { args: Vec }, - #[serde(rename = "children")] - Children { args: Vec }, +#[derive(Clone, Debug)] +pub struct Highlighter { + pub highlight_names: Vec, } -#[derive(Debug, Deserialize)] -#[serde(untagged)] -enum InjectionLanguageJSON { - List(Vec), - TreePath(TreePathJSON), - Literal(String), +pub struct HighlightContext { + parser: Parser, + cursors: Vec, } -#[derive(Debug, Deserialize)] -#[serde(untagged)] -enum InjectionContentJSON { - List(Vec), - TreePath(TreePathJSON), -} - -#[derive(Debug, Deserialize)] -#[serde(untagged)] -enum InjectionIncludesChildrenJSON { - List(Vec), - Single(bool), -} - -#[derive(Debug, Deserialize)] -struct PropertiesJSON { - highlight: Option, - #[serde(rename = "highlight-nonlocal")] - highlight_nonlocal: Option, - - #[serde(rename = "injection-language")] - injection_language: Option, - #[serde(rename = "injection-content")] - injection_content: Option, - #[serde(default, rename = "injection-includes-children")] - injection_includes_children: Option, - - #[serde(default, rename = "local-scope")] - local_scope: bool, - #[serde(default, rename = "local-scope-inherit")] - local_scope_inherit: bool, - #[serde(default, rename = "local-definition")] - local_definition: bool, - #[serde(default, rename = "local-reference")] - local_reference: bool, -} - -#[derive(Debug)] -pub enum PropertySheetError { - InvalidJSON(serde_json::Error), - InvalidRegex(regex::Error), - InvalidFormat(String), -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Error::Cancelled => write!(f, "Cancelled"), - Error::InvalidLanguage => write!(f, "Invalid language"), - Error::Unknown => write!(f, "Unknown error"), - } - } -} - -impl fmt::Display for PropertySheetError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - PropertySheetError::InvalidJSON(e) => e.fmt(f), - PropertySheetError::InvalidRegex(e) => e.fmt(f), - PropertySheetError::InvalidFormat(e) => e.fmt(f), - } - } -} - -impl<'a> fmt::Debug for Layer<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "Layer {{ at_node_end: {}, node: {:?} }}", - self.at_node_end, - self.cursor.node() - )?; - Ok(()) - } -} - -pub fn load_property_sheet( - language: Language, - json: &str, -) -> Result, PropertySheetError> { - let sheet = PropertySheet::new(language, json).map_err(|e| match e { - tree_sitter::PropertySheetError::InvalidJSON(e) => PropertySheetError::InvalidJSON(e), - tree_sitter::PropertySheetError::InvalidRegex(e) => PropertySheetError::InvalidRegex(e), - })?; - let sheet = sheet - .map(|p| Properties::new(p, language)) - .map_err(PropertySheetError::InvalidFormat)?; - Ok(sheet) -} - -impl Highlight { - pub fn from_usize(i: usize) -> Option { - if i <= (Highlight::Unknown as usize) { - Some(unsafe { transmute(i as u16) }) - } else { - None - } - } -} - -impl Properties { - fn new(json: PropertiesJSON, language: Language) -> Result { - let injections = match (json.injection_language, json.injection_content) { - (None, None) => Ok(Vec::new()), - (Some(_), None) => Err( - "Must specify an injection-content along with an injection-language".to_string(), - ), - (None, Some(_)) => Err( - "Must specify an injection-language along with an injection-content".to_string(), - ), - (Some(language_json), Some(content_json)) => { - let languages = match language_json { - InjectionLanguageJSON::List(list) => { - let mut result = Vec::with_capacity(list.len()); - for element in list { - result.push(match element { - InjectionLanguageJSON::TreePath(p) => { - let mut result = Vec::new(); - Self::flatten_tree_path(p, &mut result, language)?; - InjectionLanguage::TreePath(result) - } - InjectionLanguageJSON::Literal(s) => InjectionLanguage::Literal(s), - InjectionLanguageJSON::List(_) => { - panic!("Injection-language cannot be a list of lists") - } - }) - } - result - } - InjectionLanguageJSON::TreePath(p) => vec![{ - let mut result = Vec::new(); - Self::flatten_tree_path(p, &mut result, language)?; - InjectionLanguage::TreePath(result) - }], - InjectionLanguageJSON::Literal(s) => vec![InjectionLanguage::Literal(s)], - }; - - let contents = match content_json { - InjectionContentJSON::List(l) => { - let mut result = Vec::with_capacity(l.len()); - for element in l { - result.push(match element { - InjectionContentJSON::TreePath(p) => { - let mut result = Vec::new(); - Self::flatten_tree_path(p, &mut result, language)?; - result - } - InjectionContentJSON::List(_) => { - panic!("Injection-content cannot be a list of lists") - } - }) - } - result - } - InjectionContentJSON::TreePath(p) => vec![{ - let mut result = Vec::new(); - Self::flatten_tree_path(p, &mut result, language)?; - result - }], - }; - - let mut includes_children = match json.injection_includes_children { - Some(InjectionIncludesChildrenJSON::List(v)) => v, - Some(InjectionIncludesChildrenJSON::Single(v)) => vec![v], - None => vec![false], - }; - - if languages.len() == contents.len() { - includes_children.resize(languages.len(), includes_children[0]); - Ok(languages - .into_iter() - .zip(contents.into_iter()) - .zip(includes_children.into_iter()) - .map(|((language, content), includes_children)| Injection { - language, - content, - includes_children, - }) - .collect()) - } else { - Err(format!( - "Mismatch: got {} injection-language values but {} injection-content values", - languages.len(), - contents.len(), - )) - } - } - }?; - - Ok(Self { - highlight: json.highlight, - highlight_nonlocal: json.highlight_nonlocal, - local_scope: if json.local_scope { - Some(json.local_scope_inherit) - } else { - None - }, - local_definition: json.local_definition, - local_reference: json.local_reference, - injections, - }) - } - - // Transform a tree path from the format expressed directly in the property sheet - // (nested function calls), to a flat sequence of steps for transforming a list of - // nodes. This way, we can evaluate these tree paths with no recursion and a single - // vector of intermediate storage. - fn flatten_tree_path( - p: TreePathJSON, - steps: &mut Vec, - language: Language, - ) -> Result<(), String> { - match p { - TreePathJSON::This => {} - TreePathJSON::Child { args } => { - let (tree_path, index, kinds) = Self::parse_args("child", args, language)?; - Self::flatten_tree_path(tree_path, steps, language)?; - steps.push(TreeStep::Child { - index: index - .ok_or_else(|| "The `child` function requires an index".to_string())?, - kinds: kinds, - }); - } - TreePathJSON::Children { args } => { - let (tree_path, _, kinds) = Self::parse_args("children", args, language)?; - Self::flatten_tree_path(tree_path, steps, language)?; - steps.push(TreeStep::Children { kinds }); - } - TreePathJSON::Next { args } => { - let (tree_path, _, kinds) = Self::parse_args("next", args, language)?; - Self::flatten_tree_path(tree_path, steps, language)?; - steps.push(TreeStep::Next { kinds }); - } - } - Ok(()) - } - - fn parse_args( - name: &str, - args: Vec, - language: Language, - ) -> Result<(TreePathJSON, Option, Option>), String> { - let tree_path; - let mut index = None; - let mut kinds = Vec::new(); - let mut iter = args.into_iter(); - - match iter.next() { - Some(TreePathArgJSON::TreePath(p)) => tree_path = p, - _ => { - return Err(format!( - "First argument to `{}()` must be a tree path", - name - )); - } - } - - for arg in iter { - match arg { - TreePathArgJSON::TreePath(_) => { - return Err(format!( - "Other arguments to `{}()` must be strings or numbers", - name - )); - } - TreePathArgJSON::Number(i) => index = Some(i), - TreePathArgJSON::String(s) => kinds.push(s), - } - } - - if kinds.len() > 0 { - let mut kind_ids = Vec::new(); - for i in 0..(language.node_kind_count() as u16) { - if kinds.iter().any(|s| s == language.node_kind_for_id(i)) - && language.node_kind_is_named(i) - { - kind_ids.push(i); - } - } - if kind_ids.len() == 0 { - return Err(format!("Non-existent node kinds: {:?}", kinds)); - } - - Ok((tree_path, index, Some(kind_ids))) - } else { - Ok((tree_path, index, None)) - } - } -} - -impl<'a, F> Highlighter<'a, F> +struct HighlightIter<'a, F> where - F: Fn(&str) -> Option<(Language, &'a PropertySheet)>, + F: Fn(&str) -> Option<&'a HighlightConfiguration> + 'a, { - fn new( - source: &'a [u8], + source: &'a [u8], + byte_offset: usize, + context: &'a mut HighlightContext, + injection_callback: F, + cancellation_flag: Option<&'a AtomicUsize>, + layers: Vec>, + iter_count: usize, + next_event: Option, +} + +struct HighlightIterLayer<'a> { + _tree: Tree, + cursor: QueryCursor, + captures: iter::Peekable>, + config: &'a HighlightConfiguration, + highlight_end_stack: Vec, + scope_stack: Vec>, + ranges: Vec, +} + +impl HighlightContext { + pub fn new() -> Self { + HighlightContext { + parser: Parser::new(), + cursors: Vec::new(), + } + } +} + +impl Highlighter { + pub fn new(highlight_names: Vec) -> Self { + Highlighter { highlight_names } + } + + pub fn load_configuration( + &self, language: Language, - property_sheet: &'a PropertySheet, - injection_callback: F, - cancellation_flag: Option<&'a AtomicUsize>, - ) -> Result { - let mut parser = Parser::new(); - unsafe { parser.set_cancellation_flag(cancellation_flag.clone()) }; - parser - .set_language(language) - .map_err(|_| Error::InvalidLanguage)?; - let tree = parser.parse(source, None).ok_or_else(|| Error::Cancelled)?; - Ok(Self { - parser, - source, - cancellation_flag, - injection_callback, - source_offset: 0, - operation_count: 0, - max_opaque_layer_depth: 0, - layers: vec![Layer::new( - source, - tree, - property_sheet, - vec![Range { - start_byte: 0, - end_byte: usize::MAX, - start_point: Point::new(0, 0), - end_point: Point::new(usize::MAX, usize::MAX), - }], - 0, - true, - )], + highlights_query: &str, + injection_query: &str, + locals_query: &str, + ) -> Result { + // Concatenate the query strings, keeping track of the start offset of each section. + let mut query_source = String::new(); + query_source.push_str(injection_query); + let locals_query_offset = query_source.len(); + query_source.push_str(locals_query); + let highlights_query_offset = query_source.len(); + query_source.push_str(highlights_query); + + // Construct a query with the concatenated string. + let query = Query::new(language, &query_source)?; + + // Determine the range of pattern indices that belong to each section of the query. + let mut locals_pattern_index = 0; + let mut highlights_pattern_index = 0; + for i in 0..(query.pattern_count()) { + let pattern_offset = query.start_byte_for_pattern(i); + if pattern_offset < highlights_query_offset { + if pattern_offset < highlights_query_offset { + highlights_pattern_index += 1; + } + if pattern_offset < locals_query_offset { + locals_pattern_index += 1; + } + } + } + + // Compute a mapping from the query's capture ids to the indices of the highlighter's + // recognized highlight names. + let highlight_indices = query + .capture_names() + .iter() + .map(move |capture_name| { + let mut best_index = None; + let mut best_name_len = 0; + let mut best_common_prefix_len = 0; + for (i, highlight_name) in self.highlight_names.iter().enumerate() { + if highlight_name.len() > capture_name.len() { + continue; + } + + let capture_parts = capture_name.split('.'); + let highlight_parts = highlight_name.split('.'); + let common_prefix_len = capture_parts + .zip(highlight_parts) + .take_while(|(a, b)| a == b) + .count(); + let is_best_match = common_prefix_len > best_common_prefix_len + || (common_prefix_len == best_common_prefix_len + && highlight_name.len() < best_name_len); + if is_best_match { + best_index = Some(i); + best_name_len = highlight_name.len(); + best_common_prefix_len = common_prefix_len; + } + } + best_index.map(Highlight) + }) + .collect(); + + let non_local_variable_patterns = (0..query.pattern_count()) + .map(|i| { + query + .property_predicates(i) + .iter() + .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local") + }) + .collect(); + + let mut injection_content_capture_index = None; + let mut injection_language_capture_index = None; + let mut injection_site_capture_index = None; + let mut local_def_capture_index = None; + let mut local_ref_capture_index = None; + let mut local_scope_capture_index = None; + for (i, name) in query.capture_names().iter().enumerate() { + let i = Some(i as u32); + match name.as_str() { + "injection.content" => injection_content_capture_index = i, + "injection.language" => injection_language_capture_index = i, + "injection.site" => injection_site_capture_index = i, + "local.definition" => local_def_capture_index = i, + "local.reference" => local_ref_capture_index = i, + "local.scope" => local_scope_capture_index = i, + _ => {} + } + } + + Ok(HighlightConfiguration { + query, + language, + locals_pattern_index, + highlights_pattern_index, + highlight_indices, + non_local_variable_patterns, + injection_content_capture_index, + injection_language_capture_index, + injection_site_capture_index, + local_def_capture_index, + local_ref_capture_index, + local_scope_capture_index, }) } - fn emit_source(&mut self, next_offset: usize) -> HighlightEvent { - let result = HighlightEvent::Source { - start: self.source_offset, - end: next_offset, - }; - self.source_offset = next_offset; - result - } + pub fn highlight<'a>( + &'a self, + context: &'a mut HighlightContext, + config: &'a HighlightConfiguration, + source: &'a [u8], + cancellation_flag: Option<&'a AtomicUsize>, + injection_callback: impl Fn(&str) -> Option<&'a HighlightConfiguration> + 'a, + ) -> Result> + 'a, Error> { + let layer = HighlightIterLayer::new( + config, + source, + context, + cancellation_flag, + vec![Range { + start_byte: 0, + end_byte: usize::MAX, + start_point: Point::new(0, 0), + end_point: Point::new(usize::MAX, usize::MAX), + }], + )?; - fn process_tree_step(&self, step: &TreeStep, nodes: &mut Vec) { - let len = nodes.len(); - for i in 0..len { - let node = nodes[i]; - match step { - TreeStep::Child { index, kinds } => { - let index = if *index >= 0 { - *index as usize - } else { - (node.child_count() as isize + *index) as usize - }; - if let Some(child) = node.child(index) { - if let Some(kinds) = kinds { - if kinds.contains(&child.kind_id()) { - nodes.push(child); - } - } else { - nodes.push(child); - } - } - } - TreeStep::Children { kinds } => { - for child in node.children() { - if let Some(kinds) = kinds { - if kinds.contains(&child.kind_id()) { - nodes.push(child); - } - } else { - nodes.push(child); - } - } - } - TreeStep::Next { .. } => unimplemented!(), - } - } - nodes.drain(0..len); + Ok(HighlightIter { + source, + byte_offset: 0, + injection_callback, + cancellation_flag, + context, + iter_count: 0, + layers: vec![layer], + next_event: None, + }) } +} - fn nodes_for_tree_path(&self, node: Node<'a>, steps: &Vec) -> Vec> { - let mut nodes = vec![node]; - for step in steps.iter() { - self.process_tree_step(step, &mut nodes); - } - nodes - } +impl<'a> HighlightIterLayer<'a> { + fn new( + config: &'a HighlightConfiguration, + source: &'a [u8], + context: &mut HighlightContext, + cancellation_flag: Option<&'a AtomicUsize>, + ranges: Vec, + ) -> Result { + context + .parser + .set_language(config.language) + .map_err(|_| Error::InvalidLanguage)?; + unsafe { context.parser.set_cancellation_flag(cancellation_flag) }; - // An injected language name may either be specified as a fixed string, or based - // on the text of some node in the syntax tree. - fn injection_language_string( - &self, - node: &Node<'a>, - language: &InjectionLanguage, - ) -> Option { - match language { - InjectionLanguage::Literal(s) => Some(s.to_string()), - InjectionLanguage::TreePath(steps) => self - .nodes_for_tree_path(*node, steps) - .first() - .and_then(|node| { - str::from_utf8(&self.source[node.start_byte()..node.end_byte()]) - .map(|s| s.to_owned()) - .ok() - }), - } + context.parser.set_included_ranges(&ranges); + + let tree = context.parser.parse(source, None).ok_or(Error::Cancelled)?; + let mut cursor = context.cursors.pop().unwrap_or(QueryCursor::new()); + + // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which + // prevents them from being moved. But both of these values are really just + // pointers, so it's actually ok to move them. + let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(&tree) }; + let cursor_ref = unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; + let captures = cursor_ref + .captures(&config.query, tree_ref.root_node(), move |n| { + &source[n.byte_range()] + }) + .peekable(); + + Ok(HighlightIterLayer { + highlight_end_stack: Vec::new(), + scope_stack: vec![LocalScope { + inherits: false, + range: 0..usize::MAX, + local_defs: Vec::new(), + }], + cursor, + _tree: tree, + captures, + config, + ranges, + }) } // Compute the ranges that should be included when parsing an injection. @@ -572,13 +297,9 @@ where // excluded from the nested document, so that only the content nodes' *own* content // is reparsed. For other injections, the content nodes' entire ranges should be // reparsed, including the ranges of their children. - fn intersect_ranges( - parent_ranges: &Vec, - nodes: &Vec, - includes_children: bool, - ) -> Vec { + fn intersect_ranges(&self, nodes: &Vec, includes_children: bool) -> Vec { let mut result = Vec::new(); - let mut parent_range_iter = parent_ranges.iter(); + let mut parent_range_iter = self.ranges.iter(); let mut parent_range = parent_range_iter .next() .expect("Layers should only be constructed with non-empty ranges vectors"); @@ -656,456 +377,354 @@ where result } - fn add_layer( - &mut self, - language_string: &str, - ranges: Vec, - depth: usize, - includes_children: bool, - ) -> Option { - if let Some((language, property_sheet)) = (self.injection_callback)(language_string) { - if self.parser.set_language(language).is_err() { - return Some(Error::InvalidLanguage); - } - self.parser.set_included_ranges(&ranges); - if let Some(tree) = self.parser.parse(self.source, None) { - let layer = Layer::new( - self.source, - tree, - property_sheet, - ranges, - depth, - includes_children, - ); - if includes_children && depth > self.max_opaque_layer_depth { - self.max_opaque_layer_depth = depth; - } - match self.layers.binary_search_by(|l| l.cmp(&layer)) { - Ok(i) | Err(i) => self.layers.insert(i, layer), - }; - } else { - return Some(Error::Cancelled); - } - } - None - } - - fn remove_first_layer(&mut self) { - let layer = self.layers.remove(0); - if layer.opaque && layer.depth == self.max_opaque_layer_depth { - self.max_opaque_layer_depth = self - .layers - .iter() - .filter_map(|l| if l.opaque { Some(l.depth) } else { None }) - .max() - .unwrap_or(0); + fn offset(&mut self) -> Option { + let next_start = self + .captures + .peek() + .map(|(m, i)| m.captures[*i].node.start_byte()); + let next_end = self.highlight_end_stack.last().cloned(); + match (next_start, next_end) { + (Some(i), Some(j)) => Some(usize::min(i, j)), + (Some(i), None) => Some(i), + (None, Some(j)) => Some(j), + _ => None, } } } -impl<'a, T> Iterator for Highlighter<'a, T> +impl<'a, F> HighlightIter<'a, F> where - T: Fn(&str) -> Option<(Language, &'a PropertySheet)>, + F: Fn(&str) -> Option<&'a HighlightConfiguration> + 'a, +{ + fn emit_event( + &mut self, + offset: usize, + event: Option, + ) -> Option> { + let result; + if self.byte_offset < offset { + result = Some(Ok(HighlightEvent::Source { + start: self.byte_offset, + end: offset, + })); + self.byte_offset = offset; + self.next_event = event; + } else { + result = event.map(Ok); + } + self.sort_layers(); + result + } + + fn sort_layers(&mut self) { + if let Some(offset) = self.layers[0].offset() { + let mut i = 0; + while i + 1 < self.layers.len() { + if let Some(next_offset) = self.layers[i + 1].offset() { + if next_offset < offset { + i += 1; + continue; + } + } + break; + } + if i > 0 { + &self.layers[0..(i + 1)].rotate_left(i); + } + } else { + let layer = self.layers.remove(0); + self.context.cursors.push(layer.cursor); + } + } +} + +impl<'a, F> Iterator for HighlightIter<'a, F> +where + F: Fn(&str) -> Option<&'a HighlightConfiguration> + 'a, { type Item = Result; fn next(&mut self) -> Option { - if let Some(cancellation_flag) = self.cancellation_flag { - self.operation_count += 1; - if self.operation_count >= CANCELLATION_CHECK_INTERVAL { - self.operation_count = 0; - if cancellation_flag.load(Ordering::Relaxed) != 0 { - return Some(Err(Error::Cancelled)); + loop { + // If we've already determined the next highlight boundary, just return it. + if let Some(e) = self.next_event.take() { + return Some(Ok(e)); + } + + // Periodically check for cancellation, returning `Cancelled` error if the + // cancellation flag was flipped. + if let Some(cancellation_flag) = self.cancellation_flag { + self.iter_count += 1; + if self.iter_count >= CANCELLATION_CHECK_INTERVAL { + self.iter_count = 0; + if cancellation_flag.load(Ordering::Relaxed) != 0 { + return Some(Err(Error::Cancelled)); + } } } - } - while !self.layers.is_empty() { - let mut scope_event = None; - let first_layer = &self.layers[0]; + // If none of the layers have any more scope boundaries, terminate. + if self.layers.is_empty() { + if self.byte_offset < self.source.len() { + let result = Some(Ok(HighlightEvent::Source { + start: self.byte_offset, + end: self.source.len(), + })); + self.byte_offset = self.source.len(); + return result; + } else { + return None; + } + } - // If the current layer is not covered up by a nested layer, then - // process any scope boundaries and language injections for the layer's - // current position. - let first_layer_is_visible = first_layer.depth >= self.max_opaque_layer_depth; - if first_layer_is_visible { - let local_highlight = first_layer.local_highlight; - let properties = &first_layer.cursor.node_properties(); + // Get the next capture. If there are no more captures, then emit the rest of the + // source code. + let match_; + let mut capture; + let mut pattern_index; + let layer = &mut self.layers[0]; + if let Some((m, capture_index)) = layer.captures.peek() { + match_ = m; + pattern_index = match_.pattern_index; + capture = match_.captures[*capture_index]; + } else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { + layer.highlight_end_stack.pop(); + return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); + } else { + return self.emit_event(self.source.len(), None); + }; - // Add any injections for the current node. - if !first_layer.at_node_end { - let node = first_layer.cursor.node(); - let injections = properties - .injections + // If any previous highlight ends before this node starts, then before + // processing this capture, emit the source code up until the end of the + // previous highlight, and an end event for that highlight. + let range = capture.node.byte_range(); + if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { + if end_byte <= range.start { + layer.highlight_end_stack.pop(); + return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); + } + } + + // Remove from the scope stack any local scopes that have already ended. + while range.start > layer.scope_stack.last().unwrap().range.end { + layer.scope_stack.pop(); + } + + // If this capture represents an injection, then process the injection. + if pattern_index < layer.config.locals_pattern_index { + let site_capture_index = layer.config.injection_site_capture_index; + let content_capture_index = layer.config.injection_content_capture_index; + let language_capture_index = layer.config.injection_language_capture_index; + + // Find the language name and the node that represents the injection content. + let mut injection_site = None; + let mut injection_language = None; + let mut injection_contents = Vec::new(); + for capture in match_.captures { + let index = Some(capture.index); + if index == site_capture_index { + injection_site = Some(capture.node); + } else if index == language_capture_index { + injection_language = capture.node.utf8_text(self.source).ok(); + } else if index == content_capture_index { + injection_contents.push(capture.node); + } + } + + // In addition to specifying the language name via the text of a captured node, + // it can also be hard-coded via a `(set! injection.language )` + // predicate. + if injection_language.is_none() { + injection_language = layer + .config + .query + .property_settings(pattern_index) .iter() - .filter_map( - |Injection { - language, - content, - includes_children, - }| { - if let Some(language) = - self.injection_language_string(&node, language) - { - let nodes = self.nodes_for_tree_path(node, content); - let ranges = Self::intersect_ranges( - &first_layer.ranges, - &nodes, - *includes_children, - ); - if ranges.len() > 0 { - return Some((language, ranges, *includes_children)); - } - } + .find_map(|prop| { + if prop.key.as_ref() == "injection.language" { + prop.value.as_ref().map(|s| s.as_ref()) + } else { None - }, - ) - .collect::>(); + } + }); + } - let depth = first_layer.depth + 1; - for (language, ranges, includes_children) in injections { - if let Some(error) = - self.add_layer(&language, ranges, depth, includes_children) + // For injections, we process entire matches at once, as opposed to processing + // each capture separately, interspersed with captures form other patterns. + // Explicitly remove this match so that none of its other captures will remain + // in the stream of captures. + layer.captures.next().unwrap().0.remove(); + + // If an `injection.site` was captured, then find any subsequent matches + // with the same pattern and `injection.site` capture. Those matches should + // all be combined into this match. This allows you to specify that a single + // injected document spans multiple 'content' nodes. + if let Some(injection_site) = injection_site { + while let Some((next_match, _)) = layer.captures.peek() { + if next_match.pattern_index == pattern_index + && next_match.captures.iter().any(|c| { + Some(c.index) == site_capture_index && c.node == injection_site + }) { - return Some(Err(error)); + injection_contents.extend(next_match.captures.iter().filter_map(|c| { + if Some(c.index) == content_capture_index { + Some(c.node) + } else { + None + } + })); + layer.captures.next().unwrap().0.remove(); + continue; + } + break; + } + } + + // If a language is found with the given name, then add a new language layer + // to the highlighted document. + if let Some(config) = injection_language.and_then(&self.injection_callback) { + if !injection_contents.is_empty() { + match HighlightIterLayer::new( + config, + self.source, + self.context, + self.cancellation_flag, + layer.intersect_ranges(&injection_contents, false), + ) { + Ok(layer) => self.layers.push(layer), + Err(e) => return Some(Err(e)), } } } - // Determine if any scopes start or end at the current position. - let first_layer = &mut self.layers[0]; - if let Some(highlight) = local_highlight - .or(properties.highlight_nonlocal) - .or(properties.highlight) - { - let next_offset = cmp::min(self.source.len(), first_layer.offset()); + self.sort_layers(); + continue; + } - // Before returning any highlight boundaries, return any remaining slice of - // the source code the precedes that highlight boundary. - if self.source_offset < next_offset { - return Some(Ok(self.emit_source(next_offset))); + layer.captures.next(); + + // If this capture is for tracking local variables, then process the + // local variable info. + let mut reference_highlight = None; + let mut definition_highlight = None; + while pattern_index < layer.config.highlights_pattern_index { + // If the node represents a local scope, push a new local scope onto + // the scope stack. + if Some(capture.index) == layer.config.local_scope_capture_index { + definition_highlight = None; + layer.scope_stack.push(LocalScope { + inherits: true, + range: range.clone(), + local_defs: Vec::new(), + }); + } + // If the node represents a definition, add a new definition to the + // local scope at the top of the scope stack. + else if Some(capture.index) == layer.config.local_def_capture_index { + reference_highlight = None; + definition_highlight = None; + let scope = layer.scope_stack.last_mut().unwrap(); + if let Ok(name) = str::from_utf8(&self.source[range.clone()]) { + scope.local_defs.push((name, None)); + definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.1); } + } + // If the node represents a reference, then try to find the corresponding + // definition in the scope stack. + else if Some(capture.index) == layer.config.local_ref_capture_index { + if definition_highlight.is_none() { + definition_highlight = None; + if let Ok(name) = str::from_utf8(&self.source[range.clone()]) { + for scope in layer.scope_stack.iter().rev() { + if let Some(highlight) = + scope.local_defs.iter().rev().find_map(|i| { + if i.0 == name { + Some(i.1) + } else { + None + } + }) + { + reference_highlight = highlight; + break; + } + if !scope.inherits { + break; + } + } + } + } + } - scope_event = if first_layer.at_node_end { - Some(Ok(HighlightEvent::HighlightEnd)) + // Continue processing any additional local-variable-tracking patterns + // for the same node. + if let Some((next_match, next_capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + pattern_index = next_match.pattern_index; + capture = next_capture; + layer.captures.next(); + continue; } else { - Some(Ok(HighlightEvent::HighlightStart(highlight))) - }; + break; + } } + + break; } - // Advance the current layer's tree cursor. This might cause that cursor to move - // beyond one of the other layers' cursors for a different syntax tree, so we need - // to re-sort the layers. If the cursor is already at the end of its syntax tree, - // remove it. - if self.layers[0].advance() { - let mut index = 0; - while self.layers.get(index + 1).map_or(false, |next| { - self.layers[index].cmp(next) == cmp::Ordering::Greater - }) { - self.layers.swap(index, index + 1); - index += 1; - } - } else { - self.remove_first_layer(); - } - - if scope_event.is_some() { - return scope_event; - } - } - - if self.source_offset < self.source.len() { - Some(Ok(self.emit_source(self.source.len()))) - } else { - None - } - } -} - -impl<'a, T> fmt::Debug for Highlighter<'a, T> -where - T: Fn(&str) -> Option<(Language, &'a PropertySheet)>, -{ - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if let Some(layer) = self.layers.first() { - let node = layer.cursor.node(); - let position = if layer.at_node_end { - node.end_position() - } else { - node.start_position() - }; - write!( - f, - "{{Highlighter position: {:?}, kind: {}, at_end: {}, props: {:?}}}", - position, - node.kind(), - layer.at_node_end, - layer.cursor.node_properties() - )?; - } - Ok(()) - } -} - -impl<'a> Layer<'a> { - fn new( - source: &'a [u8], - tree: Tree, - sheet: &'a PropertySheet, - ranges: Vec, - depth: usize, - opaque: bool, - ) -> Self { - // The cursor's lifetime parameter indicates that the tree must outlive the cursor. - // But because the tree is really a pointer to the heap, the cursor can remain - // valid when the tree is moved. There's no way to express this with lifetimes - // right now, so we have to `transmute` the cursor's lifetime. - let cursor = unsafe { transmute(tree.walk_with_properties(sheet, source)) }; - Self { - _tree: tree, - cursor, - ranges, - depth, - opaque, - at_node_end: false, - scope_stack: vec![Scope { - inherits: false, - local_defs: Vec::new(), - }], - local_highlight: None, - } - } - - fn cmp(&self, other: &Layer) -> cmp::Ordering { - // Events are ordered primarily by their position in the document. But if - // one highlight starts at a given position and another highlight ends at that - // same position, return the highlight end event before the highlight start event. - self.offset() - .cmp(&other.offset()) - .then_with(|| other.at_node_end.cmp(&self.at_node_end)) - .then_with(|| self.depth.cmp(&other.depth)) - } - - fn offset(&self) -> usize { - if self.at_node_end { - self.cursor.node().end_byte() - } else { - self.cursor.node().start_byte() - } - } - - fn advance(&mut self) -> bool { - // Clear the current local highlighting class, which may be re-populated - // if we enter a node that represents a local definition or local reference. - self.local_highlight = None; - - // Step through the tree in a depth-first traversal, stopping at both - // the start and end position of every node. - if self.at_node_end { - self.leave_node(); - if self.cursor.goto_next_sibling() { - self.enter_node(); - self.at_node_end = false; - } else if !self.cursor.goto_parent() { - return false; - } - } else if self.cursor.goto_first_child() { - self.enter_node(); - } else { - self.at_node_end = true; - } - true - } - - fn enter_node(&mut self) { - let node = self.cursor.node(); - let props = self.cursor.node_properties(); - let node_text = if props.local_definition || props.local_reference { - node.utf8_text(self.cursor.source()).ok() - } else { - None - }; - - // If this node represents a local definition, then record its highlighting class - // and store the highlighting class in the current local scope. - if props.local_definition { - if let (Some(text), Some(inner_scope), Some(highlight)) = - (node_text, self.scope_stack.last_mut(), props.highlight) + // If the current node was found to be a local variable, then skip over any + // highlighting patterns that are disabled for local variables. + let mut has_highlight = true; + while (definition_highlight.is_some() || reference_highlight.is_some()) + && layer.config.non_local_variable_patterns[pattern_index] { - self.local_highlight = props.highlight; - if let Err(i) = inner_scope.local_defs.binary_search_by_key(&text, |e| e.0) { - inner_scope.local_defs.insert(i, (text, highlight)); - } - } - } - // If this node represents a local reference, then look it up in the current scope - // stack. If a local definition is found, record its highlighting class. - else if props.local_reference { - if let Some(text) = node_text { - for scope in self.scope_stack.iter().rev() { - if let Ok(i) = scope.local_defs.binary_search_by_key(&text, |e| e.0) { - self.local_highlight = Some(scope.local_defs[i].1); - break; + has_highlight = false; + if let Some((next_match, next_capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + capture = next_capture; + has_highlight = true; + layer.captures.next(); + continue; } - if !scope.inherits { + } + break; + } + + if has_highlight { + // Once a highlighting pattern is found for the current node, skip over + // any later highlighting patterns that also match this node. Captures + // for a given node are ordered by pattern index, so these subsequent + // captures are guaranteed to be for highlighting, not injections or + // local variables. + while let Some((next_match, next_capture_index)) = layer.captures.peek() { + if next_match.captures[*next_capture_index].node == capture.node { + layer.captures.next(); + } else { break; } } + + let current_highlight = layer.config.highlight_indices[capture.index as usize]; + + // If this node represents a local definition, then store the current + // highlight value on the local scope entry representing this node. + if let Some(definition_highlight) = definition_highlight { + *definition_highlight = current_highlight; + } + + // Emit a scope start event and push the node's end position to the stack. + if let Some(highlight) = reference_highlight.or(current_highlight) { + layer.highlight_end_stack.push(range.end); + return self + .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); + } } - } - // If this node represents a new local scope, then push it onto the scope stack. - if let Some(inherits) = props.local_scope { - self.scope_stack.push(Scope { - inherits, - local_defs: Vec::new(), - }); + + self.sort_layers(); } } - - fn leave_node(&mut self) { - let props = self.cursor.node_properties(); - if props.local_scope.is_some() { - self.scope_stack.pop(); - } - } -} - -impl<'de> Deserialize<'de> for Highlight { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let s = String::deserialize(deserializer)?; - match s.as_str() { - "attribute" => Ok(Highlight::Attribute), - "comment" => Ok(Highlight::Comment), - "constant" => Ok(Highlight::Constant), - "constant.builtin" => Ok(Highlight::ConstantBuiltin), - "constructor" => Ok(Highlight::Constructor), - "constructor.builtin" => Ok(Highlight::ConstructorBuiltin), - "embedded" => Ok(Highlight::Embedded), - "escape" => Ok(Highlight::Escape), - "function" => Ok(Highlight::Function), - "function.builtin" => Ok(Highlight::FunctionBuiltin), - "keyword" => Ok(Highlight::Keyword), - "number" => Ok(Highlight::Number), - "operator" => Ok(Highlight::Operator), - "property" => Ok(Highlight::Property), - "property.builtin" => Ok(Highlight::PropertyBuiltin), - "punctuation" => Ok(Highlight::Punctuation), - "punctuation.bracket" => Ok(Highlight::PunctuationBracket), - "punctuation.delimiter" => Ok(Highlight::PunctuationDelimiter), - "punctuation.special" => Ok(Highlight::PunctuationSpecial), - "string" => Ok(Highlight::String), - "string.special" => Ok(Highlight::StringSpecial), - "type" => Ok(Highlight::Type), - "type.builtin" => Ok(Highlight::TypeBuiltin), - "variable" => Ok(Highlight::Variable), - "variable.builtin" => Ok(Highlight::VariableBuiltin), - "variable.parameter" => Ok(Highlight::VariableParameter), - "tag" => Ok(Highlight::Tag), - _ => Ok(Highlight::Unknown), - } - } -} - -impl Serialize for Highlight { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - match self { - Highlight::Attribute => serializer.serialize_str("attribute"), - Highlight::Comment => serializer.serialize_str("comment"), - Highlight::Constant => serializer.serialize_str("constant"), - Highlight::ConstantBuiltin => serializer.serialize_str("constant.builtin"), - Highlight::Constructor => serializer.serialize_str("constructor"), - Highlight::ConstructorBuiltin => serializer.serialize_str("constructor.builtin"), - Highlight::Embedded => serializer.serialize_str("embedded"), - Highlight::Escape => serializer.serialize_str("escape"), - Highlight::Function => serializer.serialize_str("function"), - Highlight::FunctionBuiltin => serializer.serialize_str("function.builtin"), - Highlight::Keyword => serializer.serialize_str("keyword"), - Highlight::Number => serializer.serialize_str("number"), - Highlight::Operator => serializer.serialize_str("operator"), - Highlight::Property => serializer.serialize_str("property"), - Highlight::PropertyBuiltin => serializer.serialize_str("property.builtin"), - Highlight::Punctuation => serializer.serialize_str("punctuation"), - Highlight::PunctuationBracket => serializer.serialize_str("punctuation.bracket"), - Highlight::PunctuationDelimiter => serializer.serialize_str("punctuation.delimiter"), - Highlight::PunctuationSpecial => serializer.serialize_str("punctuation.special"), - Highlight::String => serializer.serialize_str("string"), - Highlight::StringSpecial => serializer.serialize_str("string.special"), - Highlight::Type => serializer.serialize_str("type"), - Highlight::TypeBuiltin => serializer.serialize_str("type.builtin"), - Highlight::Variable => serializer.serialize_str("variable"), - Highlight::VariableBuiltin => serializer.serialize_str("variable.builtin"), - Highlight::VariableParameter => serializer.serialize_str("variable.parameter"), - Highlight::Tag => serializer.serialize_str("tag"), - Highlight::Unknown => serializer.serialize_str(""), - } - } -} - -pub trait HTMLAttributeCallback<'a>: Fn(Highlight) -> &'a str {} - -pub fn highlight<'a, F>( - source: &'a [u8], - language: Language, - property_sheet: &'a PropertySheet, - cancellation_flag: Option<&'a AtomicUsize>, - injection_callback: F, -) -> Result> + 'a, Error> -where - F: Fn(&str) -> Option<(Language, &'a PropertySheet)> + 'a, -{ - Highlighter::new( - source, - language, - property_sheet, - injection_callback, - cancellation_flag, - ) -} - -pub fn highlight_html<'a, F1, F2>( - source: &'a [u8], - language: Language, - property_sheet: &'a PropertySheet, - cancellation_flag: Option<&'a AtomicUsize>, - injection_callback: F1, - attribute_callback: F2, -) -> Result, Error> -where - F1: Fn(&str) -> Option<(Language, &'a PropertySheet)>, - F2: Fn(Highlight) -> &'a str, -{ - let mut renderer = HtmlRenderer::new(); - renderer.render( - Highlighter::new( - source, - language, - property_sheet, - injection_callback, - cancellation_flag, - )?, - source, - &|s| (attribute_callback)(s).as_bytes(), - )?; - Ok(renderer - .line_offsets - .iter() - .enumerate() - .map(|(i, offset)| { - let offset = *offset as usize; - let next_offset = renderer - .line_offsets - .get(i + 1) - .map_or(renderer.html.len(), |i| *i as usize); - String::from_utf8(renderer.html[offset..next_offset].to_vec()).unwrap() - }) - .collect()) } pub struct HtmlRenderer { @@ -1114,7 +733,7 @@ pub struct HtmlRenderer { } impl HtmlRenderer { - fn new() -> Self { + pub fn new() -> Self { HtmlRenderer { html: Vec::new(), line_offsets: vec![0], @@ -1162,6 +781,21 @@ impl HtmlRenderer { Ok(()) } + pub fn lines(&self) -> impl Iterator { + self.line_offsets + .iter() + .enumerate() + .map(move |(i, line_start)| { + let line_start = *line_start as usize; + let line_end = if i + 1 == self.line_offsets.len() { + self.html.len() + } else { + self.line_offsets[i + 1] as usize + }; + str::from_utf8(&self.html[line_start..line_end]).unwrap() + }) + } + fn start_highlight<'a, F>(&mut self, h: Highlight, attribute_callback: &F) where F: Fn(Highlight) -> &'a [u8],