From f4903578f8becc499c1243baa344d727eea392e5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 18 Sep 2019 17:35:47 -0700 Subject: [PATCH 01/14] Start reimplementing highlight crate with tree queries --- cli/src/error.rs | 24 +- cli/src/highlight.rs | 317 +++-- cli/src/loader.rs | 71 +- cli/src/main.rs | 19 +- cli/src/tests/helpers/fixtures.rs | 46 +- cli/src/tests/highlight_test.rs | 516 ++++--- highlight/include/tree_sitter/highlight.h | 47 +- highlight/src/c_lib.rs | 192 ++- highlight/src/lib.rs | 1514 ++++++++------------- 9 files changed, 1259 insertions(+), 1487 deletions(-) diff --git a/cli/src/error.rs b/cli/src/error.rs index 968486f4..324ad8b1 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -1,6 +1,6 @@ use std::fmt::Write; use std::io; -use tree_sitter_highlight::PropertySheetError; +use tree_sitter::QueryError; #[derive(Debug)] pub struct Error(pub Vec); @@ -50,6 +50,18 @@ impl Error { } } +impl<'a> From for Error { + fn from(error: QueryError) -> Self { + Error::new(format!("{:?}", error)) + } +} + +impl<'a> From for Error { + fn from(error: tree_sitter_highlight::Error) -> Self { + Error::new(format!("{:?}", error)) + } +} + impl From for Error { fn from(error: serde_json::Error) -> Self { Error::new(error.to_string()) @@ -79,13 +91,3 @@ impl From for Error { Error::new(error) } } - -impl From for Error { - fn from(error: PropertySheetError) -> Self { - match error { - PropertySheetError::InvalidFormat(e) => Self::from(e), - PropertySheetError::InvalidRegex(e) => Self::regex(&e.to_string()), - PropertySheetError::InvalidJSON(e) => Self::from(e), - } - } -} diff --git a/cli/src/highlight.rs b/cli/src/highlight.rs index dff8fd2c..d92d642e 100644 --- a/cli/src/highlight.rs +++ b/cli/src/highlight.rs @@ -1,6 +1,6 @@ use crate::error::Result; use crate::loader::Loader; -use ansi_term::{Color, Style}; +use ansi_term::Color; use lazy_static::lazy_static; use serde::ser::SerializeMap; use serde::{Deserialize, Deserializer, Serialize, 
Serializer}; @@ -9,18 +9,52 @@ use std::collections::HashMap; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use std::time::Instant; -use std::{fmt, fs, io, path, thread}; -use tree_sitter::{Language, PropertySheet}; -use tree_sitter_highlight::{highlight, highlight_html, Highlight, HighlightEvent, Properties}; +use std::{fs, io, path, str, thread, usize}; +use tree_sitter_highlight::{ + HighlightConfiguration, HighlightContext, HighlightEvent, Highlighter, HtmlRenderer, +}; + +pub const HTML_HEADER: &'static str = " + + + Tree-sitter Highlighting + + + +"; + +pub const HTML_FOOTER: &'static str = " + +"; lazy_static! { static ref CSS_STYLES_BY_COLOR_ID: Vec = serde_json::from_str(include_str!("../vendor/xterm-colors.json")).unwrap(); } +#[derive(Debug, Default)] +pub struct Style { + pub ansi: ansi_term::Style, + pub css: Option, +} + +#[derive(Debug)] pub struct Theme { - ansi_styles: Vec>, - css_styles: Vec>, + pub highlighter: Highlighter, + styles: Vec - - -"; - -pub const HTML_FOOTER: &'static str = " - -"; - pub fn html( loader: &Loader, theme: &Theme, source: &[u8], - language: Language, - property_sheet: &PropertySheet, + config: &HighlightConfiguration, + print_time: bool, ) -> Result<()> { use std::io::Write; + let stdout = io::stdout(); let mut stdout = stdout.lock(); - write!(&mut stdout, "\n")?; - + let time = Instant::now(); let cancellation_flag = cancel_on_stdin(); - let lines = highlight_html( + let mut context = HighlightContext::new(); + + let events = theme.highlighter.highlight( + &mut context, + config, source, - language, - property_sheet, - Some(cancellation_flag.as_ref()), - |s| language_for_injection_string(loader, s), - |highlight| { - if let Some(css_style) = theme.css_style(highlight) { - css_style - } else { - "" - } - }, - ) - .map_err(|e| e.to_string())?; - for (i, line) in lines.into_iter().enumerate() { + Some(&cancellation_flag), + |string| language_for_injection_string(loader, theme, string), + )?; + + let 
mut renderer = HtmlRenderer::new(); + renderer.render(events, source, &move |highlight| { + if let Some(css_style) = &theme.styles[highlight.0].css { + css_style.as_bytes() + } else { + "".as_bytes() + } + })?; + + for (i, line) in renderer.lines().enumerate() { write!( &mut stdout, "\n", @@ -380,14 +360,21 @@ pub fn html( line )?; } + write!(&mut stdout, "
{}{}
\n")?; + + if print_time { + eprintln!("Time: {}ms", time.elapsed().as_millis()); + } + Ok(()) } fn language_for_injection_string<'a>( loader: &'a Loader, + theme: &Theme, string: &str, -) -> Option<(Language, &'a PropertySheet)> { +) -> Option<&'a HighlightConfiguration> { match loader.language_configuration_for_injection_string(string) { Err(e) => { eprintln!( @@ -399,7 +386,7 @@ fn language_for_injection_string<'a>( } Ok(None) => None, Ok(Some((language, configuration))) => { - match configuration.highlight_property_sheet(language) { + match configuration.highlight_config(&theme.highlighter, language) { Err(e) => { eprintln!( "Failed to load property sheet for injection string '{}': {}", @@ -409,7 +396,7 @@ fn language_for_injection_string<'a>( None } Ok(None) => None, - Ok(Some(sheet)) => Some((language, sheet)), + Ok(Some(config)) => Some(config), } } } diff --git a/cli/src/loader.rs b/cli/src/loader.rs index 237718bb..328b8063 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -9,8 +9,8 @@ use std::path::{Path, PathBuf}; use std::process::Command; use std::time::SystemTime; use std::{fs, mem}; -use tree_sitter::{Language, PropertySheet}; -use tree_sitter_highlight::{load_property_sheet, Properties}; +use tree_sitter::Language; +use tree_sitter_highlight::{HighlightConfiguration, Highlighter}; #[cfg(unix)] const DYLIB_EXTENSION: &'static str = "so"; @@ -27,9 +27,9 @@ pub struct LanguageConfiguration { pub _first_line_regex: Option, pub injection_regex: Option, pub file_types: Vec, - pub highlight_property_sheet_path: Option, + pub root_path: PathBuf, language_id: usize, - highlight_property_sheet: OnceCell>>, + highlight_config: OnceCell>, } pub struct Loader { @@ -134,7 +134,6 @@ impl Loader { if configuration_ids.len() == 1 { configuration = &self.language_configurations[configuration_ids[0]]; } - // If multiple language configurations match, then determine which // one to use by applying the configurations' content regexes. 
else { @@ -151,7 +150,6 @@ impl Loader { if let Some(mat) = content_regex.find(&file_contents) { score = (mat.end() - mat.start()) as isize; } - // If the content regex does not match, then *penalize* this // language configuration, so that language configurations // without content regexes are preferred over those with @@ -394,6 +392,7 @@ impl Loader { }); let configuration = LanguageConfiguration { + root_path: parser_path.to_path_buf(), scope: config_json.scope, language_id, file_types: config_json.file_types.unwrap_or(Vec::new()), @@ -406,10 +405,7 @@ impl Loader { injection_regex: config_json .injection_regex .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), - highlight_property_sheet_path: config_json - .highlights - .map(|h| parser_path.join(h)), - highlight_property_sheet: OnceCell::new(), + highlight_config: OnceCell::new(), }; for file_type in &configuration.file_types { @@ -428,14 +424,14 @@ impl Loader { && parser_path.join("src").join("grammar.json").exists() { self.language_configurations.push(LanguageConfiguration { + root_path: parser_path.to_owned(), language_id: self.languages_by_id.len(), scope: None, content_regex: None, injection_regex: None, file_types: Vec::new(), _first_line_regex: None, - highlight_property_sheet_path: None, - highlight_property_sheet: OnceCell::new(), + highlight_config: OnceCell::new(), }); self.languages_by_id .push((parser_path.to_owned(), OnceCell::new())); @@ -446,30 +442,41 @@ impl Loader { } impl LanguageConfiguration { - pub fn highlight_property_sheet( + pub fn highlight_config( &self, + highlighter: &Highlighter, language: Language, - ) -> Result>> { - self.highlight_property_sheet + ) -> Result> { + self.highlight_config .get_or_try_init(|| { - if let Some(path) = &self.highlight_property_sheet_path { - let sheet_json = fs::read_to_string(path).map_err(Error::wrap(|| { - format!( - "Failed to read property sheet {:?}", - path.file_name().unwrap() - ) - }))?; - let sheet = - 
load_property_sheet(language, &sheet_json).map_err(Error::wrap(|| { - format!( - "Failed to parse property sheet {:?}", - path.file_name().unwrap() - ) - }))?; - Ok(Some(sheet)) - } else { - Ok(None) + let queries_path = self.root_path.join("queries"); + + let highlights_path = queries_path.join("highlights.scm"); + let injections_path = queries_path.join("injections.scm"); + let locals_path = queries_path.join("locals.scm"); + + if !highlights_path.exists() { + return Ok(None); } + + let highlights_query = fs::read_to_string(highlights_path)?; + let injections_query = if injections_path.exists() { + fs::read_to_string(injections_path)? + } else { + String::new() + }; + let locals_query = if locals_path.exists() { + fs::read_to_string(locals_path)? + } else { + String::new() + }; + + Ok(Some(highlighter.load_configuration( + language, + &highlights_query, + &injections_query, + &locals_query, + )?)) }) .map(Option::as_ref) } diff --git a/cli/src/main.rs b/cli/src/main.rs index 8de7ed67..25ffe5f7 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -110,7 +110,8 @@ fn run() -> error::Result<()> { ) .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("html").long("html").short("h")) - .arg(Arg::with_name("time").long("time").short("t")), + .arg(Arg::with_name("time").long("time").short("t")) + .arg(Arg::with_name("q").short("q")), ) .subcommand( SubCommand::with_name("build-wasm") @@ -260,15 +261,18 @@ fn run() -> error::Result<()> { }, }; - if let Some(sheet) = language_config.highlight_property_sheet(language)? { - let source = fs::read(path)?; + let source = fs::read(path)?; + + if let Some(highlight_config) = + language_config.highlight_config(&config.theme.highlighter, language)? 
+ { if html_mode { - highlight::html(&loader, &config.theme, &source, language, sheet)?; + highlight::html(&loader, &config.theme, &source, highlight_config, time)?; } else { - highlight::ansi(&loader, &config.theme, &source, language, sheet, time)?; + highlight::ansi(&loader, &config.theme, &source, highlight_config, time)?; } } else { - return Error::err(format!("No syntax highlighting property sheet specified")); + return Error::err(format!("No syntax highlighting query found")); } } } else if let Some(matches) = matches.subcommand_matches("build-wasm") { @@ -280,10 +284,9 @@ fn run() -> error::Result<()> { loader.find_all_languages(&config.parser_directories)?; for (configuration, language_path) in loader.get_all_language_configurations() { println!( - "scope: {}\nparser: {:?}\nproperties: {:?}\nfile_types: {:?}\ncontent_regex: {:?}\ninjection_regex: {:?}\n", + "scope: {}\nparser: {:?}\nfile_types: {:?}\ncontent_regex: {:?}\ninjection_regex: {:?}\n", configuration.scope.as_ref().unwrap_or(&String::new()), language_path, - configuration.highlight_property_sheet_path, configuration.file_types, configuration.content_regex, configuration.injection_regex, diff --git a/cli/src/tests/helpers/fixtures.rs b/cli/src/tests/helpers/fixtures.rs index 4389797e..af1df2bf 100644 --- a/cli/src/tests/helpers/fixtures.rs +++ b/cli/src/tests/helpers/fixtures.rs @@ -2,8 +2,8 @@ use crate::loader::Loader; use lazy_static::lazy_static; use std::fs; use std::path::{Path, PathBuf}; -use tree_sitter::{Language, PropertySheet}; -use tree_sitter_highlight::{load_property_sheet, Properties}; +use tree_sitter::Language; +use tree_sitter_highlight::{HighlightConfiguration, Highlighter}; include!("./dirs.rs"); @@ -21,18 +21,42 @@ pub fn get_language(name: &str) -> Language { .unwrap() } -pub fn get_property_sheet_json(language_name: &str, sheet_name: &str) -> String { - let path = GRAMMARS_DIR - .join(language_name) - .join("src") - .join(sheet_name); - fs::read_to_string(path).unwrap() +pub 
fn get_highlight_query_sources(language_name: &str) -> (String, String, String) { + let queries_path = GRAMMARS_DIR.join(language_name).join("queries"); + let highlights_path = queries_path.join("highlights.scm"); + let injections_path = queries_path.join("injections.scm"); + let locals_path = queries_path.join("locals.scm"); + + let highlights_query = fs::read_to_string(highlights_path).unwrap(); + let injections_query = if injections_path.exists() { + fs::read_to_string(injections_path).unwrap() + } else { + String::new() + }; + let locals_query = if locals_path.exists() { + fs::read_to_string(locals_path).unwrap() + } else { + String::new() + }; + + (highlights_query, injections_query, locals_query) } -pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet { - let json = get_property_sheet_json(language_name, sheet_name); +pub fn get_highlight_config( + language_name: &str, + highlighter: &Highlighter, +) -> HighlightConfiguration { let language = get_language(language_name); - load_property_sheet(language, &json).unwrap() + let (highlights_query, injections_query, locals_query) = + get_highlight_query_sources(language_name); + highlighter + .load_configuration( + language, + &highlights_query, + &injections_query, + &locals_query, + ) + .unwrap() } pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language { diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index 34b545ff..a996d2d1 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -1,32 +1,85 @@ -use super::helpers::fixtures::{get_language, get_property_sheet, get_property_sheet_json}; +use super::helpers::fixtures::{get_highlight_config, get_highlight_query_sources, get_language}; use lazy_static::lazy_static; use std::ffi::CString; - use std::sync::atomic::{AtomicUsize, Ordering}; use std::{ptr, slice, str}; -use tree_sitter::{Language, PropertySheet}; use tree_sitter_highlight::{ - c, 
highlight, highlight_html, Error, Highlight, HighlightEvent, Properties, + c, Error, HighlightConfiguration, HighlightContext, HighlightEvent, Highlighter, HtmlRenderer, }; lazy_static! { - static ref JS_SHEET: PropertySheet = - get_property_sheet("javascript", "highlights.json"); - static ref HTML_SHEET: PropertySheet = - get_property_sheet("html", "highlights.json"); - static ref EJS_SHEET: PropertySheet = - get_property_sheet("embedded-template", "highlights-ejs.json"); - static ref RUST_SHEET: PropertySheet = - get_property_sheet("rust", "highlights.json"); - static ref SCOPE_CLASS_STRINGS: Vec = { - let mut result = Vec::new(); - let mut i = 0; - while let Some(highlight) = Highlight::from_usize(i) { - result.push(format!("class={:?}", highlight)); - i += 1; - } - result - }; + static ref JS_HIGHLIGHT: HighlightConfiguration = + get_highlight_config("javascript", &HIGHLIGHTER); + static ref HTML_HIGHLIGHT: HighlightConfiguration = get_highlight_config("html", &HIGHLIGHTER); + static ref EJS_HIGHLIGHT: HighlightConfiguration = + get_highlight_config("embedded-template", &HIGHLIGHTER); + static ref RUST_HIGHLIGHT: HighlightConfiguration = get_highlight_config("rust", &HIGHLIGHTER); + static ref HIGHLIGHTER: Highlighter = Highlighter::new( + [ + "attribute", + "constructor", + "function.builtin", + "function", + "embedded", + "keyword", + "operator", + "property.builtin", + "property", + "punctuation", + "punctuation.bracket", + "punctuation.delimiter", + "punctuation.special", + "string", + "tag", + "type.builtin", + "type", + "variable.builtin", + "variable.parameter", + "variable", + ] + .iter() + .cloned() + .map(String::from) + .collect() + ); + static ref HTML_ATTRS: Vec = HIGHLIGHTER + .highlight_names + .iter() + .map(|s| format!("class={}", s)) + .collect(); +} + +#[test] +fn test_highlighting_javascript() { + let source = "const a = function(b) { return b + c; }"; + assert_eq!( + &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), + &[vec![ + ("const", 
vec!["keyword"]), + (" ", vec![]), + ("a", vec!["function"]), + (" ", vec![]), + ("=", vec!["operator"]), + (" ", vec![]), + ("function", vec!["keyword"]), + ("(", vec!["punctuation.bracket"]), + ("b", vec!["variable.parameter"]), + (")", vec!["punctuation.bracket"]), + (" ", vec![]), + ("{", vec!["punctuation.bracket"]), + (" ", vec![]), + ("return", vec!["keyword"]), + (" ", vec![]), + ("b", vec!["variable.parameter"]), + (" ", vec![]), + ("+", vec!["operator"]), + (" ", vec![]), + ("c", vec!["variable"]), + (";", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("}", vec!["punctuation.bracket"]), + ]] + ); } #[test] @@ -34,57 +87,68 @@ fn test_highlighting_injected_html_in_javascript() { let source = vec!["const s = html `
${a < b}
`;"].join("\n"); assert_eq!( - &to_token_vector(&source, get_language("javascript"), &JS_SHEET).unwrap(), + &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), &[vec![ - ("const", vec![Highlight::Keyword]), + ("const", vec!["keyword"]), (" ", vec![]), - ("s", vec![Highlight::Variable]), + ("s", vec!["variable"]), (" ", vec![]), - ("=", vec![Highlight::Operator]), + ("=", vec!["operator"]), (" ", vec![]), - ("html", vec![Highlight::Function]), + ("html", vec!["function"]), (" ", vec![]), - ("`<", vec![Highlight::String]), - ("div", vec![Highlight::String, Highlight::Tag]), - (">", vec![Highlight::String]), - ( - "${", - vec![ - Highlight::String, - Highlight::Embedded, - Highlight::PunctuationSpecial - ] - ), - ( - "a", - vec![Highlight::String, Highlight::Embedded, Highlight::Variable] - ), - (" ", vec![Highlight::String, Highlight::Embedded]), - ( - "<", - vec![Highlight::String, Highlight::Embedded, Highlight::Operator] - ), - (" ", vec![Highlight::String, Highlight::Embedded]), - ( - "b", - vec![Highlight::String, Highlight::Embedded, Highlight::Variable] - ), - ( - "}", - vec![ - Highlight::String, - Highlight::Embedded, - Highlight::PunctuationSpecial - ] - ), - ("`", vec![Highlight::String]), - (";", vec![Highlight::PunctuationDelimiter]), + ("`", vec!["string"]), + ("<", vec!["string", "punctuation.bracket"]), + ("div", vec!["string", "tag"]), + (">", vec!["string", "punctuation.bracket"]), + ("${", vec!["string", "embedded", "punctuation.special"]), + ("a", vec!["string", "embedded", "variable"]), + (" ", vec!["string", "embedded"]), + ("<", vec!["string", "embedded", "operator"]), + (" ", vec!["string", "embedded"]), + ("b", vec!["string", "embedded", "variable"]), + ("}", vec!["string", "embedded", "punctuation.special"]), + ("", vec!["string", "punctuation.bracket"]), + ("`", vec!["string"]), + (";", vec!["punctuation.delimiter"]), ]] ); } +#[test] +fn test_highlighting_injected_javascript_in_html_mini() { + let source = ""; + + eprintln!("HTML {:?}", 
HTML_HIGHLIGHT.language); + eprintln!("JavaScript {:?}", JS_HIGHLIGHT.language); + + assert_eq!( + &to_token_vector(source, &HTML_HIGHLIGHT).unwrap(), + &[vec![ + ("<", vec!["punctuation.bracket"]), + ("script", vec!["tag"]), + (">", vec!["punctuation.bracket"]), + ("const", vec!["keyword"]), + (" ", vec![]), + ("x", vec!["variable"]), + (" ", vec![]), + ("=", vec!["operator"]), + (" ", vec![]), + ("new", vec!["keyword"]), + (" ", vec![]), + ("Thing", vec!["constructor"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), + (";", vec!["punctuation.delimiter"]), + ("", vec!["punctuation.bracket"]), + ],] + ); +} + #[test] fn test_highlighting_injected_javascript_in_html() { let source = vec![ @@ -97,38 +161,44 @@ fn test_highlighting_injected_javascript_in_html() { .join("\n"); assert_eq!( - &to_token_vector(&source, get_language("html"), &HTML_SHEET).unwrap(), + &to_token_vector(&source, &HTML_HIGHLIGHT).unwrap(), &[ - vec![("<", vec![]), ("body", vec![Highlight::Tag]), (">", vec![]),], vec![ - (" <", vec![]), - ("script", vec![Highlight::Tag]), - (">", vec![]), + ("<", vec!["punctuation.bracket"]), + ("body", vec!["tag"]), + (">", vec!["punctuation.bracket"]), + ], + vec![ + (" ", vec![]), + ("<", vec!["punctuation.bracket"]), + ("script", vec!["tag"]), + (">", vec!["punctuation.bracket"]), ], vec![ (" ", vec![]), - ("const", vec![Highlight::Keyword]), + ("const", vec!["keyword"]), (" ", vec![]), - ("x", vec![Highlight::Variable]), + ("x", vec!["variable"]), (" ", vec![]), - ("=", vec![Highlight::Operator]), + ("=", vec!["operator"]), (" ", vec![]), - ("new", vec![Highlight::Keyword]), + ("new", vec!["keyword"]), (" ", vec![]), - ("Thing", vec![Highlight::Constructor]), - ("(", vec![Highlight::PunctuationBracket]), - (")", vec![Highlight::PunctuationBracket]), - (";", vec![Highlight::PunctuationDelimiter]), + ("Thing", vec!["constructor"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), + (";", 
vec!["punctuation.delimiter"]), ], vec![ - (" ", vec![]), + (" ", vec![]), + ("", vec!["punctuation.bracket"]), ], vec![ - ("", vec![]), + ("", vec!["punctuation.bracket"]), ], ] ); @@ -147,7 +217,7 @@ fn test_highlighting_multiline_nodes_to_html() { .join("\n"); assert_eq!( - &to_html(&source, get_language("javascript"), &JS_SHEET,).unwrap(), + &to_html(&source, &JS_HIGHLIGHT).unwrap(), &[ "const SOMETHING = `\n".to_string(), " one ${\n".to_string(), @@ -169,51 +239,51 @@ fn test_highlighting_with_local_variable_tracking() { .join("\n"); assert_eq!( - &to_token_vector(&source, get_language("javascript"), &JS_SHEET).unwrap(), + &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), &[ vec![ - ("module", vec![Highlight::VariableBuiltin]), - (".", vec![Highlight::PunctuationDelimiter]), - ("exports", vec![Highlight::Property]), + ("module", vec!["variable.builtin"]), + (".", vec!["punctuation.delimiter"]), + ("exports", vec!["function"]), (" ", vec![]), - ("=", vec![Highlight::Operator]), + ("=", vec!["operator"]), (" ", vec![]), - ("function", vec![Highlight::Keyword]), + ("function", vec!["keyword"]), (" ", vec![]), - ("a", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), - ("b", vec![Highlight::VariableParameter]), - (")", vec![Highlight::PunctuationBracket]), + ("a", vec!["function"]), + ("(", vec!["punctuation.bracket"]), + ("b", vec!["variable.parameter"]), + (")", vec!["punctuation.bracket"]), (" ", vec![]), - ("{", vec![Highlight::PunctuationBracket]) + ("{", vec!["punctuation.bracket"]) ], vec![ (" ", vec![]), - ("const", vec![Highlight::Keyword]), + ("const", vec!["keyword"]), (" ", vec![]), - ("module", vec![Highlight::Variable]), + ("module", vec!["variable"]), (" ", vec![]), - ("=", vec![Highlight::Operator]), + ("=", vec!["operator"]), (" ", vec![]), - ("c", vec![Highlight::Variable]), - (";", vec![Highlight::PunctuationDelimiter]) + ("c", vec!["variable"]), + (";", vec!["punctuation.delimiter"]) ], vec![ (" ", vec![]), - 
("console", vec![Highlight::VariableBuiltin]), - (".", vec![Highlight::PunctuationDelimiter]), - ("log", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), + ("console", vec!["variable.builtin"]), + (".", vec!["punctuation.delimiter"]), + ("log", vec!["function"]), + ("(", vec!["punctuation.bracket"]), // Not a builtin, because `module` was defined as a variable above. - ("module", vec![Highlight::Variable]), - (",", vec![Highlight::PunctuationDelimiter]), + ("module", vec!["variable"]), + (",", vec!["punctuation.delimiter"]), (" ", vec![]), // A parameter, because `b` was defined as a parameter above. - ("b", vec![Highlight::VariableParameter]), - (")", vec![Highlight::PunctuationBracket]), - (";", vec![Highlight::PunctuationDelimiter]), + ("b", vec!["variable.parameter"]), + (")", vec!["punctuation.bracket"]), + (";", vec!["punctuation.delimiter"]), ], - vec![("}", vec![Highlight::PunctuationBracket])] + vec![("}", vec!["punctuation.bracket"])] ], ); } @@ -234,17 +304,17 @@ fn test_highlighting_empty_lines() { .join("\n"); assert_eq!( - &to_html(&source, get_language("javascript"), &JS_SHEET,).unwrap(), + &to_html(&source, &JS_HIGHLIGHT,).unwrap(), &[ - "class A {\n".to_string(), + "class A {\n".to_string(), "\n".to_string(), - " b(c) {\n".to_string(), + " b(c) {\n".to_string(), "\n".to_string(), - " d(e)\n".to_string(), + " d(e)\n".to_string(), "\n".to_string(), - " }\n".to_string(), + " }\n".to_string(), "\n".to_string(), - "}\n".to_string(), + "}\n".to_string(), ] ); } @@ -254,20 +324,20 @@ fn test_highlighting_ejs() { let source = vec!["
<% foo() %>
"].join("\n"); assert_eq!( - &to_token_vector(&source, get_language("embedded-template"), &EJS_SHEET).unwrap(), + &to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(), &[[ ("<", vec![]), - ("div", vec![Highlight::Tag]), + ("div", vec!["tag"]), (">", vec![]), - ("<%", vec![Highlight::Keyword]), + ("<%", vec!["keyword"]), (" ", vec![]), - ("foo", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), - (")", vec![Highlight::PunctuationBracket]), + ("foo", vec!["function"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), (" ", vec![]), - ("%>", vec![Highlight::Keyword]), + ("%>", vec!["keyword"]), ("", vec![]) ]], ); @@ -278,33 +348,33 @@ fn test_highlighting_with_content_children_included() { let source = vec!["assert!(", " a.b.c() < D::e::()", ");"].join("\n"); assert_eq!( - &to_token_vector(&source, get_language("rust"), &RUST_SHEET).unwrap(), + &to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(), &[ vec![ - ("assert", vec![Highlight::Function]), - ("!", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), + ("assert", vec!["function"]), + ("!", vec!["function"]), + ("(", vec!["punctuation.bracket"]), ], vec![ (" a", vec![]), - (".", vec![Highlight::PunctuationDelimiter]), - ("b", vec![Highlight::Property]), - (".", vec![Highlight::PunctuationDelimiter]), - ("c", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), - (")", vec![Highlight::PunctuationBracket]), + (".", vec!["punctuation.delimiter"]), + ("b", vec!["property"]), + (".", vec!["punctuation.delimiter"]), + ("c", vec!["function"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), (" < ", vec![]), - ("D", vec![Highlight::Type]), - ("::", vec![Highlight::PunctuationDelimiter]), - ("e", vec![Highlight::Function]), - ("::", vec![Highlight::PunctuationDelimiter]), - ("<", vec![Highlight::PunctuationBracket]), - ("F", vec![Highlight::Type]), - (">", vec![Highlight::PunctuationBracket]), - ("(", 
vec![Highlight::PunctuationBracket]), - (")", vec![Highlight::PunctuationBracket]), + ("D", vec!["type"]), + ("::", vec!["punctuation.delimiter"]), + ("e", vec!["function"]), + ("::", vec!["punctuation.delimiter"]), + ("<", vec!["punctuation.bracket"]), + ("F", vec!["type"]), + (">", vec!["punctuation.bracket"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), ], - vec![(")", vec![Highlight::PunctuationBracket]), (";", vec![]),] + vec![(")", vec!["punctuation.bracket"]), (";", vec![]),] ], ); } @@ -327,18 +397,20 @@ fn test_highlighting_cancellation() { // Constructing the highlighter, which eagerly parses the outer document, // should not fail. - let highlighter = highlight( - source.as_bytes(), - get_language("html"), - &HTML_SHEET, - Some(&cancellation_flag), - injection_callback, - ) - .unwrap(); + let mut context = HighlightContext::new(); + let events = HIGHLIGHTER + .highlight( + &mut context, + &HTML_HIGHLIGHT, + source.as_bytes(), + Some(&cancellation_flag), + injection_callback, + ) + .unwrap(); // Iterating the scopes should not panic. It should return an error // once the cancellation is detected. 
- for event in highlighter { + for event in events { if let Err(e) = event { assert_eq!(e, Error::Cancelled); return; @@ -349,49 +421,68 @@ fn test_highlighting_cancellation() { #[test] fn test_highlighting_via_c_api() { - let js_lang = get_language("javascript"); - let html_lang = get_language("html"); - let js_sheet = get_property_sheet_json("javascript", "highlights.json"); - let js_sheet = c_string(&js_sheet); - let html_sheet = get_property_sheet_json("html", "highlights.json"); - let html_sheet = c_string(&html_sheet); + let highlights = vec![ + "class=tag\0", + "class=function\0", + "class=string\0", + "class=keyword\0", + ]; + let highlight_names = highlights + .iter() + .map(|h| h["class=".len()..].as_ptr() as *const i8) + .collect::>(); + let highlight_attrs = highlights + .iter() + .map(|h| h.as_bytes().as_ptr() as *const i8) + .collect::>(); + let highlighter = c::ts_highlighter_new( + &highlight_names[0] as *const *const i8, + &highlight_attrs[0] as *const *const i8, + highlights.len() as u32, + ); - let class_tag = c_string("class=tag"); - let class_function = c_string("class=function"); - let class_string = c_string("class=string"); - let class_keyword = c_string("class=keyword"); - - let js_scope_name = c_string("source.js"); - let html_scope_name = c_string("text.html.basic"); - let injection_regex = c_string("^(javascript|js)$"); let source_code = c_string(""); - let attribute_strings = &mut [ptr::null(); Highlight::Unknown as usize + 1]; - attribute_strings[Highlight::Tag as usize] = class_tag.as_ptr(); - attribute_strings[Highlight::String as usize] = class_string.as_ptr(); - attribute_strings[Highlight::Keyword as usize] = class_keyword.as_ptr(); - attribute_strings[Highlight::Function as usize] = class_function.as_ptr(); + let js_scope = c_string("source.js"); + let js_injection_regex = c_string("^javascript"); + let language = get_language("javascript"); + let (highlights_query, injections_query, locals_query) = + 
get_highlight_query_sources("javascript"); + c::ts_highlighter_add_language( + highlighter, + js_scope.as_ptr(), + js_injection_regex.as_ptr(), + language, + highlights_query.as_ptr() as *const i8, + injections_query.as_ptr() as *const i8, + locals_query.as_ptr() as *const i8, + highlights_query.len() as u32, + injections_query.len() as u32, + locals_query.len() as u32, + ); + + let html_scope = c_string("text.html.basic"); + let html_injection_regex = c_string("^html"); + let language = get_language("html"); + let (highlights_query, injections_query, locals_query) = get_highlight_query_sources("html"); + c::ts_highlighter_add_language( + highlighter, + html_scope.as_ptr(), + html_injection_regex.as_ptr(), + language, + highlights_query.as_ptr() as *const i8, + injections_query.as_ptr() as *const i8, + locals_query.as_ptr() as *const i8, + highlights_query.len() as u32, + injections_query.len() as u32, + locals_query.len() as u32, + ); - let highlighter = c::ts_highlighter_new(attribute_strings.as_ptr()); let buffer = c::ts_highlight_buffer_new(); - c::ts_highlighter_add_language( - highlighter, - html_scope_name.as_ptr(), - html_lang, - html_sheet.as_ptr(), - ptr::null_mut(), - ); - c::ts_highlighter_add_language( - highlighter, - js_scope_name.as_ptr(), - js_lang, - js_sheet.as_ptr(), - injection_regex.as_ptr(), - ); c::ts_highlighter_highlight( highlighter, - html_scope_name.as_ptr(), + html_scope.as_ptr(), source_code.as_ptr(), source_code.as_bytes().len() as u32, buffer, @@ -452,50 +543,57 @@ fn c_string(s: &str) -> CString { CString::new(s.as_bytes().to_vec()).unwrap() } -fn test_language_for_injection_string<'a>( - string: &str, -) -> Option<(Language, &'a PropertySheet)> { +fn test_language_for_injection_string<'a>(string: &str) -> Option<&'a HighlightConfiguration> { match string { - "javascript" => Some((get_language("javascript"), &JS_SHEET)), - "html" => Some((get_language("html"), &HTML_SHEET)), - "rust" => Some((get_language("rust"), &RUST_SHEET)), + 
"javascript" => Some(&JS_HIGHLIGHT), + "html" => Some(&HTML_HIGHLIGHT), + "rust" => Some(&RUST_HIGHLIGHT), _ => None, } } fn to_html<'a>( src: &'a str, - language: Language, - property_sheet: &'a PropertySheet, + language_config: &'a HighlightConfiguration, ) -> Result, Error> { - highlight_html( - src.as_bytes(), - language, - property_sheet, + let src = src.as_bytes(); + let mut renderer = HtmlRenderer::new(); + let mut context = HighlightContext::new(); + let events = HIGHLIGHTER.highlight( + &mut context, + language_config, + src, None, &test_language_for_injection_string, - &|highlight| SCOPE_CLASS_STRINGS[highlight as usize].as_str(), - ) + )?; + + renderer + .render(events, src, &|highlight| HTML_ATTRS[highlight.0].as_bytes()) + .unwrap(); + Ok(renderer.lines().map(|s| s.to_string()).collect()) } fn to_token_vector<'a>( src: &'a str, - language: Language, - property_sheet: &'a PropertySheet, -) -> Result)>>, Error> { + language_config: &'a HighlightConfiguration, +) -> Result)>>, Error> { let src = src.as_bytes(); + let mut context = HighlightContext::new(); let mut lines = Vec::new(); let mut highlights = Vec::new(); let mut line = Vec::new(); - for event in highlight( + let events = HIGHLIGHTER.highlight( + &mut context, + language_config, src, - language, - property_sheet, None, &test_language_for_injection_string, - )? { + )?; + for event in events { match event? { - HighlightEvent::HighlightStart(s) => highlights.push(s), + HighlightEvent::HighlightStart(s) => { + highlights.push(HIGHLIGHTER.highlight_names[s.0].as_str()) + } HighlightEvent::HighlightEnd => { highlights.pop(); } diff --git a/highlight/include/tree_sitter/highlight.h b/highlight/include/tree_sitter/highlight.h index 8e879b5e..fb44f7a0 100644 --- a/highlight/include/tree_sitter/highlight.h +++ b/highlight/include/tree_sitter/highlight.h @@ -14,47 +14,15 @@ typedef enum { TSHighlightInvalidLanguage, } TSHighlightError; -// The list of scopes which can be styled for syntax highlighting. 
-// When constructing a `TSHighlighter`, you need to construct an -// `attribute_strings` array whose elements correspond to these values. -enum TSHighlightValue { - TSHighlightValueAttribute, - TSHighlightValueComment, - TSHighlightValueConstant, - TSHighlightValueConstantBuiltin, - TSHighlightValueConstructor, - TSHighlightValueConstructorBuiltin, - TSHighlightValueEmbedded, - TSHighlightValueEscape, - TSHighlightValueFunction, - TSHighlightValueFunctionBuiltin, - TSHighlightValueKeyword, - TSHighlightValueNumber, - TSHighlightValueOperator, - TSHighlightValueProperty, - TSHighlightValuePropertyBuiltin, - TSHighlightValuePunctuation, - TSHighlightValuePunctuationBracket, - TSHighlightValuePunctuationDelimiter, - TSHighlightValuePunctuationSpecial, - TSHighlightValueString, - TSHighlightValueStringSpecial, - TSHighlightValueTag, - TSHighlightValueType, - TSHighlightValueTypeBuiltin, - TSHighlightValueVariable, - TSHighlightValueVariableBuiltin, - TSHighlightValueVariableParameter, - TSHighlightValueUnknown, -}; - typedef struct TSHighlighter TSHighlighter; typedef struct TSHighlightBuffer TSHighlightBuffer; // Construct a `TSHighlighter` by providing a list of strings containing // the HTML attributes that should be applied for each highlight value. TSHighlighter *ts_highlighter_new( - const char **attribute_strings + const char **highlight_names, + const char **attribute_strings, + uint32_t highlight_count ); // Delete a syntax highlighter. @@ -70,9 +38,14 @@ void ts_highlighter_delete(TSHighlighter *); int ts_highlighter_add_language( TSHighlighter *self, const char *scope_name, + const char *injection_regex, const TSLanguage *language, - const char *property_sheet_json, - const char *injection_regex + const char *highlight_query, + const char *injection_query, + const char *locals_query, + uint32_t highlight_query_len, + uint32_t injection_query_len, + uint32_t locals_query_len ); // Compute syntax highlighting for a given document. 
You must first diff --git a/highlight/src/c_lib.rs b/highlight/src/c_lib.rs index 063ab990..57eed04c 100644 --- a/highlight/src/c_lib.rs +++ b/highlight/src/c_lib.rs @@ -1,25 +1,23 @@ -use super::{load_property_sheet, Error, Highlight, Highlighter, HtmlRenderer, Properties}; +use super::{Error, HighlightConfiguration, HighlightContext, Highlighter, HtmlRenderer}; use regex::Regex; use std::collections::HashMap; use std::ffi::CStr; use std::os::raw::c_char; use std::process::abort; use std::sync::atomic::AtomicUsize; -use std::{fmt, slice}; -use tree_sitter::{Language, PropertySheet}; - -struct LanguageConfiguration { - language: Language, - property_sheet: PropertySheet, - injection_regex: Option, -} +use std::{fmt, slice, str}; +use tree_sitter::Language; pub struct TSHighlighter { - languages: HashMap, + languages: HashMap, HighlightConfiguration)>, attribute_strings: Vec<&'static [u8]>, + highlighter: Highlighter, } -pub struct TSHighlightBuffer(HtmlRenderer); +pub struct TSHighlightBuffer { + context: HighlightContext, + renderer: HtmlRenderer, +} #[repr(C)] pub enum ErrorCode { @@ -27,33 +25,113 @@ pub enum ErrorCode { UnknownScope, Timeout, InvalidLanguage, + InvalidUtf8, + InvalidRegex, + InvalidQuery, } #[no_mangle] pub extern "C" fn ts_highlighter_new( + highlight_names: *const *const c_char, attribute_strings: *const *const c_char, + highlight_count: u32, ) -> *mut TSHighlighter { + let highlight_names = + unsafe { slice::from_raw_parts(highlight_names, highlight_count as usize) }; let attribute_strings = - unsafe { slice::from_raw_parts(attribute_strings, Highlight::Unknown as usize + 1) }; + unsafe { slice::from_raw_parts(attribute_strings, highlight_count as usize) }; + let highlight_names = highlight_names + .into_iter() + .map(|s| unsafe { CStr::from_ptr(*s).to_string_lossy().to_string() }) + .collect(); let attribute_strings = attribute_strings .into_iter() - .map(|s| { - if s.is_null() { - &[] - } else { - unsafe { CStr::from_ptr(*s).to_bytes() } - 
} - }) + .map(|s| unsafe { CStr::from_ptr(*s).to_bytes() }) .collect(); + let highlighter = Highlighter::new(highlight_names); Box::into_raw(Box::new(TSHighlighter { languages: HashMap::new(), attribute_strings, + highlighter, })) } +#[no_mangle] +pub extern "C" fn ts_highlighter_add_language( + this: *mut TSHighlighter, + scope_name: *const c_char, + injection_regex: *const c_char, + language: Language, + highlight_query: *const c_char, + injection_query: *const c_char, + locals_query: *const c_char, + highlight_query_len: u32, + injection_query_len: u32, + locals_query_len: u32, +) -> ErrorCode { + let f = move || { + let this = unwrap_mut_ptr(this); + let scope_name = unsafe { CStr::from_ptr(scope_name) }; + let scope_name = scope_name + .to_str() + .or(Err(ErrorCode::InvalidUtf8))? + .to_string(); + let injection_regex = if injection_regex.is_null() { + None + } else { + let pattern = unsafe { CStr::from_ptr(injection_regex) }; + let pattern = pattern.to_str().or(Err(ErrorCode::InvalidUtf8))?; + Some(Regex::new(pattern).or(Err(ErrorCode::InvalidRegex))?) + }; + + let highlight_query = unsafe { + slice::from_raw_parts(highlight_query as *const u8, highlight_query_len as usize) + }; + let highlight_query = str::from_utf8(highlight_query).or(Err(ErrorCode::InvalidUtf8))?; + + let injection_query = if injection_query_len > 0 { + let query = unsafe { + slice::from_raw_parts(injection_query as *const u8, injection_query_len as usize) + }; + str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))? + } else { + "" + }; + + let locals_query = if locals_query_len > 0 { + let query = unsafe { + slice::from_raw_parts(locals_query as *const u8, locals_query_len as usize) + }; + str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))? 
+ } else { + "" + }; + + this.languages.insert( + scope_name, + ( + injection_regex, + this.highlighter + .load_configuration(language, highlight_query, injection_query, locals_query) + .or(Err(ErrorCode::InvalidQuery))?, + ), + ); + + Ok(()) + }; + + match f() { + Ok(()) => ErrorCode::Ok, + Err(e) => e, + } +} + #[no_mangle] pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer { - Box::into_raw(Box::new(TSHighlightBuffer(HtmlRenderer::new()))) + Box::into_raw(Box::new(TSHighlightBuffer { + context: HighlightContext::new(), + renderer: HtmlRenderer::new(), + })) } #[no_mangle] @@ -69,59 +147,25 @@ pub extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) { #[no_mangle] pub extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 { let this = unwrap_ptr(this); - this.0.html.as_slice().as_ptr() + this.renderer.html.as_slice().as_ptr() } #[no_mangle] pub extern "C" fn ts_highlight_buffer_line_offsets(this: *const TSHighlightBuffer) -> *const u32 { let this = unwrap_ptr(this); - this.0.line_offsets.as_slice().as_ptr() + this.renderer.line_offsets.as_slice().as_ptr() } #[no_mangle] pub extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 { let this = unwrap_ptr(this); - this.0.html.len() as u32 + this.renderer.html.len() as u32 } #[no_mangle] pub extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 { let this = unwrap_ptr(this); - this.0.line_offsets.len() as u32 -} - -#[no_mangle] -pub extern "C" fn ts_highlighter_add_language( - this: *mut TSHighlighter, - scope_name: *const c_char, - language: Language, - property_sheet_json: *const c_char, - injection_regex: *const c_char, -) -> ErrorCode { - let this = unwrap_mut_ptr(this); - let scope_name = unsafe { CStr::from_ptr(scope_name) }; - let scope_name = unwrap(scope_name.to_str()).to_string(); - let property_sheet_json = unsafe { CStr::from_ptr(property_sheet_json) }; - let property_sheet_json = 
unwrap(property_sheet_json.to_str()); - - let property_sheet = unwrap(load_property_sheet(language, property_sheet_json)); - let injection_regex = if injection_regex.is_null() { - None - } else { - let pattern = unsafe { CStr::from_ptr(injection_regex) }; - Some(unwrap(Regex::new(unwrap(pattern.to_str())))) - }; - - this.languages.insert( - scope_name, - LanguageConfiguration { - language, - property_sheet, - injection_regex, - }, - ); - - ErrorCode::Ok + this.renderer.line_offsets.len() as u32 } #[no_mangle] @@ -150,36 +194,36 @@ impl TSHighlighter { output: &mut TSHighlightBuffer, cancellation_flag: Option<&AtomicUsize>, ) -> ErrorCode { - let configuration = self.languages.get(scope_name); - if configuration.is_none() { + let entry = self.languages.get(scope_name); + if entry.is_none() { return ErrorCode::UnknownScope; } - let configuration = configuration.unwrap(); + let (_, configuration) = entry.unwrap(); let languages = &self.languages; - let highlighter = Highlighter::new( + let highlights = self.highlighter.highlight( + &mut output.context, + configuration, source_code, - configuration.language, - &configuration.property_sheet, - |injection_string| { - languages.values().find_map(|conf| { - conf.injection_regex.as_ref().and_then(|regex| { + cancellation_flag, + move |injection_string| { + languages.values().find_map(|(injection_regex, config)| { + injection_regex.as_ref().and_then(|regex| { if regex.is_match(injection_string) { - Some((conf.language, &conf.property_sheet)) + Some(config) } else { None } }) }) }, - cancellation_flag, ); - if let Ok(highlighter) = highlighter { - output.0.reset(); - let result = output.0.render(highlighter, source_code, &|s| { - self.attribute_strings[s as usize] - }); + if let Ok(highlights) = highlights { + output.renderer.reset(); + let result = output + .renderer + .render(highlights, source_code, &|s| self.attribute_strings[s.0]); match result { Err(Error::Cancelled) => { return ErrorCode::Timeout; diff --git 
a/highlight/src/lib.rs b/highlight/src/lib.rs index 477a640d..a362dab0 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -1,16 +1,18 @@ pub mod c_lib; pub mod util; - pub use c_lib as c; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use serde_derive::*; -use std::mem::transmute; + use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{cmp, fmt, str, usize}; -use tree_sitter::{Language, Node, Parser, Point, PropertySheet, Range, Tree, TreePropertyCursor}; +use std::{iter, mem, ops, str, usize}; +use tree_sitter::{ + Language, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, Range, Tree, +}; const CANCELLATION_CHECK_INTERVAL: usize = 100; +#[derive(Copy, Clone, Debug)] +pub struct Highlight(pub usize); + #[derive(Debug, PartialEq, Eq)] pub enum Error { Cancelled, @@ -19,104 +21,10 @@ pub enum Error { } #[derive(Debug)] -enum TreeStep { - Child { - index: isize, - kinds: Option>, - }, - Children { - kinds: Option>, - }, - Next { - kinds: Option>, - }, -} - -#[derive(Debug)] -enum InjectionLanguage { - Literal(String), - TreePath(Vec), -} - -#[derive(Debug)] -struct Injection { - language: InjectionLanguage, - content: Vec, - includes_children: bool, -} - -#[derive(Debug)] -pub struct Properties { - highlight: Option, - highlight_nonlocal: Option, - injections: Vec, - local_scope: Option, - local_definition: bool, - local_reference: bool, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[repr(u16)] -pub enum Highlight { - Attribute, - Comment, - Constant, - ConstantBuiltin, - Constructor, - ConstructorBuiltin, - Embedded, - Escape, - Function, - FunctionBuiltin, - Keyword, - Number, - Operator, - Property, - PropertyBuiltin, - Punctuation, - PunctuationBracket, - PunctuationDelimiter, - PunctuationSpecial, - String, - StringSpecial, - Tag, - Type, - TypeBuiltin, - Variable, - VariableBuiltin, - VariableParameter, - Unknown, -} - -#[derive(Debug)] -struct Scope<'a> { +struct 
LocalScope<'a> { inherits: bool, - local_defs: Vec<(&'a str, Highlight)>, -} - -struct Layer<'a> { - _tree: Tree, - cursor: TreePropertyCursor<'a, Properties>, - ranges: Vec, - at_node_end: bool, - depth: usize, - opaque: bool, - scope_stack: Vec>, - local_highlight: Option, -} - -struct Highlighter<'a, T> -where - T: Fn(&str) -> Option<(Language, &'a PropertySheet)>, -{ - injection_callback: T, - source: &'a [u8], - source_offset: usize, - parser: Parser, - layers: Vec>, - max_opaque_layer_depth: usize, - operation_count: usize, - cancellation_flag: Option<&'a AtomicUsize>, + range: ops::Range, + local_defs: Vec<(&'a str, Option)>, } #[derive(Copy, Clone, Debug)] @@ -126,439 +34,256 @@ pub enum HighlightEvent { HighlightEnd, } -#[derive(Debug, Deserialize)] -#[serde(untagged)] -enum TreePathArgJSON { - TreePath(TreePathJSON), - Number(isize), - String(String), +pub struct HighlightConfiguration { + pub language: Language, + pub query: Query, + locals_pattern_index: usize, + highlights_pattern_index: usize, + highlight_indices: Vec>, + non_local_variable_patterns: Vec, + injection_site_capture_index: Option, + injection_content_capture_index: Option, + injection_language_capture_index: Option, + local_scope_capture_index: Option, + local_def_capture_index: Option, + local_ref_capture_index: Option, } -#[derive(Debug, Deserialize)] -#[serde(tag = "name")] -enum TreePathJSON { - #[serde(rename = "this")] - This, - #[serde(rename = "child")] - Child { args: Vec }, - #[serde(rename = "next")] - Next { args: Vec }, - #[serde(rename = "children")] - Children { args: Vec }, +#[derive(Clone, Debug)] +pub struct Highlighter { + pub highlight_names: Vec, } -#[derive(Debug, Deserialize)] -#[serde(untagged)] -enum InjectionLanguageJSON { - List(Vec), - TreePath(TreePathJSON), - Literal(String), +pub struct HighlightContext { + parser: Parser, + cursors: Vec, } -#[derive(Debug, Deserialize)] -#[serde(untagged)] -enum InjectionContentJSON { - List(Vec), - TreePath(TreePathJSON), 
-} - -#[derive(Debug, Deserialize)] -#[serde(untagged)] -enum InjectionIncludesChildrenJSON { - List(Vec), - Single(bool), -} - -#[derive(Debug, Deserialize)] -struct PropertiesJSON { - highlight: Option, - #[serde(rename = "highlight-nonlocal")] - highlight_nonlocal: Option, - - #[serde(rename = "injection-language")] - injection_language: Option, - #[serde(rename = "injection-content")] - injection_content: Option, - #[serde(default, rename = "injection-includes-children")] - injection_includes_children: Option, - - #[serde(default, rename = "local-scope")] - local_scope: bool, - #[serde(default, rename = "local-scope-inherit")] - local_scope_inherit: bool, - #[serde(default, rename = "local-definition")] - local_definition: bool, - #[serde(default, rename = "local-reference")] - local_reference: bool, -} - -#[derive(Debug)] -pub enum PropertySheetError { - InvalidJSON(serde_json::Error), - InvalidRegex(regex::Error), - InvalidFormat(String), -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Error::Cancelled => write!(f, "Cancelled"), - Error::InvalidLanguage => write!(f, "Invalid language"), - Error::Unknown => write!(f, "Unknown error"), - } - } -} - -impl fmt::Display for PropertySheetError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - PropertySheetError::InvalidJSON(e) => e.fmt(f), - PropertySheetError::InvalidRegex(e) => e.fmt(f), - PropertySheetError::InvalidFormat(e) => e.fmt(f), - } - } -} - -impl<'a> fmt::Debug for Layer<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "Layer {{ at_node_end: {}, node: {:?} }}", - self.at_node_end, - self.cursor.node() - )?; - Ok(()) - } -} - -pub fn load_property_sheet( - language: Language, - json: &str, -) -> Result, PropertySheetError> { - let sheet = PropertySheet::new(language, json).map_err(|e| match e { - tree_sitter::PropertySheetError::InvalidJSON(e) => PropertySheetError::InvalidJSON(e), - 
tree_sitter::PropertySheetError::InvalidRegex(e) => PropertySheetError::InvalidRegex(e), - })?; - let sheet = sheet - .map(|p| Properties::new(p, language)) - .map_err(PropertySheetError::InvalidFormat)?; - Ok(sheet) -} - -impl Highlight { - pub fn from_usize(i: usize) -> Option { - if i <= (Highlight::Unknown as usize) { - Some(unsafe { transmute(i as u16) }) - } else { - None - } - } -} - -impl Properties { - fn new(json: PropertiesJSON, language: Language) -> Result { - let injections = match (json.injection_language, json.injection_content) { - (None, None) => Ok(Vec::new()), - (Some(_), None) => Err( - "Must specify an injection-content along with an injection-language".to_string(), - ), - (None, Some(_)) => Err( - "Must specify an injection-language along with an injection-content".to_string(), - ), - (Some(language_json), Some(content_json)) => { - let languages = match language_json { - InjectionLanguageJSON::List(list) => { - let mut result = Vec::with_capacity(list.len()); - for element in list { - result.push(match element { - InjectionLanguageJSON::TreePath(p) => { - let mut result = Vec::new(); - Self::flatten_tree_path(p, &mut result, language)?; - InjectionLanguage::TreePath(result) - } - InjectionLanguageJSON::Literal(s) => InjectionLanguage::Literal(s), - InjectionLanguageJSON::List(_) => { - panic!("Injection-language cannot be a list of lists") - } - }) - } - result - } - InjectionLanguageJSON::TreePath(p) => vec![{ - let mut result = Vec::new(); - Self::flatten_tree_path(p, &mut result, language)?; - InjectionLanguage::TreePath(result) - }], - InjectionLanguageJSON::Literal(s) => vec![InjectionLanguage::Literal(s)], - }; - - let contents = match content_json { - InjectionContentJSON::List(l) => { - let mut result = Vec::with_capacity(l.len()); - for element in l { - result.push(match element { - InjectionContentJSON::TreePath(p) => { - let mut result = Vec::new(); - Self::flatten_tree_path(p, &mut result, language)?; - result - } - 
InjectionContentJSON::List(_) => { - panic!("Injection-content cannot be a list of lists") - } - }) - } - result - } - InjectionContentJSON::TreePath(p) => vec![{ - let mut result = Vec::new(); - Self::flatten_tree_path(p, &mut result, language)?; - result - }], - }; - - let mut includes_children = match json.injection_includes_children { - Some(InjectionIncludesChildrenJSON::List(v)) => v, - Some(InjectionIncludesChildrenJSON::Single(v)) => vec![v], - None => vec![false], - }; - - if languages.len() == contents.len() { - includes_children.resize(languages.len(), includes_children[0]); - Ok(languages - .into_iter() - .zip(contents.into_iter()) - .zip(includes_children.into_iter()) - .map(|((language, content), includes_children)| Injection { - language, - content, - includes_children, - }) - .collect()) - } else { - Err(format!( - "Mismatch: got {} injection-language values but {} injection-content values", - languages.len(), - contents.len(), - )) - } - } - }?; - - Ok(Self { - highlight: json.highlight, - highlight_nonlocal: json.highlight_nonlocal, - local_scope: if json.local_scope { - Some(json.local_scope_inherit) - } else { - None - }, - local_definition: json.local_definition, - local_reference: json.local_reference, - injections, - }) - } - - // Transform a tree path from the format expressed directly in the property sheet - // (nested function calls), to a flat sequence of steps for transforming a list of - // nodes. This way, we can evaluate these tree paths with no recursion and a single - // vector of intermediate storage. 
- fn flatten_tree_path( - p: TreePathJSON, - steps: &mut Vec, - language: Language, - ) -> Result<(), String> { - match p { - TreePathJSON::This => {} - TreePathJSON::Child { args } => { - let (tree_path, index, kinds) = Self::parse_args("child", args, language)?; - Self::flatten_tree_path(tree_path, steps, language)?; - steps.push(TreeStep::Child { - index: index - .ok_or_else(|| "The `child` function requires an index".to_string())?, - kinds: kinds, - }); - } - TreePathJSON::Children { args } => { - let (tree_path, _, kinds) = Self::parse_args("children", args, language)?; - Self::flatten_tree_path(tree_path, steps, language)?; - steps.push(TreeStep::Children { kinds }); - } - TreePathJSON::Next { args } => { - let (tree_path, _, kinds) = Self::parse_args("next", args, language)?; - Self::flatten_tree_path(tree_path, steps, language)?; - steps.push(TreeStep::Next { kinds }); - } - } - Ok(()) - } - - fn parse_args( - name: &str, - args: Vec, - language: Language, - ) -> Result<(TreePathJSON, Option, Option>), String> { - let tree_path; - let mut index = None; - let mut kinds = Vec::new(); - let mut iter = args.into_iter(); - - match iter.next() { - Some(TreePathArgJSON::TreePath(p)) => tree_path = p, - _ => { - return Err(format!( - "First argument to `{}()` must be a tree path", - name - )); - } - } - - for arg in iter { - match arg { - TreePathArgJSON::TreePath(_) => { - return Err(format!( - "Other arguments to `{}()` must be strings or numbers", - name - )); - } - TreePathArgJSON::Number(i) => index = Some(i), - TreePathArgJSON::String(s) => kinds.push(s), - } - } - - if kinds.len() > 0 { - let mut kind_ids = Vec::new(); - for i in 0..(language.node_kind_count() as u16) { - if kinds.iter().any(|s| s == language.node_kind_for_id(i)) - && language.node_kind_is_named(i) - { - kind_ids.push(i); - } - } - if kind_ids.len() == 0 { - return Err(format!("Non-existent node kinds: {:?}", kinds)); - } - - Ok((tree_path, index, Some(kind_ids))) - } else { - Ok((tree_path, 
index, None)) - } - } -} - -impl<'a, F> Highlighter<'a, F> +struct HighlightIter<'a, F> where - F: Fn(&str) -> Option<(Language, &'a PropertySheet)>, + F: Fn(&str) -> Option<&'a HighlightConfiguration> + 'a, { - fn new( - source: &'a [u8], + source: &'a [u8], + byte_offset: usize, + context: &'a mut HighlightContext, + injection_callback: F, + cancellation_flag: Option<&'a AtomicUsize>, + layers: Vec>, + iter_count: usize, + next_event: Option, +} + +struct HighlightIterLayer<'a> { + _tree: Tree, + cursor: QueryCursor, + captures: iter::Peekable>, + config: &'a HighlightConfiguration, + highlight_end_stack: Vec, + scope_stack: Vec>, + ranges: Vec, +} + +impl HighlightContext { + pub fn new() -> Self { + HighlightContext { + parser: Parser::new(), + cursors: Vec::new(), + } + } +} + +impl Highlighter { + pub fn new(highlight_names: Vec) -> Self { + Highlighter { highlight_names } + } + + pub fn load_configuration( + &self, language: Language, - property_sheet: &'a PropertySheet, - injection_callback: F, - cancellation_flag: Option<&'a AtomicUsize>, - ) -> Result { - let mut parser = Parser::new(); - unsafe { parser.set_cancellation_flag(cancellation_flag.clone()) }; - parser - .set_language(language) - .map_err(|_| Error::InvalidLanguage)?; - let tree = parser.parse(source, None).ok_or_else(|| Error::Cancelled)?; - Ok(Self { - parser, - source, - cancellation_flag, - injection_callback, - source_offset: 0, - operation_count: 0, - max_opaque_layer_depth: 0, - layers: vec![Layer::new( - source, - tree, - property_sheet, - vec![Range { - start_byte: 0, - end_byte: usize::MAX, - start_point: Point::new(0, 0), - end_point: Point::new(usize::MAX, usize::MAX), - }], - 0, - true, - )], + highlights_query: &str, + injection_query: &str, + locals_query: &str, + ) -> Result { + // Concatenate the query strings, keeping track of the start offset of each section. 
+ let mut query_source = String::new(); + query_source.push_str(injection_query); + let locals_query_offset = query_source.len(); + query_source.push_str(locals_query); + let highlights_query_offset = query_source.len(); + query_source.push_str(highlights_query); + + // Construct a query with the concatenated string. + let query = Query::new(language, &query_source)?; + + // Determine the range of pattern indices that belong to each section of the query. + let mut locals_pattern_index = 0; + let mut highlights_pattern_index = 0; + for i in 0..(query.pattern_count()) { + let pattern_offset = query.start_byte_for_pattern(i); + if pattern_offset < highlights_query_offset { + if pattern_offset < highlights_query_offset { + highlights_pattern_index += 1; + } + if pattern_offset < locals_query_offset { + locals_pattern_index += 1; + } + } + } + + // Compute a mapping from the query's capture ids to the indices of the highlighter's + // recognized highlight names. + let highlight_indices = query + .capture_names() + .iter() + .map(move |capture_name| { + let mut best_index = None; + let mut best_name_len = 0; + let mut best_common_prefix_len = 0; + for (i, highlight_name) in self.highlight_names.iter().enumerate() { + if highlight_name.len() > capture_name.len() { + continue; + } + + let capture_parts = capture_name.split('.'); + let highlight_parts = highlight_name.split('.'); + let common_prefix_len = capture_parts + .zip(highlight_parts) + .take_while(|(a, b)| a == b) + .count(); + let is_best_match = common_prefix_len > best_common_prefix_len + || (common_prefix_len == best_common_prefix_len + && highlight_name.len() < best_name_len); + if is_best_match { + best_index = Some(i); + best_name_len = highlight_name.len(); + best_common_prefix_len = common_prefix_len; + } + } + best_index.map(Highlight) + }) + .collect(); + + let non_local_variable_patterns = (0..query.pattern_count()) + .map(|i| { + query + .property_predicates(i) + .iter() + .any(|(prop, positive)| 
!*positive && prop.key.as_ref() == "local") + }) + .collect(); + + let mut injection_content_capture_index = None; + let mut injection_language_capture_index = None; + let mut injection_site_capture_index = None; + let mut local_def_capture_index = None; + let mut local_ref_capture_index = None; + let mut local_scope_capture_index = None; + for (i, name) in query.capture_names().iter().enumerate() { + let i = Some(i as u32); + match name.as_str() { + "injection.content" => injection_content_capture_index = i, + "injection.language" => injection_language_capture_index = i, + "injection.site" => injection_site_capture_index = i, + "local.definition" => local_def_capture_index = i, + "local.reference" => local_ref_capture_index = i, + "local.scope" => local_scope_capture_index = i, + _ => {} + } + } + + Ok(HighlightConfiguration { + query, + language, + locals_pattern_index, + highlights_pattern_index, + highlight_indices, + non_local_variable_patterns, + injection_content_capture_index, + injection_language_capture_index, + injection_site_capture_index, + local_def_capture_index, + local_ref_capture_index, + local_scope_capture_index, }) } - fn emit_source(&mut self, next_offset: usize) -> HighlightEvent { - let result = HighlightEvent::Source { - start: self.source_offset, - end: next_offset, - }; - self.source_offset = next_offset; - result - } + pub fn highlight<'a>( + &'a self, + context: &'a mut HighlightContext, + config: &'a HighlightConfiguration, + source: &'a [u8], + cancellation_flag: Option<&'a AtomicUsize>, + injection_callback: impl Fn(&str) -> Option<&'a HighlightConfiguration> + 'a, + ) -> Result> + 'a, Error> { + let layer = HighlightIterLayer::new( + config, + source, + context, + cancellation_flag, + vec![Range { + start_byte: 0, + end_byte: usize::MAX, + start_point: Point::new(0, 0), + end_point: Point::new(usize::MAX, usize::MAX), + }], + )?; - fn process_tree_step(&self, step: &TreeStep, nodes: &mut Vec) { - let len = nodes.len(); - for i in 
0..len { - let node = nodes[i]; - match step { - TreeStep::Child { index, kinds } => { - let index = if *index >= 0 { - *index as usize - } else { - (node.child_count() as isize + *index) as usize - }; - if let Some(child) = node.child(index) { - if let Some(kinds) = kinds { - if kinds.contains(&child.kind_id()) { - nodes.push(child); - } - } else { - nodes.push(child); - } - } - } - TreeStep::Children { kinds } => { - for child in node.children() { - if let Some(kinds) = kinds { - if kinds.contains(&child.kind_id()) { - nodes.push(child); - } - } else { - nodes.push(child); - } - } - } - TreeStep::Next { .. } => unimplemented!(), - } - } - nodes.drain(0..len); + Ok(HighlightIter { + source, + byte_offset: 0, + injection_callback, + cancellation_flag, + context, + iter_count: 0, + layers: vec![layer], + next_event: None, + }) } +} - fn nodes_for_tree_path(&self, node: Node<'a>, steps: &Vec) -> Vec> { - let mut nodes = vec![node]; - for step in steps.iter() { - self.process_tree_step(step, &mut nodes); - } - nodes - } +impl<'a> HighlightIterLayer<'a> { + fn new( + config: &'a HighlightConfiguration, + source: &'a [u8], + context: &mut HighlightContext, + cancellation_flag: Option<&'a AtomicUsize>, + ranges: Vec, + ) -> Result { + context + .parser + .set_language(config.language) + .map_err(|_| Error::InvalidLanguage)?; + unsafe { context.parser.set_cancellation_flag(cancellation_flag) }; - // An injected language name may either be specified as a fixed string, or based - // on the text of some node in the syntax tree. 
- fn injection_language_string( - &self, - node: &Node<'a>, - language: &InjectionLanguage, - ) -> Option { - match language { - InjectionLanguage::Literal(s) => Some(s.to_string()), - InjectionLanguage::TreePath(steps) => self - .nodes_for_tree_path(*node, steps) - .first() - .and_then(|node| { - str::from_utf8(&self.source[node.start_byte()..node.end_byte()]) - .map(|s| s.to_owned()) - .ok() - }), - } + context.parser.set_included_ranges(&ranges); + + let tree = context.parser.parse(source, None).ok_or(Error::Cancelled)?; + let mut cursor = context.cursors.pop().unwrap_or(QueryCursor::new()); + + // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which + // prevents them from being moved. But both of these values are really just + // pointers, so it's actually ok to move them. + let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(&tree) }; + let cursor_ref = unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; + let captures = cursor_ref + .captures(&config.query, tree_ref.root_node(), move |n| { + &source[n.byte_range()] + }) + .peekable(); + + Ok(HighlightIterLayer { + highlight_end_stack: Vec::new(), + scope_stack: vec![LocalScope { + inherits: false, + range: 0..usize::MAX, + local_defs: Vec::new(), + }], + cursor, + _tree: tree, + captures, + config, + ranges, + }) } // Compute the ranges that should be included when parsing an injection. @@ -572,13 +297,9 @@ where // excluded from the nested document, so that only the content nodes' *own* content // is reparsed. For other injections, the content nodes' entire ranges should be // reparsed, including the ranges of their children. 
- fn intersect_ranges( - parent_ranges: &Vec, - nodes: &Vec, - includes_children: bool, - ) -> Vec { + fn intersect_ranges(&self, nodes: &Vec, includes_children: bool) -> Vec { let mut result = Vec::new(); - let mut parent_range_iter = parent_ranges.iter(); + let mut parent_range_iter = self.ranges.iter(); let mut parent_range = parent_range_iter .next() .expect("Layers should only be constructed with non-empty ranges vectors"); @@ -656,456 +377,354 @@ where result } - fn add_layer( - &mut self, - language_string: &str, - ranges: Vec, - depth: usize, - includes_children: bool, - ) -> Option { - if let Some((language, property_sheet)) = (self.injection_callback)(language_string) { - if self.parser.set_language(language).is_err() { - return Some(Error::InvalidLanguage); - } - self.parser.set_included_ranges(&ranges); - if let Some(tree) = self.parser.parse(self.source, None) { - let layer = Layer::new( - self.source, - tree, - property_sheet, - ranges, - depth, - includes_children, - ); - if includes_children && depth > self.max_opaque_layer_depth { - self.max_opaque_layer_depth = depth; - } - match self.layers.binary_search_by(|l| l.cmp(&layer)) { - Ok(i) | Err(i) => self.layers.insert(i, layer), - }; - } else { - return Some(Error::Cancelled); - } - } - None - } - - fn remove_first_layer(&mut self) { - let layer = self.layers.remove(0); - if layer.opaque && layer.depth == self.max_opaque_layer_depth { - self.max_opaque_layer_depth = self - .layers - .iter() - .filter_map(|l| if l.opaque { Some(l.depth) } else { None }) - .max() - .unwrap_or(0); + fn offset(&mut self) -> Option { + let next_start = self + .captures + .peek() + .map(|(m, i)| m.captures[*i].node.start_byte()); + let next_end = self.highlight_end_stack.last().cloned(); + match (next_start, next_end) { + (Some(i), Some(j)) => Some(usize::min(i, j)), + (Some(i), None) => Some(i), + (None, Some(j)) => Some(j), + _ => None, } } } -impl<'a, T> Iterator for Highlighter<'a, T> +impl<'a, F> HighlightIter<'a, 
F> where - T: Fn(&str) -> Option<(Language, &'a PropertySheet)>, + F: Fn(&str) -> Option<&'a HighlightConfiguration> + 'a, +{ + fn emit_event( + &mut self, + offset: usize, + event: Option, + ) -> Option> { + let result; + if self.byte_offset < offset { + result = Some(Ok(HighlightEvent::Source { + start: self.byte_offset, + end: offset, + })); + self.byte_offset = offset; + self.next_event = event; + } else { + result = event.map(Ok); + } + self.sort_layers(); + result + } + + fn sort_layers(&mut self) { + if let Some(offset) = self.layers[0].offset() { + let mut i = 0; + while i + 1 < self.layers.len() { + if let Some(next_offset) = self.layers[i + 1].offset() { + if next_offset < offset { + i += 1; + continue; + } + } + break; + } + if i > 0 { + &self.layers[0..(i + 1)].rotate_left(i); + } + } else { + let layer = self.layers.remove(0); + self.context.cursors.push(layer.cursor); + } + } +} + +impl<'a, F> Iterator for HighlightIter<'a, F> +where + F: Fn(&str) -> Option<&'a HighlightConfiguration> + 'a, { type Item = Result; fn next(&mut self) -> Option { - if let Some(cancellation_flag) = self.cancellation_flag { - self.operation_count += 1; - if self.operation_count >= CANCELLATION_CHECK_INTERVAL { - self.operation_count = 0; - if cancellation_flag.load(Ordering::Relaxed) != 0 { - return Some(Err(Error::Cancelled)); + loop { + // If we've already determined the next highlight boundary, just return it. + if let Some(e) = self.next_event.take() { + return Some(Ok(e)); + } + + // Periodically check for cancellation, returning `Cancelled` error if the + // cancellation flag was flipped. 
+ if let Some(cancellation_flag) = self.cancellation_flag { + self.iter_count += 1; + if self.iter_count >= CANCELLATION_CHECK_INTERVAL { + self.iter_count = 0; + if cancellation_flag.load(Ordering::Relaxed) != 0 { + return Some(Err(Error::Cancelled)); + } } } - } - while !self.layers.is_empty() { - let mut scope_event = None; - let first_layer = &self.layers[0]; + // If none of the layers have any more scope boundaries, terminate. + if self.layers.is_empty() { + if self.byte_offset < self.source.len() { + let result = Some(Ok(HighlightEvent::Source { + start: self.byte_offset, + end: self.source.len(), + })); + self.byte_offset = self.source.len(); + return result; + } else { + return None; + } + } - // If the current layer is not covered up by a nested layer, then - // process any scope boundaries and language injections for the layer's - // current position. - let first_layer_is_visible = first_layer.depth >= self.max_opaque_layer_depth; - if first_layer_is_visible { - let local_highlight = first_layer.local_highlight; - let properties = &first_layer.cursor.node_properties(); + // Get the next capture. If there are no more captures, then emit the rest of the + // source code. + let match_; + let mut capture; + let mut pattern_index; + let layer = &mut self.layers[0]; + if let Some((m, capture_index)) = layer.captures.peek() { + match_ = m; + pattern_index = match_.pattern_index; + capture = match_.captures[*capture_index]; + } else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { + layer.highlight_end_stack.pop(); + return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); + } else { + return self.emit_event(self.source.len(), None); + }; - // Add any injections for the current node. 
- if !first_layer.at_node_end { - let node = first_layer.cursor.node(); - let injections = properties - .injections + // If any previous highlight ends before this node starts, then before + // processing this capture, emit the source code up until the end of the + // previous highlight, and an end event for that highlight. + let range = capture.node.byte_range(); + if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { + if end_byte <= range.start { + layer.highlight_end_stack.pop(); + return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); + } + } + + // Remove from the scope stack any local scopes that have already ended. + while range.start > layer.scope_stack.last().unwrap().range.end { + layer.scope_stack.pop(); + } + + // If this capture represents an injection, then process the injection. + if pattern_index < layer.config.locals_pattern_index { + let site_capture_index = layer.config.injection_site_capture_index; + let content_capture_index = layer.config.injection_content_capture_index; + let language_capture_index = layer.config.injection_language_capture_index; + + // Find the language name and the node that represents the injection content. + let mut injection_site = None; + let mut injection_language = None; + let mut injection_contents = Vec::new(); + for capture in match_.captures { + let index = Some(capture.index); + if index == site_capture_index { + injection_site = Some(capture.node); + } else if index == language_capture_index { + injection_language = capture.node.utf8_text(self.source).ok(); + } else if index == content_capture_index { + injection_contents.push(capture.node); + } + } + + // In addition to specifying the language name via the text of a captured node, + // it can also be hard-coded via a `(set! injection.language )` + // predicate. 
+ if injection_language.is_none() { + injection_language = layer + .config + .query + .property_settings(pattern_index) .iter() - .filter_map( - |Injection { - language, - content, - includes_children, - }| { - if let Some(language) = - self.injection_language_string(&node, language) - { - let nodes = self.nodes_for_tree_path(node, content); - let ranges = Self::intersect_ranges( - &first_layer.ranges, - &nodes, - *includes_children, - ); - if ranges.len() > 0 { - return Some((language, ranges, *includes_children)); - } - } + .find_map(|prop| { + if prop.key.as_ref() == "injection.language" { + prop.value.as_ref().map(|s| s.as_ref()) + } else { None - }, - ) - .collect::>(); + } + }); + } - let depth = first_layer.depth + 1; - for (language, ranges, includes_children) in injections { - if let Some(error) = - self.add_layer(&language, ranges, depth, includes_children) + // For injections, we process entire matches at once, as opposed to processing + // each capture separately, interspersed with captures form other patterns. + // Explicitly remove this match so that none of its other captures will remain + // in the stream of captures. + layer.captures.next().unwrap().0.remove(); + + // If an `injection.site` was captured, then find any subsequent matches + // with the same pattern and `injection.site` capture. Those matches should + // all be combined into this match. This allows you to specify that a single + // injected document spans multiple 'content' nodes. 
+ if let Some(injection_site) = injection_site { + while let Some((next_match, _)) = layer.captures.peek() { + if next_match.pattern_index == pattern_index + && next_match.captures.iter().any(|c| { + Some(c.index) == site_capture_index && c.node == injection_site + }) { - return Some(Err(error)); + injection_contents.extend(next_match.captures.iter().filter_map(|c| { + if Some(c.index) == content_capture_index { + Some(c.node) + } else { + None + } + })); + layer.captures.next().unwrap().0.remove(); + continue; + } + break; + } + } + + // If a language is found with the given name, then add a new language layer + // to the highlighted document. + if let Some(config) = injection_language.and_then(&self.injection_callback) { + if !injection_contents.is_empty() { + match HighlightIterLayer::new( + config, + self.source, + self.context, + self.cancellation_flag, + layer.intersect_ranges(&injection_contents, false), + ) { + Ok(layer) => self.layers.push(layer), + Err(e) => return Some(Err(e)), } } } - // Determine if any scopes start or end at the current position. - let first_layer = &mut self.layers[0]; - if let Some(highlight) = local_highlight - .or(properties.highlight_nonlocal) - .or(properties.highlight) - { - let next_offset = cmp::min(self.source.len(), first_layer.offset()); + self.sort_layers(); + continue; + } - // Before returning any highlight boundaries, return any remaining slice of - // the source code the precedes that highlight boundary. - if self.source_offset < next_offset { - return Some(Ok(self.emit_source(next_offset))); + layer.captures.next(); + + // If this capture is for tracking local variables, then process the + // local variable info. + let mut reference_highlight = None; + let mut definition_highlight = None; + while pattern_index < layer.config.highlights_pattern_index { + // If the node represents a local scope, push a new local scope onto + // the scope stack. 
+ if Some(capture.index) == layer.config.local_scope_capture_index { + definition_highlight = None; + layer.scope_stack.push(LocalScope { + inherits: true, + range: range.clone(), + local_defs: Vec::new(), + }); + } + // If the node represents a definition, add a new definition to the + // local scope at the top of the scope stack. + else if Some(capture.index) == layer.config.local_def_capture_index { + reference_highlight = None; + definition_highlight = None; + let scope = layer.scope_stack.last_mut().unwrap(); + if let Ok(name) = str::from_utf8(&self.source[range.clone()]) { + scope.local_defs.push((name, None)); + definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.1); } + } + // If the node represents a reference, then try to find the corresponding + // definition in the scope stack. + else if Some(capture.index) == layer.config.local_ref_capture_index { + if definition_highlight.is_none() { + definition_highlight = None; + if let Ok(name) = str::from_utf8(&self.source[range.clone()]) { + for scope in layer.scope_stack.iter().rev() { + if let Some(highlight) = + scope.local_defs.iter().rev().find_map(|i| { + if i.0 == name { + Some(i.1) + } else { + None + } + }) + { + reference_highlight = highlight; + break; + } + if !scope.inherits { + break; + } + } + } + } + } - scope_event = if first_layer.at_node_end { - Some(Ok(HighlightEvent::HighlightEnd)) + // Continue processing any additional local-variable-tracking patterns + // for the same node. + if let Some((next_match, next_capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + pattern_index = next_match.pattern_index; + capture = next_capture; + layer.captures.next(); + continue; } else { - Some(Ok(HighlightEvent::HighlightStart(highlight))) - }; + break; + } } + + break; } - // Advance the current layer's tree cursor. 
This might cause that cursor to move - // beyond one of the other layers' cursors for a different syntax tree, so we need - // to re-sort the layers. If the cursor is already at the end of its syntax tree, - // remove it. - if self.layers[0].advance() { - let mut index = 0; - while self.layers.get(index + 1).map_or(false, |next| { - self.layers[index].cmp(next) == cmp::Ordering::Greater - }) { - self.layers.swap(index, index + 1); - index += 1; - } - } else { - self.remove_first_layer(); - } - - if scope_event.is_some() { - return scope_event; - } - } - - if self.source_offset < self.source.len() { - Some(Ok(self.emit_source(self.source.len()))) - } else { - None - } - } -} - -impl<'a, T> fmt::Debug for Highlighter<'a, T> -where - T: Fn(&str) -> Option<(Language, &'a PropertySheet)>, -{ - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if let Some(layer) = self.layers.first() { - let node = layer.cursor.node(); - let position = if layer.at_node_end { - node.end_position() - } else { - node.start_position() - }; - write!( - f, - "{{Highlighter position: {:?}, kind: {}, at_end: {}, props: {:?}}}", - position, - node.kind(), - layer.at_node_end, - layer.cursor.node_properties() - )?; - } - Ok(()) - } -} - -impl<'a> Layer<'a> { - fn new( - source: &'a [u8], - tree: Tree, - sheet: &'a PropertySheet, - ranges: Vec, - depth: usize, - opaque: bool, - ) -> Self { - // The cursor's lifetime parameter indicates that the tree must outlive the cursor. - // But because the tree is really a pointer to the heap, the cursor can remain - // valid when the tree is moved. There's no way to express this with lifetimes - // right now, so we have to `transmute` the cursor's lifetime. 
- let cursor = unsafe { transmute(tree.walk_with_properties(sheet, source)) }; - Self { - _tree: tree, - cursor, - ranges, - depth, - opaque, - at_node_end: false, - scope_stack: vec![Scope { - inherits: false, - local_defs: Vec::new(), - }], - local_highlight: None, - } - } - - fn cmp(&self, other: &Layer) -> cmp::Ordering { - // Events are ordered primarily by their position in the document. But if - // one highlight starts at a given position and another highlight ends at that - // same position, return the highlight end event before the highlight start event. - self.offset() - .cmp(&other.offset()) - .then_with(|| other.at_node_end.cmp(&self.at_node_end)) - .then_with(|| self.depth.cmp(&other.depth)) - } - - fn offset(&self) -> usize { - if self.at_node_end { - self.cursor.node().end_byte() - } else { - self.cursor.node().start_byte() - } - } - - fn advance(&mut self) -> bool { - // Clear the current local highlighting class, which may be re-populated - // if we enter a node that represents a local definition or local reference. - self.local_highlight = None; - - // Step through the tree in a depth-first traversal, stopping at both - // the start and end position of every node. - if self.at_node_end { - self.leave_node(); - if self.cursor.goto_next_sibling() { - self.enter_node(); - self.at_node_end = false; - } else if !self.cursor.goto_parent() { - return false; - } - } else if self.cursor.goto_first_child() { - self.enter_node(); - } else { - self.at_node_end = true; - } - true - } - - fn enter_node(&mut self) { - let node = self.cursor.node(); - let props = self.cursor.node_properties(); - let node_text = if props.local_definition || props.local_reference { - node.utf8_text(self.cursor.source()).ok() - } else { - None - }; - - // If this node represents a local definition, then record its highlighting class - // and store the highlighting class in the current local scope. 
- if props.local_definition { - if let (Some(text), Some(inner_scope), Some(highlight)) = - (node_text, self.scope_stack.last_mut(), props.highlight) + // If the current node was found to be a local variable, then skip over any + // highlighting patterns that are disabled for local variables. + let mut has_highlight = true; + while (definition_highlight.is_some() || reference_highlight.is_some()) + && layer.config.non_local_variable_patterns[pattern_index] { - self.local_highlight = props.highlight; - if let Err(i) = inner_scope.local_defs.binary_search_by_key(&text, |e| e.0) { - inner_scope.local_defs.insert(i, (text, highlight)); - } - } - } - // If this node represents a local reference, then look it up in the current scope - // stack. If a local definition is found, record its highlighting class. - else if props.local_reference { - if let Some(text) = node_text { - for scope in self.scope_stack.iter().rev() { - if let Ok(i) = scope.local_defs.binary_search_by_key(&text, |e| e.0) { - self.local_highlight = Some(scope.local_defs[i].1); - break; + has_highlight = false; + if let Some((next_match, next_capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + capture = next_capture; + has_highlight = true; + layer.captures.next(); + continue; } - if !scope.inherits { + } + break; + } + + if has_highlight { + // Once a highlighting pattern is found for the current node, skip over + // any later highlighting patterns that also match this node. Captures + // for a given node are ordered by pattern index, so these subsequent + // captures are guaranteed to be for highlighting, not injections or + // local variables. 
+ while let Some((next_match, next_capture_index)) = layer.captures.peek() { + if next_match.captures[*next_capture_index].node == capture.node { + layer.captures.next(); + } else { break; } } + + let current_highlight = layer.config.highlight_indices[capture.index as usize]; + + // If this node represents a local definition, then store the current + // highlight value on the local scope entry representing this node. + if let Some(definition_highlight) = definition_highlight { + *definition_highlight = current_highlight; + } + + // Emit a scope start event and push the node's end position to the stack. + if let Some(highlight) = reference_highlight.or(current_highlight) { + layer.highlight_end_stack.push(range.end); + return self + .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); + } } - } - // If this node represents a new local scope, then push it onto the scope stack. - if let Some(inherits) = props.local_scope { - self.scope_stack.push(Scope { - inherits, - local_defs: Vec::new(), - }); + + self.sort_layers(); } } - - fn leave_node(&mut self) { - let props = self.cursor.node_properties(); - if props.local_scope.is_some() { - self.scope_stack.pop(); - } - } -} - -impl<'de> Deserialize<'de> for Highlight { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let s = String::deserialize(deserializer)?; - match s.as_str() { - "attribute" => Ok(Highlight::Attribute), - "comment" => Ok(Highlight::Comment), - "constant" => Ok(Highlight::Constant), - "constant.builtin" => Ok(Highlight::ConstantBuiltin), - "constructor" => Ok(Highlight::Constructor), - "constructor.builtin" => Ok(Highlight::ConstructorBuiltin), - "embedded" => Ok(Highlight::Embedded), - "escape" => Ok(Highlight::Escape), - "function" => Ok(Highlight::Function), - "function.builtin" => Ok(Highlight::FunctionBuiltin), - "keyword" => Ok(Highlight::Keyword), - "number" => Ok(Highlight::Number), - "operator" => Ok(Highlight::Operator), - "property" => 
Ok(Highlight::Property), - "property.builtin" => Ok(Highlight::PropertyBuiltin), - "punctuation" => Ok(Highlight::Punctuation), - "punctuation.bracket" => Ok(Highlight::PunctuationBracket), - "punctuation.delimiter" => Ok(Highlight::PunctuationDelimiter), - "punctuation.special" => Ok(Highlight::PunctuationSpecial), - "string" => Ok(Highlight::String), - "string.special" => Ok(Highlight::StringSpecial), - "type" => Ok(Highlight::Type), - "type.builtin" => Ok(Highlight::TypeBuiltin), - "variable" => Ok(Highlight::Variable), - "variable.builtin" => Ok(Highlight::VariableBuiltin), - "variable.parameter" => Ok(Highlight::VariableParameter), - "tag" => Ok(Highlight::Tag), - _ => Ok(Highlight::Unknown), - } - } -} - -impl Serialize for Highlight { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - match self { - Highlight::Attribute => serializer.serialize_str("attribute"), - Highlight::Comment => serializer.serialize_str("comment"), - Highlight::Constant => serializer.serialize_str("constant"), - Highlight::ConstantBuiltin => serializer.serialize_str("constant.builtin"), - Highlight::Constructor => serializer.serialize_str("constructor"), - Highlight::ConstructorBuiltin => serializer.serialize_str("constructor.builtin"), - Highlight::Embedded => serializer.serialize_str("embedded"), - Highlight::Escape => serializer.serialize_str("escape"), - Highlight::Function => serializer.serialize_str("function"), - Highlight::FunctionBuiltin => serializer.serialize_str("function.builtin"), - Highlight::Keyword => serializer.serialize_str("keyword"), - Highlight::Number => serializer.serialize_str("number"), - Highlight::Operator => serializer.serialize_str("operator"), - Highlight::Property => serializer.serialize_str("property"), - Highlight::PropertyBuiltin => serializer.serialize_str("property.builtin"), - Highlight::Punctuation => serializer.serialize_str("punctuation"), - Highlight::PunctuationBracket => 
serializer.serialize_str("punctuation.bracket"), - Highlight::PunctuationDelimiter => serializer.serialize_str("punctuation.delimiter"), - Highlight::PunctuationSpecial => serializer.serialize_str("punctuation.special"), - Highlight::String => serializer.serialize_str("string"), - Highlight::StringSpecial => serializer.serialize_str("string.special"), - Highlight::Type => serializer.serialize_str("type"), - Highlight::TypeBuiltin => serializer.serialize_str("type.builtin"), - Highlight::Variable => serializer.serialize_str("variable"), - Highlight::VariableBuiltin => serializer.serialize_str("variable.builtin"), - Highlight::VariableParameter => serializer.serialize_str("variable.parameter"), - Highlight::Tag => serializer.serialize_str("tag"), - Highlight::Unknown => serializer.serialize_str(""), - } - } -} - -pub trait HTMLAttributeCallback<'a>: Fn(Highlight) -> &'a str {} - -pub fn highlight<'a, F>( - source: &'a [u8], - language: Language, - property_sheet: &'a PropertySheet, - cancellation_flag: Option<&'a AtomicUsize>, - injection_callback: F, -) -> Result> + 'a, Error> -where - F: Fn(&str) -> Option<(Language, &'a PropertySheet)> + 'a, -{ - Highlighter::new( - source, - language, - property_sheet, - injection_callback, - cancellation_flag, - ) -} - -pub fn highlight_html<'a, F1, F2>( - source: &'a [u8], - language: Language, - property_sheet: &'a PropertySheet, - cancellation_flag: Option<&'a AtomicUsize>, - injection_callback: F1, - attribute_callback: F2, -) -> Result, Error> -where - F1: Fn(&str) -> Option<(Language, &'a PropertySheet)>, - F2: Fn(Highlight) -> &'a str, -{ - let mut renderer = HtmlRenderer::new(); - renderer.render( - Highlighter::new( - source, - language, - property_sheet, - injection_callback, - cancellation_flag, - )?, - source, - &|s| (attribute_callback)(s).as_bytes(), - )?; - Ok(renderer - .line_offsets - .iter() - .enumerate() - .map(|(i, offset)| { - let offset = *offset as usize; - let next_offset = renderer - .line_offsets - 
.get(i + 1) - .map_or(renderer.html.len(), |i| *i as usize); - String::from_utf8(renderer.html[offset..next_offset].to_vec()).unwrap() - }) - .collect()) } pub struct HtmlRenderer { @@ -1114,7 +733,7 @@ pub struct HtmlRenderer { } impl HtmlRenderer { - fn new() -> Self { + pub fn new() -> Self { HtmlRenderer { html: Vec::new(), line_offsets: vec![0], @@ -1162,6 +781,21 @@ impl HtmlRenderer { Ok(()) } + pub fn lines(&self) -> impl Iterator { + self.line_offsets + .iter() + .enumerate() + .map(move |(i, line_start)| { + let line_start = *line_start as usize; + let line_end = if i + 1 == self.line_offsets.len() { + self.html.len() + } else { + self.line_offsets[i + 1] as usize + }; + str::from_utf8(&self.html[line_start..line_end]).unwrap() + }) + } + fn start_highlight<'a, F>(&mut self, h: Highlight, attribute_callback: &F) where F: Fn(Highlight) -> &'a [u8], From c1537115398de77fa4d8e89b0804b4713044a744 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 26 Sep 2019 15:58:41 -0700 Subject: [PATCH 02/14] query: Avoid splitting states on nodes that don't contain captures --- lib/src/query.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index a5c7afd4..84921b40 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -36,7 +36,8 @@ typedef struct { TSSymbol symbol; TSFieldId field; uint16_t capture_id; - uint16_t depth; + uint16_t depth: 15; + bool contains_captures: 1; } QueryStep; /* @@ -403,6 +404,23 @@ static inline void ts_query__pattern_map_insert( })); } +static void ts_query__finalize_steps(TSQuery *self) { + for (unsigned i = 0; i < self->steps.size; i++) { + QueryStep *step = &self->steps.contents[i]; + uint32_t depth = step->depth; + if (step->capture_id != NONE) { + step->contains_captures = true; + } else { + step->contains_captures = false; + for (unsigned j = i + 1; j < self->steps.size; j++) { + QueryStep *s = &self->steps.contents[j]; + if (s->depth == 
PATTERN_DONE_MARKER || s->depth <= depth) break; + if (s->capture_id != NONE) step->contains_captures = true; + } + } + } +} + // Parse a single predicate associated with a pattern, adding it to the // query's internal `predicate_steps` array. Predicates are arbitrary // S-expressions associated with a pattern which are meant to be handled at @@ -593,6 +611,7 @@ static TSQueryError ts_query_parse_pattern( .symbol = symbol, .field = 0, .capture_id = NONE, + .contains_captures = false, })); // Parse the child patterns @@ -638,6 +657,7 @@ static TSQueryError ts_query_parse_pattern( .symbol = symbol, .field = 0, .capture_id = NONE, + .contains_captures = false, })); if (stream->next != '"') return TSQueryErrorSyntax; @@ -688,6 +708,7 @@ static TSQueryError ts_query_parse_pattern( .depth = depth, .symbol = WILDCARD_SYMBOL, .field = 0, + .contains_captures = false, })); } @@ -807,6 +828,7 @@ TSQuery *ts_query_new( if (stream.input == stream.end) break; } + ts_query__finalize_steps(self); return self; } @@ -1147,11 +1169,15 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // capturing different children. If this pattern step could match // later children within the same parent, then this query state // cannot simply be updated in place. It must be split into two - // states: one that captures this node, and one which skips over - // this node, to preserve the possibility of capturing later + // states: one that matches this node, and one which skips over + // this node, to preserve the possibility of matching later // siblings. QueryState *next_state = state; - if (step->depth > 0 && later_sibling_can_match) { + if ( + step->depth > 0 && + step->contains_captures && + later_sibling_can_match + ) { LOG( " split state. 
pattern:%u, step:%u\n", state->pattern_index, From 4c17af3ecdb0137b6d025af1552a00ffdea56c55 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 26 Sep 2019 15:58:41 -0700 Subject: [PATCH 03/14] Allow queries with no patterns --- cli/src/tests/query_test.rs | 10 ++++++++++ lib/src/query.c | 4 +--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 65f89bfb..c9907cfa 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1029,6 +1029,16 @@ fn test_query_capture_names() { }); } +#[test] +fn test_query_with_no_patterns() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new(language, "").unwrap(); + assert!(query.capture_names().is_empty()); + assert_eq!(query.pattern_count(), 0); + }); +} + #[test] fn test_query_comments() { allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index 84921b40..10a436f4 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -789,7 +789,7 @@ TSQuery *ts_query_new( Stream stream = stream_new(source, source_len); stream_skip_whitespace(&stream); uint32_t start_step_index; - for (;;) { + while (stream.input < stream.end) { start_step_index = self->steps.size; uint32_t capture_count = 0; array_push(&self->start_bytes_by_pattern, stream.input - source); @@ -824,8 +824,6 @@ TSQuery *ts_query_new( if (capture_count > self->max_capture_count) { self->max_capture_count = capture_count; } - - if (stream.input == stream.end) break; } ts_query__finalize_steps(self); From f490befcde2b76df04b8ac335838bd2848deacb4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 26 Sep 2019 15:58:41 -0700 Subject: [PATCH 04/14] Add `ts_query_disable_capture` API --- lib/binding_rust/bindings.rs | 10 ++++++++++ lib/binding_rust/lib.rs | 10 ++++++++++ lib/include/tree_sitter/api.h | 7 +++++++ lib/src/query.c | 21 +++++++++++++++++++-- 4 files changed, 46 insertions(+), 2 deletions(-) diff --git 
a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index df1249a3..f84058b5 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -655,6 +655,16 @@ extern "C" { length: *mut u32, ) -> *const ::std::os::raw::c_char; } +extern "C" { + #[doc = " Disable a certain capture within a query. This prevents the capture"] + #[doc = " from being returned in matches, and also avoids any resource usage"] + #[doc = " associated with recording the capture."] + pub fn ts_query_disable_capture( + arg1: *mut TSQuery, + arg2: *const ::std::os::raw::c_char, + arg3: u32, + ); +} extern "C" { #[doc = " Create a new cursor for executing a given query."] #[doc = ""] diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index d824f964..87771759 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1202,6 +1202,16 @@ impl Query { &self.property_settings[index] } + pub fn disable_capture(&mut self, name: &str) { + unsafe { + ffi::ts_query_disable_capture( + self.ptr.as_ptr(), + name.as_bytes().as_ptr() as *const c_char, + name.len() as u32, + ); + } + } + fn parse_property( function_name: &str, capture_names: &[String], diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index b53174fa..04aed846 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -716,6 +716,13 @@ const char *ts_query_string_value_for_id( uint32_t *length ); +/** + * Disable a certain capture within a query. This prevents the capture + * from being returned in matches, and also avoids any resource usage + * associated with recording the capture. + */ +void ts_query_disable_capture(TSQuery *, const char *, uint32_t); + /** * Create a new cursor for executing a given query. 
* diff --git a/lib/src/query.c b/lib/src/query.c index 10a436f4..c2ba3d30 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -889,6 +889,23 @@ uint32_t ts_query_start_byte_for_pattern( return self->start_bytes_by_pattern.contents[pattern_index]; } +void ts_query_disable_capture( + TSQuery *self, + const char *name, + uint32_t length +) { + int id = symbol_table_id_for_name(&self->captures, name, length); + if (id != -1) { + for (unsigned i = 0; i < self->steps.size; i++) { + QueryStep *step = &self->steps.contents[i]; + if (step->capture_id == id) { + step->capture_id = NONE; + } + } + } + ts_query__finalize_steps(self); +} + /*************** * QueryCursor ***************/ @@ -1020,7 +1037,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { } else if (ts_tree_cursor_goto_parent(&self->cursor)) { self->depth--; } else { - return false; + return self->finished_states.size > 0; } } else { bool can_have_later_siblings; @@ -1214,7 +1231,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { next_state->step_index++; QueryStep *next_step = step + 1; if (next_step->depth == PATTERN_DONE_MARKER) { - LOG("finish pattern %u\n", next_state->pattern_index); + LOG(" finish pattern %u\n", next_state->pattern_index); next_state->id = self->next_state_id++; array_push(&self->finished_states, *next_state); From a45dc67390f948897bc1745a5801fda4b8702c67 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 26 Sep 2019 15:58:41 -0700 Subject: [PATCH 05/14] Loosen unnecessary lifetime restriction in rust QueryCursor APIs --- lib/binding_rust/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 87771759..1ecde648 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1270,7 +1270,7 @@ impl QueryCursor { } pub fn matches<'a>( - &'a mut self, + &mut self, query: &'a Query, node: Node<'a>, mut text_callback: impl FnMut(Node<'a>) -> &[u8] + 'a, @@ -1293,7 
+1293,7 @@ impl QueryCursor { } pub fn captures<'a, T: AsRef<[u8]>>( - &'a mut self, + &mut self, query: &'a Query, node: Node<'a>, text_callback: impl FnMut(Node<'a>) -> T + 'a, From 324c259cbbb79e45258cf7162a01f2c68982578e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 26 Sep 2019 15:58:41 -0700 Subject: [PATCH 06/14] rust: Create readable messages for query syntax errors --- cli/src/tests/query_test.rs | 52 +++++++++++++++++++++++++++++++------ lib/binding_rust/lib.rs | 24 ++++++++++++++--- 2 files changed, 65 insertions(+), 11 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index c9907cfa..1b147cf9 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -16,37 +16,73 @@ fn test_query_errors_on_invalid_syntax() { // Mismatched parens assert_eq!( Query::new(language, "(if_statement"), - Err(QueryError::Syntax(13)) + Err(QueryError::Syntax("Unexpected EOF".to_string())) ); assert_eq!( - Query::new(language, "(if_statement))"), - Err(QueryError::Syntax(14)) + Query::new(language, "; comment 1\n; comment 2\n (if_statement))"), + Err(QueryError::Syntax( + [ + " (if_statement))", // + " ^", + ] + .join("\n") + )) ); // Return an error at the *beginning* of a bare identifier not followed a colon. // If there's a colon but no pattern, return an error at the end of the colon. assert_eq!( Query::new(language, "(if_statement identifier)"), - Err(QueryError::Syntax(14)) + Err(QueryError::Syntax( + [ + "(if_statement identifier)", // + " ^", + ] + .join("\n") + )) ); assert_eq!( Query::new(language, "(if_statement condition:)"), - Err(QueryError::Syntax(24)) + Err(QueryError::Syntax( + [ + "(if_statement condition:)", // + " ^", + ] + .join("\n") + )) ); // Return an error at the beginning of an unterminated string. 
assert_eq!( Query::new(language, r#"(identifier) "h "#), - Err(QueryError::Syntax(13)) + Err(QueryError::Syntax( + [ + r#"(identifier) "h "#, // + r#" ^"#, + ] + .join("\n") + )) ); assert_eq!( Query::new(language, r#"((identifier) ()"#), - Err(QueryError::Syntax(16)) + Err(QueryError::Syntax( + [ + "((identifier) ()", // + " ^", + ] + .join("\n") + )) ); assert_eq!( Query::new(language, r#"((identifier) @x (eq? @x a"#), - Err(QueryError::Syntax(26)) + Err(QueryError::Syntax( + [ + r#"((identifier) @x (eq? @x a"#, + r#" ^"#, + ] + .join("\n") + )) ); }); } diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 1ecde648..491c7db2 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -186,7 +186,7 @@ pub struct QueryCapture<'a> { #[derive(Debug, PartialEq, Eq)] pub enum QueryError { - Syntax(usize), + Syntax(String), NodeType(String), Field(String), Capture(String), @@ -997,6 +997,24 @@ impl Query { // On failure, build an error based on the error code and offset. 
if ptr.is_null() { let offset = error_offset as usize; + let mut line_start = 0; + let line_containing_error = source.split("\n").find_map(|line| { + let line_end = line_start + line.len() + 1; + if line_end > offset { + Some(line) + } else { + line_start = line_end; + None + } + }); + + let message = if let Some(line) = line_containing_error { + line.to_string() + "\n" + &" ".repeat(offset - line_start) + "^" + } else { + "Unexpected EOF".to_string() + }; + + // if line_containing_error return if error_type != ffi::TSQueryError_TSQueryErrorSyntax { let suffix = source.split_at(offset).1; let end_offset = suffix @@ -1007,10 +1025,10 @@ impl Query { ffi::TSQueryError_TSQueryErrorNodeType => Err(QueryError::NodeType(name)), ffi::TSQueryError_TSQueryErrorField => Err(QueryError::Field(name)), ffi::TSQueryError_TSQueryErrorCapture => Err(QueryError::Capture(name)), - _ => Err(QueryError::Syntax(offset)), + _ => Err(QueryError::Syntax(message)), } } else { - Err(QueryError::Syntax(offset)) + Err(QueryError::Syntax(message)) }; } From 7c8216c692dd83e582fbac7b697d17a6679e518b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 26 Sep 2019 15:58:41 -0700 Subject: [PATCH 07/14] highlight: Handle injections correctly, using a separate query cursor --- cli/src/highlight.rs | 1 + cli/src/loader.rs | 58 ++++++++---- highlight/src/lib.rs | 212 ++++++++++++++++++++++++++++--------------- 3 files changed, 178 insertions(+), 93 deletions(-) diff --git a/cli/src/highlight.rs b/cli/src/highlight.rs index d92d642e..b827dde4 100644 --- a/cli/src/highlight.rs +++ b/cli/src/highlight.rs @@ -352,6 +352,7 @@ pub fn html( } })?; + write!(&mut stdout, "\n")?; for (i, line) in renderer.lines().enumerate() { write!( &mut stdout, diff --git a/cli/src/loader.rs b/cli/src/loader.rs index 328b8063..8610eaaa 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -28,6 +28,9 @@ pub struct LanguageConfiguration { pub injection_regex: Option, pub file_types: Vec, pub root_path: PathBuf, + pub 
highlights_filename: Option, + pub injections_filename: Option, + pub locals_filename: Option, language_id: usize, highlight_config: OnceCell>, } @@ -354,6 +357,8 @@ impl Loader { #[serde(rename = "injection-regex")] injection_regex: Option, highlights: Option, + injections: Option, + locals: Option, } #[derive(Deserialize)] @@ -406,6 +411,9 @@ impl Loader { .injection_regex .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), highlight_config: OnceCell::new(), + injections_filename: config_json.injections, + locals_filename: config_json.locals, + highlights_filename: config_json.highlights, }; for file_type in &configuration.file_types { @@ -423,16 +431,10 @@ impl Loader { if self.language_configurations.len() == initial_language_configuration_count && parser_path.join("src").join("grammar.json").exists() { - self.language_configurations.push(LanguageConfiguration { - root_path: parser_path.to_owned(), - language_id: self.languages_by_id.len(), - scope: None, - content_regex: None, - injection_regex: None, - file_types: Vec::new(), - _first_line_regex: None, - highlight_config: OnceCell::new(), - }); + let mut configuration = LanguageConfiguration::default(); + configuration.root_path = parser_path.to_owned(); + configuration.language_id = self.languages_by_id.len(); + self.language_configurations.push(configuration); self.languages_by_id .push((parser_path.to_owned(), OnceCell::new())); } @@ -451,9 +453,21 @@ impl LanguageConfiguration { .get_or_try_init(|| { let queries_path = self.root_path.join("queries"); - let highlights_path = queries_path.join("highlights.scm"); - let injections_path = queries_path.join("injections.scm"); - let locals_path = queries_path.join("locals.scm"); + let highlights_path = queries_path.join( + self.highlights_filename + .as_ref() + .map_or("highlights.scm", String::as_str), + ); + let injections_path = queries_path.join( + self.injections_filename + .as_ref() + .map_or("injections.scm", String::as_str), + ); + let 
locals_path = queries_path.join( + self.locals_filename + .as_ref() + .map_or("locals.scm", String::as_str), + ); if !highlights_path.exists() { return Ok(None); @@ -471,12 +485,18 @@ impl LanguageConfiguration { String::new() }; - Ok(Some(highlighter.load_configuration( - language, - &highlights_query, - &injections_query, - &locals_query, - )?)) + Ok(Some( + highlighter + .load_configuration( + language, + &highlights_query, + &injections_query, + &locals_query, + ) + .map_err(Error::wrap(|| { + format!("Failed to load queries in {:?}", queries_path) + }))?, + )) }) .map(Option::as_ref) } diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index a362dab0..4f4a930d 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -37,6 +37,7 @@ pub enum HighlightEvent { pub struct HighlightConfiguration { pub language: Language, pub query: Query, + injections_query: Query, locals_pattern_index: usize, highlights_pattern_index: usize, highlight_indices: Vec>, @@ -66,6 +67,7 @@ where source: &'a [u8], byte_offset: usize, context: &'a mut HighlightContext, + injections_cursor: QueryCursor, injection_callback: F, cancellation_flag: Option<&'a AtomicUsize>, layers: Vec>, @@ -81,6 +83,7 @@ struct HighlightIterLayer<'a> { highlight_end_stack: Vec, scope_stack: Vec>, ranges: Vec, + depth: usize, } impl HighlightContext { @@ -113,7 +116,14 @@ impl Highlighter { query_source.push_str(highlights_query); // Construct a query with the concatenated string. - let query = Query::new(language, &query_source)?; + let mut query = Query::new(language, &query_source)?; + + let injections_query = Query::new(language, injection_query)?; + for injection_capture in injections_query.capture_names() { + if injection_capture != "injection.site" { + query.disable_capture(injection_capture); + } + } // Determine the range of pattern indices that belong to each section of the query. 
let mut locals_pattern_index = 0; @@ -192,8 +202,9 @@ impl Highlighter { } Ok(HighlightConfiguration { - query, language, + query, + injections_query, locals_pattern_index, highlights_pattern_index, highlight_indices, @@ -220,6 +231,7 @@ impl Highlighter { source, context, cancellation_flag, + 0, vec![Range { start_byte: 0, end_byte: usize::MAX, @@ -228,11 +240,14 @@ impl Highlighter { }], )?; + let injections_cursor = context.cursors.pop().unwrap_or(QueryCursor::new()); + Ok(HighlightIter { source, byte_offset: 0, injection_callback, cancellation_flag, + injections_cursor, context, iter_count: 0, layers: vec![layer], @@ -247,6 +262,7 @@ impl<'a> HighlightIterLayer<'a> { source: &'a [u8], context: &mut HighlightContext, cancellation_flag: Option<&'a AtomicUsize>, + depth: usize, ranges: Vec, ) -> Result { context @@ -279,6 +295,7 @@ impl<'a> HighlightIterLayer<'a> { local_defs: Vec::new(), }], cursor, + depth, _tree: tree, captures, config, @@ -377,16 +394,25 @@ impl<'a> HighlightIterLayer<'a> { result } - fn offset(&mut self) -> Option { + // First, sort scope boundaries by their byte offset in the document. At a + // given position, emit scope endings before scope beginnings. Finally, emit + // scope boundaries from outer layers first. 
+ fn sort_key(&mut self) -> Option<(usize, bool, usize)> { let next_start = self .captures .peek() .map(|(m, i)| m.captures[*i].node.start_byte()); let next_end = self.highlight_end_stack.last().cloned(); match (next_start, next_end) { - (Some(i), Some(j)) => Some(usize::min(i, j)), - (Some(i), None) => Some(i), - (None, Some(j)) => Some(j), + (Some(start), Some(end)) => { + if start < end { + Some((start, true, self.depth)) + } else { + Some((end, false, self.depth)) + } + } + (Some(i), None) => Some((i, true, self.depth)), + (None, Some(j)) => Some((j, false, self.depth)), _ => None, } } @@ -417,11 +443,11 @@ where } fn sort_layers(&mut self) { - if let Some(offset) = self.layers[0].offset() { + if let Some(sort_key) = self.layers[0].sort_key() { let mut i = 0; while i + 1 < self.layers.len() { - if let Some(next_offset) = self.layers[i + 1].offset() { - if next_offset < offset { + if let Some(next_offset) = self.layers[i + 1].sort_key() { + if next_offset < sort_key { i += 1; continue; } @@ -429,13 +455,26 @@ where break; } if i > 0 { - &self.layers[0..(i + 1)].rotate_left(i); + &self.layers[0..(i + 1)].rotate_left(1); } } else { let layer = self.layers.remove(0); self.context.cursors.push(layer.cursor); } } + + fn insert_layer(&mut self, mut layer: HighlightIterLayer<'a>) { + let sort_key = layer.sort_key(); + let mut i = 1; + while i < self.layers.len() { + if self.layers[i].sort_key() > sort_key { + self.layers.insert(i, layer); + return; + } + i += 1; + } + self.layers.push(layer); + } } impl<'a, F> Iterator for HighlightIter<'a, F> @@ -516,88 +555,112 @@ where let content_capture_index = layer.config.injection_content_capture_index; let language_capture_index = layer.config.injection_language_capture_index; - // Find the language name and the node that represents the injection content. 
- let mut injection_site = None; - let mut injection_language = None; - let mut injection_contents = Vec::new(); - for capture in match_.captures { - let index = Some(capture.index); - if index == site_capture_index { - injection_site = Some(capture.node); - } else if index == language_capture_index { - injection_language = capture.node.utf8_text(self.source).ok(); - } else if index == content_capture_index { - injection_contents.push(capture.node); + // Injections must have a `injection.site` capture, which contains all of the + // information about the injection. + let site_node = match_.captures.iter().find_map(|c| { + if Some(c.index) == site_capture_index { + return Some(c.node); + } else { + return None; } - } + }); - // In addition to specifying the language name via the text of a captured node, - // it can also be hard-coded via a `(set! injection.language )` - // predicate. - if injection_language.is_none() { - injection_language = layer - .config - .query - .property_settings(pattern_index) - .iter() - .find_map(|prop| { - if prop.key.as_ref() == "injection.language" { - prop.value.as_ref().map(|s| s.as_ref()) - } else { - None - } - }); - } - - // For injections, we process entire matches at once, as opposed to processing - // each capture separately, interspersed with captures form other patterns. // Explicitly remove this match so that none of its other captures will remain // in the stream of captures. layer.captures.next().unwrap().0.remove(); - // If an `injection.site` was captured, then find any subsequent matches - // with the same pattern and `injection.site` capture. Those matches should - // all be combined into this match. This allows you to specify that a single - // injected document spans multiple 'content' nodes. - if let Some(injection_site) = injection_site { + if let Some(site_node) = site_node { + // Discard any subsequent matches for same injection site. 
while let Some((next_match, _)) = layer.captures.peek() { - if next_match.pattern_index == pattern_index - && next_match.captures.iter().any(|c| { - Some(c.index) == site_capture_index && c.node == injection_site - }) + if next_match.pattern_index < layer.config.locals_pattern_index + && next_match + .captures + .iter() + .any(|c| Some(c.index) == site_capture_index && c.node == site_node) { - injection_contents.extend(next_match.captures.iter().filter_map(|c| { - if Some(c.index) == content_capture_index { - Some(c.node) - } else { - None - } - })); layer.captures.next().unwrap().0.remove(); continue; } break; } - } - // If a language is found with the given name, then add a new language layer - // to the highlighted document. - if let Some(config) = injection_language.and_then(&self.injection_callback) { - if !injection_contents.is_empty() { - match HighlightIterLayer::new( - config, - self.source, - self.context, - self.cancellation_flag, - layer.intersect_ranges(&injection_contents, false), - ) { - Ok(layer) => self.layers.push(layer), - Err(e) => return Some(Err(e)), + // Find the language name and the nodes that represents the injection content. + // Use a separate Query and QueryCursor in order to avoid the injection + // captures being intermixed with other captures related to local variables + // and syntax highlighting. 
+ let source = self.source; + let mut injections = Vec::<(usize, Option<&str>, Vec)>::new(); + for mat in self.injections_cursor.matches( + &layer.config.injections_query, + site_node, + move |node| &source[node.byte_range()], + ) { + let entry = if let Some(entry) = + injections.iter_mut().find(|e| e.0 == mat.pattern_index) + { + entry + } else { + injections.push((mat.pattern_index, None, Vec::new())); + injections.last_mut().unwrap() + }; + + for capture in mat.captures { + let index = Some(capture.index); + if index == site_capture_index { + if capture.node != site_node { + break; + } + } else if index == language_capture_index && entry.1.is_none() { + entry.1 = capture.node.utf8_text(self.source).ok(); + } else if index == content_capture_index { + entry.2.push(capture.node); + } } } + + for (pattern_index, language, _) in injections.iter_mut() { + // In addition to specifying the language name via the text of a captured node, + // it can also be hard-coded via a `(set! injection.language )` + // predicate. + if language.is_none() { + *language = layer + .config + .query + .property_settings(*pattern_index) + .iter() + .find_map(|prop| { + if prop.key.as_ref() == "injection.language" { + prop.value.as_ref().map(|s| s.as_ref()) + } else { + None + } + }); + } + } + + for (_, language, content_nodes) in injections { + // If a language is found with the given name, then add a new language layer + // to the highlighted document. 
+ if let Some(config) = language.and_then(&self.injection_callback) { + if !content_nodes.is_empty() { + match HighlightIterLayer::new( + config, + self.source, + self.context, + self.cancellation_flag, + self.layers[0].depth + 1, + self.layers[0].intersect_ranges(&content_nodes, false), + ) { + Ok(layer) => self.insert_layer(layer), + Err(e) => return Some(Err(e)), + } + } + } + } + + self.sort_layers(); } - self.sort_layers(); continue; } @@ -685,6 +748,7 @@ where if next_capture.node == capture.node { capture = next_capture; has_highlight = true; + pattern_index = next_match.pattern_index; layer.captures.next(); continue; } From b3809274f000c1c797b7f0e8319ee0e51a59beca Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 14 Oct 2019 12:31:07 -0700 Subject: [PATCH 08/14] Load highlight queries correctly in highlight unit tests --- cli/src/tests/helpers/fixtures.rs | 30 +++++------------- cli/src/tests/highlight_test.rs | 51 ++++++++++++++++++------------- script/fetch-fixtures | 6 ++-- script/fetch-fixtures.cmd | 8 ++--- 4 files changed, 44 insertions(+), 51 deletions(-) diff --git a/cli/src/tests/helpers/fixtures.rs b/cli/src/tests/helpers/fixtures.rs index af1df2bf..e1e3f974 100644 --- a/cli/src/tests/helpers/fixtures.rs +++ b/cli/src/tests/helpers/fixtures.rs @@ -21,34 +21,20 @@ pub fn get_language(name: &str) -> Language { .unwrap() } -pub fn get_highlight_query_sources(language_name: &str) -> (String, String, String) { - let queries_path = GRAMMARS_DIR.join(language_name).join("queries"); - let highlights_path = queries_path.join("highlights.scm"); - let injections_path = queries_path.join("injections.scm"); - let locals_path = queries_path.join("locals.scm"); - - let highlights_query = fs::read_to_string(highlights_path).unwrap(); - let injections_query = if injections_path.exists() { - fs::read_to_string(injections_path).unwrap() - } else { - String::new() - }; - let locals_query = if locals_path.exists() { - fs::read_to_string(locals_path).unwrap() - } 
else { - String::new() - }; - - (highlights_query, injections_query, locals_query) +pub fn get_language_queries_path(language_name: &str) -> PathBuf { + GRAMMARS_DIR.join(language_name).join("queries") } pub fn get_highlight_config( - language_name: &str, highlighter: &Highlighter, + language_name: &str, + injection_query_filename: &str, ) -> HighlightConfiguration { let language = get_language(language_name); - let (highlights_query, injections_query, locals_query) = - get_highlight_query_sources(language_name); + let queries_path = get_language_queries_path(language_name); + let highlights_query = fs::read_to_string(queries_path.join("highlights.scm")).unwrap(); + let injections_query = fs::read_to_string(queries_path.join(injection_query_filename)).unwrap(); + let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or(String::new()); highlighter .load_configuration( language, diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index a996d2d1..759ee9a3 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -1,22 +1,25 @@ -use super::helpers::fixtures::{get_highlight_config, get_highlight_query_sources, get_language}; +use super::helpers::fixtures::{get_highlight_config, get_language, get_language_queries_path}; use lazy_static::lazy_static; use std::ffi::CString; use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{ptr, slice, str}; +use std::{fs, ptr, slice, str}; use tree_sitter_highlight::{ c, Error, HighlightConfiguration, HighlightContext, HighlightEvent, Highlighter, HtmlRenderer, }; lazy_static! 
{ static ref JS_HIGHLIGHT: HighlightConfiguration = - get_highlight_config("javascript", &HIGHLIGHTER); - static ref HTML_HIGHLIGHT: HighlightConfiguration = get_highlight_config("html", &HIGHLIGHTER); + get_highlight_config(&HIGHLIGHTER, "javascript", "injections.scm"); + static ref HTML_HIGHLIGHT: HighlightConfiguration = + get_highlight_config(&HIGHLIGHTER, "html", "injections.scm"); static ref EJS_HIGHLIGHT: HighlightConfiguration = - get_highlight_config("embedded-template", &HIGHLIGHTER); - static ref RUST_HIGHLIGHT: HighlightConfiguration = get_highlight_config("rust", &HIGHLIGHTER); + get_highlight_config(&HIGHLIGHTER, "embedded-template", "injections-ejs.scm"); + static ref RUST_HIGHLIGHT: HighlightConfiguration = + get_highlight_config(&HIGHLIGHTER, "rust", "injections.scm"); static ref HIGHLIGHTER: Highlighter = Highlighter::new( [ "attribute", + "constant", "constructor", "function.builtin", "function", @@ -219,11 +222,11 @@ fn test_highlighting_multiline_nodes_to_html() { assert_eq!( &to_html(&source, &JS_HIGHLIGHT).unwrap(), &[ - "const SOMETHING = `\n".to_string(), - " one ${\n".to_string(), - " two()\n".to_string(), - " } three\n".to_string(), - "`\n".to_string(), + "const SOMETHING = `\n".to_string(), + " one ${\n".to_string(), + " two()\n".to_string(), + " } three\n".to_string(), + "`\n".to_string(), ] ); } @@ -326,9 +329,9 @@ fn test_highlighting_ejs() { assert_eq!( &to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(), &[[ - ("<", vec![]), + ("<", vec!["punctuation.bracket"]), ("div", vec!["tag"]), - (">", vec![]), + (">", vec!["punctuation.bracket"]), ("<%", vec!["keyword"]), (" ", vec![]), ("foo", vec!["function"]), @@ -336,9 +339,9 @@ fn test_highlighting_ejs() { (")", vec!["punctuation.bracket"]), (" ", vec![]), ("%>", vec!["keyword"]), - ("", vec![]) + (">", vec!["punctuation.bracket"]) ]], ); } @@ -446,8 +449,10 @@ fn test_highlighting_via_c_api() { let js_scope = c_string("source.js"); let js_injection_regex = c_string("^javascript"); let 
language = get_language("javascript"); - let (highlights_query, injections_query, locals_query) = - get_highlight_query_sources("javascript"); + let queries = get_language_queries_path("javascript"); + let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap(); + let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap(); + let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap(); c::ts_highlighter_add_language( highlighter, js_scope.as_ptr(), @@ -464,7 +469,9 @@ fn test_highlighting_via_c_api() { let html_scope = c_string("text.html.basic"); let html_injection_regex = c_string("^html"); let language = get_language("html"); - let (highlights_query, injections_query, locals_query) = get_highlight_query_sources("html"); + let queries = get_language_queries_path("html"); + let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap(); + let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap(); c::ts_highlighter_add_language( highlighter, html_scope.as_ptr(), @@ -472,10 +479,10 @@ fn test_highlighting_via_c_api() { language, highlights_query.as_ptr() as *const i8, injections_query.as_ptr() as *const i8, - locals_query.as_ptr() as *const i8, + ptr::null(), highlights_query.len() as u32, injections_query.len() as u32, - locals_query.len() as u32, + 0, ); let buffer = c::ts_highlight_buffer_new(); @@ -512,8 +519,8 @@ fn test_highlighting_via_c_api() { lines, vec![ "<script>\n", - "const a = b('c');\n", - "c.d();\n", + "const a = b('c');\n", + "c.d();\n", "</script>\n", ] ); diff --git a/script/fetch-fixtures b/script/fetch-fixtures index 94f9eddd..7baf4032 100755 --- a/script/fetch-fixtures +++ b/script/fetch-fixtures @@ -24,10 +24,10 @@ fetch_grammar() { fetch_grammar bash master fetch_grammar c master fetch_grammar cpp master -fetch_grammar embedded-template master +fetch_grammar embedded-template highlight-queries fetch_grammar go master -fetch_grammar html 
master -fetch_grammar javascript master +fetch_grammar html highlight-queries +fetch_grammar javascript highlight-queries fetch_grammar json master fetch_grammar python master fetch_grammar ruby master diff --git a/script/fetch-fixtures.cmd b/script/fetch-fixtures.cmd index 011d73ff..70713cf4 100644 --- a/script/fetch-fixtures.cmd +++ b/script/fetch-fixtures.cmd @@ -3,14 +3,14 @@ call:fetch_grammar bash master call:fetch_grammar c master call:fetch_grammar cpp master -call:fetch_grammar embedded-template master +call:fetch_grammar embedded-template highlight-queries call:fetch_grammar go master -call:fetch_grammar html master -call:fetch_grammar javascript master +call:fetch_grammar html highlight-queries +call:fetch_grammar javascript highlight-queries call:fetch_grammar json master call:fetch_grammar python master call:fetch_grammar ruby master -call:fetch_grammar rust master +call:fetch_grammar rust highlight-queries call:fetch_grammar typescript master exit /B 0 From 060e00463dde12ebe2c778c799f8a567841bd9be Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 14 Oct 2019 16:55:14 -0700 Subject: [PATCH 09/14] Implement include-children directive in injection queries --- cli/src/tests/highlight_test.rs | 8 ++-- cli/src/tests/parser_test.rs | 38 ++++++++++++---- cli/src/tests/query_test.rs | 8 +++- highlight/src/lib.rs | 68 +++++++++++++++++------------ lib/binding_web/test/parser-test.js | 5 ++- script/fetch-fixtures | 2 +- 6 files changed, 86 insertions(+), 43 deletions(-) diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index 759ee9a3..a5579c65 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -124,9 +124,6 @@ fn test_highlighting_injected_html_in_javascript() { fn test_highlighting_injected_javascript_in_html_mini() { let source = ""; - eprintln!("HTML {:?}", HTML_HIGHLIGHT.language); - eprintln!("JavaScript {:?}", JS_HIGHLIGHT.language); - assert_eq!( &to_token_vector(source, 
&HTML_HIGHLIGHT).unwrap(), &[vec![ @@ -377,7 +374,10 @@ fn test_highlighting_with_content_children_included() { ("(", vec!["punctuation.bracket"]), (")", vec!["punctuation.bracket"]), ], - vec![(")", vec!["punctuation.bracket"]), (";", vec![]),] + vec![ + (")", vec!["punctuation.bracket"]), + (";", vec!["punctuation.delimiter"]), + ] ], ); } diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 882f5963..dc25e081 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -7,7 +7,7 @@ use std::{thread, time}; use tree_sitter::{InputEdit, LogType, Parser, Point, Range}; #[test] -fn test_basic_parsing() { +fn test_parsing_simple_string() { let mut parser = Parser::new(); parser.set_language(get_language("rust")).unwrap(); @@ -26,7 +26,11 @@ fn test_basic_parsing() { assert_eq!( root_node.to_sexp(), - "(source_file (struct_item (type_identifier) (field_declaration_list)) (function_item (identifier) (parameters) (block)))" + concat!( + "(source_file ", + "(struct_item name: (type_identifier) body: (field_declaration_list)) ", + "(function_item name: (identifier) parameters: (parameters) body: (block)))" + ) ); let struct_node = root_node.child(0).unwrap(); @@ -118,7 +122,17 @@ fn test_parsing_with_custom_utf8_input() { .unwrap(); let root = tree.root_node(); - assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); + assert_eq!( + root.to_sexp(), + concat!( + "(source_file ", + "(function_item ", + "(visibility_modifier) ", + "name: (identifier) ", + "parameters: (parameters) ", + "body: (block (integer_literal))))" + ) + ); assert_eq!(root.kind(), "source_file"); assert_eq!(root.has_error(), false); assert_eq!(root.child(0).unwrap().kind(), "function_item"); @@ -154,7 +168,10 @@ fn test_parsing_with_custom_utf16_input() { .unwrap(); let root = tree.root_node(); - assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) 
(identifier) (parameters) (block (integer_literal))))"); + assert_eq!( + root.to_sexp(), + "(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))" + ); assert_eq!(root.kind(), "source_file"); assert_eq!(root.has_error(), false); assert_eq!(root.child(0).unwrap().kind(), "function_item"); @@ -175,7 +192,10 @@ fn test_parsing_with_callback_returning_owned_strings() { .unwrap(); let root = tree.root_node(); - assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); + assert_eq!( + root.to_sexp(), + "(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))" + ); } #[test] @@ -192,7 +212,7 @@ fn test_parsing_text_with_byte_order_mark() { .unwrap(); assert_eq!( tree.root_node().to_sexp(), - "(source_file (function_item (identifier) (parameters) (block)))" + "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))" ); assert_eq!(tree.root_node().start_byte(), 2); @@ -200,7 +220,7 @@ fn test_parsing_text_with_byte_order_mark() { let mut tree = parser.parse("\u{FEFF}fn a() {}", None).unwrap(); assert_eq!( tree.root_node().to_sexp(), - "(source_file (function_item (identifier) (parameters) (block)))" + "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))" ); assert_eq!(tree.root_node().start_byte(), 3); @@ -216,7 +236,7 @@ fn test_parsing_text_with_byte_order_mark() { let mut tree = parser.parse(" \u{FEFF}fn a() {}", Some(&tree)).unwrap(); assert_eq!( tree.root_node().to_sexp(), - "(source_file (ERROR (UNEXPECTED 65279)) (function_item (identifier) (parameters) (block)))" + "(source_file (ERROR (UNEXPECTED 65279)) (function_item name: (identifier) parameters: (parameters) body: (block)))" ); assert_eq!(tree.root_node().start_byte(), 1); @@ -232,7 +252,7 @@ fn 
test_parsing_text_with_byte_order_mark() { let tree = parser.parse("\u{FEFF}fn a() {}", Some(&tree)).unwrap(); assert_eq!( tree.root_node().to_sexp(), - "(source_file (function_item (identifier) (parameters) (block)))" + "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))" ); assert_eq!(tree.root_node().start_byte(), 3); } diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 1b147cf9..1d7554c0 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -16,7 +16,13 @@ fn test_query_errors_on_invalid_syntax() { // Mismatched parens assert_eq!( Query::new(language, "(if_statement"), - Err(QueryError::Syntax("Unexpected EOF".to_string())) + Err(QueryError::Syntax( + [ + "(if_statement", // + " ^", + ] + .join("\n") + )) ); assert_eq!( Query::new(language, "; comment 1\n; comment 2\n (if_statement))"), diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index 4f4a930d..ec2cdafb 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -73,6 +73,7 @@ where layers: Vec>, iter_count: usize, next_event: Option, + last_highlight_range: Option<(usize, usize, usize)>, } struct HighlightIterLayer<'a> { @@ -252,6 +253,7 @@ impl Highlighter { iter_count: 0, layers: vec![layer], next_event: None, + last_highlight_range: None, }) } } @@ -397,7 +399,8 @@ impl<'a> HighlightIterLayer<'a> { // First, sort scope boundaries by their byte offset in the document. At a // given position, emit scope endings before scope beginnings. Finally, emit // scope boundaries from outer layers first. 
- fn sort_key(&mut self) -> Option<(usize, bool, usize)> { + fn sort_key(&mut self) -> Option<(usize, bool, isize)> { + let depth = -(self.depth as isize); let next_start = self .captures .peek() @@ -406,13 +409,13 @@ impl<'a> HighlightIterLayer<'a> { match (next_start, next_end) { (Some(start), Some(end)) => { if start < end { - Some((start, true, self.depth)) + Some((start, true, depth)) } else { - Some((end, false, self.depth)) + Some((end, false, depth)) } } - (Some(i), None) => Some((i, true, self.depth)), - (None, Some(j)) => Some((j, false, self.depth)), + (Some(i), None) => Some((i, true, depth)), + (None, Some(j)) => Some((j, false, depth)), _ => None, } } @@ -589,7 +592,7 @@ where // captures being intermixed with other captures related to local variables // and syntax highlighting. let source = self.source; - let mut injections = Vec::<(usize, Option<&str>, Vec)>::new(); + let mut injections = Vec::<(usize, Option<&str>, Vec, bool)>::new(); for mat in self.injections_cursor.matches( &layer.config.injections_query, site_node, @@ -600,7 +603,7 @@ where { entry } else { - injections.push((mat.pattern_index, None, Vec::new())); + injections.push((mat.pattern_index, None, Vec::new(), false)); injections.last_mut().unwrap() }; @@ -618,27 +621,29 @@ where } } - for (pattern_index, language, _) in injections.iter_mut() { - // In addition to specifying the language name via the text of a captured node, - // it can also be hard-coded via a `(set! injection.language )` - // predicate. 
- if language.is_none() { - *language = layer - .config - .query - .property_settings(*pattern_index) - .iter() - .find_map(|prop| { - if prop.key.as_ref() == "injection.language" { - prop.value.as_ref().map(|s| s.as_ref()) - } else { - None + for (pattern_index, language, _, include_children) in injections.iter_mut() { + for prop in layer.config.query.property_settings(*pattern_index) { + match prop.key.as_ref() { + // In addition to specifying the language name via the text of a + // captured node, it can also be hard-coded via a `set!` predicate + // that sets the injection.language key. + "injection.language" => { + if language.is_none() { + *language = prop.value.as_ref().map(|s| s.as_ref()) } - }); + } + + // By default, injections do not include the *children* of an + // `injection.content` node - only the ranges that belong to the + // node itself. This can be changed using a `set!` predicate that + // sets the `injection.include-children` key. + "injection.include-children" => *include_children = true, + _ => {} + } } } - for (_, language, content_nodes) in injections { + for (_, language, content_nodes, include_children) in injections { // If a language is found with the given name, then add a new language layer // to the highlighted document. 
if let Some(config) = language.and_then(&self.injection_callback) { @@ -649,7 +654,8 @@ where self.context, self.cancellation_flag, self.layers[0].depth + 1, - self.layers[0].intersect_ranges(&content_nodes, false), + self.layers[0] + .intersect_ranges(&content_nodes, include_children), ) { Ok(layer) => self.insert_layer(layer), Err(e) => return Some(Err(e)), @@ -736,10 +742,17 @@ where break; } + let mut has_highlight = true; + if let Some((last_start, last_end, last_depth)) = self.last_highlight_range { + if range.start == last_start && range.end == last_end && layer.depth < last_depth { + has_highlight = false; + } + } + // If the current node was found to be a local variable, then skip over any // highlighting patterns that are disabled for local variables. - let mut has_highlight = true; - while (definition_highlight.is_some() || reference_highlight.is_some()) + while has_highlight + && (definition_highlight.is_some() || reference_highlight.is_some()) && layer.config.non_local_variable_patterns[pattern_index] { has_highlight = false; @@ -780,6 +793,7 @@ where // Emit a scope start event and push the node's end position to the stack. 
if let Some(highlight) = reference_highlight.or(current_highlight) { + self.last_highlight_range = Some((range.start, range.end, layer.depth)); layer.highlight_end_stack.push(range.end); return self .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); diff --git a/lib/binding_web/test/parser-test.js b/lib/binding_web/test/parser-test.js index d6851539..90fdaf7b 100644 --- a/lib/binding_web/test/parser-test.js +++ b/lib/binding_web/test/parser-test.js @@ -185,7 +185,10 @@ describe("Parser", () => { tree = parser.parse("const x: &'static str = r###\"hello\"###;"); assert.equal( tree.rootNode.toString(), - '(source_file (const_item (identifier) (reference_type (lifetime (identifier)) (primitive_type)) (raw_string_literal)))' + '(source_file (const_item ' + + 'name: (identifier) ' + + 'type: (reference_type (lifetime (identifier)) type: (primitive_type)) ' + + 'value: (raw_string_literal)))' ); }).timeout(5000); diff --git a/script/fetch-fixtures b/script/fetch-fixtures index 7baf4032..1f3d9000 100755 --- a/script/fetch-fixtures +++ b/script/fetch-fixtures @@ -31,5 +31,5 @@ fetch_grammar javascript highlight-queries fetch_grammar json master fetch_grammar python master fetch_grammar ruby master -fetch_grammar rust master +fetch_grammar rust highlight-queries fetch_grammar typescript master From 40408fe6bbee7d4b3308df2d1e294f8388fa2553 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 14 Oct 2019 16:55:14 -0700 Subject: [PATCH 10/14] Update tree-sitter-highlight readme --- highlight/README.md | 85 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 67 insertions(+), 18 deletions(-) diff --git a/highlight/README.md b/highlight/README.md index b6b311cc..721d8892 100644 --- a/highlight/README.md +++ b/highlight/README.md @@ -1,4 +1,4 @@ -Tree-sitter Highlighting +Tree-sitter Highlight ========================= [![Build 
Status](https://travis-ci.org/tree-sitter/tree-sitter.svg?branch=master)](https://travis-ci.org/tree-sitter/tree-sitter) @@ -14,42 +14,91 @@ extern "C" tree_sitter_html(); extern "C" tree_sitter_javascript(); ``` -Load some *property sheets*: +Create a highlighter. You only need one of these: ```rust -use tree_sitter_highlight::load_property_sheet; +use tree_sitter_highlight::Highlighter; -let javascript_property_sheet = load_property_sheet( - fs::read_to_string("./tree-sitter-javascript/src/highlights.json").unwrap() -).unwrap(); +let highlighter = Highlighter::new( + [ + "attribute", + "constant", + "function.builtin", + "function", + "keyword", + "operator", + "property", + "punctuation", + "punctuation.bracket", + "punctuation.delimiter", + "string", + "string.special", + "tag", + "type", + "type.builtin", + "variable", + "variable.builtin", + "variable.parameter", + ] + .iter() + .cloned() + .map(String::from) + .collect() +); +``` -let html_property_sheet = load_property_sheet( - fs::read_to_string("./tree-sitter-html/src/highlights.json").unwrap() -).unwrap(); +Create a highlight context. 
You need one of these for each thread that you're using for syntax highlighting: + +```rust +use tree_sitter_highlight::HighlightContext; + +let context = HighlightContext::new(); +``` + +Load some highlighting queries from the `queries` directory of some language repositories: + +```rust +let html_language = unsafe { tree_sitter_html() }; +let javascript_language = unsafe { tree_sitter_javascript() }; + +let html_config = highlighter.load_configuration( + html_language, + &fs::read_to_string("./tree-sitter-html/queries/highlights.scm").unwrap(), + &fs::read_to_string("./tree-sitter-html/queries/injections.scm").unwrap(), + "", +); + +let javascript_config = highlighter.load_configuration( + javascript_language, + &fs::read_to_string("./tree-sitter-javascript/queries/highlights.scm").unwrap(), + &fs::read_to_string("./tree-sitter-javascript/queries/injections.scm").unwrap(), + &fs::read_to_string("./tree-sitter-javascript/queries/locals.scm").unwrap(), +); ``` Highlight some code: ```rust -use tree_sitter_highlight::{highlight, HighlightEvent}; +use tree_sitter_highlight::HighlightEvent; -let highlights = highlight( +let highlights = highlighter.highlight( + &mut context, + javascript_config, b"const x = new Y();", - unsafe { tree_sitter_javascript() }, - &javascript_property_sheet, + None, &|_| None ).unwrap(); for event in highlights { - match event { + match event? 
{ HighlightEvent::Source(s) { eprintln!("source: {:?}", s); }, - HighlightEvent::ScopeStart(s) { - eprintln!("scope started: {:?}", s); + HighlightEvent::HighlightStart(s) { + eprintln!("highlight style started: {:?}", s); }, - HighlightEvent::ScopeEnd(s) { - eprintln!("scope ended: {:?}", s); + HighlightEvent::HighlightEnd(s) { + eprintln!("highlight style ended: {:?}", s); }, } } From fa43ce01a6674e02df0980a5e90fff8f1802328a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 16 Oct 2019 11:54:32 -0700 Subject: [PATCH 11/14] Allow queries to capture ERROR nodes --- cli/src/tests/query_test.rs | 30 ++++++++++++++++++++++++++++++ lib/src/query.c | 1 + 2 files changed, 31 insertions(+) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 1d7554c0..9120ce88 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -365,6 +365,36 @@ fn test_query_matches_with_many() { }); } +#[test] +fn test_query_matches_capturing_error_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (ERROR (identifier) @the-error-identifier) @the-error + ", + ) + .unwrap(); + + let source = "function a(b,, c, d :e:) {}"; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + + assert_eq!( + collect_matches(matches, &query, source), + &[( + 0, + vec![("the-error", ":e:"), ("the-error-identifier", "e"),] + ),] + ); + }); +} + #[test] fn test_query_matches_in_language_with_simple_aliases() { allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index c2ba3d30..db966dc1 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -328,6 +328,7 @@ static TSSymbol ts_query_intern_node_name( uint32_t length, TSSymbolType symbol_type ) { + if (!strncmp(name, "ERROR", length)) return 
ts_builtin_sym_error; uint32_t symbol_count = ts_language_symbol_count(self->language); for (TSSymbol i = 0; i < symbol_count; i++) { if (ts_language_symbol_type(self->language, i) != symbol_type) continue; From 075a552430fa412dd72dfb1392d08eca7f9c3e45 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Oct 2019 10:14:05 -0700 Subject: [PATCH 12/14] Tweak how highlight query paths are specified in package.json --- cli/src/loader.rs | 126 +++++++++++++++++++++++++++------------------- cli/src/main.rs | 10 +++- 2 files changed, 84 insertions(+), 52 deletions(-) diff --git a/cli/src/loader.rs b/cli/src/loader.rs index 8610eaaa..9b95faaf 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -28,9 +28,9 @@ pub struct LanguageConfiguration { pub injection_regex: Option, pub file_types: Vec, pub root_path: PathBuf, - pub highlights_filename: Option, - pub injections_filename: Option, - pub locals_filename: Option, + pub highlights_filenames: Option>, + pub injections_filenames: Option>, + pub locals_filenames: Option>, language_id: usize, highlight_config: OnceCell>, } @@ -343,6 +343,30 @@ impl Loader { &'a mut self, parser_path: &Path, ) -> Result<&[LanguageConfiguration]> { + #[derive(Deserialize)] + #[serde(untagged)] + enum PathsJSON { + Empty, + Single(String), + Multiple(Vec), + } + + impl Default for PathsJSON { + fn default() -> Self { + PathsJSON::Empty + } + } + + impl PathsJSON { + fn into_vec(self) -> Option> { + match self { + PathsJSON::Empty => None, + PathsJSON::Single(s) => Some(vec![s]), + PathsJSON::Multiple(s) => Some(s), + } + } + } + #[derive(Deserialize)] struct LanguageConfigurationJSON { #[serde(default)] @@ -356,9 +380,12 @@ impl Loader { first_line_regex: Option, #[serde(rename = "injection-regex")] injection_regex: Option, - highlights: Option, - injections: Option, - locals: Option, + #[serde(default)] + highlights: PathsJSON, + #[serde(default)] + injections: PathsJSON, + #[serde(default)] + locals: PathsJSON, } 
#[derive(Deserialize)] @@ -411,9 +438,9 @@ impl Loader { .injection_regex .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), highlight_config: OnceCell::new(), - injections_filename: config_json.injections, - locals_filename: config_json.locals, - highlights_filename: config_json.highlights, + injections_filenames: config_json.injections.into_vec(), + locals_filenames: config_json.locals.into_vec(), + highlights_filenames: config_json.highlights.into_vec(), }; for file_type in &configuration.file_types { @@ -452,51 +479,48 @@ impl LanguageConfiguration { self.highlight_config .get_or_try_init(|| { let queries_path = self.root_path.join("queries"); + let read_queries = |paths: &Option>, default_path: &str| { + if let Some(paths) = paths.as_ref() { + let mut query = String::new(); + for path in paths { + let path = self.root_path.join(path); + query += &fs::read_to_string(&path).map_err(Error::wrap(|| { + format!("Failed to read query file {:?}", path) + }))?; + } + Ok(query) + } else { + let path = queries_path.join(default_path); + if path.exists() { + fs::read_to_string(&path).map_err(Error::wrap(|| { + format!("Failed to read query file {:?}", path) + })) + } else { + Ok(String::new()) + } + } + }; - let highlights_path = queries_path.join( - self.highlights_filename - .as_ref() - .map_or("highlights.scm", String::as_str), - ); - let injections_path = queries_path.join( - self.injections_filename - .as_ref() - .map_or("injections.scm", String::as_str), - ); - let locals_path = queries_path.join( - self.locals_filename - .as_ref() - .map_or("locals.scm", String::as_str), - ); + let highlights_query = read_queries(&self.highlights_filenames, "highlights.scm")?; + let injections_query = read_queries(&self.injections_filenames, "injections.scm")?; + let locals_query = read_queries(&self.locals_filenames, "locals.scm")?; - if !highlights_path.exists() { - return Ok(None); + if highlights_query.is_empty() { + Ok(None) + } else { + Ok(Some( + 
highlighter + .load_configuration( + language, + &highlights_query, + &injections_query, + &locals_query, + ) + .map_err(Error::wrap(|| { + format!("Failed to load queries in {:?}", self.root_path) + }))?, + )) } - - let highlights_query = fs::read_to_string(highlights_path)?; - let injections_query = if injections_path.exists() { - fs::read_to_string(injections_path)? - } else { - String::new() - }; - let locals_query = if locals_path.exists() { - fs::read_to_string(locals_path)? - } else { - String::new() - }; - - Ok(Some( - highlighter - .load_configuration( - language, - &highlights_query, - &injections_query, - &locals_query, - ) - .map_err(Error::wrap(|| { - format!("Failed to load queries in {:?}", queries_path) - }))?, - )) }) .map(Option::as_ref) } diff --git a/cli/src/main.rs b/cli/src/main.rs index 25ffe5f7..832cd92c 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -284,9 +284,17 @@ fn run() -> error::Result<()> { loader.find_all_languages(&config.parser_directories)?; for (configuration, language_path) in loader.get_all_language_configurations() { println!( - "scope: {}\nparser: {:?}\nfile_types: {:?}\ncontent_regex: {:?}\ninjection_regex: {:?}\n", + concat!( + "scope: {}\n", + "parser: {:?}\n", + "highlights: {:?}\n", + "file_types: {:?}\n", + "content_regex: {:?}\n", + "injection_regex: {:?}\n", + ), configuration.scope.as_ref().unwrap_or(&String::new()), language_path, + configuration.highlights_filenames, configuration.file_types, configuration.content_regex, configuration.injection_regex, From b79295e1a011a208076a1647b8d6f6f87225061f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Oct 2019 11:00:31 -0700 Subject: [PATCH 13/14] :fire: Everything related to property sheets --- Cargo.lock | 77 -- cli/Cargo.toml | 1 - cli/src/error.rs | 6 - cli/src/generate/mod.rs | 130 +-- cli/src/generate/properties.rs | 1499 ------------------------------ cli/src/main.rs | 3 - cli/src/tests/mod.rs | 1 - cli/src/tests/properties_test.rs | 265 ------ 
lib/binding_rust/lib.rs | 354 +------ 9 files changed, 47 insertions(+), 2289 deletions(-) delete mode 100644 cli/src/generate/properties.rs delete mode 100644 cli/src/tests/properties_test.rs diff --git a/Cargo.lock b/Cargo.lock index 5374a563..9376a35b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -93,11 +93,6 @@ dependencies = [ "constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "bytecount" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "byteorder" version = "1.3.2" @@ -266,18 +261,6 @@ name = "lazy_static" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "lexical-core" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - "stackvector 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", - "static_assertions 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "libc" version = "0.2.61" @@ -328,16 +311,6 @@ name = "nodrop" version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "nom" -version = "5.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "lexical-core 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "num-integer" version = "0.1.39" @@ -346,15 +319,6 @@ dependencies = [ "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "num-rational" -version = 
"0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)", - "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "num-traits" version = "0.2.6" @@ -589,19 +553,6 @@ dependencies = [ "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "rsass" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bytecount 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "nom 5.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - "num-rational 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "rust-argon2" version = "0.5.1" @@ -630,11 +581,6 @@ name = "ryu" version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "ryu" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "scopeguard" version = "0.3.3" @@ -697,20 +643,6 @@ name = "spin" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "stackvector" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "static_assertions" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "strsim" version = "0.7.0" @@ -825,7 +757,6 @@ dependencies = [ "rand 0.7.0 
(registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "rsass 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", @@ -964,7 +895,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e" "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" "checksum blake2b_simd 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "461f4b879a8eb70c1debf7d0788a9a5ff15f1ea9d25925fea264ef4258bed6b2" -"checksum bytecount 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "be0fdd54b507df8f22012890aadd099979befdba27713c767993f8380112ca7c" "checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" "checksum c2-chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7d64d04786e0f528460fc884753cf8dddcc466be308f6026f8e355c41a0e4101" "checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16" @@ -987,7 +917,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d" "checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = 
"1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" "checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" -"checksum lexical-core 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "b8b0f90c979adde96d19eb10eb6431ba0c441e2f9e9bdff868b2f6f5114ff519" "checksum libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)" = "c665266eb592905e8503ba3403020f4b8794d26263f412ca33171600eca9a6fa" "checksum libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3ad660d7cb8c5822cd83d10897b0f1f1526792737a179e73896152f85b88c2" "checksum lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c" @@ -995,9 +924,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" "checksum memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0a3eb002f0535929f1199681417029ebea04aadc0c7a4224b46be99c7f5d6a16" "checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" -"checksum nom 5.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e9761d859320e381010a4f7f8ed425f2c924de33ad121ace447367c713ad561b" "checksum num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea" -"checksum num-rational 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4e96f040177bb3da242b5b1ecf3f54b5d5af3efbbfb18608977a5d2767b22f10" "checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1" "checksum once_cell 0.1.8 
(registry+https://github.com/rust-lang/crates.io-index)" = "532c29a261168a45ce28948f9537ddd7a5dd272cc513b3017b1e82a88f962c37" "checksum parking_lot 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ab41b4aed082705d1056416ae4468b6ea99d52599ecf3169b00088d43113e337" @@ -1025,12 +952,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" "checksum regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" "checksum remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5" -"checksum rsass 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4520dc8a2786c0319f3947e3d79e735b27f0c63c555b854aaa802e49e3f45098" "checksum rust-argon2 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ca4eaef519b494d1f2848fc602d18816fed808a981aedf4f1f00ceb7c9d32cf" "checksum rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395" "checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" "checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" -"checksum ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997" "checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" "checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" "checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" @@ -1040,8 +965,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1764fe2b30ee783bfe3b9b37b2649d8d590b3148bb12e0079715d4d5c673562e" "checksum smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "88aea073965ab29f6edb5493faf96ad662fb18aa9eeb186a3b7057951605ed15" "checksum spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "44363f6f51401c34e7be73db0db371c04705d35efbe9f7d6082e03a921a32c55" -"checksum stackvector 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "1c4725650978235083241fab0fdc8e694c3de37821524e7534a1a9061d1068af" -"checksum static_assertions 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3" "checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" "checksum syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)" = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7" "checksum synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index fac9d61a..8dd7cb18 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -32,7 +32,6 @@ serde = "1.0" serde_derive = "1.0" regex-syntax = "0.6.4" regex = "1" -rsass = "^0.11.0" tiny_http = "0.6" webbrowser = "0.5.1" diff --git a/cli/src/error.rs b/cli/src/error.rs index 324ad8b1..fab87478 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -74,12 +74,6 @@ impl From for Error { } } -impl From for Error { - fn 
from(error: rsass::Error) -> Self { - Error::new(error.to_string()) - } -} - impl From for Error { fn from(error: regex_syntax::ast::Error) -> Self { Error::new(error.to_string()) diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index be293047..0644e2cd 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -6,13 +6,12 @@ mod node_types; mod npm_files; pub mod parse_grammar; mod prepare_grammar; -pub mod properties; mod render; mod rules; mod tables; use self::build_tables::build_tables; -use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType}; +use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; use self::parse_grammar::parse_grammar; use self::prepare_grammar::prepare_grammar; use self::render::render_c_code; @@ -20,9 +19,8 @@ use self::rules::AliasMap; use crate::error::{Error, Result}; use lazy_static::lazy_static; use regex::{Regex, RegexBuilder}; -use std::collections::HashSet; -use std::fs::{self, File}; -use std::io::{BufWriter, Write}; +use std::fs; +use std::io::Write; use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; @@ -51,13 +49,11 @@ struct GeneratedParser { pub fn generate_parser_in_directory( repo_path: &PathBuf, grammar_path: Option<&str>, - properties_only: bool, next_abi: bool, report_symbol_name: Option<&str>, ) -> Result<()> { let src_path = repo_path.join("src"); let header_path = src_path.join("tree_sitter"); - let properties_dir_path = repo_path.join("properties"); // Ensure that the output directories exist. fs::create_dir_all(&src_path)?; @@ -82,71 +78,48 @@ pub fn generate_parser_in_directory( prepare_grammar(&input_grammar)?; let language_name = input_grammar.name; - // If run with no arguments, read all of the property sheets and compile them to JSON. 
- if grammar_path.is_none() { - let token_names = get_token_names(&syntax_grammar, &lexical_grammar); - if let Ok(entries) = fs::read_dir(properties_dir_path) { - for entry in entries { - let css_path = entry?.path(); - let css = fs::read_to_string(&css_path)?; - let sheet = properties::generate_property_sheet(&css_path, &css, &token_names)?; - let property_sheet_json_path = src_path - .join(css_path.file_name().unwrap()) - .with_extension("json"); - let property_sheet_json_file = - File::create(&property_sheet_json_path).map_err(Error::wrap(|| { - format!("Failed to create {:?}", property_sheet_json_path) - }))?; - let mut writer = BufWriter::new(property_sheet_json_file); - serde_json::to_writer_pretty(&mut writer, &sheet)?; - } - } - } - // Generate the parser and related files. - if !properties_only { - let GeneratedParser { - c_code, - node_types_json, - } = generate_parser_for_grammar_with_opts( - &language_name, - syntax_grammar, - lexical_grammar, - inlines, - simple_aliases, - next_abi, - report_symbol_name, - )?; + let GeneratedParser { + c_code, + node_types_json, + } = generate_parser_for_grammar_with_opts( + &language_name, + syntax_grammar, + lexical_grammar, + inlines, + simple_aliases, + next_abi, + report_symbol_name, + )?; - write_file(&src_path.join("parser.c"), c_code)?; - write_file(&src_path.join("node-types.json"), node_types_json)?; + write_file(&src_path.join("parser.c"), c_code)?; + write_file(&src_path.join("node-types.json"), node_types_json)?; - if next_abi { - write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?; - } else { - let mut header = tree_sitter::PARSER_HEADER.to_string(); + if next_abi { + write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?; + } else { + let mut header = tree_sitter::PARSER_HEADER.to_string(); - for part in &NEW_HEADER_PARTS { - let pos = header - .find(part) - .expect("Missing expected part of parser.h header"); - header.replace_range(pos..(pos + part.len()), ""); - } - - 
write_file(&header_path.join("parser.h"), header)?; + for part in &NEW_HEADER_PARTS { + let pos = header + .find(part) + .expect("Missing expected part of parser.h header"); + header.replace_range(pos..(pos + part.len()), ""); } - ensure_file(&repo_path.join("index.js"), || { - npm_files::index_js(&language_name) - })?; - ensure_file(&src_path.join("binding.cc"), || { - npm_files::binding_cc(&language_name) - })?; - ensure_file(&repo_path.join("binding.gyp"), || { - npm_files::binding_gyp(&language_name) - })?; + write_file(&header_path.join("parser.h"), header)?; } + ensure_file(&repo_path.join("index.js"), || { + npm_files::index_js(&language_name) + })?; + ensure_file(&src_path.join("binding.cc"), || { + npm_files::binding_cc(&language_name) + })?; + ensure_file(&repo_path.join("binding.gyp"), || { + npm_files::binding_gyp(&language_name) + })?; + Ok(()) } @@ -208,35 +181,6 @@ fn generate_parser_for_grammar_with_opts( }) } -fn get_token_names( - syntax_grammar: &SyntaxGrammar, - lexical_grammar: &LexicalGrammar, -) -> HashSet { - let mut result = HashSet::new(); - for variable in &lexical_grammar.variables { - if variable.kind == VariableType::Named { - result.insert(variable.name.clone()); - } - } - for token in &syntax_grammar.external_tokens { - if token.kind == VariableType::Named { - result.insert(token.name.clone()); - } - } - for variable in &syntax_grammar.variables { - for production in &variable.productions { - for step in &production.steps { - if let Some(alias) = &step.alias { - if !step.symbol.is_non_terminal() && alias.is_named { - result.insert(alias.value.clone()); - } - } - } - } - } - result -} - fn load_grammar_file(grammar_path: &Path) -> Result { match grammar_path.extension().and_then(|e| e.to_str()) { Some("js") => Ok(load_js_grammar_file(grammar_path)?), diff --git a/cli/src/generate/properties.rs b/cli/src/generate/properties.rs deleted file mode 100644 index 5091eafc..00000000 --- a/cli/src/generate/properties.rs +++ /dev/null @@ 
-1,1499 +0,0 @@ -use crate::error::{Error, Result}; -use crate::generate::dedup::split_state_id_groups; -use rsass; -use rsass::sass::Value; -use rsass::selectors::SelectorPart; -use serde_derive::Serialize; -use std::cmp::Ordering; -use std::collections::hash_map::Entry; -use std::collections::{btree_map, BTreeMap, HashMap, HashSet, VecDeque}; -use std::fmt::{self, Write}; -use std::hash::{Hash, Hasher}; -use std::mem; -use std::path::{Path, PathBuf}; -use tree_sitter::{self, PropertyStateJSON, PropertyTransitionJSON}; - -#[derive(Clone, Debug, PartialEq, Eq, Serialize)] -#[serde(untagged)] -pub(crate) enum PropertyValue { - Number(isize), - Boolean(bool), - String(String), - Object(PropertySet), - Array(Vec), -} - -type PropertySet = BTreeMap; -type PropertySheetJSON = tree_sitter::PropertySheetJSON; -type StateId = usize; -type PropertySetId = usize; - -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -struct SelectorStep { - kind: Option, - field: Option, - child_index: Option, - text_pattern: Option, - is_named: Option, - is_immediate: bool, -} - -#[derive(PartialEq, Eq, PartialOrd, Ord)] -struct Selector(Vec); - -#[derive(Debug, PartialEq, Eq)] -struct Rule { - selectors: Vec, - properties: PropertySet, -} - -#[derive(Clone, Copy, Debug)] -struct Item<'a> { - rule_id: u32, - selector: &'a Selector, - step_id: u32, -} - -#[derive(Clone, PartialEq, Eq)] -struct ItemSet<'a>(Vec>); - -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] -struct SelectorMatch { - specificity: u32, - rule_id: u32, -} - -struct Builder<'a> { - rules: &'a Vec, - output: PropertySheetJSON, - start_item_set: ItemSet<'a>, - token_names: &'a HashSet, - ids_by_item_set: HashMap, StateId>, - item_set_queue: VecDeque<(ItemSet<'a>, StateId)>, - item_set_list: Vec>, -} - -impl<'a> Item<'a> { - fn next_step(&self) -> Option<&SelectorStep> { - self.selector.0.get(self.step_id as usize) - } - - fn is_done(&self) -> bool { - self.step_id as usize == self.selector.0.len() - } -} - -impl<'a> Ord 
for Item<'a> { - fn cmp(&self, other: &Item) -> Ordering { - self.rule_id - .cmp(&other.rule_id) - .then_with(|| self.selector.0.len().cmp(&other.selector.0.len())) - .then_with(|| { - for (i, step) in self - .selector - .0 - .iter() - .enumerate() - .skip(self.step_id as usize) - { - let result = step.cmp(&other.selector.0[i]); - if result != Ordering::Equal { - return result; - } - } - Ordering::Equal - }) - } -} - -impl<'a> PartialOrd for Item<'a> { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl<'a> Hash for Item<'a> { - fn hash(&self, hasher: &mut H) { - hasher.write_u32(self.rule_id); - hasher.write_usize(self.selector.0.len()); - hasher.write_u32(self.step_id); - for step in &self.selector.0[self.step_id as usize..] { - step.hash(hasher); - } - } -} - -impl<'a> PartialEq for Item<'a> { - fn eq(&self, other: &Self) -> bool { - if self.rule_id != other.rule_id - || self.selector.0.len() != other.selector.0.len() - || self.step_id != other.step_id - { - return false; - } - - for (i, step) in self - .selector - .0 - .iter() - .enumerate() - .skip(self.step_id as usize) - { - if *step != other.selector.0[i] { - return false; - } - } - - true - } -} - -impl<'a> Eq for Item<'a> {} - -impl<'a> Hash for ItemSet<'a> { - fn hash(&self, hasher: &mut H) { - hasher.write_usize(self.0.len()); - for item in &self.0 { - item.hash(hasher); - } - } -} - -impl<'a> ItemSet<'a> { - fn new() -> Self { - ItemSet(Vec::new()) - } - - fn insert(&mut self, item: Item<'a>) { - match self.0.binary_search(&item) { - Err(i) => self.0.insert(i, item), - _ => {} - } - } -} - -impl<'a> Builder<'a> { - fn new(rules: &'a Vec, token_names: &'a HashSet) -> Self { - Builder { - rules, - start_item_set: ItemSet::new(), - item_set_list: Vec::new(), - output: PropertySheetJSON { - states: Vec::new(), - property_sets: Vec::new(), - }, - token_names, - ids_by_item_set: HashMap::new(), - item_set_queue: VecDeque::new(), - } - } - - fn build(mut self) -> 
PropertySheetJSON { - for (i, rule) in self.rules.iter().enumerate() { - for selector in &rule.selectors { - self.start_item_set.insert(Item { - rule_id: i as u32, - selector, - step_id: 0, - }); - } - } - - self.add_state(ItemSet::new()); - self.output.states[0].id = Some(0); - while let Some((item_set, state_id)) = self.item_set_queue.pop_front() { - self.populate_state(item_set, state_id); - } - - self.remove_duplicate_states(); - - for (i, state) in self.output.states.iter_mut().enumerate() { - state.id = Some(i); - } - - self.output - } - - fn add_state(&mut self, item_set: ItemSet<'a>) -> StateId { - match self.ids_by_item_set.entry(item_set) { - Entry::Occupied(o) => *o.get(), - Entry::Vacant(v) => { - let state_id = self.output.states.len(); - self.output.states.push(PropertyStateJSON { - id: None, - transitions: Vec::new(), - property_set_id: 0, - default_next_state_id: 0, - }); - self.item_set_queue.push_back((v.key().clone(), state_id)); - v.insert(state_id); - state_id - } - } - } - - fn add_property_set(&mut self, properties: PropertySet) -> PropertySetId { - if let Some(index) = self - .output - .property_sets - .iter() - .position(|i| *i == properties) - { - index - } else { - self.output.property_sets.push(properties); - self.output.property_sets.len() - 1 - } - } - - fn populate_state(&mut self, item_set: ItemSet<'a>, state_id: StateId) { - let is_start_state = state_id == 0; - let mut transitions: HashMap = HashMap::new(); - let mut selector_matches = Vec::new(); - - // First, compute all of the possible state transition conditions for - // this state, and all of the rules that are currently matching. - for item in item_set.0.iter().chain(self.start_item_set.0.iter()) { - // If this item has more elements remaining in its selector, then - // add a state transition based on the next step. 
- if let Some(step) = item.next_step() { - transitions - .entry(PropertyTransitionJSON { - kind: step.kind.clone(), - field: step.field.clone(), - named: step.is_named, - index: step.child_index, - text: step.text_pattern.clone(), - state_id: 0, - }) - .and_modify(|rule_id| { - if item.rule_id > *rule_id { - *rule_id = item.rule_id; - } - }) - .or_insert(item.rule_id); - } - // If the item has matched its entire selector, then the item's - // properties are applicable to this state. - else { - selector_matches.push(SelectorMatch { - rule_id: item.rule_id, - specificity: selector_specificity(item.selector), - }); - } - } - - // Compute the merged properties that apply in the current state. - // Sort the matching property sets by ascending specificity and by - // their order in the sheet. This way, more specific selectors and later - // rules will override less specific selectors and earlier rules. - let mut properties = PropertySet::new(); - selector_matches.sort_unstable_by(|a, b| { - (a.specificity.cmp(&b.specificity)).then_with(|| a.rule_id.cmp(&b.rule_id)) - }); - selector_matches.dedup(); - for selector_match in selector_matches { - let rule = &self.rules[selector_match.rule_id as usize]; - for (property, value) in &rule.properties { - properties.insert(property.clone(), value.clone()); - } - } - self.output.states[state_id].property_set_id = self.add_property_set(properties); - - // If there are multiple transitions that could *both* match (e.g. one based on a - // a node type and one based on a field name), then create an additional transition - // for the intersection of the two. 
- let mut i = 0; - let mut transition_list = transitions.into_iter().collect::>(); - while i < transition_list.len() { - for j in 0..i { - if let Some(intersection) = - self.intersect_transitions(&transition_list[j].0, &transition_list[i].0) - { - transition_list.push(( - intersection, - u32::max(transition_list[i].1, transition_list[j].1), - )); - } - } - i += 1; - } - - // Ensure that for a given node type, more specific transitions are tried - // first, and in the event of a tie, transitions corresponding to later rules - // in the cascade are tried first. Also, sort the non-intersecting transitions - // by name to guarantee a deterministic order. - transition_list.sort_by(|a, b| { - (transition_specificity(&b.0).cmp(&transition_specificity(&a.0))) - .then_with(|| b.1.cmp(&a.1)) - .then_with(|| a.0.kind.cmp(&b.0.kind)) - .then_with(|| a.0.named.cmp(&b.0.named)) - .then_with(|| a.0.field.cmp(&b.0.field)) - }); - - // For eacy possible state transition, compute the set of items in that transition's - // destination state. - i = 0; - while i < transition_list.len() { - let transition = &mut transition_list[i].0; - let transition_is_leaf = transition.named == Some(false) - || transition - .kind - .as_ref() - .map_or(false, |kind| self.token_names.contains(kind)); - - let mut next_item_set = ItemSet::new(); - let mut transition_differs_from_start_state = false; - for item in item_set.0.iter().chain(self.start_item_set.0.iter()) { - if let Some(next_step) = item.next_step() { - // If the next step of the item's selector satisfies this transition, - // advance the item to the next part of its selector and add the - // resulting item to this transition's destination state. 
- if step_matches_transition(next_step, transition) { - let next_item = Item { - rule_id: item.rule_id, - selector: item.selector, - step_id: item.step_id + 1, - }; - if !transition_is_leaf || next_item.is_done() { - next_item_set.insert(next_item); - if item.step_id > 0 { - transition_differs_from_start_state = true; - } - } - } - - // If the next step of the item is not an immediate child, then - // include this item in this transition's destination state, because - // the next step of the item might match a descendant node. - if !transition_is_leaf && !next_step.is_immediate && item.step_id > 0 { - next_item_set.insert(*item); - transition_differs_from_start_state = true; - } - } - } - - if (is_start_state || transition_differs_from_start_state) - && !next_item_set.0.is_empty() - { - transition.state_id = self.add_state(next_item_set); - if is_start_state || !self.output.states[0].transitions.contains(&transition) { - i += 1; - continue; - } - } - transition_list.remove(i); - } - - self.output.states[state_id] - .transitions - .extend(transition_list.into_iter().map(|i| i.0)); - - // Compute the default successor item set - the item set that - // we should advance to if the next element doesn't match any - // of the next elements in the item set's selectors. 
- let mut default_next_item_set = ItemSet::new(); - for item in &item_set.0 { - let next_step = item.selector.0.get(item.step_id as usize); - if let Some(step) = next_step { - if !step.is_immediate { - default_next_item_set.insert(*item); - } - } - } - self.output.states[state_id].default_next_state_id = self.add_state(default_next_item_set); - - self.item_set_list.push(item_set); - } - - fn intersect_transitions( - &self, - left: &PropertyTransitionJSON, - right: &PropertyTransitionJSON, - ) -> Option { - let mut left_contributes = false; - let mut right_contributes = false; - let mut result = left.clone(); - - if let Some(left_kind) = &left.kind { - if let Some(right_kind) = &right.kind { - if left_kind != right_kind || left.named != right.named { - return None; - } - } else { - left_contributes = true; - } - } else if let Some(right_kind) = &right.kind { - result.kind = Some(right_kind.clone()); - result.named = right.named; - right_contributes = true; - } - - if let Some(left_field) = &left.field { - if let Some(right_field) = &right.field { - if left_field != right_field { - return None; - } - } else { - left_contributes = true; - } - } else if let Some(right_field) = &right.field { - result.field = Some(right_field.clone()); - right_contributes = true; - } - - if let Some(left_text) = &left.text { - if let Some(right_text) = &right.text { - if left_text != right_text { - return None; - } - } else { - left_contributes = true; - } - } else if let Some(right_text) = &right.text { - result.text = Some(right_text.clone()); - right_contributes = true; - } - - if let Some(left_index) = &left.index { - if let Some(right_index) = &right.index { - if left_index != right_index { - return None; - } - } else { - left_contributes = true; - } - } else if let Some(right_index) = &right.index { - result.index = Some(right_index.clone()); - right_contributes = true; - } - - if left_contributes && right_contributes { - Some(result) - } else { - None - } - } - - fn 
remove_duplicate_states(&mut self) { - let mut state_ids_by_properties = HashMap::new(); - for (i, state) in self.output.states.iter().enumerate() { - state_ids_by_properties - .entry(state.property_set_id) - .or_insert(Vec::new()) - .push(i); - } - let mut state_ids_by_group_id = state_ids_by_properties - .into_iter() - .map(|e| e.1) - .collect::>(); - state_ids_by_group_id.sort(); - let start_group_index = state_ids_by_group_id - .iter() - .position(|g| g.contains(&0)) - .unwrap(); - state_ids_by_group_id.swap(start_group_index, 0); - - let mut group_ids_by_state_id = vec![0; self.output.states.len()]; - for (group_id, state_ids) in state_ids_by_group_id.iter().enumerate() { - for state_id in state_ids { - group_ids_by_state_id[*state_id] = group_id; - } - } - - while split_state_id_groups( - &self.output.states, - &mut state_ids_by_group_id, - &mut group_ids_by_state_id, - 0, - property_states_differ, - ) { - continue; - } - - let mut new_states = Vec::with_capacity(state_ids_by_group_id.len()); - for state_ids in state_ids_by_group_id.iter() { - let mut new_state = PropertyStateJSON::default(); - mem::swap(&mut new_state, &mut self.output.states[state_ids[0]]); - for transition in new_state.transitions.iter_mut() { - transition.state_id = group_ids_by_state_id[transition.state_id]; - } - new_state.default_next_state_id = - group_ids_by_state_id[new_state.default_next_state_id]; - new_states.push(new_state); - } - self.output.states = new_states; - } -} - -fn property_states_differ( - left: &PropertyStateJSON, - right: &PropertyStateJSON, - group_ids_by_state_id: &Vec, -) -> bool { - if group_ids_by_state_id[left.default_next_state_id] - != group_ids_by_state_id[right.default_next_state_id] - { - return true; - } - - left.transitions - .iter() - .zip(right.transitions.iter()) - .any(|(left, right)| { - left.kind != right.kind - || left.named != right.named - || left.index != right.index - || left.field != right.field - || left.text != right.text - || 
group_ids_by_state_id[left.state_id] != group_ids_by_state_id[right.state_id] - }) -} - -fn selector_specificity(selector: &Selector) -> u32 { - let mut result = 0; - for step in &selector.0 { - if step.kind.is_some() { - result += 1; - } - if step.field.is_some() { - result += 1; - } - if step.child_index.is_some() { - result += 1; - } - if step.text_pattern.is_some() { - result += 1; - } - } - result -} - -fn transition_specificity(transition: &PropertyTransitionJSON) -> u32 { - let mut result = 0; - if transition.kind.is_some() { - result += 1; - } - if transition.field.is_some() { - result += 1; - } - if transition.index.is_some() { - result += 1; - } - if transition.text.is_some() { - result += 1; - } - result -} - -fn step_matches_transition(step: &SelectorStep, transition: &PropertyTransitionJSON) -> bool { - step.kind - .as_ref() - .map_or(true, |kind| transition.kind.as_ref() == Some(kind)) - && step - .is_named - .map_or(true, |named| transition.named == Some(named)) - && step - .field - .as_ref() - .map_or(true, |field| transition.field.as_ref() == Some(field)) - && step - .child_index - .map_or(true, |index| transition.index == Some(index)) - && step - .text_pattern - .as_ref() - .map_or(true, |text| transition.text.as_ref() == Some(text)) -} - -impl fmt::Debug for SelectorStep { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if self.is_immediate { - write!(f, "> ")?; - } - write!(f, "(")?; - if let Some(kind) = &self.kind { - if self.is_named.unwrap() { - write!(f, "{}", kind)?; - } else { - write!(f, "[token='{}']", kind)?; - } - } - if let Some(field) = &self.field { - write!(f, ".{}", field)?; - } - if let Some(n) = self.child_index { - write!(f, ":nth-child({})", n)?; - } - if let Some(t) = &self.text_pattern { - write!(f, "[text='{}']", t)?; - } - write!(f, ")")?; - Ok(()) - } -} - -impl fmt::Debug for Selector { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "[")?; - for (i, step) in self.0.iter().enumerate() { - 
if step.is_immediate { - write!(f, " > ")?; - } else if i > 0 { - write!(f, " ")?; - } - write!(f, "{:?}", step)?; - } - write!(f, " (specificity: {})]", selector_specificity(self))?; - Ok(()) - } -} - -pub(crate) fn generate_property_sheet( - path: impl AsRef, - css: &str, - token_names: &HashSet, -) -> Result { - let rules = parse_property_sheet(path.as_ref(), &css)?; - Ok(Builder::new(&rules, token_names).build()) -} - -fn parse_property_sheet(path: &Path, css: &str) -> Result> { - let mut schema_paths = Vec::new(); - let css = css.as_bytes(); - let mut items = rsass::parse_scss_data(css).map_err(|(pos, kind)| rsass::Error::ParseError { - file: path.to_string_lossy().into(), - pos: rsass::ErrPos::pos_of(pos, &css), - kind, - })?; - - process_at_rules(&mut items, &mut schema_paths, path)?; - let mut result = Vec::new(); - let selector_prefixes = vec![Vec::new()]; - parse_sass_items(items, &selector_prefixes, &mut result)?; - Ok(result) -} - -fn parse_sass_items( - items: Vec, - selector_prefixes: &Vec>, - result: &mut Vec, -) -> Result<()> { - let mut properties = PropertySet::new(); - for item in items { - match item { - rsass::Item::None | rsass::Item::Comment(_) => {} - rsass::Item::Property(name, value) => { - let value = parse_sass_value(&value)?; - match properties.entry(name.to_string()) { - btree_map::Entry::Vacant(v) => { - v.insert(value); - } - btree_map::Entry::Occupied(mut o) => { - let existing_value = o.get_mut(); - if let PropertyValue::Array(items) = existing_value { - items.push(value); - continue; - } else { - let v = existing_value.clone(); - *existing_value = PropertyValue::Array(vec![v, value]); - } - } - } - } - rsass::Item::Rule(selectors, items) => { - let mut full_selectors = Vec::new(); - for prefix in selector_prefixes { - for selector in &selectors.s { - let mut prefix = prefix.clone(); - let mut operator_was_immediate: Option = Some(false); - for part in &selector.0 { - match part { - SelectorPart::BackRef => { - 
operator_was_immediate = None; - } - SelectorPart::Simple(value) => { - if let Some(value) = value.single_raw() { - for (i, value) in value.split('.').enumerate() { - if value.is_empty() { - continue; - } - let value = value.to_string(); - check_node_kind(&value)?; - if i > 0 { - if let Some(immediate) = operator_was_immediate { - prefix.push(SelectorStep { - kind: None, - field: Some(value), - is_named: None, - child_index: None, - text_pattern: None, - is_immediate: immediate, - }) - } else { - prefix.last_mut().unwrap().field = Some(value); - } - } else { - if let Some(immediate) = operator_was_immediate { - prefix.push(SelectorStep { - kind: Some(value.to_string()), - field: None, - child_index: None, - text_pattern: None, - is_named: Some(true), - is_immediate: immediate, - }); - } else { - return Error::err(format!("Node type {} must be separated by whitespace or the `>` operator", value)); - } - } - operator_was_immediate = None; - } - } else { - return Err(interpolation_error()); - } - operator_was_immediate = None; - } - SelectorPart::Attribute { name, val, .. } => { - match name.single_raw() { - None => return Err(interpolation_error()), - Some("text") => { - if operator_was_immediate.is_some() { - return Error::err("The `text` attribute must be used in combination with a node type or field".to_string()); - } - if let Some(last_step) = prefix.last_mut() { - last_step.text_pattern = - Some(get_string_value(val.to_string())?) 
- } - } - Some("token") => { - if let Some(immediate) = operator_was_immediate { - prefix.push(SelectorStep { - kind: Some(get_string_value(val.to_string())?), - field: None, - is_named: Some(false), - child_index: None, - text_pattern: None, - is_immediate: immediate, - }); - operator_was_immediate = None; - } else { - return Error::err("The `token` attribute canot be used in combination with a node type".to_string()); - } - } - _ => { - return Error::err(format!( - "Unsupported attribute {}", - part - )); - } - } - } - SelectorPart::PseudoElement { .. } => { - return Error::err( - "Pseudo elements are not supported".to_string(), - ); - } - SelectorPart::Pseudo { name, arg } => match name.single_raw() { - None => return Err(interpolation_error()), - Some("nth-child") => { - if let Some(arg) = arg { - let mut arg_str = String::new(); - write!(&mut arg_str, "{}", arg).unwrap(); - if let Some(last_step) = prefix.last_mut() { - if let Ok(i) = usize::from_str_radix(&arg_str, 10) { - last_step.child_index = Some(i); - } else { - return Error::err(format!( - "Invalid child index {}", - arg - )); - } - } - } - } - _ => { - return Error::err(format!( - "Unsupported pseudo-class {}", - part - )); - } - }, - SelectorPart::Descendant => { - operator_was_immediate = Some(false); - } - SelectorPart::RelOp(operator) => { - let operator = *operator as char; - if operator == '>' { - operator_was_immediate = Some(true); - } else { - return Error::err(format!( - "Unsupported operator {}", - operator - )); - } - } - } - } - full_selectors.push(prefix); - } - } - parse_sass_items(items, &full_selectors, result)?; - } - _ => return Error::err(format!("Unsupported syntax type {:?}", item)), - } - } - - if !properties.is_empty() { - result.push(Rule { - selectors: selector_prefixes.iter().cloned().map(Selector).collect(), - properties, - }); - } - - Ok(()) -} - -fn process_at_rules( - items: &mut Vec, - schema_paths: &mut Vec, - path: &Path, -) -> Result<()> { - let mut i = 0; - while i 
< items.len() { - match &items[i] { - rsass::Item::Import(arg) => { - if let Some(s) = get_sass_string(arg) { - let import_path = resolve_path(path, s)?; - let mut imported_items = rsass::parse_scss_file(&import_path)?; - process_at_rules(&mut imported_items, schema_paths, &import_path)?; - items.splice(i..(i + 1), imported_items); - continue; - } else { - return Err(Error::new("@import arguments must be strings".to_string())); - } - } - rsass::Item::AtRule { name, args, .. } => match name.as_str() { - "schema" => { - if let Some(s) = get_sass_string(args) { - let schema_path = resolve_path(path, s)?; - schema_paths.push(schema_path); - items.remove(i); - continue; - } else { - return Error::err("@schema arguments must be strings".to_string()); - } - } - _ => return Error::err(format!("Unsupported at-rule '{}'", name)), - }, - _ => {} - } - i += 1; - } - Ok(()) -} - -fn parse_sass_value(value: &Value) -> Result { - match value { - Value::Literal(s) => { - if let Some(s) = s.single_raw() { - Ok(PropertyValue::String(s.to_string())) - } else { - Err(interpolation_error()) - } - } - Value::Call(name, raw_args) => { - if let Some(name) = name.single_raw() { - let mut args = Vec::new(); - for (_, arg) in raw_args.iter() { - args.push(parse_sass_value(arg)?); - } - let mut result = PropertySet::new(); - result.insert("name".to_string(), PropertyValue::String(name.to_string())); - result.insert("args".to_string(), PropertyValue::Array(args)); - Ok(PropertyValue::Object(result)) - } else { - Err(Error::new( - "String interpolation is not supported".to_string(), - )) - } - } - Value::List(elements, ..) 
=> { - let mut result = Vec::new(); - for element in elements { - result.push(parse_sass_value(element)?); - } - Ok(PropertyValue::Array(result)) - } - Value::Color(_, Some(name)) => Ok(PropertyValue::String(name.clone())), - Value::Numeric(n, _) => Ok(PropertyValue::Number(n.to_integer())), - Value::True => Ok(PropertyValue::Boolean(true)), - Value::False => Ok(PropertyValue::Boolean(false)), - _ => Err(Error::new(format!( - "Property values must be strings or function calls. Got {:?}", - value - ))), - } -} - -fn get_sass_string(value: &Value) -> Option<&str> { - if let Value::Literal(s) = value { - s.single_raw() - } else { - None - } -} - -fn resolve_path(base: &Path, p: &str) -> Result { - let path = Path::new(p); - let mut base = base.to_owned(); - base.pop(); - if path.starts_with(".") { - base.push(path); - if base.exists() { - return Ok(base); - } - } else { - loop { - let mut result = base.clone(); - result.push("node_modules"); - result.push(path); - if result.exists() { - return Ok(result); - } - if !base.pop() { - break; - } - } - } - Err(Error::new(format!("Could not resolve import path `{}`", p))) -} - -fn check_node_kind(name: &String) -> Result<()> { - for c in name.chars() { - if !c.is_alphanumeric() && c != '_' { - return Err(Error::new(format!("Invalid identifier '{}'", name))); - } - } - Ok(()) -} - -fn get_string_value(mut s: String) -> Result { - if s.starts_with("'") && s.ends_with("'") || s.starts_with('"') && s.ends_with('"') { - s.pop(); - s.remove(0); - Ok(s) - } else { - Err(Error::new(format!("Unsupported string literal {}", s))) - } -} - -fn interpolation_error() -> Error { - Error::new("String interpolation is not supported".to_string()) -} - -#[cfg(test)] -mod tests { - use super::*; - use regex::Regex; - use std::fs; - use tempfile::TempDir; - - #[test] - fn test_property_sheet_with_immediate_child_and_descendant_selectors() { - let sheet = generate_property_sheet( - "foo.css", - " - f1 { - color: red; - - & > f2 { - color: green; 
- } - - & f3 { - color: blue; - } - } - - f2 { - color: indigo; - height: 2; - } - - f3 { - color: violet; - height: 3; - } - ", - &HashSet::new(), - ) - .unwrap(); - - // f1 single-element selector - assert_eq!( - *query_simple(&sheet, vec!["f1"]), - props(&[("color", string("red"))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f2", "f1"]), - props(&[("color", string("red"))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f2", "f3", "f1"]), - props(&[("color", string("red"))]) - ); - - // f2 single-element selector - assert_eq!( - *query_simple(&sheet, vec!["f2"]), - props(&[("color", string("indigo")), ("height", num(2))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f2", "f2"]), - props(&[("color", string("indigo")), ("height", num(2))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f1", "f3", "f2"]), - props(&[("color", string("indigo")), ("height", num(2))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f1", "f6", "f2"]), - props(&[("color", string("indigo")), ("height", num(2))]) - ); - - // f3 single-element selector - assert_eq!( - *query_simple(&sheet, vec!["f3"]), - props(&[("color", string("violet")), ("height", num(3))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f2", "f3"]), - props(&[("color", string("violet")), ("height", num(3))]) - ); - - // f2 child selector - assert_eq!( - *query_simple(&sheet, vec!["f1", "f2"]), - props(&[("color", string("green")), ("height", num(2))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f2", "f1", "f2"]), - props(&[("color", string("green")), ("height", num(2))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f3", "f1", "f2"]), - props(&[("color", string("green")), ("height", num(2))]) - ); - - // f3 descendant selector - assert_eq!( - *query_simple(&sheet, vec!["f1", "f3"]), - props(&[("color", string("blue")), ("height", num(3))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f1", "f2", "f3"]), - props(&[("color", string("blue")), ("height", num(3))]) - ); - assert_eq!( - 
*query_simple(&sheet, vec!["f1", "f6", "f7", "f8", "f3"]), - props(&[("color", string("blue")), ("height", num(3))]) - ); - - // no match - assert_eq!(*query_simple(&sheet, vec!["f1", "f3", "f4"]), props(&[])); - assert_eq!(*query_simple(&sheet, vec!["f1", "f2", "f5"]), props(&[])); - } - - #[test] - fn test_property_sheet_with_text_attribute() { - let sheet = generate_property_sheet( - "foo.css", - " - f1 { - color: red; - - &[text='^[A-Z]'] { - color: green; - } - - &[text='^[A-Z_]+$'] { - color: blue; - } - } - - f2[text='^[A-Z_]+$'] { - color: purple; - } - ", - &HashSet::new(), - ) - .unwrap(); - - assert_eq!( - *query(&sheet, vec![("f1", None, true, 0)], "abc"), - props(&[("color", string("red"))]) - ); - assert_eq!( - *query(&sheet, vec![("f1", None, true, 0)], "Abc"), - props(&[("color", string("green"))]) - ); - assert_eq!( - *query(&sheet, vec![("f1", None, true, 0)], "AB_CD"), - props(&[("color", string("blue"))]) - ); - assert_eq!( - *query(&sheet, vec![("f2", None, true, 0)], "Abc"), - props(&[]) - ); - assert_eq!( - *query(&sheet, vec![("f2", None, true, 0)], "ABC"), - props(&[("color", string("purple"))]) - ); - } - - #[test] - fn test_property_sheet_with_fields() { - let sheet = generate_property_sheet( - "foo.css", - " - a { - color: red; - &.x { - color: green; - b { - color: blue; - &.y { color: yellow; } - } - } - b { color: orange; } - b.y { color: indigo; } - } - .x { color: violet; } - ", - &HashSet::new(), - ) - .unwrap(); - - assert_eq!( - *query(&sheet, vec![("a", None, true, 0)], ""), - props(&[("color", string("red"))]) - ); - assert_eq!( - *query(&sheet, vec![("a", Some("x"), true, 0)], ""), - props(&[("color", string("green"))]) - ); - assert_eq!( - *query( - &sheet, - vec![("a", Some("x"), true, 0), ("b", None, true, 0)], - "" - ), - props(&[("color", string("blue"))]) - ); - assert_eq!( - *query( - &sheet, - vec![("a", Some("x"), true, 0), ("b", Some("y"), true, 0)], - "" - ), - props(&[("color", string("yellow"))]) - ); - 
assert_eq!( - *query(&sheet, vec![("b", Some("x"), true, 0)], ""), - props(&[("color", string("violet"))]) - ); - assert_eq!( - *query(&sheet, vec![("a", None, true, 0), ("b", None, true, 0)], ""), - props(&[("color", string("orange"))]) - ); - assert_eq!( - *query( - &sheet, - vec![("a", None, true, 0), ("b", Some("y"), true, 0)], - "" - ), - props(&[("color", string("indigo"))]) - ); - } - - #[test] - fn test_property_sheet_with_cascade_ordering_as_tie_breaker() { - let sheet = generate_property_sheet( - "foo.css", - " - f1 f2:nth-child(1) { color: red; } - f1:nth-child(1) f2 { color: green; } - f1 f2[text='a'] { color: blue; } - f1 f2[text='b'] { color: violet; } - ", - &HashSet::new(), - ) - .unwrap(); - - assert_eq!( - *query( - &sheet, - vec![("f1", None, true, 0), ("f2", None, true, 0)], - "x" - ), - props(&[]) - ); - assert_eq!( - *query( - &sheet, - vec![("f1", None, true, 0), ("f2", None, true, 1)], - "x" - ), - props(&[("color", string("red"))]) - ); - assert_eq!( - *query( - &sheet, - vec![("f1", None, true, 1), ("f2", None, true, 1)], - "x" - ), - props(&[("color", string("green"))]) - ); - assert_eq!( - *query( - &sheet, - vec![("f1", None, true, 1), ("f2", None, true, 1)], - "a" - ), - props(&[("color", string("blue"))]) - ); - assert_eq!( - *query( - &sheet, - vec![("f1", None, true, 1), ("f2", None, true, 1)], - "ab" - ), - props(&[("color", string("violet"))]) - ); - } - - #[test] - fn test_property_sheet_with_css_function_calls() { - let sheet = generate_property_sheet( - "foo.css", - " - a { - b: f(); - c: f(g(h), i, \"j\", 10); - } - ", - &HashSet::new(), - ) - .unwrap(); - - let p = query_simple(&sheet, vec!["a"]); - - assert_eq!( - p["b"], - object(&[("name", string("f")), ("args", array(vec![])),]) - ); - - assert_eq!( - p["c"], - object(&[ - ("name", string("f")), - ( - "args", - array(vec![ - object(&[("name", string("g")), ("args", array(vec![string("h"),]))]), - string("i"), - string("j"), - num(10), - ]) - ), - ]) - ); - - // Handle 
differently-formatted calls - let sheet2 = generate_property_sheet( - "foo.css", - " - a { - b: f(); - c: f( - g(h), - i, - \"j\", - 10 - ); - } - ", - &HashSet::new(), - ) - .unwrap(); - - assert_eq!( - query_simple(&sheet2, vec!["a"])["c"], - query_simple(&sheet, vec!["a"])["c"] - ); - } - - #[test] - fn test_property_sheet_with_array_by_declaring_property_multiple_times() { - let sheet = generate_property_sheet( - "foo.css", - " - a { - b: 'foo'; - b: 'bar'; - b: 'baz'; - c: f(g()); - c: h(); - } - ", - &HashSet::new(), - ) - .unwrap(); - - let p = query_simple(&sheet, vec!["a"]); - - assert_eq!( - p["b"], - array(vec![string("foo"), string("bar"), string("baz"),]) - ); - - assert_eq!( - p["c"], - array(vec![ - object(&[ - ("name", string("f")), - ( - "args", - array(vec![object(&[ - ("name", string("g")), - ("args", array(vec![])), - ])]) - ) - ]), - object(&[("name", string("h")), ("args", array(vec![])),]), - ]), - ); - } - - #[test] - fn test_property_sheet_with_imports() { - let repo_dir = TempDir::new().unwrap(); - let properties_dir = repo_dir.path().join("properties"); - let dependency_properties_dir = repo_dir - .path() - .join("node_modules") - .join("the-dependency") - .join("properties"); - fs::create_dir_all(&properties_dir).unwrap(); - fs::create_dir_all(&dependency_properties_dir).unwrap(); - let sheet_path1 = properties_dir.join("sheet1.css"); - let sheet_path2 = properties_dir.join("sheet2.css"); - let dependency_sheet_path1 = dependency_properties_dir.join("dependency-sheet1.css"); - let dependency_sheet_path2 = dependency_properties_dir.join("dependency-sheet2.css"); - - fs::write( - sheet_path2, - r#" - a { x: '1'; } - "#, - ) - .unwrap(); - fs::write( - dependency_sheet_path1, - r#" - @import "./dependency-sheet2.css"; - a { y: '2'; } - "#, - ) - .unwrap(); - fs::write( - dependency_sheet_path2, - r#" - b { x: '3'; } - "#, - ) - .unwrap(); - let sheet = generate_property_sheet( - sheet_path1, - r#" - @import "./sheet2.css"; - @import 
"the-dependency/properties/dependency-sheet1.css"; - b { y: '4'; } - "#, - &HashSet::new(), - ) - .unwrap(); - - let a = query_simple(&sheet, vec!["a"]); - assert_eq!(a["x"], string("1"),); - assert_eq!(a["y"], string("2"),); - let b = query_simple(&sheet, vec!["b"]); - assert_eq!(b["x"], string("3"),); - assert_eq!(b["y"], string("4"),); - } - - fn query_simple<'a>( - sheet: &'a PropertySheetJSON, - node_stack: Vec<&'static str>, - ) -> &'a PropertySet { - query( - sheet, - node_stack.into_iter().map(|s| (s, None, true, 0)).collect(), - "", - ) - } - - fn query<'a>( - sheet: &'a PropertySheetJSON, - node_stack: Vec<(&'static str, Option<&'static str>, bool, usize)>, - leaf_text: &str, - ) -> &'a PropertySet { - let mut state_id = 0; - for (kind, field, is_named, child_index) in node_stack { - let state = &sheet.states[state_id]; - state_id = state - .transitions - .iter() - .chain(sheet.states[0].transitions.iter()) - .find(|transition| { - transition.kind.as_ref().map_or(true, |k| k == kind) - && transition.named.map_or(true, |n| n == is_named) - && transition.field.as_ref().map_or(true, |f| field == Some(f)) - && transition.index.map_or(true, |index| index == child_index) - && (transition - .text - .as_ref() - .map_or(true, |text| Regex::new(text).unwrap().is_match(leaf_text))) - }) - .map_or(state.default_next_state_id, |t| t.state_id); - } - &sheet.property_sets[sheet.states[state_id].property_set_id] - } - - fn array(s: Vec) -> PropertyValue { - PropertyValue::Array(s) - } - - fn object<'a>(s: &'a [(&'a str, PropertyValue)]) -> PropertyValue { - PropertyValue::Object( - s.into_iter() - .map(|(a, b)| (a.to_string(), b.clone())) - .collect(), - ) - } - - fn string(s: &str) -> PropertyValue { - PropertyValue::String(s.to_string()) - } - - fn num(n: isize) -> PropertyValue { - PropertyValue::Number(n) - } - - fn props<'a>(s: &'a [(&'a str, PropertyValue)]) -> PropertySet { - s.into_iter() - .map(|(a, b)| (a.to_string(), b.clone())) - .collect() - } -} diff --git 
a/cli/src/main.rs b/cli/src/main.rs index 832cd92c..8f5ac503 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -40,7 +40,6 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("grammar-path").index(1)) .arg(Arg::with_name("log").long("log")) .arg(Arg::with_name("next-abi").long("next-abi")) - .arg(Arg::with_name("properties-only").long("properties")) .arg( Arg::with_name("report-states-for-rule") .long("report-states-for-rule") @@ -142,7 +141,6 @@ fn run() -> error::Result<()> { config.save(&home_dir)?; } else if let Some(matches) = matches.subcommand_matches("generate") { let grammar_path = matches.value_of("grammar-path"); - let properties_only = matches.is_present("properties-only"); let report_symbol_name = matches.value_of("report-states-for-rule").or_else(|| { if matches.is_present("report-states") { Some("") @@ -157,7 +155,6 @@ fn run() -> error::Result<()> { generate::generate_parser_in_directory( &current_dir, grammar_path, - properties_only, next_abi, report_symbol_name, )?; diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index 1a2a71ff..cc1d5967 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -3,6 +3,5 @@ mod helpers; mod highlight_test; mod node_test; mod parser_test; -mod properties_test; mod query_test; mod tree_test; diff --git a/cli/src/tests/properties_test.rs b/cli/src/tests/properties_test.rs deleted file mode 100644 index 51f0e820..00000000 --- a/cli/src/tests/properties_test.rs +++ /dev/null @@ -1,265 +0,0 @@ -use super::helpers::fixtures::get_language; -use crate::generate::properties; -use serde_derive::Deserialize; -use serde_json; - -use std::collections::HashSet; -use tree_sitter::{Parser, PropertySheet}; -#[derive(Debug, Default, Deserialize, PartialEq, Eq)] -struct Properties { - a: Option, - b: Option, -} - -#[test] -fn test_walk_with_properties_with_nth_child() { - let language = get_language("javascript"); - let property_sheet = PropertySheet::::new( - language, - &generate_property_sheet_string( - 
"/some/path.css", - " - binary_expression > identifier:nth-child(2) { - a: x; - } - - binary_expression > identifier { - a: y; - } - - identifier { - a: z; - } - ", - ), - ) - .unwrap(); - - let source_code = "a = b || c;"; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source_code, None).unwrap(); - - let mut cursor = tree.walk_with_properties(&property_sheet, source_code.as_bytes()); - assert_eq!(cursor.node().kind(), "program"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "expression_statement"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "assignment_expression"); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("z".to_string()), - b: None - } - ); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "="); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "binary_expression"); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("y".to_string()), - b: None - } - ); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "||"); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("x".to_string()), - b: None - } - ); -} - -#[test] -fn test_walk_with_properties_with_regexes() { - let language = get_language("javascript"); - let property_sheet = PropertySheet::::new( - language, - &generate_property_sheet_string( - "/some/path.css", - " - identifier { - &[text='^[A-Z]'] { - a: y; - } - - &[text='^[A-Z_]+$'] { - a: z; - } - - a: x; - } - ", - ), - ) - .unwrap(); - - let source_code = "const ABC = Def(ghi);"; - - let mut parser = Parser::new(); - 
parser.set_language(language).unwrap(); - let tree = parser.parse(source_code, None).unwrap(); - - let mut cursor = tree.walk_with_properties(&property_sheet, source_code.as_bytes()); - assert_eq!(cursor.node().kind(), "program"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "lexical_declaration"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "const"); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "variable_declarator"); - - // The later selector with a text regex overrides the earlier one. - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("z".to_string()), - b: None - } - ); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "="); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "call_expression"); - - // The selectors with text regexes override the selector without one. - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("y".to_string()), - b: None - } - ); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "arguments"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "("); - - // This node doesn't match either of the regexes. 
- assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("x".to_string()), - b: None - } - ); -} - -#[test] -fn test_walk_with_properties_based_on_fields() { - let language = get_language("javascript"); - let property_sheet = PropertySheet::::new( - language, - &generate_property_sheet_string( - "/some/path.css", - " - arrow_function > .parameter { - a: x; - } - - function_declaration { - & > .parameters > identifier { - a: y; - } - - & > .name { - b: z; - } - } - - identifier { - a: w; - } - ", - ), - ) - .unwrap(); - - let source_code = "function a(b) { return c => c + b; }"; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source_code, None).unwrap(); - let mut cursor = tree.walk_with_properties(&property_sheet, source_code.as_bytes()); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "function_declaration"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "function"); - assert_eq!(*cursor.node_properties(), Properties::default()); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("w".to_string()), - b: Some("z".to_string()) - } - ); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "formal_parameters"); - assert_eq!(*cursor.node_properties(), Properties::default()); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "("); - assert_eq!(*cursor.node_properties(), Properties::default()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("y".to_string()), - b: None, - } - ); - - assert!(cursor.goto_parent()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "statement_block"); 
- assert!(cursor.goto_first_child()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "return_statement"); - assert!(cursor.goto_first_child()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "arrow_function"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("x".to_string()), - b: None, - } - ); -} - -fn generate_property_sheet_string(path: &str, css: &str) -> String { - serde_json::to_string(&properties::generate_property_sheet(path, css, &HashSet::new()).unwrap()) - .unwrap() -} diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 491c7db2..1688dbd3 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1,18 +1,14 @@ mod ffi; mod util; -#[macro_use] -extern crate serde_derive; extern crate regex; extern crate serde; +extern crate serde_derive; extern crate serde_json; #[cfg(unix)] use std::os::unix::io::AsRawFd; -use regex::Regex; -use serde::de::DeserializeOwned; -use std::collections::HashMap; use std::ffi::CStr; use std::marker::PhantomData; use std::mem::MaybeUninit; @@ -65,62 +61,6 @@ pub struct InputEdit { pub new_end_position: Point, } -struct PropertyTransition { - state_id: u16, - child_index: Option, - text_regex_index: Option, - node_kind_id: Option, -} - -struct PropertyState { - field_transitions: HashMap>, - kind_transitions: HashMap>, - property_set_id: usize, - default_next_state_id: usize, -} - -#[derive(Debug)] -pub enum PropertySheetError { - InvalidJSON(serde_json::Error), - InvalidRegex(regex::Error), -} - -pub struct PropertySheet

> { - states: Vec, - property_sets: Vec

, - text_regexes: Vec, -} - -#[derive(Clone, Debug, Deserialize, Serialize, Hash, PartialEq, Eq)] -pub struct PropertyTransitionJSON { - #[serde(rename = "type")] - #[serde(skip_serializing_if = "Option::is_none")] - pub kind: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub named: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub index: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub field: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub text: Option, - pub state_id: usize, -} - -#[derive(Debug, Default, Deserialize, Serialize, PartialEq, Eq)] -pub struct PropertyStateJSON { - pub id: Option, - pub property_set_id: usize, - pub transitions: Vec, - pub default_next_state_id: usize, -} - -#[derive(Debug, Deserialize, Serialize)] -pub struct PropertySheetJSON

{ - pub states: Vec, - pub property_sets: Vec

, -} - #[derive(Clone, Copy)] #[repr(transparent)] pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); @@ -131,14 +71,6 @@ pub struct Tree(NonNull); pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); -pub struct TreePropertyCursor<'a, P> { - cursor: TreeCursor<'a>, - state_stack: Vec, - child_index_stack: Vec, - property_sheet: &'a PropertySheet

, - source: &'a [u8], -} - #[derive(Debug)] enum TextPredicate { CaptureEqString(u32, String), @@ -252,6 +184,7 @@ impl fmt::Display for LanguageError { } impl Parser { + /// Create a new parser. pub fn new() -> Parser { unsafe { let parser = ffi::ts_parser_new(); @@ -259,6 +192,14 @@ impl Parser { } } + /// Set the language that the parser should use for parsing. + /// + /// Returns a Result indicating whether or not the language was successfully + /// assigned. True means assignment succeeded. False means there was a version + /// mismatch: the language was generated with an incompatible version of the + /// Tree-sitter CLI. Check the language's version using `ts_language_version` + /// and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION` and + /// `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants. pub fn set_language(&mut self, language: Language) -> Result<(), LanguageError> { let version = language.version(); if version < ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION @@ -554,14 +495,6 @@ impl Tree { self.root_node().walk() } - pub fn walk_with_properties<'a, P>( - &'a self, - property_sheet: &'a PropertySheet

, - source: &'a [u8], - ) -> TreePropertyCursor<'a, P> { - TreePropertyCursor::new(self, property_sheet, source) - } - pub fn changed_ranges(&self, other: &Tree) -> impl ExactSizeIterator { let mut count = 0; unsafe { @@ -858,125 +791,6 @@ impl<'a> Drop for TreeCursor<'a> { } } -impl<'a, P> TreePropertyCursor<'a, P> { - fn new(tree: &'a Tree, property_sheet: &'a PropertySheet

, source: &'a [u8]) -> Self { - let mut result = Self { - cursor: tree.root_node().walk(), - child_index_stack: vec![0], - state_stack: vec![0], - property_sheet, - source, - }; - let state = result.next_state(0); - result.state_stack.push(state); - result - } - - pub fn node(&self) -> Node<'a> { - self.cursor.node() - } - - pub fn node_properties(&self) -> &'a P { - &self.property_sheet.property_sets[self.current_state().property_set_id] - } - - pub fn goto_first_child(&mut self) -> bool { - if self.cursor.goto_first_child() { - let next_state_id = self.next_state(0); - self.state_stack.push(next_state_id); - self.child_index_stack.push(0); - true - } else { - false - } - } - - pub fn goto_next_sibling(&mut self) -> bool { - if self.cursor.goto_next_sibling() { - let child_index = self.child_index_stack.pop().unwrap() + 1; - self.state_stack.pop(); - let next_state_id = self.next_state(child_index); - self.state_stack.push(next_state_id); - self.child_index_stack.push(child_index); - true - } else { - false - } - } - - pub fn goto_parent(&mut self) -> bool { - if self.cursor.goto_parent() { - self.state_stack.pop(); - self.child_index_stack.pop(); - true - } else { - false - } - } - - pub fn source(&self) -> &'a [u8] { - &self.source - } - - fn next_state(&self, node_child_index: usize) -> usize { - let current_state = self.current_state(); - let default_state = self.default_state(); - - for state in [current_state, default_state].iter() { - let node_field_id = self.cursor.field_id(); - let node_kind_id = self.cursor.node().kind_id(); - let transitions = node_field_id - .and_then(|field_id| state.field_transitions.get(&field_id)) - .or_else(|| state.kind_transitions.get(&node_kind_id)); - - if let Some(transitions) = transitions { - for transition in transitions.iter() { - if transition - .node_kind_id - .map_or(false, |id| id != node_kind_id) - { - continue; - } - - if let Some(text_regex_index) = transition.text_regex_index { - let node = self.cursor.node(); - 
let text = &self.source[node.start_byte()..node.end_byte()]; - if let Ok(text) = str::from_utf8(text) { - if !self.property_sheet.text_regexes[text_regex_index as usize] - .is_match(text) - { - continue; - } - } - } - - if let Some(child_index) = transition.child_index { - if child_index != node_child_index as u16 { - continue; - } - } - - return transition.state_id as usize; - } - } - - if current_state as *const PropertyState == default_state as *const PropertyState { - break; - } - } - - current_state.default_next_state_id - } - - fn current_state(&self) -> &PropertyState { - &self.property_sheet.states[*self.state_stack.last().unwrap()] - } - - fn default_state(&self) -> &PropertyState { - &self.property_sheet.states.first().unwrap() - } -} - impl Query { pub fn new(language: Language, source: &str) -> Result { let mut error_offset = 0u32; @@ -1513,154 +1327,6 @@ impl<'a> Into for &'a InputEdit { } } -impl

PropertySheet

{ - pub fn new(language: Language, json: &str) -> Result - where - P: DeserializeOwned, - { - let input: PropertySheetJSON

= - serde_json::from_str(json).map_err(PropertySheetError::InvalidJSON)?; - let mut states = Vec::new(); - let mut text_regexes = Vec::new(); - let mut text_regex_patterns = Vec::new(); - - for state in input.states.iter() { - let node_kind_count = language.node_kind_count(); - let mut kind_transitions = HashMap::new(); - let mut field_transitions = HashMap::new(); - - for transition in state.transitions.iter() { - let field_id = transition - .field - .as_ref() - .and_then(|field| language.field_id_for_name(&field)); - if let Some(field_id) = field_id { - field_transitions.entry(field_id).or_insert(Vec::new()); - } - } - - for transition in state.transitions.iter() { - let text_regex_index = if let Some(regex_pattern) = transition.text.as_ref() { - if let Some(index) = - text_regex_patterns.iter().position(|r| *r == regex_pattern) - { - Some(index as u16) - } else { - text_regex_patterns.push(regex_pattern); - text_regexes.push( - Regex::new(&regex_pattern).map_err(PropertySheetError::InvalidRegex)?, - ); - Some(text_regexes.len() as u16 - 1) - } - } else { - None - }; - - let state_id = transition.state_id as u16; - let child_index = transition.index.map(|i| i as u16); - let field_id = transition - .field - .as_ref() - .and_then(|field| language.field_id_for_name(&field)); - - if let Some(kind) = transition.kind.as_ref() { - for kind_id in 0..(node_kind_count as u16) { - if kind != language.node_kind_for_id(kind_id) - || transition.named != Some(language.node_kind_is_named(kind_id)) - { - continue; - } - - if let Some(field_id) = field_id { - field_transitions - .entry(field_id) - .or_insert(Vec::new()) - .push(PropertyTransition { - node_kind_id: Some(kind_id), - state_id, - child_index, - text_regex_index, - }); - } else { - for (_, entries) in field_transitions.iter_mut() { - entries.push(PropertyTransition { - node_kind_id: Some(kind_id), - state_id, - child_index, - text_regex_index, - }); - } - - kind_transitions.entry(kind_id).or_insert(Vec::new()).push( - 
PropertyTransition { - node_kind_id: None, - state_id, - child_index, - text_regex_index, - }, - ); - } - } - } else if let Some(field_id) = field_id { - field_transitions - .entry(field_id) - .or_insert(Vec::new()) - .push(PropertyTransition { - node_kind_id: None, - state_id, - child_index, - text_regex_index, - }); - } - } - states.push(PropertyState { - field_transitions, - kind_transitions, - default_next_state_id: state.default_next_state_id, - property_set_id: state.property_set_id, - }); - } - Ok(Self { - property_sets: input.property_sets, - states, - text_regexes, - }) - } - - pub fn map(self, mut f: F) -> Result, E> - where - F: FnMut(P) -> Result, - { - let mut property_sets = Vec::with_capacity(self.property_sets.len()); - for set in self.property_sets { - property_sets.push(f(set)?); - } - Ok(PropertySheet { - states: self.states, - text_regexes: self.text_regexes, - property_sets, - }) - } -} - -impl fmt::Display for PropertySheetError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - PropertySheetError::InvalidJSON(e) => write!(f, "Invalid JSON: {}", e), - PropertySheetError::InvalidRegex(e) => write!(f, "Invalid Regex: {}", e), - } - } -} - -impl std::error::Error for PropertySheetError { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - match self { - PropertySheetError::InvalidJSON(e) => Some(e), - PropertySheetError::InvalidRegex(e) => Some(e), - } - } -} - unsafe impl Send for Language {} unsafe impl Send for Parser {} unsafe impl Send for Query {} From 084406148b78d0292abab800e03b0bcee6fb5875 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Oct 2019 12:03:34 -0700 Subject: [PATCH 14/14] Tweak highlight name matching semantics, add doc comments --- cli/src/highlight.rs | 2 +- cli/src/tests/highlight_test.rs | 6 +- highlight/src/lib.rs | 112 ++++++++++++++++++++++---------- 3 files changed, 82 insertions(+), 38 deletions(-) diff --git a/cli/src/highlight.rs b/cli/src/highlight.rs index 
b827dde4..6a174e9d 100644 --- a/cli/src/highlight.rs +++ b/cli/src/highlight.rs @@ -98,7 +98,7 @@ impl Serialize for Theme { S: Serializer, { let mut map = serializer.serialize_map(Some(self.styles.len()))?; - for (name, style) in self.highlighter.highlight_names.iter().zip(&self.styles) { + for (name, style) in self.highlighter.names().iter().zip(&self.styles) { let style = &style.ansi; let color = style.foreground.map(|color| match color { Color::Black => json!("black"), diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index a5579c65..d7734398 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -46,7 +46,7 @@ lazy_static! { .collect() ); static ref HTML_ATTRS: Vec = HIGHLIGHTER - .highlight_names + .names() .iter() .map(|s| format!("class={}", s)) .collect(); @@ -598,9 +598,7 @@ fn to_token_vector<'a>( )?; for event in events { match event? { - HighlightEvent::HighlightStart(s) => { - highlights.push(HIGHLIGHTER.highlight_names[s.0].as_str()) - } + HighlightEvent::HighlightStart(s) => highlights.push(HIGHLIGHTER.names()[s.0].as_str()), HighlightEvent::HighlightEnd => { highlights.pop(); } diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index ec2cdafb..9268a928 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -10,9 +10,11 @@ use tree_sitter::{ const CANCELLATION_CHECK_INTERVAL: usize = 100; +/// Indicates which highlight should be applied to a region of source code. #[derive(Copy, Clone, Debug)] pub struct Highlight(pub usize); +/// Represents the reason why syntax highlighting failed. #[derive(Debug, PartialEq, Eq)] pub enum Error { Cancelled, @@ -20,13 +22,7 @@ pub enum Error { Unknown, } -#[derive(Debug)] -struct LocalScope<'a> { - inherits: bool, - range: ops::Range, - local_defs: Vec<(&'a str, Option)>, -} - +/// Represents a single step in rendering a syntax-highlighted document. 
#[derive(Copy, Clone, Debug)] pub enum HighlightEvent { Source { start: usize, end: usize }, @@ -34,6 +30,9 @@ pub enum HighlightEvent { HighlightEnd, } +/// Contains the data needed to highlight code written in a particular language. +/// +/// This struct is immutable and can be shared between threads. pub struct HighlightConfiguration { pub language: Language, pub query: Query, @@ -50,16 +49,48 @@ pub struct HighlightConfiguration { local_ref_capture_index: Option, } +/// Performs syntax highlighting, recognizing a given list of highlight names. +/// +/// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated +/// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of +/// these queries can choose to recognize highlights with different levels of specificity. +/// For example, the string `function.builtin` will match against `function.method.builtin` +/// and `function.builtin.constructor`, but will not match `function.method`. +/// +/// The `Highlighter` struct is instantiated with an ordered list of recognized highlight names +/// and is then used for loading highlight queries and performing syntax highlighting. +/// Highlighting results are returned as `Highlight` values, which contain the index of the +/// matched highlight in this list of highlight names. +/// +/// The `Highlighter` struct is immutable and can be shared between threads. #[derive(Clone, Debug)] pub struct Highlighter { - pub highlight_names: Vec, + highlight_names: Vec, } +/// Carries the mutable state required for syntax highlighting. +/// +/// For the best performance `HighlightContext` values should be reused between +/// syntax highlighting calls. A separate context is needed for each thread that +/// is performing highlighting. pub struct HighlightContext { parser: Parser, cursors: Vec, } +/// Converts a general-purpose syntax highlighting iterator into a sequence of lines of HTML. 
+pub struct HtmlRenderer { + pub html: Vec, + pub line_offsets: Vec, +} + +#[derive(Debug)] +struct LocalScope<'a> { + inherits: bool, + range: ops::Range, + local_defs: Vec<(&'a str, Option)>, +} + struct HighlightIter<'a, F> where F: Fn(&str) -> Option<&'a HighlightConfiguration> + 'a, @@ -97,10 +128,30 @@ impl HighlightContext { } impl Highlighter { + /// Creates a highlighter with a given list of recognized highlight names. pub fn new(highlight_names: Vec) -> Self { Highlighter { highlight_names } } + /// Returns the list of highlight names with which this Highlighter was constructed. + pub fn names(&self) -> &[String] { + &self.highlight_names + } + + /// Creates a `HighlightConfiguration` for a given `Language` and set of highlighting + /// queries. + /// + /// # Parameters + /// + /// * `language` - The Tree-sitter `Language` that should be used for parsing. + /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This + /// should be non-empty, otherwise no syntax highlights will be added. + /// * `injections_query` - A string containing tree patterns for injecting other languages + /// into the document. This can be empty if no injections are desired. + /// * `locals_query` - A string containing tree patterns for tracking local variable + /// definitions and references. This can be empty if local variable tracking is not needed. + /// + /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method. pub fn load_configuration( &self, language: Language, @@ -141,33 +192,32 @@ impl Highlighter { } } + let mut capture_parts = Vec::new(); + // Compute a mapping from the query's capture ids to the indices of the highlighter's // recognized highlight names. 
let highlight_indices = query .capture_names() .iter() .map(move |capture_name| { - let mut best_index = None; - let mut best_name_len = 0; - let mut best_common_prefix_len = 0; - for (i, highlight_name) in self.highlight_names.iter().enumerate() { - if highlight_name.len() > capture_name.len() { - continue; - } + capture_parts.clear(); + capture_parts.extend(capture_name.split('.')); - let capture_parts = capture_name.split('.'); - let highlight_parts = highlight_name.split('.'); - let common_prefix_len = capture_parts - .zip(highlight_parts) - .take_while(|(a, b)| a == b) - .count(); - let is_best_match = common_prefix_len > best_common_prefix_len - || (common_prefix_len == best_common_prefix_len - && highlight_name.len() < best_name_len); - if is_best_match { + let mut best_index = None; + let mut best_match_len = 0; + for (i, highlight_name) in self.highlight_names.iter().enumerate() { + let mut len = 0; + let mut matches = true; + for part in highlight_name.split('.') { + len += 1; + if !capture_parts.contains(&part) { + matches = false; + break; + } + } + if matches && len > best_match_len { best_index = Some(i); - best_name_len = highlight_name.len(); - best_common_prefix_len = common_prefix_len; + best_match_len = len; } } best_index.map(Highlight) @@ -219,6 +269,7 @@ impl Highlighter { }) } + /// Iterate over the highlighted regions for a given slice of source code. pub fn highlight<'a>( &'a self, context: &'a mut HighlightContext, @@ -398,7 +449,7 @@ impl<'a> HighlightIterLayer<'a> { // First, sort scope boundaries by their byte offset in the document. At a // given position, emit scope endings before scope beginnings. Finally, emit - // scope boundaries from outer layers first. + // scope boundaries from deeper layers first. 
fn sort_key(&mut self) -> Option<(usize, bool, isize)> { let depth = -(self.depth as isize); let next_start = self @@ -805,11 +856,6 @@ where } } -pub struct HtmlRenderer { - pub html: Vec, - pub line_offsets: Vec, -} - impl HtmlRenderer { pub fn new() -> Self { HtmlRenderer {