diff --git a/Cargo.lock b/Cargo.lock index 9ca3a70a..50058336 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -210,6 +210,14 @@ dependencies = [ "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "lock_api" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "log" version = "0.4.6" @@ -263,6 +271,35 @@ name = "num-traits" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "once_cell" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "parking_lot 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "parking_lot" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "parking_lot_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "parking_lot_core" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "proc-macro2" version = "0.4.24" @@ -502,6 +539,14 @@ name = "smallbitvec" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "smallvec" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unreachable 1.0.0 
(registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "spin" version = "0.5.0" @@ -583,6 +628,7 @@ dependencies = [ "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "once_cell 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", @@ -593,6 +639,18 @@ dependencies = [ "smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter 0.3.8", + "tree-sitter-highlight 0.1.0", +] + +[[package]] +name = "tree-sitter-highlight" +version = "0.1.0" +dependencies = [ + "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", + "tree-sitter 0.3.8", ] [[package]] @@ -610,6 +668,14 @@ name = "unicode-xid" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "unreachable" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "utf8-ranges" version = "1.0.2" @@ -625,6 +691,11 @@ name = "version_check" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "void" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "winapi" 
version = "0.3.6" @@ -673,6 +744,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" "checksum libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)" = "10923947f84a519a45c8fefb7dd1b3e8c08747993381adee176d7a82b4195311" "checksum libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3ad660d7cb8c5822cd83d10897b0f1f1526792737a179e73896152f85b88c2" +"checksum lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c" "checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6" "checksum memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0a3eb002f0535929f1199681417029ebea04aadc0c7a4224b46be99c7f5d6a16" "checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" @@ -680,6 +752,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea" "checksum num-rational 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4e96f040177bb3da242b5b1ecf3f54b5d5af3efbbfb18608977a5d2767b22f10" "checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1" +"checksum once_cell 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "532c29a261168a45ce28948f9537ddd7a5dd272cc513b3017b1e82a88f962c37" +"checksum parking_lot 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"ab41b4aed082705d1056416ae4468b6ea99d52599ecf3169b00088d43113e337" +"checksum parking_lot_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "94c8c7923936b28d546dfd14d4472eaf34c99b14e1c973a32b3e6d4eb04298c9" "checksum proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)" = "77619697826f31a02ae974457af0b29b723e5619e113e9397b8b82c6bd253f09" "checksum quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)" = "53fa22a1994bd0f9372d7a816207d8a2677ad0325b073f5c5332760f0fb62b5c" "checksum rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8356f47b32624fef5b3301c1be97e5944ecdd595409cc5da11d05f211db6cfbd" @@ -709,6 +784,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c" "checksum serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)" = "c37ccd6be3ed1fdf419ee848f7c758eb31b054d7cd3ae3600e3bae0adf569811" "checksum smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1764fe2b30ee783bfe3b9b37b2649d8d590b3148bb12e0079715d4d5c673562e" +"checksum smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "88aea073965ab29f6edb5493faf96ad662fb18aa9eeb186a3b7057951605ed15" "checksum spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "44363f6f51401c34e7be73db0db371c04705d35efbe9f7d6082e03a921a32c55" "checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" "checksum syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)" = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7" @@ -719,9 +795,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = 
"535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" +"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" "checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" "checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" "checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" +"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" "checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 54d0eb5e..242ed72b 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -27,6 +27,7 @@ clap = "2.32" dirs = "1.0.2" hashbrown = "0.1" libloading = "0.5" +once_cell = "0.1.8" serde = "1.0" serde_derive = "1.0" regex-syntax = "0.6.4" @@ -37,6 +38,10 @@ rsass = "0.9" version = ">= 0.3.7" path = "../lib" 
+[dependencies.tree-sitter-highlight] +version = ">= 0.1.0" +path = "../highlight" + [dependencies.serde_json] version = "1.0" features = ["preserve_order"] diff --git a/cli/src/error.rs b/cli/src/error.rs index 4769b481..b0e52797 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -1,4 +1,5 @@ use std::io; +use tree_sitter_highlight::PropertySheetError; #[derive(Debug)] pub struct Error(pub String); @@ -42,3 +43,13 @@ impl From for Error { Error(error) } } + +impl From for Error { + fn from(error: PropertySheetError) -> Self { + match error { + PropertySheetError::InvalidFormat(e) => Self::from(e), + PropertySheetError::InvalidRegex(e) => Self::regex(&e.to_string()), + PropertySheetError::InvalidJSON(e) => Self::from(e), + } + } +} diff --git a/cli/src/highlight.rs b/cli/src/highlight.rs new file mode 100644 index 00000000..1651b98d --- /dev/null +++ b/cli/src/highlight.rs @@ -0,0 +1,272 @@ +use crate::error::Result; +use crate::loader::Loader; +use ansi_term::{Color, Style}; +use lazy_static::lazy_static; +use serde_json::Value; +use std::collections::HashMap; +use std::{fmt, fs, io, mem, path}; +use tree_sitter::{Language, PropertySheet}; +use tree_sitter_highlight::{highlight, highlight_html, HighlightEvent, Properties, Scope}; + +lazy_static! 
{ + static ref CSS_STYLES_BY_COLOR_ID: Vec = + serde_json::from_str(include_str!("../vendor/xterm-colors.json")).unwrap(); +} + +pub struct Theme { + ansi_styles: Vec>, + css_styles: Vec>, +} + +impl Theme { + pub fn load(path: &path::Path) -> io::Result { + let json = fs::read_to_string(path)?; + Ok(Self::new(&json)) + } + + pub fn new(json: &str) -> Self { + let mut ansi_styles = vec![None; 30]; + let mut css_styles = vec![None; 30]; + if let Ok(colors) = serde_json::from_str::>(json) { + for (scope, style_value) in colors { + let mut style = Style::default(); + parse_style(&mut style, style_value); + ansi_styles[scope as usize] = Some(style); + css_styles[scope as usize] = Some(style_to_css(style)); + } + } + Self { + ansi_styles, + css_styles, + } + } + + fn ansi_style(&self, scope: Scope) -> Option<&Style> { + self.ansi_styles[scope as usize].as_ref() + } + + fn css_style(&self, scope: Scope) -> Option<&str> { + self.css_styles[scope as usize].as_ref().map(|s| s.as_str()) + } +} + +impl Default for Theme { + fn default() -> Self { + Theme::new( + r#" + { + "attribute": {"color": 124, "italic": true}, + "comment": {"color": 245, "italic": true}, + "constant.builtin": {"color": 94, "bold": true}, + "constant": 94, + "constructor": 136, + "embedded": null, + "function.builtin": {"color": 26, "bold": true}, + "function": 26, + "keyword": 56, + "number": {"color": 94, "bold": true}, + "property": 124, + "operator": {"color": 239, "bold": true}, + "punctuation.bracket": 239, + "punctuation.delimiter": 239, + "string.special": 30, + "string": 28, + "tag": {"color": 18}, + "variable.builtin": {"bold": true} + } + "#, + ) + } +} + +impl fmt::Debug for Theme { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{{")?; + let mut first = true; + for (i, style) in self.ansi_styles.iter().enumerate() { + if let Some(style) = style { + let scope = Scope::from_usize(i).unwrap(); + if !first { + write!(f, ", ")?; + } + write!(f, "{:?}: {:?}", scope, style)?; 
+ first = false; + } + } + write!(f, "}}")?; + Ok(()) + } +} + +fn parse_style(style: &mut Style, json: Value) { + if let Value::Object(entries) = json { + for (property_name, value) in entries { + match property_name.as_str() { + "italic" => *style = style.italic(), + "bold" => *style = style.bold(), + "dimmed" => *style = style.dimmed(), + "underline" => *style = style.underline(), + "color" => { + if let Some(color) = parse_color(value) { + *style = style.fg(color); + } + } + _ => {} + } + } + } else if let Some(color) = parse_color(json) { + *style = style.fg(color); + } +} + +fn parse_color(json: Value) -> Option { + match json { + Value::Number(n) => match n.as_u64() { + Some(n) => Some(Color::Fixed(n as u8)), + _ => None, + }, + Value::String(s) => match s.to_lowercase().as_str() { + "blue" => Some(Color::Blue), + "cyan" => Some(Color::Cyan), + "green" => Some(Color::Green), + "purple" => Some(Color::Purple), + "red" => Some(Color::Red), + "white" => Some(Color::White), + "yellow" => Some(Color::Yellow), + s => { + if s.starts_with("#") && s.len() >= 7 { + if let (Ok(red), Ok(green), Ok(blue)) = ( + u8::from_str_radix(&s[1..3], 16), + u8::from_str_radix(&s[3..5], 16), + u8::from_str_radix(&s[5..7], 16), + ) { + Some(Color::RGB(red, green, blue)) + } else { + None + } + } else { + None + } + } + }, + _ => None, + } +} + +fn style_to_css(style: Style) -> String { + use std::fmt::Write; + let mut result = "style='".to_string(); + if style.is_bold { + write!(&mut result, "font-weight: bold;").unwrap(); + } + if style.is_italic { + write!(&mut result, "font-style: italic;").unwrap(); + } + if let Some(color) = style.foreground { + write!(&mut result, "color: {};", color_to_css(color)).unwrap(); + } + result.push('\''); + result +} + +fn color_to_css(color: Color) -> &'static str { + match color { + Color::Black => "black", + Color::Blue => "blue", + Color::Red => "red", + Color::Green => "green", + Color::Yellow => "yellow", + Color::Cyan => "cyan", + 
Color::Purple => "purple", + Color::White => "white", + Color::Fixed(n) => CSS_STYLES_BY_COLOR_ID[n as usize].as_str(), + _ => panic!("Unsupported color type"), + } +} + +pub fn ansi( + loader: &Loader, + theme: &Theme, + source: &[u8], + language: Language, + property_sheet: &PropertySheet, +) -> Result<()> { + use std::io::Write; + let stdout = io::stdout(); + let mut stdout = stdout.lock(); + let mut scope_stack = Vec::new(); + for event in highlight(loader, source, language, property_sheet)? { + match event { + HighlightEvent::Source(s) => { + if let Some(style) = scope_stack.last().and_then(|s| theme.ansi_style(*s)) { + write!(&mut stdout, "{}", style.paint(s))?; + } else { + write!(&mut stdout, "{}", s)?; + } + } + HighlightEvent::ScopeStart(s) => { + scope_stack.push(s); + } + HighlightEvent::ScopeEnd(_) => { + scope_stack.pop(); + } + } + } + Ok(()) +} + +pub const HTML_HEADER: &'static str = " + + + Tree-sitter Highlighting + + + +"; + +pub const HTML_FOOTER: &'static str = " + +"; + +pub fn html( + loader: &Loader, + theme: &Theme, + source: &[u8], + language: Language, + property_sheet: &PropertySheet, +) -> Result<()> { + use std::io::Write; + let stdout = io::stdout(); + let mut stdout = stdout.lock(); + write!(&mut stdout, "\n")?; + let lines = highlight_html(loader, source, language, property_sheet, |scope| { + if let Some(css_style) = theme.css_style(scope) { + css_style + } else { + "" + } + })?; + for (i, line) in lines.into_iter().enumerate() { + write!( + &mut stdout, + "\n", + i + 1, + line + )?; + } + write!(&mut stdout, "
{}{}
\n")?; + Ok(()) +} diff --git a/cli/src/lib.rs b/cli/src/lib.rs index 3a15b457..0ece9cac 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -1,5 +1,6 @@ pub mod error; pub mod generate; +pub mod highlight; pub mod loader; pub mod logger; pub mod parse; diff --git a/cli/src/loader.rs b/cli/src/loader.rs index 5c2a19a7..d19acf46 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -1,5 +1,6 @@ use super::error::{Error, Result}; use libloading::{Library, Symbol}; +use once_cell::unsync::OnceCell; use regex::{Regex, RegexBuilder}; use serde_derive::Deserialize; use std::collections::HashMap; @@ -9,6 +10,7 @@ use std::process::Command; use std::time::SystemTime; use std::{fs, mem}; use tree_sitter::{Language, PropertySheet}; +use tree_sitter_highlight::{load_property_sheet, LanguageRegistry, Properties}; #[cfg(unix)] const DYLIB_EXTENSION: &'static str = "so"; @@ -20,16 +22,18 @@ const BUILD_TARGET: &'static str = env!("BUILD_TARGET"); struct LanguageRepo { path: PathBuf, - language: Option, + language: OnceCell, configurations: Vec, } pub struct LanguageConfiguration { - _name: String, + pub name: String, _content_regex: Option, _first_line_regex: Option, + injection_regex: Option, file_types: Vec, - _highlight_property_sheet: Option>, + highlight_property_sheet_path: Option, + highlight_property_sheet: OnceCell>>, } pub struct Loader { @@ -76,7 +80,7 @@ impl Loader { } pub fn language_configuration_for_file_name( - &mut self, + &self, path: &Path, ) -> Result> { let ids = path @@ -100,20 +104,43 @@ impl Loader { Ok(None) } + pub fn language_configuration_for_injection_string( + &self, + string: &str, + ) -> Result> { + let mut best_match_length = 0; + let mut best_match_position = None; + for (i, repo) in self.language_repos.iter().enumerate() { + for (j, configuration) in repo.configurations.iter().enumerate() { + if let Some(injection_regex) = &configuration.injection_regex { + if let Some(mat) = injection_regex.find(string) { + let length = mat.end() - 
mat.start(); + if length > best_match_length { + best_match_position = Some((i, j)); + best_match_length = length; + } + } + } + } + } + if let Some((i, j)) = best_match_position { + let (language, configurations) = self.language_configuration_for_id(i)?; + Ok(Some((language, &configurations[j]))) + } else { + Ok(None) + } + } + fn language_configuration_for_id( - &mut self, + &self, id: usize, ) -> Result<(Language, &Vec)> { let repo = &self.language_repos[id]; - let language = if let Some(language) = repo.language { - language - } else { + let language = repo.language.get_or_try_init(|| { let src_path = repo.path.join("src"); - let language = self.load_language_at_path(&src_path, &src_path)?; - self.language_repos[id].language = Some(language); - language - }; - Ok((language, &self.language_repos[id].configurations)) + self.load_language_at_path(&src_path, &src_path) + })?; + Ok((*language, &self.language_repos[id].configurations)) } pub fn load_language_at_path(&self, src_path: &Path, header_path: &Path) -> Result { @@ -238,6 +265,8 @@ impl Loader { content_regex: Option, #[serde(rename = "first-line-regex")] first_line_regex: Option, + #[serde(rename = "injection-regex")] + injection_regex: Option, highlights: Option, } @@ -255,7 +284,7 @@ impl Loader { configurations .into_iter() .map(|conf| LanguageConfiguration { - _name: conf.name, + name: conf.name, file_types: conf.file_types.unwrap_or(Vec::new()), _content_regex: conf .content_regex @@ -263,7 +292,11 @@ impl Loader { _first_line_regex: conf .first_line_regex .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), - _highlight_property_sheet: conf.highlights.map(|d| Err(d.into())), + injection_regex: conf + .injection_regex + .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), + highlight_property_sheet_path: conf.highlights.map(|h| parser_path.join(h)), + highlight_property_sheet: OnceCell::new(), }) .collect() }); @@ -279,7 +312,7 @@ impl Loader { 
self.language_repos.push(LanguageRepo { path: parser_path.to_owned(), - language: None, + language: OnceCell::new(), configurations, }); @@ -287,6 +320,56 @@ impl Loader { } } +impl LanguageRegistry for Loader { + fn language_for_injection_string<'a>( + &'a self, + string: &str, + ) -> Option<(Language, &'a PropertySheet)> { + match self.language_configuration_for_injection_string(string) { + Err(message) => { + eprintln!( + "Failed to load language for injection string '{}': {}", + string, message.0 + ); + None + } + Ok(None) => None, + Ok(Some((language, configuration))) => { + match configuration.highlight_property_sheet(language) { + Err(message) => { + eprintln!( + "Failed to load property sheet for injection string '{}': {}", + string, message.0 + ); + None + } + Ok(None) => None, + Ok(Some(sheet)) => Some((language, sheet)), + } + } + } + } +} + +impl LanguageConfiguration { + pub fn highlight_property_sheet( + &self, + language: Language, + ) -> Result>> { + self.highlight_property_sheet + .get_or_try_init(|| { + if let Some(path) = &self.highlight_property_sheet_path { + let sheet_json = fs::read_to_string(path)?; + let sheet = load_property_sheet(language, &sheet_json)?; + Ok(Some(sheet)) + } else { + Ok(None) + } + }) + .map(Option::as_ref) + } +} + fn needs_recompile( lib_path: &Path, parser_c_path: &Path, diff --git a/cli/src/main.rs b/cli/src/main.rs index eb848831..9cd4e131 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -4,8 +4,7 @@ use std::fs; use std::path::Path; use std::process::exit; use std::usize; -use tree_sitter_cli::loader::Loader; -use tree_sitter_cli::{error, generate, logger, parse, properties, test}; +use tree_sitter_cli::{error, generate, highlight, loader, logger, parse, properties, test}; fn main() { if let Err(e) = run() { @@ -64,14 +63,30 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("debug").long("debug").short("d")) .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")), ) + .subcommand( + 
SubCommand::with_name("highlight") + .about("Highlight a file") + .arg( + Arg::with_name("path") + .index(1) + .multiple(true) + .required(true), + ) + .arg(Arg::with_name("html").long("html").short("h")), + ) .get_matches(); let home_dir = dirs::home_dir().unwrap(); let current_dir = env::current_dir().unwrap(); let config_dir = home_dir.join(".tree-sitter"); + let theme_path = config_dir.join("theme.json"); + let parsers_dir = config_dir.join("parsers"); - fs::create_dir_all(&config_dir).unwrap(); - let mut loader = Loader::new(config_dir); + // TODO - make configurable + let parser_repo_paths = vec![home_dir.join("github")]; + + fs::create_dir_all(&parsers_dir).unwrap(); + let mut loader = loader::Loader::new(config_dir); if let Some(matches) = matches.subcommand_matches("generate") { if matches.is_present("log") { @@ -111,7 +126,7 @@ fn run() -> error::Result<()> { let debug_graph = matches.is_present("debug-graph"); let quiet = matches.is_present("quiet"); let time = matches.is_present("time"); - loader.find_all_languages(&vec![home_dir.join("github")])?; + loader.find_all_languages(&parser_repo_paths)?; let paths = matches .values_of("path") .unwrap() @@ -144,6 +159,29 @@ fn run() -> error::Result<()> { if has_error { return Err(error::Error(String::new())); } + } else if let Some(matches) = matches.subcommand_matches("highlight") { + loader.find_all_languages(&parser_repo_paths)?; + let theme = highlight::Theme::load(&theme_path).unwrap_or_default(); + let paths = matches.values_of("path").unwrap().into_iter(); + let html_mode = matches.is_present("html"); + + if html_mode { + println!("{}", highlight::HTML_HEADER); + } + + for path in paths { + let path = Path::new(path); + if let Some((language, config)) = loader.language_configuration_for_file_name(path)? { + if let Some(sheet) = config.highlight_property_sheet(language)? 
{ + let source = fs::read(path)?; + if html_mode { + highlight::html(&loader, &theme, &source, language, sheet)?; + } else { + highlight::ansi(&loader, &theme, &source, language, sheet)?; + } + } + } + } } Ok(()) diff --git a/cli/src/tests/helpers/fixtures.rs b/cli/src/tests/helpers/fixtures.rs index 8fc00038..e7ba2e55 100644 --- a/cli/src/tests/helpers/fixtures.rs +++ b/cli/src/tests/helpers/fixtures.rs @@ -2,7 +2,8 @@ use crate::loader::Loader; use lazy_static::lazy_static; use std::fs; use std::path::{Path, PathBuf}; -use tree_sitter::Language; +use tree_sitter::{Language, PropertySheet}; +use tree_sitter_highlight::{load_property_sheet, Properties}; include!("./dirs.rs"); @@ -20,6 +21,16 @@ pub fn get_language(name: &str) -> Language { .unwrap() } +pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet { + let path = GRAMMARS_DIR + .join(language_name) + .join("src") + .join(sheet_name); + let json = fs::read_to_string(path).unwrap(); + let language = get_language(language_name); + load_property_sheet(language, &json).unwrap() +} + pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language { let parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", name)); if !fs::read_to_string(&parser_c_path) diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs new file mode 100644 index 00000000..ea14a1c2 --- /dev/null +++ b/cli/src/tests/highlight_test.rs @@ -0,0 +1,191 @@ +use super::helpers::fixtures::{get_language, get_property_sheet}; +use lazy_static::lazy_static; +use tree_sitter::{Language, PropertySheet}; +use tree_sitter_highlight::{ + highlight, highlight_html, HighlightEvent, LanguageRegistry, Properties, Scope, +}; + +lazy_static! 
{ + static ref JS_SHEET: PropertySheet = + get_property_sheet("javascript", "highlights.json"); + static ref HTML_SHEET: PropertySheet = + get_property_sheet("html", "highlights.json"); + static ref SCOPE_CLASS_STRINGS: Vec = { + let mut result = Vec::new(); + let mut i = 0; + while let Some(scope) = Scope::from_usize(i) { + result.push(format!("class={:?}", scope)); + i += 1; + } + result + }; +} + +#[test] +fn test_highlighting_injected_html_in_javascript() { + let source = vec!["const s = html `
${a < b}
`;"].join("\n"); + + assert_eq!( + &to_token_vector(&source, get_language("javascript"), &JS_SHEET).unwrap(), + &[vec![ + ("const", vec![Scope::Keyword]), + (" ", vec![]), + ("s", vec![Scope::Variable]), + (" ", vec![]), + ("=", vec![Scope::Operator]), + (" ", vec![]), + ("html", vec![Scope::Function]), + (" ", vec![]), + ("`<", vec![Scope::String]), + ("div", vec![Scope::String, Scope::Tag]), + (">", vec![Scope::String]), + ( + "${", + vec![Scope::String, Scope::Embedded, Scope::PunctuationSpecial] + ), + ("a", vec![Scope::String, Scope::Embedded, Scope::Variable]), + (" ", vec![Scope::String, Scope::Embedded]), + ("<", vec![Scope::String, Scope::Embedded, Scope::Operator]), + (" ", vec![Scope::String, Scope::Embedded]), + ("b", vec![Scope::String, Scope::Embedded, Scope::Variable]), + ( + "}", + vec![Scope::String, Scope::Embedded, Scope::PunctuationSpecial] + ), + ("`", vec![Scope::String]), + (";", vec![Scope::PunctuationDelimiter]), + ]] + ); +} + +#[test] +fn test_highlighting_injected_javascript_in_html() { + let source = vec![ + "", + " ", + "", + ] + .join("\n"); + + assert_eq!( + &to_token_vector(&source, get_language("html"), &HTML_SHEET).unwrap(), + &[ + vec![("<", vec![]), ("body", vec![Scope::Tag]), (">", vec![]),], + vec![(" <", vec![]), ("script", vec![Scope::Tag]), (">", vec![]),], + vec![ + (" ", vec![]), + ("const", vec![Scope::Keyword]), + (" ", vec![]), + ("x", vec![Scope::Variable]), + (" ", vec![]), + ("=", vec![Scope::Operator]), + (" ", vec![]), + ("new", vec![Scope::Keyword]), + (" ", vec![]), + ("Thing", vec![Scope::Constructor]), + ("(", vec![Scope::PunctuationBracket]), + (")", vec![Scope::PunctuationBracket]), + (";", vec![Scope::PunctuationDelimiter]), + ], + vec![ + (" ", vec![]), + ], + vec![("", vec![]),], + ] + ); +} + +#[test] +fn test_highlighting_multiline_scopes_to_html() { + let source = vec![ + "const SOMETHING = `", + " one ${", + " two()", + " } three", + "`", + ] + .join("\n"); + + assert_eq!( + &to_html(&source, 
get_language("javascript"), &JS_SHEET,).unwrap(), + &[ + "const SOMETHING = `\n".to_string(), + " one ${\n".to_string(), + " two()\n".to_string(), + " } three\n".to_string(), + "`\n".to_string(), + ] + ); +} + +struct TestLanguageRegistry; + +impl LanguageRegistry for TestLanguageRegistry { + fn language_for_injection_string( + &self, + string: &str, + ) -> Option<(Language, &PropertySheet)> { + match string { + "javascript" => Some((get_language("javascript"), &JS_SHEET)), + "html" => Some((get_language("html"), &HTML_SHEET)), + _ => None, + } + } +} + +fn to_html<'a>( + src: &'a str, + language: Language, + property_sheet: &'a PropertySheet, +) -> Result, String> { + highlight_html( + &TestLanguageRegistry, + src.as_bytes(), + language, + property_sheet, + |scope| SCOPE_CLASS_STRINGS[scope as usize].as_str(), + ) +} + +fn to_token_vector<'a>( + src: &'a str, + language: Language, + property_sheet: &'a PropertySheet, +) -> Result)>>, String> { + let mut lines = Vec::new(); + let mut scopes = Vec::new(); + let mut line = Vec::new(); + for event in highlight( + &TestLanguageRegistry, + src.as_bytes(), + language, + property_sheet, + )? 
{ + match event { + HighlightEvent::ScopeStart(s) => scopes.push(s), + HighlightEvent::ScopeEnd(s) => { + assert_eq!(*scopes.last().unwrap(), s); + scopes.pop(); + } + HighlightEvent::Source(s) => { + for (i, l) in s.lines().enumerate() { + if i > 0 { + lines.push(line); + line = Vec::new(); + } + if l.len() > 0 { + line.push((l, scopes.clone())); + } + } + } + } + } + lines.push(line); + Ok(lines) +} diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index af2b4582..143e8297 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -1,5 +1,6 @@ mod corpus_test; mod helpers; +mod highlight_test; mod node_test; mod parser_test; mod properties_test; diff --git a/cli/vendor/xterm-colors.json b/cli/vendor/xterm-colors.json new file mode 100644 index 00000000..47994496 --- /dev/null +++ b/cli/vendor/xterm-colors.json @@ -0,0 +1,258 @@ +[ + "#000000", + "#800000", + "#008000", + "#808000", + "#000080", + "#800080", + "#008080", + "#c0c0c0", + "#808080", + "#ff0000", + "#00ff00", + "#ffff00", + "#0000ff", + "#ff00ff", + "#00ffff", + "#ffffff", + "#000000", + "#00005f", + "#000087", + "#0000af", + "#0000d7", + "#0000ff", + "#005f00", + "#005f5f", + "#005f87", + "#005faf", + "#005fd7", + "#005fff", + "#008700", + "#00875f", + "#008787", + "#0087af", + "#0087d7", + "#0087ff", + "#00af00", + "#00af5f", + "#00af87", + "#00afaf", + "#00afd7", + "#00afff", + "#00d700", + "#00d75f", + "#00d787", + "#00d7af", + "#00d7d7", + "#00d7ff", + "#00ff00", + "#00ff5f", + "#00ff87", + "#00ffaf", + "#00ffd7", + "#00ffff", + "#5f0000", + "#5f005f", + "#5f0087", + "#5f00af", + "#5f00d7", + "#5f00ff", + "#5f5f00", + "#5f5f5f", + "#5f5f87", + "#5f5faf", + "#5f5fd7", + "#5f5fff", + "#5f8700", + "#5f875f", + "#5f8787", + "#5f87af", + "#5f87d7", + "#5f87ff", + "#5faf00", + "#5faf5f", + "#5faf87", + "#5fafaf", + "#5fafd7", + "#5fafff", + "#5fd700", + "#5fd75f", + "#5fd787", + "#5fd7af", + "#5fd7d7", + "#5fd7ff", + "#5fff00", + "#5fff5f", + "#5fff87", + "#5fffaf", + "#5fffd7", + 
"#5fffff", + "#870000", + "#87005f", + "#870087", + "#8700af", + "#8700d7", + "#8700ff", + "#875f00", + "#875f5f", + "#875f87", + "#875faf", + "#875fd7", + "#875fff", + "#878700", + "#87875f", + "#878787", + "#8787af", + "#8787d7", + "#8787ff", + "#87af00", + "#87af5f", + "#87af87", + "#87afaf", + "#87afd7", + "#87afff", + "#87d700", + "#87d75f", + "#87d787", + "#87d7af", + "#87d7d7", + "#87d7ff", + "#87ff00", + "#87ff5f", + "#87ff87", + "#87ffaf", + "#87ffd7", + "#87ffff", + "#af0000", + "#af005f", + "#af0087", + "#af00af", + "#af00d7", + "#af00ff", + "#af5f00", + "#af5f5f", + "#af5f87", + "#af5faf", + "#af5fd7", + "#af5fff", + "#af8700", + "#af875f", + "#af8787", + "#af87af", + "#af87d7", + "#af87ff", + "#afaf00", + "#afaf5f", + "#afaf87", + "#afafaf", + "#afafd7", + "#afafff", + "#afd700", + "#afd75f", + "#afd787", + "#afd7af", + "#afd7d7", + "#afd7ff", + "#afff00", + "#afff5f", + "#afff87", + "#afffaf", + "#afffd7", + "#afffff", + "#d70000", + "#d7005f", + "#d70087", + "#d700af", + "#d700d7", + "#d700ff", + "#d75f00", + "#d75f5f", + "#d75f87", + "#d75faf", + "#d75fd7", + "#d75fff", + "#d78700", + "#d7875f", + "#d78787", + "#d787af", + "#d787d7", + "#d787ff", + "#d7af00", + "#d7af5f", + "#d7af87", + "#d7afaf", + "#d7afd7", + "#d7afff", + "#d7d700", + "#d7d75f", + "#d7d787", + "#d7d7af", + "#d7d7d7", + "#d7d7ff", + "#d7ff00", + "#d7ff5f", + "#d7ff87", + "#d7ffaf", + "#d7ffd7", + "#d7ffff", + "#ff0000", + "#ff005f", + "#ff0087", + "#ff00af", + "#ff00d7", + "#ff00ff", + "#ff5f00", + "#ff5f5f", + "#ff5f87", + "#ff5faf", + "#ff5fd7", + "#ff5fff", + "#ff8700", + "#ff875f", + "#ff8787", + "#ff87af", + "#ff87d7", + "#ff87ff", + "#ffaf00", + "#ffaf5f", + "#ffaf87", + "#ffafaf", + "#ffafd7", + "#ffafff", + "#ffd700", + "#ffd75f", + "#ffd787", + "#ffd7af", + "#ffd7d7", + "#ffd7ff", + "#ffff00", + "#ffff5f", + "#ffff87", + "#ffffaf", + "#ffffd7", + "#ffffff", + "#080808", + "#121212", + "#1c1c1c", + "#262626", + "#303030", + "#3a3a3a", + "#444444", + "#4e4e4e", + "#585858", 
+ "#626262", + "#6c6c6c", + "#767676", + "#808080", + "#8a8a8a", + "#949494", + "#9e9e9e", + "#a8a8a8", + "#b2b2b2", + "#bcbcbc", + "#c6c6c6", + "#d0d0d0", + "#dadada", + "#e4e4e4", + "#eeeeee" +] diff --git a/highlight/Cargo.toml b/highlight/Cargo.toml new file mode 100644 index 00000000..dd33add2 --- /dev/null +++ b/highlight/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "tree-sitter-highlight" +description = "Library for performing syntax highlighting with Tree-sitter" +version = "0.1.0" +authors = [ + "Max Brunsfeld ", + "Tim Clem " +] +license = "MIT" +readme = "README.md" +edition = "2018" +keywords = ["incremental", "parsing", "syntax", "highlighting"] +categories = ["parsing", "text-editors"] + +[dependencies] +regex = "1" +serde = "1.0" +serde_json = "1.0" +serde_derive = "1.0" + +[dependencies.tree-sitter] +version = ">= 0.3.7" +path = "../lib" diff --git a/highlight/src/escape.rs b/highlight/src/escape.rs new file mode 100644 index 00000000..882f160c --- /dev/null +++ b/highlight/src/escape.rs @@ -0,0 +1,53 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! HTML Escaping +//! +//! This module contains one unit-struct which can be used to HTML-escape a +//! string of text (for use in a format string). + +use std::fmt; + +/// Wrapper struct which will emit the HTML-escaped version of the contained +/// string when passed to a format string.
+pub struct Escape<'a>(pub &'a str); + +impl<'a> fmt::Display for Escape<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + // Because the internet is always right, turns out there's not that many + // characters to escape: http://stackoverflow.com/questions/7381974 + let Escape(s) = *self; + let pile_o_bits = s; + let mut last = 0; + for (i, ch) in s.bytes().enumerate() { + match ch as char { + '<' | '>' | '&' | '\'' | '"' => { + fmt.write_str(&pile_o_bits[last..i])?; + let s = match ch as char { + '>' => ">", + '<' => "<", + '&' => "&", + '\'' => "'", + '"' => """, + _ => unreachable!(), + }; + fmt.write_str(s)?; + last = i + 1; + } + _ => {} + } + } + + if last < s.len() { + fmt.write_str(&pile_o_bits[last..])?; + } + Ok(()) + } +} diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs new file mode 100644 index 00000000..bdf35b9f --- /dev/null +++ b/highlight/src/lib.rs @@ -0,0 +1,823 @@ +mod escape; + +use serde::{Deserialize, Deserializer}; +use serde_derive::*; +use std::cmp; +use std::fmt::Write; +use std::mem::transmute; +use std::str; +use std::usize; +use tree_sitter::{Language, Node, Parser, Point, PropertySheet, Range, Tree, TreePropertyCursor}; + +pub trait LanguageRegistry { + fn language_for_injection_string<'a>( + &'a self, + s: &str, + ) -> Option<(Language, &'a PropertySheet)>; +} + +#[derive(Debug)] +enum TreeStep { + Child { + index: isize, + kinds: Option>, + }, + Children { + kinds: Option>, + }, + Next { + kinds: Option>, + }, +} + +#[derive(Debug)] +enum InjectionLanguage { + Literal(String), + TreePath(Vec), +} + +#[derive(Debug)] +struct Injection { + language: InjectionLanguage, + content: Vec, +} + +#[derive(Debug)] +pub struct Properties { + scope: Option, + injections: Vec, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[repr(u16)] +pub enum Scope { + Attribute, + Comment, + Constant, + ConstantBuiltin, + Constructor, + ConstructorBuiltin, + Embedded, + Escape, + Function, + 
FunctionBuiltin, + Keyword, + Number, + Operator, + Property, + PropertyBuiltin, + Punctuation, + PunctuationBracket, + PunctuationDelimiter, + PunctuationSpecial, + String, + StringSpecial, + Tag, + Type, + TypeBuiltin, + Variable, + VariableBuiltin, + Unknown, +} + +struct Layer<'a> { + _tree: Tree, + cursor: TreePropertyCursor<'a, Properties>, + ranges: Vec, + at_node_end: bool, +} + +struct Highlighter<'a, T: LanguageRegistry> { + language_registry: &'a T, + source: &'a [u8], + source_offset: usize, + parser: Parser, + layers: Vec>, + utf8_error_len: Option, +} + +#[derive(Copy, Clone, Debug)] +pub enum HighlightEvent<'a> { + Source(&'a str), + ScopeStart(Scope), + ScopeEnd(Scope), +} + +#[derive(Debug, Deserialize)] +#[serde(untagged)] +enum TreePathArgJSON { + TreePath(TreePathJSON), + Number(isize), + String(String), +} + +#[derive(Debug, Deserialize)] +#[serde(tag = "name")] +enum TreePathJSON { + #[serde(rename = "this")] + This, + #[serde(rename = "child")] + Child { args: Vec }, + #[serde(rename = "next")] + Next { args: Vec }, + #[serde(rename = "children")] + Children { args: Vec }, +} + +#[derive(Debug, Deserialize)] +#[serde(untagged)] +enum InjectionLanguageJSON { + List(Vec), + TreePath(TreePathJSON), + Literal(String), +} + +#[derive(Debug, Deserialize)] +#[serde(untagged)] +enum InjectionContentJSON { + List(Vec), + TreePath(TreePathJSON), +} + +#[derive(Debug, Deserialize)] +struct PropertiesJSON { + scope: Option, + #[serde(rename = "injection-language")] + injection_language: Option, + #[serde(rename = "injection-content")] + injection_content: Option, +} + +#[derive(Debug)] +pub enum PropertySheetError { + InvalidJSON(serde_json::Error), + InvalidRegex(regex::Error), + InvalidFormat(String), +} + +pub fn load_property_sheet( + language: Language, + json: &str, +) -> Result, PropertySheetError> { + let sheet = PropertySheet::new(language, json).map_err(|e| match e { + tree_sitter::PropertySheetError::InvalidJSON(e) => 
PropertySheetError::InvalidJSON(e), + tree_sitter::PropertySheetError::InvalidRegex(e) => PropertySheetError::InvalidRegex(e), + })?; + let sheet = sheet + .map(|p| Properties::new(p, language)) + .map_err(PropertySheetError::InvalidFormat)?; + Ok(sheet) +} + +impl Scope { + pub fn from_usize(i: usize) -> Option { + if i <= (Scope::Unknown as usize) { + Some(unsafe { transmute(i as u16) }) + } else { + None + } + } +} + +impl Properties { + fn new(json: PropertiesJSON, language: Language) -> Result { + let injections = match (json.injection_language, json.injection_content) { + (None, None) => Ok(Vec::new()), + (Some(_), None) => Err( + "Must specify an injection-content along with an injection-language".to_string(), + ), + (None, Some(_)) => Err( + "Must specify an injection-language along with an injection-content".to_string(), + ), + (Some(language_json), Some(content_json)) => { + let languages = match language_json { + InjectionLanguageJSON::List(list) => { + let mut result = Vec::with_capacity(list.len()); + for element in list { + result.push(match element { + InjectionLanguageJSON::TreePath(p) => { + let mut result = Vec::new(); + Self::flatten_tree_path(p, &mut result, language)?; + InjectionLanguage::TreePath(result) + } + InjectionLanguageJSON::Literal(s) => InjectionLanguage::Literal(s), + InjectionLanguageJSON::List(_) => { + panic!("Injection-language cannot be a list of lists") + } + }) + } + result + } + InjectionLanguageJSON::TreePath(p) => vec![{ + let mut result = Vec::new(); + Self::flatten_tree_path(p, &mut result, language)?; + InjectionLanguage::TreePath(result) + }], + InjectionLanguageJSON::Literal(s) => vec![InjectionLanguage::Literal(s)], + }; + + let contents = match content_json { + InjectionContentJSON::List(l) => { + let mut result = Vec::with_capacity(l.len()); + for element in l { + result.push(match element { + InjectionContentJSON::TreePath(p) => { + let mut result = Vec::new(); + Self::flatten_tree_path(p, &mut result, 
language)?; + result + } + InjectionContentJSON::List(_) => { + panic!("Injection-content cannot be a list of lists") + } + }) + } + result + } + InjectionContentJSON::TreePath(p) => vec![{ + let mut result = Vec::new(); + Self::flatten_tree_path(p, &mut result, language)?; + result + }], + }; + + if languages.len() == contents.len() { + Ok(languages + .into_iter() + .zip(contents.into_iter()) + .map(|(language, content)| Injection { language, content }) + .collect()) + } else { + Err(format!( + "Mismatch: got {} injection-language values but {} injection-content values", + languages.len(), + contents.len(), + )) + } + } + }?; + + Ok(Self { + scope: json.scope, + injections, + }) + } + + // Transform a tree path from the format expressed directly in the property sheet + // (nested function calls), to a flat sequence of steps for transforming a list of + // nodes. This way, we can evaluate these tree paths with no recursion and a single + // vector of intermediate storage. + fn flatten_tree_path( + p: TreePathJSON, + steps: &mut Vec, + language: Language, + ) -> Result<(), String> { + match p { + TreePathJSON::This => {} + TreePathJSON::Child { args } => { + let (tree_path, index, kinds) = Self::parse_args("child", args, language)?; + Self::flatten_tree_path(tree_path, steps, language)?; + steps.push(TreeStep::Child { + index: index + .ok_or_else(|| "The `child` function requires an index".to_string())?, + kinds: kinds, + }); + } + TreePathJSON::Children { args } => { + let (tree_path, _, kinds) = Self::parse_args("children", args, language)?; + Self::flatten_tree_path(tree_path, steps, language)?; + steps.push(TreeStep::Children { kinds }); + } + TreePathJSON::Next { args } => { + let (tree_path, _, kinds) = Self::parse_args("next", args, language)?; + Self::flatten_tree_path(tree_path, steps, language)?; + steps.push(TreeStep::Next { kinds }); + } + } + Ok(()) + } + + fn parse_args( + name: &str, + args: Vec, + language: Language, + ) -> Result<(TreePathJSON, 
Option, Option>), String> { + let tree_path; + let mut index = None; + let mut kinds = Vec::new(); + let mut iter = args.into_iter(); + + match iter.next() { + Some(TreePathArgJSON::TreePath(p)) => tree_path = p, + _ => { + return Err(format!( + "First argument to `{}()` must be a tree path", + name + )); + } + } + + for arg in iter { + match arg { + TreePathArgJSON::TreePath(_) => { + return Err(format!( + "Other arguments to `{}()` must be strings or numbers", + name + )); + } + TreePathArgJSON::Number(i) => index = Some(i), + TreePathArgJSON::String(s) => kinds.push(s), + } + } + + if kinds.len() > 0 { + let mut kind_ids = Vec::new(); + for i in 0..(language.node_kind_count() as u16) { + if kinds.iter().any(|s| s == language.node_kind_for_id(i)) + && language.node_kind_is_named(i) + { + kind_ids.push(i); + } + } + if kind_ids.len() == 0 { + return Err(format!("Non-existent node kinds: {:?}", kinds)); + } + + Ok((tree_path, index, Some(kind_ids))) + } else { + Ok((tree_path, index, None)) + } + } +} + +impl<'a, T: LanguageRegistry> Highlighter<'a, T> { + fn new( + language_registry: &'a T, + source: &'a [u8], + language: Language, + property_sheet: &'a PropertySheet, + ) -> Result { + let mut parser = Parser::new(); + parser.set_language(language)?; + let tree = parser + .parse(source, None) + .ok_or_else(|| format!("Tree-sitter: failed to parse"))?; + Ok(Self { + language_registry, + source, + source_offset: 0, + parser, + layers: vec![Layer::new( + source, + tree, + property_sheet, + vec![Range { + start_byte: 0, + end_byte: usize::MAX, + start_point: Point::new(0, 0), + end_point: Point::new(usize::MAX, usize::MAX), + }], + )], + utf8_error_len: None, + }) + } + + fn emit_source(&mut self, next_offset: usize) -> Option> { + let input = &self.source[self.source_offset..next_offset]; + match str::from_utf8(input) { + Ok(valid) => { + self.source_offset = next_offset; + Some(HighlightEvent::Source(valid)) + } + Err(error) => { + if let Some(error_len) = 
error.error_len() { + if error.valid_up_to() > 0 { + let prefix = &input[0..error.valid_up_to()]; + self.utf8_error_len = Some(error_len); + Some(HighlightEvent::Source(unsafe { + str::from_utf8_unchecked(prefix) + })) + } else { + self.source_offset += error_len; + Some(HighlightEvent::Source("\u{FFFD}")) + } + } else { + None + } + } + } + } + + fn process_tree_step(&self, step: &TreeStep, nodes: &mut Vec) { + let len = nodes.len(); + for i in 0..len { + let node = nodes[i]; + match step { + TreeStep::Child { index, kinds } => { + let index = if *index >= 0 { + *index as usize + } else { + (node.child_count() as isize + *index) as usize + }; + if let Some(child) = node.child(index) { + if let Some(kinds) = kinds { + if kinds.contains(&child.kind_id()) { + nodes.push(child); + } + } else { + nodes.push(child); + } + } + } + TreeStep::Children { kinds } => { + for child in node.children() { + if let Some(kinds) = kinds { + if kinds.contains(&child.kind_id()) { + nodes.push(child); + } + } else { + nodes.push(child); + } + } + } + TreeStep::Next { .. } => unimplemented!(), + } + } + nodes.drain(0..len); + } + + fn nodes_for_tree_path(&self, node: Node<'a>, steps: &Vec) -> Vec> { + let mut nodes = vec![node]; + for step in steps.iter() { + self.process_tree_step(step, &mut nodes); + } + nodes + } + + // An injected language name may either be specified as a fixed string, or based + // on the text of some node in the syntax tree. + fn injection_language_string( + &self, + node: &Node, + language: &InjectionLanguage, + ) -> Option { + match language { + InjectionLanguage::Literal(s) => Some(s.to_string()), + InjectionLanguage::TreePath(steps) => self + .nodes_for_tree_path(*node, steps) + .first() + .and_then(|node| { + str::from_utf8(&self.source[node.start_byte()..node.end_byte()]) + .map(|s| s.to_owned()) + .ok() + }), + } + } + + // Compute the ranges that should be included when parsing an injection. 
+ // This takes into account two things: + // * `nodes` - Every injection takes place within a set of nodes. The injection ranges + // are the ranges of those nodes, *minus* the ranges of those nodes' children. + // * `parent_ranges` - The new injection may be nested inside of *another* injection + // (e.g. JavaScript within HTML within ERB). The parent injection's ranges must + // be taken into account. + fn intersect_ranges(parent_ranges: &Vec, nodes: &Vec) -> Vec { + let mut result = Vec::new(); + let mut parent_range_iter = parent_ranges.iter(); + let mut parent_range = parent_range_iter + .next() + .expect("Layers should only be constructed with non-empty ranges vectors"); + for node in nodes.iter() { + let range = node.range(); + let mut preceding_range = Range { + start_byte: 0, + start_point: Point::new(0, 0), + end_byte: range.start_byte, + end_point: range.start_point, + }; + let following_range = Range { + start_byte: node.end_byte(), + start_point: node.end_position(), + end_byte: usize::MAX, + end_point: Point::new(usize::MAX, usize::MAX), + }; + + for child_range in node + .children() + .map(|c| c.range()) + .chain([following_range].iter().cloned()) + { + let mut range = Range { + start_byte: preceding_range.end_byte, + start_point: preceding_range.end_point, + end_byte: child_range.start_byte, + end_point: child_range.start_point, + }; + preceding_range = child_range; + + if range.end_byte < parent_range.start_byte { + continue; + } + + while parent_range.start_byte <= range.end_byte { + if parent_range.end_byte > range.start_byte { + if range.start_byte < parent_range.start_byte { + range.start_byte = parent_range.start_byte; + range.start_point = parent_range.start_point; + } + + if parent_range.end_byte < range.end_byte { + if range.start_byte < parent_range.end_byte { + result.push(Range { + start_byte: range.start_byte, + start_point: range.start_point, + end_byte: parent_range.end_byte, + end_point: parent_range.end_point, + }); + } + 
range.start_byte = parent_range.end_byte; + range.start_point = parent_range.end_point; + } else { + if range.start_byte < range.end_byte { + result.push(range); + } + break; + } + } + + if let Some(next_range) = parent_range_iter.next() { + parent_range = next_range; + } else { + return result; + } + } + } + } + result + } + + fn add_layer(&mut self, language_string: &str, ranges: Vec) { + if let Some((language, property_sheet)) = self + .language_registry + .language_for_injection_string(language_string) + { + self.parser + .set_language(language) + .expect("Failed to set language"); + self.parser.set_included_ranges(&ranges); + let tree = self + .parser + .parse(self.source, None) + .expect("Failed to parse"); + let layer = Layer::new(self.source, tree, property_sheet, ranges); + match self + .layers + .binary_search_by_key(&(layer.offset(), 1), |l| (l.offset(), 0)) + { + Ok(i) | Err(i) => self.layers.insert(i, layer), + }; + } + } +} + +impl<'a, T: LanguageRegistry> Iterator for Highlighter<'a, T> { + type Item = HighlightEvent<'a>; + + fn next(&mut self) -> Option { + if let Some(utf8_error_len) = self.utf8_error_len.take() { + self.source_offset += utf8_error_len; + return Some(HighlightEvent::Source("\u{FFFD}")); + } + + while !self.layers.is_empty() { + let first_layer = &self.layers[0]; + let properties = &first_layer.cursor.node_properties(); + + // Add any injections for the current node. 
+ if !first_layer.at_node_end { + let node = first_layer.cursor.node(); + let injections = properties + .injections + .iter() + .filter_map(|Injection { language, content }| { + if let Some(language) = self.injection_language_string(&node, language) { + let nodes = self.nodes_for_tree_path(node, content); + let ranges = Self::intersect_ranges(&first_layer.ranges, &nodes); + if ranges.len() > 0 { + return Some((language, ranges)); + } + } + None + }) + .collect::>(); + + for (language, ranges) in injections { + self.add_layer(&language, ranges); + } + } + + // Determine if any scopes start or end at the current position. + let scope_event; + if let Some(scope) = properties.scope { + let next_offset = cmp::min(self.source.len(), self.layers[0].offset()); + + // Before returning any scope boundaries, return any remaining slice of + // the source code the precedes that scope boundary. + if self.source_offset < next_offset { + return self.emit_source(next_offset); + } + + scope_event = if self.layers[0].at_node_end { + Some(HighlightEvent::ScopeEnd(scope)) + } else { + Some(HighlightEvent::ScopeStart(scope)) + }; + } else { + scope_event = None; + }; + + // Advance the current layer's tree cursor. This might cause that cursor to move + // beyond one of the other layers' cursors for a different syntax tree, so we need + // to re-sort the layers. If the cursor is already at the end of its syntax tree, + // remove it. + if self.layers[0].advance() { + self.layers.sort_unstable_by_key(|layer| layer.offset()); + } else { + self.layers.remove(0); + } + + if scope_event.is_some() { + return scope_event; + } + } + + if self.source_offset < self.source.len() { + self.emit_source(self.source.len()) + } else { + None + } + } +} + +impl<'a> Layer<'a> { + fn new( + source: &'a [u8], + tree: Tree, + sheet: &'a PropertySheet, + ranges: Vec, + ) -> Self { + // The cursor's lifetime parameter indicates that the tree must outlive the cursor. 
+ // But because the tree is really a pointer to the heap, the cursor can remain + // valid when the tree is moved. There's no way to express this with lifetimes + // right now, so we have to `transmute` the cursor's lifetime. + let cursor = unsafe { transmute(tree.walk_with_properties(sheet, source)) }; + Self { + _tree: tree, + cursor, + ranges, + at_node_end: false, + } + } + + fn offset(&self) -> usize { + if self.at_node_end { + self.cursor.node().end_byte() + } else { + self.cursor.node().start_byte() + } + } + + fn advance(&mut self) -> bool { + if self.at_node_end { + if self.cursor.goto_next_sibling() { + self.at_node_end = false; + } else if !self.cursor.goto_parent() { + return false; + } + } else if !self.cursor.goto_first_child() { + self.at_node_end = true; + } + true + } +} + +impl<'de> Deserialize<'de> for Scope { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + match s.as_str() { + "attribute" => Ok(Scope::Attribute), + "comment" => Ok(Scope::Comment), + "constant" => Ok(Scope::Constant), + "constant.builtin" => Ok(Scope::ConstantBuiltin), + "constructor" => Ok(Scope::Constructor), + "constructor.builtin" => Ok(Scope::ConstructorBuiltin), + "embedded" => Ok(Scope::Embedded), + "escape" => Ok(Scope::Escape), + "function" => Ok(Scope::Function), + "function.builtin" => Ok(Scope::FunctionBuiltin), + "keyword" => Ok(Scope::Keyword), + "number" => Ok(Scope::Number), + "operator" => Ok(Scope::Operator), + "property" => Ok(Scope::Property), + "property.builtin" => Ok(Scope::PropertyBuiltin), + "punctuation" => Ok(Scope::Punctuation), + "punctuation.bracket" => Ok(Scope::PunctuationBracket), + "punctuation.delimiter" => Ok(Scope::PunctuationDelimiter), + "punctuation.special" => Ok(Scope::PunctuationSpecial), + "string" => Ok(Scope::String), + "string.special" => Ok(Scope::StringSpecial), + "type" => Ok(Scope::Type), + "type.builtin" => Ok(Scope::TypeBuiltin), + "variable" => 
Ok(Scope::Variable), + "variable.builtin" => Ok(Scope::VariableBuiltin), + "tag" => Ok(Scope::Tag), + _ => Ok(Scope::Unknown), + } + } +} + +pub fn highlight<'a, T: LanguageRegistry>( + language_registry: &'a T, + source: &'a [u8], + language: Language, + property_sheet: &'a PropertySheet, +) -> Result> + 'a, String> { + Highlighter::new(language_registry, source, language, property_sheet) +} + +pub fn highlight_html<'a, T: LanguageRegistry, F: Fn(Scope) -> &'a str>( + language_registry: &'a T, + source: &'a [u8], + language: Language, + property_sheet: &'a PropertySheet, + attribute_callback: F, +) -> Result, String> { + let highlighter = Highlighter::new(language_registry, source, language, property_sheet)?; + let mut renderer = HtmlRenderer::new(attribute_callback); + let mut scopes = Vec::new(); + for event in highlighter { + match event { + HighlightEvent::ScopeStart(s) => { + scopes.push(s); + renderer.start_scope(s); + } + HighlightEvent::ScopeEnd(s) => { + assert_eq!(scopes.pop(), Some(s)); + renderer.end_scope(); + } + HighlightEvent::Source(src) => { + renderer.render_line(src, &scopes); + } + }; + } + renderer.flush(); + Ok(renderer.result) +} + +struct HtmlRenderer<'a, F: Fn(Scope) -> &'a str> { + result: Vec, + buffer: String, + attribute_callback: F, +} + +impl<'a, F: Fn(Scope) -> &'a str> HtmlRenderer<'a, F> { + fn new(attribute_callback: F) -> Self { + HtmlRenderer { + result: Vec::new(), + buffer: String::new(), + attribute_callback, + } + } + + fn start_scope(&mut self, s: Scope) { + write!(&mut self.buffer, "", (self.attribute_callback)(s),).unwrap(); + } + + fn end_scope(&mut self) { + write!(&mut self.buffer, "").unwrap(); + } + + fn flush(&mut self) { + if !self.buffer.is_empty() { + self.buffer.push('\n'); + self.result.push(self.buffer.clone()); + self.buffer.clear(); + } + } + + fn render_line(&mut self, src: &str, scopes: &Vec) { + let mut multiline = false; + for line in src.split('\n') { + let line = line.trim_end_matches('\r'); + if 
multiline { + scopes.iter().for_each(|_| self.end_scope()); + self.flush(); + scopes.iter().for_each(|scope| self.start_scope(*scope)); + } + write!(&mut self.buffer, "{}", escape::Escape(line)).unwrap(); + multiline = true; + } + } +}