From e89b6b2402acb0d2ebbbd87bc6f3281dc20a3bd3 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 19 Feb 2019 11:24:50 -0800 Subject: [PATCH 1/8] Add a `highlight` subcommand --- Cargo.lock | 78 +++ cli/Cargo.toml | 5 + cli/src/error.rs | 11 + cli/src/highlight.rs | 272 ++++++++++ cli/src/lib.rs | 1 + cli/src/loader.rs | 115 ++++- cli/src/main.rs | 48 +- cli/src/tests/helpers/fixtures.rs | 13 +- cli/src/tests/highlight_test.rs | 191 +++++++ cli/src/tests/mod.rs | 1 + cli/vendor/xterm-colors.json | 258 ++++++++++ highlight/Cargo.toml | 23 + highlight/src/escape.rs | 53 ++ highlight/src/lib.rs | 823 ++++++++++++++++++++++++++++++ 14 files changed, 1870 insertions(+), 22 deletions(-) create mode 100644 cli/src/highlight.rs create mode 100644 cli/src/tests/highlight_test.rs create mode 100644 cli/vendor/xterm-colors.json create mode 100644 highlight/Cargo.toml create mode 100644 highlight/src/escape.rs create mode 100644 highlight/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 9ca3a70a..50058336 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -210,6 +210,14 @@ dependencies = [ "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "lock_api" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "log" version = "0.4.6" @@ -263,6 +271,35 @@ name = "num-traits" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "once_cell" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "parking_lot 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "parking_lot" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + 
"parking_lot_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "parking_lot_core" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "proc-macro2" version = "0.4.24" @@ -502,6 +539,14 @@ name = "smallbitvec" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "smallvec" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "spin" version = "0.5.0" @@ -583,6 +628,7 @@ dependencies = [ "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "once_cell 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", @@ -593,6 +639,18 @@ dependencies = [ "smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter 0.3.8", + "tree-sitter-highlight 0.1.0", +] + +[[package]] +name = "tree-sitter-highlight" +version = "0.1.0" +dependencies = [ + "regex 1.1.0 
(registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", + "tree-sitter 0.3.8", ] [[package]] @@ -610,6 +668,14 @@ name = "unicode-xid" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "unreachable" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "utf8-ranges" version = "1.0.2" @@ -625,6 +691,11 @@ name = "version_check" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "void" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "winapi" version = "0.3.6" @@ -673,6 +744,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" "checksum libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)" = "10923947f84a519a45c8fefb7dd1b3e8c08747993381adee176d7a82b4195311" "checksum libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3ad660d7cb8c5822cd83d10897b0f1f1526792737a179e73896152f85b88c2" +"checksum lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c" "checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6" "checksum memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0a3eb002f0535929f1199681417029ebea04aadc0c7a4224b46be99c7f5d6a16" 
"checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" @@ -680,6 +752,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea" "checksum num-rational 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4e96f040177bb3da242b5b1ecf3f54b5d5af3efbbfb18608977a5d2767b22f10" "checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1" +"checksum once_cell 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "532c29a261168a45ce28948f9537ddd7a5dd272cc513b3017b1e82a88f962c37" +"checksum parking_lot 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ab41b4aed082705d1056416ae4468b6ea99d52599ecf3169b00088d43113e337" +"checksum parking_lot_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "94c8c7923936b28d546dfd14d4472eaf34c99b14e1c973a32b3e6d4eb04298c9" "checksum proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)" = "77619697826f31a02ae974457af0b29b723e5619e113e9397b8b82c6bd253f09" "checksum quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)" = "53fa22a1994bd0f9372d7a816207d8a2677ad0325b073f5c5332760f0fb62b5c" "checksum rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8356f47b32624fef5b3301c1be97e5944ecdd595409cc5da11d05f211db6cfbd" @@ -709,6 +784,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c" "checksum serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)" = 
"c37ccd6be3ed1fdf419ee848f7c758eb31b054d7cd3ae3600e3bae0adf569811" "checksum smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1764fe2b30ee783bfe3b9b37b2649d8d590b3148bb12e0079715d4d5c673562e" +"checksum smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "88aea073965ab29f6edb5493faf96ad662fb18aa9eeb186a3b7057951605ed15" "checksum spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "44363f6f51401c34e7be73db0db371c04705d35efbe9f7d6082e03a921a32c55" "checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" "checksum syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)" = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7" @@ -719,9 +795,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" +"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" "checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" "checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" "checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" +"checksum void 
1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" "checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 54d0eb5e..242ed72b 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -27,6 +27,7 @@ clap = "2.32" dirs = "1.0.2" hashbrown = "0.1" libloading = "0.5" +once_cell = "0.1.8" serde = "1.0" serde_derive = "1.0" regex-syntax = "0.6.4" @@ -37,6 +38,10 @@ rsass = "0.9" version = ">= 0.3.7" path = "../lib" +[dependencies.tree-sitter-highlight] +version = ">= 0.1.0" +path = "../highlight" + [dependencies.serde_json] version = "1.0" features = ["preserve_order"] diff --git a/cli/src/error.rs b/cli/src/error.rs index 4769b481..b0e52797 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -1,4 +1,5 @@ use std::io; +use tree_sitter_highlight::PropertySheetError; #[derive(Debug)] pub struct Error(pub String); @@ -42,3 +43,13 @@ impl From for Error { Error(error) } } + +impl From for Error { + fn from(error: PropertySheetError) -> Self { + match error { + PropertySheetError::InvalidFormat(e) => Self::from(e), + PropertySheetError::InvalidRegex(e) => Self::regex(&e.to_string()), + PropertySheetError::InvalidJSON(e) => Self::from(e), + } + } +} diff --git a/cli/src/highlight.rs b/cli/src/highlight.rs new file mode 100644 index 00000000..1651b98d --- /dev/null +++ b/cli/src/highlight.rs @@ -0,0 +1,272 @@ +use crate::error::Result; +use crate::loader::Loader; +use ansi_term::{Color, Style}; +use lazy_static::lazy_static; 
+use serde_json::Value; +use std::collections::HashMap; +use std::{fmt, fs, io, mem, path}; +use tree_sitter::{Language, PropertySheet}; +use tree_sitter_highlight::{highlight, highlight_html, HighlightEvent, Properties, Scope}; + +lazy_static! { + static ref CSS_STYLES_BY_COLOR_ID: Vec = + serde_json::from_str(include_str!("../vendor/xterm-colors.json")).unwrap(); +} + +pub struct Theme { + ansi_styles: Vec>, + css_styles: Vec>, +} + +impl Theme { + pub fn load(path: &path::Path) -> io::Result { + let json = fs::read_to_string(path)?; + Ok(Self::new(&json)) + } + + pub fn new(json: &str) -> Self { + let mut ansi_styles = vec![None; 30]; + let mut css_styles = vec![None; 30]; + if let Ok(colors) = serde_json::from_str::>(json) { + for (scope, style_value) in colors { + let mut style = Style::default(); + parse_style(&mut style, style_value); + ansi_styles[scope as usize] = Some(style); + css_styles[scope as usize] = Some(style_to_css(style)); + } + } + Self { + ansi_styles, + css_styles, + } + } + + fn ansi_style(&self, scope: Scope) -> Option<&Style> { + self.ansi_styles[scope as usize].as_ref() + } + + fn css_style(&self, scope: Scope) -> Option<&str> { + self.css_styles[scope as usize].as_ref().map(|s| s.as_str()) + } +} + +impl Default for Theme { + fn default() -> Self { + Theme::new( + r#" + { + "attribute": {"color": 124, "italic": true}, + "comment": {"color": 245, "italic": true}, + "constant.builtin": {"color": 94, "bold": true}, + "constant": 94, + "constructor": 136, + "embedded": null, + "function.builtin": {"color": 26, "bold": true}, + "function": 26, + "keyword": 56, + "number": {"color": 94, "bold": true}, + "property": 124, + "operator": {"color": 239, "bold": true}, + "punctuation.bracket": 239, + "punctuation.delimiter": 239, + "string.special": 30, + "string": 28, + "tag": {"color": 18}, + "variable.builtin": {"bold": true} + } + "#, + ) + } +} + +impl fmt::Debug for Theme { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, 
"{{")?; + let mut first = true; + for (i, style) in self.ansi_styles.iter().enumerate() { + if let Some(style) = style { + let scope = Scope::from_usize(i).unwrap(); + if !first { + write!(f, ", ")?; + } + write!(f, "{:?}: {:?}", scope, style)?; + first = false; + } + } + write!(f, "}}")?; + Ok(()) + } +} + +fn parse_style(style: &mut Style, json: Value) { + if let Value::Object(entries) = json { + for (property_name, value) in entries { + match property_name.as_str() { + "italic" => *style = style.italic(), + "bold" => *style = style.bold(), + "dimmed" => *style = style.dimmed(), + "underline" => *style = style.underline(), + "color" => { + if let Some(color) = parse_color(value) { + *style = style.fg(color); + } + } + _ => {} + } + } + } else if let Some(color) = parse_color(json) { + *style = style.fg(color); + } +} + +fn parse_color(json: Value) -> Option { + match json { + Value::Number(n) => match n.as_u64() { + Some(n) => Some(Color::Fixed(n as u8)), + _ => None, + }, + Value::String(s) => match s.to_lowercase().as_str() { + "blue" => Some(Color::Blue), + "cyan" => Some(Color::Cyan), + "green" => Some(Color::Green), + "purple" => Some(Color::Purple), + "red" => Some(Color::Red), + "white" => Some(Color::White), + "yellow" => Some(Color::Yellow), + s => { + if s.starts_with("#") && s.len() >= 7 { + if let (Ok(red), Ok(green), Ok(blue)) = ( + u8::from_str_radix(&s[1..3], 16), + u8::from_str_radix(&s[3..5], 16), + u8::from_str_radix(&s[5..7], 16), + ) { + Some(Color::RGB(red, green, blue)) + } else { + None + } + } else { + None + } + } + }, + _ => None, + } +} + +fn style_to_css(style: Style) -> String { + use std::fmt::Write; + let mut result = "style='".to_string(); + if style.is_bold { + write!(&mut result, "font-weight: bold;").unwrap(); + } + if style.is_italic { + write!(&mut result, "font-style: italic;").unwrap(); + } + if let Some(color) = style.foreground { + write!(&mut result, "color: {};", color_to_css(color)).unwrap(); + } + result.push('\''); + 
result +} + +fn color_to_css(color: Color) -> &'static str { + match color { + Color::Black => "black", + Color::Blue => "blue", + Color::Red => "red", + Color::Green => "green", + Color::Yellow => "yellow", + Color::Cyan => "cyan", + Color::Purple => "purple", + Color::White => "white", + Color::Fixed(n) => CSS_STYLES_BY_COLOR_ID[n as usize].as_str(), + _ => panic!("Unsupported color type"), + } +} + +pub fn ansi( + loader: &Loader, + theme: &Theme, + source: &[u8], + language: Language, + property_sheet: &PropertySheet, +) -> Result<()> { + use std::io::Write; + let stdout = io::stdout(); + let mut stdout = stdout.lock(); + let mut scope_stack = Vec::new(); + for event in highlight(loader, source, language, property_sheet)? { + match event { + HighlightEvent::Source(s) => { + if let Some(style) = scope_stack.last().and_then(|s| theme.ansi_style(*s)) { + write!(&mut stdout, "{}", style.paint(s))?; + } else { + write!(&mut stdout, "{}", s)?; + } + } + HighlightEvent::ScopeStart(s) => { + scope_stack.push(s); + } + HighlightEvent::ScopeEnd(_) => { + scope_stack.pop(); + } + } + } + Ok(()) +} + +pub const HTML_HEADER: &'static str = " + + + Tree-sitter Highlighting + + + +"; + +pub const HTML_FOOTER: &'static str = " + +"; + +pub fn html( + loader: &Loader, + theme: &Theme, + source: &[u8], + language: Language, + property_sheet: &PropertySheet, +) -> Result<()> { + use std::io::Write; + let stdout = io::stdout(); + let mut stdout = stdout.lock(); + write!(&mut stdout, "\n")?; + let lines = highlight_html(loader, source, language, property_sheet, |scope| { + if let Some(css_style) = theme.css_style(scope) { + css_style + } else { + "" + } + })?; + for (i, line) in lines.into_iter().enumerate() { + write!( + &mut stdout, + "\n", + i + 1, + line + )?; + } + write!(&mut stdout, "
{}{}
\n")?; + Ok(()) +} diff --git a/cli/src/lib.rs b/cli/src/lib.rs index 3a15b457..0ece9cac 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -1,5 +1,6 @@ pub mod error; pub mod generate; +pub mod highlight; pub mod loader; pub mod logger; pub mod parse; diff --git a/cli/src/loader.rs b/cli/src/loader.rs index 5c2a19a7..d19acf46 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -1,5 +1,6 @@ use super::error::{Error, Result}; use libloading::{Library, Symbol}; +use once_cell::unsync::OnceCell; use regex::{Regex, RegexBuilder}; use serde_derive::Deserialize; use std::collections::HashMap; @@ -9,6 +10,7 @@ use std::process::Command; use std::time::SystemTime; use std::{fs, mem}; use tree_sitter::{Language, PropertySheet}; +use tree_sitter_highlight::{load_property_sheet, LanguageRegistry, Properties}; #[cfg(unix)] const DYLIB_EXTENSION: &'static str = "so"; @@ -20,16 +22,18 @@ const BUILD_TARGET: &'static str = env!("BUILD_TARGET"); struct LanguageRepo { path: PathBuf, - language: Option, + language: OnceCell, configurations: Vec, } pub struct LanguageConfiguration { - _name: String, + pub name: String, _content_regex: Option, _first_line_regex: Option, + injection_regex: Option, file_types: Vec, - _highlight_property_sheet: Option>, + highlight_property_sheet_path: Option, + highlight_property_sheet: OnceCell>>, } pub struct Loader { @@ -76,7 +80,7 @@ impl Loader { } pub fn language_configuration_for_file_name( - &mut self, + &self, path: &Path, ) -> Result> { let ids = path @@ -100,20 +104,43 @@ impl Loader { Ok(None) } + pub fn language_configuration_for_injection_string( + &self, + string: &str, + ) -> Result> { + let mut best_match_length = 0; + let mut best_match_position = None; + for (i, repo) in self.language_repos.iter().enumerate() { + for (j, configuration) in repo.configurations.iter().enumerate() { + if let Some(injection_regex) = &configuration.injection_regex { + if let Some(mat) = injection_regex.find(string) { + let length = mat.end() - 
mat.start(); + if length > best_match_length { + best_match_position = Some((i, j)); + best_match_length = length; + } + } + } + } + } + if let Some((i, j)) = best_match_position { + let (language, configurations) = self.language_configuration_for_id(i)?; + Ok(Some((language, &configurations[j]))) + } else { + Ok(None) + } + } + fn language_configuration_for_id( - &mut self, + &self, id: usize, ) -> Result<(Language, &Vec)> { let repo = &self.language_repos[id]; - let language = if let Some(language) = repo.language { - language - } else { + let language = repo.language.get_or_try_init(|| { let src_path = repo.path.join("src"); - let language = self.load_language_at_path(&src_path, &src_path)?; - self.language_repos[id].language = Some(language); - language - }; - Ok((language, &self.language_repos[id].configurations)) + self.load_language_at_path(&src_path, &src_path) + })?; + Ok((*language, &self.language_repos[id].configurations)) } pub fn load_language_at_path(&self, src_path: &Path, header_path: &Path) -> Result { @@ -238,6 +265,8 @@ impl Loader { content_regex: Option, #[serde(rename = "first-line-regex")] first_line_regex: Option, + #[serde(rename = "injection-regex")] + injection_regex: Option, highlights: Option, } @@ -255,7 +284,7 @@ impl Loader { configurations .into_iter() .map(|conf| LanguageConfiguration { - _name: conf.name, + name: conf.name, file_types: conf.file_types.unwrap_or(Vec::new()), _content_regex: conf .content_regex @@ -263,7 +292,11 @@ impl Loader { _first_line_regex: conf .first_line_regex .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), - _highlight_property_sheet: conf.highlights.map(|d| Err(d.into())), + injection_regex: conf + .injection_regex + .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), + highlight_property_sheet_path: conf.highlights.map(|h| parser_path.join(h)), + highlight_property_sheet: OnceCell::new(), }) .collect() }); @@ -279,7 +312,7 @@ impl Loader { 
self.language_repos.push(LanguageRepo { path: parser_path.to_owned(), - language: None, + language: OnceCell::new(), configurations, }); @@ -287,6 +320,56 @@ impl Loader { } } +impl LanguageRegistry for Loader { + fn language_for_injection_string<'a>( + &'a self, + string: &str, + ) -> Option<(Language, &'a PropertySheet)> { + match self.language_configuration_for_injection_string(string) { + Err(message) => { + eprintln!( + "Failed to load language for injection string '{}': {}", + string, message.0 + ); + None + } + Ok(None) => None, + Ok(Some((language, configuration))) => { + match configuration.highlight_property_sheet(language) { + Err(message) => { + eprintln!( + "Failed to load property sheet for injection string '{}': {}", + string, message.0 + ); + None + } + Ok(None) => None, + Ok(Some(sheet)) => Some((language, sheet)), + } + } + } + } +} + +impl LanguageConfiguration { + pub fn highlight_property_sheet( + &self, + language: Language, + ) -> Result>> { + self.highlight_property_sheet + .get_or_try_init(|| { + if let Some(path) = &self.highlight_property_sheet_path { + let sheet_json = fs::read_to_string(path)?; + let sheet = load_property_sheet(language, &sheet_json)?; + Ok(Some(sheet)) + } else { + Ok(None) + } + }) + .map(Option::as_ref) + } +} + fn needs_recompile( lib_path: &Path, parser_c_path: &Path, diff --git a/cli/src/main.rs b/cli/src/main.rs index eb848831..9cd4e131 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -4,8 +4,7 @@ use std::fs; use std::path::Path; use std::process::exit; use std::usize; -use tree_sitter_cli::loader::Loader; -use tree_sitter_cli::{error, generate, logger, parse, properties, test}; +use tree_sitter_cli::{error, generate, highlight, loader, logger, parse, properties, test}; fn main() { if let Err(e) = run() { @@ -64,14 +63,30 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("debug").long("debug").short("d")) .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")), ) + .subcommand( + 
SubCommand::with_name("highlight") + .about("Highlight a file") + .arg( + Arg::with_name("path") + .index(1) + .multiple(true) + .required(true), + ) + .arg(Arg::with_name("html").long("html").short("h")), + ) .get_matches(); let home_dir = dirs::home_dir().unwrap(); let current_dir = env::current_dir().unwrap(); let config_dir = home_dir.join(".tree-sitter"); + let theme_path = config_dir.join("theme.json"); + let parsers_dir = config_dir.join("parsers"); - fs::create_dir_all(&config_dir).unwrap(); - let mut loader = Loader::new(config_dir); + // TODO - make configurable + let parser_repo_paths = vec![home_dir.join("github")]; + + fs::create_dir_all(&parsers_dir).unwrap(); + let mut loader = loader::Loader::new(config_dir); if let Some(matches) = matches.subcommand_matches("generate") { if matches.is_present("log") { @@ -111,7 +126,7 @@ fn run() -> error::Result<()> { let debug_graph = matches.is_present("debug-graph"); let quiet = matches.is_present("quiet"); let time = matches.is_present("time"); - loader.find_all_languages(&vec![home_dir.join("github")])?; + loader.find_all_languages(&parser_repo_paths)?; let paths = matches .values_of("path") .unwrap() @@ -144,6 +159,29 @@ fn run() -> error::Result<()> { if has_error { return Err(error::Error(String::new())); } + } else if let Some(matches) = matches.subcommand_matches("highlight") { + loader.find_all_languages(&parser_repo_paths)?; + let theme = highlight::Theme::load(&theme_path).unwrap_or_default(); + let paths = matches.values_of("path").unwrap().into_iter(); + let html_mode = matches.is_present("html"); + + if html_mode { + println!("{}", highlight::HTML_HEADER); + } + + for path in paths { + let path = Path::new(path); + if let Some((language, config)) = loader.language_configuration_for_file_name(path)? { + if let Some(sheet) = config.highlight_property_sheet(language)? 
{ + let source = fs::read(path)?; + if html_mode { + highlight::html(&loader, &theme, &source, language, sheet)?; + } else { + highlight::ansi(&loader, &theme, &source, language, sheet)?; + } + } + } + } } Ok(()) diff --git a/cli/src/tests/helpers/fixtures.rs b/cli/src/tests/helpers/fixtures.rs index 8fc00038..e7ba2e55 100644 --- a/cli/src/tests/helpers/fixtures.rs +++ b/cli/src/tests/helpers/fixtures.rs @@ -2,7 +2,8 @@ use crate::loader::Loader; use lazy_static::lazy_static; use std::fs; use std::path::{Path, PathBuf}; -use tree_sitter::Language; +use tree_sitter::{Language, PropertySheet}; +use tree_sitter_highlight::{load_property_sheet, Properties}; include!("./dirs.rs"); @@ -20,6 +21,16 @@ pub fn get_language(name: &str) -> Language { .unwrap() } +pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet { + let path = GRAMMARS_DIR + .join(language_name) + .join("src") + .join(sheet_name); + let json = fs::read_to_string(path).unwrap(); + let language = get_language(language_name); + load_property_sheet(language, &json).unwrap() +} + pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language { let parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", name)); if !fs::read_to_string(&parser_c_path) diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs new file mode 100644 index 00000000..ea14a1c2 --- /dev/null +++ b/cli/src/tests/highlight_test.rs @@ -0,0 +1,191 @@ +use super::helpers::fixtures::{get_language, get_property_sheet}; +use lazy_static::lazy_static; +use tree_sitter::{Language, PropertySheet}; +use tree_sitter_highlight::{ + highlight, highlight_html, HighlightEvent, LanguageRegistry, Properties, Scope, +}; + +lazy_static! 
{ + static ref JS_SHEET: PropertySheet = + get_property_sheet("javascript", "highlights.json"); + static ref HTML_SHEET: PropertySheet = + get_property_sheet("html", "highlights.json"); + static ref SCOPE_CLASS_STRINGS: Vec = { + let mut result = Vec::new(); + let mut i = 0; + while let Some(scope) = Scope::from_usize(i) { + result.push(format!("class={:?}", scope)); + i += 1; + } + result + }; +} + +#[test] +fn test_highlighting_injected_html_in_javascript() { + let source = vec!["const s = html `
${a < b}
`;"].join("\n"); + + assert_eq!( + &to_token_vector(&source, get_language("javascript"), &JS_SHEET).unwrap(), + &[vec![ + ("const", vec![Scope::Keyword]), + (" ", vec![]), + ("s", vec![Scope::Variable]), + (" ", vec![]), + ("=", vec![Scope::Operator]), + (" ", vec![]), + ("html", vec![Scope::Function]), + (" ", vec![]), + ("`<", vec![Scope::String]), + ("div", vec![Scope::String, Scope::Tag]), + (">", vec![Scope::String]), + ( + "${", + vec![Scope::String, Scope::Embedded, Scope::PunctuationSpecial] + ), + ("a", vec![Scope::String, Scope::Embedded, Scope::Variable]), + (" ", vec![Scope::String, Scope::Embedded]), + ("<", vec![Scope::String, Scope::Embedded, Scope::Operator]), + (" ", vec![Scope::String, Scope::Embedded]), + ("b", vec![Scope::String, Scope::Embedded, Scope::Variable]), + ( + "}", + vec![Scope::String, Scope::Embedded, Scope::PunctuationSpecial] + ), + ("`", vec![Scope::String]), + (";", vec![Scope::PunctuationDelimiter]), + ]] + ); +} + +#[test] +fn test_highlighting_injected_javascript_in_html() { + let source = vec![ + "", + " ", + "", + ] + .join("\n"); + + assert_eq!( + &to_token_vector(&source, get_language("html"), &HTML_SHEET).unwrap(), + &[ + vec![("<", vec![]), ("body", vec![Scope::Tag]), (">", vec![]),], + vec![(" <", vec![]), ("script", vec![Scope::Tag]), (">", vec![]),], + vec![ + (" ", vec![]), + ("const", vec![Scope::Keyword]), + (" ", vec![]), + ("x", vec![Scope::Variable]), + (" ", vec![]), + ("=", vec![Scope::Operator]), + (" ", vec![]), + ("new", vec![Scope::Keyword]), + (" ", vec![]), + ("Thing", vec![Scope::Constructor]), + ("(", vec![Scope::PunctuationBracket]), + (")", vec![Scope::PunctuationBracket]), + (";", vec![Scope::PunctuationDelimiter]), + ], + vec![ + (" ", vec![]), + ], + vec![("", vec![]),], + ] + ); +} + +#[test] +fn test_highlighting_multiline_scopes_to_html() { + let source = vec![ + "const SOMETHING = `", + " one ${", + " two()", + " } three", + "`", + ] + .join("\n"); + + assert_eq!( + &to_html(&source, 
get_language("javascript"), &JS_SHEET,).unwrap(), + &[ + "const SOMETHING = `\n".to_string(), + " one ${\n".to_string(), + " two()\n".to_string(), + " } three\n".to_string(), + "`\n".to_string(), + ] + ); +} + +struct TestLanguageRegistry; + +impl LanguageRegistry for TestLanguageRegistry { + fn language_for_injection_string( + &self, + string: &str, + ) -> Option<(Language, &PropertySheet)> { + match string { + "javascript" => Some((get_language("javascript"), &JS_SHEET)), + "html" => Some((get_language("html"), &HTML_SHEET)), + _ => None, + } + } +} + +fn to_html<'a>( + src: &'a str, + language: Language, + property_sheet: &'a PropertySheet, +) -> Result, String> { + highlight_html( + &TestLanguageRegistry, + src.as_bytes(), + language, + property_sheet, + |scope| SCOPE_CLASS_STRINGS[scope as usize].as_str(), + ) +} + +fn to_token_vector<'a>( + src: &'a str, + language: Language, + property_sheet: &'a PropertySheet, +) -> Result)>>, String> { + let mut lines = Vec::new(); + let mut scopes = Vec::new(); + let mut line = Vec::new(); + for event in highlight( + &TestLanguageRegistry, + src.as_bytes(), + language, + property_sheet, + )? 
{ + match event { + HighlightEvent::ScopeStart(s) => scopes.push(s), + HighlightEvent::ScopeEnd(s) => { + assert_eq!(*scopes.last().unwrap(), s); + scopes.pop(); + } + HighlightEvent::Source(s) => { + for (i, l) in s.lines().enumerate() { + if i > 0 { + lines.push(line); + line = Vec::new(); + } + if l.len() > 0 { + line.push((l, scopes.clone())); + } + } + } + } + } + lines.push(line); + Ok(lines) +} diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index af2b4582..143e8297 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -1,5 +1,6 @@ mod corpus_test; mod helpers; +mod highlight_test; mod node_test; mod parser_test; mod properties_test; diff --git a/cli/vendor/xterm-colors.json b/cli/vendor/xterm-colors.json new file mode 100644 index 00000000..47994496 --- /dev/null +++ b/cli/vendor/xterm-colors.json @@ -0,0 +1,258 @@ +[ + "#000000", + "#800000", + "#008000", + "#808000", + "#000080", + "#800080", + "#008080", + "#c0c0c0", + "#808080", + "#ff0000", + "#00ff00", + "#ffff00", + "#0000ff", + "#ff00ff", + "#00ffff", + "#ffffff", + "#000000", + "#00005f", + "#000087", + "#0000af", + "#0000d7", + "#0000ff", + "#005f00", + "#005f5f", + "#005f87", + "#005faf", + "#005fd7", + "#005fff", + "#008700", + "#00875f", + "#008787", + "#0087af", + "#0087d7", + "#0087ff", + "#00af00", + "#00af5f", + "#00af87", + "#00afaf", + "#00afd7", + "#00afff", + "#00d700", + "#00d75f", + "#00d787", + "#00d7af", + "#00d7d7", + "#00d7ff", + "#00ff00", + "#00ff5f", + "#00ff87", + "#00ffaf", + "#00ffd7", + "#00ffff", + "#5f0000", + "#5f005f", + "#5f0087", + "#5f00af", + "#5f00d7", + "#5f00ff", + "#5f5f00", + "#5f5f5f", + "#5f5f87", + "#5f5faf", + "#5f5fd7", + "#5f5fff", + "#5f8700", + "#5f875f", + "#5f8787", + "#5f87af", + "#5f87d7", + "#5f87ff", + "#5faf00", + "#5faf5f", + "#5faf87", + "#5fafaf", + "#5fafd7", + "#5fafff", + "#5fd700", + "#5fd75f", + "#5fd787", + "#5fd7af", + "#5fd7d7", + "#5fd7ff", + "#5fff00", + "#5fff5f", + "#5fff87", + "#5fffaf", + "#5fffd7", + 
"#5fffff", + "#870000", + "#87005f", + "#870087", + "#8700af", + "#8700d7", + "#8700ff", + "#875f00", + "#875f5f", + "#875f87", + "#875faf", + "#875fd7", + "#875fff", + "#878700", + "#87875f", + "#878787", + "#8787af", + "#8787d7", + "#8787ff", + "#87af00", + "#87af5f", + "#87af87", + "#87afaf", + "#87afd7", + "#87afff", + "#87d700", + "#87d75f", + "#87d787", + "#87d7af", + "#87d7d7", + "#87d7ff", + "#87ff00", + "#87ff5f", + "#87ff87", + "#87ffaf", + "#87ffd7", + "#87ffff", + "#af0000", + "#af005f", + "#af0087", + "#af00af", + "#af00d7", + "#af00ff", + "#af5f00", + "#af5f5f", + "#af5f87", + "#af5faf", + "#af5fd7", + "#af5fff", + "#af8700", + "#af875f", + "#af8787", + "#af87af", + "#af87d7", + "#af87ff", + "#afaf00", + "#afaf5f", + "#afaf87", + "#afafaf", + "#afafd7", + "#afafff", + "#afd700", + "#afd75f", + "#afd787", + "#afd7af", + "#afd7d7", + "#afd7ff", + "#afff00", + "#afff5f", + "#afff87", + "#afffaf", + "#afffd7", + "#afffff", + "#d70000", + "#d7005f", + "#d70087", + "#d700af", + "#d700d7", + "#d700ff", + "#d75f00", + "#d75f5f", + "#d75f87", + "#d75faf", + "#d75fd7", + "#d75fff", + "#d78700", + "#d7875f", + "#d78787", + "#d787af", + "#d787d7", + "#d787ff", + "#d7af00", + "#d7af5f", + "#d7af87", + "#d7afaf", + "#d7afd7", + "#d7afff", + "#d7d700", + "#d7d75f", + "#d7d787", + "#d7d7af", + "#d7d7d7", + "#d7d7ff", + "#d7ff00", + "#d7ff5f", + "#d7ff87", + "#d7ffaf", + "#d7ffd7", + "#d7ffff", + "#ff0000", + "#ff005f", + "#ff0087", + "#ff00af", + "#ff00d7", + "#ff00ff", + "#ff5f00", + "#ff5f5f", + "#ff5f87", + "#ff5faf", + "#ff5fd7", + "#ff5fff", + "#ff8700", + "#ff875f", + "#ff8787", + "#ff87af", + "#ff87d7", + "#ff87ff", + "#ffaf00", + "#ffaf5f", + "#ffaf87", + "#ffafaf", + "#ffafd7", + "#ffafff", + "#ffd700", + "#ffd75f", + "#ffd787", + "#ffd7af", + "#ffd7d7", + "#ffd7ff", + "#ffff00", + "#ffff5f", + "#ffff87", + "#ffffaf", + "#ffffd7", + "#ffffff", + "#080808", + "#121212", + "#1c1c1c", + "#262626", + "#303030", + "#3a3a3a", + "#444444", + "#4e4e4e", + "#585858", 
+ "#626262", + "#6c6c6c", + "#767676", + "#808080", + "#8a8a8a", + "#949494", + "#9e9e9e", + "#a8a8a8", + "#b2b2b2", + "#bcbcbc", + "#c6c6c6", + "#d0d0d0", + "#dadada", + "#e4e4e4", + "#eeeeee" +] diff --git a/highlight/Cargo.toml b/highlight/Cargo.toml new file mode 100644 index 00000000..dd33add2 --- /dev/null +++ b/highlight/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "tree-sitter-highlight" +description = "Library for performing syntax highlighting with Tree-sitter" +version = "0.1.0" +authors = [ + "Max Brunsfeld ", + "Tim Clem " +] +license = "MIT" +readme = "README.md" +edition = "2018" +keywords = ["incremental", "parsing", "syntax", "highlighting"] +categories = ["parsing", "text-editors"] + +[dependencies] +regex = "1" +serde = "1.0" +serde_json = "1.0" +serde_derive = "1.0" + +[dependencies.tree-sitter] +version = ">= 0.3.7" +path = "../lib" diff --git a/highlight/src/escape.rs b/highlight/src/escape.rs new file mode 100644 index 00000000..882f160c --- /dev/null +++ b/highlight/src/escape.rs @@ -0,0 +1,53 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! HTML Escaping +//! +//! This module contains one unit-struct which can be used to HTML-escape a +//! string of text (for use in a format string). + +use std::fmt; + +/// Wrapper struct which will emit the HTML-escaped version of the contained +/// string when passed to a format string. 
/// Wrapper struct which will emit the HTML-escaped version of the contained
/// string when passed to a format string.
pub struct Escape<'a>(pub &'a str);

impl<'a> fmt::Display for Escape<'a> {
    /// Writes the wrapped string, replacing `<`, `>`, `&`, `'` and `"` with
    /// their HTML entities and copying every other byte through unchanged.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        let Escape(s) = *self;
        // Start of the pending run of text that needs no escaping.
        let mut last = 0;
        for (i, byte) in s.bytes().enumerate() {
            let entity = match byte {
                b'>' => "&gt;",
                b'<' => "&lt;",
                b'&' => "&amp;",
                b'\'' => "&#39;",
                b'"' => "&quot;",
                _ => continue,
            };
            // All escaped characters are ASCII, so `i` is always a char
            // boundary and slicing the `str` here cannot panic.
            fmt.write_str(&s[last..i])?;
            fmt.write_str(entity)?;
            last = i + 1;
        }

        if last < s.len() {
            fmt.write_str(&s[last..])?;
        }
        Ok(())
    }
}
FunctionBuiltin, + Keyword, + Number, + Operator, + Property, + PropertyBuiltin, + Punctuation, + PunctuationBracket, + PunctuationDelimiter, + PunctuationSpecial, + String, + StringSpecial, + Tag, + Type, + TypeBuiltin, + Variable, + VariableBuiltin, + Unknown, +} + +struct Layer<'a> { + _tree: Tree, + cursor: TreePropertyCursor<'a, Properties>, + ranges: Vec, + at_node_end: bool, +} + +struct Highlighter<'a, T: LanguageRegistry> { + language_registry: &'a T, + source: &'a [u8], + source_offset: usize, + parser: Parser, + layers: Vec>, + utf8_error_len: Option, +} + +#[derive(Copy, Clone, Debug)] +pub enum HighlightEvent<'a> { + Source(&'a str), + ScopeStart(Scope), + ScopeEnd(Scope), +} + +#[derive(Debug, Deserialize)] +#[serde(untagged)] +enum TreePathArgJSON { + TreePath(TreePathJSON), + Number(isize), + String(String), +} + +#[derive(Debug, Deserialize)] +#[serde(tag = "name")] +enum TreePathJSON { + #[serde(rename = "this")] + This, + #[serde(rename = "child")] + Child { args: Vec }, + #[serde(rename = "next")] + Next { args: Vec }, + #[serde(rename = "children")] + Children { args: Vec }, +} + +#[derive(Debug, Deserialize)] +#[serde(untagged)] +enum InjectionLanguageJSON { + List(Vec), + TreePath(TreePathJSON), + Literal(String), +} + +#[derive(Debug, Deserialize)] +#[serde(untagged)] +enum InjectionContentJSON { + List(Vec), + TreePath(TreePathJSON), +} + +#[derive(Debug, Deserialize)] +struct PropertiesJSON { + scope: Option, + #[serde(rename = "injection-language")] + injection_language: Option, + #[serde(rename = "injection-content")] + injection_content: Option, +} + +#[derive(Debug)] +pub enum PropertySheetError { + InvalidJSON(serde_json::Error), + InvalidRegex(regex::Error), + InvalidFormat(String), +} + +pub fn load_property_sheet( + language: Language, + json: &str, +) -> Result, PropertySheetError> { + let sheet = PropertySheet::new(language, json).map_err(|e| match e { + tree_sitter::PropertySheetError::InvalidJSON(e) => 
PropertySheetError::InvalidJSON(e), + tree_sitter::PropertySheetError::InvalidRegex(e) => PropertySheetError::InvalidRegex(e), + })?; + let sheet = sheet + .map(|p| Properties::new(p, language)) + .map_err(PropertySheetError::InvalidFormat)?; + Ok(sheet) +} + +impl Scope { + pub fn from_usize(i: usize) -> Option { + if i <= (Scope::Unknown as usize) { + Some(unsafe { transmute(i as u16) }) + } else { + None + } + } +} + +impl Properties { + fn new(json: PropertiesJSON, language: Language) -> Result { + let injections = match (json.injection_language, json.injection_content) { + (None, None) => Ok(Vec::new()), + (Some(_), None) => Err( + "Must specify an injection-content along with an injection-language".to_string(), + ), + (None, Some(_)) => Err( + "Must specify an injection-language along with an injection-content".to_string(), + ), + (Some(language_json), Some(content_json)) => { + let languages = match language_json { + InjectionLanguageJSON::List(list) => { + let mut result = Vec::with_capacity(list.len()); + for element in list { + result.push(match element { + InjectionLanguageJSON::TreePath(p) => { + let mut result = Vec::new(); + Self::flatten_tree_path(p, &mut result, language)?; + InjectionLanguage::TreePath(result) + } + InjectionLanguageJSON::Literal(s) => InjectionLanguage::Literal(s), + InjectionLanguageJSON::List(_) => { + panic!("Injection-language cannot be a list of lists") + } + }) + } + result + } + InjectionLanguageJSON::TreePath(p) => vec![{ + let mut result = Vec::new(); + Self::flatten_tree_path(p, &mut result, language)?; + InjectionLanguage::TreePath(result) + }], + InjectionLanguageJSON::Literal(s) => vec![InjectionLanguage::Literal(s)], + }; + + let contents = match content_json { + InjectionContentJSON::List(l) => { + let mut result = Vec::with_capacity(l.len()); + for element in l { + result.push(match element { + InjectionContentJSON::TreePath(p) => { + let mut result = Vec::new(); + Self::flatten_tree_path(p, &mut result, 
language)?; + result + } + InjectionContentJSON::List(_) => { + panic!("Injection-content cannot be a list of lists") + } + }) + } + result + } + InjectionContentJSON::TreePath(p) => vec![{ + let mut result = Vec::new(); + Self::flatten_tree_path(p, &mut result, language)?; + result + }], + }; + + if languages.len() == contents.len() { + Ok(languages + .into_iter() + .zip(contents.into_iter()) + .map(|(language, content)| Injection { language, content }) + .collect()) + } else { + Err(format!( + "Mismatch: got {} injection-language values but {} injection-content values", + languages.len(), + contents.len(), + )) + } + } + }?; + + Ok(Self { + scope: json.scope, + injections, + }) + } + + // Transform a tree path from the format expressed directly in the property sheet + // (nested function calls), to a flat sequence of steps for transforming a list of + // nodes. This way, we can evaluate these tree paths with no recursion and a single + // vector of intermediate storage. + fn flatten_tree_path( + p: TreePathJSON, + steps: &mut Vec, + language: Language, + ) -> Result<(), String> { + match p { + TreePathJSON::This => {} + TreePathJSON::Child { args } => { + let (tree_path, index, kinds) = Self::parse_args("child", args, language)?; + Self::flatten_tree_path(tree_path, steps, language)?; + steps.push(TreeStep::Child { + index: index + .ok_or_else(|| "The `child` function requires an index".to_string())?, + kinds: kinds, + }); + } + TreePathJSON::Children { args } => { + let (tree_path, _, kinds) = Self::parse_args("children", args, language)?; + Self::flatten_tree_path(tree_path, steps, language)?; + steps.push(TreeStep::Children { kinds }); + } + TreePathJSON::Next { args } => { + let (tree_path, _, kinds) = Self::parse_args("next", args, language)?; + Self::flatten_tree_path(tree_path, steps, language)?; + steps.push(TreeStep::Next { kinds }); + } + } + Ok(()) + } + + fn parse_args( + name: &str, + args: Vec, + language: Language, + ) -> Result<(TreePathJSON, 
Option, Option>), String> { + let tree_path; + let mut index = None; + let mut kinds = Vec::new(); + let mut iter = args.into_iter(); + + match iter.next() { + Some(TreePathArgJSON::TreePath(p)) => tree_path = p, + _ => { + return Err(format!( + "First argument to `{}()` must be a tree path", + name + )); + } + } + + for arg in iter { + match arg { + TreePathArgJSON::TreePath(_) => { + return Err(format!( + "Other arguments to `{}()` must be strings or numbers", + name + )); + } + TreePathArgJSON::Number(i) => index = Some(i), + TreePathArgJSON::String(s) => kinds.push(s), + } + } + + if kinds.len() > 0 { + let mut kind_ids = Vec::new(); + for i in 0..(language.node_kind_count() as u16) { + if kinds.iter().any(|s| s == language.node_kind_for_id(i)) + && language.node_kind_is_named(i) + { + kind_ids.push(i); + } + } + if kind_ids.len() == 0 { + return Err(format!("Non-existent node kinds: {:?}", kinds)); + } + + Ok((tree_path, index, Some(kind_ids))) + } else { + Ok((tree_path, index, None)) + } + } +} + +impl<'a, T: LanguageRegistry> Highlighter<'a, T> { + fn new( + language_registry: &'a T, + source: &'a [u8], + language: Language, + property_sheet: &'a PropertySheet, + ) -> Result { + let mut parser = Parser::new(); + parser.set_language(language)?; + let tree = parser + .parse(source, None) + .ok_or_else(|| format!("Tree-sitter: failed to parse"))?; + Ok(Self { + language_registry, + source, + source_offset: 0, + parser, + layers: vec![Layer::new( + source, + tree, + property_sheet, + vec![Range { + start_byte: 0, + end_byte: usize::MAX, + start_point: Point::new(0, 0), + end_point: Point::new(usize::MAX, usize::MAX), + }], + )], + utf8_error_len: None, + }) + } + + fn emit_source(&mut self, next_offset: usize) -> Option> { + let input = &self.source[self.source_offset..next_offset]; + match str::from_utf8(input) { + Ok(valid) => { + self.source_offset = next_offset; + Some(HighlightEvent::Source(valid)) + } + Err(error) => { + if let Some(error_len) = 
error.error_len() { + if error.valid_up_to() > 0 { + let prefix = &input[0..error.valid_up_to()]; + self.utf8_error_len = Some(error_len); + Some(HighlightEvent::Source(unsafe { + str::from_utf8_unchecked(prefix) + })) + } else { + self.source_offset += error_len; + Some(HighlightEvent::Source("\u{FFFD}")) + } + } else { + None + } + } + } + } + + fn process_tree_step(&self, step: &TreeStep, nodes: &mut Vec) { + let len = nodes.len(); + for i in 0..len { + let node = nodes[i]; + match step { + TreeStep::Child { index, kinds } => { + let index = if *index >= 0 { + *index as usize + } else { + (node.child_count() as isize + *index) as usize + }; + if let Some(child) = node.child(index) { + if let Some(kinds) = kinds { + if kinds.contains(&child.kind_id()) { + nodes.push(child); + } + } else { + nodes.push(child); + } + } + } + TreeStep::Children { kinds } => { + for child in node.children() { + if let Some(kinds) = kinds { + if kinds.contains(&child.kind_id()) { + nodes.push(child); + } + } else { + nodes.push(child); + } + } + } + TreeStep::Next { .. } => unimplemented!(), + } + } + nodes.drain(0..len); + } + + fn nodes_for_tree_path(&self, node: Node<'a>, steps: &Vec) -> Vec> { + let mut nodes = vec![node]; + for step in steps.iter() { + self.process_tree_step(step, &mut nodes); + } + nodes + } + + // An injected language name may either be specified as a fixed string, or based + // on the text of some node in the syntax tree. + fn injection_language_string( + &self, + node: &Node, + language: &InjectionLanguage, + ) -> Option { + match language { + InjectionLanguage::Literal(s) => Some(s.to_string()), + InjectionLanguage::TreePath(steps) => self + .nodes_for_tree_path(*node, steps) + .first() + .and_then(|node| { + str::from_utf8(&self.source[node.start_byte()..node.end_byte()]) + .map(|s| s.to_owned()) + .ok() + }), + } + } + + // Compute the ranges that should be included when parsing an injection. 
+ // This takes into account two things: + // * `nodes` - Every injection takes place within a set of nodes. The injection ranges + // are the ranges of those nodes, *minus* the ranges of those nodes' children. + // * `parent_ranges` - The new injection may be nested inside of *another* injection + // (e.g. JavaScript within HTML within ERB). The parent injection's ranges must + // be taken into account. + fn intersect_ranges(parent_ranges: &Vec, nodes: &Vec) -> Vec { + let mut result = Vec::new(); + let mut parent_range_iter = parent_ranges.iter(); + let mut parent_range = parent_range_iter + .next() + .expect("Layers should only be constructed with non-empty ranges vectors"); + for node in nodes.iter() { + let range = node.range(); + let mut preceding_range = Range { + start_byte: 0, + start_point: Point::new(0, 0), + end_byte: range.start_byte, + end_point: range.start_point, + }; + let following_range = Range { + start_byte: node.end_byte(), + start_point: node.end_position(), + end_byte: usize::MAX, + end_point: Point::new(usize::MAX, usize::MAX), + }; + + for child_range in node + .children() + .map(|c| c.range()) + .chain([following_range].iter().cloned()) + { + let mut range = Range { + start_byte: preceding_range.end_byte, + start_point: preceding_range.end_point, + end_byte: child_range.start_byte, + end_point: child_range.start_point, + }; + preceding_range = child_range; + + if range.end_byte < parent_range.start_byte { + continue; + } + + while parent_range.start_byte <= range.end_byte { + if parent_range.end_byte > range.start_byte { + if range.start_byte < parent_range.start_byte { + range.start_byte = parent_range.start_byte; + range.start_point = parent_range.start_point; + } + + if parent_range.end_byte < range.end_byte { + if range.start_byte < parent_range.end_byte { + result.push(Range { + start_byte: range.start_byte, + start_point: range.start_point, + end_byte: parent_range.end_byte, + end_point: parent_range.end_point, + }); + } + 
range.start_byte = parent_range.end_byte; + range.start_point = parent_range.end_point; + } else { + if range.start_byte < range.end_byte { + result.push(range); + } + break; + } + } + + if let Some(next_range) = parent_range_iter.next() { + parent_range = next_range; + } else { + return result; + } + } + } + } + result + } + + fn add_layer(&mut self, language_string: &str, ranges: Vec) { + if let Some((language, property_sheet)) = self + .language_registry + .language_for_injection_string(language_string) + { + self.parser + .set_language(language) + .expect("Failed to set language"); + self.parser.set_included_ranges(&ranges); + let tree = self + .parser + .parse(self.source, None) + .expect("Failed to parse"); + let layer = Layer::new(self.source, tree, property_sheet, ranges); + match self + .layers + .binary_search_by_key(&(layer.offset(), 1), |l| (l.offset(), 0)) + { + Ok(i) | Err(i) => self.layers.insert(i, layer), + }; + } + } +} + +impl<'a, T: LanguageRegistry> Iterator for Highlighter<'a, T> { + type Item = HighlightEvent<'a>; + + fn next(&mut self) -> Option { + if let Some(utf8_error_len) = self.utf8_error_len.take() { + self.source_offset += utf8_error_len; + return Some(HighlightEvent::Source("\u{FFFD}")); + } + + while !self.layers.is_empty() { + let first_layer = &self.layers[0]; + let properties = &first_layer.cursor.node_properties(); + + // Add any injections for the current node. 
+ if !first_layer.at_node_end { + let node = first_layer.cursor.node(); + let injections = properties + .injections + .iter() + .filter_map(|Injection { language, content }| { + if let Some(language) = self.injection_language_string(&node, language) { + let nodes = self.nodes_for_tree_path(node, content); + let ranges = Self::intersect_ranges(&first_layer.ranges, &nodes); + if ranges.len() > 0 { + return Some((language, ranges)); + } + } + None + }) + .collect::>(); + + for (language, ranges) in injections { + self.add_layer(&language, ranges); + } + } + + // Determine if any scopes start or end at the current position. + let scope_event; + if let Some(scope) = properties.scope { + let next_offset = cmp::min(self.source.len(), self.layers[0].offset()); + + // Before returning any scope boundaries, return any remaining slice of + // the source code the precedes that scope boundary. + if self.source_offset < next_offset { + return self.emit_source(next_offset); + } + + scope_event = if self.layers[0].at_node_end { + Some(HighlightEvent::ScopeEnd(scope)) + } else { + Some(HighlightEvent::ScopeStart(scope)) + }; + } else { + scope_event = None; + }; + + // Advance the current layer's tree cursor. This might cause that cursor to move + // beyond one of the other layers' cursors for a different syntax tree, so we need + // to re-sort the layers. If the cursor is already at the end of its syntax tree, + // remove it. + if self.layers[0].advance() { + self.layers.sort_unstable_by_key(|layer| layer.offset()); + } else { + self.layers.remove(0); + } + + if scope_event.is_some() { + return scope_event; + } + } + + if self.source_offset < self.source.len() { + self.emit_source(self.source.len()) + } else { + None + } + } +} + +impl<'a> Layer<'a> { + fn new( + source: &'a [u8], + tree: Tree, + sheet: &'a PropertySheet, + ranges: Vec, + ) -> Self { + // The cursor's lifetime parameter indicates that the tree must outlive the cursor. 
+ // But because the tree is really a pointer to the heap, the cursor can remain + // valid when the tree is moved. There's no way to express this with lifetimes + // right now, so we have to `transmute` the cursor's lifetime. + let cursor = unsafe { transmute(tree.walk_with_properties(sheet, source)) }; + Self { + _tree: tree, + cursor, + ranges, + at_node_end: false, + } + } + + fn offset(&self) -> usize { + if self.at_node_end { + self.cursor.node().end_byte() + } else { + self.cursor.node().start_byte() + } + } + + fn advance(&mut self) -> bool { + if self.at_node_end { + if self.cursor.goto_next_sibling() { + self.at_node_end = false; + } else if !self.cursor.goto_parent() { + return false; + } + } else if !self.cursor.goto_first_child() { + self.at_node_end = true; + } + true + } +} + +impl<'de> Deserialize<'de> for Scope { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + match s.as_str() { + "attribute" => Ok(Scope::Attribute), + "comment" => Ok(Scope::Comment), + "constant" => Ok(Scope::Constant), + "constant.builtin" => Ok(Scope::ConstantBuiltin), + "constructor" => Ok(Scope::Constructor), + "constructor.builtin" => Ok(Scope::ConstructorBuiltin), + "embedded" => Ok(Scope::Embedded), + "escape" => Ok(Scope::Escape), + "function" => Ok(Scope::Function), + "function.builtin" => Ok(Scope::FunctionBuiltin), + "keyword" => Ok(Scope::Keyword), + "number" => Ok(Scope::Number), + "operator" => Ok(Scope::Operator), + "property" => Ok(Scope::Property), + "property.builtin" => Ok(Scope::PropertyBuiltin), + "punctuation" => Ok(Scope::Punctuation), + "punctuation.bracket" => Ok(Scope::PunctuationBracket), + "punctuation.delimiter" => Ok(Scope::PunctuationDelimiter), + "punctuation.special" => Ok(Scope::PunctuationSpecial), + "string" => Ok(Scope::String), + "string.special" => Ok(Scope::StringSpecial), + "type" => Ok(Scope::Type), + "type.builtin" => Ok(Scope::TypeBuiltin), + "variable" => 
Ok(Scope::Variable), + "variable.builtin" => Ok(Scope::VariableBuiltin), + "tag" => Ok(Scope::Tag), + _ => Ok(Scope::Unknown), + } + } +} + +pub fn highlight<'a, T: LanguageRegistry>( + language_registry: &'a T, + source: &'a [u8], + language: Language, + property_sheet: &'a PropertySheet, +) -> Result> + 'a, String> { + Highlighter::new(language_registry, source, language, property_sheet) +} + +pub fn highlight_html<'a, T: LanguageRegistry, F: Fn(Scope) -> &'a str>( + language_registry: &'a T, + source: &'a [u8], + language: Language, + property_sheet: &'a PropertySheet, + attribute_callback: F, +) -> Result, String> { + let highlighter = Highlighter::new(language_registry, source, language, property_sheet)?; + let mut renderer = HtmlRenderer::new(attribute_callback); + let mut scopes = Vec::new(); + for event in highlighter { + match event { + HighlightEvent::ScopeStart(s) => { + scopes.push(s); + renderer.start_scope(s); + } + HighlightEvent::ScopeEnd(s) => { + assert_eq!(scopes.pop(), Some(s)); + renderer.end_scope(); + } + HighlightEvent::Source(src) => { + renderer.render_line(src, &scopes); + } + }; + } + renderer.flush(); + Ok(renderer.result) +} + +struct HtmlRenderer<'a, F: Fn(Scope) -> &'a str> { + result: Vec, + buffer: String, + attribute_callback: F, +} + +impl<'a, F: Fn(Scope) -> &'a str> HtmlRenderer<'a, F> { + fn new(attribute_callback: F) -> Self { + HtmlRenderer { + result: Vec::new(), + buffer: String::new(), + attribute_callback, + } + } + + fn start_scope(&mut self, s: Scope) { + write!(&mut self.buffer, "", (self.attribute_callback)(s),).unwrap(); + } + + fn end_scope(&mut self) { + write!(&mut self.buffer, "").unwrap(); + } + + fn flush(&mut self) { + if !self.buffer.is_empty() { + self.buffer.push('\n'); + self.result.push(self.buffer.clone()); + self.buffer.clear(); + } + } + + fn render_line(&mut self, src: &str, scopes: &Vec) { + let mut multiline = false; + for line in src.split('\n') { + let line = line.trim_end_matches('\r'); + if 
multiline { + scopes.iter().for_each(|_| self.end_scope()); + self.flush(); + scopes.iter().for_each(|scope| self.start_scope(*scope)); + } + write!(&mut self.buffer, "{}", escape::Escape(line)).unwrap(); + multiline = true; + } + } +} From a46515b80f18d9be80d0ae7351c6eebdd2a9b303 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 19 Feb 2019 17:07:12 -0800 Subject: [PATCH 2/8] Replace LanguageRegistry trait with a simple callback --- cli/src/highlight.rs | 55 ++++++++++++++++++++++----- cli/src/loader.rs | 33 +---------------- cli/src/tests/highlight_test.rs | 29 ++++++--------- highlight/src/lib.rs | 66 +++++++++++++++++++-------------- 4 files changed, 96 insertions(+), 87 deletions(-) diff --git a/cli/src/highlight.rs b/cli/src/highlight.rs index 1651b98d..0f88149a 100644 --- a/cli/src/highlight.rs +++ b/cli/src/highlight.rs @@ -4,7 +4,7 @@ use ansi_term::{Color, Style}; use lazy_static::lazy_static; use serde_json::Value; use std::collections::HashMap; -use std::{fmt, fs, io, mem, path}; +use std::{fmt, fs, io, path}; use tree_sitter::{Language, PropertySheet}; use tree_sitter_highlight::{highlight, highlight_html, HighlightEvent, Properties, Scope}; @@ -195,7 +195,9 @@ pub fn ansi( let stdout = io::stdout(); let mut stdout = stdout.lock(); let mut scope_stack = Vec::new(); - for event in highlight(loader, source, language, property_sheet)? { + for event in highlight(source, language, property_sheet, &|s| { + language_for_injection_string(loader, s) + })? 
{ match event { HighlightEvent::Source(s) => { if let Some(style) = scope_stack.last().and_then(|s| theme.ansi_style(*s)) { @@ -252,13 +254,19 @@ pub fn html( let stdout = io::stdout(); let mut stdout = stdout.lock(); write!(&mut stdout, "\n")?; - let lines = highlight_html(loader, source, language, property_sheet, |scope| { - if let Some(css_style) = theme.css_style(scope) { - css_style - } else { - "" - } - })?; + let lines = highlight_html( + source, + language, + property_sheet, + &|s| language_for_injection_string(loader, s), + &|scope| { + if let Some(css_style) = theme.css_style(scope) { + css_style + } else { + "" + } + }, + )?; for (i, line) in lines.into_iter().enumerate() { write!( &mut stdout, @@ -270,3 +278,32 @@ pub fn html( write!(&mut stdout, "
\n")?; Ok(()) } + +fn language_for_injection_string<'a>( + loader: &'a Loader, + string: &str, +) -> Option<(Language, &'a PropertySheet)> { + match loader.language_configuration_for_injection_string(string) { + Err(message) => { + eprintln!( + "Failed to load language for injection string '{}': {}", + string, message.0 + ); + None + } + Ok(None) => None, + Ok(Some((language, configuration))) => { + match configuration.highlight_property_sheet(language) { + Err(message) => { + eprintln!( + "Failed to load property sheet for injection string '{}': {}", + string, message.0 + ); + None + } + Ok(None) => None, + Ok(Some(sheet)) => Some((language, sheet)), + } + } + } +} diff --git a/cli/src/loader.rs b/cli/src/loader.rs index d19acf46..49bab4b4 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -10,7 +10,7 @@ use std::process::Command; use std::time::SystemTime; use std::{fs, mem}; use tree_sitter::{Language, PropertySheet}; -use tree_sitter_highlight::{load_property_sheet, LanguageRegistry, Properties}; +use tree_sitter_highlight::{load_property_sheet, Properties}; #[cfg(unix)] const DYLIB_EXTENSION: &'static str = "so"; @@ -320,37 +320,6 @@ impl Loader { } } -impl LanguageRegistry for Loader { - fn language_for_injection_string<'a>( - &'a self, - string: &str, - ) -> Option<(Language, &'a PropertySheet)> { - match self.language_configuration_for_injection_string(string) { - Err(message) => { - eprintln!( - "Failed to load language for injection string '{}': {}", - string, message.0 - ); - None - } - Ok(None) => None, - Ok(Some((language, configuration))) => { - match configuration.highlight_property_sheet(language) { - Err(message) => { - eprintln!( - "Failed to load property sheet for injection string '{}': {}", - string, message.0 - ); - None - } - Ok(None) => None, - Ok(Some(sheet)) => Some((language, sheet)), - } - } - } - } -} - impl LanguageConfiguration { pub fn highlight_property_sheet( &self, diff --git a/cli/src/tests/highlight_test.rs 
b/cli/src/tests/highlight_test.rs index ea14a1c2..6e07ab4a 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -1,9 +1,7 @@ use super::helpers::fixtures::{get_language, get_property_sheet}; use lazy_static::lazy_static; use tree_sitter::{Language, PropertySheet}; -use tree_sitter_highlight::{ - highlight, highlight_html, HighlightEvent, LanguageRegistry, Properties, Scope, -}; +use tree_sitter_highlight::{highlight, highlight_html, HighlightEvent, Properties, Scope}; lazy_static! { static ref JS_SHEET: PropertySheet = @@ -124,18 +122,13 @@ fn test_highlighting_multiline_scopes_to_html() { ); } -struct TestLanguageRegistry; - -impl LanguageRegistry for TestLanguageRegistry { - fn language_for_injection_string( - &self, - string: &str, - ) -> Option<(Language, &PropertySheet)> { - match string { - "javascript" => Some((get_language("javascript"), &JS_SHEET)), - "html" => Some((get_language("html"), &HTML_SHEET)), - _ => None, - } +fn test_language_for_injection_string<'a>( + string: &str, +) -> Option<(Language, &'a PropertySheet)> { + match string { + "javascript" => Some((get_language("javascript"), &JS_SHEET)), + "html" => Some((get_language("html"), &HTML_SHEET)), + _ => None, } } @@ -145,11 +138,11 @@ fn to_html<'a>( property_sheet: &'a PropertySheet, ) -> Result, String> { highlight_html( - &TestLanguageRegistry, src.as_bytes(), language, property_sheet, - |scope| SCOPE_CLASS_STRINGS[scope as usize].as_str(), + &test_language_for_injection_string, + &|scope| SCOPE_CLASS_STRINGS[scope as usize].as_str(), ) } @@ -162,10 +155,10 @@ fn to_token_vector<'a>( let mut scopes = Vec::new(); let mut line = Vec::new(); for event in highlight( - &TestLanguageRegistry, src.as_bytes(), language, property_sheet, + &test_language_for_injection_string, )? 
{ match event { HighlightEvent::ScopeStart(s) => scopes.push(s), diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index bdf35b9f..453685f4 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -9,13 +9,6 @@ use std::str; use std::usize; use tree_sitter::{Language, Node, Parser, Point, PropertySheet, Range, Tree, TreePropertyCursor}; -pub trait LanguageRegistry { - fn language_for_injection_string<'a>( - &'a self, - s: &str, - ) -> Option<(Language, &'a PropertySheet)>; -} - #[derive(Debug)] enum TreeStep { Child { @@ -87,8 +80,11 @@ struct Layer<'a> { at_node_end: bool, } -struct Highlighter<'a, T: LanguageRegistry> { - language_registry: &'a T, +struct Highlighter<'a, T> +where + T: Fn(&str) -> Option<(Language, &'a PropertySheet)>, +{ + injection_callback: &'a T, source: &'a [u8], source_offset: usize, parser: Parser, @@ -349,12 +345,15 @@ impl Properties { } } -impl<'a, T: LanguageRegistry> Highlighter<'a, T> { +impl<'a, F> Highlighter<'a, F> +where + F: Fn(&str) -> Option<(Language, &'a PropertySheet)>, +{ fn new( - language_registry: &'a T, source: &'a [u8], language: Language, property_sheet: &'a PropertySheet, + injection_callback: &'a F, ) -> Result { let mut parser = Parser::new(); parser.set_language(language)?; @@ -362,7 +361,7 @@ impl<'a, T: LanguageRegistry> Highlighter<'a, T> { .parse(source, None) .ok_or_else(|| format!("Tree-sitter: failed to parse"))?; Ok(Self { - language_registry, + injection_callback, source, source_offset: 0, parser, @@ -457,7 +456,7 @@ impl<'a, T: LanguageRegistry> Highlighter<'a, T> { // on the text of some node in the syntax tree. 
fn injection_language_string( &self, - node: &Node, + node: &Node<'a>, language: &InjectionLanguage, ) -> Option { match language { @@ -556,10 +555,7 @@ impl<'a, T: LanguageRegistry> Highlighter<'a, T> { } fn add_layer(&mut self, language_string: &str, ranges: Vec) { - if let Some((language, property_sheet)) = self - .language_registry - .language_for_injection_string(language_string) - { + if let Some((language, property_sheet)) = (self.injection_callback)(language_string) { self.parser .set_language(language) .expect("Failed to set language"); @@ -579,7 +575,9 @@ impl<'a, T: LanguageRegistry> Highlighter<'a, T> { } } -impl<'a, T: LanguageRegistry> Iterator for Highlighter<'a, T> { +impl<'a, T: Fn(&str) -> Option<(Language, &'a PropertySheet)>> Iterator + for Highlighter<'a, T> +{ type Item = HighlightEvent<'a>; fn next(&mut self) -> Option { @@ -738,23 +736,32 @@ impl<'de> Deserialize<'de> for Scope { } } -pub fn highlight<'a, T: LanguageRegistry>( - language_registry: &'a T, +pub trait HTMLAttributeCallback<'a>: Fn(Scope) -> &'a str {} + +pub fn highlight<'a, F>( source: &'a [u8], language: Language, property_sheet: &'a PropertySheet, -) -> Result> + 'a, String> { - Highlighter::new(language_registry, source, language, property_sheet) + injection_callback: &'a F, +) -> Result> + 'a, String> +where + F: Fn(&str) -> Option<(Language, &'a PropertySheet)>, +{ + Highlighter::new(source, language, property_sheet, injection_callback) } -pub fn highlight_html<'a, T: LanguageRegistry, F: Fn(Scope) -> &'a str>( - language_registry: &'a T, +pub fn highlight_html<'a, F1, F2>( source: &'a [u8], language: Language, property_sheet: &'a PropertySheet, - attribute_callback: F, -) -> Result, String> { - let highlighter = Highlighter::new(language_registry, source, language, property_sheet)?; + injection_callback: &'a F1, + attribute_callback: &'a F2, +) -> Result, String> +where + F1: Fn(&str) -> Option<(Language, &'a PropertySheet)>, + F2: Fn(Scope) -> &'a str, +{ + let 
highlighter = Highlighter::new(source, language, property_sheet, injection_callback)?; let mut renderer = HtmlRenderer::new(attribute_callback); let mut scopes = Vec::new(); for event in highlighter { @@ -782,7 +789,10 @@ struct HtmlRenderer<'a, F: Fn(Scope) -> &'a str> { attribute_callback: F, } -impl<'a, F: Fn(Scope) -> &'a str> HtmlRenderer<'a, F> { +impl<'a, F> HtmlRenderer<'a, F> +where + F: Fn(Scope) -> &'a str, +{ fn new(attribute_callback: F) -> Self { HtmlRenderer { result: Vec::new(), From c20a330fa5e03dc6e8915972062cef07fbeff92f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 19 Feb 2019 17:56:46 -0800 Subject: [PATCH 3/8] highlight: Add a README --- highlight/README.md | 58 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 highlight/README.md diff --git a/highlight/README.md b/highlight/README.md new file mode 100644 index 00000000..b6b311cc --- /dev/null +++ b/highlight/README.md @@ -0,0 +1,58 @@ +Tree-sitter Highlighting +========================= + +[![Build Status](https://travis-ci.org/tree-sitter/tree-sitter.svg?branch=master)](https://travis-ci.org/tree-sitter/tree-sitter) +[![Build status](https://ci.appveyor.com/api/projects/status/vtmbd6i92e97l55w/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/tree-sitter/branch/master) +[![Crates.io](https://img.shields.io/crates/v/tree-sitter-highlight.svg)](https://crates.io/crates/tree-sitter-highlight) + +### Usage + +Compile some languages into your app, and declare them: + +```rust +extern "C" { fn tree_sitter_html() -> tree_sitter::Language; } +extern "C" { fn tree_sitter_javascript() -> tree_sitter::Language; } +``` + +Load some *property sheets*: + +```rust +use tree_sitter_highlight::load_property_sheet; + +let javascript_property_sheet = load_property_sheet( + fs::read_to_string("./tree-sitter-javascript/src/highlights.json").unwrap() +).unwrap(); + +let html_property_sheet = load_property_sheet( + fs::read_to_string("./tree-sitter-html/src/highlights.json").unwrap() +).unwrap();
+``` + +Highlight some code: + +```rust +use tree_sitter_highlight::{highlight, HighlightEvent}; + +let highlights = highlight( + b"const x = new Y();", + unsafe { tree_sitter_javascript() }, + &javascript_property_sheet, + &|_| None +).unwrap(); + +for event in highlights { + match event { + HighlightEvent::Source(s) => { + eprintln!("source: {:?}", s); + }, + HighlightEvent::ScopeStart(s) => { + eprintln!("scope started: {:?}", s); + }, + HighlightEvent::ScopeEnd(s) => { + eprintln!("scope ended: {:?}", s); + }, + } +} +``` + +The last parameter to `highlight` is a *language injection* callback. This allows other languages to be retrieved when Tree-sitter detects an embedded document (for example, a piece of JavaScript code inside of a `script` tag within HTML). From 2ee5cbbc1dedb9238c08e7167ca50701d217699f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 20 Feb 2019 10:27:08 -0800 Subject: [PATCH 4/8] highlight: take callback parameters by value --- cli/src/highlight.rs | 6 +++--- highlight/src/lib.rs | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cli/src/highlight.rs b/cli/src/highlight.rs index 0f88149a..6cd19392 100644 --- a/cli/src/highlight.rs +++ b/cli/src/highlight.rs @@ -195,7 +195,7 @@ pub fn ansi( let stdout = io::stdout(); let mut stdout = stdout.lock(); let mut scope_stack = Vec::new(); - for event in highlight(source, language, property_sheet, &|s| { + for event in highlight(source, language, property_sheet, |s| { language_for_injection_string(loader, s) })?
{ match event { @@ -258,8 +258,8 @@ pub fn html( source, language, property_sheet, - &|s| language_for_injection_string(loader, s), - &|scope| { + |s| language_for_injection_string(loader, s), + |scope| { if let Some(css_style) = theme.css_style(scope) { css_style } else { diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index 453685f4..bbe0b424 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -84,7 +84,7 @@ struct Highlighter<'a, T> where T: Fn(&str) -> Option<(Language, &'a PropertySheet)>, { - injection_callback: &'a T, + injection_callback: T, source: &'a [u8], source_offset: usize, parser: Parser, @@ -353,7 +353,7 @@ where source: &'a [u8], language: Language, property_sheet: &'a PropertySheet, - injection_callback: &'a F, + injection_callback: F, ) -> Result { let mut parser = Parser::new(); parser.set_language(language)?; @@ -742,10 +742,10 @@ pub fn highlight<'a, F>( source: &'a [u8], language: Language, property_sheet: &'a PropertySheet, - injection_callback: &'a F, + injection_callback: F, ) -> Result> + 'a, String> where - F: Fn(&str) -> Option<(Language, &'a PropertySheet)>, + F: Fn(&str) -> Option<(Language, &'a PropertySheet)> + 'a, { Highlighter::new(source, language, property_sheet, injection_callback) } @@ -754,8 +754,8 @@ pub fn highlight_html<'a, F1, F2>( source: &'a [u8], language: Language, property_sheet: &'a PropertySheet, - injection_callback: &'a F1, - attribute_callback: &'a F2, + injection_callback: F1, + attribute_callback: F2, ) -> Result, String> where F1: Fn(&str) -> Option<(Language, &'a PropertySheet)>, From d63368552a37823afe12234be50a7d12e674d090 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 20 Feb 2019 10:42:56 -0800 Subject: [PATCH 5/8] highlight: 0.1.1 --- Cargo.lock | 4 ++-- highlight/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 50058336..3c6825d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -639,12 +639,12 @@ dependencies = [ 
"smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter 0.3.8", - "tree-sitter-highlight 0.1.0", + "tree-sitter-highlight 0.1.1", ] [[package]] name = "tree-sitter-highlight" -version = "0.1.0" +version = "0.1.1" dependencies = [ "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/highlight/Cargo.toml b/highlight/Cargo.toml index dd33add2..5f8aa7ac 100644 --- a/highlight/Cargo.toml +++ b/highlight/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-highlight" description = "Library for performing syntax highlighting with Tree-sitter" -version = "0.1.0" +version = "0.1.1" authors = [ "Max Brunsfeld ", "Tim Clem " From d2264d597f9a14302e5d1415c1394b0723e26f2d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 20 Feb 2019 14:38:19 -0800 Subject: [PATCH 6/8] cli: Add --scope flag to highlight command --- cli/src/loader.rs | 21 ++++++++++++++++++--- cli/src/main.rs | 38 +++++++++++++++++++++++++++++++------- 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/cli/src/loader.rs b/cli/src/loader.rs index 49bab4b4..23a55cc6 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -27,7 +27,7 @@ struct LanguageRepo { } pub struct LanguageConfiguration { - pub name: String, + scope: Option, _content_regex: Option, _first_line_regex: Option, injection_regex: Option, @@ -79,6 +79,21 @@ impl Loader { } } + pub fn language_configuration_for_scope( + &self, + scope: &str, + ) -> Result> { + for (i, repo) in self.language_repos.iter().enumerate() { + for configuration in &repo.configurations { + if configuration.scope.as_ref().map_or(false, |s| s == scope) { + let (language, _) = self.language_configuration_for_id(i)?; + return Ok(Some((language, &configuration))); + } + } + } + Ok(None) + } + pub fn language_configuration_for_file_name( &self, path: 
&Path, @@ -258,7 +273,7 @@ impl Loader { fn find_language_at_path<'a>(&'a mut self, parser_path: &Path) -> Result { #[derive(Deserialize)] struct LanguageConfigurationJSON { - name: String, + scope: Option, #[serde(rename = "file-types")] file_types: Option>, #[serde(rename = "content-regex")] @@ -284,7 +299,7 @@ impl Loader { configurations .into_iter() .map(|conf| LanguageConfiguration { - name: conf.name, + scope: conf.scope, file_types: conf.file_types.unwrap_or(Vec::new()), _content_regex: conf .content_regex diff --git a/cli/src/main.rs b/cli/src/main.rs index 9cd4e131..255f680b 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -72,6 +72,7 @@ fn run() -> error::Result<()> { .multiple(true) .required(true), ) + .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("html").long("html").short("h")), ) .get_matches(); @@ -169,17 +170,40 @@ fn run() -> error::Result<()> { println!("{}", highlight::HTML_HEADER); } + let language_config; + if let Some(scope) = matches.value_of("scope") { + language_config = loader.language_configuration_for_scope(scope)?; + if language_config.is_none() { + return Err(error::Error(format!("Unknown scope '{}'", scope))); + } + } else { + language_config = None; + } + for path in paths { let path = Path::new(path); - if let Some((language, config)) = loader.language_configuration_for_file_name(path)? { - if let Some(sheet) = config.highlight_property_sheet(language)? { - let source = fs::read(path)?; - if html_mode { - highlight::html(&loader, &theme, &source, language, sheet)?; - } else { - highlight::ansi(&loader, &theme, &source, language, sheet)?; + let (language, config) = match language_config { + Some(v) => v, + None => match loader.language_configuration_for_file_name(path)? { + Some(v) => v, + None => { + eprintln!("No language found for path {:?}", path); + continue; } + }, + }; + + if let Some(sheet) = config.highlight_property_sheet(language)? 
{ + let source = fs::read(path)?; + if html_mode { + highlight::html(&loader, &theme, &source, language, sheet)?; + } else { + highlight::ansi(&loader, &theme, &source, language, sheet)?; } + } else { + return Err(error::Error(format!( + "No syntax highlighting property sheet specified" + ))); } } } From e239aa82295762622069ca300b38560da47b8a3b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 20 Feb 2019 16:45:51 -0800 Subject: [PATCH 7/8] highlight: don't include scope in ScopeEnd events When there are embedded documents, multiple scopes can start or end at the same position. Previously, there was no guarantee that the ScopeEnd events would always occur in the reverse order of the ScopeStart events. The easiest way to avoid exposing inconsistency is to not surface the scopes being ended. --- cli/src/highlight.rs | 2 +- cli/src/tests/highlight_test.rs | 3 +-- highlight/src/lib.rs | 24 +++++++++++++++--------- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/cli/src/highlight.rs b/cli/src/highlight.rs index 6cd19392..55ef4bc2 100644 --- a/cli/src/highlight.rs +++ b/cli/src/highlight.rs @@ -209,7 +209,7 @@ pub fn ansi( HighlightEvent::ScopeStart(s) => { scope_stack.push(s); } - HighlightEvent::ScopeEnd(_) => { + HighlightEvent::ScopeEnd => { scope_stack.pop(); } } diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index 6e07ab4a..57f61e16 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -162,8 +162,7 @@ fn to_token_vector<'a>( )? 
{ match event { HighlightEvent::ScopeStart(s) => scopes.push(s), - HighlightEvent::ScopeEnd(s) => { - assert_eq!(*scopes.last().unwrap(), s); + HighlightEvent::ScopeEnd => { scopes.pop(); } HighlightEvent::Source(s) => { diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index bbe0b424..7ec186d8 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -96,7 +96,7 @@ where pub enum HighlightEvent<'a> { Source(&'a str), ScopeStart(Scope), - ScopeEnd(Scope), + ScopeEnd, } #[derive(Debug, Deserialize)] @@ -565,10 +565,7 @@ where .parse(self.source, None) .expect("Failed to parse"); let layer = Layer::new(self.source, tree, property_sheet, ranges); - match self - .layers - .binary_search_by_key(&(layer.offset(), 1), |l| (l.offset(), 0)) - { + match self.layers.binary_search_by(|l| l.cmp(&layer)) { Ok(i) | Err(i) => self.layers.insert(i, layer), }; } @@ -625,7 +622,7 @@ impl<'a, T: Fn(&str) -> Option<(Language, &'a PropertySheet)>> Itera } scope_event = if self.layers[0].at_node_end { - Some(HighlightEvent::ScopeEnd(scope)) + Some(HighlightEvent::ScopeEnd) } else { Some(HighlightEvent::ScopeStart(scope)) }; @@ -638,7 +635,7 @@ impl<'a, T: Fn(&str) -> Option<(Language, &'a PropertySheet)>> Itera // to re-sort the layers. If the cursor is already at the end of its syntax tree, // remove it. if self.layers[0].advance() { - self.layers.sort_unstable_by_key(|layer| layer.offset()); + self.layers.sort_unstable_by(|a, b| a.cmp(&b)); } else { self.layers.remove(0); } @@ -676,6 +673,15 @@ impl<'a> Layer<'a> { } } + fn cmp(&self, other: &Layer) -> cmp::Ordering { + // Events are ordered primarily by their position in the document. But if + // one scope starts at a given position and another scope ends at that + // same position, return the scope end event before the scope start event. 
+ self.offset() + .cmp(&other.offset()) + .then_with(|| other.at_node_end.cmp(&self.at_node_end)) + } + fn offset(&self) -> usize { if self.at_node_end { self.cursor.node().end_byte() @@ -770,8 +776,8 @@ where scopes.push(s); renderer.start_scope(s); } - HighlightEvent::ScopeEnd(s) => { - assert_eq!(scopes.pop(), Some(s)); + HighlightEvent::ScopeEnd => { + scopes.pop(); renderer.end_scope(); } HighlightEvent::Source(src) => { From 27d4f0d69dd52516aef626a04d017fc3fd7d1395 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 20 Feb 2019 16:47:27 -0800 Subject: [PATCH 8/8] highlight: 0.1.2 --- Cargo.lock | 4 ++-- highlight/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3c6825d9..1810fb4d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -639,12 +639,12 @@ dependencies = [ "smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter 0.3.8", - "tree-sitter-highlight 0.1.1", + "tree-sitter-highlight 0.1.2", ] [[package]] name = "tree-sitter-highlight" -version = "0.1.1" +version = "0.1.2" dependencies = [ "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/highlight/Cargo.toml b/highlight/Cargo.toml index 5f8aa7ac..ee2dd80e 100644 --- a/highlight/Cargo.toml +++ b/highlight/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-highlight" description = "Library for performing syntax highlighting with Tree-sitter" -version = "0.1.1" +version = "0.1.2" authors = [ "Max Brunsfeld ", "Tim Clem "