Add a highlight subcommand

This commit is contained in:
Max Brunsfeld 2019-02-19 11:24:50 -08:00
parent 0dd15e2b02
commit e89b6b2402
14 changed files with 1870 additions and 22 deletions

78
Cargo.lock generated
View file

@ -210,6 +210,14 @@ dependencies = [
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "lock_api"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "log"
version = "0.4.6"
@ -263,6 +271,35 @@ name = "num-traits"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "once_cell"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"parking_lot 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "parking_lot"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"parking_lot_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "parking_lot_core"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "proc-macro2"
version = "0.4.24"
@ -502,6 +539,14 @@ name = "smallbitvec"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "smallvec"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "spin"
version = "0.5.0"
@ -583,6 +628,7 @@ dependencies = [
"lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"once_cell 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
@ -593,6 +639,18 @@ dependencies = [
"smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"tree-sitter 0.3.8",
"tree-sitter-highlight 0.1.0",
]
[[package]]
name = "tree-sitter-highlight"
version = "0.1.0"
dependencies = [
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)",
"tree-sitter 0.3.8",
]
[[package]]
@ -610,6 +668,14 @@ name = "unicode-xid"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "unreachable"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "utf8-ranges"
version = "1.0.2"
@ -625,6 +691,11 @@ name = "version_check"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "void"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "winapi"
version = "0.3.6"
@ -673,6 +744,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1"
"checksum libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)" = "10923947f84a519a45c8fefb7dd1b3e8c08747993381adee176d7a82b4195311"
"checksum libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3ad660d7cb8c5822cd83d10897b0f1f1526792737a179e73896152f85b88c2"
"checksum lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c"
"checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6"
"checksum memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0a3eb002f0535929f1199681417029ebea04aadc0c7a4224b46be99c7f5d6a16"
"checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945"
@ -680,6 +752,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea"
"checksum num-rational 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4e96f040177bb3da242b5b1ecf3f54b5d5af3efbbfb18608977a5d2767b22f10"
"checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1"
"checksum once_cell 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "532c29a261168a45ce28948f9537ddd7a5dd272cc513b3017b1e82a88f962c37"
"checksum parking_lot 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ab41b4aed082705d1056416ae4468b6ea99d52599ecf3169b00088d43113e337"
"checksum parking_lot_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "94c8c7923936b28d546dfd14d4472eaf34c99b14e1c973a32b3e6d4eb04298c9"
"checksum proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)" = "77619697826f31a02ae974457af0b29b723e5619e113e9397b8b82c6bd253f09"
"checksum quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)" = "53fa22a1994bd0f9372d7a816207d8a2677ad0325b073f5c5332760f0fb62b5c"
"checksum rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8356f47b32624fef5b3301c1be97e5944ecdd595409cc5da11d05f211db6cfbd"
@ -709,6 +784,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c"
"checksum serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)" = "c37ccd6be3ed1fdf419ee848f7c758eb31b054d7cd3ae3600e3bae0adf569811"
"checksum smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1764fe2b30ee783bfe3b9b37b2649d8d590b3148bb12e0079715d4d5c673562e"
"checksum smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "88aea073965ab29f6edb5493faf96ad662fb18aa9eeb186a3b7057951605ed15"
"checksum spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "44363f6f51401c34e7be73db0db371c04705d35efbe9f7d6082e03a921a32c55"
"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550"
"checksum syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)" = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7"
@ -719,9 +795,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86"
"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526"
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
"checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737"
"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a"
"checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd"
"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
"checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0"
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View file

@ -27,6 +27,7 @@ clap = "2.32"
dirs = "1.0.2"
hashbrown = "0.1"
libloading = "0.5"
once_cell = "0.1.8"
serde = "1.0"
serde_derive = "1.0"
regex-syntax = "0.6.4"
@ -37,6 +38,10 @@ rsass = "0.9"
version = ">= 0.3.7"
path = "../lib"
[dependencies.tree-sitter-highlight]
version = ">= 0.1.0"
path = "../highlight"
[dependencies.serde_json]
version = "1.0"
features = ["preserve_order"]

View file

@ -1,4 +1,5 @@
use std::io;
use tree_sitter_highlight::PropertySheetError;
#[derive(Debug)]
pub struct Error(pub String);
@ -42,3 +43,13 @@ impl From<String> for Error {
Error(error)
}
}
/// Converts a failure to load a highlighting property sheet into the CLI's
/// own `Error` type.
impl From<PropertySheetError> for Error {
    fn from(error: PropertySheetError) -> Self {
        match error {
            // Regex failures are routed through the dedicated `regex` constructor.
            PropertySheetError::InvalidRegex(regex_error) => {
                let description = regex_error.to_string();
                Self::regex(&description)
            }
            PropertySheetError::InvalidFormat(format_error) => Self::from(format_error),
            PropertySheetError::InvalidJSON(json_error) => Self::from(json_error),
        }
    }
}

272
cli/src/highlight.rs Normal file
View file

@ -0,0 +1,272 @@
use crate::error::Result;
use crate::loader::Loader;
use ansi_term::{Color, Style};
use lazy_static::lazy_static;
use serde_json::Value;
use std::collections::HashMap;
use std::{fmt, fs, io, mem, path};
use tree_sitter::{Language, PropertySheet};
use tree_sitter_highlight::{highlight, highlight_html, HighlightEvent, Properties, Scope};
// CSS color strings for the xterm palette, indexed by color id. The table is
// parsed from the vendored JSON file the first time it is accessed.
lazy_static! {
static ref CSS_STYLES_BY_COLOR_ID: Vec<String> =
serde_json::from_str(include_str!("../vendor/xterm-colors.json")).unwrap();
}
/// Styling for each highlighting scope, stored both as terminal styles and
/// as pre-rendered HTML `style` attributes.
pub struct Theme {
// Terminal style per scope, indexed by `scope as usize`; `None` means unstyled.
ansi_styles: Vec<Option<Style>>,
// HTML `style='...'` attribute text per scope, indexed the same way.
css_styles: Vec<Option<String>>,
}
impl Theme {
    /// Reads the theme JSON at `path` and builds a `Theme` from it.
    ///
    /// # Errors
    ///
    /// Returns the I/O error if the file cannot be read. Malformed JSON is not
    /// an error here; see [`Theme::new`].
    pub fn load(path: &path::Path) -> io::Result<Self> {
        let json = fs::read_to_string(path)?;
        Ok(Self::new(&json))
    }

    /// Builds a theme from a JSON object mapping scope names to styles.
    ///
    /// If `json` cannot be parsed as such an object, the resulting theme has
    /// no styles at all (best-effort behavior, kept from the original).
    pub fn new(json: &str) -> Self {
        // Initial table size; the tables grow on demand so that a scope whose
        // index is beyond this size cannot cause an out-of-bounds panic.
        const INITIAL_SCOPE_SLOTS: usize = 30;

        let mut ansi_styles: Vec<Option<Style>> = vec![None; INITIAL_SCOPE_SLOTS];
        let mut css_styles: Vec<Option<String>> = vec![None; INITIAL_SCOPE_SLOTS];
        if let Ok(colors) = serde_json::from_str::<HashMap<Scope, Value>>(json) {
            for (scope, style_value) in colors {
                let index = scope as usize;
                if index >= ansi_styles.len() {
                    ansi_styles.resize(index + 1, None);
                    css_styles.resize(index + 1, None);
                }
                let mut style = Style::default();
                parse_style(&mut style, style_value);
                ansi_styles[index] = Some(style);
                css_styles[index] = Some(style_to_css(style));
            }
        }
        Self {
            ansi_styles,
            css_styles,
        }
    }

    /// Returns the terminal style for `scope`, or `None` if the theme does
    /// not style that scope.
    fn ansi_style(&self, scope: Scope) -> Option<&Style> {
        self.ansi_styles
            .get(scope as usize)
            .and_then(|entry| entry.as_ref())
    }

    /// Returns the HTML `style` attribute text for `scope`, or `None` if the
    /// theme does not style that scope.
    fn css_style(&self, scope: Scope) -> Option<&str> {
        self.css_styles
            .get(scope as usize)
            .and_then(|entry| entry.as_ref())
            .map(|css| css.as_str())
    }
}
/// Built-in theme: a JSON object mapping scope names either to a bare color
/// value or to an object with a `color` and style flags.
impl Default for Theme {
fn default() -> Self {
Theme::new(
r#"
{
"attribute": {"color": 124, "italic": true},
"comment": {"color": 245, "italic": true},
"constant.builtin": {"color": 94, "bold": true},
"constant": 94,
"constructor": 136,
"embedded": null,
"function.builtin": {"color": 26, "bold": true},
"function": 26,
"keyword": 56,
"number": {"color": 94, "bold": true},
"property": 124,
"operator": {"color": 239, "bold": true},
"punctuation.bracket": 239,
"punctuation.delimiter": 239,
"string.special": 30,
"string": 28,
"tag": {"color": 18},
"variable.builtin": {"bold": true}
}
"#,
)
}
}
/// Prints the theme as `{Scope: Style, ...}`, listing only the scopes that
/// have a style assigned.
impl fmt::Debug for Theme {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("{")?;
        // Empty before the first entry, ", " before every later one.
        let mut separator = "";
        let styled_entries = self
            .ansi_styles
            .iter()
            .enumerate()
            .filter_map(|(index, entry)| entry.as_ref().map(|style| (index, style)));
        for (index, style) in styled_entries {
            let scope = Scope::from_usize(index).unwrap();
            f.write_str(separator)?;
            write!(f, "{:?}: {:?}", scope, style)?;
            separator = ", ";
        }
        f.write_str("}")
    }
}
/// Applies a JSON style description to `style`.
///
/// `json` is either an object or a bare color value:
///
/// * In an object, the `italic`, `bold`, `dimmed` and `underline` flags are
///   applied only when their value is the JSON boolean `true`, so
///   `"italic": false` leaves the style untouched. `color` sets the
///   foreground color when it parses as a color. Unknown keys are ignored.
/// * Any other value is treated as a color and, if it parses, becomes the
///   foreground color.
fn parse_style(style: &mut Style, json: Value) {
    if let Value::Object(entries) = json {
        for (property_name, value) in entries {
            // Evaluated up front because the `color` arm consumes `value`.
            let enabled = value.as_bool() == Some(true);
            match property_name.as_str() {
                "italic" if enabled => *style = style.italic(),
                "bold" if enabled => *style = style.bold(),
                "dimmed" if enabled => *style = style.dimmed(),
                "underline" if enabled => *style = style.underline(),
                "color" => {
                    if let Some(color) = parse_color(value) {
                        *style = style.fg(color);
                    }
                }
                _ => {}
            }
        }
    } else if let Some(color) = parse_color(json) {
        *style = style.fg(color);
    }
}
/// Parses a JSON color value.
///
/// Accepted forms:
///
/// * an integer in `0..=255`, taken as a fixed palette color id;
/// * one of the color names `black`, `blue`, `cyan`, `green`, `purple`,
///   `red`, `white`, `yellow` (case-insensitive);
/// * a string starting with `#rrggbb` (hexadecimal), taken as an RGB color.
///
/// Everything else, including integers above 255, yields `None`.
fn parse_color(json: Value) -> Option<Color> {
    match json {
        // Out-of-range ids are rejected instead of being wrapped by `as u8`.
        Value::Number(n) => n
            .as_u64()
            .filter(|&id| id <= u64::from(u8::max_value()))
            .map(|id| Color::Fixed(id as u8)),
        Value::String(s) => match s.to_lowercase().as_str() {
            "black" => Some(Color::Black),
            "blue" => Some(Color::Blue),
            "cyan" => Some(Color::Cyan),
            "green" => Some(Color::Green),
            "purple" => Some(Color::Purple),
            "red" => Some(Color::Red),
            "white" => Some(Color::White),
            "yellow" => Some(Color::Yellow),
            s => {
                if !s.starts_with('#') {
                    return None;
                }
                // `str::get` returns `None` when the range is out of bounds or
                // splits a multi-byte character, so malformed input cannot
                // panic the way direct slicing could.
                let channel = |range: std::ops::Range<usize>| {
                    s.get(range)
                        .filter(|digits| digits.bytes().all(|b| b.is_ascii_hexdigit()))
                        .and_then(|digits| u8::from_str_radix(digits, 16).ok())
                };
                match (channel(1..3), channel(3..5), channel(5..7)) {
                    (Some(red), Some(green), Some(blue)) => Some(Color::RGB(red, green, blue)),
                    _ => None,
                }
            }
        },
        _ => None,
    }
}
fn style_to_css(style: Style) -> String {
use std::fmt::Write;
let mut result = "style='".to_string();
if style.is_bold {
write!(&mut result, "font-weight: bold;").unwrap();
}
if style.is_italic {
write!(&mut result, "font-style: italic;").unwrap();
}
if let Some(color) = style.foreground {
write!(&mut result, "color: {};", color_to_css(color)).unwrap();
}
result.push('\'');
result
}
/// Maps a terminal color to a CSS color string.
///
/// Fixed palette colors are looked up in `CSS_STYLES_BY_COLOR_ID`; the basic
/// named colors map to the CSS keyword of the same name.
///
/// # Panics
///
/// Panics with "Unsupported color type" for any other color variant.
fn color_to_css(color: Color) -> &'static str {
    if let Color::Fixed(color_id) = color {
        return CSS_STYLES_BY_COLOR_ID[color_id as usize].as_str();
    }
    match color {
        Color::White => "white",
        Color::Purple => "purple",
        Color::Cyan => "cyan",
        Color::Yellow => "yellow",
        Color::Green => "green",
        Color::Red => "red",
        Color::Blue => "blue",
        Color::Black => "black",
        _ => panic!("Unsupported color type"),
    }
}
/// Highlights `source` and writes it to standard output with ANSI styling.
///
/// Each chunk of source text is painted with the theme's style for the
/// innermost open scope; if that scope has no style (or no scope is open)
/// the text is written unstyled.
///
/// # Errors
///
/// Returns an error if highlighting fails or if writing to stdout fails.
pub fn ansi(
    loader: &Loader,
    theme: &Theme,
    source: &[u8],
    language: Language,
    property_sheet: &PropertySheet<Properties>,
) -> Result<()> {
    use std::io::Write;
    let stdout = io::stdout();
    let mut out = stdout.lock();
    let mut open_scopes = Vec::new();
    for event in highlight(loader, source, language, property_sheet)? {
        match event {
            HighlightEvent::ScopeStart(scope) => open_scopes.push(scope),
            HighlightEvent::ScopeEnd(_) => {
                open_scopes.pop();
            }
            HighlightEvent::Source(text) => {
                let innermost_style = open_scopes
                    .last()
                    .and_then(|scope| theme.ansi_style(*scope));
                match innermost_style {
                    Some(style) => write!(&mut out, "{}", style.paint(text))?,
                    None => write!(&mut out, "{}", text)?,
                }
            }
        }
    }
    Ok(())
}
/// Document preamble for HTML output: doctype, page title, the stylesheet for
/// the `line-number` and `line` cells, and the opening `<body>` tag.
pub const HTML_HEADER: &'static str = "
<!doctype HTML>
<head>
<title>Tree-sitter Highlighting</title>
<style>
body {
font-family: monospace
}
.line-number {
user-select: none;
text-align: right;
color: rgba(27,31,35,.3);
padding: 0 10px;
}
.line {
white-space: pre;
}
</style>
</head>
<body>
";
/// Closing counterpart to `HTML_HEADER`: ends the `<body>` element.
pub const HTML_FOOTER: &'static str = "
</body>
";
/// Highlights `source` and writes it to standard output as an HTML table
/// with one row per line: a line-number cell followed by the rendered line.
///
/// Scopes without a style in `theme` get an empty attribute string.
///
/// # Errors
///
/// Returns an error if highlighting fails or if writing to stdout fails.
pub fn html(
    loader: &Loader,
    theme: &Theme,
    source: &[u8],
    language: Language,
    property_sheet: &PropertySheet<Properties>,
) -> Result<()> {
    use std::io::Write;
    let stdout = io::stdout();
    let mut out = stdout.lock();
    out.write_all(b"<table>\n")?;
    let rendered_lines = highlight_html(loader, source, language, property_sheet, |scope| {
        theme.css_style(scope).unwrap_or("")
    })?;
    for (index, rendered) in rendered_lines.into_iter().enumerate() {
        // Line numbers shown to the user start at 1.
        let line_number = index + 1;
        write!(
            &mut out,
            "<tr><td class=line-number>{}</td><td class=line>{}</td></tr>\n",
            line_number, rendered
        )?;
    }
    out.write_all(b"</table>\n")?;
    Ok(())
}

View file

@ -1,5 +1,6 @@
pub mod error;
pub mod generate;
pub mod highlight;
pub mod loader;
pub mod logger;
pub mod parse;

View file

@ -1,5 +1,6 @@
use super::error::{Error, Result};
use libloading::{Library, Symbol};
use once_cell::unsync::OnceCell;
use regex::{Regex, RegexBuilder};
use serde_derive::Deserialize;
use std::collections::HashMap;
@ -9,6 +10,7 @@ use std::process::Command;
use std::time::SystemTime;
use std::{fs, mem};
use tree_sitter::{Language, PropertySheet};
use tree_sitter_highlight::{load_property_sheet, LanguageRegistry, Properties};
#[cfg(unix)]
const DYLIB_EXTENSION: &'static str = "so";
@ -20,16 +22,18 @@ const BUILD_TARGET: &'static str = env!("BUILD_TARGET");
struct LanguageRepo {
path: PathBuf,
language: Option<Language>,
language: OnceCell<Language>,
configurations: Vec<LanguageConfiguration>,
}
pub struct LanguageConfiguration {
_name: String,
pub name: String,
_content_regex: Option<Regex>,
_first_line_regex: Option<Regex>,
injection_regex: Option<Regex>,
file_types: Vec<String>,
_highlight_property_sheet: Option<std::result::Result<PropertySheet, PathBuf>>,
highlight_property_sheet_path: Option<PathBuf>,
highlight_property_sheet: OnceCell<Option<PropertySheet<Properties>>>,
}
pub struct Loader {
@ -76,7 +80,7 @@ impl Loader {
}
pub fn language_configuration_for_file_name(
&mut self,
&self,
path: &Path,
) -> Result<Option<(Language, &LanguageConfiguration)>> {
let ids = path
@ -100,20 +104,43 @@ impl Loader {
Ok(None)
}
/// Finds the language configuration whose injection regex has the longest
/// match in `string`.
///
/// Configurations without an injection regex, and zero-length matches, are
/// never selected. When several matches share the longest length, the first
/// one encountered wins. Returns `Ok(None)` when nothing matches.
///
/// # Errors
///
/// Propagates any error from loading the selected language.
pub fn language_configuration_for_injection_string(
    &self,
    string: &str,
) -> Result<Option<(Language, &LanguageConfiguration)>> {
    let mut longest_match = 0;
    let mut winner = None;
    for (repo_index, repo) in self.language_repos.iter().enumerate() {
        for (config_index, configuration) in repo.configurations.iter().enumerate() {
            let found = configuration
                .injection_regex
                .as_ref()
                .and_then(|regex| regex.find(string));
            if let Some(found) = found {
                let length = found.end() - found.start();
                if length > longest_match {
                    longest_match = length;
                    winner = Some((repo_index, config_index));
                }
            }
        }
    }

    match winner {
        Some((repo_index, config_index)) => {
            let (language, configurations) = self.language_configuration_for_id(repo_index)?;
            Ok(Some((language, &configurations[config_index])))
        }
        None => Ok(None),
    }
}
fn language_configuration_for_id(
&mut self,
&self,
id: usize,
) -> Result<(Language, &Vec<LanguageConfiguration>)> {
let repo = &self.language_repos[id];
let language = if let Some(language) = repo.language {
language
} else {
let language = repo.language.get_or_try_init(|| {
let src_path = repo.path.join("src");
let language = self.load_language_at_path(&src_path, &src_path)?;
self.language_repos[id].language = Some(language);
language
};
Ok((language, &self.language_repos[id].configurations))
self.load_language_at_path(&src_path, &src_path)
})?;
Ok((*language, &self.language_repos[id].configurations))
}
pub fn load_language_at_path(&self, src_path: &Path, header_path: &Path) -> Result<Language> {
@ -238,6 +265,8 @@ impl Loader {
content_regex: Option<String>,
#[serde(rename = "first-line-regex")]
first_line_regex: Option<String>,
#[serde(rename = "injection-regex")]
injection_regex: Option<String>,
highlights: Option<String>,
}
@ -255,7 +284,7 @@ impl Loader {
configurations
.into_iter()
.map(|conf| LanguageConfiguration {
_name: conf.name,
name: conf.name,
file_types: conf.file_types.unwrap_or(Vec::new()),
_content_regex: conf
.content_regex
@ -263,7 +292,11 @@ impl Loader {
_first_line_regex: conf
.first_line_regex
.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()),
_highlight_property_sheet: conf.highlights.map(|d| Err(d.into())),
injection_regex: conf
.injection_regex
.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()),
highlight_property_sheet_path: conf.highlights.map(|h| parser_path.join(h)),
highlight_property_sheet: OnceCell::new(),
})
.collect()
});
@ -279,7 +312,7 @@ impl Loader {
self.language_repos.push(LanguageRepo {
path: parser_path.to_owned(),
language: None,
language: OnceCell::new(),
configurations,
});
@ -287,6 +320,56 @@ impl Loader {
}
}
/// Lets the highlighter resolve injected languages through the loader.
impl LanguageRegistry for Loader {
    /// Returns the language and highlighting property sheet matching the
    /// injection `string`.
    ///
    /// Load failures are reported on stderr and yield `None`, as does a
    /// missing match or a configuration without a property sheet.
    fn language_for_injection_string<'a>(
        &'a self,
        string: &str,
    ) -> Option<(Language, &'a PropertySheet<Properties>)> {
        let (language, configuration) =
            match self.language_configuration_for_injection_string(string) {
                Ok(Some(found)) => found,
                Ok(None) => return None,
                Err(message) => {
                    eprintln!(
                        "Failed to load language for injection string '{}': {}",
                        string, message.0
                    );
                    return None;
                }
            };

        match configuration.highlight_property_sheet(language) {
            Ok(sheet) => sheet.map(|sheet| (language, sheet)),
            Err(message) => {
                eprintln!(
                    "Failed to load property sheet for injection string '{}': {}",
                    string, message.0
                );
                None
            }
        }
    }
}
impl LanguageConfiguration {
    /// Returns this configuration's highlighting property sheet, loading it
    /// from disk on first use and caching the result.
    ///
    /// Returns `Ok(None)` when the configuration has no property sheet path.
    ///
    /// # Errors
    ///
    /// Returns an error if the sheet file cannot be read or cannot be loaded
    /// for `language`.
    pub fn highlight_property_sheet(
        &self,
        language: Language,
    ) -> Result<Option<&PropertySheet<Properties>>> {
        self.highlight_property_sheet
            .get_or_try_init(|| match &self.highlight_property_sheet_path {
                None => Ok(None),
                Some(sheet_path) => {
                    let sheet_json = fs::read_to_string(sheet_path)?;
                    let sheet = load_property_sheet(language, &sheet_json)?;
                    Ok(Some(sheet))
                }
            })
            .map(|cached| cached.as_ref())
    }
}
fn needs_recompile(
lib_path: &Path,
parser_c_path: &Path,

View file

@ -4,8 +4,7 @@ use std::fs;
use std::path::Path;
use std::process::exit;
use std::usize;
use tree_sitter_cli::loader::Loader;
use tree_sitter_cli::{error, generate, logger, parse, properties, test};
use tree_sitter_cli::{error, generate, highlight, loader, logger, parse, properties, test};
fn main() {
if let Err(e) = run() {
@ -64,14 +63,30 @@ fn run() -> error::Result<()> {
.arg(Arg::with_name("debug").long("debug").short("d"))
.arg(Arg::with_name("debug-graph").long("debug-graph").short("D")),
)
.subcommand(
SubCommand::with_name("highlight")
.about("Highlight a file")
.arg(
Arg::with_name("path")
.index(1)
.multiple(true)
.required(true),
)
.arg(Arg::with_name("html").long("html").short("h")),
)
.get_matches();
let home_dir = dirs::home_dir().unwrap();
let current_dir = env::current_dir().unwrap();
let config_dir = home_dir.join(".tree-sitter");
let theme_path = config_dir.join("theme.json");
let parsers_dir = config_dir.join("parsers");
fs::create_dir_all(&config_dir).unwrap();
let mut loader = Loader::new(config_dir);
// TODO - make configurable
let parser_repo_paths = vec![home_dir.join("github")];
fs::create_dir_all(&parsers_dir).unwrap();
let mut loader = loader::Loader::new(config_dir);
if let Some(matches) = matches.subcommand_matches("generate") {
if matches.is_present("log") {
@ -111,7 +126,7 @@ fn run() -> error::Result<()> {
let debug_graph = matches.is_present("debug-graph");
let quiet = matches.is_present("quiet");
let time = matches.is_present("time");
loader.find_all_languages(&vec![home_dir.join("github")])?;
loader.find_all_languages(&parser_repo_paths)?;
let paths = matches
.values_of("path")
.unwrap()
@ -144,6 +159,29 @@ fn run() -> error::Result<()> {
if has_error {
return Err(error::Error(String::new()));
}
} else if let Some(matches) = matches.subcommand_matches("highlight") {
loader.find_all_languages(&parser_repo_paths)?;
let theme = highlight::Theme::load(&theme_path).unwrap_or_default();
let paths = matches.values_of("path").unwrap().into_iter();
let html_mode = matches.is_present("html");
if html_mode {
println!("{}", highlight::HTML_HEADER);
}
for path in paths {
let path = Path::new(path);
if let Some((language, config)) = loader.language_configuration_for_file_name(path)? {
if let Some(sheet) = config.highlight_property_sheet(language)? {
let source = fs::read(path)?;
if html_mode {
highlight::html(&loader, &theme, &source, language, sheet)?;
} else {
highlight::ansi(&loader, &theme, &source, language, sheet)?;
}
}
}
}
}
Ok(())

View file

@ -2,7 +2,8 @@ use crate::loader::Loader;
use lazy_static::lazy_static;
use std::fs;
use std::path::{Path, PathBuf};
use tree_sitter::Language;
use tree_sitter::{Language, PropertySheet};
use tree_sitter_highlight::{load_property_sheet, Properties};
include!("./dirs.rs");
@ -20,6 +21,16 @@ pub fn get_language(name: &str) -> Language {
.unwrap()
}
/// Loads the property sheet `sheet_name` from the `src` directory of the
/// fixture grammar `language_name`.
///
/// # Panics
///
/// Panics if the file cannot be read or the sheet cannot be loaded.
pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet<Properties> {
    let mut sheet_path = GRAMMARS_DIR.join(language_name);
    sheet_path.push("src");
    sheet_path.push(sheet_name);
    let sheet_json = fs::read_to_string(sheet_path).unwrap();
    load_property_sheet(get_language(language_name), &sheet_json).unwrap()
}
pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language {
let parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", name));
if !fs::read_to_string(&parser_c_path)

View file

@ -0,0 +1,191 @@
use super::helpers::fixtures::{get_language, get_property_sheet};
use lazy_static::lazy_static;
use tree_sitter::{Language, PropertySheet};
use tree_sitter_highlight::{
highlight, highlight_html, HighlightEvent, LanguageRegistry, Properties, Scope,
};
// Shared fixtures: the JavaScript and HTML highlighting property sheets, plus
// a `class=<Scope>` attribute string for every scope, indexed by scope number.
lazy_static! {
static ref JS_SHEET: PropertySheet<Properties> =
get_property_sheet("javascript", "highlights.json");
static ref HTML_SHEET: PropertySheet<Properties> =
get_property_sheet("html", "highlights.json");
static ref SCOPE_CLASS_STRINGS: Vec<String> = {
let mut result = Vec::new();
let mut i = 0;
// Enumerate scopes until `from_usize` reports there are no more.
while let Some(scope) = Scope::from_usize(i) {
result.push(format!("class={:?}", scope));
i += 1;
}
result
};
}
// HTML inside a JavaScript tagged template string is highlighted with the
// HTML scopes nested under the enclosing `String` scope, and `${...}`
// substitutions are highlighted as embedded JavaScript.
#[test]
fn test_highlighting_injected_html_in_javascript() {
let source = vec!["const s = html `<div>${a < b}</div>`;"].join("\n");
assert_eq!(
&to_token_vector(&source, get_language("javascript"), &JS_SHEET).unwrap(),
&[vec![
("const", vec![Scope::Keyword]),
(" ", vec![]),
("s", vec![Scope::Variable]),
(" ", vec![]),
("=", vec![Scope::Operator]),
(" ", vec![]),
("html", vec![Scope::Function]),
(" ", vec![]),
("`<", vec![Scope::String]),
("div", vec![Scope::String, Scope::Tag]),
(">", vec![Scope::String]),
(
"${",
vec![Scope::String, Scope::Embedded, Scope::PunctuationSpecial]
),
("a", vec![Scope::String, Scope::Embedded, Scope::Variable]),
(" ", vec![Scope::String, Scope::Embedded]),
("<", vec![Scope::String, Scope::Embedded, Scope::Operator]),
(" ", vec![Scope::String, Scope::Embedded]),
("b", vec![Scope::String, Scope::Embedded, Scope::Variable]),
(
"}",
vec![Scope::String, Scope::Embedded, Scope::PunctuationSpecial]
),
("</", vec![Scope::String]),
("div", vec![Scope::String, Scope::Tag]),
(">`", vec![Scope::String]),
(";", vec![Scope::PunctuationDelimiter]),
]]
);
}
// JavaScript inside an HTML `<script>` element is highlighted with JavaScript
// scopes; the expected tokens are grouped per source line.
#[test]
fn test_highlighting_injected_javascript_in_html() {
let source = vec![
"<body>",
" <script>",
" const x = new Thing();",
" </script>",
"</body>",
]
.join("\n");
assert_eq!(
&to_token_vector(&source, get_language("html"), &HTML_SHEET).unwrap(),
&[
vec![("<", vec![]), ("body", vec![Scope::Tag]), (">", vec![]),],
vec![(" <", vec![]), ("script", vec![Scope::Tag]), (">", vec![]),],
vec![
(" ", vec![]),
("const", vec![Scope::Keyword]),
(" ", vec![]),
("x", vec![Scope::Variable]),
(" ", vec![]),
("=", vec![Scope::Operator]),
(" ", vec![]),
("new", vec![Scope::Keyword]),
(" ", vec![]),
("Thing", vec![Scope::Constructor]),
("(", vec![Scope::PunctuationBracket]),
(")", vec![Scope::PunctuationBracket]),
(";", vec![Scope::PunctuationDelimiter]),
],
vec![
(" </", vec![]),
("script", vec![Scope::Tag]),
(">", vec![]),
],
vec![("</", vec![]), ("body", vec![Scope::Tag]), (">", vec![]),],
]
);
}
// Scopes that span several lines are closed at the end of each rendered HTML
// line and reopened at the start of the next, so every line is
// self-contained markup.
#[test]
fn test_highlighting_multiline_scopes_to_html() {
let source = vec![
"const SOMETHING = `",
" one ${",
" two()",
" } three",
"`",
]
.join("\n");
assert_eq!(
&to_html(&source, get_language("javascript"), &JS_SHEET,).unwrap(),
&[
"<span class=Keyword>const</span> <span class=Constant>SOMETHING</span> <span class=Operator>=</span> <span class=String>`</span>\n".to_string(),
"<span class=String> one <span class=Embedded><span class=PunctuationSpecial>${</span></span></span>\n".to_string(),
"<span class=String><span class=Embedded> <span class=Function>two</span><span class=PunctuationBracket>(</span><span class=PunctuationBracket>)</span></span></span>\n".to_string(),
"<span class=String><span class=Embedded> <span class=PunctuationSpecial>}</span></span> three</span>\n".to_string(),
"<span class=String>`</span>\n".to_string(),
]
);
}
/// Language registry for the tests; it knows only the JavaScript and HTML
/// fixtures.
struct TestLanguageRegistry;

impl LanguageRegistry for TestLanguageRegistry {
    /// Resolves the exact injection strings `"javascript"` and `"html"`;
    /// every other string yields `None`.
    fn language_for_injection_string(
        &self,
        string: &str,
    ) -> Option<(Language, &PropertySheet<Properties>)> {
        if string == "javascript" {
            Some((get_language("javascript"), &JS_SHEET))
        } else if string == "html" {
            Some((get_language("html"), &HTML_SHEET))
        } else {
            None
        }
    }
}
/// Renders `src` as HTML lines, giving each scope a `class=<Scope>`
/// attribute taken from `SCOPE_CLASS_STRINGS`.
fn to_html<'a>(
    src: &'a str,
    language: Language,
    property_sheet: &'a PropertySheet<Properties>,
) -> Result<Vec<String>, String> {
    let registry = TestLanguageRegistry;
    let source_bytes = src.as_bytes();
    highlight_html(&registry, source_bytes, language, property_sheet, |scope| {
        SCOPE_CLASS_STRINGS[scope as usize].as_str()
    })
}
/// Highlights `src` and returns, for each source line, the non-empty text
/// fragments on that line paired with the stack of scopes open at that point.
///
/// Panics (via `assert_eq!`) if a `ScopeEnd` event does not match the most
/// recently started scope.
fn to_token_vector<'a>(
    src: &'a str,
    language: Language,
    property_sheet: &'a PropertySheet<Properties>,
) -> Result<Vec<Vec<(&'a str, Vec<Scope>)>>, String> {
    let mut lines = Vec::new();
    let mut scopes = Vec::new();
    let mut line = Vec::new();
    for event in highlight(
        &TestLanguageRegistry,
        src.as_bytes(),
        language,
        property_sheet,
    )? {
        match event {
            HighlightEvent::ScopeStart(s) => scopes.push(s),
            HighlightEvent::ScopeEnd(s) => {
                assert_eq!(*scopes.last().unwrap(), s);
                scopes.pop();
            }
            HighlightEvent::Source(s) => {
                // Split on '\n' rather than using `str::lines()`: `lines()`
                // drops a trailing newline, so a chunk that ends in (or
                // consists solely of) a line break would not start a new line.
                for (i, l) in s.split('\n').enumerate() {
                    if i > 0 {
                        lines.push(line);
                        line = Vec::new();
                    }
                    // Keep the `\r\n` handling that `lines()` provided.
                    let l = if l.ends_with('\r') {
                        &l[..l.len() - 1]
                    } else {
                        l
                    };
                    if !l.is_empty() {
                        line.push((l, scopes.clone()));
                    }
                }
            }
        }
    }
    lines.push(line);
    Ok(lines)
}

View file

@ -1,5 +1,6 @@
mod corpus_test;
mod helpers;
mod highlight_test;
mod node_test;
mod parser_test;
mod properties_test;

258
cli/vendor/xterm-colors.json vendored Normal file
View file

@ -0,0 +1,258 @@
[
"#000000",
"#800000",
"#008000",
"#808000",
"#000080",
"#800080",
"#008080",
"#c0c0c0",
"#808080",
"#ff0000",
"#00ff00",
"#ffff00",
"#0000ff",
"#ff00ff",
"#00ffff",
"#ffffff",
"#000000",
"#00005f",
"#000087",
"#0000af",
"#0000d7",
"#0000ff",
"#005f00",
"#005f5f",
"#005f87",
"#005faf",
"#005fd7",
"#005fff",
"#008700",
"#00875f",
"#008787",
"#0087af",
"#0087d7",
"#0087ff",
"#00af00",
"#00af5f",
"#00af87",
"#00afaf",
"#00afd7",
"#00afff",
"#00d700",
"#00d75f",
"#00d787",
"#00d7af",
"#00d7d7",
"#00d7ff",
"#00ff00",
"#00ff5f",
"#00ff87",
"#00ffaf",
"#00ffd7",
"#00ffff",
"#5f0000",
"#5f005f",
"#5f0087",
"#5f00af",
"#5f00d7",
"#5f00ff",
"#5f5f00",
"#5f5f5f",
"#5f5f87",
"#5f5faf",
"#5f5fd7",
"#5f5fff",
"#5f8700",
"#5f875f",
"#5f8787",
"#5f87af",
"#5f87d7",
"#5f87ff",
"#5faf00",
"#5faf5f",
"#5faf87",
"#5fafaf",
"#5fafd7",
"#5fafff",
"#5fd700",
"#5fd75f",
"#5fd787",
"#5fd7af",
"#5fd7d7",
"#5fd7ff",
"#5fff00",
"#5fff5f",
"#5fff87",
"#5fffaf",
"#5fffd7",
"#5fffff",
"#870000",
"#87005f",
"#870087",
"#8700af",
"#8700d7",
"#8700ff",
"#875f00",
"#875f5f",
"#875f87",
"#875faf",
"#875fd7",
"#875fff",
"#878700",
"#87875f",
"#878787",
"#8787af",
"#8787d7",
"#8787ff",
"#87af00",
"#87af5f",
"#87af87",
"#87afaf",
"#87afd7",
"#87afff",
"#87d700",
"#87d75f",
"#87d787",
"#87d7af",
"#87d7d7",
"#87d7ff",
"#87ff00",
"#87ff5f",
"#87ff87",
"#87ffaf",
"#87ffd7",
"#87ffff",
"#af0000",
"#af005f",
"#af0087",
"#af00af",
"#af00d7",
"#af00ff",
"#af5f00",
"#af5f5f",
"#af5f87",
"#af5faf",
"#af5fd7",
"#af5fff",
"#af8700",
"#af875f",
"#af8787",
"#af87af",
"#af87d7",
"#af87ff",
"#afaf00",
"#afaf5f",
"#afaf87",
"#afafaf",
"#afafd7",
"#afafff",
"#afd700",
"#afd75f",
"#afd787",
"#afd7af",
"#afd7d7",
"#afd7ff",
"#afff00",
"#afff5f",
"#afff87",
"#afffaf",
"#afffd7",
"#afffff",
"#d70000",
"#d7005f",
"#d70087",
"#d700af",
"#d700d7",
"#d700ff",
"#d75f00",
"#d75f5f",
"#d75f87",
"#d75faf",
"#d75fd7",
"#d75fff",
"#d78700",
"#d7875f",
"#d78787",
"#d787af",
"#d787d7",
"#d787ff",
"#d7af00",
"#d7af5f",
"#d7af87",
"#d7afaf",
"#d7afd7",
"#d7afff",
"#d7d700",
"#d7d75f",
"#d7d787",
"#d7d7af",
"#d7d7d7",
"#d7d7ff",
"#d7ff00",
"#d7ff5f",
"#d7ff87",
"#d7ffaf",
"#d7ffd7",
"#d7ffff",
"#ff0000",
"#ff005f",
"#ff0087",
"#ff00af",
"#ff00d7",
"#ff00ff",
"#ff5f00",
"#ff5f5f",
"#ff5f87",
"#ff5faf",
"#ff5fd7",
"#ff5fff",
"#ff8700",
"#ff875f",
"#ff8787",
"#ff87af",
"#ff87d7",
"#ff87ff",
"#ffaf00",
"#ffaf5f",
"#ffaf87",
"#ffafaf",
"#ffafd7",
"#ffafff",
"#ffd700",
"#ffd75f",
"#ffd787",
"#ffd7af",
"#ffd7d7",
"#ffd7ff",
"#ffff00",
"#ffff5f",
"#ffff87",
"#ffffaf",
"#ffffd7",
"#ffffff",
"#080808",
"#121212",
"#1c1c1c",
"#262626",
"#303030",
"#3a3a3a",
"#444444",
"#4e4e4e",
"#585858",
"#626262",
"#6c6c6c",
"#767676",
"#808080",
"#8a8a8a",
"#949494",
"#9e9e9e",
"#a8a8a8",
"#b2b2b2",
"#bcbcbc",
"#c6c6c6",
"#d0d0d0",
"#dadada",
"#e4e4e4",
"#eeeeee"
]

23
highlight/Cargo.toml Normal file
View file

@ -0,0 +1,23 @@
[package]
name = "tree-sitter-highlight"
description = "Library for performing syntax highlighting with Tree-sitter"
version = "0.1.0"
authors = [
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
"Tim Clem <timothy.clem@gmail.com>"
]
license = "MIT"
readme = "README.md"
edition = "2018"
keywords = ["incremental", "parsing", "syntax", "highlighting"]
categories = ["parsing", "text-editors"]
[dependencies]
regex = "1"
serde = "1.0"
serde_json = "1.0"
serde_derive = "1.0"
[dependencies.tree-sitter]
version = ">= 0.3.7"
path = "../lib"

53
highlight/src/escape.rs Normal file
View file

@ -0,0 +1,53 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! HTML Escaping
//!
//! This module contains one unit-struct which can be used to HTML-escape a
//! string of text (for use in a format string).
use std::fmt;
/// Display adapter that HTML-escapes the wrapped string.
///
/// Formatting an `Escape` writes the contained text with `<`, `>`, `&`, `'`
/// and `"` replaced by their HTML entities; every other byte is written
/// through unchanged.
pub struct Escape<'a>(pub &'a str);

impl<'a> fmt::Display for Escape<'a> {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        let text = self.0;
        // Byte index of the first character that has not been written yet.
        let mut flushed = 0;
        for (pos, byte) in text.bytes().enumerate() {
            // All five special characters are ASCII, so scanning bytes never
            // splits a multi-byte UTF-8 sequence.
            let entity = match byte {
                b'>' => "&gt;",
                b'<' => "&lt;",
                b'&' => "&amp;",
                b'\'' => "&#39;",
                b'"' => "&quot;",
                _ => continue,
            };
            // Write the untouched run preceding the special character, then
            // the entity that replaces it.
            fmt.write_str(&text[flushed..pos])?;
            fmt.write_str(entity)?;
            flushed = pos + 1;
        }
        // Whatever follows the last special character.
        if flushed < text.len() {
            fmt.write_str(&text[flushed..])?;
        }
        Ok(())
    }
}

823
highlight/src/lib.rs Normal file
View file

@ -0,0 +1,823 @@
mod escape;
use serde::{Deserialize, Deserializer};
use serde_derive::*;
use std::cmp;
use std::fmt::Write;
use std::mem::transmute;
use std::str;
use std::usize;
use tree_sitter::{Language, Node, Parser, Point, PropertySheet, Range, Tree, TreePropertyCursor};
/// Lookup service for the languages that can be injected (embedded) into a
/// document being highlighted.
pub trait LanguageRegistry {
/// Resolves an injection string to the language and the highlighting
/// property sheet to use for it. When `None` is returned, the highlighter
/// simply adds no layer for that injection.
fn language_for_injection_string<'a>(
&'a self,
s: &str,
) -> Option<(Language, &'a PropertySheet<Properties>)>;
}
/// One step of a flattened tree path. Each step transforms the current list
/// of nodes into a new list of nodes.
#[derive(Debug)]
enum TreeStep {
/// Select the child at `index` of each node (a negative index is added to
/// the node's child count, i.e. it counts from the end). When `kinds` is
/// present, the child is kept only if its kind id is in the list.
Child {
index: isize,
kinds: Option<Vec<u16>>,
},
/// Select every child of each node, optionally restricted to the given
/// kind ids.
Children {
kinds: Option<Vec<u16>>,
},
/// Accepted when a property sheet is loaded, but evaluating it is not
/// implemented yet (`process_tree_step` hits `unimplemented!()`).
Next {
kinds: Option<Vec<u16>>,
},
}
/// How the name of an injected language is determined.
#[derive(Debug)]
enum InjectionLanguage {
/// A fixed language name written directly in the property sheet.
Literal(String),
/// A tree path; the source text of the first node it selects is used as
/// the language name.
TreePath(Vec<TreeStep>),
}
/// A single language injection declared for a node: which language to
/// inject, and a tree path selecting the nodes whose text holds the
/// injected code.
#[derive(Debug)]
struct Injection {
language: InjectionLanguage,
content: Vec<TreeStep>,
}
/// The validated highlighting properties attached to a syntax node by a
/// property sheet (built from `PropertiesJSON` by `Properties::new`).
#[derive(Debug)]
pub struct Properties {
// Highlighting scope of the node, if any.
scope: Option<Scope>,
// Language injections rooted at the node; empty when none are declared.
injections: Vec<Injection>,
}
/// The highlighting categories that a property sheet can assign to a node.
///
/// The enum is `#[repr(u16)]` with default (consecutive) discriminants, which
/// `Scope::from_usize` relies on to convert an integer back into a variant.
/// `Unknown` must stay the last variant: `from_usize` uses it as the upper
/// bound of valid values, and deserialization maps unrecognized scope names
/// to it.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u16)]
pub enum Scope {
Attribute,
Comment,
Constant,
ConstantBuiltin,
Constructor,
ConstructorBuiltin,
Embedded,
Escape,
Function,
FunctionBuiltin,
Keyword,
Number,
Operator,
Property,
PropertyBuiltin,
Punctuation,
PunctuationBracket,
PunctuationDelimiter,
PunctuationSpecial,
String,
StringSpecial,
Tag,
Type,
TypeBuiltin,
Variable,
VariableBuiltin,
Unknown,
}
/// One parsed syntax tree taking part in highlighting, together with the
/// cursor that walks it.
struct Layer<'a> {
// Owns the tree that `cursor` walks; it is never read directly.
// NOTE(review): struct fields drop in declaration order, so `_tree` is
// dropped before `cursor` — confirm the cursor's destructor never touches
// the tree.
_tree: Tree,
cursor: TreePropertyCursor<'a, Properties>,
// The ranges handed to `Layer::new`; nested injections are intersected
// with them.
ranges: Vec<Range>,
// `false` while the cursor is positioned at the start of its node, `true`
// once it is positioned at the node's end.
at_node_end: bool,
}
/// Iterator state for highlighting one source buffer. Yields
/// `HighlightEvent`s via its `Iterator` implementation.
struct Highlighter<'a, T: LanguageRegistry> {
// Used to resolve injected languages.
language_registry: &'a T,
// The complete source text being highlighted.
source: &'a [u8],
// Byte offset up to which the source has already been emitted.
source_offset: usize,
// Parser reused for every injected layer.
parser: Parser,
// Active layers; `next` re-sorts them by current offset and always
// processes the first one.
layers: Vec<Layer<'a>>,
// Length of an invalid UTF-8 sequence that still has to be skipped; the
// next call to `next` emits U+FFFD for it.
utf8_error_len: Option<usize>,
}
/// An item produced by the highlighting iterator.
#[derive(Copy, Clone, Debug)]
pub enum HighlightEvent<'a> {
/// A chunk of source text (or U+FFFD standing in for invalid UTF-8).
Source(&'a str),
/// The given scope begins at the current position.
ScopeStart(Scope),
/// The given scope ends at the current position.
ScopeEnd(Scope),
}
/// JSON form of one argument to a tree-path function: a nested tree path, a
/// number (used as a child index) or a string (used as a node kind name).
/// Being `untagged`, the variant is chosen by the shape of the JSON value.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum TreePathArgJSON {
TreePath(TreePathJSON),
Number(isize),
String(String),
}
/// JSON form of a tree path as written in a property sheet: a nested function
/// call whose `name` field selects the variant (`this`, `child`, `next` or
/// `children`). `Properties::flatten_tree_path` turns it into `TreeStep`s.
#[derive(Debug, Deserialize)]
#[serde(tag = "name")]
enum TreePathJSON {
#[serde(rename = "this")]
This,
#[serde(rename = "child")]
Child { args: Vec<TreePathArgJSON> },
#[serde(rename = "next")]
Next { args: Vec<TreePathArgJSON> },
#[serde(rename = "children")]
Children { args: Vec<TreePathArgJSON> },
}
/// JSON form of the `injection-language` property: a literal language name, a
/// tree path, or a list of those. The type permits nested lists, but
/// `Properties::new` rejects them.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum InjectionLanguageJSON {
List(Vec<InjectionLanguageJSON>),
TreePath(TreePathJSON),
Literal(String),
}
/// JSON form of the `injection-content` property: a tree path or a list of
/// tree paths. The type permits nested lists, but `Properties::new` rejects
/// them.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum InjectionContentJSON {
List(Vec<InjectionContentJSON>),
TreePath(TreePathJSON),
}
/// Raw, unvalidated properties of a node as deserialized from a property
/// sheet. `Properties::new` converts this into `Properties`.
#[derive(Debug, Deserialize)]
struct PropertiesJSON {
scope: Option<Scope>,
// `injection-language` and `injection-content` must be given together and
// with the same number of values.
#[serde(rename = "injection-language")]
injection_language: Option<InjectionLanguageJSON>,
#[serde(rename = "injection-content")]
injection_content: Option<InjectionContentJSON>,
}
/// Reasons why `load_property_sheet` can fail.
#[derive(Debug)]
pub enum PropertySheetError {
/// Forwarded from `tree_sitter::PropertySheetError::InvalidJSON`.
InvalidJSON(serde_json::Error),
/// Forwarded from `tree_sitter::PropertySheetError::InvalidRegex`.
InvalidRegex(regex::Error),
/// The sheet was loaded, but its highlighting properties were rejected by
/// `Properties::new`; the string is the error message.
InvalidFormat(String),
}
/// Builds a highlighting property sheet for `language` from its JSON text.
///
/// The JSON is first loaded as a generic tree-sitter property sheet; each
/// property set in it is then validated and converted with
/// `Properties::new`.
///
/// # Errors
///
/// Returns `InvalidJSON` / `InvalidRegex` when tree-sitter rejects the sheet,
/// and `InvalidFormat` when a property set fails validation.
pub fn load_property_sheet(
    language: Language,
    json: &str,
) -> Result<PropertySheet<Properties>, PropertySheetError> {
    // Translate tree-sitter's error type into this crate's own.
    let raw_sheet = match PropertySheet::new(language, json) {
        Ok(loaded) => loaded,
        Err(tree_sitter::PropertySheetError::InvalidJSON(e)) => {
            return Err(PropertySheetError::InvalidJSON(e));
        }
        Err(tree_sitter::PropertySheetError::InvalidRegex(e)) => {
            return Err(PropertySheetError::InvalidRegex(e));
        }
    };
    raw_sheet
        .map(|raw_properties| Properties::new(raw_properties, language))
        .map_err(PropertySheetError::InvalidFormat)
}
impl Scope {
    /// Converts a numeric discriminant back into a `Scope`.
    ///
    /// Returns `None` when `i` is greater than the discriminant of
    /// `Scope::Unknown`, the last variant.
    pub fn from_usize(i: usize) -> Option<Self> {
        let highest = Scope::Unknown as usize;
        if i > highest {
            return None;
        }
        // SAFETY: `Scope` is `#[repr(u16)]` with consecutive discriminants
        // `0..=highest`, and `i` was just checked to lie in that range, so
        // `i as u16` is the bit pattern of a valid variant.
        let scope = unsafe { transmute(i as u16) };
        Some(scope)
    }
}
impl Properties {
fn new(json: PropertiesJSON, language: Language) -> Result<Self, String> {
let injections = match (json.injection_language, json.injection_content) {
(None, None) => Ok(Vec::new()),
(Some(_), None) => Err(
"Must specify an injection-content along with an injection-language".to_string(),
),
(None, Some(_)) => Err(
"Must specify an injection-language along with an injection-content".to_string(),
),
(Some(language_json), Some(content_json)) => {
let languages = match language_json {
InjectionLanguageJSON::List(list) => {
let mut result = Vec::with_capacity(list.len());
for element in list {
result.push(match element {
InjectionLanguageJSON::TreePath(p) => {
let mut result = Vec::new();
Self::flatten_tree_path(p, &mut result, language)?;
InjectionLanguage::TreePath(result)
}
InjectionLanguageJSON::Literal(s) => InjectionLanguage::Literal(s),
InjectionLanguageJSON::List(_) => {
panic!("Injection-language cannot be a list of lists")
}
})
}
result
}
InjectionLanguageJSON::TreePath(p) => vec![{
let mut result = Vec::new();
Self::flatten_tree_path(p, &mut result, language)?;
InjectionLanguage::TreePath(result)
}],
InjectionLanguageJSON::Literal(s) => vec![InjectionLanguage::Literal(s)],
};
let contents = match content_json {
InjectionContentJSON::List(l) => {
let mut result = Vec::with_capacity(l.len());
for element in l {
result.push(match element {
InjectionContentJSON::TreePath(p) => {
let mut result = Vec::new();
Self::flatten_tree_path(p, &mut result, language)?;
result
}
InjectionContentJSON::List(_) => {
panic!("Injection-content cannot be a list of lists")
}
})
}
result
}
InjectionContentJSON::TreePath(p) => vec![{
let mut result = Vec::new();
Self::flatten_tree_path(p, &mut result, language)?;
result
}],
};
if languages.len() == contents.len() {
Ok(languages
.into_iter()
.zip(contents.into_iter())
.map(|(language, content)| Injection { language, content })
.collect())
} else {
Err(format!(
"Mismatch: got {} injection-language values but {} injection-content values",
languages.len(),
contents.len(),
))
}
}
}?;
Ok(Self {
scope: json.scope,
injections,
})
}
// Transform a tree path from the format expressed directly in the property sheet
// (nested function calls), to a flat sequence of steps for transforming a list of
// nodes. This way, we can evaluate these tree paths with no recursion and a single
// vector of intermediate storage.
fn flatten_tree_path(
p: TreePathJSON,
steps: &mut Vec<TreeStep>,
language: Language,
) -> Result<(), String> {
match p {
TreePathJSON::This => {}
TreePathJSON::Child { args } => {
let (tree_path, index, kinds) = Self::parse_args("child", args, language)?;
Self::flatten_tree_path(tree_path, steps, language)?;
steps.push(TreeStep::Child {
index: index
.ok_or_else(|| "The `child` function requires an index".to_string())?,
kinds: kinds,
});
}
TreePathJSON::Children { args } => {
let (tree_path, _, kinds) = Self::parse_args("children", args, language)?;
Self::flatten_tree_path(tree_path, steps, language)?;
steps.push(TreeStep::Children { kinds });
}
TreePathJSON::Next { args } => {
let (tree_path, _, kinds) = Self::parse_args("next", args, language)?;
Self::flatten_tree_path(tree_path, steps, language)?;
steps.push(TreeStep::Next { kinds });
}
}
Ok(())
}
fn parse_args(
name: &str,
args: Vec<TreePathArgJSON>,
language: Language,
) -> Result<(TreePathJSON, Option<isize>, Option<Vec<u16>>), String> {
let tree_path;
let mut index = None;
let mut kinds = Vec::new();
let mut iter = args.into_iter();
match iter.next() {
Some(TreePathArgJSON::TreePath(p)) => tree_path = p,
_ => {
return Err(format!(
"First argument to `{}()` must be a tree path",
name
));
}
}
for arg in iter {
match arg {
TreePathArgJSON::TreePath(_) => {
return Err(format!(
"Other arguments to `{}()` must be strings or numbers",
name
));
}
TreePathArgJSON::Number(i) => index = Some(i),
TreePathArgJSON::String(s) => kinds.push(s),
}
}
if kinds.len() > 0 {
let mut kind_ids = Vec::new();
for i in 0..(language.node_kind_count() as u16) {
if kinds.iter().any(|s| s == language.node_kind_for_id(i))
&& language.node_kind_is_named(i)
{
kind_ids.push(i);
}
}
if kind_ids.len() == 0 {
return Err(format!("Non-existent node kinds: {:?}", kinds));
}
Ok((tree_path, index, Some(kind_ids)))
} else {
Ok((tree_path, index, None))
}
}
}
impl<'a, T: LanguageRegistry> Highlighter<'a, T> {
    /// Parses `source` with `language` and sets up the root layer, which
    /// spans the whole document.
    ///
    /// # Errors
    ///
    /// Returns a message if the language cannot be set on the parser or if
    /// parsing yields no tree.
    fn new(
        language_registry: &'a T,
        source: &'a [u8],
        language: Language,
        property_sheet: &'a PropertySheet<Properties>,
    ) -> Result<Self, String> {
        let mut parser = Parser::new();
        parser.set_language(language)?;
        let tree = parser
            .parse(source, None)
            .ok_or_else(|| "Tree-sitter: failed to parse".to_string())?;
        Ok(Self {
            language_registry,
            source,
            source_offset: 0,
            parser,
            layers: vec![Layer::new(
                source,
                tree,
                property_sheet,
                vec![Range {
                    start_byte: 0,
                    end_byte: usize::MAX,
                    start_point: Point::new(0, 0),
                    end_point: Point::new(usize::MAX, usize::MAX),
                }],
            )],
            utf8_error_len: None,
        })
    }

    /// Emits source text starting at `source_offset` and ending no later
    /// than `next_offset`, advancing `source_offset` past whatever was
    /// emitted.
    ///
    /// Invalid UTF-8 is handled lossily: the valid text in front of an
    /// invalid sequence is emitted first, and the sequence itself is replaced
    /// by U+FFFD (either right away, or on the following call to `next` via
    /// `utf8_error_len`).
    fn emit_source(&mut self, next_offset: usize) -> Option<HighlightEvent<'a>> {
        let input = &self.source[self.source_offset..next_offset];
        match str::from_utf8(input) {
            Ok(valid) => {
                self.source_offset = next_offset;
                Some(HighlightEvent::Source(valid))
            }
            Err(error) => {
                let valid_len = error.valid_up_to();
                // `error_len()` is `None` when the slice ends in the middle of
                // a multi-byte sequence; treat the rest of the slice as the
                // invalid sequence so that iteration keeps making progress
                // instead of stopping early.
                let error_len = error.error_len().unwrap_or(input.len() - valid_len);
                if valid_len > 0 {
                    // Step over the valid prefix now; the invalid bytes that
                    // follow are skipped by `next` when it emits U+FFFD.
                    self.source_offset += valid_len;
                    self.utf8_error_len = Some(error_len);
                    // SAFETY: `from_utf8` reported that the first `valid_len`
                    // bytes of `input` are valid UTF-8.
                    Some(HighlightEvent::Source(unsafe {
                        str::from_utf8_unchecked(&input[..valid_len])
                    }))
                } else {
                    self.source_offset += error_len;
                    Some(HighlightEvent::Source("\u{FFFD}"))
                }
            }
        }
    }

    // Apply one tree-path step in place: the nodes produced for each input
    // node are appended to `nodes`, then the input nodes are removed from the
    // front of the vector.
    fn process_tree_step(&self, step: &TreeStep, nodes: &mut Vec<Node>) {
        let len = nodes.len();
        for i in 0..len {
            let node = nodes[i];
            match step {
                TreeStep::Child { index, kinds } => {
                    // A negative index counts back from the last child.
                    let index = if *index >= 0 {
                        *index as usize
                    } else {
                        (node.child_count() as isize + *index) as usize
                    };
                    if let Some(child) = node.child(index) {
                        if let Some(kinds) = kinds {
                            if kinds.contains(&child.kind_id()) {
                                nodes.push(child);
                            }
                        } else {
                            nodes.push(child);
                        }
                    }
                }
                TreeStep::Children { kinds } => {
                    for child in node.children() {
                        if let Some(kinds) = kinds {
                            if kinds.contains(&child.kind_id()) {
                                nodes.push(child);
                            }
                        } else {
                            nodes.push(child);
                        }
                    }
                }
                // NOTE(review): property sheets using `next()` load
                // successfully but panic here.
                TreeStep::Next { .. } => unimplemented!(),
            }
        }
        nodes.drain(0..len);
    }

    // Evaluate a flattened tree path starting from `node`, returning the
    // nodes it selects.
    fn nodes_for_tree_path(&self, node: Node<'a>, steps: &Vec<TreeStep>) -> Vec<Node<'a>> {
        let mut nodes = vec![node];
        for step in steps.iter() {
            self.process_tree_step(step, &mut nodes);
        }
        nodes
    }

    // An injected language name may either be specified as a fixed string, or based
    // on the text of some node in the syntax tree.
    //
    // Returns `None` when the tree path selects no node or when the selected
    // node's text is not valid UTF-8.
    fn injection_language_string(
        &self,
        node: &Node,
        language: &InjectionLanguage,
    ) -> Option<String> {
        match language {
            InjectionLanguage::Literal(s) => Some(s.to_string()),
            InjectionLanguage::TreePath(steps) => self
                .nodes_for_tree_path(*node, steps)
                .first()
                .and_then(|node| {
                    str::from_utf8(&self.source[node.start_byte()..node.end_byte()])
                        .map(|s| s.to_owned())
                        .ok()
                }),
        }
    }

    // Compute the ranges that should be included when parsing an injection.
    // This takes into account two things:
    // * `nodes` - Every injection takes place within a set of nodes. The injection ranges
    //   are the ranges of those nodes, *minus* the ranges of those nodes' children.
    // * `parent_ranges` - The new injection may be nested inside of *another* injection
    //   (e.g. JavaScript within HTML within ERB). The parent injection's ranges must
    //   be taken into account.
    fn intersect_ranges(parent_ranges: &Vec<Range>, nodes: &Vec<Node>) -> Vec<Range> {
        let mut result = Vec::new();
        let mut parent_range_iter = parent_ranges.iter();
        let mut parent_range = parent_range_iter
            .next()
            .expect("Layers should only be constructed with non-empty ranges vectors");
        for node in nodes.iter() {
            let range = node.range();
            // Sentinel "child" ending where the node starts, so that the
            // first gap begins at the node's start.
            let mut preceding_range = Range {
                start_byte: 0,
                start_point: Point::new(0, 0),
                end_byte: range.start_byte,
                end_point: range.start_point,
            };
            // Sentinel "child" starting where the node ends, so that the
            // last gap stops at the node's end.
            let following_range = Range {
                start_byte: node.end_byte(),
                start_point: node.end_position(),
                end_byte: usize::MAX,
                end_point: Point::new(usize::MAX, usize::MAX),
            };

            for child_range in node
                .children()
                .map(|c| c.range())
                .chain([following_range].iter().cloned())
            {
                // The gap between the previous child and this one.
                let mut range = Range {
                    start_byte: preceding_range.end_byte,
                    start_point: preceding_range.end_point,
                    end_byte: child_range.start_byte,
                    end_point: child_range.start_point,
                };
                preceding_range = child_range;

                if range.end_byte < parent_range.start_byte {
                    continue;
                }

                // Clip the gap against each parent range that overlaps it.
                while parent_range.start_byte <= range.end_byte {
                    if parent_range.end_byte > range.start_byte {
                        if range.start_byte < parent_range.start_byte {
                            range.start_byte = parent_range.start_byte;
                            range.start_point = parent_range.start_point;
                        }

                        if parent_range.end_byte < range.end_byte {
                            if range.start_byte < parent_range.end_byte {
                                result.push(Range {
                                    start_byte: range.start_byte,
                                    start_point: range.start_point,
                                    end_byte: parent_range.end_byte,
                                    end_point: parent_range.end_point,
                                });
                            }
                            range.start_byte = parent_range.end_byte;
                            range.start_point = parent_range.end_point;
                        } else {
                            if range.start_byte < range.end_byte {
                                result.push(range);
                            }
                            break;
                        }
                    }

                    if let Some(next_range) = parent_range_iter.next() {
                        parent_range = next_range;
                    } else {
                        return result;
                    }
                }
            }
        }
        result
    }

    // Parse the given ranges of the source as `language_string` and add the
    // resulting tree as a new layer. Nothing happens when the registry does
    // not know the language.
    fn add_layer(&mut self, language_string: &str, ranges: Vec<Range>) {
        if let Some((language, property_sheet)) = self
            .language_registry
            .language_for_injection_string(language_string)
        {
            self.parser
                .set_language(language)
                .expect("Failed to set language");
            self.parser.set_included_ranges(&ranges);
            let tree = self
                .parser
                .parse(self.source, None)
                .expect("Failed to parse");
            let layer = Layer::new(self.source, tree, property_sheet, ranges);
            // The second tuple component makes the search key compare greater
            // than every existing layer with the same offset, so the new
            // layer is inserted after all of them.
            match self
                .layers
                .binary_search_by_key(&(layer.offset(), 1), |l| (l.offset(), 0))
            {
                Ok(i) | Err(i) => self.layers.insert(i, layer),
            };
        }
    }
}
// Produces the stream of highlight events: chunks of source text interleaved
// with scope start/end markers, merged across all layers in offset order.
impl<'a, T: LanguageRegistry> Iterator for Highlighter<'a, T> {
type Item = HighlightEvent<'a>;
fn next(&mut self) -> Option<Self::Item> {
// A previous call emitted the valid text in front of an invalid UTF-8
// sequence; now skip the invalid bytes and emit a replacement character.
if let Some(utf8_error_len) = self.utf8_error_len.take() {
self.source_offset += utf8_error_len;
return Some(HighlightEvent::Source("\u{FFFD}"));
}
// The first layer is always the one to process next (layers are re-sorted
// by offset after each advance below).
while !self.layers.is_empty() {
let first_layer = &self.layers[0];
let properties = &first_layer.cursor.node_properties();
// Add any injections for the current node.
if !first_layer.at_node_end {
let node = first_layer.cursor.node();
// Collect the injections first; `add_layer` needs `&mut self`, which is
// not available while `first_layer` is still borrowed.
let injections = properties
.injections
.iter()
.filter_map(|Injection { language, content }| {
if let Some(language) = self.injection_language_string(&node, language) {
let nodes = self.nodes_for_tree_path(node, content);
let ranges = Self::intersect_ranges(&first_layer.ranges, &nodes);
if ranges.len() > 0 {
return Some((language, ranges));
}
}
None
})
.collect::<Vec<_>>();
for (language, ranges) in injections {
self.add_layer(&language, ranges);
}
}
// Determine if any scopes start or end at the current position.
let scope_event;
if let Some(scope) = properties.scope {
let next_offset = cmp::min(self.source.len(), self.layers[0].offset());
// Before returning any scope boundaries, return any remaining slice of
// the source code the precedes that scope boundary.
// The cursor is not advanced in that case, so the scope boundary itself
// is handled on a later call.
if self.source_offset < next_offset {
return self.emit_source(next_offset);
}
scope_event = if self.layers[0].at_node_end {
Some(HighlightEvent::ScopeEnd(scope))
} else {
Some(HighlightEvent::ScopeStart(scope))
};
} else {
scope_event = None;
};
// Advance the current layer's tree cursor. This might cause that cursor to move
// beyond one of the other layers' cursors for a different syntax tree, so we need
// to re-sort the layers. If the cursor is already at the end of its syntax tree,
// remove it.
if self.layers[0].advance() {
self.layers.sort_unstable_by_key(|layer| layer.offset());
} else {
self.layers.remove(0);
}
if scope_event.is_some() {
return scope_event;
}
}
// All layers are exhausted: emit whatever source text remains.
if self.source_offset < self.source.len() {
self.emit_source(self.source.len())
} else {
None
}
}
}
impl<'a> Layer<'a> {
fn new(
source: &'a [u8],
tree: Tree,
sheet: &'a PropertySheet<Properties>,
ranges: Vec<Range>,
) -> Self {
// The cursor's lifetime parameter indicates that the tree must outlive the cursor.
// But because the tree is really a pointer to the heap, the cursor can remain
// valid when the tree is moved. There's no way to express this with lifetimes
// right now, so we have to `transmute` the cursor's lifetime.
let cursor = unsafe { transmute(tree.walk_with_properties(sheet, source)) };
Self {
_tree: tree,
cursor,
ranges,
at_node_end: false,
}
}
fn offset(&self) -> usize {
if self.at_node_end {
self.cursor.node().end_byte()
} else {
self.cursor.node().start_byte()
}
}
fn advance(&mut self) -> bool {
if self.at_node_end {
if self.cursor.goto_next_sibling() {
self.at_node_end = false;
} else if !self.cursor.goto_parent() {
return false;
}
} else if !self.cursor.goto_first_child() {
self.at_node_end = true;
}
true
}
}
/// Deserializes a scope from its dotted name as written in property sheets
/// (e.g. `"constant.builtin"`). Names that are not recognized become
/// `Scope::Unknown` instead of producing an error.
impl<'de> Deserialize<'de> for Scope {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let name = String::deserialize(deserializer)?;
        let scope = match name.as_str() {
            "attribute" => Scope::Attribute,
            "comment" => Scope::Comment,
            "constant" => Scope::Constant,
            "constant.builtin" => Scope::ConstantBuiltin,
            "constructor" => Scope::Constructor,
            "constructor.builtin" => Scope::ConstructorBuiltin,
            "embedded" => Scope::Embedded,
            "escape" => Scope::Escape,
            "function" => Scope::Function,
            "function.builtin" => Scope::FunctionBuiltin,
            "keyword" => Scope::Keyword,
            "number" => Scope::Number,
            "operator" => Scope::Operator,
            "property" => Scope::Property,
            "property.builtin" => Scope::PropertyBuiltin,
            "punctuation" => Scope::Punctuation,
            "punctuation.bracket" => Scope::PunctuationBracket,
            "punctuation.delimiter" => Scope::PunctuationDelimiter,
            "punctuation.special" => Scope::PunctuationSpecial,
            "string" => Scope::String,
            "string.special" => Scope::StringSpecial,
            "type" => Scope::Type,
            "type.builtin" => Scope::TypeBuiltin,
            "variable" => Scope::Variable,
            "variable.builtin" => Scope::VariableBuiltin,
            "tag" => Scope::Tag,
            _ => Scope::Unknown,
        };
        Ok(scope)
    }
}
/// Highlights `source` as `language`, returning an iterator of
/// `HighlightEvent`s.
///
/// `property_sheet` supplies the highlighting properties for `language`;
/// `language_registry` is consulted for languages injected into the
/// document.
///
/// # Errors
///
/// Returns a message if the parser cannot be set up for `language` or the
/// source cannot be parsed.
pub fn highlight<'a, T: LanguageRegistry>(
language_registry: &'a T,
source: &'a [u8],
language: Language,
property_sheet: &'a PropertySheet<Properties>,
) -> Result<impl Iterator<Item = HighlightEvent<'a>> + 'a, String> {
Highlighter::new(language_registry, source, language, property_sheet)
}
/// Highlights `source` and renders the result as HTML, one string per line.
///
/// Every scope becomes a `<span ...>` element whose attribute text is
/// obtained from `attribute_callback`. Spans that cross a line break are
/// closed at the end of the line and reopened on the next one.
///
/// # Errors
///
/// Returns a message if the highlighter cannot be created for `language`.
///
/// # Panics
///
/// Panics if the event stream closes a scope other than the most recently
/// opened one.
pub fn highlight_html<'a, T: LanguageRegistry, F: Fn(Scope) -> &'a str>(
    language_registry: &'a T,
    source: &'a [u8],
    language: Language,
    property_sheet: &'a PropertySheet<Properties>,
    attribute_callback: F,
) -> Result<Vec<String>, String> {
    let events = Highlighter::new(language_registry, source, language, property_sheet)?;
    let mut html = HtmlRenderer::new(attribute_callback);
    // Scopes currently open, innermost last.
    let mut open_scopes = Vec::new();
    for event in events {
        match event {
            HighlightEvent::Source(text) => {
                html.render_line(text, &open_scopes);
            }
            HighlightEvent::ScopeStart(scope) => {
                open_scopes.push(scope);
                html.start_scope(scope);
            }
            HighlightEvent::ScopeEnd(scope) => {
                assert_eq!(open_scopes.pop(), Some(scope));
                html.end_scope();
            }
        }
    }
    // Emit the last line if the source did not end with a newline.
    html.flush();
    Ok(html.result)
}
// Accumulates highlighted HTML, one output string per line.
struct HtmlRenderer<'a, F: Fn(Scope) -> &'a str> {
// Completed lines, each terminated by '\n'.
result: Vec<String>,
// HTML of the line currently being built.
buffer: String,
// Supplies the attribute text placed inside the `<span ...>` tag of a scope.
attribute_callback: F,
}
impl<'a, F: Fn(Scope) -> &'a str> HtmlRenderer<'a, F> {
    /// Creates a renderer with no output yet.
    fn new(attribute_callback: F) -> Self {
        HtmlRenderer {
            result: Vec::new(),
            buffer: String::new(),
            attribute_callback,
        }
    }

    /// Opens a `<span>` for scope `s`, with attributes taken from the
    /// callback.
    fn start_scope(&mut self, s: Scope) {
        write!(&mut self.buffer, "<span {}>", (self.attribute_callback)(s),).unwrap();
    }

    /// Closes the most recently opened `<span>`.
    fn end_scope(&mut self) {
        write!(&mut self.buffer, "</span>").unwrap();
    }

    // Terminate the current line unconditionally and start a new, empty one.
    fn finish_line(&mut self) {
        self.buffer.push('\n');
        self.result.push(self.buffer.clone());
        self.buffer.clear();
    }

    /// Emits the line being built, if it has any content. Called once after
    /// the last event, so that a source ending in a newline does not produce
    /// a trailing empty line.
    fn flush(&mut self) {
        if !self.buffer.is_empty() {
            self.finish_line();
        }
    }

    /// Appends the HTML-escaped text `src`, which may span several lines.
    ///
    /// At every line break, the open `scopes` are closed, the line is
    /// emitted, and the scopes are reopened on the new line. A line is
    /// emitted at *every* line break, even when it is empty, so that blank
    /// source lines are preserved and output lines stay aligned with source
    /// line numbers.
    fn render_line(&mut self, src: &str, scopes: &Vec<Scope>) {
        for (line_index, line) in src.split('\n').enumerate() {
            let line = line.trim_end_matches('\r');
            // Every piece after the first one is preceded by a line break.
            if line_index > 0 {
                scopes.iter().for_each(|_| self.end_scope());
                self.finish_line();
                scopes.iter().for_each(|scope| self.start_scope(*scope));
            }
            write!(&mut self.buffer, "{}", escape::Escape(line)).unwrap();
        }
    }
}