Add a highlight subcommand

This commit is contained in:
Max Brunsfeld 2019-02-19 11:24:50 -08:00
parent 0dd15e2b02
commit e89b6b2402
14 changed files with 1870 additions and 22 deletions

View file

@ -1,4 +1,5 @@
use std::io;
use tree_sitter_highlight::PropertySheetError;
#[derive(Debug)]
pub struct Error(pub String);
@ -42,3 +43,13 @@ impl From<String> for Error {
Error(error)
}
}
impl From<PropertySheetError> for Error {
fn from(error: PropertySheetError) -> Self {
match error {
PropertySheetError::InvalidFormat(e) => Self::from(e),
PropertySheetError::InvalidRegex(e) => Self::regex(&e.to_string()),
PropertySheetError::InvalidJSON(e) => Self::from(e),
}
}
}

272
cli/src/highlight.rs Normal file
View file

@ -0,0 +1,272 @@
use crate::error::Result;
use crate::loader::Loader;
use ansi_term::{Color, Style};
use lazy_static::lazy_static;
use serde_json::Value;
use std::collections::HashMap;
use std::{fmt, fs, io, mem, path};
use tree_sitter::{Language, PropertySheet};
use tree_sitter_highlight::{highlight, highlight_html, HighlightEvent, Properties, Scope};
lazy_static! {
// CSS color strings parsed from the JSON file that is embedded into the
// binary at compile time. `color_to_css` indexes this list with the id of a
// `Color::Fixed`, so it presumably holds one entry per xterm color id
// (TODO confirm against the vendored file).
//
// The `unwrap` panics on first access if the embedded file is not a JSON
// array of strings.
static ref CSS_STYLES_BY_COLOR_ID: Vec<String> =
serde_json::from_str(include_str!("../vendor/xterm-colors.json")).unwrap();
}
/// The styles used to render each highlight scope, kept in two parallel
/// tables: one for terminal (ANSI) output and one for HTML output.
pub struct Theme {
// Terminal style per scope, indexed by `scope as usize`. `None` means the
// theme has no entry for that scope.
ansi_styles: Vec<Option<Style>>,
// Pre-rendered `style='...'` attribute per scope, indexed the same way as
// `ansi_styles`.
css_styles: Vec<Option<String>>,
}
impl Theme {
    /// Reads the file at `path` and builds a theme from its contents.
    ///
    /// # Errors
    ///
    /// Returns the I/O error if the file cannot be read. Malformed JSON is
    /// *not* reported as an error; see [`Theme::new`].
    pub fn load(path: &path::Path) -> io::Result<Self> {
        let json = fs::read_to_string(path)?;
        Ok(Self::new(&json))
    }

    /// Builds a theme from a JSON object that maps scope names to styles.
    ///
    /// Each value is interpreted by `parse_style`. If `json` cannot be parsed
    /// as such an object, the result is a theme with no styles at all (this
    /// best-effort behavior is kept as it was).
    pub fn new(json: &str) -> Self {
        // Initial table size only; the tables grow below if a scope's index
        // does not fit, instead of panicking on an out-of-bounds write.
        const INITIAL_SCOPE_CAPACITY: usize = 30;

        let mut ansi_styles = vec![None; INITIAL_SCOPE_CAPACITY];
        let mut css_styles = vec![None; INITIAL_SCOPE_CAPACITY];
        if let Ok(colors) = serde_json::from_str::<HashMap<Scope, Value>>(json) {
            for (scope, style_value) in colors {
                let mut style = Style::default();
                parse_style(&mut style, style_value);

                let index = scope as usize;
                if index >= ansi_styles.len() {
                    ansi_styles.resize(index + 1, None);
                    css_styles.resize(index + 1, None);
                }
                ansi_styles[index] = Some(style);
                css_styles[index] = Some(style_to_css(style));
            }
        }
        Self {
            ansi_styles,
            css_styles,
        }
    }

    /// Returns the terminal style for `scope`, or `None` if the theme has no
    /// entry for it (including scopes beyond the end of the table).
    fn ansi_style(&self, scope: Scope) -> Option<&Style> {
        self.ansi_styles
            .get(scope as usize)
            .and_then(Option::as_ref)
    }

    /// Returns the pre-rendered `style='...'` attribute for `scope`, or `None`
    /// if the theme has no entry for it (including scopes beyond the end of
    /// the table).
    fn css_style(&self, scope: Scope) -> Option<&str> {
        self.css_styles
            .get(scope as usize)
            .and_then(Option::as_ref)
            .map(String::as_str)
    }
}
impl Default for Theme {
/// Builds the built-in theme by passing the embedded JSON below to
/// `Theme::new`.
///
/// Each key is deserialized as a `Scope`. Each value is handled by
/// `parse_style`: a bare number is a fixed terminal color id, an object may
/// carry a `color` plus flags such as `bold` and `italic`, and `null`
/// registers the scope with an unmodified default style.
fn default() -> Self {
Theme::new(
r#"
{
"attribute": {"color": 124, "italic": true},
"comment": {"color": 245, "italic": true},
"constant.builtin": {"color": 94, "bold": true},
"constant": 94,
"constructor": 136,
"embedded": null,
"function.builtin": {"color": 26, "bold": true},
"function": 26,
"keyword": 56,
"number": {"color": 94, "bold": true},
"property": 124,
"operator": {"color": 239, "bold": true},
"punctuation.bracket": 239,
"punctuation.delimiter": 239,
"string.special": 30,
"string": 28,
"tag": {"color": 18},
"variable.builtin": {"bold": true}
}
"#,
)
}
}
impl fmt::Debug for Theme {
    /// Prints the theme as `{Scope: style, Scope: style, ...}`, listing only
    /// the scopes that have a style.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("{")?;
        // Empty before the first entry, ", " before every later one.
        let mut separator = "";
        let styled_scopes = self
            .ansi_styles
            .iter()
            .enumerate()
            .filter_map(|(index, style)| style.as_ref().map(|style| (index, style)));
        for (index, style) in styled_scopes {
            let scope = Scope::from_usize(index).unwrap();
            write!(f, "{}{:?}: {:?}", separator, scope, style)?;
            separator = ", ";
        }
        f.write_str("}")
    }
}
/// Applies the style described by one theme entry to `style`.
///
/// `json` is either:
/// - an object whose recognized keys are `color` (see `parse_color`) and the
///   boolean flags `italic`, `bold`, `dimmed` and `underline`, or
/// - any other value, which is treated as a bare color.
///
/// A flag is applied only when its value is the JSON literal `true`, so
/// `"italic": false` leaves the style untouched rather than enabling italics.
/// Unknown keys and unparseable colors are ignored.
fn parse_style(style: &mut Style, json: Value) {
    if let Value::Object(entries) = json {
        for (property_name, value) in entries {
            let enabled = value == Value::Bool(true);
            match property_name.as_str() {
                "italic" if enabled => *style = style.italic(),
                "bold" if enabled => *style = style.bold(),
                "dimmed" if enabled => *style = style.dimmed(),
                "underline" if enabled => *style = style.underline(),
                "color" => {
                    if let Some(color) = parse_color(value) {
                        *style = style.fg(color);
                    }
                }
                _ => {}
            }
        }
    } else if let Some(color) = parse_color(json) {
        *style = style.fg(color);
    }
}
/// Parses a theme color value.
///
/// Accepted forms:
/// - a number in `0..=255`, taken as a fixed terminal color id;
/// - a color name (case-insensitive): black, blue, cyan, green, purple, red,
///   white or yellow;
/// - a string starting with `#` followed by at least six hex digits, read as
///   `#rrggbb`.
///
/// Returns `None` for anything else. Numbers above 255 are rejected instead
/// of being wrapped into the `u8` range, and malformed or non-ASCII `#...`
/// strings return `None` rather than panicking on a bad slice.
fn parse_color(json: Value) -> Option<Color> {
    match json {
        Value::Number(n) => match n.as_u64() {
            Some(id) if id <= u64::from(std::u8::MAX) => Some(Color::Fixed(id as u8)),
            _ => None,
        },
        Value::String(s) => match s.to_lowercase().as_str() {
            "black" => Some(Color::Black),
            "blue" => Some(Color::Blue),
            "cyan" => Some(Color::Cyan),
            "green" => Some(Color::Green),
            "purple" => Some(Color::Purple),
            "red" => Some(Color::Red),
            "white" => Some(Color::White),
            "yellow" => Some(Color::Yellow),
            hex if hex.starts_with('#') && hex.len() >= 7 => {
                // `str::get` returns `None` when a range does not fall on
                // character boundaries, where direct indexing would panic.
                let channel = |range: std::ops::Range<usize>| {
                    hex.get(range)
                        .and_then(|digits| u8::from_str_radix(digits, 16).ok())
                };
                match (channel(1..3), channel(3..5), channel(5..7)) {
                    (Some(red), Some(green), Some(blue)) => Some(Color::RGB(red, green, blue)),
                    _ => None,
                }
            }
            _ => None,
        },
        _ => None,
    }
}
fn style_to_css(style: Style) -> String {
use std::fmt::Write;
let mut result = "style='".to_string();
if style.is_bold {
write!(&mut result, "font-weight: bold;").unwrap();
}
if style.is_italic {
write!(&mut result, "font-style: italic;").unwrap();
}
if let Some(color) = style.foreground {
write!(&mut result, "color: {};", color_to_css(color)).unwrap();
}
result.push('\'');
result
}
/// Returns the CSS color string for a named or fixed-id terminal color.
///
/// Fixed ids are looked up in `CSS_STYLES_BY_COLOR_ID`.
///
/// # Panics
///
/// Panics for any color that is neither one of the eight named colors nor a
/// fixed id, and if a fixed id is outside the lookup table.
fn color_to_css(color: Color) -> &'static str {
    if let Color::Fixed(id) = color {
        return CSS_STYLES_BY_COLOR_ID[id as usize].as_str();
    }
    match color {
        Color::Black => "black",
        Color::Red => "red",
        Color::Green => "green",
        Color::Yellow => "yellow",
        Color::Blue => "blue",
        Color::Purple => "purple",
        Color::Cyan => "cyan",
        Color::White => "white",
        _ => panic!("Unsupported color type"),
    }
}
/// Highlights `source` and writes it to stdout with ANSI styling.
///
/// Each piece of source text is painted with the theme's style for the
/// innermost open scope. If there is no open scope, or the theme has no style
/// for it, the text is written unstyled.
///
/// # Errors
///
/// Returns an error if highlighting fails or if writing to stdout fails.
pub fn ansi(
    loader: &Loader,
    theme: &Theme,
    source: &[u8],
    language: Language,
    property_sheet: &PropertySheet<Properties>,
) -> Result<()> {
    use std::io::Write;
    let stdout = io::stdout();
    let mut out = stdout.lock();
    let mut open_scopes = Vec::new();
    for event in highlight(loader, source, language, property_sheet)? {
        match event {
            HighlightEvent::ScopeStart(scope) => open_scopes.push(scope),
            HighlightEvent::ScopeEnd(_) => {
                open_scopes.pop();
            }
            HighlightEvent::Source(text) => {
                let innermost_style = open_scopes
                    .last()
                    .and_then(|scope| theme.ansi_style(*scope));
                match innermost_style {
                    Some(style) => write!(&mut out, "{}", style.paint(text))?,
                    None => write!(&mut out, "{}", text)?,
                }
            }
        }
    }
    Ok(())
}
/// Opening part of the HTML page for highlighted output: doctype, a `<head>`
/// with the page title and the CSS for the `line-number` and `line` cells
/// written by `html`, and the opening `<body>` tag.
pub const HTML_HEADER: &'static str = "
<!doctype HTML>
<head>
<title>Tree-sitter Highlighting</title>
<style>
body {
font-family: monospace
}
.line-number {
user-select: none;
text-align: right;
color: rgba(27,31,35,.3);
padding: 0 10px;
}
.line {
white-space: pre;
}
</style>
</head>
<body>
";
/// Closing markup for a page started with `HTML_HEADER`: the closing
/// `</body>` tag.
pub const HTML_FOOTER: &'static str = "
</body>
";
/// Highlights `source` and writes it to stdout as an HTML `<table>`.
///
/// Each source line becomes one row with a 1-based line-number cell and a
/// cell holding the highlighted markup. The attribute for each scope comes
/// from the theme; scopes without a style get an empty attribute string.
///
/// # Errors
///
/// Returns an error if highlighting fails or if writing to stdout fails.
pub fn html(
    loader: &Loader,
    theme: &Theme,
    source: &[u8],
    language: Language,
    property_sheet: &PropertySheet<Properties>,
) -> Result<()> {
    use std::io::Write;
    let stdout = io::stdout();
    let mut out = stdout.lock();
    writeln!(&mut out, "<table>")?;
    let rendered_lines = highlight_html(loader, source, language, property_sheet, |scope| {
        theme.css_style(scope).unwrap_or("")
    })?;
    for (index, rendered_line) in rendered_lines.into_iter().enumerate() {
        let line_number = index + 1;
        writeln!(
            &mut out,
            "<tr><td class=line-number>{}</td><td class=line>{}</td></tr>",
            line_number, rendered_line
        )?;
    }
    writeln!(&mut out, "</table>")?;
    Ok(())
}

View file

@ -1,5 +1,6 @@
pub mod error;
pub mod generate;
pub mod highlight;
pub mod loader;
pub mod logger;
pub mod parse;

View file

@ -1,5 +1,6 @@
use super::error::{Error, Result};
use libloading::{Library, Symbol};
use once_cell::unsync::OnceCell;
use regex::{Regex, RegexBuilder};
use serde_derive::Deserialize;
use std::collections::HashMap;
@ -9,6 +10,7 @@ use std::process::Command;
use std::time::SystemTime;
use std::{fs, mem};
use tree_sitter::{Language, PropertySheet};
use tree_sitter_highlight::{load_property_sheet, LanguageRegistry, Properties};
#[cfg(unix)]
const DYLIB_EXTENSION: &'static str = "so";
@ -20,16 +22,18 @@ const BUILD_TARGET: &'static str = env!("BUILD_TARGET");
struct LanguageRepo {
path: PathBuf,
language: Option<Language>,
language: OnceCell<Language>,
configurations: Vec<LanguageConfiguration>,
}
pub struct LanguageConfiguration {
_name: String,
pub name: String,
_content_regex: Option<Regex>,
_first_line_regex: Option<Regex>,
injection_regex: Option<Regex>,
file_types: Vec<String>,
_highlight_property_sheet: Option<std::result::Result<PropertySheet, PathBuf>>,
highlight_property_sheet_path: Option<PathBuf>,
highlight_property_sheet: OnceCell<Option<PropertySheet<Properties>>>,
}
pub struct Loader {
@ -76,7 +80,7 @@ impl Loader {
}
pub fn language_configuration_for_file_name(
&mut self,
&self,
path: &Path,
) -> Result<Option<(Language, &LanguageConfiguration)>> {
let ids = path
@ -100,20 +104,43 @@ impl Loader {
Ok(None)
}
/// Finds the language configuration whose injection regex best matches
/// `string`.
///
/// "Best" means the longest regex match. On a tie the configuration found
/// first wins, and empty matches never count. The language is loaded only
/// for the winning configuration.
///
/// Returns `Ok(None)` when no configuration matches.
pub fn language_configuration_for_injection_string(
    &self,
    string: &str,
) -> Result<Option<(Language, &LanguageConfiguration)>> {
    let mut longest_match = 0;
    let mut winner = None;
    for (repo_index, repo) in self.language_repos.iter().enumerate() {
        for (config_index, configuration) in repo.configurations.iter().enumerate() {
            let match_length = configuration
                .injection_regex
                .as_ref()
                .and_then(|regex| regex.find(string))
                .map(|found| found.end() - found.start());
            match match_length {
                Some(length) if length > longest_match => {
                    winner = Some((repo_index, config_index));
                    longest_match = length;
                }
                _ => {}
            }
        }
    }

    match winner {
        Some((repo_index, config_index)) => {
            let (language, configurations) = self.language_configuration_for_id(repo_index)?;
            Ok(Some((language, &configurations[config_index])))
        }
        None => Ok(None),
    }
}
fn language_configuration_for_id(
&mut self,
&self,
id: usize,
) -> Result<(Language, &Vec<LanguageConfiguration>)> {
let repo = &self.language_repos[id];
let language = if let Some(language) = repo.language {
language
} else {
let language = repo.language.get_or_try_init(|| {
let src_path = repo.path.join("src");
let language = self.load_language_at_path(&src_path, &src_path)?;
self.language_repos[id].language = Some(language);
language
};
Ok((language, &self.language_repos[id].configurations))
self.load_language_at_path(&src_path, &src_path)
})?;
Ok((*language, &self.language_repos[id].configurations))
}
pub fn load_language_at_path(&self, src_path: &Path, header_path: &Path) -> Result<Language> {
@ -238,6 +265,8 @@ impl Loader {
content_regex: Option<String>,
#[serde(rename = "first-line-regex")]
first_line_regex: Option<String>,
#[serde(rename = "injection-regex")]
injection_regex: Option<String>,
highlights: Option<String>,
}
@ -255,7 +284,7 @@ impl Loader {
configurations
.into_iter()
.map(|conf| LanguageConfiguration {
_name: conf.name,
name: conf.name,
file_types: conf.file_types.unwrap_or(Vec::new()),
_content_regex: conf
.content_regex
@ -263,7 +292,11 @@ impl Loader {
_first_line_regex: conf
.first_line_regex
.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()),
_highlight_property_sheet: conf.highlights.map(|d| Err(d.into())),
injection_regex: conf
.injection_regex
.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()),
highlight_property_sheet_path: conf.highlights.map(|h| parser_path.join(h)),
highlight_property_sheet: OnceCell::new(),
})
.collect()
});
@ -279,7 +312,7 @@ impl Loader {
self.language_repos.push(LanguageRepo {
path: parser_path.to_owned(),
language: None,
language: OnceCell::new(),
configurations,
});
@ -287,6 +320,56 @@ impl Loader {
}
}
impl LanguageRegistry for Loader {
    /// Resolves an injection string to a language and its highlighting
    /// property sheet.
    ///
    /// Returns `None` when no configuration matches, when the matching
    /// configuration has no property sheet, or when loading either one fails.
    /// Load failures are reported on stderr rather than returned.
    fn language_for_injection_string<'a>(
        &'a self,
        string: &str,
    ) -> Option<(Language, &'a PropertySheet<Properties>)> {
        let (language, configuration) =
            match self.language_configuration_for_injection_string(string) {
                Ok(Some(found)) => found,
                Ok(None) => return None,
                Err(message) => {
                    eprintln!(
                        "Failed to load language for injection string '{}': {}",
                        string, message.0
                    );
                    return None;
                }
            };

        match configuration.highlight_property_sheet(language) {
            Ok(sheet) => sheet.map(|sheet| (language, sheet)),
            Err(message) => {
                eprintln!(
                    "Failed to load property sheet for injection string '{}': {}",
                    string, message.0
                );
                None
            }
        }
    }
}
impl LanguageConfiguration {
    /// Returns this configuration's highlighting property sheet, loading it
    /// on first use and reusing the stored result afterwards.
    ///
    /// Returns `Ok(None)` when the configuration has no property sheet path.
    ///
    /// # Errors
    ///
    /// Returns an error if the sheet file cannot be read or cannot be loaded
    /// for `language`.
    pub fn highlight_property_sheet(
        &self,
        language: Language,
    ) -> Result<Option<&PropertySheet<Properties>>> {
        self.highlight_property_sheet
            .get_or_try_init(|| match &self.highlight_property_sheet_path {
                None => Ok(None),
                Some(sheet_path) => {
                    let sheet_json = fs::read_to_string(sheet_path)?;
                    let sheet = load_property_sheet(language, &sheet_json)?;
                    Ok(Some(sheet))
                }
            })
            .map(|cached| cached.as_ref())
    }
}
fn needs_recompile(
lib_path: &Path,
parser_c_path: &Path,

View file

@ -4,8 +4,7 @@ use std::fs;
use std::path::Path;
use std::process::exit;
use std::usize;
use tree_sitter_cli::loader::Loader;
use tree_sitter_cli::{error, generate, logger, parse, properties, test};
use tree_sitter_cli::{error, generate, highlight, loader, logger, parse, properties, test};
fn main() {
if let Err(e) = run() {
@ -64,14 +63,30 @@ fn run() -> error::Result<()> {
.arg(Arg::with_name("debug").long("debug").short("d"))
.arg(Arg::with_name("debug-graph").long("debug-graph").short("D")),
)
.subcommand(
SubCommand::with_name("highlight")
.about("Highlight a file")
.arg(
Arg::with_name("path")
.index(1)
.multiple(true)
.required(true),
)
.arg(Arg::with_name("html").long("html").short("h")),
)
.get_matches();
let home_dir = dirs::home_dir().unwrap();
let current_dir = env::current_dir().unwrap();
let config_dir = home_dir.join(".tree-sitter");
let theme_path = config_dir.join("theme.json");
let parsers_dir = config_dir.join("parsers");
fs::create_dir_all(&config_dir).unwrap();
let mut loader = Loader::new(config_dir);
// TODO - make configurable
let parser_repo_paths = vec![home_dir.join("github")];
fs::create_dir_all(&parsers_dir).unwrap();
let mut loader = loader::Loader::new(config_dir);
if let Some(matches) = matches.subcommand_matches("generate") {
if matches.is_present("log") {
@ -111,7 +126,7 @@ fn run() -> error::Result<()> {
let debug_graph = matches.is_present("debug-graph");
let quiet = matches.is_present("quiet");
let time = matches.is_present("time");
loader.find_all_languages(&vec![home_dir.join("github")])?;
loader.find_all_languages(&parser_repo_paths)?;
let paths = matches
.values_of("path")
.unwrap()
@ -144,6 +159,29 @@ fn run() -> error::Result<()> {
if has_error {
return Err(error::Error(String::new()));
}
} else if let Some(matches) = matches.subcommand_matches("highlight") {
loader.find_all_languages(&parser_repo_paths)?;
let theme = highlight::Theme::load(&theme_path).unwrap_or_default();
let paths = matches.values_of("path").unwrap().into_iter();
let html_mode = matches.is_present("html");
if html_mode {
println!("{}", highlight::HTML_HEADER);
}
for path in paths {
let path = Path::new(path);
if let Some((language, config)) = loader.language_configuration_for_file_name(path)? {
if let Some(sheet) = config.highlight_property_sheet(language)? {
let source = fs::read(path)?;
if html_mode {
highlight::html(&loader, &theme, &source, language, sheet)?;
} else {
highlight::ansi(&loader, &theme, &source, language, sheet)?;
}
}
}
}
}
Ok(())

View file

@ -2,7 +2,8 @@ use crate::loader::Loader;
use lazy_static::lazy_static;
use std::fs;
use std::path::{Path, PathBuf};
use tree_sitter::Language;
use tree_sitter::{Language, PropertySheet};
use tree_sitter_highlight::{load_property_sheet, Properties};
include!("./dirs.rs");
@ -20,6 +21,16 @@ pub fn get_language(name: &str) -> Language {
.unwrap()
}
/// Loads the property sheet `sheet_name` from the `src` directory of the
/// fixture grammar `language_name`.
///
/// # Panics
///
/// Panics if the file cannot be read or the sheet cannot be loaded.
pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet<Properties> {
    let mut sheet_path = GRAMMARS_DIR.join(language_name);
    sheet_path.push("src");
    sheet_path.push(sheet_name);
    let sheet_json = fs::read_to_string(sheet_path).unwrap();
    load_property_sheet(get_language(language_name), &sheet_json).unwrap()
}
pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language {
let parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", name));
if !fs::read_to_string(&parser_c_path)

View file

@ -0,0 +1,191 @@
use super::helpers::fixtures::{get_language, get_property_sheet};
use lazy_static::lazy_static;
use tree_sitter::{Language, PropertySheet};
use tree_sitter_highlight::{
highlight, highlight_html, HighlightEvent, LanguageRegistry, Properties, Scope,
};
lazy_static! {
    // Highlighting property sheets for the fixture grammars used by the tests.
    static ref JS_SHEET: PropertySheet<Properties> =
        get_property_sheet("javascript", "highlights.json");
    static ref HTML_SHEET: PropertySheet<Properties> =
        get_property_sheet("html", "highlights.json");

    // One `class=<Scope>` attribute string per scope, indexed by the scope's
    // numeric value. Indices are enumerated from zero until `from_usize`
    // returns `None` for the first time.
    static ref SCOPE_CLASS_STRINGS: Vec<String> = (0..)
        .map(|index| Scope::from_usize(index))
        .take_while(|scope| scope.is_some())
        .filter_map(|scope| scope)
        .map(|scope| format!("class={:?}", scope))
        .collect();
}
/// Checks that the contents of an `html` tagged template literal in
/// JavaScript are highlighted as HTML. The tag names carry `Scope::Tag`
/// nested under `Scope::String`, and the `${...}` substitution is
/// highlighted under `Scope::Embedded`.
#[test]
fn test_highlighting_injected_html_in_javascript() {
let source = vec!["const s = html `<div>${a < b}</div>`;"].join("\n");
assert_eq!(
&to_token_vector(&source, get_language("javascript"), &JS_SHEET).unwrap(),
&[vec![
("const", vec![Scope::Keyword]),
(" ", vec![]),
("s", vec![Scope::Variable]),
(" ", vec![]),
("=", vec![Scope::Operator]),
(" ", vec![]),
("html", vec![Scope::Function]),
(" ", vec![]),
("`<", vec![Scope::String]),
("div", vec![Scope::String, Scope::Tag]),
(">", vec![Scope::String]),
(
"${",
vec![Scope::String, Scope::Embedded, Scope::PunctuationSpecial]
),
("a", vec![Scope::String, Scope::Embedded, Scope::Variable]),
(" ", vec![Scope::String, Scope::Embedded]),
("<", vec![Scope::String, Scope::Embedded, Scope::Operator]),
(" ", vec![Scope::String, Scope::Embedded]),
("b", vec![Scope::String, Scope::Embedded, Scope::Variable]),
(
"}",
vec![Scope::String, Scope::Embedded, Scope::PunctuationSpecial]
),
("</", vec![Scope::String]),
("div", vec![Scope::String, Scope::Tag]),
(">`", vec![Scope::String]),
(";", vec![Scope::PunctuationDelimiter]),
]]
);
}
/// Checks that JavaScript inside an HTML `<script>` element is highlighted
/// with JavaScript scopes, while the surrounding tags get `Scope::Tag`. The
/// expected value has one token list per source line.
#[test]
fn test_highlighting_injected_javascript_in_html() {
let source = vec![
"<body>",
" <script>",
" const x = new Thing();",
" </script>",
"</body>",
]
.join("\n");
assert_eq!(
&to_token_vector(&source, get_language("html"), &HTML_SHEET).unwrap(),
&[
vec![("<", vec![]), ("body", vec![Scope::Tag]), (">", vec![]),],
vec![(" <", vec![]), ("script", vec![Scope::Tag]), (">", vec![]),],
vec![
(" ", vec![]),
("const", vec![Scope::Keyword]),
(" ", vec![]),
("x", vec![Scope::Variable]),
(" ", vec![]),
("=", vec![Scope::Operator]),
(" ", vec![]),
("new", vec![Scope::Keyword]),
(" ", vec![]),
("Thing", vec![Scope::Constructor]),
("(", vec![Scope::PunctuationBracket]),
(")", vec![Scope::PunctuationBracket]),
(";", vec![Scope::PunctuationDelimiter]),
],
vec![
(" </", vec![]),
("script", vec![Scope::Tag]),
(">", vec![]),
],
vec![("</", vec![]), ("body", vec![Scope::Tag]), (">", vec![]),],
]
);
}
/// Checks the HTML output for scopes that span several lines. Every expected
/// line is self-contained: the enclosing `String` and `Embedded` spans are
/// closed at the end of a line and opened again at the start of the next.
#[test]
fn test_highlighting_multiline_scopes_to_html() {
let source = vec![
"const SOMETHING = `",
" one ${",
" two()",
" } three",
"`",
]
.join("\n");
assert_eq!(
&to_html(&source, get_language("javascript"), &JS_SHEET,).unwrap(),
&[
"<span class=Keyword>const</span> <span class=Constant>SOMETHING</span> <span class=Operator>=</span> <span class=String>`</span>\n".to_string(),
"<span class=String> one <span class=Embedded><span class=PunctuationSpecial>${</span></span></span>\n".to_string(),
"<span class=String><span class=Embedded> <span class=Function>two</span><span class=PunctuationBracket>(</span><span class=PunctuationBracket>)</span></span></span>\n".to_string(),
"<span class=String><span class=Embedded> <span class=PunctuationSpecial>}</span></span> three</span>\n".to_string(),
"<span class=String>`</span>\n".to_string(),
]
);
}
/// Language registry for the tests. It knows only the `javascript` and `html`
/// fixture grammars.
struct TestLanguageRegistry;

impl LanguageRegistry for TestLanguageRegistry {
    /// Resolves the exact strings `"javascript"` and `"html"`; every other
    /// injection string yields `None`.
    fn language_for_injection_string(
        &self,
        string: &str,
    ) -> Option<(Language, &PropertySheet<Properties>)> {
        let language_name = match string {
            "javascript" | "html" => string,
            _ => return None,
        };
        let language = get_language(language_name);
        let sheet: &PropertySheet<Properties> = if language_name == "javascript" {
            &*JS_SHEET
        } else {
            &*HTML_SHEET
        };
        Some((language, sheet))
    }
}
/// Renders `src` to HTML lines using the test registry. Each scope is given
/// its `class=<Scope>` string from `SCOPE_CLASS_STRINGS` as its attribute.
fn to_html<'a>(
    src: &'a str,
    language: Language,
    property_sheet: &'a PropertySheet<Properties>,
) -> Result<Vec<String>, String> {
    let registry = TestLanguageRegistry;
    let source_bytes = src.as_bytes();
    highlight_html(&registry, source_bytes, language, property_sheet, |scope| {
        SCOPE_CLASS_STRINGS[scope as usize].as_str()
    })
}
/// Highlights `src` and groups the result by source line.
///
/// The result has one entry per line. Each entry lists the non-empty pieces
/// of text on that line together with the stack of scopes open at that point,
/// outermost first.
///
/// Source chunks are split on `'\n'` rather than with `str::lines`, because
/// `lines` drops a trailing newline: a chunk ending in `"\n"` would then fail
/// to start a new line, and the following tokens would be appended to the
/// previous one. A `'\r'` left at the end of a piece is stripped so CRLF
/// input is still handled.
///
/// # Panics
///
/// Panics if a `ScopeEnd` event does not match the innermost open scope.
fn to_token_vector<'a>(
    src: &'a str,
    language: Language,
    property_sheet: &'a PropertySheet<Properties>,
) -> Result<Vec<Vec<(&'a str, Vec<Scope>)>>, String> {
    let mut lines = Vec::new();
    let mut scopes = Vec::new();
    let mut line = Vec::new();
    for event in highlight(
        &TestLanguageRegistry,
        src.as_bytes(),
        language,
        property_sheet,
    )? {
        match event {
            HighlightEvent::ScopeStart(s) => scopes.push(s),
            HighlightEvent::ScopeEnd(s) => {
                assert_eq!(*scopes.last().unwrap(), s);
                scopes.pop();
            }
            HighlightEvent::Source(s) => {
                for (i, l) in s.split('\n').enumerate() {
                    let l = l.trim_end_matches('\r');
                    // Every piece after the first follows a newline, so it
                    // begins a new output line.
                    if i > 0 {
                        lines.push(line);
                        line = Vec::new();
                    }
                    if !l.is_empty() {
                        line.push((l, scopes.clone()));
                    }
                }
            }
        }
    }
    lines.push(line);
    Ok(lines)
}

View file

@ -1,5 +1,6 @@
mod corpus_test;
mod helpers;
mod highlight_test;
mod node_test;
mod parser_test;
mod properties_test;