From 98e4fd22efb8e59086d4d612e91a21dff5854ca5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 8 Mar 2019 13:13:02 -0800 Subject: [PATCH 1/7] Start work on a C API for syntax highlighting --- cli/src/tests/helpers/fixtures.rs | 8 +- cli/src/tests/highlight_test.rs | 93 +++++++- highlight/Cargo.toml | 3 + highlight/include/tree_sitter/highlight.h | 102 +++++++++ highlight/src/c_lib.rs | 248 ++++++++++++++++++++++ highlight/src/lib.rs | 14 +- 6 files changed, 463 insertions(+), 5 deletions(-) create mode 100644 highlight/include/tree_sitter/highlight.h create mode 100644 highlight/src/c_lib.rs diff --git a/cli/src/tests/helpers/fixtures.rs b/cli/src/tests/helpers/fixtures.rs index e7ba2e55..4389797e 100644 --- a/cli/src/tests/helpers/fixtures.rs +++ b/cli/src/tests/helpers/fixtures.rs @@ -21,12 +21,16 @@ pub fn get_language(name: &str) -> Language { .unwrap() } -pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet { +pub fn get_property_sheet_json(language_name: &str, sheet_name: &str) -> String { let path = GRAMMARS_DIR .join(language_name) .join("src") .join(sheet_name); - let json = fs::read_to_string(path).unwrap(); + fs::read_to_string(path).unwrap() +} + +pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet { + let json = get_property_sheet_json(language_name, sheet_name); let language = get_language(language_name); load_property_sheet(language, &json).unwrap() } diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index accca617..2c70f8cc 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -1,7 +1,9 @@ -use super::helpers::fixtures::{get_language, get_property_sheet}; +use super::helpers::fixtures::{get_language, get_property_sheet, get_property_sheet_json}; use lazy_static::lazy_static; +use std::ffi::CString; +use std::{ptr, slice, str}; use tree_sitter::{Language, PropertySheet}; -use tree_sitter_highlight::{highlight, highlight_html, 
HighlightEvent, Properties, Scope}; +use tree_sitter_highlight::{c, highlight, highlight_html, HighlightEvent, Properties, Scope}; lazy_static! { static ref JS_SHEET: PropertySheet = @@ -153,6 +155,93 @@ fn test_highlighting_empty_lines() { ); } +#[test] +fn test_highlighting_via_c_api() { + let js_lang = get_language("javascript"); + let html_lang = get_language("html"); + let js_sheet = get_property_sheet_json("javascript", "highlights.json"); + let js_sheet = c_string(&js_sheet); + let html_sheet = get_property_sheet_json("html", "highlights.json"); + let html_sheet = c_string(&html_sheet); + + let class_tag = c_string("class=tag"); + let class_function = c_string("class=function"); + let class_string = c_string("class=string"); + let class_keyword = c_string("class=keyword"); + + let js_scope_name = c_string("source.js"); + let html_scope_name = c_string("text.html.basic"); + let injection_regex = c_string("^(javascript|js)$"); + let source_code = c_string(""); + + let attribute_strings = &mut [ptr::null(); Scope::Unknown as usize + 1]; + attribute_strings[Scope::Tag as usize] = class_tag.as_ptr(); + attribute_strings[Scope::String as usize] = class_string.as_ptr(); + attribute_strings[Scope::Keyword as usize] = class_keyword.as_ptr(); + attribute_strings[Scope::Function as usize] = class_function.as_ptr(); + + let highlighter = c::ts_highlighter_new(attribute_strings.as_ptr()); + let buffer = c::ts_highlight_buffer_new(); + + c::ts_highlighter_add_language( + highlighter, + html_scope_name.as_ptr(), + html_lang, + html_sheet.as_ptr(), + ptr::null_mut(), + ); + c::ts_highlighter_add_language( + highlighter, + js_scope_name.as_ptr(), + js_lang, + js_sheet.as_ptr(), + injection_regex.as_ptr(), + ); + c::ts_highlighter_highlight( + highlighter, + html_scope_name.as_ptr(), + source_code.as_ptr(), + source_code.as_bytes().len() as u32, + buffer, + ); + + let output_bytes = c::ts_highlight_buffer_content(buffer); + let output_line_offsets = 
c::ts_highlight_buffer_line_offsets(buffer); + let output_len = c::ts_highlight_buffer_len(buffer); + let output_line_count = c::ts_highlight_buffer_line_count(buffer); + + let output_bytes = unsafe { slice::from_raw_parts(output_bytes, output_len as usize) }; + let output_line_offsets = + unsafe { slice::from_raw_parts(output_line_offsets, output_line_count as usize) }; + + let mut lines = Vec::new(); + for i in 0..(output_line_count as usize) { + let line_start = output_line_offsets[i] as usize; + let line_end = output_line_offsets + .get(i + 1) + .map(|x| *x as usize) + .unwrap_or(output_bytes.len()); + lines.push(str::from_utf8(&output_bytes[line_start..line_end]).unwrap()); + } + + assert_eq!( + lines, + vec![ + "<script>", + "const a = b('c');", + "c.d();", + "</script>", + ] + ); + + c::ts_highlighter_delete(highlighter); + c::ts_highlight_buffer_delete(buffer); +} + +fn c_string(s: &str) -> CString { + CString::new(s.as_bytes().to_vec()).unwrap() +} + fn test_language_for_injection_string<'a>( string: &str, ) -> Option<(Language, &'a PropertySheet)> { diff --git a/highlight/Cargo.toml b/highlight/Cargo.toml index 688a2f6c..cf807d9f 100644 --- a/highlight/Cargo.toml +++ b/highlight/Cargo.toml @@ -12,6 +12,9 @@ edition = "2018" keywords = ["incremental", "parsing", "syntax", "highlighting"] categories = ["parsing", "text-editors"] +[lib] +crate-type = ["lib", "staticlib"] + [dependencies] regex = "1" serde = "1.0" diff --git a/highlight/include/tree_sitter/highlight.h b/highlight/include/tree_sitter/highlight.h new file mode 100644 index 00000000..dd2f99c3 --- /dev/null +++ b/highlight/include/tree_sitter/highlight.h @@ -0,0 +1,102 @@ +#ifndef TREE_SITTER_HIGHLIGHT_H_ +#define TREE_SITTER_HIGHLIGHT_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +typedef enum { + TSHighlightOk, + TSHighlightUnknownScope, +} TSHighlightError; + +// The list of scopes which can be styled for syntax highlighting. 
+// When constructing a `TSHighlighter`, you need to construct an +// `attribute_strings` array whose elements correspond to these values. +enum TSHighlightScope { + TSHighlightScopeAttribute, + TSHighlightScopeComment, + TSHighlightScopeConstant, + TSHighlightScopeConstantBuiltin, + TSHighlightScopeConstructor, + TSHighlightScopeConstructorBuiltin, + TSHighlightScopeEmbedded, + TSHighlightScopeEscape, + TSHighlightScopeFunction, + TSHighlightScopeFunctionBuiltin, + TSHighlightScopeKeyword, + TSHighlightScopeNumber, + TSHighlightScopeOperator, + TSHighlightScopeProperty, + TSHighlightScopePropertyBuiltin, + TSHighlightScopePunctuation, + TSHighlightScopePunctuationBracket, + TSHighlightScopePunctuationDelimiter, + TSHighlightScopePunctuationSpecial, + TSHighlightScopeString, + TSHighlightScopeStringSpecial, + TSHighlightScopeTag, + TSHighlightScopeType, + TSHighlightScopeTypeBuiltin, + TSHighlightScopeVariable, + TSHighlightScopeVariableBuiltin, + TSHighlightScopeUnknown, +}; + +typedef struct TSHighlighter TSHighlighter; +typedef struct TSHighlightBuffer TSHighlightBuffer; + +// Construct a `TSHighlighter` by providing a list of strings containing +// the HTML attributes that should be applied for each highlight scope. +TSHighlighter *ts_highlighter_new( + const char **attribute_strings +); + +// Delete a syntax highlighter. +void ts_highlighter_delete(TSHighlighter *); + +// Add a `TSLanguage` to a highlighter. The language is associated with a +// scope name, which can be used later to select a language for syntax +// highlighting. Along with the language, you must provide a JSON string +// containing the compiled PropertySheet to use for syntax highlighting +// with that language. You can also optionally provide an 'injection regex', +// which is used to detect when this language has been embedded in a document +// written in a different language. 
+int ts_highlighter_add_language( + TSHighlighter *self, + const char *scope_name, + const TSLanguage *language, + const char *property_sheet_json, + const char *injection_regex +); + +// Compute syntax highlighting for a given document. You must first +// create a `TSHighlightBuffer` to hold the output. +int ts_highlighter_highlight( + TSHighlighter *self, + const char *scope_name, + const char *source_code, + uint32_t source_code_len, + TSHighlightBuffer *output +); + +// TSHighlightBuffer: This struct stores the HTML output of syntax +// highlighting. It can be reused for multiple highlighting calls. +TSHighlightBuffer *ts_highlight_buffer_new(); + +// Delete a highlight buffer. +void ts_highlight_buffer_delete(TSHighlightBuffer *); + +// Access the HTML content of a highlight buffer. +const uint8_t *ts_highlight_buffer_content(const TSHighlightBuffer *); +const uint32_t *ts_highlight_buffer_line_offsets(const TSHighlightBuffer *); +uint32_t ts_highlight_buffer_len(const TSHighlightBuffer *); +uint32_t ts_highlight_buffer_line_count(const TSHighlightBuffer *); + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_HIGHLIGHT_H_ diff --git a/highlight/src/c_lib.rs b/highlight/src/c_lib.rs new file mode 100644 index 00000000..f6b01984 --- /dev/null +++ b/highlight/src/c_lib.rs @@ -0,0 +1,248 @@ +use super::{escape, load_property_sheet, HighlightEvent, Highlighter, Properties, Scope}; +use regex::Regex; +use std::collections::HashMap; +use std::ffi::CStr; +use std::io::Write; +use std::os::raw::c_char; +use std::process::abort; +use std::{fmt, slice}; +use tree_sitter::{Language, PropertySheet}; + +struct LanguageConfiguration { + language: Language, + property_sheet: PropertySheet, + injection_regex: Option, +} + +pub struct TSHighlighter { + languages: HashMap, + attribute_strings: Vec<&'static [u8]>, +} + +pub struct TSHighlightBuffer { + html: Vec, + line_offsets: Vec, +} + +#[repr(C)] +pub enum ErrorCode { + Ok, + UnknownScope, +} + +#[no_mangle] +pub extern 
"C" fn ts_highlighter_new(attribute_strings: *const *const c_char) -> *mut TSHighlighter { + let attribute_strings = + unsafe { slice::from_raw_parts(attribute_strings, Scope::Unknown as usize + 1) }; + let attribute_strings = attribute_strings + .into_iter() + .map(|s| { + if s.is_null() { + &[] + } else { + unsafe { CStr::from_ptr(*s).to_bytes() } + } + }) + .collect(); + Box::into_raw(Box::new(TSHighlighter { + languages: HashMap::new(), + attribute_strings, + })) +} + +#[no_mangle] +pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer { + Box::into_raw(Box::new(TSHighlightBuffer { + html: Vec::new(), + line_offsets: Vec::new(), + })) +} + +#[no_mangle] +pub extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) { + drop(unsafe { Box::from_raw(this) }) +} + +#[no_mangle] +pub extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) { + drop(unsafe { Box::from_raw(this) }) +} + +#[no_mangle] +pub extern "C" fn ts_highlight_buffer_content(this: *mut TSHighlightBuffer) -> *const u8 { + let this = unwrap_ptr(this); + this.html.as_slice().as_ptr() +} + +#[no_mangle] +pub extern "C" fn ts_highlight_buffer_line_offsets(this: *mut TSHighlightBuffer) -> *const u32 { + let this = unwrap_ptr(this); + this.line_offsets.as_slice().as_ptr() +} + +#[no_mangle] +pub extern "C" fn ts_highlight_buffer_len(this: *mut TSHighlightBuffer) -> u32 { + let this = unwrap_ptr(this); + this.html.len() as u32 +} + +#[no_mangle] +pub extern "C" fn ts_highlight_buffer_line_count(this: *mut TSHighlightBuffer) -> u32 { + let this = unwrap_ptr(this); + this.line_offsets.len() as u32 +} + +#[no_mangle] +pub extern "C" fn ts_highlighter_add_language( + this: *mut TSHighlighter, + scope_name: *const c_char, + language: Language, + property_sheet_json: *const c_char, + injection_regex: *const c_char, +) -> ErrorCode { + let this = unwrap_ptr(this); + let scope_name = unsafe { CStr::from_ptr(scope_name) }; + let scope_name = 
unwrap(scope_name.to_str()).to_string(); + let property_sheet_json = unsafe { CStr::from_ptr(property_sheet_json) }; + let property_sheet_json = unwrap(property_sheet_json.to_str()); + + let property_sheet = unwrap(load_property_sheet(language, property_sheet_json)); + let injection_regex = if injection_regex.is_null() { + None + } else { + let pattern = unsafe { CStr::from_ptr(injection_regex) }; + Some(unwrap(Regex::new(unwrap(pattern.to_str())))) + }; + + this.languages.insert( + scope_name, + LanguageConfiguration { + language, + property_sheet, + injection_regex, + }, + ); + + ErrorCode::Ok +} + +#[no_mangle] +pub extern "C" fn ts_highlighter_highlight( + this: *mut TSHighlighter, + scope_name: *const c_char, + source_code: *const c_char, + source_code_len: u32, + output: *mut TSHighlightBuffer, +) -> ErrorCode { + let this = unwrap_ptr(this); + let output = unwrap_ptr(output); + let scope_name = unwrap(unsafe { CStr::from_ptr(scope_name).to_str() }); + let source_code = + unsafe { slice::from_raw_parts(source_code as *const u8, source_code_len as usize) }; + this.highlight(source_code, scope_name, output) +} + +impl TSHighlighter { + fn highlight( + &mut self, + source_code: &[u8], + scope_name: &str, + output: &mut TSHighlightBuffer, + ) -> ErrorCode { + let configuration = self.languages.get(scope_name); + if configuration.is_none() { + return ErrorCode::UnknownScope; + } + let configuration = configuration.unwrap(); + let languages = &self.languages; + + let highlighter = unwrap(Highlighter::new( + source_code, + configuration.language, + &configuration.property_sheet, + |injection_string| { + languages.values().find_map(|conf| { + conf.injection_regex.as_ref().and_then(|regex| { + if regex.is_match(injection_string) { + Some((conf.language, &conf.property_sheet)) + } else { + None + } + }) + }) + }, + )); + + output.html.clear(); + output.line_offsets.clear(); + output.line_offsets.push(0); + let mut scopes = Vec::new(); + for event in highlighter { + 
match event { + HighlightEvent::ScopeStart(s) => { + scopes.push(s); + output.start_scope(s, &self.attribute_strings); + } + HighlightEvent::ScopeEnd => { + scopes.pop(); + output.end_scope(); + } + HighlightEvent::Source(src) => { + output.add_text(src, &scopes, &self.attribute_strings); + } + }; + } + + ErrorCode::Ok + } +} + +impl TSHighlightBuffer { + fn start_scope(&mut self, s: Scope, attribute_strings: &[&[u8]]) { + let attribute_string = attribute_strings[s as usize]; + self.html.extend(b""); + } + + fn end_scope(&mut self) { + self.html.extend(b""); + } + + fn finish_line(&mut self) { + self.line_offsets.push(self.html.len() as u32); + } + + fn add_text(&mut self, src: &str, scopes: &Vec, attribute_strings: &[&[u8]]) { + let mut multiline = false; + for line in src.split('\n') { + let line = line.trim_end_matches('\r'); + if multiline { + scopes.iter().for_each(|_| self.end_scope()); + self.finish_line(); + scopes + .iter() + .for_each(|scope| self.start_scope(*scope, attribute_strings)); + } + write!(&mut self.html, "{}", escape::Escape(line)).unwrap(); + multiline = true; + } + } +} + +fn unwrap_ptr<'a, T>(result: *mut T) -> &'a mut T { + unsafe { result.as_mut() }.unwrap_or_else(|| { + eprintln!("{}:{} - pointer must not be null", file!(), line!()); + abort(); + }) +} + +fn unwrap(result: Result) -> T { + result.unwrap_or_else(|error| { + eprintln!("tree-sitter highlight error: {}", error); + abort(); + }) +} diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index e5499fbc..25d8d59f 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -1,9 +1,11 @@ +pub mod c_lib; mod escape; +pub use c_lib as c; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_derive::*; use std::cmp; -use std::fmt::Write; +use std::fmt::{self, Write}; use std::mem::transmute; use std::str; use std::usize; @@ -151,6 +153,16 @@ pub enum PropertySheetError { InvalidFormat(String), } +impl fmt::Display for PropertySheetError { + fn fmt(&self, f: 
&mut fmt::Formatter) -> fmt::Result { + match self { + PropertySheetError::InvalidJSON(e) => e.fmt(f), + PropertySheetError::InvalidRegex(e) => e.fmt(f), + PropertySheetError::InvalidFormat(e) => e.fmt(f), + } + } +} + pub fn load_property_sheet( language: Language, json: &str, From 8a675d184c57cb7582bc0a3c7a9e367c425a437e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 12 Mar 2019 17:14:41 -0700 Subject: [PATCH 2/7] test script: All tests are in the CLI package --- script/test | 2 +- script/test.cmd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/script/test b/script/test index 09cf9f83..1ec50e30 100755 --- a/script/test +++ b/script/test @@ -79,5 +79,5 @@ if [[ "${mode}" == "debug" ]]; then test_binary=$(cargo test --no-run --message-format=json 2> /dev/null | jq -rs '.[-1].filenames[0]') lldb "${test_binary}" -- $top_level_filter else - cargo test --jobs 1 $top_level_filter -- --nocapture + cargo test -p tree-sitter-cli --jobs 1 $top_level_filter -- --nocapture fi diff --git a/script/test.cmd b/script/test.cmd index d1b462e8..de1f8500 100644 --- a/script/test.cmd +++ b/script/test.cmd @@ -4,5 +4,5 @@ setlocal set TREE_SITTER_TEST=1 set RUST_TEST_THREADS=1 set RUST_BACKTRACE=full -cargo test "%~1" -- --nocapture +cargo test -p tree-sitter-cli "%~1" -- --nocapture endlocal From eabecafa8d4f99b5ea936fdd45f2fea662284e2c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 12 Mar 2019 17:24:21 -0700 Subject: [PATCH 3/7] Highlight.highlight does not mutate --- highlight/src/c_lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/highlight/src/c_lib.rs b/highlight/src/c_lib.rs index f6b01984..919da8c5 100644 --- a/highlight/src/c_lib.rs +++ b/highlight/src/c_lib.rs @@ -144,7 +144,7 @@ pub extern "C" fn ts_highlighter_highlight( impl TSHighlighter { fn highlight( - &mut self, + &self, source_code: &[u8], scope_name: &str, output: &mut TSHighlightBuffer, From 2a5409feab5ada09a8cce7a1fbcf74d6b9a76fd6 Mon Sep 17 00:00:00 
2001 From: Max Brunsfeld Date: Wed, 13 Mar 2019 09:22:09 -0700 Subject: [PATCH 4/7] Indicate in C API that highlight method doesn't mutate Highlighter --- highlight/include/tree_sitter/highlight.h | 2 +- highlight/src/c_lib.rs | 27 +++++++++++++++-------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/highlight/include/tree_sitter/highlight.h b/highlight/include/tree_sitter/highlight.h index dd2f99c3..7b34aef9 100644 --- a/highlight/include/tree_sitter/highlight.h +++ b/highlight/include/tree_sitter/highlight.h @@ -75,7 +75,7 @@ int ts_highlighter_add_language( // Compute syntax highlighting for a given document. You must first // create a `TSHighlightBuffer` to hold the output. int ts_highlighter_highlight( - TSHighlighter *self, + const TSHighlighter *self, const char *scope_name, const char *source_code, uint32_t source_code_len, diff --git a/highlight/src/c_lib.rs b/highlight/src/c_lib.rs index 919da8c5..ce9f3936 100644 --- a/highlight/src/c_lib.rs +++ b/highlight/src/c_lib.rs @@ -31,7 +31,9 @@ pub enum ErrorCode { } #[no_mangle] -pub extern "C" fn ts_highlighter_new(attribute_strings: *const *const c_char) -> *mut TSHighlighter { +pub extern "C" fn ts_highlighter_new( + attribute_strings: *const *const c_char, +) -> *mut TSHighlighter { let attribute_strings = unsafe { slice::from_raw_parts(attribute_strings, Scope::Unknown as usize + 1) }; let attribute_strings = attribute_strings @@ -69,25 +71,25 @@ pub extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) { } #[no_mangle] -pub extern "C" fn ts_highlight_buffer_content(this: *mut TSHighlightBuffer) -> *const u8 { +pub extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 { let this = unwrap_ptr(this); this.html.as_slice().as_ptr() } #[no_mangle] -pub extern "C" fn ts_highlight_buffer_line_offsets(this: *mut TSHighlightBuffer) -> *const u32 { +pub extern "C" fn ts_highlight_buffer_line_offsets(this: *const TSHighlightBuffer) -> *const u32 { 
let this = unwrap_ptr(this); this.line_offsets.as_slice().as_ptr() } #[no_mangle] -pub extern "C" fn ts_highlight_buffer_len(this: *mut TSHighlightBuffer) -> u32 { +pub extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 { let this = unwrap_ptr(this); this.html.len() as u32 } #[no_mangle] -pub extern "C" fn ts_highlight_buffer_line_count(this: *mut TSHighlightBuffer) -> u32 { +pub extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 { let this = unwrap_ptr(this); this.line_offsets.len() as u32 } @@ -100,7 +102,7 @@ pub extern "C" fn ts_highlighter_add_language( property_sheet_json: *const c_char, injection_regex: *const c_char, ) -> ErrorCode { - let this = unwrap_ptr(this); + let this = unwrap_mut_ptr(this); let scope_name = unsafe { CStr::from_ptr(scope_name) }; let scope_name = unwrap(scope_name.to_str()).to_string(); let property_sheet_json = unsafe { CStr::from_ptr(property_sheet_json) }; @@ -128,14 +130,14 @@ pub extern "C" fn ts_highlighter_add_language( #[no_mangle] pub extern "C" fn ts_highlighter_highlight( - this: *mut TSHighlighter, + this: *const TSHighlighter, scope_name: *const c_char, source_code: *const c_char, source_code_len: u32, output: *mut TSHighlightBuffer, ) -> ErrorCode { let this = unwrap_ptr(this); - let output = unwrap_ptr(output); + let output = unwrap_mut_ptr(output); let scope_name = unwrap(unsafe { CStr::from_ptr(scope_name).to_str() }); let source_code = unsafe { slice::from_raw_parts(source_code as *const u8, source_code_len as usize) }; @@ -233,7 +235,14 @@ impl TSHighlightBuffer { } } -fn unwrap_ptr<'a, T>(result: *mut T) -> &'a mut T { +fn unwrap_ptr<'a, T>(result: *const T) -> &'a T { + unsafe { result.as_ref() }.unwrap_or_else(|| { + eprintln!("{}:{} - pointer must not be null", file!(), line!()); + abort(); + }) +} + +fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T { unsafe { result.as_mut() }.unwrap_or_else(|| { eprintln!("{}:{} - pointer must not be null", 
file!(), line!()); abort(); From 083e813218c7f58c6724d5c2e5634e081b611412 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 13 Mar 2019 15:28:08 -0700 Subject: [PATCH 5/7] Highlight: fix handling of root node injections like in ERB, EJS --- cli/src/tests/highlight_test.rs | 26 ++++++++++++++++++++++++ highlight/src/lib.rs | 36 ++++++++++++++++++++++++++------- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index 2c70f8cc..2847cb71 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -10,6 +10,8 @@ lazy_static! { get_property_sheet("javascript", "highlights.json"); static ref HTML_SHEET: PropertySheet = get_property_sheet("html", "highlights.json"); + static ref EJS_SHEET: PropertySheet = + get_property_sheet("embedded-template", "highlights-ejs.json"); static ref SCOPE_CLASS_STRINGS: Vec = { let mut result = Vec::new(); let mut i = 0; @@ -155,6 +157,30 @@ fn test_highlighting_empty_lines() { ); } +#[test] +fn test_highlighting_ejs() { + let source = vec!["
<% foo() %>
"].join("\n"); + + assert_eq!( + &to_token_vector(&source, get_language("embedded-template"), &EJS_SHEET).unwrap(), + &[[ + ("<", vec![]), + ("div", vec![Scope::Tag]), + (">", vec![]), + ("<%", vec![Scope::Keyword]), + (" ", vec![]), + ("foo", vec![Scope::Function]), + ("(", vec![Scope::PunctuationBracket]), + (")", vec![Scope::PunctuationBracket]), + (" ", vec![]), + ("%>", vec![Scope::Keyword]), + ("", vec![]) + ]], + ); +} + #[test] fn test_highlighting_via_c_api() { let js_lang = get_language("javascript"); diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index 25d8d59f..7af0efb3 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -4,11 +4,9 @@ mod escape; pub use c_lib as c; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_derive::*; -use std::cmp; use std::fmt::{self, Write}; use std::mem::transmute; -use std::str; -use std::usize; +use std::{cmp, str, usize}; use tree_sitter::{Language, Node, Parser, Point, PropertySheet, Range, Tree, TreePropertyCursor}; #[derive(Debug)] @@ -80,6 +78,7 @@ struct Layer<'a> { cursor: TreePropertyCursor<'a, Properties>, ranges: Vec, at_node_end: bool, + depth: usize, } struct Highlighter<'a, T> @@ -163,6 +162,18 @@ impl fmt::Display for PropertySheetError { } } +impl<'a> fmt::Debug for Layer<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "Layer {{ at_node_end: {}, node: {:?} }}", + self.at_node_end, + self.cursor.node() + )?; + Ok(()) + } +} + pub fn load_property_sheet( language: Language, json: &str, @@ -387,6 +398,7 @@ where start_point: Point::new(0, 0), end_point: Point::new(usize::MAX, usize::MAX), }], + 0, )], utf8_error_len: None, }) @@ -566,7 +578,7 @@ where result } - fn add_layer(&mut self, language_string: &str, ranges: Vec) { + fn add_layer(&mut self, language_string: &str, ranges: Vec, depth: usize) { if let Some((language, property_sheet)) = (self.injection_callback)(language_string) { self.parser .set_language(language) @@ -576,7 
+588,7 @@ where .parser .parse(self.source, None) .expect("Failed to parse"); - let layer = Layer::new(self.source, tree, property_sheet, ranges); + let layer = Layer::new(self.source, tree, property_sheet, ranges, depth); match self.layers.binary_search_by(|l| l.cmp(&layer)) { Ok(i) | Err(i) => self.layers.insert(i, layer), }; @@ -617,8 +629,9 @@ impl<'a, T: Fn(&str) -> Option<(Language, &'a PropertySheet)>> Itera }) .collect::>(); + let depth = first_layer.depth + 1; for (language, ranges) in injections { - self.add_layer(&language, ranges); + self.add_layer(&language, ranges, depth); } } @@ -647,7 +660,13 @@ impl<'a, T: Fn(&str) -> Option<(Language, &'a PropertySheet)>> Itera // to re-sort the layers. If the cursor is already at the end of its syntax tree, // remove it. if self.layers[0].advance() { - self.layers.sort_unstable_by(|a, b| a.cmp(&b)); + let mut index = 0; + while self.layers.get(index + 1).map_or(false, |next| { + self.layers[index].cmp(next) == cmp::Ordering::Greater + }) { + self.layers.swap(index, index + 1); + index += 1; + } } else { self.layers.remove(0); } @@ -671,6 +690,7 @@ impl<'a> Layer<'a> { tree: Tree, sheet: &'a PropertySheet, ranges: Vec, + depth: usize, ) -> Self { // The cursor's lifetime parameter indicates that the tree must outlive the cursor. 
// But because the tree is really a pointer to the heap, the cursor can remain @@ -681,6 +701,7 @@ impl<'a> Layer<'a> { _tree: tree, cursor, ranges, + depth, at_node_end: false, } } @@ -692,6 +713,7 @@ impl<'a> Layer<'a> { self.offset() .cmp(&other.offset()) .then_with(|| other.at_node_end.cmp(&self.at_node_end)) + .then_with(|| self.depth.cmp(&other.depth)) } fn offset(&self) -> usize { From abcac40f2d8c4aeb1b87eb58747e0eb024d90e0f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 13 Mar 2019 15:51:27 -0700 Subject: [PATCH 6/7] cli: Add a --time flag to highlight subcommand --- cli/src/highlight.rs | 11 +++++++++++ cli/src/main.rs | 6 ++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/cli/src/highlight.rs b/cli/src/highlight.rs index 703c4053..e7bb8818 100644 --- a/cli/src/highlight.rs +++ b/cli/src/highlight.rs @@ -6,6 +6,7 @@ use serde::ser::SerializeMap; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_json::{json, Value}; use std::collections::HashMap; +use std::time::Instant; use std::{fmt, fs, io, path}; use tree_sitter::{Language, PropertySheet}; use tree_sitter_highlight::{highlight, highlight_html, HighlightEvent, Properties, Scope}; @@ -254,10 +255,13 @@ pub fn ansi( source: &[u8], language: Language, property_sheet: &PropertySheet, + print_time: bool, ) -> Result<()> { use std::io::Write; let stdout = io::stdout(); let mut stdout = stdout.lock(); + + let time = Instant::now(); let mut scope_stack = Vec::new(); for event in highlight(source, language, property_sheet, |s| { language_for_injection_string(loader, s) @@ -278,6 +282,13 @@ pub fn ansi( } } } + + if print_time { + let duration = time.elapsed(); + let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000; + eprintln!("{} ms", duration_ms); + } + Ok(()) } diff --git a/cli/src/main.rs b/cli/src/main.rs index 3769efa0..15499622 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -76,7 +76,8 @@ fn run() -> 
error::Result<()> { .required(true), ) .arg(Arg::with_name("scope").long("scope").takes_value(true)) - .arg(Arg::with_name("html").long("html").short("h")), + .arg(Arg::with_name("html").long("html").short("h")) + .arg(Arg::with_name("time").long("time").short("t")), ) .get_matches(); @@ -167,6 +168,7 @@ fn run() -> error::Result<()> { } else if let Some(matches) = matches.subcommand_matches("highlight") { let paths = matches.values_of("path").unwrap().into_iter(); let html_mode = matches.is_present("html"); + let time = matches.is_present("time"); loader.find_all_languages(&config.parser_directories)?; if html_mode { @@ -201,7 +203,7 @@ fn run() -> error::Result<()> { if html_mode { highlight::html(&loader, &config.theme, &source, language, sheet)?; } else { - highlight::ansi(&loader, &config.theme, &source, language, sheet)?; + highlight::ansi(&loader, &config.theme, &source, language, sheet, time)?; } } else { return Err(error::Error(format!( From e0f0043edd0064136d889f454d85eb2c2f146443 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 13 Mar 2019 16:31:08 -0700 Subject: [PATCH 7/7] Highlight: 0.1.5 --- Cargo.lock | 4 ++-- highlight/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 52e2c3c3..896f22f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -661,12 +661,12 @@ dependencies = [ "spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "tempfile 3.0.7 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter 0.3.9", - "tree-sitter-highlight 0.1.4", + "tree-sitter-highlight 0.1.5", ] [[package]] name = "tree-sitter-highlight" -version = "0.1.4" +version = "0.1.5" dependencies = [ "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/highlight/Cargo.toml b/highlight/Cargo.toml index cf807d9f..09b20774 100644 --- a/highlight/Cargo.toml +++ b/highlight/Cargo.toml @@ -1,7 +1,7 
@@ [package] name = "tree-sitter-highlight" description = "Library for performing syntax highlighting with Tree-sitter" -version = "0.1.4" +version = "0.1.5" authors = [ "Max Brunsfeld ", "Tim Clem "