Merge branch 'master' into node-fields
This commit is contained in:
commit
5035e194ff
34 changed files with 1178 additions and 240 deletions
266
highlight/src/c_lib.rs
Normal file
266
highlight/src/c_lib.rs
Normal file
|
|
@ -0,0 +1,266 @@
|
|||
use super::{escape, load_property_sheet, HighlightEvent, Highlighter, Properties, Scope};
|
||||
use regex::Regex;
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::CStr;
|
||||
use std::io::Write;
|
||||
use std::os::raw::c_char;
|
||||
use std::process::abort;
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::{fmt, slice};
|
||||
use tree_sitter::{Language, PropertySheet};
|
||||
|
||||
struct LanguageConfiguration {
|
||||
language: Language,
|
||||
property_sheet: PropertySheet<Properties>,
|
||||
injection_regex: Option<Regex>,
|
||||
}
|
||||
|
||||
pub struct TSHighlighter {
|
||||
languages: HashMap<String, LanguageConfiguration>,
|
||||
attribute_strings: Vec<&'static [u8]>,
|
||||
}
|
||||
|
||||
/// Reusable output buffer exposed to C callers as an opaque pointer.
pub struct TSHighlightBuffer {
    /// Generated HTML bytes.
    html: Vec<u8>,
    /// Byte offset into `html` at which each output line starts.
    line_offsets: Vec<u32>,
}
|
||||
|
||||
/// Status code returned across the C boundary.
///
/// The discriminants are spelled out because C callers compare against the
/// raw integer values.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorCode {
    /// The operation completed.
    Ok = 0,
    /// No language is registered under the requested scope name.
    UnknownScope = 1,
    /// The highlighter could not be constructed for the given source.
    Timeout = 2,
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlighter_new(
|
||||
attribute_strings: *const *const c_char,
|
||||
) -> *mut TSHighlighter {
|
||||
let attribute_strings =
|
||||
unsafe { slice::from_raw_parts(attribute_strings, Scope::Unknown as usize + 1) };
|
||||
let attribute_strings = attribute_strings
|
||||
.into_iter()
|
||||
.map(|s| {
|
||||
if s.is_null() {
|
||||
&[]
|
||||
} else {
|
||||
unsafe { CStr::from_ptr(*s).to_bytes() }
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
Box::into_raw(Box::new(TSHighlighter {
|
||||
languages: HashMap::new(),
|
||||
attribute_strings,
|
||||
}))
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer {
|
||||
Box::into_raw(Box::new(TSHighlightBuffer {
|
||||
html: Vec::new(),
|
||||
line_offsets: Vec::new(),
|
||||
}))
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) {
|
||||
drop(unsafe { Box::from_raw(this) })
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) {
|
||||
drop(unsafe { Box::from_raw(this) })
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 {
|
||||
let this = unwrap_ptr(this);
|
||||
this.html.as_slice().as_ptr()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_line_offsets(this: *const TSHighlightBuffer) -> *const u32 {
|
||||
let this = unwrap_ptr(this);
|
||||
this.line_offsets.as_slice().as_ptr()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 {
|
||||
let this = unwrap_ptr(this);
|
||||
this.html.len() as u32
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 {
|
||||
let this = unwrap_ptr(this);
|
||||
this.line_offsets.len() as u32
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlighter_add_language(
|
||||
this: *mut TSHighlighter,
|
||||
scope_name: *const c_char,
|
||||
language: Language,
|
||||
property_sheet_json: *const c_char,
|
||||
injection_regex: *const c_char,
|
||||
) -> ErrorCode {
|
||||
let this = unwrap_mut_ptr(this);
|
||||
let scope_name = unsafe { CStr::from_ptr(scope_name) };
|
||||
let scope_name = unwrap(scope_name.to_str()).to_string();
|
||||
let property_sheet_json = unsafe { CStr::from_ptr(property_sheet_json) };
|
||||
let property_sheet_json = unwrap(property_sheet_json.to_str());
|
||||
|
||||
let property_sheet = unwrap(load_property_sheet(language, property_sheet_json));
|
||||
let injection_regex = if injection_regex.is_null() {
|
||||
None
|
||||
} else {
|
||||
let pattern = unsafe { CStr::from_ptr(injection_regex) };
|
||||
Some(unwrap(Regex::new(unwrap(pattern.to_str()))))
|
||||
};
|
||||
|
||||
this.languages.insert(
|
||||
scope_name,
|
||||
LanguageConfiguration {
|
||||
language,
|
||||
property_sheet,
|
||||
injection_regex,
|
||||
},
|
||||
);
|
||||
|
||||
ErrorCode::Ok
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlighter_highlight(
|
||||
this: *const TSHighlighter,
|
||||
scope_name: *const c_char,
|
||||
source_code: *const c_char,
|
||||
source_code_len: u32,
|
||||
output: *mut TSHighlightBuffer,
|
||||
cancellation_flag: *const AtomicUsize,
|
||||
) -> ErrorCode {
|
||||
let this = unwrap_ptr(this);
|
||||
let output = unwrap_mut_ptr(output);
|
||||
let scope_name = unwrap(unsafe { CStr::from_ptr(scope_name).to_str() });
|
||||
let source_code =
|
||||
unsafe { slice::from_raw_parts(source_code as *const u8, source_code_len as usize) };
|
||||
let cancellation_flag = unsafe { cancellation_flag.as_ref() };
|
||||
this.highlight(source_code, scope_name, output, cancellation_flag)
|
||||
}
|
||||
|
||||
impl TSHighlighter {
|
||||
fn highlight(
|
||||
&self,
|
||||
source_code: &[u8],
|
||||
scope_name: &str,
|
||||
output: &mut TSHighlightBuffer,
|
||||
cancellation_flag: Option<&AtomicUsize>,
|
||||
) -> ErrorCode {
|
||||
let configuration = self.languages.get(scope_name);
|
||||
if configuration.is_none() {
|
||||
return ErrorCode::UnknownScope;
|
||||
}
|
||||
let configuration = configuration.unwrap();
|
||||
let languages = &self.languages;
|
||||
|
||||
let highlighter = Highlighter::new(
|
||||
source_code,
|
||||
configuration.language,
|
||||
&configuration.property_sheet,
|
||||
|injection_string| {
|
||||
languages.values().find_map(|conf| {
|
||||
conf.injection_regex.as_ref().and_then(|regex| {
|
||||
if regex.is_match(injection_string) {
|
||||
Some((conf.language, &conf.property_sheet))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
})
|
||||
},
|
||||
cancellation_flag,
|
||||
);
|
||||
|
||||
if let Ok(highlighter) = highlighter {
|
||||
output.html.clear();
|
||||
output.line_offsets.clear();
|
||||
output.line_offsets.push(0);
|
||||
let mut scopes = Vec::new();
|
||||
for event in highlighter {
|
||||
match event {
|
||||
HighlightEvent::ScopeStart(s) => {
|
||||
scopes.push(s);
|
||||
output.start_scope(s, &self.attribute_strings);
|
||||
}
|
||||
HighlightEvent::ScopeEnd => {
|
||||
scopes.pop();
|
||||
output.end_scope();
|
||||
}
|
||||
HighlightEvent::Source(src) => {
|
||||
output.add_text(src, &scopes, &self.attribute_strings);
|
||||
}
|
||||
};
|
||||
}
|
||||
ErrorCode::Ok
|
||||
} else {
|
||||
ErrorCode::Timeout
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TSHighlightBuffer {
|
||||
fn start_scope(&mut self, s: Scope, attribute_strings: &[&[u8]]) {
|
||||
let attribute_string = attribute_strings[s as usize];
|
||||
self.html.extend(b"<span");
|
||||
if !attribute_string.is_empty() {
|
||||
self.html.extend(b" ");
|
||||
self.html.extend(attribute_string);
|
||||
}
|
||||
self.html.extend(b">");
|
||||
}
|
||||
|
||||
fn end_scope(&mut self) {
|
||||
self.html.extend(b"</span>");
|
||||
}
|
||||
|
||||
fn finish_line(&mut self) {
|
||||
self.line_offsets.push(self.html.len() as u32);
|
||||
}
|
||||
|
||||
fn add_text(&mut self, src: &str, scopes: &Vec<Scope>, attribute_strings: &[&[u8]]) {
|
||||
let mut multiline = false;
|
||||
for line in src.split('\n') {
|
||||
let line = line.trim_end_matches('\r');
|
||||
if multiline {
|
||||
scopes.iter().for_each(|_| self.end_scope());
|
||||
self.finish_line();
|
||||
scopes
|
||||
.iter()
|
||||
.for_each(|scope| self.start_scope(*scope, attribute_strings));
|
||||
}
|
||||
write!(&mut self.html, "{}", escape::Escape(line)).unwrap();
|
||||
multiline = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Turns a raw pointer received over the C boundary into a shared reference.
///
/// Prints a message to stderr and aborts the process if the pointer is null.
/// The caller is responsible for the pointer being valid for `'a`.
fn unwrap_ptr<'a, T>(result: *const T) -> &'a T {
    // SAFETY: null is handled below; validity of a non-null pointer is the
    // caller's obligation.
    match unsafe { result.as_ref() } {
        Some(reference) => reference,
        None => {
            eprintln!("{}:{} - pointer must not be null", file!(), line!());
            abort()
        }
    }
}
|
||||
|
||||
/// Turns a raw pointer received over the C boundary into an exclusive
/// reference.
///
/// Prints a message to stderr and aborts the process if the pointer is null.
/// The caller is responsible for the pointer being valid and unaliased for
/// `'a`.
fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T {
    // SAFETY: null is handled below; validity of a non-null pointer is the
    // caller's obligation.
    match unsafe { result.as_mut() } {
        Some(reference) => reference,
        None => {
            eprintln!("{}:{} - pointer must not be null", file!(), line!());
            abort()
        }
    }
}
|
||||
|
||||
/// Extracts the success value of `result`.
///
/// On `Err`, prints the error to stderr and aborts the process instead of
/// unwinding.
fn unwrap<T, E: fmt::Display>(result: Result<T, E>) -> T {
    match result {
        Ok(value) => value,
        Err(error) => {
            eprintln!("tree-sitter highlight error: {}", error);
            abort()
        }
    }
}
|
||||
|
|
@ -1,14 +1,17 @@
|
|||
pub mod c_lib;
|
||||
mod escape;
|
||||
|
||||
pub use c_lib as c;
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use serde_derive::*;
|
||||
use std::cmp;
|
||||
use std::fmt::{self, Write};
|
||||
use std::mem::transmute;
|
||||
use std::str;
|
||||
use std::usize;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::{cmp, str, usize};
|
||||
use tree_sitter::{Language, Node, Parser, Point, PropertySheet, Range, Tree, TreePropertyCursor};
|
||||
|
||||
const CANCELLATION_CHECK_INTERVAL: usize = 100;
|
||||
|
||||
#[derive(Debug)]
|
||||
enum TreeStep {
|
||||
Child {
|
||||
|
|
@ -78,6 +81,7 @@ struct Layer<'a> {
|
|||
cursor: TreePropertyCursor<'a, Properties>,
|
||||
ranges: Vec<Range>,
|
||||
at_node_end: bool,
|
||||
depth: usize,
|
||||
}
|
||||
|
||||
struct Highlighter<'a, T>
|
||||
|
|
@ -90,6 +94,8 @@ where
|
|||
parser: Parser,
|
||||
layers: Vec<Layer<'a>>,
|
||||
utf8_error_len: Option<usize>,
|
||||
operation_count: usize,
|
||||
cancellation_flag: Option<&'a AtomicUsize>,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
|
|
@ -151,6 +157,28 @@ pub enum PropertySheetError {
|
|||
InvalidFormat(String),
|
||||
}
|
||||
|
||||
impl fmt::Display for PropertySheetError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
PropertySheetError::InvalidJSON(e) => e.fmt(f),
|
||||
PropertySheetError::InvalidRegex(e) => e.fmt(f),
|
||||
PropertySheetError::InvalidFormat(e) => e.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> fmt::Debug for Layer<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"Layer {{ at_node_end: {}, node: {:?} }}",
|
||||
self.at_node_end,
|
||||
self.cursor.node()
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_property_sheet(
|
||||
language: Language,
|
||||
json: &str,
|
||||
|
|
@ -354,17 +382,22 @@ where
|
|||
language: Language,
|
||||
property_sheet: &'a PropertySheet<Properties>,
|
||||
injection_callback: F,
|
||||
cancellation_flag: Option<&'a AtomicUsize>,
|
||||
) -> Result<Self, String> {
|
||||
let mut parser = Parser::new();
|
||||
unsafe { parser.set_cancellation_flag(cancellation_flag.clone()) };
|
||||
parser.set_language(language)?;
|
||||
let tree = parser
|
||||
.parse(source, None)
|
||||
.ok_or_else(|| format!("Tree-sitter: failed to parse"))?;
|
||||
Ok(Self {
|
||||
injection_callback,
|
||||
source,
|
||||
source_offset: 0,
|
||||
parser,
|
||||
source,
|
||||
cancellation_flag,
|
||||
injection_callback,
|
||||
source_offset: 0,
|
||||
operation_count: 0,
|
||||
utf8_error_len: None,
|
||||
layers: vec![Layer::new(
|
||||
source,
|
||||
tree,
|
||||
|
|
@ -375,8 +408,8 @@ where
|
|||
start_point: Point::new(0, 0),
|
||||
end_point: Point::new(usize::MAX, usize::MAX),
|
||||
}],
|
||||
0,
|
||||
)],
|
||||
utf8_error_len: None,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -554,7 +587,7 @@ where
|
|||
result
|
||||
}
|
||||
|
||||
fn add_layer(&mut self, language_string: &str, ranges: Vec<Range>) {
|
||||
fn add_layer(&mut self, language_string: &str, ranges: Vec<Range>, depth: usize) {
|
||||
if let Some((language, property_sheet)) = (self.injection_callback)(language_string) {
|
||||
self.parser
|
||||
.set_language(language)
|
||||
|
|
@ -564,7 +597,7 @@ where
|
|||
.parser
|
||||
.parse(self.source, None)
|
||||
.expect("Failed to parse");
|
||||
let layer = Layer::new(self.source, tree, property_sheet, ranges);
|
||||
let layer = Layer::new(self.source, tree, property_sheet, ranges, depth);
|
||||
match self.layers.binary_search_by(|l| l.cmp(&layer)) {
|
||||
Ok(i) | Err(i) => self.layers.insert(i, layer),
|
||||
};
|
||||
|
|
@ -579,6 +612,16 @@ where
|
|||
type Item = HighlightEvent<'a>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if let Some(cancellation_flag) = self.cancellation_flag {
|
||||
self.operation_count += 1;
|
||||
if self.operation_count >= CANCELLATION_CHECK_INTERVAL {
|
||||
self.operation_count = 0;
|
||||
if cancellation_flag.load(Ordering::Relaxed) != 0 {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(utf8_error_len) = self.utf8_error_len.take() {
|
||||
self.source_offset += utf8_error_len;
|
||||
return Some(HighlightEvent::Source("\u{FFFD}"));
|
||||
|
|
@ -606,8 +649,9 @@ where
|
|||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let depth = first_layer.depth + 1;
|
||||
for (language, ranges) in injections {
|
||||
self.add_layer(&language, ranges);
|
||||
self.add_layer(&language, ranges, depth);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -636,7 +680,13 @@ where
|
|||
// to re-sort the layers. If the cursor is already at the end of its syntax tree,
|
||||
// remove it.
|
||||
if self.layers[0].advance() {
|
||||
self.layers.sort_unstable_by(|a, b| a.cmp(&b));
|
||||
let mut index = 0;
|
||||
while self.layers.get(index + 1).map_or(false, |next| {
|
||||
self.layers[index].cmp(next) == cmp::Ordering::Greater
|
||||
}) {
|
||||
self.layers.swap(index, index + 1);
|
||||
index += 1;
|
||||
}
|
||||
} else {
|
||||
self.layers.remove(0);
|
||||
}
|
||||
|
|
@ -685,6 +735,7 @@ impl<'a> Layer<'a> {
|
|||
tree: Tree,
|
||||
sheet: &'a PropertySheet<Properties>,
|
||||
ranges: Vec<Range>,
|
||||
depth: usize,
|
||||
) -> Self {
|
||||
// The cursor's lifetime parameter indicates that the tree must outlive the cursor.
|
||||
// But because the tree is really a pointer to the heap, the cursor can remain
|
||||
|
|
@ -695,6 +746,7 @@ impl<'a> Layer<'a> {
|
|||
_tree: tree,
|
||||
cursor,
|
||||
ranges,
|
||||
depth,
|
||||
at_node_end: false,
|
||||
}
|
||||
}
|
||||
|
|
@ -706,6 +758,7 @@ impl<'a> Layer<'a> {
|
|||
self.offset()
|
||||
.cmp(&other.offset())
|
||||
.then_with(|| other.at_node_end.cmp(&self.at_node_end))
|
||||
.then_with(|| self.depth.cmp(&other.depth))
|
||||
}
|
||||
|
||||
fn offset(&self) -> usize {
|
||||
|
|
@ -816,7 +869,7 @@ pub fn highlight<'a, F>(
|
|||
where
|
||||
F: Fn(&str) -> Option<(Language, &'a PropertySheet<Properties>)> + 'a,
|
||||
{
|
||||
Highlighter::new(source, language, property_sheet, injection_callback)
|
||||
Highlighter::new(source, language, property_sheet, injection_callback, None)
|
||||
}
|
||||
|
||||
pub fn highlight_html<'a, F1, F2>(
|
||||
|
|
@ -830,7 +883,7 @@ where
|
|||
F1: Fn(&str) -> Option<(Language, &'a PropertySheet<Properties>)>,
|
||||
F2: Fn(Scope) -> &'a str,
|
||||
{
|
||||
let highlighter = Highlighter::new(source, language, property_sheet, injection_callback)?;
|
||||
let highlighter = Highlighter::new(source, language, property_sheet, injection_callback, None)?;
|
||||
let mut renderer = HtmlRenderer::new(attribute_callback);
|
||||
let mut scopes = Vec::new();
|
||||
for event in highlighter {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue