Start work on a C API for syntax highlighting
This commit is contained in:
parent
a20fc3c111
commit
98e4fd22ef
6 changed files with 463 additions and 5 deletions
|
|
@ -21,12 +21,16 @@ pub fn get_language(name: &str) -> Language {
|
|||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet<Properties> {
|
||||
pub fn get_property_sheet_json(language_name: &str, sheet_name: &str) -> String {
|
||||
let path = GRAMMARS_DIR
|
||||
.join(language_name)
|
||||
.join("src")
|
||||
.join(sheet_name);
|
||||
let json = fs::read_to_string(path).unwrap();
|
||||
fs::read_to_string(path).unwrap()
|
||||
}
|
||||
|
||||
pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet<Properties> {
|
||||
let json = get_property_sheet_json(language_name, sheet_name);
|
||||
let language = get_language(language_name);
|
||||
load_property_sheet(language, &json).unwrap()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
use super::helpers::fixtures::{get_language, get_property_sheet};
|
||||
use super::helpers::fixtures::{get_language, get_property_sheet, get_property_sheet_json};
|
||||
use lazy_static::lazy_static;
|
||||
use std::ffi::CString;
|
||||
use std::{ptr, slice, str};
|
||||
use tree_sitter::{Language, PropertySheet};
|
||||
use tree_sitter_highlight::{highlight, highlight_html, HighlightEvent, Properties, Scope};
|
||||
use tree_sitter_highlight::{c, highlight, highlight_html, HighlightEvent, Properties, Scope};
|
||||
|
||||
lazy_static! {
|
||||
static ref JS_SHEET: PropertySheet<Properties> =
|
||||
|
|
@ -153,6 +155,93 @@ fn test_highlighting_empty_lines() {
|
|||
);
|
||||
}
|
||||
|
||||
/// Drives the C API end to end: registers HTML and JavaScript, highlights an
/// HTML document with an embedded script, and checks the per-line HTML output.
#[test]
fn test_highlighting_via_c_api() {
    // Fails the test when a C API call reports an error instead of silently
    // continuing with an empty or stale output buffer.
    fn expect_ok(code: c::ErrorCode, action: &str) {
        match code {
            c::ErrorCode::Ok => {}
            c::ErrorCode::UnknownScope => panic!("{} failed: unknown scope", action),
        }
    }

    let js_lang = get_language("javascript");
    let html_lang = get_language("html");
    let js_sheet = c_string(&get_property_sheet_json("javascript", "highlights.json"));
    let html_sheet = c_string(&get_property_sheet_json("html", "highlights.json"));

    let class_tag = c_string("class=tag");
    let class_function = c_string("class=function");
    let class_string = c_string("class=string");
    let class_keyword = c_string("class=keyword");

    let js_scope_name = c_string("source.js");
    let html_scope_name = c_string("text.html.basic");
    let injection_regex = c_string("^(javascript|js)$");
    let source_code = c_string("<script>\nconst a = b('c');\nc.d();\n</script>");

    // One slot per scope; scopes left null get a bare `<span>`.
    let attribute_strings = &mut [ptr::null(); Scope::Unknown as usize + 1];
    attribute_strings[Scope::Tag as usize] = class_tag.as_ptr();
    attribute_strings[Scope::String as usize] = class_string.as_ptr();
    attribute_strings[Scope::Keyword as usize] = class_keyword.as_ptr();
    attribute_strings[Scope::Function as usize] = class_function.as_ptr();

    let highlighter = c::ts_highlighter_new(attribute_strings.as_ptr());
    let buffer = c::ts_highlight_buffer_new();

    expect_ok(
        c::ts_highlighter_add_language(
            highlighter,
            html_scope_name.as_ptr(),
            html_lang,
            html_sheet.as_ptr(),
            ptr::null_mut(),
        ),
        "registering html",
    );
    expect_ok(
        c::ts_highlighter_add_language(
            highlighter,
            js_scope_name.as_ptr(),
            js_lang,
            js_sheet.as_ptr(),
            injection_regex.as_ptr(),
        ),
        "registering javascript",
    );
    expect_ok(
        c::ts_highlighter_highlight(
            highlighter,
            html_scope_name.as_ptr(),
            source_code.as_ptr(),
            source_code.as_bytes().len() as u32,
            buffer,
        ),
        "highlighting",
    );

    let output_bytes = c::ts_highlight_buffer_content(buffer);
    let output_line_offsets = c::ts_highlight_buffer_line_offsets(buffer);
    let output_len = c::ts_highlight_buffer_len(buffer);
    let output_line_count = c::ts_highlight_buffer_line_count(buffer);

    // SAFETY: the pointers and lengths were just reported by the buffer, which
    // stays alive (and unmodified) until it is deleted at the end of the test.
    let output_bytes = unsafe { slice::from_raw_parts(output_bytes, output_len as usize) };
    let output_line_offsets =
        unsafe { slice::from_raw_parts(output_line_offsets, output_line_count as usize) };

    // Each line runs from its own offset to the next line's offset, or to the
    // end of the output for the last line.
    let lines: Vec<&str> = output_line_offsets
        .iter()
        .enumerate()
        .map(|(i, &offset)| {
            let line_start = offset as usize;
            let line_end = output_line_offsets
                .get(i + 1)
                .map(|x| *x as usize)
                .unwrap_or(output_bytes.len());
            str::from_utf8(&output_bytes[line_start..line_end]).unwrap()
        })
        .collect();

    // NOTE(review): if the highlighter HTML-escapes `<` and `>`, the bare angle
    // brackets in the first and last literal should read `&lt;` / `&gt;`; this
    // listing may have decoded the entities — confirm against the real file.
    assert_eq!(
        lines,
        vec![
            "<<span class=tag>script</span>>",
            "<span class=keyword>const</span> <span>a</span> <span>=</span> <span class=function>b</span><span>(</span><span class=string>'c'</span><span>)</span><span>;</span>",
            "<span>c</span><span>.</span><span class=function>d</span><span>(</span><span>)</span><span>;</span>",
            "</<span class=tag>script</span>>",
        ]
    );

    c::ts_highlighter_delete(highlighter);
    c::ts_highlight_buffer_delete(buffer);
}
|
||||
|
||||
/// Copies `s` into an owned, NUL-terminated C string.
///
/// Panics if `s` contains an interior NUL byte.
fn c_string(s: &str) -> CString {
    let converted = CString::new(s);
    converted.unwrap()
}
|
||||
|
||||
fn test_language_for_injection_string<'a>(
|
||||
string: &str,
|
||||
) -> Option<(Language, &'a PropertySheet<Properties>)> {
|
||||
|
|
|
|||
|
|
@ -12,6 +12,9 @@ edition = "2018"
|
|||
keywords = ["incremental", "parsing", "syntax", "highlighting"]
|
||||
categories = ["parsing", "text-editors"]
|
||||
|
||||
[lib]
|
||||
crate-type = ["lib", "staticlib"]
|
||||
|
||||
[dependencies]
|
||||
regex = "1"
|
||||
serde = "1.0"
|
||||
|
|
|
|||
102
highlight/include/tree_sitter/highlight.h
Normal file
102
highlight/include/tree_sitter/highlight.h
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
#ifndef TREE_SITTER_HIGHLIGHT_H_
#define TREE_SITTER_HIGHLIGHT_H_

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// Forward declaration so that this header compiles on its own. It is
// compatible with the `TSLanguage` typedef provided by the core tree-sitter
// headers, whichever is included first.
struct TSLanguage;

// Status codes returned by the functions below that return `int`.
typedef enum {
  TSHighlightOk,
  TSHighlightUnknownScope,
} TSHighlightError;

// The list of scopes which can be styled for syntax highlighting.
// When constructing a `TSHighlighter`, you need to construct an
// `attribute_strings` array whose elements correspond to these values.
// The array must therefore contain `TSHighlightScopeUnknown + 1` entries.
enum TSHighlightScope {
  TSHighlightScopeAttribute,
  TSHighlightScopeComment,
  TSHighlightScopeConstant,
  TSHighlightScopeConstantBuiltin,
  TSHighlightScopeConstructor,
  TSHighlightScopeConstructorBuiltin,
  TSHighlightScopeEmbedded,
  TSHighlightScopeEscape,
  TSHighlightScopeFunction,
  TSHighlightScopeFunctionBuiltin,
  TSHighlightScopeKeyword,
  TSHighlightScopeNumber,
  TSHighlightScopeOperator,
  TSHighlightScopeProperty,
  TSHighlightScopePropertyBuiltin,
  TSHighlightScopePunctuation,
  TSHighlightScopePunctuationBracket,
  TSHighlightScopePunctuationDelimiter,
  TSHighlightScopePunctuationSpecial,
  TSHighlightScopeString,
  TSHighlightScopeStringSpecial,
  TSHighlightScopeTag,
  TSHighlightScopeType,
  TSHighlightScopeTypeBuiltin,
  TSHighlightScopeVariable,
  TSHighlightScopeVariableBuiltin,
  TSHighlightScopeUnknown,
};

typedef struct TSHighlighter TSHighlighter;
typedef struct TSHighlightBuffer TSHighlightBuffer;

// Construct a `TSHighlighter` by providing a list of strings containing
// the HTML attributes that should be applied for each highlight scope.
// An entry may be NULL, in which case spans for that scope get no attributes.
// The strings are not copied: they must stay valid and unchanged for as long
// as the highlighter is in use.
TSHighlighter *ts_highlighter_new(
  const char **attribute_strings
);

// Delete a syntax highlighter.
void ts_highlighter_delete(TSHighlighter *);

// Add a `TSLanguage` to a highlighter. The language is associated with a
// scope name, which can be used later to select a language for syntax
// highlighting. Along with the language, you must provide a JSON string
// containing the compiled PropertySheet to use for syntax highlighting
// with that language. You can also optionally provide an 'injection regex',
// which is used to detect when this language has been embedded in a document
// written in a different language. Pass NULL for `injection_regex` if the
// language should never be selected by injection.
// The return value is a `TSHighlightError` code.
int ts_highlighter_add_language(
  TSHighlighter *self,
  const char *scope_name,
  const struct TSLanguage *language,
  const char *property_sheet_json,
  const char *injection_regex
);

// Compute syntax highlighting for a given document. You must first
// create a `TSHighlightBuffer` to hold the output. Any previous content of
// the buffer is replaced. The return value is a `TSHighlightError` code;
// `TSHighlightUnknownScope` means no language was added under `scope_name`.
int ts_highlighter_highlight(
  TSHighlighter *self,
  const char *scope_name,
  const char *source_code,
  uint32_t source_code_len,
  TSHighlightBuffer *output
);

// TSHighlightBuffer: This struct stores the HTML output of syntax
// highlighting. It can be reused for multiple highlighting calls.
TSHighlightBuffer *ts_highlight_buffer_new(void);

// Delete a highlight buffer.
void ts_highlight_buffer_delete(TSHighlightBuffer *);

// Access the HTML content of a highlight buffer. The content is
// `ts_highlight_buffer_len` bytes long.
const uint8_t *ts_highlight_buffer_content(const TSHighlightBuffer *);

// Access the byte offsets, within the HTML content, at which each output
// line starts. There are `ts_highlight_buffer_line_count` offsets.
const uint32_t *ts_highlight_buffer_line_offsets(const TSHighlightBuffer *);

// The length in bytes of the HTML content.
uint32_t ts_highlight_buffer_len(const TSHighlightBuffer *);

// The number of entries in the line offset array.
uint32_t ts_highlight_buffer_line_count(const TSHighlightBuffer *);

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_HIGHLIGHT_H_
|
||||
248
highlight/src/c_lib.rs
Normal file
248
highlight/src/c_lib.rs
Normal file
|
|
@ -0,0 +1,248 @@
|
|||
use super::{escape, load_property_sheet, HighlightEvent, Highlighter, Properties, Scope};
|
||||
use regex::Regex;
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::CStr;
|
||||
use std::io::Write;
|
||||
use std::os::raw::c_char;
|
||||
use std::process::abort;
|
||||
use std::{fmt, slice};
|
||||
use tree_sitter::{Language, PropertySheet};
|
||||
|
||||
/// Everything stored for one language registered with a `TSHighlighter`:
/// the language itself, the property sheet used to highlight it, and an
/// optional regex that is matched against injection strings to decide
/// whether this language should be used for embedded content.
struct LanguageConfiguration {
|
||||
language: Language,
|
||||
property_sheet: PropertySheet<Properties>,
|
||||
injection_regex: Option<Regex>,
|
||||
}
|
||||
|
||||
/// Highlighter state handed to C callers as an opaque pointer.
///
/// `languages` maps a scope name to the configuration registered under it.
/// `attribute_strings` holds one HTML attribute string per `Scope`, indexed by
/// `scope as usize`. The byte slices borrow the C strings passed to
/// `ts_highlighter_new`; they are not copied, so the caller must keep those
/// strings alive for as long as the highlighter exists.
pub struct TSHighlighter {
|
||||
languages: HashMap<String, LanguageConfiguration>,
|
||||
attribute_strings: Vec<&'static [u8]>,
|
||||
}
|
||||
|
||||
/// Output buffer handed to C callers as an opaque pointer.
///
/// `html` holds the generated HTML bytes and `line_offsets` holds the byte
/// offset into `html` at which each output line begins.
pub struct TSHighlightBuffer {
|
||||
html: Vec<u8>,
|
||||
line_offsets: Vec<u32>,
|
||||
}
|
||||
|
||||
/// Status code returned across the C boundary by the highlighter functions.
///
/// The variants are laid out like a C enum, so `Ok` is 0 and `UnknownScope`
/// is 1. The derives let Rust callers copy, compare and print a code.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ErrorCode {
    /// The call completed successfully.
    Ok,
    /// No language has been registered under the requested scope name.
    UnknownScope,
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlighter_new(attribute_strings: *const *const c_char) -> *mut TSHighlighter {
|
||||
let attribute_strings =
|
||||
unsafe { slice::from_raw_parts(attribute_strings, Scope::Unknown as usize + 1) };
|
||||
let attribute_strings = attribute_strings
|
||||
.into_iter()
|
||||
.map(|s| {
|
||||
if s.is_null() {
|
||||
&[]
|
||||
} else {
|
||||
unsafe { CStr::from_ptr(*s).to_bytes() }
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
Box::into_raw(Box::new(TSHighlighter {
|
||||
languages: HashMap::new(),
|
||||
attribute_strings,
|
||||
}))
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer {
|
||||
Box::into_raw(Box::new(TSHighlightBuffer {
|
||||
html: Vec::new(),
|
||||
line_offsets: Vec::new(),
|
||||
}))
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) {
|
||||
drop(unsafe { Box::from_raw(this) })
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) {
|
||||
drop(unsafe { Box::from_raw(this) })
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_content(this: *mut TSHighlightBuffer) -> *const u8 {
|
||||
let this = unwrap_ptr(this);
|
||||
this.html.as_slice().as_ptr()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_line_offsets(this: *mut TSHighlightBuffer) -> *const u32 {
|
||||
let this = unwrap_ptr(this);
|
||||
this.line_offsets.as_slice().as_ptr()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_len(this: *mut TSHighlightBuffer) -> u32 {
|
||||
let this = unwrap_ptr(this);
|
||||
this.html.len() as u32
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_line_count(this: *mut TSHighlightBuffer) -> u32 {
|
||||
let this = unwrap_ptr(this);
|
||||
this.line_offsets.len() as u32
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlighter_add_language(
|
||||
this: *mut TSHighlighter,
|
||||
scope_name: *const c_char,
|
||||
language: Language,
|
||||
property_sheet_json: *const c_char,
|
||||
injection_regex: *const c_char,
|
||||
) -> ErrorCode {
|
||||
let this = unwrap_ptr(this);
|
||||
let scope_name = unsafe { CStr::from_ptr(scope_name) };
|
||||
let scope_name = unwrap(scope_name.to_str()).to_string();
|
||||
let property_sheet_json = unsafe { CStr::from_ptr(property_sheet_json) };
|
||||
let property_sheet_json = unwrap(property_sheet_json.to_str());
|
||||
|
||||
let property_sheet = unwrap(load_property_sheet(language, property_sheet_json));
|
||||
let injection_regex = if injection_regex.is_null() {
|
||||
None
|
||||
} else {
|
||||
let pattern = unsafe { CStr::from_ptr(injection_regex) };
|
||||
Some(unwrap(Regex::new(unwrap(pattern.to_str()))))
|
||||
};
|
||||
|
||||
this.languages.insert(
|
||||
scope_name,
|
||||
LanguageConfiguration {
|
||||
language,
|
||||
property_sheet,
|
||||
injection_regex,
|
||||
},
|
||||
);
|
||||
|
||||
ErrorCode::Ok
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlighter_highlight(
|
||||
this: *mut TSHighlighter,
|
||||
scope_name: *const c_char,
|
||||
source_code: *const c_char,
|
||||
source_code_len: u32,
|
||||
output: *mut TSHighlightBuffer,
|
||||
) -> ErrorCode {
|
||||
let this = unwrap_ptr(this);
|
||||
let output = unwrap_ptr(output);
|
||||
let scope_name = unwrap(unsafe { CStr::from_ptr(scope_name).to_str() });
|
||||
let source_code =
|
||||
unsafe { slice::from_raw_parts(source_code as *const u8, source_code_len as usize) };
|
||||
this.highlight(source_code, scope_name, output)
|
||||
}
|
||||
|
||||
impl TSHighlighter {
|
||||
fn highlight(
|
||||
&mut self,
|
||||
source_code: &[u8],
|
||||
scope_name: &str,
|
||||
output: &mut TSHighlightBuffer,
|
||||
) -> ErrorCode {
|
||||
let configuration = self.languages.get(scope_name);
|
||||
if configuration.is_none() {
|
||||
return ErrorCode::UnknownScope;
|
||||
}
|
||||
let configuration = configuration.unwrap();
|
||||
let languages = &self.languages;
|
||||
|
||||
let highlighter = unwrap(Highlighter::new(
|
||||
source_code,
|
||||
configuration.language,
|
||||
&configuration.property_sheet,
|
||||
|injection_string| {
|
||||
languages.values().find_map(|conf| {
|
||||
conf.injection_regex.as_ref().and_then(|regex| {
|
||||
if regex.is_match(injection_string) {
|
||||
Some((conf.language, &conf.property_sheet))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
})
|
||||
},
|
||||
));
|
||||
|
||||
output.html.clear();
|
||||
output.line_offsets.clear();
|
||||
output.line_offsets.push(0);
|
||||
let mut scopes = Vec::new();
|
||||
for event in highlighter {
|
||||
match event {
|
||||
HighlightEvent::ScopeStart(s) => {
|
||||
scopes.push(s);
|
||||
output.start_scope(s, &self.attribute_strings);
|
||||
}
|
||||
HighlightEvent::ScopeEnd => {
|
||||
scopes.pop();
|
||||
output.end_scope();
|
||||
}
|
||||
HighlightEvent::Source(src) => {
|
||||
output.add_text(src, &scopes, &self.attribute_strings);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
ErrorCode::Ok
|
||||
}
|
||||
}
|
||||
|
||||
impl TSHighlightBuffer {
|
||||
fn start_scope(&mut self, s: Scope, attribute_strings: &[&[u8]]) {
|
||||
let attribute_string = attribute_strings[s as usize];
|
||||
self.html.extend(b"<span");
|
||||
if !attribute_string.is_empty() {
|
||||
self.html.extend(b" ");
|
||||
self.html.extend(attribute_string);
|
||||
}
|
||||
self.html.extend(b">");
|
||||
}
|
||||
|
||||
fn end_scope(&mut self) {
|
||||
self.html.extend(b"</span>");
|
||||
}
|
||||
|
||||
fn finish_line(&mut self) {
|
||||
self.line_offsets.push(self.html.len() as u32);
|
||||
}
|
||||
|
||||
fn add_text(&mut self, src: &str, scopes: &Vec<Scope>, attribute_strings: &[&[u8]]) {
|
||||
let mut multiline = false;
|
||||
for line in src.split('\n') {
|
||||
let line = line.trim_end_matches('\r');
|
||||
if multiline {
|
||||
scopes.iter().for_each(|_| self.end_scope());
|
||||
self.finish_line();
|
||||
scopes
|
||||
.iter()
|
||||
.for_each(|scope| self.start_scope(*scope, attribute_strings));
|
||||
}
|
||||
write!(&mut self.html, "{}", escape::Escape(line)).unwrap();
|
||||
multiline = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a raw pointer received from C into a mutable reference.
///
/// If the pointer is null, a message is printed to stderr and the process is
/// aborted. The reported file and line are those of this helper, not of its
/// caller.
fn unwrap_ptr<'a, T>(result: *mut T) -> &'a mut T {
    match unsafe { result.as_mut() } {
        Some(reference) => reference,
        None => {
            eprintln!("{}:{} - pointer must not be null", file!(), line!());
            abort()
        }
    }
}
|
||||
|
||||
/// Extracts the success value of `result`.
///
/// On error, the error is printed to stderr with a
/// `tree-sitter highlight error:` prefix and the process is aborted.
fn unwrap<T, E: fmt::Display>(result: Result<T, E>) -> T {
    match result {
        Ok(value) => value,
        Err(error) => {
            eprintln!("tree-sitter highlight error: {}", error);
            abort()
        }
    }
}
|
||||
|
|
@ -1,9 +1,11 @@
|
|||
pub mod c_lib;
|
||||
mod escape;
|
||||
|
||||
pub use c_lib as c;
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use serde_derive::*;
|
||||
use std::cmp;
|
||||
use std::fmt::Write;
|
||||
use std::fmt::{self, Write};
|
||||
use std::mem::transmute;
|
||||
use std::str;
|
||||
use std::usize;
|
||||
|
|
@ -151,6 +153,16 @@ pub enum PropertySheetError {
|
|||
InvalidFormat(String),
|
||||
}
|
||||
|
||||
/// Formats a `PropertySheetError` by delegating to the `fmt` method of the
/// value carried by whichever variant is present; no extra text is added.
impl fmt::Display for PropertySheetError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
PropertySheetError::InvalidJSON(e) => e.fmt(f),
|
||||
PropertySheetError::InvalidRegex(e) => e.fmt(f),
|
||||
PropertySheetError::InvalidFormat(e) => e.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_property_sheet(
|
||||
language: Language,
|
||||
json: &str,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue