Merge branch 'master' into node-fields
This commit is contained in:
commit
5035e194ff
34 changed files with 1178 additions and 240 deletions
6
Cargo.lock
generated
6
Cargo.lock
generated
|
|
@ -1,3 +1,5 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.6.9"
|
||||
|
|
@ -661,12 +663,12 @@ dependencies = [
|
|||
"spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tempfile 3.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tree-sitter 0.3.9",
|
||||
"tree-sitter-highlight 0.1.4",
|
||||
"tree-sitter-highlight 0.1.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-highlight"
|
||||
version = "0.1.4"
|
||||
version = "0.1.5"
|
||||
dependencies = [
|
||||
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
|
|
|||
|
|
@ -172,7 +172,9 @@ fn load_js_grammar_file(grammar_path: &Path) -> Result<String> {
|
|||
Some(code) => return Err(Error(format!("Node process exited with status {}", code))),
|
||||
}
|
||||
|
||||
Ok(String::from_utf8(output.stdout).expect("Got invalid UTF8 from node"))
|
||||
let mut result = String::from_utf8(output.stdout).expect("Got invalid UTF8 from node");
|
||||
result.push('\n');
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn ensure_file<T: AsRef<[u8]>>(path: &PathBuf, f: impl Fn() -> T) -> Result<()> {
|
||||
|
|
|
|||
|
|
@ -843,7 +843,7 @@ impl Generator {
|
|||
let external_scanner_name = format!("{}_external_scanner", language_function_name);
|
||||
|
||||
if !self.syntax_grammar.external_tokens.is_empty() {
|
||||
add_line!(self, "void *{}_create();", external_scanner_name);
|
||||
add_line!(self, "void *{}_create(void);", external_scanner_name);
|
||||
add_line!(self, "void {}_destroy(void *);", external_scanner_name);
|
||||
add_line!(
|
||||
self,
|
||||
|
|
@ -870,7 +870,7 @@ impl Generator {
|
|||
|
||||
add_line!(
|
||||
self,
|
||||
"extern const TSLanguage *{}() {{",
|
||||
"extern const TSLanguage *{}(void) {{",
|
||||
language_function_name
|
||||
);
|
||||
indent!(self);
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ use serde::ser::SerializeMap;
|
|||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use serde_json::{json, Value};
|
||||
use std::collections::HashMap;
|
||||
use std::time::Instant;
|
||||
use std::{fmt, fs, io, path};
|
||||
use tree_sitter::{Language, PropertySheet};
|
||||
use tree_sitter_highlight::{highlight, highlight_html, HighlightEvent, Properties, Scope};
|
||||
|
|
@ -254,10 +255,13 @@ pub fn ansi(
|
|||
source: &[u8],
|
||||
language: Language,
|
||||
property_sheet: &PropertySheet<Properties>,
|
||||
print_time: bool,
|
||||
) -> Result<()> {
|
||||
use std::io::Write;
|
||||
let stdout = io::stdout();
|
||||
let mut stdout = stdout.lock();
|
||||
|
||||
let time = Instant::now();
|
||||
let mut scope_stack = Vec::new();
|
||||
for event in highlight(source, language, property_sheet, |s| {
|
||||
language_for_injection_string(loader, s)
|
||||
|
|
@ -278,6 +282,13 @@ pub fn ansi(
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if print_time {
|
||||
let duration = time.elapsed();
|
||||
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
|
||||
eprintln!("{} ms", duration_ms);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ use std::env;
|
|||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::process::exit;
|
||||
use std::usize;
|
||||
use std::{u64, usize};
|
||||
use tree_sitter_cli::{
|
||||
config, error, generate, highlight, loader, logger, parse, properties, test,
|
||||
};
|
||||
|
|
@ -49,10 +49,13 @@ fn run() -> error::Result<()> {
|
|||
.multiple(true)
|
||||
.required(true),
|
||||
)
|
||||
.arg(Arg::with_name("scope").long("scope").takes_value(true))
|
||||
.arg(Arg::with_name("debug").long("debug").short("d"))
|
||||
.arg(Arg::with_name("debug-graph").long("debug-graph").short("D"))
|
||||
.arg(Arg::with_name("quiet").long("quiet").short("q"))
|
||||
.arg(Arg::with_name("time").long("time").short("t")),
|
||||
.arg(Arg::with_name("time").long("time").short("t"))
|
||||
.arg(Arg::with_name("allow-cancellation").long("cancel"))
|
||||
.arg(Arg::with_name("timeout").long("timeout").takes_value(true)),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("test")
|
||||
|
|
@ -76,7 +79,8 @@ fn run() -> error::Result<()> {
|
|||
.required(true),
|
||||
)
|
||||
.arg(Arg::with_name("scope").long("scope").takes_value(true))
|
||||
.arg(Arg::with_name("html").long("html").short("h")),
|
||||
.arg(Arg::with_name("html").long("html").short("h"))
|
||||
.arg(Arg::with_name("time").long("time").short("t")),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
|
|
@ -131,6 +135,10 @@ fn run() -> error::Result<()> {
|
|||
let debug_graph = matches.is_present("debug-graph");
|
||||
let quiet = matches.is_present("quiet");
|
||||
let time = matches.is_present("time");
|
||||
let allow_cancellation = matches.is_present("allow-cancellation");
|
||||
let timeout = matches
|
||||
.value_of("timeout")
|
||||
.map_or(0, |t| u64::from_str_radix(t, 10).unwrap());
|
||||
loader.find_all_languages(&config.parser_directories)?;
|
||||
let paths = matches
|
||||
.values_of("path")
|
||||
|
|
@ -141,23 +149,30 @@ fn run() -> error::Result<()> {
|
|||
let mut has_error = false;
|
||||
for path in paths {
|
||||
let path = Path::new(path);
|
||||
let language =
|
||||
if let Some((l, _)) = loader.language_configuration_for_file_name(path)? {
|
||||
l
|
||||
} else if let Some(l) = loader.language_at_path(&current_dir)? {
|
||||
l
|
||||
let language = if let Some(scope) = matches.value_of("scope") {
|
||||
if let Some(config) = loader.language_configuration_for_scope(scope)? {
|
||||
config.0
|
||||
} else {
|
||||
eprintln!("No language found");
|
||||
return Ok(());
|
||||
};
|
||||
return Err(error::Error(format!("Unknown scope '{}'", scope)));
|
||||
}
|
||||
} else if let Some((l, _)) = loader.language_configuration_for_file_name(path)? {
|
||||
l
|
||||
} else if let Some(l) = loader.language_at_path(&current_dir)? {
|
||||
l
|
||||
} else {
|
||||
eprintln!("No language found");
|
||||
return Ok(());
|
||||
};
|
||||
has_error |= parse::parse_file_at_path(
|
||||
language,
|
||||
path,
|
||||
max_path_length,
|
||||
quiet,
|
||||
time,
|
||||
timeout,
|
||||
debug,
|
||||
debug_graph,
|
||||
allow_cancellation,
|
||||
)?;
|
||||
}
|
||||
|
||||
|
|
@ -167,6 +182,7 @@ fn run() -> error::Result<()> {
|
|||
} else if let Some(matches) = matches.subcommand_matches("highlight") {
|
||||
let paths = matches.values_of("path").unwrap().into_iter();
|
||||
let html_mode = matches.is_present("html");
|
||||
let time = matches.is_present("time");
|
||||
loader.find_all_languages(&config.parser_directories)?;
|
||||
|
||||
if html_mode {
|
||||
|
|
@ -201,7 +217,7 @@ fn run() -> error::Result<()> {
|
|||
if html_mode {
|
||||
highlight::html(&loader, &config.theme, &source, language, sheet)?;
|
||||
} else {
|
||||
highlight::ansi(&loader, &config.theme, &source, language, sheet)?;
|
||||
highlight::ansi(&loader, &config.theme, &source, language, sheet, time)?;
|
||||
}
|
||||
} else {
|
||||
return Err(error::Error(format!(
|
||||
|
|
|
|||
205
cli/src/parse.rs
205
cli/src/parse.rs
|
|
@ -1,9 +1,10 @@
|
|||
use super::error::{Error, Result};
|
||||
use super::util;
|
||||
use std::fs;
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::time::Instant;
|
||||
use std::{fs, thread};
|
||||
use tree_sitter::{Language, LogType, Parser};
|
||||
|
||||
pub fn parse_file_at_path(
|
||||
|
|
@ -12,8 +13,10 @@ pub fn parse_file_at_path(
|
|||
max_path_length: usize,
|
||||
quiet: bool,
|
||||
print_time: bool,
|
||||
timeout: u64,
|
||||
debug: bool,
|
||||
debug_graph: bool,
|
||||
allow_cancellation: bool,
|
||||
) -> Result<bool> {
|
||||
let mut _log_session = None;
|
||||
let mut parser = Parser::new();
|
||||
|
|
@ -21,9 +24,28 @@ pub fn parse_file_at_path(
|
|||
let source_code = fs::read(path)
|
||||
.map_err(|e| Error(format!("Error reading source file {:?}: {}", path, e)))?;
|
||||
|
||||
// If the `--cancel` flag was passed, then cancel the parse
|
||||
// when the user types a newline.
|
||||
if allow_cancellation {
|
||||
let flag = Box::new(AtomicUsize::new(0));
|
||||
unsafe { parser.set_cancellation_flag(Some(&flag)) };
|
||||
thread::spawn(move || {
|
||||
let mut line = String::new();
|
||||
io::stdin().read_line(&mut line).unwrap();
|
||||
eprintln!("Cancelling");
|
||||
flag.store(1, Ordering::Relaxed);
|
||||
});
|
||||
}
|
||||
|
||||
// Set a timeout based on the `--timeout` flag.
|
||||
parser.set_timeout_micros(timeout);
|
||||
|
||||
// Render an HTML graph if `--debug-graph` was passed
|
||||
if debug_graph {
|
||||
_log_session = Some(util::log_graphs(&mut parser, "log.html")?);
|
||||
} else if debug {
|
||||
}
|
||||
// Log to stderr if `--debug` was passed
|
||||
else if debug {
|
||||
parser.set_logger(Some(Box::new(|log_type, message| {
|
||||
if log_type == LogType::Lex {
|
||||
io::stderr().write(b" ").unwrap();
|
||||
|
|
@ -33,112 +55,123 @@ pub fn parse_file_at_path(
|
|||
}
|
||||
|
||||
let time = Instant::now();
|
||||
let tree = parser
|
||||
.parse(&source_code, None)
|
||||
.expect("Incompatible language version");
|
||||
let tree = parser.parse(&source_code, None);
|
||||
let duration = time.elapsed();
|
||||
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
|
||||
|
||||
let mut cursor = tree.walk();
|
||||
|
||||
let stdout = io::stdout();
|
||||
let mut stdout = stdout.lock();
|
||||
|
||||
if !quiet {
|
||||
let mut needs_newline = false;
|
||||
let mut indent_level = 0;
|
||||
let mut did_visit_children = false;
|
||||
if let Some(tree) = tree {
|
||||
let mut cursor = tree.walk();
|
||||
|
||||
if !quiet {
|
||||
let mut needs_newline = false;
|
||||
let mut indent_level = 0;
|
||||
let mut did_visit_children = false;
|
||||
loop {
|
||||
let node = cursor.node();
|
||||
let is_named = node.is_named();
|
||||
if did_visit_children {
|
||||
if is_named {
|
||||
stdout.write(b")")?;
|
||||
needs_newline = true;
|
||||
}
|
||||
if cursor.goto_next_sibling() {
|
||||
did_visit_children = false;
|
||||
} else if cursor.goto_parent() {
|
||||
did_visit_children = true;
|
||||
indent_level -= 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if is_named {
|
||||
if needs_newline {
|
||||
stdout.write(b"\n")?;
|
||||
}
|
||||
for _ in 0..indent_level {
|
||||
stdout.write(b" ")?;
|
||||
}
|
||||
let start = node.start_position();
|
||||
let end = node.end_position();
|
||||
if let Some(field_name) = cursor.field_name() {
|
||||
write!(&mut stdout, "{}: ", field_name)?;
|
||||
}
|
||||
write!(
|
||||
&mut stdout,
|
||||
"({} [{}, {}] - [{}, {}]",
|
||||
node.kind(),
|
||||
start.row,
|
||||
start.column,
|
||||
end.row,
|
||||
end.column
|
||||
)?;
|
||||
needs_newline = true;
|
||||
}
|
||||
if cursor.goto_first_child() {
|
||||
did_visit_children = false;
|
||||
indent_level += 1;
|
||||
} else {
|
||||
did_visit_children = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
cursor.reset(tree.root_node());
|
||||
println!("");
|
||||
}
|
||||
|
||||
let mut first_error = None;
|
||||
loop {
|
||||
let node = cursor.node();
|
||||
if did_visit_children {
|
||||
if node.is_named() {
|
||||
stdout.write(b")")?;
|
||||
needs_newline = true;
|
||||
}
|
||||
if cursor.goto_next_sibling() {
|
||||
did_visit_children = false;
|
||||
} else if cursor.goto_parent() {
|
||||
did_visit_children = true;
|
||||
indent_level -= 1;
|
||||
if node.has_error() {
|
||||
if node.is_error() || node.is_missing() {
|
||||
first_error = Some(node);
|
||||
break;
|
||||
} else {
|
||||
cursor.goto_first_child();
|
||||
}
|
||||
} else if !cursor.goto_next_sibling() {
|
||||
if !cursor.goto_parent() {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if node.is_named() {
|
||||
if needs_newline {
|
||||
stdout.write(b"\n")?;
|
||||
}
|
||||
for _ in 0..indent_level {
|
||||
stdout.write(b" ")?;
|
||||
}
|
||||
if let Some(field_name) = cursor.field_name() {
|
||||
write!(&mut stdout, "{}: ", field_name)?;
|
||||
}
|
||||
let start = node.start_position();
|
||||
let end = node.end_position();
|
||||
write!(
|
||||
&mut stdout,
|
||||
"({} [{}, {}] - [{}, {}]",
|
||||
node.kind(),
|
||||
start.row,
|
||||
start.column,
|
||||
end.row,
|
||||
end.column
|
||||
)?;
|
||||
needs_newline = true;
|
||||
}
|
||||
if cursor.goto_first_child() {
|
||||
did_visit_children = false;
|
||||
indent_level += 1;
|
||||
} else {
|
||||
did_visit_children = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
cursor.reset(tree.root_node());
|
||||
println!("");
|
||||
}
|
||||
|
||||
let mut first_error = None;
|
||||
loop {
|
||||
let node = cursor.node();
|
||||
if node.has_error() {
|
||||
if node.is_error() || node.is_missing() {
|
||||
first_error = Some(node);
|
||||
break;
|
||||
} else {
|
||||
cursor.goto_first_child();
|
||||
}
|
||||
} else if !cursor.goto_next_sibling() {
|
||||
if !cursor.goto_parent() {
|
||||
break;
|
||||
if first_error.is_some() || print_time {
|
||||
write!(
|
||||
&mut stdout,
|
||||
"{:width$}\t{} ms",
|
||||
path.to_str().unwrap(),
|
||||
duration_ms,
|
||||
width = max_path_length
|
||||
)?;
|
||||
if let Some(node) = first_error {
|
||||
let start = node.start_position();
|
||||
let end = node.end_position();
|
||||
write!(
|
||||
&mut stdout,
|
||||
"\t({} [{}, {}] - [{}, {}])",
|
||||
node.kind(),
|
||||
start.row,
|
||||
start.column,
|
||||
end.row,
|
||||
end.column
|
||||
)?;
|
||||
}
|
||||
write!(&mut stdout, "\n")?;
|
||||
}
|
||||
}
|
||||
|
||||
if first_error.is_some() || print_time {
|
||||
write!(
|
||||
return Ok(first_error.is_some());
|
||||
} else if print_time {
|
||||
writeln!(
|
||||
&mut stdout,
|
||||
"{:width$}\t{} ms",
|
||||
"{:width$}\t{} ms (timed out)",
|
||||
path.to_str().unwrap(),
|
||||
duration_ms,
|
||||
width = max_path_length
|
||||
)?;
|
||||
if let Some(node) = first_error {
|
||||
let start = node.start_position();
|
||||
let end = node.end_position();
|
||||
write!(
|
||||
&mut stdout,
|
||||
"\t({} [{}, {}] - [{}, {}])",
|
||||
node.kind(),
|
||||
start.row,
|
||||
start.column,
|
||||
end.row,
|
||||
end.column
|
||||
)?;
|
||||
}
|
||||
write!(&mut stdout, "\n")?;
|
||||
}
|
||||
|
||||
Ok(first_error.is_some())
|
||||
Ok(false)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1113,6 +1113,28 @@ mod tests {
|
|||
),
|
||||
])
|
||||
);
|
||||
|
||||
// Handle differently-formatted calls
|
||||
let sheet2 = generate_property_sheet(
|
||||
"foo.css",
|
||||
"
|
||||
a {
|
||||
b: f();
|
||||
c: f(
|
||||
g(h),
|
||||
i,
|
||||
\"j\",
|
||||
10
|
||||
);
|
||||
}
|
||||
",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
query_simple(&sheet2, vec!["a"])["c"],
|
||||
query_simple(&sheet, vec!["a"])["c"]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -98,7 +98,9 @@ extern "C" fn ts_record_free(ptr: *mut c_void) {
|
|||
}
|
||||
|
||||
#[no_mangle]
|
||||
extern "C" fn ts_record_allocations_toggle() {
|
||||
extern "C" fn ts_toggle_allocation_recording(enabled: bool) -> bool {
|
||||
let mut recorder = RECORDER.lock();
|
||||
recorder.enabled = !recorder.enabled;
|
||||
let was_enabled = recorder.enabled;
|
||||
recorder.enabled = enabled;
|
||||
was_enabled
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,12 +21,16 @@ pub fn get_language(name: &str) -> Language {
|
|||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet<Properties> {
|
||||
pub fn get_property_sheet_json(language_name: &str, sheet_name: &str) -> String {
|
||||
let path = GRAMMARS_DIR
|
||||
.join(language_name)
|
||||
.join("src")
|
||||
.join(sheet_name);
|
||||
let json = fs::read_to_string(path).unwrap();
|
||||
fs::read_to_string(path).unwrap()
|
||||
}
|
||||
|
||||
pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet<Properties> {
|
||||
let json = get_property_sheet_json(language_name, sheet_name);
|
||||
let language = get_language(language_name);
|
||||
load_property_sheet(language, &json).unwrap()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,13 +1,17 @@
|
|||
use super::helpers::fixtures::{get_language, get_property_sheet};
|
||||
use super::helpers::fixtures::{get_language, get_property_sheet, get_property_sheet_json};
|
||||
use lazy_static::lazy_static;
|
||||
use std::ffi::CString;
|
||||
use std::{ptr, slice, str};
|
||||
use tree_sitter::{Language, PropertySheet};
|
||||
use tree_sitter_highlight::{highlight, highlight_html, HighlightEvent, Properties, Scope};
|
||||
use tree_sitter_highlight::{c, highlight, highlight_html, HighlightEvent, Properties, Scope};
|
||||
|
||||
lazy_static! {
|
||||
static ref JS_SHEET: PropertySheet<Properties> =
|
||||
get_property_sheet("javascript", "highlights.json");
|
||||
static ref HTML_SHEET: PropertySheet<Properties> =
|
||||
get_property_sheet("html", "highlights.json");
|
||||
static ref EJS_SHEET: PropertySheet<Properties> =
|
||||
get_property_sheet("embedded-template", "highlights-ejs.json");
|
||||
static ref SCOPE_CLASS_STRINGS: Vec<String> = {
|
||||
let mut result = Vec::new();
|
||||
let mut i = 0;
|
||||
|
|
@ -153,6 +157,118 @@ fn test_highlighting_empty_lines() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_highlighting_ejs() {
|
||||
let source = vec!["<div><% foo() %></div>"].join("\n");
|
||||
|
||||
assert_eq!(
|
||||
&to_token_vector(&source, get_language("embedded-template"), &EJS_SHEET).unwrap(),
|
||||
&[[
|
||||
("<", vec![]),
|
||||
("div", vec![Scope::Tag]),
|
||||
(">", vec![]),
|
||||
("<%", vec![Scope::Keyword]),
|
||||
(" ", vec![]),
|
||||
("foo", vec![Scope::Function]),
|
||||
("(", vec![Scope::PunctuationBracket]),
|
||||
(")", vec![Scope::PunctuationBracket]),
|
||||
(" ", vec![]),
|
||||
("%>", vec![Scope::Keyword]),
|
||||
("</", vec![]),
|
||||
("div", vec![Scope::Tag]),
|
||||
(">", vec![])
|
||||
]],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_highlighting_via_c_api() {
|
||||
let js_lang = get_language("javascript");
|
||||
let html_lang = get_language("html");
|
||||
let js_sheet = get_property_sheet_json("javascript", "highlights.json");
|
||||
let js_sheet = c_string(&js_sheet);
|
||||
let html_sheet = get_property_sheet_json("html", "highlights.json");
|
||||
let html_sheet = c_string(&html_sheet);
|
||||
|
||||
let class_tag = c_string("class=tag");
|
||||
let class_function = c_string("class=function");
|
||||
let class_string = c_string("class=string");
|
||||
let class_keyword = c_string("class=keyword");
|
||||
|
||||
let js_scope_name = c_string("source.js");
|
||||
let html_scope_name = c_string("text.html.basic");
|
||||
let injection_regex = c_string("^(javascript|js)$");
|
||||
let source_code = c_string("<script>\nconst a = b('c');\nc.d();\n</script>");
|
||||
|
||||
let attribute_strings = &mut [ptr::null(); Scope::Unknown as usize + 1];
|
||||
attribute_strings[Scope::Tag as usize] = class_tag.as_ptr();
|
||||
attribute_strings[Scope::String as usize] = class_string.as_ptr();
|
||||
attribute_strings[Scope::Keyword as usize] = class_keyword.as_ptr();
|
||||
attribute_strings[Scope::Function as usize] = class_function.as_ptr();
|
||||
|
||||
let highlighter = c::ts_highlighter_new(attribute_strings.as_ptr());
|
||||
let buffer = c::ts_highlight_buffer_new();
|
||||
|
||||
c::ts_highlighter_add_language(
|
||||
highlighter,
|
||||
html_scope_name.as_ptr(),
|
||||
html_lang,
|
||||
html_sheet.as_ptr(),
|
||||
ptr::null_mut(),
|
||||
);
|
||||
c::ts_highlighter_add_language(
|
||||
highlighter,
|
||||
js_scope_name.as_ptr(),
|
||||
js_lang,
|
||||
js_sheet.as_ptr(),
|
||||
injection_regex.as_ptr(),
|
||||
);
|
||||
c::ts_highlighter_highlight(
|
||||
highlighter,
|
||||
html_scope_name.as_ptr(),
|
||||
source_code.as_ptr(),
|
||||
source_code.as_bytes().len() as u32,
|
||||
buffer,
|
||||
ptr::null_mut(),
|
||||
);
|
||||
|
||||
let output_bytes = c::ts_highlight_buffer_content(buffer);
|
||||
let output_line_offsets = c::ts_highlight_buffer_line_offsets(buffer);
|
||||
let output_len = c::ts_highlight_buffer_len(buffer);
|
||||
let output_line_count = c::ts_highlight_buffer_line_count(buffer);
|
||||
|
||||
let output_bytes = unsafe { slice::from_raw_parts(output_bytes, output_len as usize) };
|
||||
let output_line_offsets =
|
||||
unsafe { slice::from_raw_parts(output_line_offsets, output_line_count as usize) };
|
||||
|
||||
let mut lines = Vec::new();
|
||||
for i in 0..(output_line_count as usize) {
|
||||
let line_start = output_line_offsets[i] as usize;
|
||||
let line_end = output_line_offsets
|
||||
.get(i + 1)
|
||||
.map(|x| *x as usize)
|
||||
.unwrap_or(output_bytes.len());
|
||||
lines.push(str::from_utf8(&output_bytes[line_start..line_end]).unwrap());
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
lines,
|
||||
vec![
|
||||
"<<span class=tag>script</span>>",
|
||||
"<span class=keyword>const</span> <span>a</span> <span>=</span> <span class=function>b</span><span>(</span><span class=string>'c'</span><span>)</span><span>;</span>",
|
||||
"<span>c</span><span>.</span><span class=function>d</span><span>(</span><span>)</span><span>;</span>",
|
||||
"</<span class=tag>script</span>>",
|
||||
]
|
||||
);
|
||||
|
||||
c::ts_highlighter_delete(highlighter);
|
||||
c::ts_highlight_buffer_delete(buffer);
|
||||
}
|
||||
|
||||
fn c_string(s: &str) -> CString {
|
||||
CString::new(s.as_bytes().to_vec()).unwrap()
|
||||
}
|
||||
|
||||
fn test_language_for_injection_string<'a>(
|
||||
string: &str,
|
||||
) -> Option<(Language, &'a PropertySheet<Properties>)> {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
use super::helpers::edits::{perform_edit, Edit, ReadRecorder};
|
||||
use super::helpers::fixtures::{get_language, get_test_language};
|
||||
use crate::generate::generate_parser_for_grammar;
|
||||
use std::{thread, usize};
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::{thread, time};
|
||||
use tree_sitter::{InputEdit, LogType, Parser, Point, Range};
|
||||
|
||||
#[test]
|
||||
|
|
@ -56,6 +57,37 @@ fn test_parsing_with_logging() {
|
|||
"reduce sym:struct_item, child_count:3".to_string()
|
||||
)));
|
||||
assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string())));
|
||||
|
||||
for (_, m) in &messages {
|
||||
assert!(!m.contains("row:0"));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(unix)]
|
||||
fn test_parsing_with_debug_graph_enabled() {
|
||||
use std::io::{BufRead, BufReader, Seek};
|
||||
|
||||
let has_zero_indexed_row = |s: &str| s.contains("position: 0,");
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("javascript")).unwrap();
|
||||
|
||||
let mut debug_graph_file = tempfile::tempfile().unwrap();
|
||||
parser.print_dot_graphs(&debug_graph_file);
|
||||
parser.parse("const zero = 0", None).unwrap();
|
||||
|
||||
debug_graph_file.seek(std::io::SeekFrom::Start(0)).unwrap();
|
||||
let log_reader = BufReader::new(debug_graph_file)
|
||||
.lines()
|
||||
.map(|l| l.expect("Failed to read line from graph log"));
|
||||
for line in log_reader {
|
||||
assert!(
|
||||
!has_zero_indexed_row(&line),
|
||||
"Graph log output includes zero-indexed row: {}",
|
||||
line
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -269,84 +301,164 @@ fn test_parsing_on_multiple_threads() {
|
|||
assert_eq!(child_count_differences, &[1, 2, 3, 4]);
|
||||
}
|
||||
|
||||
// Operation limits
|
||||
#[test]
|
||||
fn test_parsing_cancelled_by_another_thread() {
|
||||
let cancellation_flag = Box::new(AtomicUsize::new(0));
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("javascript")).unwrap();
|
||||
unsafe { parser.set_cancellation_flag(Some(&cancellation_flag)) };
|
||||
|
||||
// Long input - parsing succeeds
|
||||
let tree = parser.parse_with(
|
||||
&mut |offset, _| {
|
||||
if offset == 0 {
|
||||
b" ["
|
||||
} else if offset >= 20000 {
|
||||
b""
|
||||
} else {
|
||||
b"0,"
|
||||
}
|
||||
},
|
||||
None,
|
||||
);
|
||||
assert!(tree.is_some());
|
||||
|
||||
let cancel_thread = thread::spawn(move || {
|
||||
thread::sleep(time::Duration::from_millis(100));
|
||||
cancellation_flag.store(1, Ordering::SeqCst);
|
||||
});
|
||||
|
||||
// Infinite input
|
||||
let tree = parser.parse_with(
|
||||
&mut |offset, _| {
|
||||
thread::yield_now();
|
||||
thread::sleep(time::Duration::from_millis(10));
|
||||
if offset == 0 {
|
||||
b" ["
|
||||
} else {
|
||||
b"0,"
|
||||
}
|
||||
},
|
||||
None,
|
||||
);
|
||||
|
||||
// Parsing returns None because it was cancelled.
|
||||
cancel_thread.join().unwrap();
|
||||
assert!(tree.is_none());
|
||||
}
|
||||
|
||||
// Timeouts
|
||||
|
||||
#[test]
|
||||
fn test_parsing_with_an_operation_limit() {
|
||||
fn test_parsing_with_a_timeout() {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("json")).unwrap();
|
||||
|
||||
// Start parsing from an infinite input. Parsing should abort after 5 "operations".
|
||||
parser.set_operation_limit(5);
|
||||
let mut call_count = 0;
|
||||
// Parse an infinitely-long array, but pause after 100 microseconds of processing.
|
||||
parser.set_timeout_micros(100);
|
||||
let start_time = time::Instant::now();
|
||||
let tree = parser.parse_with(
|
||||
&mut |_, _| {
|
||||
if call_count == 0 {
|
||||
call_count += 1;
|
||||
b"[0"
|
||||
&mut |offset, _| {
|
||||
if offset == 0 {
|
||||
b" ["
|
||||
} else {
|
||||
call_count += 1;
|
||||
b", 0"
|
||||
b",0"
|
||||
}
|
||||
},
|
||||
None,
|
||||
);
|
||||
assert!(tree.is_none());
|
||||
assert!(call_count >= 3);
|
||||
assert!(call_count <= 8);
|
||||
assert!(start_time.elapsed().as_micros() < 500);
|
||||
|
||||
// Resume parsing from the previous state.
|
||||
call_count = 0;
|
||||
parser.set_operation_limit(20);
|
||||
// Continue parsing, but pause after 1000 microseconds of processing.
|
||||
parser.set_timeout_micros(1000);
|
||||
let start_time = time::Instant::now();
|
||||
let tree = parser.parse_with(
|
||||
&mut |offset, _| {
|
||||
if offset == 0 {
|
||||
b" ["
|
||||
} else {
|
||||
b",0"
|
||||
}
|
||||
},
|
||||
None,
|
||||
);
|
||||
assert!(tree.is_none());
|
||||
assert!(start_time.elapsed().as_micros() > 500);
|
||||
assert!(start_time.elapsed().as_micros() < 1500);
|
||||
|
||||
// Finish parsing
|
||||
parser.set_timeout_micros(0);
|
||||
let tree = parser
|
||||
.parse_with(
|
||||
&mut |_, _| {
|
||||
if call_count == 0 {
|
||||
call_count += 1;
|
||||
&mut |offset, _| {
|
||||
if offset > 5000 {
|
||||
b""
|
||||
} else if offset == 5000 {
|
||||
b"]"
|
||||
} else {
|
||||
b""
|
||||
b",0"
|
||||
}
|
||||
},
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
tree.root_node().to_sexp(),
|
||||
"(value (array (number) (number) (number)))"
|
||||
);
|
||||
assert_eq!(tree.root_node().child(0).unwrap().kind(), "array");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parsing_with_a_reset_after_reaching_an_operation_limit() {
|
||||
fn test_parsing_with_a_timeout_and_a_reset() {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("json")).unwrap();
|
||||
|
||||
parser.set_operation_limit(3);
|
||||
let tree = parser.parse("[1234, 5, 6, 7, 8]", None);
|
||||
parser.set_timeout_micros(30);
|
||||
let tree = parser.parse(
|
||||
"[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]",
|
||||
None,
|
||||
);
|
||||
assert!(tree.is_none());
|
||||
|
||||
// Without calling reset, the parser continues from where it left off, so
|
||||
// it does not see the changes to the beginning of the source code.
|
||||
parser.set_operation_limit(usize::MAX);
|
||||
let tree = parser.parse("[null, 5, 6, 4, 5]", None).unwrap();
|
||||
parser.set_timeout_micros(0);
|
||||
let tree = parser.parse(
|
||||
"[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]",
|
||||
None,
|
||||
).unwrap();
|
||||
assert_eq!(
|
||||
tree.root_node().to_sexp(),
|
||||
"(value (array (number) (number) (number) (number) (number)))"
|
||||
tree.root_node()
|
||||
.named_child(0)
|
||||
.unwrap()
|
||||
.named_child(0)
|
||||
.unwrap()
|
||||
.kind(),
|
||||
"string"
|
||||
);
|
||||
|
||||
parser.set_operation_limit(3);
|
||||
let tree = parser.parse("[1234, 5, 6, 7, 8]", None);
|
||||
parser.set_timeout_micros(30);
|
||||
let tree = parser.parse(
|
||||
"[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]",
|
||||
None,
|
||||
);
|
||||
assert!(tree.is_none());
|
||||
|
||||
// By calling reset, we force the parser to start over from scratch so
|
||||
// that it sees the changes to the beginning of the source code.
|
||||
parser.set_operation_limit(usize::MAX);
|
||||
parser.set_timeout_micros(0);
|
||||
parser.reset();
|
||||
let tree = parser.parse("[null, 5, 6, 4, 5]", None).unwrap();
|
||||
let tree = parser.parse(
|
||||
"[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]",
|
||||
None,
|
||||
).unwrap();
|
||||
assert_eq!(
|
||||
tree.root_node().to_sexp(),
|
||||
"(value (array (null) (number) (number) (number) (number)))"
|
||||
tree.root_node()
|
||||
.named_child(0)
|
||||
.unwrap()
|
||||
.named_child(0)
|
||||
.unwrap()
|
||||
.kind(),
|
||||
"null"
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -16,9 +16,10 @@ Tree-sitter is a parser generator tool and an incremental parsing library. It ca
|
|||
There are currently bindings that allow Tree-sitter to be used from the following languages:
|
||||
|
||||
* [JavaScript](https://github.com/tree-sitter/node-tree-sitter)
|
||||
* [Rust](https://github.com/tree-sitter/rust-tree-sitter)
|
||||
* [Haskell](https://github.com/tree-sitter/haskell-tree-sitter)
|
||||
* [Python](https://github.com/tree-sitter/py-tree-sitter)
|
||||
* [Rust](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding)
|
||||
* [Ruby](https://github.com/tree-sitter/ruby-tree-sitter)
|
||||
* [Haskell](https://github.com/tree-sitter/haskell-tree-sitter)
|
||||
|
||||
### Available Parsers
|
||||
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ Then run the following command:
|
|||
tree-sitter generate
|
||||
```
|
||||
|
||||
This will generate the C code required to parse this trivial language, as well as all of the files needed to compile and load this native parser as a Node.js module. You can test this parser by creating a source file with the contents `hello;` and parsing it:
|
||||
This will generate the C code required to parse this trivial language, as well as all of the files needed to compile and load this native parser as a Node.js module. You can test this parser by creating a source file with the contents `hello` and parsing it:
|
||||
|
||||
```sh
|
||||
tree-sitter parse ./the-file
|
||||
|
|
@ -86,7 +86,7 @@ tree-sitter parse ./the-file
|
|||
This should print the following:
|
||||
|
||||
```
|
||||
(source_file [0, 0] - [0, 5])
|
||||
(source_file [1, 0] - [1, 5])
|
||||
```
|
||||
|
||||
You might notice that the first time you run `tree-sitter parse`, it takes a few seconds. This is because Tree-sitter automatically compiles your C code into a dynamically-loadable library. Whenever you make changes to your grammar, you can update the parser simply by re-running `tree-sitter generate`. When the parser changes, Tree-sitter will recompile it as needed.
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
name = "tree-sitter-highlight"
|
||||
description = "Library for performing syntax highlighting with Tree-sitter"
|
||||
version = "0.1.4"
|
||||
version = "0.1.5"
|
||||
authors = [
|
||||
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
|
||||
"Tim Clem <timothy.clem@gmail.com>"
|
||||
|
|
@ -12,6 +12,9 @@ edition = "2018"
|
|||
keywords = ["incremental", "parsing", "syntax", "highlighting"]
|
||||
categories = ["parsing", "text-editors"]
|
||||
|
||||
[lib]
|
||||
crate-type = ["lib", "staticlib"]
|
||||
|
||||
[dependencies]
|
||||
regex = "1"
|
||||
serde = "1.0"
|
||||
|
|
|
|||
104
highlight/include/tree_sitter/highlight.h
Normal file
104
highlight/include/tree_sitter/highlight.h
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
#ifndef TREE_SITTER_HIGHLIGHT_H_
|
||||
#define TREE_SITTER_HIGHLIGHT_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef enum {
|
||||
TSHighlightOk,
|
||||
TSHighlightUnknownScope,
|
||||
TSHighlightTimeout,
|
||||
} TSHighlightError;
|
||||
|
||||
// The list of scopes which can be styled for syntax highlighting.
|
||||
// When constructing a `TSHighlighter`, you need to construct an
|
||||
// `attribute_strings` array whose elements correspond to these values.
|
||||
enum TSHighlightScope {
|
||||
TSHighlightScopeAttribute,
|
||||
TSHighlightScopeComment,
|
||||
TSHighlightScopeConstant,
|
||||
TSHighlightScopeConstantBuiltin,
|
||||
TSHighlightScopeConstructor,
|
||||
TSHighlightScopeConstructorBuiltin,
|
||||
TSHighlightScopeEmbedded,
|
||||
TSHighlightScopeEscape,
|
||||
TSHighlightScopeFunction,
|
||||
TSHighlightScopeFunctionBuiltin,
|
||||
TSHighlightScopeKeyword,
|
||||
TSHighlightScopeNumber,
|
||||
TSHighlightScopeOperator,
|
||||
TSHighlightScopeProperty,
|
||||
TSHighlightScopePropertyBuiltin,
|
||||
TSHighlightScopePunctuation,
|
||||
TSHighlightScopePunctuationBracket,
|
||||
TSHighlightScopePunctuationDelimiter,
|
||||
TSHighlightScopePunctuationSpecial,
|
||||
TSHighlightScopeString,
|
||||
TSHighlightScopeStringSpecial,
|
||||
TSHighlightScopeTag,
|
||||
TSHighlightScopeType,
|
||||
TSHighlightScopeTypeBuiltin,
|
||||
TSHighlightScopeVariable,
|
||||
TSHighlightScopeVariableBuiltin,
|
||||
TSHighlightScopeUnknown,
|
||||
};
|
||||
|
||||
typedef struct TSHighlighter TSHighlighter;
|
||||
typedef struct TSHighlightBuffer TSHighlightBuffer;
|
||||
|
||||
// Construct a `TSHighlighter` by providing a list of strings containing
|
||||
// the HTML attributes that should be applied for each highlight scope.
|
||||
TSHighlighter *ts_highlighter_new(
|
||||
const char **attribute_strings
|
||||
);
|
||||
|
||||
// Delete a syntax highlighter.
|
||||
void ts_highlighter_delete(TSHighlighter *);
|
||||
|
||||
// Add a `TSLanguage` to a highlighter. The language is associated with a
|
||||
// scope name, which can be used later to select a language for syntax
|
||||
// highlighting. Along with the language, you must provide a JSON string
|
||||
// containing the compiled PropertySheet to use for syntax highlighting
|
||||
// with that language. You can also optionally provide an 'injection regex',
|
||||
// which is used to detect when this language has been embedded in a document
|
||||
// written in a different language.
|
||||
int ts_highlighter_add_language(
|
||||
TSHighlighter *self,
|
||||
const char *scope_name,
|
||||
const TSLanguage *language,
|
||||
const char *property_sheet_json,
|
||||
const char *injection_regex
|
||||
);
|
||||
|
||||
// Compute syntax highlighting for a given document. You must first
|
||||
// create a `TSHighlightBuffer` to hold the output.
|
||||
int ts_highlighter_highlight(
|
||||
const TSHighlighter *self,
|
||||
const char *scope_name,
|
||||
const char *source_code,
|
||||
uint32_t source_code_len,
|
||||
TSHighlightBuffer *output,
|
||||
const size_t *cancellation_flag
|
||||
);
|
||||
|
||||
// TSHighlightBuffer: This struct stores the HTML output of syntax
|
||||
// highlighting. It can be reused for multiple highlighting calls.
|
||||
TSHighlightBuffer *ts_highlight_buffer_new(void);
|
||||
|
||||
// Delete a highlight buffer.
|
||||
void ts_highlight_buffer_delete(TSHighlightBuffer *);
|
||||
|
||||
// Access the HTML content of a highlight buffer.
|
||||
const uint8_t *ts_highlight_buffer_content(const TSHighlightBuffer *);
|
||||
const uint32_t *ts_highlight_buffer_line_offsets(const TSHighlightBuffer *);
|
||||
uint32_t ts_highlight_buffer_len(const TSHighlightBuffer *);
|
||||
uint32_t ts_highlight_buffer_line_count(const TSHighlightBuffer *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_HIGHLIGHT_H_
|
||||
266
highlight/src/c_lib.rs
Normal file
266
highlight/src/c_lib.rs
Normal file
|
|
@ -0,0 +1,266 @@
|
|||
use super::{escape, load_property_sheet, HighlightEvent, Highlighter, Properties, Scope};
|
||||
use regex::Regex;
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::CStr;
|
||||
use std::io::Write;
|
||||
use std::os::raw::c_char;
|
||||
use std::process::abort;
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::{fmt, slice};
|
||||
use tree_sitter::{Language, PropertySheet};
|
||||
|
||||
struct LanguageConfiguration {
|
||||
language: Language,
|
||||
property_sheet: PropertySheet<Properties>,
|
||||
injection_regex: Option<Regex>,
|
||||
}
|
||||
|
||||
pub struct TSHighlighter {
|
||||
languages: HashMap<String, LanguageConfiguration>,
|
||||
attribute_strings: Vec<&'static [u8]>,
|
||||
}
|
||||
|
||||
pub struct TSHighlightBuffer {
|
||||
html: Vec<u8>,
|
||||
line_offsets: Vec<u32>,
|
||||
}
|
||||
|
||||
/// Status code returned across the C boundary by the highlighter functions.
///
/// The variant order mirrors `TSHighlightError` in `tree_sitter/highlight.h`
/// (`TSHighlightOk`, `TSHighlightUnknownScope`, `TSHighlightTimeout`), so the
/// variants must not be reordered.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ErrorCode {
    /// The operation completed.
    Ok,
    /// No language has been registered under the requested scope name.
    UnknownScope,
    /// The highlighter could not be constructed for the requested document.
    Timeout,
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlighter_new(
|
||||
attribute_strings: *const *const c_char,
|
||||
) -> *mut TSHighlighter {
|
||||
let attribute_strings =
|
||||
unsafe { slice::from_raw_parts(attribute_strings, Scope::Unknown as usize + 1) };
|
||||
let attribute_strings = attribute_strings
|
||||
.into_iter()
|
||||
.map(|s| {
|
||||
if s.is_null() {
|
||||
&[]
|
||||
} else {
|
||||
unsafe { CStr::from_ptr(*s).to_bytes() }
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
Box::into_raw(Box::new(TSHighlighter {
|
||||
languages: HashMap::new(),
|
||||
attribute_strings,
|
||||
}))
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer {
|
||||
Box::into_raw(Box::new(TSHighlightBuffer {
|
||||
html: Vec::new(),
|
||||
line_offsets: Vec::new(),
|
||||
}))
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) {
|
||||
drop(unsafe { Box::from_raw(this) })
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) {
|
||||
drop(unsafe { Box::from_raw(this) })
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 {
|
||||
let this = unwrap_ptr(this);
|
||||
this.html.as_slice().as_ptr()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_line_offsets(this: *const TSHighlightBuffer) -> *const u32 {
|
||||
let this = unwrap_ptr(this);
|
||||
this.line_offsets.as_slice().as_ptr()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 {
|
||||
let this = unwrap_ptr(this);
|
||||
this.html.len() as u32
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 {
|
||||
let this = unwrap_ptr(this);
|
||||
this.line_offsets.len() as u32
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlighter_add_language(
|
||||
this: *mut TSHighlighter,
|
||||
scope_name: *const c_char,
|
||||
language: Language,
|
||||
property_sheet_json: *const c_char,
|
||||
injection_regex: *const c_char,
|
||||
) -> ErrorCode {
|
||||
let this = unwrap_mut_ptr(this);
|
||||
let scope_name = unsafe { CStr::from_ptr(scope_name) };
|
||||
let scope_name = unwrap(scope_name.to_str()).to_string();
|
||||
let property_sheet_json = unsafe { CStr::from_ptr(property_sheet_json) };
|
||||
let property_sheet_json = unwrap(property_sheet_json.to_str());
|
||||
|
||||
let property_sheet = unwrap(load_property_sheet(language, property_sheet_json));
|
||||
let injection_regex = if injection_regex.is_null() {
|
||||
None
|
||||
} else {
|
||||
let pattern = unsafe { CStr::from_ptr(injection_regex) };
|
||||
Some(unwrap(Regex::new(unwrap(pattern.to_str()))))
|
||||
};
|
||||
|
||||
this.languages.insert(
|
||||
scope_name,
|
||||
LanguageConfiguration {
|
||||
language,
|
||||
property_sheet,
|
||||
injection_regex,
|
||||
},
|
||||
);
|
||||
|
||||
ErrorCode::Ok
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlighter_highlight(
|
||||
this: *const TSHighlighter,
|
||||
scope_name: *const c_char,
|
||||
source_code: *const c_char,
|
||||
source_code_len: u32,
|
||||
output: *mut TSHighlightBuffer,
|
||||
cancellation_flag: *const AtomicUsize,
|
||||
) -> ErrorCode {
|
||||
let this = unwrap_ptr(this);
|
||||
let output = unwrap_mut_ptr(output);
|
||||
let scope_name = unwrap(unsafe { CStr::from_ptr(scope_name).to_str() });
|
||||
let source_code =
|
||||
unsafe { slice::from_raw_parts(source_code as *const u8, source_code_len as usize) };
|
||||
let cancellation_flag = unsafe { cancellation_flag.as_ref() };
|
||||
this.highlight(source_code, scope_name, output, cancellation_flag)
|
||||
}
|
||||
|
||||
impl TSHighlighter {
|
||||
fn highlight(
|
||||
&self,
|
||||
source_code: &[u8],
|
||||
scope_name: &str,
|
||||
output: &mut TSHighlightBuffer,
|
||||
cancellation_flag: Option<&AtomicUsize>,
|
||||
) -> ErrorCode {
|
||||
let configuration = self.languages.get(scope_name);
|
||||
if configuration.is_none() {
|
||||
return ErrorCode::UnknownScope;
|
||||
}
|
||||
let configuration = configuration.unwrap();
|
||||
let languages = &self.languages;
|
||||
|
||||
let highlighter = Highlighter::new(
|
||||
source_code,
|
||||
configuration.language,
|
||||
&configuration.property_sheet,
|
||||
|injection_string| {
|
||||
languages.values().find_map(|conf| {
|
||||
conf.injection_regex.as_ref().and_then(|regex| {
|
||||
if regex.is_match(injection_string) {
|
||||
Some((conf.language, &conf.property_sheet))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
})
|
||||
},
|
||||
cancellation_flag,
|
||||
);
|
||||
|
||||
if let Ok(highlighter) = highlighter {
|
||||
output.html.clear();
|
||||
output.line_offsets.clear();
|
||||
output.line_offsets.push(0);
|
||||
let mut scopes = Vec::new();
|
||||
for event in highlighter {
|
||||
match event {
|
||||
HighlightEvent::ScopeStart(s) => {
|
||||
scopes.push(s);
|
||||
output.start_scope(s, &self.attribute_strings);
|
||||
}
|
||||
HighlightEvent::ScopeEnd => {
|
||||
scopes.pop();
|
||||
output.end_scope();
|
||||
}
|
||||
HighlightEvent::Source(src) => {
|
||||
output.add_text(src, &scopes, &self.attribute_strings);
|
||||
}
|
||||
};
|
||||
}
|
||||
ErrorCode::Ok
|
||||
} else {
|
||||
ErrorCode::Timeout
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TSHighlightBuffer {
|
||||
fn start_scope(&mut self, s: Scope, attribute_strings: &[&[u8]]) {
|
||||
let attribute_string = attribute_strings[s as usize];
|
||||
self.html.extend(b"<span");
|
||||
if !attribute_string.is_empty() {
|
||||
self.html.extend(b" ");
|
||||
self.html.extend(attribute_string);
|
||||
}
|
||||
self.html.extend(b">");
|
||||
}
|
||||
|
||||
fn end_scope(&mut self) {
|
||||
self.html.extend(b"</span>");
|
||||
}
|
||||
|
||||
fn finish_line(&mut self) {
|
||||
self.line_offsets.push(self.html.len() as u32);
|
||||
}
|
||||
|
||||
fn add_text(&mut self, src: &str, scopes: &Vec<Scope>, attribute_strings: &[&[u8]]) {
|
||||
let mut multiline = false;
|
||||
for line in src.split('\n') {
|
||||
let line = line.trim_end_matches('\r');
|
||||
if multiline {
|
||||
scopes.iter().for_each(|_| self.end_scope());
|
||||
self.finish_line();
|
||||
scopes
|
||||
.iter()
|
||||
.for_each(|scope| self.start_scope(*scope, attribute_strings));
|
||||
}
|
||||
write!(&mut self.html, "{}", escape::Escape(line)).unwrap();
|
||||
multiline = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a raw const pointer received over FFI into a shared reference.
/// Prints a diagnostic and aborts the process if the pointer is null.
fn unwrap_ptr<'a, T>(result: *const T) -> &'a T {
    match unsafe { result.as_ref() } {
        Some(reference) => reference,
        None => {
            eprintln!("{}:{} - pointer must not be null", file!(), line!());
            abort()
        }
    }
}
|
||||
|
||||
/// Converts a raw mutable pointer received over FFI into an exclusive
/// reference. Prints a diagnostic and aborts the process if the pointer is null.
fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T {
    match unsafe { result.as_mut() } {
        Some(reference) => reference,
        None => {
            eprintln!("{}:{} - pointer must not be null", file!(), line!());
            abort()
        }
    }
}
|
||||
|
||||
/// Returns the `Ok` value of `result`. On `Err`, prints the error to stderr and
/// aborts the process instead of unwinding.
fn unwrap<T, E: fmt::Display>(result: Result<T, E>) -> T {
    match result {
        Ok(value) => value,
        Err(error) => {
            eprintln!("tree-sitter highlight error: {}", error);
            abort()
        }
    }
}
|
||||
|
|
@ -1,14 +1,17 @@
|
|||
pub mod c_lib;
|
||||
mod escape;
|
||||
|
||||
pub use c_lib as c;
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use serde_derive::*;
|
||||
use std::cmp;
|
||||
use std::fmt::{self, Write};
|
||||
use std::mem::transmute;
|
||||
use std::str;
|
||||
use std::usize;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::{cmp, str, usize};
|
||||
use tree_sitter::{Language, Node, Parser, Point, PropertySheet, Range, Tree, TreePropertyCursor};
|
||||
|
||||
const CANCELLATION_CHECK_INTERVAL: usize = 100;
|
||||
|
||||
#[derive(Debug)]
|
||||
enum TreeStep {
|
||||
Child {
|
||||
|
|
@ -78,6 +81,7 @@ struct Layer<'a> {
|
|||
cursor: TreePropertyCursor<'a, Properties>,
|
||||
ranges: Vec<Range>,
|
||||
at_node_end: bool,
|
||||
depth: usize,
|
||||
}
|
||||
|
||||
struct Highlighter<'a, T>
|
||||
|
|
@ -90,6 +94,8 @@ where
|
|||
parser: Parser,
|
||||
layers: Vec<Layer<'a>>,
|
||||
utf8_error_len: Option<usize>,
|
||||
operation_count: usize,
|
||||
cancellation_flag: Option<&'a AtomicUsize>,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
|
|
@ -151,6 +157,28 @@ pub enum PropertySheetError {
|
|||
InvalidFormat(String),
|
||||
}
|
||||
|
||||
impl fmt::Display for PropertySheetError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
PropertySheetError::InvalidJSON(e) => e.fmt(f),
|
||||
PropertySheetError::InvalidRegex(e) => e.fmt(f),
|
||||
PropertySheetError::InvalidFormat(e) => e.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> fmt::Debug for Layer<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"Layer {{ at_node_end: {}, node: {:?} }}",
|
||||
self.at_node_end,
|
||||
self.cursor.node()
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_property_sheet(
|
||||
language: Language,
|
||||
json: &str,
|
||||
|
|
@ -354,17 +382,22 @@ where
|
|||
language: Language,
|
||||
property_sheet: &'a PropertySheet<Properties>,
|
||||
injection_callback: F,
|
||||
cancellation_flag: Option<&'a AtomicUsize>,
|
||||
) -> Result<Self, String> {
|
||||
let mut parser = Parser::new();
|
||||
unsafe { parser.set_cancellation_flag(cancellation_flag.clone()) };
|
||||
parser.set_language(language)?;
|
||||
let tree = parser
|
||||
.parse(source, None)
|
||||
.ok_or_else(|| format!("Tree-sitter: failed to parse"))?;
|
||||
Ok(Self {
|
||||
injection_callback,
|
||||
source,
|
||||
source_offset: 0,
|
||||
parser,
|
||||
source,
|
||||
cancellation_flag,
|
||||
injection_callback,
|
||||
source_offset: 0,
|
||||
operation_count: 0,
|
||||
utf8_error_len: None,
|
||||
layers: vec![Layer::new(
|
||||
source,
|
||||
tree,
|
||||
|
|
@ -375,8 +408,8 @@ where
|
|||
start_point: Point::new(0, 0),
|
||||
end_point: Point::new(usize::MAX, usize::MAX),
|
||||
}],
|
||||
0,
|
||||
)],
|
||||
utf8_error_len: None,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -554,7 +587,7 @@ where
|
|||
result
|
||||
}
|
||||
|
||||
fn add_layer(&mut self, language_string: &str, ranges: Vec<Range>) {
|
||||
fn add_layer(&mut self, language_string: &str, ranges: Vec<Range>, depth: usize) {
|
||||
if let Some((language, property_sheet)) = (self.injection_callback)(language_string) {
|
||||
self.parser
|
||||
.set_language(language)
|
||||
|
|
@ -564,7 +597,7 @@ where
|
|||
.parser
|
||||
.parse(self.source, None)
|
||||
.expect("Failed to parse");
|
||||
let layer = Layer::new(self.source, tree, property_sheet, ranges);
|
||||
let layer = Layer::new(self.source, tree, property_sheet, ranges, depth);
|
||||
match self.layers.binary_search_by(|l| l.cmp(&layer)) {
|
||||
Ok(i) | Err(i) => self.layers.insert(i, layer),
|
||||
};
|
||||
|
|
@ -579,6 +612,16 @@ where
|
|||
type Item = HighlightEvent<'a>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if let Some(cancellation_flag) = self.cancellation_flag {
|
||||
self.operation_count += 1;
|
||||
if self.operation_count >= CANCELLATION_CHECK_INTERVAL {
|
||||
self.operation_count = 0;
|
||||
if cancellation_flag.load(Ordering::Relaxed) != 0 {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(utf8_error_len) = self.utf8_error_len.take() {
|
||||
self.source_offset += utf8_error_len;
|
||||
return Some(HighlightEvent::Source("\u{FFFD}"));
|
||||
|
|
@ -606,8 +649,9 @@ where
|
|||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let depth = first_layer.depth + 1;
|
||||
for (language, ranges) in injections {
|
||||
self.add_layer(&language, ranges);
|
||||
self.add_layer(&language, ranges, depth);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -636,7 +680,13 @@ where
|
|||
// to re-sort the layers. If the cursor is already at the end of its syntax tree,
|
||||
// remove it.
|
||||
if self.layers[0].advance() {
|
||||
self.layers.sort_unstable_by(|a, b| a.cmp(&b));
|
||||
let mut index = 0;
|
||||
while self.layers.get(index + 1).map_or(false, |next| {
|
||||
self.layers[index].cmp(next) == cmp::Ordering::Greater
|
||||
}) {
|
||||
self.layers.swap(index, index + 1);
|
||||
index += 1;
|
||||
}
|
||||
} else {
|
||||
self.layers.remove(0);
|
||||
}
|
||||
|
|
@ -685,6 +735,7 @@ impl<'a> Layer<'a> {
|
|||
tree: Tree,
|
||||
sheet: &'a PropertySheet<Properties>,
|
||||
ranges: Vec<Range>,
|
||||
depth: usize,
|
||||
) -> Self {
|
||||
// The cursor's lifetime parameter indicates that the tree must outlive the cursor.
|
||||
// But because the tree is really a pointer to the heap, the cursor can remain
|
||||
|
|
@ -695,6 +746,7 @@ impl<'a> Layer<'a> {
|
|||
_tree: tree,
|
||||
cursor,
|
||||
ranges,
|
||||
depth,
|
||||
at_node_end: false,
|
||||
}
|
||||
}
|
||||
|
|
@ -706,6 +758,7 @@ impl<'a> Layer<'a> {
|
|||
self.offset()
|
||||
.cmp(&other.offset())
|
||||
.then_with(|| other.at_node_end.cmp(&self.at_node_end))
|
||||
.then_with(|| self.depth.cmp(&other.depth))
|
||||
}
|
||||
|
||||
fn offset(&self) -> usize {
|
||||
|
|
@ -816,7 +869,7 @@ pub fn highlight<'a, F>(
|
|||
where
|
||||
F: Fn(&str) -> Option<(Language, &'a PropertySheet<Properties>)> + 'a,
|
||||
{
|
||||
Highlighter::new(source, language, property_sheet, injection_callback)
|
||||
Highlighter::new(source, language, property_sheet, injection_callback, None)
|
||||
}
|
||||
|
||||
pub fn highlight_html<'a, F1, F2>(
|
||||
|
|
@ -830,7 +883,7 @@ where
|
|||
F1: Fn(&str) -> Option<(Language, &'a PropertySheet<Properties>)>,
|
||||
F2: Fn(Scope) -> &'a str,
|
||||
{
|
||||
let highlighter = Highlighter::new(source, language, property_sheet, injection_callback)?;
|
||||
let highlighter = Highlighter::new(source, language, property_sheet, injection_callback, None)?;
|
||||
let mut renderer = HtmlRenderer::new(attribute_callback);
|
||||
let mut scopes = Vec::new();
|
||||
for event in highlighter {
|
||||
|
|
|
|||
|
|
@ -138,16 +138,16 @@ extern "C" {
|
|||
) -> *mut TSTree;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_enabled(arg1: *const TSParser) -> bool;
|
||||
pub fn ts_parser_cancellation_flag(arg1: *const TSParser) -> *const usize;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_set_enabled(arg1: *mut TSParser, arg2: bool);
|
||||
pub fn ts_parser_set_cancellation_flag(arg1: *mut TSParser, arg2: *const usize);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_operation_limit(arg1: *const TSParser) -> usize;
|
||||
pub fn ts_parser_timeout_micros(arg1: *const TSParser) -> u64;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_set_operation_limit(arg1: *mut TSParser, arg2: usize);
|
||||
pub fn ts_parser_set_timeout_micros(arg1: *mut TSParser, arg2: u64);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_reset(arg1: *mut TSParser);
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ use std::collections::HashMap;
|
|||
use std::ffi::CStr;
|
||||
use std::marker::PhantomData;
|
||||
use std::os::raw::{c_char, c_void};
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::{fmt, ptr, slice, str, u16};
|
||||
|
||||
pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION;
|
||||
|
|
@ -348,8 +349,12 @@ impl Parser {
|
|||
unsafe { ffi::ts_parser_reset(self.0) }
|
||||
}
|
||||
|
||||
pub fn set_operation_limit(&mut self, limit: usize) {
|
||||
unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) }
|
||||
pub fn timeout_micros(&self) -> u64 {
|
||||
unsafe { ffi::ts_parser_timeout_micros(self.0) }
|
||||
}
|
||||
|
||||
pub fn set_timeout_micros(&mut self, timeout_micros: u64) {
|
||||
unsafe { ffi::ts_parser_set_timeout_micros(self.0, timeout_micros) }
|
||||
}
|
||||
|
||||
pub fn set_included_ranges(&mut self, ranges: &[Range]) {
|
||||
|
|
@ -359,6 +364,18 @@ impl Parser {
|
|||
ffi::ts_parser_set_included_ranges(self.0, ts_ranges.as_ptr(), ts_ranges.len() as u32)
|
||||
};
|
||||
}
|
||||
|
||||
pub unsafe fn cancellation_flag(&self) -> Option<&AtomicUsize> {
|
||||
(ffi::ts_parser_cancellation_flag(self.0) as *const AtomicUsize).as_ref()
|
||||
}
|
||||
|
||||
pub unsafe fn set_cancellation_flag(&self, flag: Option<&AtomicUsize>) {
|
||||
if let Some(flag) = flag {
|
||||
ffi::ts_parser_set_cancellation_flag(self.0, flag as *const AtomicUsize as *const usize);
|
||||
} else {
|
||||
ffi::ts_parser_set_cancellation_flag(self.0, ptr::null());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for Parser {
|
||||
|
|
@ -511,10 +528,11 @@ impl<'tree> Node<'tree> {
|
|||
unsafe { ffi::ts_node_child_count(self.0) as usize }
|
||||
}
|
||||
|
||||
pub fn children<'a>(&'a self) -> impl Iterator<Item = Node<'tree>> + 'a {
|
||||
pub fn children(&self) -> impl Iterator<Item = Node<'tree>> {
|
||||
let me = self.clone();
|
||||
(0..self.child_count())
|
||||
.into_iter()
|
||||
.map(move |i| self.child(i).unwrap())
|
||||
.map(move |i| me.child(i).unwrap())
|
||||
}
|
||||
|
||||
pub fn named_child<'a>(&'a self, i: usize) -> Option<Self> {
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ typedef struct {
|
|||
uint32_t context[2];
|
||||
} TSTreeCursor;
|
||||
|
||||
TSParser *ts_parser_new();
|
||||
TSParser *ts_parser_new(void);
|
||||
void ts_parser_delete(TSParser *);
|
||||
const TSLanguage *ts_parser_language(const TSParser *);
|
||||
bool ts_parser_set_language(TSParser *, const TSLanguage *);
|
||||
|
|
@ -90,10 +90,10 @@ void ts_parser_halt_on_error(TSParser *, bool);
|
|||
TSTree *ts_parser_parse(TSParser *, const TSTree *, TSInput);
|
||||
TSTree *ts_parser_parse_string(TSParser *, const TSTree *, const char *, uint32_t);
|
||||
TSTree *ts_parser_parse_string_encoding(TSParser *, const TSTree *, const char *, uint32_t, TSInputEncoding);
|
||||
bool ts_parser_enabled(const TSParser *);
|
||||
void ts_parser_set_enabled(TSParser *, bool);
|
||||
size_t ts_parser_operation_limit(const TSParser *);
|
||||
void ts_parser_set_operation_limit(TSParser *, size_t);
|
||||
const size_t *ts_parser_cancellation_flag(const TSParser *);
|
||||
void ts_parser_set_cancellation_flag(TSParser *, const size_t *);
|
||||
uint64_t ts_parser_timeout_micros(const TSParser *);
|
||||
void ts_parser_set_timeout_micros(TSParser *, uint64_t);
|
||||
void ts_parser_reset(TSParser *);
|
||||
void ts_parser_set_included_ranges(TSParser *, const TSRange *, uint32_t);
|
||||
const TSRange *ts_parser_included_ranges(const TSParser *, uint32_t *);
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ struct TSLanguage {
|
|||
struct {
|
||||
const bool *states;
|
||||
const TSSymbol *symbol_map;
|
||||
void *(*create)();
|
||||
void *(*create)(void);
|
||||
void (*destroy)(void *);
|
||||
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
|
||||
unsigned (*serialize)(void *, char *);
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ void *ts_record_malloc(size_t);
|
|||
void *ts_record_calloc(size_t, size_t);
|
||||
void *ts_record_realloc(void *, size_t);
|
||||
void ts_record_free(void *);
|
||||
bool ts_record_allocations_toggle(bool);
|
||||
bool ts_toggle_allocation_recording(bool);
|
||||
|
||||
static inline void *ts_malloc(size_t size) {
|
||||
return ts_record_malloc(size);
|
||||
|
|
@ -33,10 +33,6 @@ static inline void ts_free(void *buffer) {
|
|||
ts_record_free(buffer);
|
||||
}
|
||||
|
||||
static inline bool ts_toggle_allocation_recording(bool value) {
|
||||
return ts_record_allocations_toggle(value);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <stdlib.h>
|
||||
|
|
|
|||
|
|
@ -7,6 +7,10 @@
|
|||
|
||||
#include <windows.h>
|
||||
|
||||
static inline size_t atomic_load(const volatile size_t *p) {
|
||||
return *p;
|
||||
}
|
||||
|
||||
static inline uint32_t atomic_inc(volatile uint32_t *p) {
|
||||
return InterlockedIncrement(p);
|
||||
}
|
||||
|
|
@ -17,6 +21,10 @@ static inline uint32_t atomic_dec(volatile uint32_t *p) {
|
|||
|
||||
#else
|
||||
|
||||
static inline size_t atomic_load(const volatile size_t *p) {
|
||||
return __atomic_load_n(p, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
static inline uint32_t atomic_inc(volatile uint32_t *p) {
|
||||
return __sync_add_and_fetch(p, 1u);
|
||||
}
|
||||
|
|
|
|||
141
lib/src/clock.h
Normal file
141
lib/src/clock.h
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
#ifndef TREE_SITTER_CLOCK_H_
|
||||
#define TREE_SITTER_CLOCK_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef uint64_t TSDuration;
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
// Windows:
|
||||
// * Represent a time as a performance counter value.
|
||||
// * Represent a duration as a number of performance counter ticks.
|
||||
|
||||
#include <windows.h>
|
||||
typedef uint64_t TSClock;
|
||||
|
||||
static inline TSDuration duration_from_micros(uint64_t micros) {
|
||||
LARGE_INTEGER frequency;
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
return micros * (uint64_t)frequency.QuadPart / 1000000;
|
||||
}
|
||||
|
||||
static inline uint64_t duration_to_micros(TSDuration self) {
|
||||
LARGE_INTEGER frequency;
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
return self * 1000000 / (uint64_t)frequency.QuadPart;
|
||||
}
|
||||
|
||||
static inline TSClock clock_null(void) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline TSClock clock_now(void) {
|
||||
LARGE_INTEGER result;
|
||||
QueryPerformanceCounter(&result);
|
||||
return (uint64_t)result.QuadPart;
|
||||
}
|
||||
|
||||
static inline TSClock clock_after(TSClock base, TSDuration duration) {
|
||||
return base + duration;
|
||||
}
|
||||
|
||||
static inline bool clock_is_null(TSClock self) {
|
||||
return !self;
|
||||
}
|
||||
|
||||
static inline bool clock_is_gt(TSClock self, TSClock other) {
|
||||
return self > other;
|
||||
}
|
||||
|
||||
#elif defined(CLOCK_MONOTONIC)
|
||||
|
||||
// POSIX with monotonic clock support (Linux, macOS >= 10.12)
|
||||
// * Represent a time as a monotonic (seconds, nanoseconds) pair.
|
||||
// * Represent a duration as a number of microseconds.
|
||||
//
|
||||
// On these platforms, parse timeouts will correspond accurately to
|
||||
// real time, regardless of what other processes are running.
|
||||
|
||||
#include <time.h>
|
||||
typedef struct timespec TSClock;
|
||||
|
||||
static inline TSDuration duration_from_micros(uint64_t micros) {
|
||||
return micros;
|
||||
}
|
||||
|
||||
static inline uint64_t duration_to_micros(TSDuration self) {
|
||||
return self;
|
||||
}
|
||||
|
||||
static inline TSClock clock_now(void) {
|
||||
TSClock result;
|
||||
clock_gettime(CLOCK_MONOTONIC, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline TSClock clock_null(void) {
|
||||
return (TSClock) {0, 0};
|
||||
}
|
||||
|
||||
// Return the time that lies `duration` microseconds after `base`.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
  TSClock result = base;
  result.tv_sec += duration / 1000000;
  result.tv_nsec += (duration % 1000000) * 1000;
  // Carry excess nanoseconds into the seconds field so that `tv_nsec` stays
  // below one second; `clock_is_gt` compares the two fields separately.
  // Less than one second is added above, so a single carry is enough when
  // `base` itself is normalized.
  if (result.tv_nsec >= 1000000000) {
    result.tv_nsec -= 1000000000;
    ++(result.tv_sec);
  }
  return result;
}
|
||||
|
||||
static inline bool clock_is_null(TSClock self) {
|
||||
return !self.tv_sec;
|
||||
}
|
||||
|
||||
static inline bool clock_is_gt(TSClock self, TSClock other) {
|
||||
if (self.tv_sec > other.tv_sec) return true;
|
||||
if (self.tv_sec < other.tv_sec) return false;
|
||||
return self.tv_nsec > other.tv_nsec;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// POSIX without monotonic clock support
|
||||
// * Represent a time as a process clock value.
|
||||
// * Represent a duration as a number of process clock ticks.
|
||||
//
|
||||
// On these platforms, parse timeouts may be affected by other processes,
|
||||
// which is not ideal, but is better than using a non-monotonic time API
|
||||
// like `gettimeofday`.
|
||||
|
||||
#include <time.h>
|
||||
typedef uint64_t TSClock;
|
||||
|
||||
static inline TSDuration duration_from_micros(uint64_t micros) {
|
||||
return micros * (uint64_t)CLOCKS_PER_SEC / 1000000;
|
||||
}
|
||||
|
||||
static inline uint64_t duration_to_micros(TSDuration self) {
|
||||
return self * 1000000 / (uint64_t)CLOCKS_PER_SEC;
|
||||
}
|
||||
|
||||
static inline TSClock clock_null(void) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline TSClock clock_now(void) {
|
||||
return (uint64_t)clock();
|
||||
}
|
||||
|
||||
static inline TSClock clock_after(TSClock base, TSDuration duration) {
|
||||
return base + duration;
|
||||
}
|
||||
|
||||
static inline bool clock_is_null(TSClock self) {
|
||||
return !self;
|
||||
}
|
||||
|
||||
static inline bool clock_is_gt(TSClock self, TSClock other) {
|
||||
return self > other;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_CLOCK_H_
|
||||
|
|
@ -326,13 +326,13 @@ static inline void iterator_print_state(Iterator *self) {
|
|||
TreeCursorEntry entry = *array_back(&self->cursor.stack);
|
||||
TSPoint start = iterator_start_position(self).extent;
|
||||
TSPoint end = iterator_end_position(self).extent;
|
||||
const char *name = ts_language_symbol_name(self->language, entry.subtree->symbol);
|
||||
const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree));
|
||||
printf(
|
||||
"(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
|
||||
name, self->in_padding ? "(p)" : " ",
|
||||
self->visible_depth,
|
||||
start.row, start.column,
|
||||
end.row, end.column
|
||||
start.row + 1, start.column,
|
||||
end.row + 1, end.column
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
|
@ -361,7 +361,7 @@ unsigned ts_subtree_get_changed_ranges(const Subtree *old_tree, const Subtree *n
|
|||
|
||||
do {
|
||||
#ifdef DEBUG_GET_CHANGED_RANGES
|
||||
printf("At [%-2u, %-2u] Compare ", position.extent.row, position.extent.column);
|
||||
printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column);
|
||||
iterator_print_state(&old_iter);
|
||||
printf("\tvs\t");
|
||||
iterator_print_state(&new_iter);
|
||||
|
|
@ -443,8 +443,8 @@ unsigned ts_subtree_get_changed_ranges(const Subtree *old_tree, const Subtree *n
|
|||
#ifdef DEBUG_GET_CHANGED_RANGES
|
||||
printf(
|
||||
" change: [[%u, %u] - [%u, %u]]\n",
|
||||
position.extent.row, position.extent.column,
|
||||
next_position.extent.row, next_position.extent.column
|
||||
position.extent.row + 1, position.extent.column,
|
||||
next_position.extent.row + 1, next_position.extent.column
|
||||
);
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ static inline Length length_sub(Length len1, Length len2) {
|
|||
return result;
|
||||
}
|
||||
|
||||
static inline Length length_zero() {
|
||||
static inline Length length_zero(void) {
|
||||
Length result = {0, {0, 0}};
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
// - include
|
||||
// - utf8proc
|
||||
|
||||
#define _POSIX_SOURCE
|
||||
#define _POSIX_C_SOURCE 200112L
|
||||
#define UTF8PROC_STATIC
|
||||
|
||||
#include "./get_changed_ranges.c"
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ TSNode ts_node_new(const TSTree *tree, const Subtree *subtree, Length position,
|
|||
};
|
||||
}
|
||||
|
||||
static inline TSNode ts_node__null() {
|
||||
static inline TSNode ts_node__null(void) {
|
||||
return ts_node_new(NULL, NULL, length_zero(), 0);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,19 +1,22 @@
|
|||
#include <time.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <limits.h>
|
||||
#include <stdbool.h>
|
||||
#include "tree_sitter/api.h"
|
||||
#include "./subtree.h"
|
||||
#include "./lexer.h"
|
||||
#include "./length.h"
|
||||
#include "./array.h"
|
||||
#include "./language.h"
|
||||
#include "./alloc.h"
|
||||
#include "./stack.h"
|
||||
#include "./reusable_node.h"
|
||||
#include "./reduce_action.h"
|
||||
#include "./array.h"
|
||||
#include "./atomic.h"
|
||||
#include "./clock.h"
|
||||
#include "./error_costs.h"
|
||||
#include "./get_changed_ranges.h"
|
||||
#include "./language.h"
|
||||
#include "./length.h"
|
||||
#include "./lexer.h"
|
||||
#include "./reduce_action.h"
|
||||
#include "./reusable_node.h"
|
||||
#include "./stack.h"
|
||||
#include "./subtree.h"
|
||||
#include "./tree.h"
|
||||
|
||||
#define LOG(...) \
|
||||
|
|
@ -42,6 +45,7 @@ static const unsigned MAX_VERSION_COUNT = 6;
|
|||
static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4;
|
||||
static const unsigned MAX_SUMMARY_DEPTH = 16;
|
||||
static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
|
||||
static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100;
|
||||
|
||||
typedef struct {
|
||||
Subtree token;
|
||||
|
|
@ -62,9 +66,11 @@ struct TSParser {
|
|||
ReusableNode reusable_node;
|
||||
void *external_scanner_payload;
|
||||
FILE *dot_graph_file;
|
||||
TSClock end_clock;
|
||||
TSDuration timeout_duration;
|
||||
unsigned accept_count;
|
||||
size_t operation_limit;
|
||||
volatile bool enabled;
|
||||
unsigned operation_count;
|
||||
const volatile size_t *cancellation_flag;
|
||||
bool halt_on_error;
|
||||
Subtree old_tree;
|
||||
TSRangeArray included_range_differences;
|
||||
|
|
@ -327,7 +333,7 @@ static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId pa
|
|||
LOG(
|
||||
"lex_external state:%d, row:%u, column:%u",
|
||||
lex_mode.external_lex_state,
|
||||
current_position.extent.row,
|
||||
current_position.extent.row + 1,
|
||||
current_position.extent.column
|
||||
);
|
||||
ts_lexer_start(&self->lexer);
|
||||
|
|
@ -365,7 +371,7 @@ static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId pa
|
|||
LOG(
|
||||
"lex_internal state:%d, row:%u, column:%u",
|
||||
lex_mode.lex_state,
|
||||
current_position.extent.row,
|
||||
current_position.extent.row + 1,
|
||||
current_position.extent.column
|
||||
);
|
||||
ts_lexer_start(&self->lexer);
|
||||
|
|
@ -1242,7 +1248,11 @@ static void ts_parser__recover(TSParser *self, StackVersion version, Subtree loo
|
|||
}
|
||||
}
|
||||
|
||||
static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_node_reuse) {
|
||||
static bool ts_parser__advance(
|
||||
TSParser *self,
|
||||
StackVersion version,
|
||||
bool allow_node_reuse
|
||||
) {
|
||||
TSStateId state = ts_stack_state(self->stack, version);
|
||||
uint32_t position = ts_stack_position(self->stack, version).bytes;
|
||||
Subtree last_external_token = ts_stack_last_external_token(self->stack, version);
|
||||
|
|
@ -1274,6 +1284,17 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_
|
|||
}
|
||||
|
||||
for (;;) {
|
||||
if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) {
|
||||
self->operation_count = 0;
|
||||
if (
|
||||
(self->cancellation_flag && atomic_load(self->cancellation_flag)) ||
|
||||
(!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock))
|
||||
) {
|
||||
ts_subtree_release(&self->tree_pool, lookahead);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
StackVersion last_reduction_version = STACK_VERSION_NONE;
|
||||
|
||||
for (uint32_t i = 0; i < table_entry.action_count; i++) {
|
||||
|
|
@ -1302,7 +1323,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_
|
|||
|
||||
ts_parser__shift(self, version, next_state, lookahead, action.params.extra);
|
||||
if (did_reuse) reusable_node_advance(&self->reusable_node);
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
case TSParseActionTypeReduce: {
|
||||
|
|
@ -1322,7 +1343,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_
|
|||
case TSParseActionTypeAccept: {
|
||||
LOG("accept");
|
||||
ts_parser__accept(self, version, lookahead);
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
case TSParseActionTypeRecover: {
|
||||
|
|
@ -1332,7 +1353,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_
|
|||
|
||||
ts_parser__recover(self, version, lookahead);
|
||||
if (did_reuse) reusable_node_advance(&self->reusable_node);
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1371,7 +1392,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_
|
|||
|
||||
if (state == ERROR_STATE) {
|
||||
ts_parser__recover(self, version, lookahead);
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ts_parser__breakdown_top_of_stack(self, version)) {
|
||||
|
|
@ -1381,7 +1402,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_
|
|||
LOG("detect_error");
|
||||
ts_stack_pause(self->stack, version, ts_subtree_leaf_symbol(lookahead));
|
||||
ts_subtree_release(&self->tree_pool, lookahead);
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1480,7 +1501,7 @@ static bool ts_parser_has_outstanding_parse(TSParser *self) {
|
|||
|
||||
// Parser - Public
|
||||
|
||||
TSParser *ts_parser_new() {
|
||||
TSParser *ts_parser_new(void) {
|
||||
TSParser *self = ts_calloc(1, sizeof(TSParser));
|
||||
ts_lexer_init(&self->lexer);
|
||||
array_init(&self->reduce_actions);
|
||||
|
|
@ -1491,8 +1512,10 @@ TSParser *ts_parser_new() {
|
|||
self->reusable_node = reusable_node_new();
|
||||
self->dot_graph_file = NULL;
|
||||
self->halt_on_error = false;
|
||||
self->enabled = true;
|
||||
self->operation_limit = SIZE_MAX;
|
||||
self->cancellation_flag = NULL;
|
||||
self->timeout_duration = 0;
|
||||
self->end_clock = clock_null();
|
||||
self->operation_count = 0;
|
||||
self->old_tree = NULL_SUBTREE;
|
||||
self->scratch_tree.ptr = &self->scratch_tree_data;
|
||||
self->included_range_differences = (TSRangeArray) array_new();
|
||||
|
|
@ -1569,20 +1592,20 @@ void ts_parser_halt_on_error(TSParser *self, bool should_halt_on_error) {
|
|||
self->halt_on_error = should_halt_on_error;
|
||||
}
|
||||
|
||||
bool ts_parser_enabled(const TSParser *self) {
|
||||
return self->enabled;
|
||||
const size_t *ts_parser_cancellation_flag(const TSParser *self) {
|
||||
return (const size_t *)self->cancellation_flag;
|
||||
}
|
||||
|
||||
void ts_parser_set_enabled(TSParser *self, bool enabled) {
|
||||
self->enabled = enabled;
|
||||
void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag) {
|
||||
self->cancellation_flag = (const volatile size_t *)flag;
|
||||
}
|
||||
|
||||
size_t ts_parser_operation_limit(const TSParser *self) {
|
||||
return self->operation_limit;
|
||||
uint64_t ts_parser_timeout_micros(const TSParser *self) {
|
||||
return duration_to_micros(self->timeout_duration);
|
||||
}
|
||||
|
||||
void ts_parser_set_operation_limit(TSParser *self, size_t limit) {
|
||||
self->operation_limit = limit;
|
||||
void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) {
|
||||
self->timeout_duration = duration_from_micros(timeout_micros);
|
||||
}
|
||||
|
||||
void ts_parser_set_included_ranges(TSParser *self, const TSRange *ranges, uint32_t count) {
|
||||
|
|
@ -1645,24 +1668,26 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) {
|
|||
}
|
||||
|
||||
uint32_t position = 0, last_position = 0, version_count = 0;
|
||||
size_t operation_count = 0;
|
||||
self->operation_count = 0;
|
||||
if (self->timeout_duration) {
|
||||
self->end_clock = clock_after(clock_now(), self->timeout_duration);
|
||||
} else {
|
||||
self->end_clock = clock_null();
|
||||
}
|
||||
|
||||
do {
|
||||
for (StackVersion version = 0;
|
||||
version_count = ts_stack_version_count(self->stack), version < version_count;
|
||||
version++) {
|
||||
if (operation_count > self->operation_limit || !self->enabled) return NULL;
|
||||
operation_count++;
|
||||
|
||||
bool allow_node_reuse = version_count == 1;
|
||||
while (ts_stack_is_active(self->stack, version)) {
|
||||
LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u",
|
||||
version, ts_stack_version_count(self->stack),
|
||||
ts_stack_state(self->stack, version),
|
||||
ts_stack_position(self->stack, version).extent.row,
|
||||
ts_stack_position(self->stack, version).extent.row + 1,
|
||||
ts_stack_position(self->stack, version).extent.column);
|
||||
|
||||
ts_parser__advance(self, version, allow_node_reuse);
|
||||
if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL;
|
||||
LOG_STACK();
|
||||
|
||||
position = ts_stack_position(self->stack, version).bytes;
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ typedef struct {
|
|||
Subtree last_external_token;
|
||||
} ReusableNode;
|
||||
|
||||
static inline ReusableNode reusable_node_new() {
|
||||
static inline ReusableNode reusable_node_new(void) {
|
||||
return (ReusableNode) {array_new(), NULL_SUBTREE};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -712,9 +712,9 @@ void ts_stack_clear(Stack *self) {
|
|||
}
|
||||
|
||||
bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) {
|
||||
array_reserve(&self->iterators, 32);
|
||||
bool was_recording_allocations = ts_toggle_allocation_recording(false);
|
||||
if (!f)
|
||||
f = stderr;
|
||||
if (!f) f = stderr;
|
||||
|
||||
fprintf(f, "digraph stack {\n");
|
||||
fprintf(f, "rankdir=\"RL\";\n");
|
||||
|
|
@ -785,7 +785,7 @@ bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f)
|
|||
fprintf(
|
||||
f,
|
||||
" tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n",
|
||||
node->position.extent.row,
|
||||
node->position.extent.row + 1,
|
||||
node->position.extent.column,
|
||||
node->node_count,
|
||||
node->error_cost,
|
||||
|
|
|
|||
|
|
@ -92,11 +92,11 @@ StackSummary *ts_stack_get_summary(Stack *, StackVersion);
|
|||
// Get the total cost of all errors on the given version of the stack.
|
||||
unsigned ts_stack_error_cost(const Stack *, StackVersion version);
|
||||
|
||||
// Determine whether the given two stack versions can be merged.
|
||||
bool ts_stack_merge(Stack *, StackVersion, StackVersion);
|
||||
|
||||
// Merge the given two stack versions if possible, returning true
|
||||
// if they were successfully merged and false otherwise.
|
||||
bool ts_stack_merge(Stack *, StackVersion, StackVersion);
|
||||
|
||||
// Determine whether the given two stack versions can be merged.
|
||||
bool ts_stack_can_merge(Stack *, StackVersion, StackVersion);
|
||||
|
||||
TSSymbol ts_stack_resume(Stack *, StackVersion);
|
||||
|
|
|
|||
11
script/test
11
script/test
|
|
@ -67,7 +67,7 @@ while getopts "dDghl:e:s:t:" option; do
|
|||
esac
|
||||
done
|
||||
|
||||
shift $(expr $OPTIND - 1 )
|
||||
shift $(expr $OPTIND - 1)
|
||||
|
||||
if [[ -n $TREE_SITTER_TEST_LANGUAGE_FILTER || -n $TREE_SITTER_TEST_EXAMPLE_FILTER || -n $TREE_SITTER_TEST_TRIAL_FILTER ]]; then
|
||||
top_level_filter=corpus
|
||||
|
|
@ -76,8 +76,11 @@ else
|
|||
fi
|
||||
|
||||
if [[ "${mode}" == "debug" ]]; then
|
||||
test_binary=$(cargo test --no-run --package=tree-sitter-cli --lib --message-format=json 2> /dev/null | jq -rs '.[-1].filenames[0]')
|
||||
lldb "${test_binary}" -- "${top_level_filter}"
|
||||
test_binary=$(
|
||||
cargo test -p tree-sitter-cli --no-run --message-format=json 2> /dev/null |\
|
||||
jq -rs 'map(select(.target.name == "tree-sitter-cli" and .executable))[0].executable'
|
||||
)
|
||||
lldb "${test_binary}" -- $top_level_filter
|
||||
else
|
||||
cargo test --package=tree-sitter-cli --lib --jobs 1 $top_level_filter -- --nocapture
|
||||
cargo test -p tree-sitter-cli --jobs 1 $top_level_filter -- --nocapture
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -4,5 +4,5 @@ setlocal
|
|||
set TREE_SITTER_TEST=1
|
||||
set RUST_TEST_THREADS=1
|
||||
set RUST_BACKTRACE=full
|
||||
cargo test "%~1" -- --nocapture
|
||||
cargo test -p tree-sitter-cli "%~1" -- --nocapture
|
||||
endlocal
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue