Merge branch 'master' into node-fields

Max Brunsfeld 2019-03-26 11:58:21 -07:00
commit 5035e194ff
34 changed files with 1178 additions and 240 deletions

Cargo.lock

@ -1,3 +1,5 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "aho-corasick"
version = "0.6.9"
@ -661,12 +663,12 @@ dependencies = [
"spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"tempfile 3.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
"tree-sitter 0.3.9",
"tree-sitter-highlight 0.1.4",
"tree-sitter-highlight 0.1.5",
]
[[package]]
name = "tree-sitter-highlight"
version = "0.1.4"
version = "0.1.5"
dependencies = [
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",


@ -172,7 +172,9 @@ fn load_js_grammar_file(grammar_path: &Path) -> Result<String> {
Some(code) => return Err(Error(format!("Node process exited with status {}", code))),
}
Ok(String::from_utf8(output.stdout).expect("Got invalid UTF8 from node"))
let mut result = String::from_utf8(output.stdout).expect("Got invalid UTF8 from node");
result.push('\n');
Ok(result)
}
fn ensure_file<T: AsRef<[u8]>>(path: &PathBuf, f: impl Fn() -> T) -> Result<()> {


@ -843,7 +843,7 @@ impl Generator {
let external_scanner_name = format!("{}_external_scanner", language_function_name);
if !self.syntax_grammar.external_tokens.is_empty() {
add_line!(self, "void *{}_create();", external_scanner_name);
add_line!(self, "void *{}_create(void);", external_scanner_name);
add_line!(self, "void {}_destroy(void *);", external_scanner_name);
add_line!(
self,
@ -870,7 +870,7 @@ impl Generator {
add_line!(
self,
"extern const TSLanguage *{}() {{",
"extern const TSLanguage *{}(void) {{",
language_function_name
);
indent!(self);


@ -6,6 +6,7 @@ use serde::ser::SerializeMap;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use serde_json::{json, Value};
use std::collections::HashMap;
use std::time::Instant;
use std::{fmt, fs, io, path};
use tree_sitter::{Language, PropertySheet};
use tree_sitter_highlight::{highlight, highlight_html, HighlightEvent, Properties, Scope};
@ -254,10 +255,13 @@ pub fn ansi(
source: &[u8],
language: Language,
property_sheet: &PropertySheet<Properties>,
print_time: bool,
) -> Result<()> {
use std::io::Write;
let stdout = io::stdout();
let mut stdout = stdout.lock();
let time = Instant::now();
let mut scope_stack = Vec::new();
for event in highlight(source, language, property_sheet, |s| {
language_for_injection_string(loader, s)
@ -278,6 +282,13 @@ pub fn ansi(
}
}
}
if print_time {
let duration = time.elapsed();
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
eprintln!("{} ms", duration_ms);
}
Ok(())
}


@ -3,7 +3,7 @@ use std::env;
use std::fs;
use std::path::Path;
use std::process::exit;
use std::usize;
use std::{u64, usize};
use tree_sitter_cli::{
config, error, generate, highlight, loader, logger, parse, properties, test,
};
@ -49,10 +49,13 @@ fn run() -> error::Result<()> {
.multiple(true)
.required(true),
)
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("debug").long("debug").short("d"))
.arg(Arg::with_name("debug-graph").long("debug-graph").short("D"))
.arg(Arg::with_name("quiet").long("quiet").short("q"))
.arg(Arg::with_name("time").long("time").short("t")),
.arg(Arg::with_name("time").long("time").short("t"))
.arg(Arg::with_name("allow-cancellation").long("cancel"))
.arg(Arg::with_name("timeout").long("timeout").takes_value(true)),
)
.subcommand(
SubCommand::with_name("test")
@ -76,7 +79,8 @@ fn run() -> error::Result<()> {
.required(true),
)
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("html").long("html").short("h")),
.arg(Arg::with_name("html").long("html").short("h"))
.arg(Arg::with_name("time").long("time").short("t")),
)
.get_matches();
@ -131,6 +135,10 @@ fn run() -> error::Result<()> {
let debug_graph = matches.is_present("debug-graph");
let quiet = matches.is_present("quiet");
let time = matches.is_present("time");
let allow_cancellation = matches.is_present("allow-cancellation");
let timeout = matches
.value_of("timeout")
.map_or(0, |t| u64::from_str_radix(t, 10).unwrap());
loader.find_all_languages(&config.parser_directories)?;
let paths = matches
.values_of("path")
@ -141,23 +149,30 @@ fn run() -> error::Result<()> {
let mut has_error = false;
for path in paths {
let path = Path::new(path);
let language =
if let Some((l, _)) = loader.language_configuration_for_file_name(path)? {
l
} else if let Some(l) = loader.language_at_path(&current_dir)? {
l
let language = if let Some(scope) = matches.value_of("scope") {
if let Some(config) = loader.language_configuration_for_scope(scope)? {
config.0
} else {
eprintln!("No language found");
return Ok(());
};
return Err(error::Error(format!("Unknown scope '{}'", scope)));
}
} else if let Some((l, _)) = loader.language_configuration_for_file_name(path)? {
l
} else if let Some(l) = loader.language_at_path(&current_dir)? {
l
} else {
eprintln!("No language found");
return Ok(());
};
has_error |= parse::parse_file_at_path(
language,
path,
max_path_length,
quiet,
time,
timeout,
debug,
debug_graph,
allow_cancellation,
)?;
}
@ -167,6 +182,7 @@ fn run() -> error::Result<()> {
} else if let Some(matches) = matches.subcommand_matches("highlight") {
let paths = matches.values_of("path").unwrap().into_iter();
let html_mode = matches.is_present("html");
let time = matches.is_present("time");
loader.find_all_languages(&config.parser_directories)?;
if html_mode {
@ -201,7 +217,7 @@ fn run() -> error::Result<()> {
if html_mode {
highlight::html(&loader, &config.theme, &source, language, sheet)?;
} else {
highlight::ansi(&loader, &config.theme, &source, language, sheet)?;
highlight::ansi(&loader, &config.theme, &source, language, sheet, time)?;
}
} else {
return Err(error::Error(format!(


@ -1,9 +1,10 @@
use super::error::{Error, Result};
use super::util;
use std::fs;
use std::io::{self, Write};
use std::path::Path;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::Instant;
use std::{fs, thread};
use tree_sitter::{Language, LogType, Parser};
pub fn parse_file_at_path(
@ -12,8 +13,10 @@ pub fn parse_file_at_path(
max_path_length: usize,
quiet: bool,
print_time: bool,
timeout: u64,
debug: bool,
debug_graph: bool,
allow_cancellation: bool,
) -> Result<bool> {
let mut _log_session = None;
let mut parser = Parser::new();
@ -21,9 +24,28 @@ pub fn parse_file_at_path(
let source_code = fs::read(path)
.map_err(|e| Error(format!("Error reading source file {:?}: {}", path, e)))?;
// If the `--cancel` flag was passed, then cancel the parse
// when the user types a newline.
if allow_cancellation {
let flag = Box::new(AtomicUsize::new(0));
unsafe { parser.set_cancellation_flag(Some(&flag)) };
thread::spawn(move || {
let mut line = String::new();
io::stdin().read_line(&mut line).unwrap();
eprintln!("Cancelling");
flag.store(1, Ordering::Relaxed);
});
}
// Set a timeout based on the `--timeout` flag.
parser.set_timeout_micros(timeout);
// Render an HTML graph if `--debug-graph` was passed
if debug_graph {
_log_session = Some(util::log_graphs(&mut parser, "log.html")?);
} else if debug {
}
// Log to stderr if `--debug` was passed
else if debug {
parser.set_logger(Some(Box::new(|log_type, message| {
if log_type == LogType::Lex {
io::stderr().write(b" ").unwrap();
@ -33,112 +55,123 @@ pub fn parse_file_at_path(
}
let time = Instant::now();
let tree = parser
.parse(&source_code, None)
.expect("Incompatible language version");
let tree = parser.parse(&source_code, None);
let duration = time.elapsed();
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
let mut cursor = tree.walk();
let stdout = io::stdout();
let mut stdout = stdout.lock();
if !quiet {
let mut needs_newline = false;
let mut indent_level = 0;
let mut did_visit_children = false;
if let Some(tree) = tree {
let mut cursor = tree.walk();
if !quiet {
let mut needs_newline = false;
let mut indent_level = 0;
let mut did_visit_children = false;
loop {
let node = cursor.node();
let is_named = node.is_named();
if did_visit_children {
if is_named {
stdout.write(b")")?;
needs_newline = true;
}
if cursor.goto_next_sibling() {
did_visit_children = false;
} else if cursor.goto_parent() {
did_visit_children = true;
indent_level -= 1;
} else {
break;
}
} else {
if is_named {
if needs_newline {
stdout.write(b"\n")?;
}
for _ in 0..indent_level {
stdout.write(b" ")?;
}
let start = node.start_position();
let end = node.end_position();
if let Some(field_name) = cursor.field_name() {
write!(&mut stdout, "{}: ", field_name)?;
}
write!(
&mut stdout,
"({} [{}, {}] - [{}, {}]",
node.kind(),
start.row,
start.column,
end.row,
end.column
)?;
needs_newline = true;
}
if cursor.goto_first_child() {
did_visit_children = false;
indent_level += 1;
} else {
did_visit_children = true;
}
}
}
cursor.reset(tree.root_node());
println!("");
}
let mut first_error = None;
loop {
let node = cursor.node();
if did_visit_children {
if node.is_named() {
stdout.write(b")")?;
needs_newline = true;
}
if cursor.goto_next_sibling() {
did_visit_children = false;
} else if cursor.goto_parent() {
did_visit_children = true;
indent_level -= 1;
if node.has_error() {
if node.is_error() || node.is_missing() {
first_error = Some(node);
break;
} else {
cursor.goto_first_child();
}
} else if !cursor.goto_next_sibling() {
if !cursor.goto_parent() {
break;
}
} else {
if node.is_named() {
if needs_newline {
stdout.write(b"\n")?;
}
for _ in 0..indent_level {
stdout.write(b" ")?;
}
if let Some(field_name) = cursor.field_name() {
write!(&mut stdout, "{}: ", field_name)?;
}
let start = node.start_position();
let end = node.end_position();
write!(
&mut stdout,
"({} [{}, {}] - [{}, {}]",
node.kind(),
start.row,
start.column,
end.row,
end.column
)?;
needs_newline = true;
}
if cursor.goto_first_child() {
did_visit_children = false;
indent_level += 1;
} else {
did_visit_children = true;
}
}
}
cursor.reset(tree.root_node());
println!("");
}
let mut first_error = None;
loop {
let node = cursor.node();
if node.has_error() {
if node.is_error() || node.is_missing() {
first_error = Some(node);
break;
} else {
cursor.goto_first_child();
}
} else if !cursor.goto_next_sibling() {
if !cursor.goto_parent() {
break;
if first_error.is_some() || print_time {
write!(
&mut stdout,
"{:width$}\t{} ms",
path.to_str().unwrap(),
duration_ms,
width = max_path_length
)?;
if let Some(node) = first_error {
let start = node.start_position();
let end = node.end_position();
write!(
&mut stdout,
"\t({} [{}, {}] - [{}, {}])",
node.kind(),
start.row,
start.column,
end.row,
end.column
)?;
}
write!(&mut stdout, "\n")?;
}
}
if first_error.is_some() || print_time {
write!(
return Ok(first_error.is_some());
} else if print_time {
writeln!(
&mut stdout,
"{:width$}\t{} ms",
"{:width$}\t{} ms (timed out)",
path.to_str().unwrap(),
duration_ms,
width = max_path_length
)?;
if let Some(node) = first_error {
let start = node.start_position();
let end = node.end_position();
write!(
&mut stdout,
"\t({} [{}, {}] - [{}, {}])",
node.kind(),
start.row,
start.column,
end.row,
end.column
)?;
}
write!(&mut stdout, "\n")?;
}
Ok(first_error.is_some())
Ok(false)
}
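
The `--cancel` path above works by handing the parser a shared atomic flag and flipping it from another thread. Below is a minimal sketch of that mechanism, condensed from the cancellation test added elsewhere in this change; the `parse_until_cancelled` helper and its `language` argument are illustrative, not part of the commit.

```rust
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{thread, time::Duration};
use tree_sitter::{Language, Parser};

// Sketch: cancel an in-progress parse from another thread.
fn parse_until_cancelled(language: Language) {
    let mut parser = Parser::new();
    parser.set_language(language).unwrap();

    // Box the flag so its address stays stable; the watcher thread keeps
    // the box alive while the parser holds a raw pointer to it.
    let flag = Box::new(AtomicUsize::new(0));
    unsafe { parser.set_cancellation_flag(Some(&flag)) };
    let canceller = thread::spawn(move || {
        thread::sleep(Duration::from_millis(100));
        flag.store(1, Ordering::SeqCst);
    });

    // An endless input keeps the parse running until the flag is set;
    // the parser polls the flag periodically and then gives up.
    let tree = parser.parse_with(
        &mut |offset, _| if offset == 0 { b" [" } else { b"0," },
        None,
    );
    canceller.join().unwrap();
    assert!(tree.is_none()); // a cancelled parse yields no tree
}
```

`set_cancellation_flag` is `unsafe` because the parser only stores a raw pointer: the caller must keep the flag alive for the whole parse, which is why both the CLI code above and this sketch move the boxed flag into the watching thread.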


@ -1113,6 +1113,28 @@ mod tests {
),
])
);
// Handle differently-formatted calls
let sheet2 = generate_property_sheet(
"foo.css",
"
a {
b: f();
c: f(
g(h),
i,
\"j\",
10
);
}
",
)
.unwrap();
assert_eq!(
query_simple(&sheet2, vec!["a"])["c"],
query_simple(&sheet, vec!["a"])["c"]
);
}
#[test]


@ -98,7 +98,9 @@ extern "C" fn ts_record_free(ptr: *mut c_void) {
}
#[no_mangle]
extern "C" fn ts_record_allocations_toggle() {
extern "C" fn ts_toggle_allocation_recording(enabled: bool) -> bool {
let mut recorder = RECORDER.lock();
recorder.enabled = !recorder.enabled;
let was_enabled = recorder.enabled;
recorder.enabled = enabled;
was_enabled
}


@ -21,12 +21,16 @@ pub fn get_language(name: &str) -> Language {
.unwrap()
}
pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet<Properties> {
pub fn get_property_sheet_json(language_name: &str, sheet_name: &str) -> String {
let path = GRAMMARS_DIR
.join(language_name)
.join("src")
.join(sheet_name);
let json = fs::read_to_string(path).unwrap();
fs::read_to_string(path).unwrap()
}
pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet<Properties> {
let json = get_property_sheet_json(language_name, sheet_name);
let language = get_language(language_name);
load_property_sheet(language, &json).unwrap()
}


@ -1,13 +1,17 @@
use super::helpers::fixtures::{get_language, get_property_sheet};
use super::helpers::fixtures::{get_language, get_property_sheet, get_property_sheet_json};
use lazy_static::lazy_static;
use std::ffi::CString;
use std::{ptr, slice, str};
use tree_sitter::{Language, PropertySheet};
use tree_sitter_highlight::{highlight, highlight_html, HighlightEvent, Properties, Scope};
use tree_sitter_highlight::{c, highlight, highlight_html, HighlightEvent, Properties, Scope};
lazy_static! {
static ref JS_SHEET: PropertySheet<Properties> =
get_property_sheet("javascript", "highlights.json");
static ref HTML_SHEET: PropertySheet<Properties> =
get_property_sheet("html", "highlights.json");
static ref EJS_SHEET: PropertySheet<Properties> =
get_property_sheet("embedded-template", "highlights-ejs.json");
static ref SCOPE_CLASS_STRINGS: Vec<String> = {
let mut result = Vec::new();
let mut i = 0;
@ -153,6 +157,118 @@ fn test_highlighting_empty_lines() {
);
}
#[test]
fn test_highlighting_ejs() {
let source = vec!["<div><% foo() %></div>"].join("\n");
assert_eq!(
&to_token_vector(&source, get_language("embedded-template"), &EJS_SHEET).unwrap(),
&[[
("<", vec![]),
("div", vec![Scope::Tag]),
(">", vec![]),
("<%", vec![Scope::Keyword]),
(" ", vec![]),
("foo", vec![Scope::Function]),
("(", vec![Scope::PunctuationBracket]),
(")", vec![Scope::PunctuationBracket]),
(" ", vec![]),
("%>", vec![Scope::Keyword]),
("</", vec![]),
("div", vec![Scope::Tag]),
(">", vec![])
]],
);
}
#[test]
fn test_highlighting_via_c_api() {
let js_lang = get_language("javascript");
let html_lang = get_language("html");
let js_sheet = get_property_sheet_json("javascript", "highlights.json");
let js_sheet = c_string(&js_sheet);
let html_sheet = get_property_sheet_json("html", "highlights.json");
let html_sheet = c_string(&html_sheet);
let class_tag = c_string("class=tag");
let class_function = c_string("class=function");
let class_string = c_string("class=string");
let class_keyword = c_string("class=keyword");
let js_scope_name = c_string("source.js");
let html_scope_name = c_string("text.html.basic");
let injection_regex = c_string("^(javascript|js)$");
let source_code = c_string("<script>\nconst a = b('c');\nc.d();\n</script>");
let attribute_strings = &mut [ptr::null(); Scope::Unknown as usize + 1];
attribute_strings[Scope::Tag as usize] = class_tag.as_ptr();
attribute_strings[Scope::String as usize] = class_string.as_ptr();
attribute_strings[Scope::Keyword as usize] = class_keyword.as_ptr();
attribute_strings[Scope::Function as usize] = class_function.as_ptr();
let highlighter = c::ts_highlighter_new(attribute_strings.as_ptr());
let buffer = c::ts_highlight_buffer_new();
c::ts_highlighter_add_language(
highlighter,
html_scope_name.as_ptr(),
html_lang,
html_sheet.as_ptr(),
ptr::null_mut(),
);
c::ts_highlighter_add_language(
highlighter,
js_scope_name.as_ptr(),
js_lang,
js_sheet.as_ptr(),
injection_regex.as_ptr(),
);
c::ts_highlighter_highlight(
highlighter,
html_scope_name.as_ptr(),
source_code.as_ptr(),
source_code.as_bytes().len() as u32,
buffer,
ptr::null_mut(),
);
let output_bytes = c::ts_highlight_buffer_content(buffer);
let output_line_offsets = c::ts_highlight_buffer_line_offsets(buffer);
let output_len = c::ts_highlight_buffer_len(buffer);
let output_line_count = c::ts_highlight_buffer_line_count(buffer);
let output_bytes = unsafe { slice::from_raw_parts(output_bytes, output_len as usize) };
let output_line_offsets =
unsafe { slice::from_raw_parts(output_line_offsets, output_line_count as usize) };
let mut lines = Vec::new();
for i in 0..(output_line_count as usize) {
let line_start = output_line_offsets[i] as usize;
let line_end = output_line_offsets
.get(i + 1)
.map(|x| *x as usize)
.unwrap_or(output_bytes.len());
lines.push(str::from_utf8(&output_bytes[line_start..line_end]).unwrap());
}
assert_eq!(
lines,
vec![
"&lt;<span class=tag>script</span>&gt;",
"<span class=keyword>const</span> <span>a</span> <span>=</span> <span class=function>b</span><span>(</span><span class=string>&#39;c&#39;</span><span>)</span><span>;</span>",
"<span>c</span><span>.</span><span class=function>d</span><span>(</span><span>)</span><span>;</span>",
"&lt;/<span class=tag>script</span>&gt;",
]
);
c::ts_highlighter_delete(highlighter);
c::ts_highlight_buffer_delete(buffer);
}
fn c_string(s: &str) -> CString {
CString::new(s.as_bytes().to_vec()).unwrap()
}
fn test_language_for_injection_string<'a>(
string: &str,
) -> Option<(Language, &'a PropertySheet<Properties>)> {


@ -1,7 +1,8 @@
use super::helpers::edits::{perform_edit, Edit, ReadRecorder};
use super::helpers::fixtures::{get_language, get_test_language};
use crate::generate::generate_parser_for_grammar;
use std::{thread, usize};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{thread, time};
use tree_sitter::{InputEdit, LogType, Parser, Point, Range};
#[test]
@ -56,6 +57,37 @@ fn test_parsing_with_logging() {
"reduce sym:struct_item, child_count:3".to_string()
)));
assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string())));
for (_, m) in &messages {
assert!(!m.contains("row:0"));
}
}
#[test]
#[cfg(unix)]
fn test_parsing_with_debug_graph_enabled() {
use std::io::{BufRead, BufReader, Seek};
let has_zero_indexed_row = |s: &str| s.contains("position: 0,");
let mut parser = Parser::new();
parser.set_language(get_language("javascript")).unwrap();
let mut debug_graph_file = tempfile::tempfile().unwrap();
parser.print_dot_graphs(&debug_graph_file);
parser.parse("const zero = 0", None).unwrap();
debug_graph_file.seek(std::io::SeekFrom::Start(0)).unwrap();
let log_reader = BufReader::new(debug_graph_file)
.lines()
.map(|l| l.expect("Failed to read line from graph log"));
for line in log_reader {
assert!(
!has_zero_indexed_row(&line),
"Graph log output includes zero-indexed row: {}",
line
);
}
}
#[test]
@ -269,84 +301,164 @@ fn test_parsing_on_multiple_threads() {
assert_eq!(child_count_differences, &[1, 2, 3, 4]);
}
// Operation limits
#[test]
fn test_parsing_cancelled_by_another_thread() {
let cancellation_flag = Box::new(AtomicUsize::new(0));
let mut parser = Parser::new();
parser.set_language(get_language("javascript")).unwrap();
unsafe { parser.set_cancellation_flag(Some(&cancellation_flag)) };
// Long input - parsing succeeds
let tree = parser.parse_with(
&mut |offset, _| {
if offset == 0 {
b" ["
} else if offset >= 20000 {
b""
} else {
b"0,"
}
},
None,
);
assert!(tree.is_some());
let cancel_thread = thread::spawn(move || {
thread::sleep(time::Duration::from_millis(100));
cancellation_flag.store(1, Ordering::SeqCst);
});
// Infinite input
let tree = parser.parse_with(
&mut |offset, _| {
thread::yield_now();
thread::sleep(time::Duration::from_millis(10));
if offset == 0 {
b" ["
} else {
b"0,"
}
},
None,
);
// Parsing returns None because it was cancelled.
cancel_thread.join().unwrap();
assert!(tree.is_none());
}
// Timeouts
#[test]
fn test_parsing_with_an_operation_limit() {
fn test_parsing_with_a_timeout() {
let mut parser = Parser::new();
parser.set_language(get_language("json")).unwrap();
// Start parsing from an infinite input. Parsing should abort after 5 "operations".
parser.set_operation_limit(5);
let mut call_count = 0;
// Parse an infinitely-long array, but pause after 100 microseconds of processing.
parser.set_timeout_micros(100);
let start_time = time::Instant::now();
let tree = parser.parse_with(
&mut |_, _| {
if call_count == 0 {
call_count += 1;
b"[0"
&mut |offset, _| {
if offset == 0 {
b" ["
} else {
call_count += 1;
b", 0"
b",0"
}
},
None,
);
assert!(tree.is_none());
assert!(call_count >= 3);
assert!(call_count <= 8);
assert!(start_time.elapsed().as_micros() < 500);
// Resume parsing from the previous state.
call_count = 0;
parser.set_operation_limit(20);
// Continue parsing, but pause after 300 microseconds of processing.
parser.set_timeout_micros(1000);
let start_time = time::Instant::now();
let tree = parser.parse_with(
&mut |offset, _| {
if offset == 0 {
b" ["
} else {
b",0"
}
},
None,
);
assert!(tree.is_none());
assert!(start_time.elapsed().as_micros() > 500);
assert!(start_time.elapsed().as_micros() < 1500);
// Finish parsing
parser.set_timeout_micros(0);
let tree = parser
.parse_with(
&mut |_, _| {
if call_count == 0 {
call_count += 1;
&mut |offset, _| {
if offset > 5000 {
b""
} else if offset == 5000 {
b"]"
} else {
b""
b",0"
}
},
None,
)
.unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(value (array (number) (number) (number)))"
);
assert_eq!(tree.root_node().child(0).unwrap().kind(), "array");
}
#[test]
fn test_parsing_with_a_reset_after_reaching_an_operation_limit() {
fn test_parsing_with_a_timeout_and_a_reset() {
let mut parser = Parser::new();
parser.set_language(get_language("json")).unwrap();
parser.set_operation_limit(3);
let tree = parser.parse("[1234, 5, 6, 7, 8]", None);
parser.set_timeout_micros(30);
let tree = parser.parse(
"[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]",
None,
);
assert!(tree.is_none());
// Without calling reset, the parser continues from where it left off, so
// it does not see the changes to the beginning of the source code.
parser.set_operation_limit(usize::MAX);
let tree = parser.parse("[null, 5, 6, 4, 5]", None).unwrap();
parser.set_timeout_micros(0);
let tree = parser.parse(
"[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]",
None,
).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(value (array (number) (number) (number) (number) (number)))"
tree.root_node()
.named_child(0)
.unwrap()
.named_child(0)
.unwrap()
.kind(),
"string"
);
parser.set_operation_limit(3);
let tree = parser.parse("[1234, 5, 6, 7, 8]", None);
parser.set_timeout_micros(30);
let tree = parser.parse(
"[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]",
None,
);
assert!(tree.is_none());
// By calling reset, we force the parser to start over from scratch so
// that it sees the changes to the beginning of the source code.
parser.set_operation_limit(usize::MAX);
parser.set_timeout_micros(0);
parser.reset();
let tree = parser.parse("[null, 5, 6, 4, 5]", None).unwrap();
let tree = parser.parse(
"[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]",
None,
).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(value (array (null) (number) (number) (number) (number)))"
tree.root_node()
.named_child(0)
.unwrap()
.named_child(0)
.unwrap()
.kind(),
"null"
);
}


@ -16,9 +16,10 @@ Tree-sitter is a parser generator tool and an incremental parsing library. It ca
There are currently bindings that allow Tree-sitter to be used from the following languages:
* [JavaScript](https://github.com/tree-sitter/node-tree-sitter)
* [Rust](https://github.com/tree-sitter/rust-tree-sitter)
* [Haskell](https://github.com/tree-sitter/haskell-tree-sitter)
* [Python](https://github.com/tree-sitter/py-tree-sitter)
* [Rust](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding)
* [Ruby](https://github.com/tree-sitter/ruby-tree-sitter)
* [Haskell](https://github.com/tree-sitter/haskell-tree-sitter)
### Available Parsers


@ -77,7 +77,7 @@ Then run the following command:
tree-sitter generate
```
This will generate the C code required to parse this trivial language, as well as all of the files needed to compile and load this native parser as a Node.js module. You can test this parser by creating a source file with the contents `hello;` and parsing it:
This will generate the C code required to parse this trivial language, as well as all of the files needed to compile and load this native parser as a Node.js module. You can test this parser by creating a source file with the contents `hello` and parsing it:
```sh
tree-sitter parse ./the-file
@ -86,7 +86,7 @@ tree-sitter parse ./the-file
This should print the following:
```
(source_file [0, 0] - [0, 5])
(source_file [1, 0] - [1, 5])
```
You might notice that the first time you run `tree-sitter parse`, it takes a few seconds. This is because Tree-sitter automatically compiles your C code into a dynamically-loadable library. Whenever you make changes to your grammar, you can update the parser simply by re-running `tree-sitter generate`. When the parser changes, Tree-sitter will recompile it as needed.


@ -1,7 +1,7 @@
[package]
name = "tree-sitter-highlight"
description = "Library for performing syntax highlighting with Tree-sitter"
version = "0.1.4"
version = "0.1.5"
authors = [
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
"Tim Clem <timothy.clem@gmail.com>"
@ -12,6 +12,9 @@ edition = "2018"
keywords = ["incremental", "parsing", "syntax", "highlighting"]
categories = ["parsing", "text-editors"]
[lib]
crate-type = ["lib", "staticlib"]
[dependencies]
regex = "1"
serde = "1.0"


@ -0,0 +1,104 @@
#ifndef TREE_SITTER_HIGHLIGHT_H_
#define TREE_SITTER_HIGHLIGHT_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
typedef enum {
TSHighlightOk,
TSHighlightUnknownScope,
TSHighlightTimeout,
} TSHighlightError;
// The list of scopes which can be styled for syntax highlighting.
// When constructing a `TSHighlighter`, you need to construct an
// `attribute_strings` array whose elements correspond to these values.
enum TSHighlightScope {
TSHighlightScopeAttribute,
TSHighlightScopeComment,
TSHighlightScopeConstant,
TSHighlightScopeConstantBuiltin,
TSHighlightScopeConstructor,
TSHighlightScopeConstructorBuiltin,
TSHighlightScopeEmbedded,
TSHighlightScopeEscape,
TSHighlightScopeFunction,
TSHighlightScopeFunctionBuiltin,
TSHighlightScopeKeyword,
TSHighlightScopeNumber,
TSHighlightScopeOperator,
TSHighlightScopeProperty,
TSHighlightScopePropertyBuiltin,
TSHighlightScopePunctuation,
TSHighlightScopePunctuationBracket,
TSHighlightScopePunctuationDelimiter,
TSHighlightScopePunctuationSpecial,
TSHighlightScopeString,
TSHighlightScopeStringSpecial,
TSHighlightScopeTag,
TSHighlightScopeType,
TSHighlightScopeTypeBuiltin,
TSHighlightScopeVariable,
TSHighlightScopeVariableBuiltin,
TSHighlightScopeUnknown,
};
typedef struct TSHighlighter TSHighlighter;
typedef struct TSHighlightBuffer TSHighlightBuffer;
// Construct a `TSHighlighter` by providing a list of strings containing
// the HTML attributes that should be applied for each highlight scope.
TSHighlighter *ts_highlighter_new(
const char **attribute_strings
);
// Delete a syntax highlighter.
void ts_highlighter_delete(TSHighlighter *);
// Add a `TSLanguage` to a highlighter. The language is associated with a
// scope name, which can be used later to select a language for syntax
// highlighting. Along with the language, you must provide a JSON string
// containing the compiled PropertySheet to use for syntax highlighting
// with that language. You can also optionally provide an 'injection regex',
// which is used to detect when this language has been embedded in a document
// written in a different language.
int ts_highlighter_add_language(
TSHighlighter *self,
const char *scope_name,
const TSLanguage *language,
const char *property_sheet_json,
const char *injection_regex
);
// Compute syntax highlighting for a given document. You must first
// create a `TSHighlightBuffer` to hold the output.
int ts_highlighter_highlight(
const TSHighlighter *self,
const char *scope_name,
const char *source_code,
uint32_t source_code_len,
TSHighlightBuffer *output,
const size_t *cancellation_flag
);
// TSHighlightBuffer: This struct stores the HTML output of syntax
// highlighting. It can be reused for multiple highlighting calls.
TSHighlightBuffer *ts_highlight_buffer_new();
// Delete a highlight buffer.
void ts_highlight_buffer_delete(TSHighlightBuffer *);
// Access the HTML content of a highlight buffer.
const uint8_t *ts_highlight_buffer_content(const TSHighlightBuffer *);
const uint32_t *ts_highlight_buffer_line_offsets(const TSHighlightBuffer *);
uint32_t ts_highlight_buffer_len(const TSHighlightBuffer *);
uint32_t ts_highlight_buffer_line_count(const TSHighlightBuffer *);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_HIGHLIGHT_H_
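
The comments above describe the intended calling sequence: build a `TSHighlighter` from per-scope attribute strings, register each language together with its compiled property sheet, then render into a reusable `TSHighlightBuffer` and read the HTML back out line by line. Here is a condensed sketch of that sequence, written against the Rust-exported `c` module in the same way the `test_highlighting_via_c_api` test above drives it; the `language`, `scope`, `sheet_json`, and `source` parameters are placeholders for caller-supplied values.

```rust
use std::ffi::CString;
use std::{ptr, slice, str};
use tree_sitter::Language;
use tree_sitter_highlight::{c, Scope};

// Sketch of the C API calling sequence, driven from Rust.
fn highlight_to_html(
    language: Language,
    scope: &str,
    sheet_json: &str,
    source: &str,
) -> Vec<String> {
    let scope_name = CString::new(scope).unwrap();
    let sheet_json = CString::new(sheet_json).unwrap();
    let source_code = CString::new(source).unwrap();
    let class_keyword = CString::new("class=keyword").unwrap();

    // One attribute string per highlight scope; null entries render as bare <span>s.
    let mut attribute_strings = [ptr::null(); Scope::Unknown as usize + 1];
    attribute_strings[Scope::Keyword as usize] = class_keyword.as_ptr();

    let highlighter = c::ts_highlighter_new(attribute_strings.as_ptr());
    let buffer = c::ts_highlight_buffer_new();
    c::ts_highlighter_add_language(
        highlighter,
        scope_name.as_ptr(),
        language,
        sheet_json.as_ptr(),
        ptr::null(), // no injection regex
    );
    c::ts_highlighter_highlight(
        highlighter,
        scope_name.as_ptr(),
        source_code.as_ptr(),
        source_code.as_bytes().len() as u32,
        buffer,
        ptr::null(), // no cancellation flag
    );

    // Copy the rendered lines out of the buffer before deleting it.
    let bytes = unsafe {
        slice::from_raw_parts(
            c::ts_highlight_buffer_content(buffer),
            c::ts_highlight_buffer_len(buffer) as usize,
        )
    };
    let offsets = unsafe {
        slice::from_raw_parts(
            c::ts_highlight_buffer_line_offsets(buffer),
            c::ts_highlight_buffer_line_count(buffer) as usize,
        )
    };
    let lines = offsets
        .iter()
        .enumerate()
        .map(|(i, &start)| {
            let end = offsets.get(i + 1).map_or(bytes.len(), |&x| x as usize);
            str::from_utf8(&bytes[start as usize..end]).unwrap().to_string()
        })
        .collect();

    c::ts_highlighter_delete(highlighter);
    c::ts_highlight_buffer_delete(buffer);
    lines
}
```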

highlight/src/c_lib.rs

@ -0,0 +1,266 @@
use super::{escape, load_property_sheet, HighlightEvent, Highlighter, Properties, Scope};
use regex::Regex;
use std::collections::HashMap;
use std::ffi::CStr;
use std::io::Write;
use std::os::raw::c_char;
use std::process::abort;
use std::sync::atomic::AtomicUsize;
use std::{fmt, slice};
use tree_sitter::{Language, PropertySheet};
struct LanguageConfiguration {
language: Language,
property_sheet: PropertySheet<Properties>,
injection_regex: Option<Regex>,
}
pub struct TSHighlighter {
languages: HashMap<String, LanguageConfiguration>,
attribute_strings: Vec<&'static [u8]>,
}
pub struct TSHighlightBuffer {
html: Vec<u8>,
line_offsets: Vec<u32>,
}
#[repr(C)]
pub enum ErrorCode {
Ok,
UnknownScope,
Timeout,
}
#[no_mangle]
pub extern "C" fn ts_highlighter_new(
attribute_strings: *const *const c_char,
) -> *mut TSHighlighter {
let attribute_strings =
unsafe { slice::from_raw_parts(attribute_strings, Scope::Unknown as usize + 1) };
let attribute_strings = attribute_strings
.into_iter()
.map(|s| {
if s.is_null() {
&[]
} else {
unsafe { CStr::from_ptr(*s).to_bytes() }
}
})
.collect();
Box::into_raw(Box::new(TSHighlighter {
languages: HashMap::new(),
attribute_strings,
}))
}
#[no_mangle]
pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer {
Box::into_raw(Box::new(TSHighlightBuffer {
html: Vec::new(),
line_offsets: Vec::new(),
}))
}
#[no_mangle]
pub extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) {
drop(unsafe { Box::from_raw(this) })
}
#[no_mangle]
pub extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) {
drop(unsafe { Box::from_raw(this) })
}
#[no_mangle]
pub extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 {
let this = unwrap_ptr(this);
this.html.as_slice().as_ptr()
}
#[no_mangle]
pub extern "C" fn ts_highlight_buffer_line_offsets(this: *const TSHighlightBuffer) -> *const u32 {
let this = unwrap_ptr(this);
this.line_offsets.as_slice().as_ptr()
}
#[no_mangle]
pub extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 {
let this = unwrap_ptr(this);
this.html.len() as u32
}
#[no_mangle]
pub extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 {
let this = unwrap_ptr(this);
this.line_offsets.len() as u32
}
#[no_mangle]
pub extern "C" fn ts_highlighter_add_language(
this: *mut TSHighlighter,
scope_name: *const c_char,
language: Language,
property_sheet_json: *const c_char,
injection_regex: *const c_char,
) -> ErrorCode {
let this = unwrap_mut_ptr(this);
let scope_name = unsafe { CStr::from_ptr(scope_name) };
let scope_name = unwrap(scope_name.to_str()).to_string();
let property_sheet_json = unsafe { CStr::from_ptr(property_sheet_json) };
let property_sheet_json = unwrap(property_sheet_json.to_str());
let property_sheet = unwrap(load_property_sheet(language, property_sheet_json));
let injection_regex = if injection_regex.is_null() {
None
} else {
let pattern = unsafe { CStr::from_ptr(injection_regex) };
Some(unwrap(Regex::new(unwrap(pattern.to_str()))))
};
this.languages.insert(
scope_name,
LanguageConfiguration {
language,
property_sheet,
injection_regex,
},
);
ErrorCode::Ok
}
#[no_mangle]
pub extern "C" fn ts_highlighter_highlight(
this: *const TSHighlighter,
scope_name: *const c_char,
source_code: *const c_char,
source_code_len: u32,
output: *mut TSHighlightBuffer,
cancellation_flag: *const AtomicUsize,
) -> ErrorCode {
let this = unwrap_ptr(this);
let output = unwrap_mut_ptr(output);
let scope_name = unwrap(unsafe { CStr::from_ptr(scope_name).to_str() });
let source_code =
unsafe { slice::from_raw_parts(source_code as *const u8, source_code_len as usize) };
let cancellation_flag = unsafe { cancellation_flag.as_ref() };
this.highlight(source_code, scope_name, output, cancellation_flag)
}
impl TSHighlighter {
fn highlight(
&self,
source_code: &[u8],
scope_name: &str,
output: &mut TSHighlightBuffer,
cancellation_flag: Option<&AtomicUsize>,
) -> ErrorCode {
let configuration = self.languages.get(scope_name);
if configuration.is_none() {
return ErrorCode::UnknownScope;
}
let configuration = configuration.unwrap();
let languages = &self.languages;
let highlighter = Highlighter::new(
source_code,
configuration.language,
&configuration.property_sheet,
|injection_string| {
languages.values().find_map(|conf| {
conf.injection_regex.as_ref().and_then(|regex| {
if regex.is_match(injection_string) {
Some((conf.language, &conf.property_sheet))
} else {
None
}
})
})
},
cancellation_flag,
);
if let Ok(highlighter) = highlighter {
output.html.clear();
output.line_offsets.clear();
output.line_offsets.push(0);
let mut scopes = Vec::new();
for event in highlighter {
match event {
HighlightEvent::ScopeStart(s) => {
scopes.push(s);
output.start_scope(s, &self.attribute_strings);
}
HighlightEvent::ScopeEnd => {
scopes.pop();
output.end_scope();
}
HighlightEvent::Source(src) => {
output.add_text(src, &scopes, &self.attribute_strings);
}
};
}
ErrorCode::Ok
} else {
ErrorCode::Timeout
}
}
}
impl TSHighlightBuffer {
fn start_scope(&mut self, s: Scope, attribute_strings: &[&[u8]]) {
let attribute_string = attribute_strings[s as usize];
self.html.extend(b"<span");
if !attribute_string.is_empty() {
self.html.extend(b" ");
self.html.extend(attribute_string);
}
self.html.extend(b">");
}
fn end_scope(&mut self) {
self.html.extend(b"</span>");
}
fn finish_line(&mut self) {
self.line_offsets.push(self.html.len() as u32);
}
fn add_text(&mut self, src: &str, scopes: &Vec<Scope>, attribute_strings: &[&[u8]]) {
let mut multiline = false;
for line in src.split('\n') {
let line = line.trim_end_matches('\r');
if multiline {
scopes.iter().for_each(|_| self.end_scope());
self.finish_line();
scopes
.iter()
.for_each(|scope| self.start_scope(*scope, attribute_strings));
}
write!(&mut self.html, "{}", escape::Escape(line)).unwrap();
multiline = true;
}
}
}
fn unwrap_ptr<'a, T>(result: *const T) -> &'a T {
unsafe { result.as_ref() }.unwrap_or_else(|| {
eprintln!("{}:{} - pointer must not be null", file!(), line!());
abort();
})
}
fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T {
unsafe { result.as_mut() }.unwrap_or_else(|| {
eprintln!("{}:{} - pointer must not be null", file!(), line!());
abort();
})
}
fn unwrap<T, E: fmt::Display>(result: Result<T, E>) -> T {
result.unwrap_or_else(|error| {
eprintln!("tree-sitter highlight error: {}", error);
abort();
})
}


@ -1,14 +1,17 @@
pub mod c_lib;
mod escape;
pub use c_lib as c;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use serde_derive::*;
use std::cmp;
use std::fmt::{self, Write};
use std::mem::transmute;
use std::str;
use std::usize;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{cmp, str, usize};
use tree_sitter::{Language, Node, Parser, Point, PropertySheet, Range, Tree, TreePropertyCursor};
const CANCELLATION_CHECK_INTERVAL: usize = 100;
#[derive(Debug)]
enum TreeStep {
Child {
@ -78,6 +81,7 @@ struct Layer<'a> {
cursor: TreePropertyCursor<'a, Properties>,
ranges: Vec<Range>,
at_node_end: bool,
depth: usize,
}
struct Highlighter<'a, T>
@ -90,6 +94,8 @@ where
parser: Parser,
layers: Vec<Layer<'a>>,
utf8_error_len: Option<usize>,
operation_count: usize,
cancellation_flag: Option<&'a AtomicUsize>,
}
#[derive(Copy, Clone, Debug)]
@ -151,6 +157,28 @@ pub enum PropertySheetError {
InvalidFormat(String),
}
impl fmt::Display for PropertySheetError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
PropertySheetError::InvalidJSON(e) => e.fmt(f),
PropertySheetError::InvalidRegex(e) => e.fmt(f),
PropertySheetError::InvalidFormat(e) => e.fmt(f),
}
}
}
impl<'a> fmt::Debug for Layer<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"Layer {{ at_node_end: {}, node: {:?} }}",
self.at_node_end,
self.cursor.node()
)?;
Ok(())
}
}
pub fn load_property_sheet(
language: Language,
json: &str,
@ -354,17 +382,22 @@ where
language: Language,
property_sheet: &'a PropertySheet<Properties>,
injection_callback: F,
cancellation_flag: Option<&'a AtomicUsize>,
) -> Result<Self, String> {
let mut parser = Parser::new();
unsafe { parser.set_cancellation_flag(cancellation_flag.clone()) };
parser.set_language(language)?;
let tree = parser
.parse(source, None)
.ok_or_else(|| format!("Tree-sitter: failed to parse"))?;
Ok(Self {
injection_callback,
source,
source_offset: 0,
parser,
source,
cancellation_flag,
injection_callback,
source_offset: 0,
operation_count: 0,
utf8_error_len: None,
layers: vec![Layer::new(
source,
tree,
@ -375,8 +408,8 @@ where
start_point: Point::new(0, 0),
end_point: Point::new(usize::MAX, usize::MAX),
}],
0,
)],
utf8_error_len: None,
})
}
@ -554,7 +587,7 @@ where
result
}
fn add_layer(&mut self, language_string: &str, ranges: Vec<Range>) {
fn add_layer(&mut self, language_string: &str, ranges: Vec<Range>, depth: usize) {
if let Some((language, property_sheet)) = (self.injection_callback)(language_string) {
self.parser
.set_language(language)
@ -564,7 +597,7 @@ where
.parser
.parse(self.source, None)
.expect("Failed to parse");
let layer = Layer::new(self.source, tree, property_sheet, ranges);
let layer = Layer::new(self.source, tree, property_sheet, ranges, depth);
match self.layers.binary_search_by(|l| l.cmp(&layer)) {
Ok(i) | Err(i) => self.layers.insert(i, layer),
};
@ -579,6 +612,16 @@ where
type Item = HighlightEvent<'a>;
fn next(&mut self) -> Option<Self::Item> {
if let Some(cancellation_flag) = self.cancellation_flag {
self.operation_count += 1;
if self.operation_count >= CANCELLATION_CHECK_INTERVAL {
self.operation_count = 0;
if cancellation_flag.load(Ordering::Relaxed) != 0 {
return None;
}
}
}
if let Some(utf8_error_len) = self.utf8_error_len.take() {
self.source_offset += utf8_error_len;
return Some(HighlightEvent::Source("\u{FFFD}"));
@ -606,8 +649,9 @@ where
})
.collect::<Vec<_>>();
let depth = first_layer.depth + 1;
for (language, ranges) in injections {
self.add_layer(&language, ranges);
self.add_layer(&language, ranges, depth);
}
}
@ -636,7 +680,13 @@ where
// to re-sort the layers. If the cursor is already at the end of its syntax tree,
// remove it.
if self.layers[0].advance() {
self.layers.sort_unstable_by(|a, b| a.cmp(&b));
let mut index = 0;
while self.layers.get(index + 1).map_or(false, |next| {
self.layers[index].cmp(next) == cmp::Ordering::Greater
}) {
self.layers.swap(index, index + 1);
index += 1;
}
} else {
self.layers.remove(0);
}
@ -685,6 +735,7 @@ impl<'a> Layer<'a> {
tree: Tree,
sheet: &'a PropertySheet<Properties>,
ranges: Vec<Range>,
depth: usize,
) -> Self {
// The cursor's lifetime parameter indicates that the tree must outlive the cursor.
// But because the tree is really a pointer to the heap, the cursor can remain
@ -695,6 +746,7 @@ impl<'a> Layer<'a> {
_tree: tree,
cursor,
ranges,
depth,
at_node_end: false,
}
}
@ -706,6 +758,7 @@ impl<'a> Layer<'a> {
self.offset()
.cmp(&other.offset())
.then_with(|| other.at_node_end.cmp(&self.at_node_end))
.then_with(|| self.depth.cmp(&other.depth))
}
fn offset(&self) -> usize {
@ -816,7 +869,7 @@ pub fn highlight<'a, F>(
where
F: Fn(&str) -> Option<(Language, &'a PropertySheet<Properties>)> + 'a,
{
Highlighter::new(source, language, property_sheet, injection_callback)
Highlighter::new(source, language, property_sheet, injection_callback, None)
}
pub fn highlight_html<'a, F1, F2>(
@ -830,7 +883,7 @@ where
F1: Fn(&str) -> Option<(Language, &'a PropertySheet<Properties>)>,
F2: Fn(Scope) -> &'a str,
{
let highlighter = Highlighter::new(source, language, property_sheet, injection_callback)?;
let highlighter = Highlighter::new(source, language, property_sheet, injection_callback, None)?;
let mut renderer = HtmlRenderer::new(attribute_callback);
let mut scopes = Vec::new();
for event in highlighter {


@ -138,16 +138,16 @@ extern "C" {
) -> *mut TSTree;
}
extern "C" {
pub fn ts_parser_enabled(arg1: *const TSParser) -> bool;
pub fn ts_parser_cancellation_flag(arg1: *const TSParser) -> *const usize;
}
extern "C" {
pub fn ts_parser_set_enabled(arg1: *mut TSParser, arg2: bool);
pub fn ts_parser_set_cancellation_flag(arg1: *mut TSParser, arg2: *const usize);
}
extern "C" {
pub fn ts_parser_operation_limit(arg1: *const TSParser) -> usize;
pub fn ts_parser_timeout_micros(arg1: *const TSParser) -> u64;
}
extern "C" {
pub fn ts_parser_set_operation_limit(arg1: *mut TSParser, arg2: usize);
pub fn ts_parser_set_timeout_micros(arg1: *mut TSParser, arg2: u64);
}
extern "C" {
pub fn ts_parser_reset(arg1: *mut TSParser);


@ -15,6 +15,7 @@ use std::collections::HashMap;
use std::ffi::CStr;
use std::marker::PhantomData;
use std::os::raw::{c_char, c_void};
use std::sync::atomic::AtomicUsize;
use std::{fmt, ptr, slice, str, u16};
pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION;
@ -348,8 +349,12 @@ impl Parser {
unsafe { ffi::ts_parser_reset(self.0) }
}
pub fn set_operation_limit(&mut self, limit: usize) {
unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) }
pub fn timeout_micros(&self) -> u64 {
unsafe { ffi::ts_parser_timeout_micros(self.0) }
}
pub fn set_timeout_micros(&mut self, timeout_micros: u64) {
unsafe { ffi::ts_parser_set_timeout_micros(self.0, timeout_micros) }
}
pub fn set_included_ranges(&mut self, ranges: &[Range]) {
@ -359,6 +364,18 @@ impl Parser {
ffi::ts_parser_set_included_ranges(self.0, ts_ranges.as_ptr(), ts_ranges.len() as u32)
};
}
pub unsafe fn cancellation_flag(&self) -> Option<&AtomicUsize> {
(ffi::ts_parser_cancellation_flag(self.0) as *const AtomicUsize).as_ref()
}
pub unsafe fn set_cancellation_flag(&self, flag: Option<&AtomicUsize>) {
if let Some(flag) = flag {
ffi::ts_parser_set_cancellation_flag(self.0, flag as *const AtomicUsize as *const usize);
} else {
ffi::ts_parser_set_cancellation_flag(self.0, ptr::null());
}
}
}
impl Drop for Parser {
@ -511,10 +528,11 @@ impl<'tree> Node<'tree> {
unsafe { ffi::ts_node_child_count(self.0) as usize }
}
pub fn children<'a>(&'a self) -> impl Iterator<Item = Node<'tree>> + 'a {
pub fn children(&self) -> impl Iterator<Item = Node<'tree>> {
let me = self.clone();
(0..self.child_count())
.into_iter()
.map(move |i| self.child(i).unwrap())
.map(move |i| me.child(i).unwrap())
}
pub fn named_child<'a>(&'a self, i: usize) -> Option<Self> {

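`timeout_micros`, `set_timeout_micros`, and the unsafe cancellation-flag accessors above replace the old `set_operation_limit` API on the Rust side. A short sketch of the timeout semantics follows, condensed from the updated timeout tests earlier in this diff; `language` and `huge_source` stand in for caller-supplied values, and the function name is illustrative.

```rust
use tree_sitter::{Language, Parser, Tree};

// Sketch: bound the time spent parsing, then finish the parse if needed.
fn parse_with_budget(language: Language, huge_source: &str) -> Option<Tree> {
    let mut parser = Parser::new();
    parser.set_language(language).unwrap();

    // Give up after roughly one millisecond of parsing work.
    parser.set_timeout_micros(1000);
    if let Some(tree) = parser.parse(huge_source, None) {
        return Some(tree);
    }

    // A timed-out parse returns None but keeps its internal state. Parsing
    // the same text again resumes where it stopped; calling reset() first
    // would discard that state and start over from the beginning.
    parser.set_timeout_micros(0); // zero disables the timeout
    parser.parse(huge_source, None)
}
```

The resume-versus-reset distinction is exactly what `test_parsing_with_a_timeout_and_a_reset` above exercises: without `reset()`, a resumed parse does not see edits made to the beginning of the text.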

@ -79,7 +79,7 @@ typedef struct {
uint32_t context[2];
} TSTreeCursor;
TSParser *ts_parser_new();
TSParser *ts_parser_new(void);
void ts_parser_delete(TSParser *);
const TSLanguage *ts_parser_language(const TSParser *);
bool ts_parser_set_language(TSParser *, const TSLanguage *);
@ -90,10 +90,10 @@ void ts_parser_halt_on_error(TSParser *, bool);
TSTree *ts_parser_parse(TSParser *, const TSTree *, TSInput);
TSTree *ts_parser_parse_string(TSParser *, const TSTree *, const char *, uint32_t);
TSTree *ts_parser_parse_string_encoding(TSParser *, const TSTree *, const char *, uint32_t, TSInputEncoding);
bool ts_parser_enabled(const TSParser *);
void ts_parser_set_enabled(TSParser *, bool);
size_t ts_parser_operation_limit(const TSParser *);
void ts_parser_set_operation_limit(TSParser *, size_t);
const size_t *ts_parser_cancellation_flag(const TSParser *);
void ts_parser_set_cancellation_flag(TSParser *, const size_t *);
uint64_t ts_parser_timeout_micros(const TSParser *);
void ts_parser_set_timeout_micros(TSParser *, uint64_t);
void ts_parser_reset(TSParser *);
void ts_parser_set_included_ranges(TSParser *, const TSRange *, uint32_t);
const TSRange *ts_parser_included_ranges(const TSParser *, uint32_t *);


@ -104,7 +104,7 @@ struct TSLanguage {
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)();
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);


@ -15,7 +15,7 @@ void *ts_record_malloc(size_t);
void *ts_record_calloc(size_t, size_t);
void *ts_record_realloc(void *, size_t);
void ts_record_free(void *);
bool ts_record_allocations_toggle(bool);
bool ts_toggle_allocation_recording(bool);
static inline void *ts_malloc(size_t size) {
return ts_record_malloc(size);
@ -33,10 +33,6 @@ static inline void ts_free(void *buffer) {
ts_record_free(buffer);
}
static inline bool ts_toggle_allocation_recording(bool value) {
return ts_record_allocations_toggle(value);
}
#else
#include <stdlib.h>


@ -7,6 +7,10 @@
#include <windows.h>
static inline size_t atomic_load(const volatile size_t *p) {
return *p;
}
static inline uint32_t atomic_inc(volatile uint32_t *p) {
return InterlockedIncrement(p);
}
@ -17,6 +21,10 @@ static inline uint32_t atomic_dec(volatile uint32_t *p) {
#else
static inline size_t atomic_load(const volatile size_t *p) {
return __atomic_load_n(p, __ATOMIC_RELAXED);
}
static inline uint32_t atomic_inc(volatile uint32_t *p) {
return __sync_add_and_fetch(p, 1u);
}

lib/src/clock.h

@ -0,0 +1,141 @@
#ifndef TREE_SITTER_CLOCK_H_
#define TREE_SITTER_CLOCK_H_
#include <stdint.h>
typedef uint64_t TSDuration;
#ifdef _WIN32
// Windows:
// * Represent a time as a performance counter value.
// * Represent a duration as a number of performance counter ticks.
#include <windows.h>
typedef uint64_t TSClock;
static inline TSDuration duration_from_micros(uint64_t micros) {
LARGE_INTEGER frequency;
QueryPerformanceFrequency(&frequency);
return micros * (uint64_t)frequency.QuadPart / 1000000;
}
static inline uint64_t duration_to_micros(TSDuration self) {
LARGE_INTEGER frequency;
QueryPerformanceFrequency(&frequency);
return self * 1000000 / (uint64_t)frequency.QuadPart;
}
static inline TSClock clock_null(void) {
return 0;
}
static inline TSClock clock_now(void) {
LARGE_INTEGER result;
QueryPerformanceCounter(&result);
return (uint64_t)result.QuadPart;
}
static inline TSClock clock_after(TSClock base, TSDuration duration) {
return base + duration;
}
static inline bool clock_is_null(TSClock self) {
return !self;
}
static inline bool clock_is_gt(TSClock self, TSClock other) {
return self > other;
}
#elif defined(CLOCK_MONOTONIC)
// POSIX with monotonic clock support (Linux, macOS >= 10.12)
// * Represent a time as a monotonic (seconds, nanoseconds) pair.
// * Represent a duration as a number of microseconds.
//
// On these platforms, parse timeouts will correspond accurately to
// real time, regardless of what other processes are running.
#include <time.h>
typedef struct timespec TSClock;
static inline TSDuration duration_from_micros(uint64_t micros) {
return micros;
}
static inline uint64_t duration_to_micros(TSDuration self) {
return self;
}
static inline TSClock clock_now(void) {
TSClock result;
clock_gettime(CLOCK_MONOTONIC, &result);
return result;
}
static inline TSClock clock_null(void) {
return (TSClock) {0, 0};
}
static inline TSClock clock_after(TSClock base, TSDuration duration) {
TSClock result = base;
result.tv_sec += duration / 1000000;
result.tv_nsec += (duration % 1000000) * 1000;
return result;
}
static inline bool clock_is_null(TSClock self) {
return !self.tv_sec;
}
static inline bool clock_is_gt(TSClock self, TSClock other) {
if (self.tv_sec > other.tv_sec) return true;
if (self.tv_sec < other.tv_sec) return false;
return self.tv_nsec > other.tv_nsec;
}
#else
// POSIX without monotonic clock support
// * Represent a time as a process clock value.
// * Represent a duration as a number of process clock ticks.
//
// On these platforms, parse timeouts may be affected by other processes,
// which is not ideal, but is better than using a non-monotonic time API
// like `gettimeofday`.
#include <time.h>
typedef uint64_t TSClock;
static inline TSDuration duration_from_micros(uint64_t micros) {
return micros * (uint64_t)CLOCKS_PER_SEC / 1000000;
}
static inline uint64_t duration_to_micros(TSDuration self) {
return self * 1000000 / (uint64_t)CLOCKS_PER_SEC;
}
static inline TSClock clock_null(void) {
return 0;
}
static inline TSClock clock_now(void) {
return (uint64_t)clock();
}
static inline TSClock clock_after(TSClock base, TSDuration duration) {
return base + duration;
}
static inline bool clock_is_null(TSClock self) {
return !self;
}
static inline bool clock_is_gt(TSClock self, TSClock other) {
return self > other;
}
#endif
#endif // TREE_SITTER_CLOCK_H_


@ -326,13 +326,13 @@ static inline void iterator_print_state(Iterator *self) {
TreeCursorEntry entry = *array_back(&self->cursor.stack);
TSPoint start = iterator_start_position(self).extent;
TSPoint end = iterator_end_position(self).extent;
const char *name = ts_language_symbol_name(self->language, entry.subtree->symbol);
const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree));
printf(
"(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
name, self->in_padding ? "(p)" : " ",
self->visible_depth,
start.row, start.column,
end.row, end.column
start.row + 1, start.column,
end.row + 1, end.column
);
}
#endif
@ -361,7 +361,7 @@ unsigned ts_subtree_get_changed_ranges(const Subtree *old_tree, const Subtree *n
do {
#ifdef DEBUG_GET_CHANGED_RANGES
printf("At [%-2u, %-2u] Compare ", position.extent.row, position.extent.column);
printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column);
iterator_print_state(&old_iter);
printf("\tvs\t");
iterator_print_state(&new_iter);
@ -443,8 +443,8 @@ unsigned ts_subtree_get_changed_ranges(const Subtree *old_tree, const Subtree *n
#ifdef DEBUG_GET_CHANGED_RANGES
printf(
" change: [[%u, %u] - [%u, %u]]\n",
position.extent.row, position.extent.column,
next_position.extent.row, next_position.extent.column
position.extent.row + 1, position.extent.column,
next_position.extent.row + 1, next_position.extent.column
);
#endif


@ -36,7 +36,7 @@ static inline Length length_sub(Length len1, Length len2) {
return result;
}
static inline Length length_zero() {
static inline Length length_zero(void) {
Length result = {0, {0, 0}};
return result;
}


@ -4,7 +4,7 @@
// - include
// - utf8proc
#define _POSIX_SOURCE
#define _POSIX_C_SOURCE 200112L
#define UTF8PROC_STATIC
#include "./get_changed_ranges.c"


@ -22,7 +22,7 @@ TSNode ts_node_new(const TSTree *tree, const Subtree *subtree, Length position,
};
}
static inline TSNode ts_node__null() {
static inline TSNode ts_node__null(void) {
return ts_node_new(NULL, NULL, length_zero(), 0);
}


@ -1,19 +1,22 @@
#include <time.h>
#include <assert.h>
#include <stdio.h>
#include <limits.h>
#include <stdbool.h>
#include "tree_sitter/api.h"
#include "./subtree.h"
#include "./lexer.h"
#include "./length.h"
#include "./array.h"
#include "./language.h"
#include "./alloc.h"
#include "./stack.h"
#include "./reusable_node.h"
#include "./reduce_action.h"
#include "./array.h"
#include "./atomic.h"
#include "./clock.h"
#include "./error_costs.h"
#include "./get_changed_ranges.h"
#include "./language.h"
#include "./length.h"
#include "./lexer.h"
#include "./reduce_action.h"
#include "./reusable_node.h"
#include "./stack.h"
#include "./subtree.h"
#include "./tree.h"
#define LOG(...) \
@ -42,6 +45,7 @@ static const unsigned MAX_VERSION_COUNT = 6;
static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4;
static const unsigned MAX_SUMMARY_DEPTH = 16;
static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100;
typedef struct {
Subtree token;
@ -62,9 +66,11 @@ struct TSParser {
ReusableNode reusable_node;
void *external_scanner_payload;
FILE *dot_graph_file;
TSClock end_clock;
TSDuration timeout_duration;
unsigned accept_count;
size_t operation_limit;
volatile bool enabled;
unsigned operation_count;
const volatile size_t *cancellation_flag;
bool halt_on_error;
Subtree old_tree;
TSRangeArray included_range_differences;
@ -327,7 +333,7 @@ static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId pa
LOG(
"lex_external state:%d, row:%u, column:%u",
lex_mode.external_lex_state,
current_position.extent.row,
current_position.extent.row + 1,
current_position.extent.column
);
ts_lexer_start(&self->lexer);
@ -365,7 +371,7 @@ static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId pa
LOG(
"lex_internal state:%d, row:%u, column:%u",
lex_mode.lex_state,
current_position.extent.row,
current_position.extent.row + 1,
current_position.extent.column
);
ts_lexer_start(&self->lexer);
@ -1242,7 +1248,11 @@ static void ts_parser__recover(TSParser *self, StackVersion version, Subtree loo
}
}
static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_node_reuse) {
static bool ts_parser__advance(
TSParser *self,
StackVersion version,
bool allow_node_reuse
) {
TSStateId state = ts_stack_state(self->stack, version);
uint32_t position = ts_stack_position(self->stack, version).bytes;
Subtree last_external_token = ts_stack_last_external_token(self->stack, version);
@ -1274,6 +1284,17 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_
}
for (;;) {
if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) {
self->operation_count = 0;
if (
(self->cancellation_flag && atomic_load(self->cancellation_flag)) ||
(!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock))
) {
ts_subtree_release(&self->tree_pool, lookahead);
return false;
}
}
StackVersion last_reduction_version = STACK_VERSION_NONE;
for (uint32_t i = 0; i < table_entry.action_count; i++) {
@ -1302,7 +1323,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_
ts_parser__shift(self, version, next_state, lookahead, action.params.extra);
if (did_reuse) reusable_node_advance(&self->reusable_node);
return;
return true;
}
case TSParseActionTypeReduce: {
@ -1322,7 +1343,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_
case TSParseActionTypeAccept: {
LOG("accept");
ts_parser__accept(self, version, lookahead);
return;
return true;
}
case TSParseActionTypeRecover: {
@ -1332,7 +1353,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_
ts_parser__recover(self, version, lookahead);
if (did_reuse) reusable_node_advance(&self->reusable_node);
return;
return true;
}
}
}
@ -1371,7 +1392,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_
if (state == ERROR_STATE) {
ts_parser__recover(self, version, lookahead);
return;
return true;
}
if (ts_parser__breakdown_top_of_stack(self, version)) {
@ -1381,7 +1402,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_
LOG("detect_error");
ts_stack_pause(self->stack, version, ts_subtree_leaf_symbol(lookahead));
ts_subtree_release(&self->tree_pool, lookahead);
return;
return true;
}
}
@ -1480,7 +1501,7 @@ static bool ts_parser_has_outstanding_parse(TSParser *self) {
// Parser - Public
TSParser *ts_parser_new() {
TSParser *ts_parser_new(void) {
TSParser *self = ts_calloc(1, sizeof(TSParser));
ts_lexer_init(&self->lexer);
array_init(&self->reduce_actions);
@ -1491,8 +1512,10 @@ TSParser *ts_parser_new() {
self->reusable_node = reusable_node_new();
self->dot_graph_file = NULL;
self->halt_on_error = false;
self->enabled = true;
self->operation_limit = SIZE_MAX;
self->cancellation_flag = NULL;
self->timeout_duration = 0;
self->end_clock = clock_null();
self->operation_count = 0;
self->old_tree = NULL_SUBTREE;
self->scratch_tree.ptr = &self->scratch_tree_data;
self->included_range_differences = (TSRangeArray) array_new();
@ -1569,20 +1592,20 @@ void ts_parser_halt_on_error(TSParser *self, bool should_halt_on_error) {
self->halt_on_error = should_halt_on_error;
}
bool ts_parser_enabled(const TSParser *self) {
return self->enabled;
const size_t *ts_parser_cancellation_flag(const TSParser *self) {
return (const size_t *)self->cancellation_flag;
}
void ts_parser_set_enabled(TSParser *self, bool enabled) {
self->enabled = enabled;
void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag) {
self->cancellation_flag = (const volatile size_t *)flag;
}
size_t ts_parser_operation_limit(const TSParser *self) {
return self->operation_limit;
uint64_t ts_parser_timeout_micros(const TSParser *self) {
return duration_to_micros(self->timeout_duration);
}
void ts_parser_set_operation_limit(TSParser *self, size_t limit) {
self->operation_limit = limit;
void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) {
self->timeout_duration = duration_from_micros(timeout_micros);
}
void ts_parser_set_included_ranges(TSParser *self, const TSRange *ranges, uint32_t count) {
@ -1645,24 +1668,26 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) {
}
uint32_t position = 0, last_position = 0, version_count = 0;
size_t operation_count = 0;
self->operation_count = 0;
if (self->timeout_duration) {
self->end_clock = clock_after(clock_now(), self->timeout_duration);
} else {
self->end_clock = clock_null();
}
do {
for (StackVersion version = 0;
version_count = ts_stack_version_count(self->stack), version < version_count;
version++) {
if (operation_count > self->operation_limit || !self->enabled) return NULL;
operation_count++;
bool allow_node_reuse = version_count == 1;
while (ts_stack_is_active(self->stack, version)) {
LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u",
version, ts_stack_version_count(self->stack),
ts_stack_state(self->stack, version),
ts_stack_position(self->stack, version).extent.row,
ts_stack_position(self->stack, version).extent.row + 1,
ts_stack_position(self->stack, version).extent.column);
ts_parser__advance(self, version, allow_node_reuse);
if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL;
LOG_STACK();
position = ts_stack_position(self->stack, version).bytes;

View file

@ -11,7 +11,7 @@ typedef struct {
Subtree last_external_token;
} ReusableNode;
static inline ReusableNode reusable_node_new() {
static inline ReusableNode reusable_node_new(void) {
return (ReusableNode) {array_new(), NULL_SUBTREE};
}


@ -712,9 +712,9 @@ void ts_stack_clear(Stack *self) {
}
bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) {
array_reserve(&self->iterators, 32);
bool was_recording_allocations = ts_toggle_allocation_recording(false);
if (!f)
f = stderr;
if (!f) f = stderr;
fprintf(f, "digraph stack {\n");
fprintf(f, "rankdir=\"RL\";\n");
@ -785,7 +785,7 @@ bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f)
fprintf(
f,
" tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n",
node->position.extent.row,
node->position.extent.row + 1,
node->position.extent.column,
node->node_count,
node->error_cost,

View file

@ -92,11 +92,11 @@ StackSummary *ts_stack_get_summary(Stack *, StackVersion);
// Get the total cost of all errors on the given version of the stack.
unsigned ts_stack_error_cost(const Stack *, StackVersion version);
// Determine whether the given two stack versions can be merged.
bool ts_stack_merge(Stack *, StackVersion, StackVersion);
// Merge the given two stack versions if possible, returning true
// if they were successfully merged and false otherwise.
bool ts_stack_merge(Stack *, StackVersion, StackVersion);
// Determine whether the given two stack versions can be merged.
bool ts_stack_can_merge(Stack *, StackVersion, StackVersion);
TSSymbol ts_stack_resume(Stack *, StackVersion);


@ -67,7 +67,7 @@ while getopts "dDghl:e:s:t:" option; do
esac
done
shift $(expr $OPTIND - 1 )
shift $(expr $OPTIND - 1)
if [[ -n $TREE_SITTER_TEST_LANGUAGE_FILTER || -n $TREE_SITTER_TEST_EXAMPLE_FILTER || -n $TREE_SITTER_TEST_TRIAL_FILTER ]]; then
top_level_filter=corpus
@ -76,8 +76,11 @@ else
fi
if [[ "${mode}" == "debug" ]]; then
test_binary=$(cargo test --no-run --package=tree-sitter-cli --lib --message-format=json 2> /dev/null | jq -rs '.[-1].filenames[0]')
lldb "${test_binary}" -- "${top_level_filter}"
test_binary=$(
cargo test -p tree-sitter-cli --no-run --message-format=json 2> /dev/null |\
jq -rs 'map(select(.target.name == "tree-sitter-cli" and .executable))[0].executable'
)
lldb "${test_binary}" -- $top_level_filter
else
cargo test --package=tree-sitter-cli --lib --jobs 1 $top_level_filter -- --nocapture
cargo test -p tree-sitter-cli --jobs 1 $top_level_filter -- --nocapture
fi


@ -4,5 +4,5 @@ setlocal
set TREE_SITTER_TEST=1
set RUST_TEST_THREADS=1
set RUST_BACKTRACE=full
cargo test "%~1" -- --nocapture
cargo test -p tree-sitter-cli "%~1" -- --nocapture
endlocal