From 98e4fd22efb8e59086d4d612e91a21dff5854ca5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 8 Mar 2019 13:13:02 -0800 Subject: [PATCH 01/33] Start work on a C API for syntax highlighting --- cli/src/tests/helpers/fixtures.rs | 8 +- cli/src/tests/highlight_test.rs | 93 +++++++- highlight/Cargo.toml | 3 + highlight/include/tree_sitter/highlight.h | 102 +++++++++ highlight/src/c_lib.rs | 248 ++++++++++++++++++++++ highlight/src/lib.rs | 14 +- 6 files changed, 463 insertions(+), 5 deletions(-) create mode 100644 highlight/include/tree_sitter/highlight.h create mode 100644 highlight/src/c_lib.rs diff --git a/cli/src/tests/helpers/fixtures.rs b/cli/src/tests/helpers/fixtures.rs index e7ba2e55..4389797e 100644 --- a/cli/src/tests/helpers/fixtures.rs +++ b/cli/src/tests/helpers/fixtures.rs @@ -21,12 +21,16 @@ pub fn get_language(name: &str) -> Language { .unwrap() } -pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet { +pub fn get_property_sheet_json(language_name: &str, sheet_name: &str) -> String { let path = GRAMMARS_DIR .join(language_name) .join("src") .join(sheet_name); - let json = fs::read_to_string(path).unwrap(); + fs::read_to_string(path).unwrap() +} + +pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet { + let json = get_property_sheet_json(language_name, sheet_name); let language = get_language(language_name); load_property_sheet(language, &json).unwrap() } diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index accca617..2c70f8cc 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -1,7 +1,9 @@ -use super::helpers::fixtures::{get_language, get_property_sheet}; +use super::helpers::fixtures::{get_language, get_property_sheet, get_property_sheet_json}; use lazy_static::lazy_static; +use std::ffi::CString; +use std::{ptr, slice, str}; use tree_sitter::{Language, PropertySheet}; -use tree_sitter_highlight::{highlight, 
highlight_html, HighlightEvent, Properties, Scope}; +use tree_sitter_highlight::{c, highlight, highlight_html, HighlightEvent, Properties, Scope}; lazy_static! { static ref JS_SHEET: PropertySheet = @@ -153,6 +155,93 @@ fn test_highlighting_empty_lines() { ); } +#[test] +fn test_highlighting_via_c_api() { + let js_lang = get_language("javascript"); + let html_lang = get_language("html"); + let js_sheet = get_property_sheet_json("javascript", "highlights.json"); + let js_sheet = c_string(&js_sheet); + let html_sheet = get_property_sheet_json("html", "highlights.json"); + let html_sheet = c_string(&html_sheet); + + let class_tag = c_string("class=tag"); + let class_function = c_string("class=function"); + let class_string = c_string("class=string"); + let class_keyword = c_string("class=keyword"); + + let js_scope_name = c_string("source.js"); + let html_scope_name = c_string("text.html.basic"); + let injection_regex = c_string("^(javascript|js)$"); + let source_code = c_string(""); + + let attribute_strings = &mut [ptr::null(); Scope::Unknown as usize + 1]; + attribute_strings[Scope::Tag as usize] = class_tag.as_ptr(); + attribute_strings[Scope::String as usize] = class_string.as_ptr(); + attribute_strings[Scope::Keyword as usize] = class_keyword.as_ptr(); + attribute_strings[Scope::Function as usize] = class_function.as_ptr(); + + let highlighter = c::ts_highlighter_new(attribute_strings.as_ptr()); + let buffer = c::ts_highlight_buffer_new(); + + c::ts_highlighter_add_language( + highlighter, + html_scope_name.as_ptr(), + html_lang, + html_sheet.as_ptr(), + ptr::null_mut(), + ); + c::ts_highlighter_add_language( + highlighter, + js_scope_name.as_ptr(), + js_lang, + js_sheet.as_ptr(), + injection_regex.as_ptr(), + ); + c::ts_highlighter_highlight( + highlighter, + html_scope_name.as_ptr(), + source_code.as_ptr(), + source_code.as_bytes().len() as u32, + buffer, + ); + + let output_bytes = c::ts_highlight_buffer_content(buffer); + let output_line_offsets = 
c::ts_highlight_buffer_line_offsets(buffer); + let output_len = c::ts_highlight_buffer_len(buffer); + let output_line_count = c::ts_highlight_buffer_line_count(buffer); + + let output_bytes = unsafe { slice::from_raw_parts(output_bytes, output_len as usize) }; + let output_line_offsets = + unsafe { slice::from_raw_parts(output_line_offsets, output_line_count as usize) }; + + let mut lines = Vec::new(); + for i in 0..(output_line_count as usize) { + let line_start = output_line_offsets[i] as usize; + let line_end = output_line_offsets + .get(i + 1) + .map(|x| *x as usize) + .unwrap_or(output_bytes.len()); + lines.push(str::from_utf8(&output_bytes[line_start..line_end]).unwrap()); + } + + assert_eq!( + lines, + vec![ + "<script>", + "const a = b('c');", + "c.d();", + "</script>", + ] + ); + + c::ts_highlighter_delete(highlighter); + c::ts_highlight_buffer_delete(buffer); +} + +fn c_string(s: &str) -> CString { + CString::new(s.as_bytes().to_vec()).unwrap() +} + fn test_language_for_injection_string<'a>( string: &str, ) -> Option<(Language, &'a PropertySheet)> { diff --git a/highlight/Cargo.toml b/highlight/Cargo.toml index 688a2f6c..cf807d9f 100644 --- a/highlight/Cargo.toml +++ b/highlight/Cargo.toml @@ -12,6 +12,9 @@ edition = "2018" keywords = ["incremental", "parsing", "syntax", "highlighting"] categories = ["parsing", "text-editors"] +[lib] +crate-type = ["lib", "staticlib"] + [dependencies] regex = "1" serde = "1.0" diff --git a/highlight/include/tree_sitter/highlight.h b/highlight/include/tree_sitter/highlight.h new file mode 100644 index 00000000..dd2f99c3 --- /dev/null +++ b/highlight/include/tree_sitter/highlight.h @@ -0,0 +1,102 @@ +#ifndef TREE_SITTER_HIGHLIGHT_H_ +#define TREE_SITTER_HIGHLIGHT_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef enum { + TSHighlightOk, + TSHighlightUnknownScope, +} TSHighlightError; + +// The list of scopes which can be styled for syntax highlighting. 
+// When constructing a `TSHighlighter`, you need to construct an +// `attribute_strings` array whose elements correspond to these values. +enum TSHighlightScope { + TSHighlightScopeAttribute, + TSHighlightScopeComment, + TSHighlightScopeConstant, + TSHighlightScopeConstantBuiltin, + TSHighlightScopeConstructor, + TSHighlightScopeConstructorBuiltin, + TSHighlightScopeEmbedded, + TSHighlightScopeEscape, + TSHighlightScopeFunction, + TSHighlightScopeFunctionBuiltin, + TSHighlightScopeKeyword, + TSHighlightScopeNumber, + TSHighlightScopeOperator, + TSHighlightScopeProperty, + TSHighlightScopePropertyBuiltin, + TSHighlightScopePunctuation, + TSHighlightScopePunctuationBracket, + TSHighlightScopePunctuationDelimiter, + TSHighlightScopePunctuationSpecial, + TSHighlightScopeString, + TSHighlightScopeStringSpecial, + TSHighlightScopeTag, + TSHighlightScopeType, + TSHighlightScopeTypeBuiltin, + TSHighlightScopeVariable, + TSHighlightScopeVariableBuiltin, + TSHighlightScopeUnknown, +}; + +typedef struct TSHighlighter TSHighlighter; +typedef struct TSHighlightBuffer TSHighlightBuffer; + +// Construct a `TSHighlighter` by providing a list of strings containing +// the HTML attributes that should be applied for each highlight scope. +TSHighlighter *ts_highlighter_new( + const char **attribute_strings +); + +// Delete a syntax highlighter. +void ts_highlighter_delete(TSHighlighter *); + +// Add a `TSLanguage` to a highlighter. The language is associated with a +// scope name, which can be used later to select a language for syntax +// highlighting. Along with the language, you must provide a JSON string +// containing the compiled PropertySheet to use for syntax highlighting +// with that language. You can also optionally provide an 'injection regex', +// which is used to detect when this language has been embedded in a document +// written in a different language. 
+int ts_highlighter_add_language( + TSHighlighter *self, + const char *scope_name, + const TSLanguage *language, + const char *property_sheet_json, + const char *injection_regex +); + +// Compute syntax highlighting for a given document. You must first +// create a `TSHighlightBuffer` to hold the output. +int ts_highlighter_highlight( + TSHighlighter *self, + const char *scope_name, + const char *source_code, + uint32_t source_code_len, + TSHighlightBuffer *output +); + +// TSHighlightBuffer: This struct stores the HTML output of syntax +// highlighting. It can be reused for multiple highlighting calls. +TSHighlightBuffer *ts_highlight_buffer_new(); + +// Delete a highlight buffer. +void ts_highlight_buffer_delete(TSHighlightBuffer *); + +// Access the HTML content of a highlight buffer. +const uint8_t *ts_highlight_buffer_content(const TSHighlightBuffer *); +const uint32_t *ts_highlight_buffer_line_offsets(const TSHighlightBuffer *); +uint32_t ts_highlight_buffer_len(const TSHighlightBuffer *); +uint32_t ts_highlight_buffer_line_count(const TSHighlightBuffer *); + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_HIGHLIGHT_H_ diff --git a/highlight/src/c_lib.rs b/highlight/src/c_lib.rs new file mode 100644 index 00000000..f6b01984 --- /dev/null +++ b/highlight/src/c_lib.rs @@ -0,0 +1,248 @@ +use super::{escape, load_property_sheet, HighlightEvent, Highlighter, Properties, Scope}; +use regex::Regex; +use std::collections::HashMap; +use std::ffi::CStr; +use std::io::Write; +use std::os::raw::c_char; +use std::process::abort; +use std::{fmt, slice}; +use tree_sitter::{Language, PropertySheet}; + +struct LanguageConfiguration { + language: Language, + property_sheet: PropertySheet, + injection_regex: Option, +} + +pub struct TSHighlighter { + languages: HashMap, + attribute_strings: Vec<&'static [u8]>, +} + +pub struct TSHighlightBuffer { + html: Vec, + line_offsets: Vec, +} + +#[repr(C)] +pub enum ErrorCode { + Ok, + UnknownScope, +} + +#[no_mangle] +pub extern 
"C" fn ts_highlighter_new(attribute_strings: *const *const c_char) -> *mut TSHighlighter { + let attribute_strings = + unsafe { slice::from_raw_parts(attribute_strings, Scope::Unknown as usize + 1) }; + let attribute_strings = attribute_strings + .into_iter() + .map(|s| { + if s.is_null() { + &[] + } else { + unsafe { CStr::from_ptr(*s).to_bytes() } + } + }) + .collect(); + Box::into_raw(Box::new(TSHighlighter { + languages: HashMap::new(), + attribute_strings, + })) +} + +#[no_mangle] +pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer { + Box::into_raw(Box::new(TSHighlightBuffer { + html: Vec::new(), + line_offsets: Vec::new(), + })) +} + +#[no_mangle] +pub extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) { + drop(unsafe { Box::from_raw(this) }) +} + +#[no_mangle] +pub extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) { + drop(unsafe { Box::from_raw(this) }) +} + +#[no_mangle] +pub extern "C" fn ts_highlight_buffer_content(this: *mut TSHighlightBuffer) -> *const u8 { + let this = unwrap_ptr(this); + this.html.as_slice().as_ptr() +} + +#[no_mangle] +pub extern "C" fn ts_highlight_buffer_line_offsets(this: *mut TSHighlightBuffer) -> *const u32 { + let this = unwrap_ptr(this); + this.line_offsets.as_slice().as_ptr() +} + +#[no_mangle] +pub extern "C" fn ts_highlight_buffer_len(this: *mut TSHighlightBuffer) -> u32 { + let this = unwrap_ptr(this); + this.html.len() as u32 +} + +#[no_mangle] +pub extern "C" fn ts_highlight_buffer_line_count(this: *mut TSHighlightBuffer) -> u32 { + let this = unwrap_ptr(this); + this.line_offsets.len() as u32 +} + +#[no_mangle] +pub extern "C" fn ts_highlighter_add_language( + this: *mut TSHighlighter, + scope_name: *const c_char, + language: Language, + property_sheet_json: *const c_char, + injection_regex: *const c_char, +) -> ErrorCode { + let this = unwrap_ptr(this); + let scope_name = unsafe { CStr::from_ptr(scope_name) }; + let scope_name = 
unwrap(scope_name.to_str()).to_string(); + let property_sheet_json = unsafe { CStr::from_ptr(property_sheet_json) }; + let property_sheet_json = unwrap(property_sheet_json.to_str()); + + let property_sheet = unwrap(load_property_sheet(language, property_sheet_json)); + let injection_regex = if injection_regex.is_null() { + None + } else { + let pattern = unsafe { CStr::from_ptr(injection_regex) }; + Some(unwrap(Regex::new(unwrap(pattern.to_str())))) + }; + + this.languages.insert( + scope_name, + LanguageConfiguration { + language, + property_sheet, + injection_regex, + }, + ); + + ErrorCode::Ok +} + +#[no_mangle] +pub extern "C" fn ts_highlighter_highlight( + this: *mut TSHighlighter, + scope_name: *const c_char, + source_code: *const c_char, + source_code_len: u32, + output: *mut TSHighlightBuffer, +) -> ErrorCode { + let this = unwrap_ptr(this); + let output = unwrap_ptr(output); + let scope_name = unwrap(unsafe { CStr::from_ptr(scope_name).to_str() }); + let source_code = + unsafe { slice::from_raw_parts(source_code as *const u8, source_code_len as usize) }; + this.highlight(source_code, scope_name, output) +} + +impl TSHighlighter { + fn highlight( + &mut self, + source_code: &[u8], + scope_name: &str, + output: &mut TSHighlightBuffer, + ) -> ErrorCode { + let configuration = self.languages.get(scope_name); + if configuration.is_none() { + return ErrorCode::UnknownScope; + } + let configuration = configuration.unwrap(); + let languages = &self.languages; + + let highlighter = unwrap(Highlighter::new( + source_code, + configuration.language, + &configuration.property_sheet, + |injection_string| { + languages.values().find_map(|conf| { + conf.injection_regex.as_ref().and_then(|regex| { + if regex.is_match(injection_string) { + Some((conf.language, &conf.property_sheet)) + } else { + None + } + }) + }) + }, + )); + + output.html.clear(); + output.line_offsets.clear(); + output.line_offsets.push(0); + let mut scopes = Vec::new(); + for event in highlighter { + 
match event { + HighlightEvent::ScopeStart(s) => { + scopes.push(s); + output.start_scope(s, &self.attribute_strings); + } + HighlightEvent::ScopeEnd => { + scopes.pop(); + output.end_scope(); + } + HighlightEvent::Source(src) => { + output.add_text(src, &scopes, &self.attribute_strings); + } + }; + } + + ErrorCode::Ok + } +} + +impl TSHighlightBuffer { + fn start_scope(&mut self, s: Scope, attribute_strings: &[&[u8]]) { + let attribute_string = attribute_strings[s as usize]; + self.html.extend(b""); + } + + fn end_scope(&mut self) { + self.html.extend(b""); + } + + fn finish_line(&mut self) { + self.line_offsets.push(self.html.len() as u32); + } + + fn add_text(&mut self, src: &str, scopes: &Vec, attribute_strings: &[&[u8]]) { + let mut multiline = false; + for line in src.split('\n') { + let line = line.trim_end_matches('\r'); + if multiline { + scopes.iter().for_each(|_| self.end_scope()); + self.finish_line(); + scopes + .iter() + .for_each(|scope| self.start_scope(*scope, attribute_strings)); + } + write!(&mut self.html, "{}", escape::Escape(line)).unwrap(); + multiline = true; + } + } +} + +fn unwrap_ptr<'a, T>(result: *mut T) -> &'a mut T { + unsafe { result.as_mut() }.unwrap_or_else(|| { + eprintln!("{}:{} - pointer must not be null", file!(), line!()); + abort(); + }) +} + +fn unwrap(result: Result) -> T { + result.unwrap_or_else(|error| { + eprintln!("tree-sitter highlight error: {}", error); + abort(); + }) +} diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index e5499fbc..25d8d59f 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -1,9 +1,11 @@ +pub mod c_lib; mod escape; +pub use c_lib as c; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_derive::*; use std::cmp; -use std::fmt::Write; +use std::fmt::{self, Write}; use std::mem::transmute; use std::str; use std::usize; @@ -151,6 +153,16 @@ pub enum PropertySheetError { InvalidFormat(String), } +impl fmt::Display for PropertySheetError { + fn fmt(&self, f: 
&mut fmt::Formatter) -> fmt::Result { + match self { + PropertySheetError::InvalidJSON(e) => e.fmt(f), + PropertySheetError::InvalidRegex(e) => e.fmt(f), + PropertySheetError::InvalidFormat(e) => e.fmt(f), + } + } +} + pub fn load_property_sheet( language: Language, json: &str, From 14a483f4c395bb151f6dd3f5a6a98ac8ab850c33 Mon Sep 17 00:00:00 2001 From: Joshua Warner Date: Fri, 8 Mar 2019 20:56:24 -0800 Subject: [PATCH 02/33] Simplify lifetimes in Node::children in the rust bindings --- lib/binding/lib.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index 1c7a6caf..3703e299 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -469,10 +469,11 @@ impl<'tree> Node<'tree> { unsafe { ffi::ts_node_child_count(self.0) as usize } } - pub fn children<'a>(&'a self) -> impl Iterator> + 'a { + pub fn children(&self) -> impl Iterator> { + let me = self.clone(); (0..self.child_count()) .into_iter() - .map(move |i| self.child(i).unwrap()) + .map(move |i| me.child(i).unwrap()) } pub fn named_child<'a>(&'a self, i: usize) -> Option { From 6f71b8840b196a6e5c6f4b6a6586e1f990a51690 Mon Sep 17 00:00:00 2001 From: Joshua Warner Date: Fri, 8 Mar 2019 21:22:13 -0800 Subject: [PATCH 03/33] Fix warning about undefined snprintf --- lib/src/lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/src/lib.c b/lib/src/lib.c index 104ded78..fc5fbc92 100644 --- a/lib/src/lib.c +++ b/lib/src/lib.c @@ -4,7 +4,7 @@ // - include // - utf8proc -#define _POSIX_SOURCE +#define _POSIX_C_SOURCE 200112L #define UTF8PROC_STATIC #include "./get_changed_ranges.c" From d19c8751345f2063fda116a03465a555bfe0e679 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 12 Mar 2019 11:50:16 -0700 Subject: [PATCH 04/33] cli: :arrow_up: rsass --- Cargo.lock | 6 +++--- cli/Cargo.toml | 2 +- cli/src/properties.rs | 22 ++++++++++++++++++++++ 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock 
index 8215ca3d..52e2c3c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -465,7 +465,7 @@ dependencies = [ [[package]] name = "rsass" -version = "0.9.6" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -653,7 +653,7 @@ dependencies = [ "rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "rsass 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)", + "rsass 0.9.8 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", @@ -795,7 +795,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" "checksum regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" "checksum remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5" -"checksum rsass 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7a5dde55023a6c19470f7aeb59f75f897d8b80cbe00d61dfcaf7bbbe3de4c0a6" +"checksum rsass 0.9.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7f4534cc03040beacd2668621815f26fe57e5b7cfe085790f98e5e87c1612316" "checksum rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395" "checksum rustc_version 0.2.3 
(registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" "checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index b06f5c9d..037278fa 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -32,7 +32,7 @@ serde = "1.0" serde_derive = "1.0" regex-syntax = "0.6.4" regex = "1" -rsass = "0.9" +rsass = "0.9.8" [dependencies.tree-sitter] version = ">= 0.3.7" diff --git a/cli/src/properties.rs b/cli/src/properties.rs index a7dc9ec1..eae4a129 100644 --- a/cli/src/properties.rs +++ b/cli/src/properties.rs @@ -889,6 +889,28 @@ mod tests { ), ]) ); + + // Handle differently-formatted calls + let sheet2 = generate_property_sheet( + "foo.css", + " + a { + b: f(); + c: f( + g(h), + i, + \"j\", + 10 + ); + } + ", + ) + .unwrap(); + + assert_eq!( + query_simple(&sheet2, vec!["a"])["c"], + query_simple(&sheet, vec!["a"])["c"] + ); } #[test] From 2de54c101ebb7c65423be3e6a502875dc6db4b1f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 12 Mar 2019 11:54:31 -0700 Subject: [PATCH 05/33] cli: put a newline character at the end of grammar.json files --- cli/src/generate/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 816d0613..4b2ae98b 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -143,7 +143,9 @@ fn load_js_grammar_file(grammar_path: &Path) -> Result { Some(code) => return Err(Error(format!("Node process exited with status {}", code))), } - Ok(String::from_utf8(output.stdout).expect("Got invalid UTF8 from node")) + let mut result = String::from_utf8(output.stdout).expect("Got invalid UTF8 from node"); + result.push('\n'); + Ok(result) } fn ensure_file>(path: &PathBuf, f: impl Fn() -> T) -> Result<()> { From 8a675d184c57cb7582bc0a3c7a9e367c425a437e Mon Sep 17 00:00:00 2001 
From: Max Brunsfeld Date: Tue, 12 Mar 2019 17:14:41 -0700 Subject: [PATCH 06/33] test script: All tests are in the CLI package --- script/test | 2 +- script/test.cmd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/script/test b/script/test index 09cf9f83..1ec50e30 100755 --- a/script/test +++ b/script/test @@ -79,5 +79,5 @@ if [[ "${mode}" == "debug" ]]; then test_binary=$(cargo test --no-run --message-format=json 2> /dev/null | jq -rs '.[-1].filenames[0]') lldb "${test_binary}" -- $top_level_filter else - cargo test --jobs 1 $top_level_filter -- --nocapture + cargo test -p tree-sitter-cli --jobs 1 $top_level_filter -- --nocapture fi diff --git a/script/test.cmd b/script/test.cmd index d1b462e8..de1f8500 100644 --- a/script/test.cmd +++ b/script/test.cmd @@ -4,5 +4,5 @@ setlocal set TREE_SITTER_TEST=1 set RUST_TEST_THREADS=1 set RUST_BACKTRACE=full -cargo test "%~1" -- --nocapture +cargo test -p tree-sitter-cli "%~1" -- --nocapture endlocal From eabecafa8d4f99b5ea936fdd45f2fea662284e2c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 12 Mar 2019 17:24:21 -0700 Subject: [PATCH 07/33] Highlight.highlight does not mutate --- highlight/src/c_lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/highlight/src/c_lib.rs b/highlight/src/c_lib.rs index f6b01984..919da8c5 100644 --- a/highlight/src/c_lib.rs +++ b/highlight/src/c_lib.rs @@ -144,7 +144,7 @@ pub extern "C" fn ts_highlighter_highlight( impl TSHighlighter { fn highlight( - &mut self, + &self, source_code: &[u8], scope_name: &str, output: &mut TSHighlightBuffer, From 2a5409feab5ada09a8cce7a1fbcf74d6b9a76fd6 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 13 Mar 2019 09:22:09 -0700 Subject: [PATCH 08/33] Indicate in C API that highlight method doesn't mutate Highlighter --- highlight/include/tree_sitter/highlight.h | 2 +- highlight/src/c_lib.rs | 27 +++++++++++++++-------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git 
a/highlight/include/tree_sitter/highlight.h b/highlight/include/tree_sitter/highlight.h index dd2f99c3..7b34aef9 100644 --- a/highlight/include/tree_sitter/highlight.h +++ b/highlight/include/tree_sitter/highlight.h @@ -75,7 +75,7 @@ int ts_highlighter_add_language( // Compute syntax highlighting for a given document. You must first // create a `TSHighlightBuffer` to hold the output. int ts_highlighter_highlight( - TSHighlighter *self, + const TSHighlighter *self, const char *scope_name, const char *source_code, uint32_t source_code_len, diff --git a/highlight/src/c_lib.rs b/highlight/src/c_lib.rs index 919da8c5..ce9f3936 100644 --- a/highlight/src/c_lib.rs +++ b/highlight/src/c_lib.rs @@ -31,7 +31,9 @@ pub enum ErrorCode { } #[no_mangle] -pub extern "C" fn ts_highlighter_new(attribute_strings: *const *const c_char) -> *mut TSHighlighter { +pub extern "C" fn ts_highlighter_new( + attribute_strings: *const *const c_char, +) -> *mut TSHighlighter { let attribute_strings = unsafe { slice::from_raw_parts(attribute_strings, Scope::Unknown as usize + 1) }; let attribute_strings = attribute_strings @@ -69,25 +71,25 @@ pub extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) { } #[no_mangle] -pub extern "C" fn ts_highlight_buffer_content(this: *mut TSHighlightBuffer) -> *const u8 { +pub extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 { let this = unwrap_ptr(this); this.html.as_slice().as_ptr() } #[no_mangle] -pub extern "C" fn ts_highlight_buffer_line_offsets(this: *mut TSHighlightBuffer) -> *const u32 { +pub extern "C" fn ts_highlight_buffer_line_offsets(this: *const TSHighlightBuffer) -> *const u32 { let this = unwrap_ptr(this); this.line_offsets.as_slice().as_ptr() } #[no_mangle] -pub extern "C" fn ts_highlight_buffer_len(this: *mut TSHighlightBuffer) -> u32 { +pub extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 { let this = unwrap_ptr(this); this.html.len() as u32 } #[no_mangle] -pub 
extern "C" fn ts_highlight_buffer_line_count(this: *mut TSHighlightBuffer) -> u32 { +pub extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 { let this = unwrap_ptr(this); this.line_offsets.len() as u32 } @@ -100,7 +102,7 @@ pub extern "C" fn ts_highlighter_add_language( property_sheet_json: *const c_char, injection_regex: *const c_char, ) -> ErrorCode { - let this = unwrap_ptr(this); + let this = unwrap_mut_ptr(this); let scope_name = unsafe { CStr::from_ptr(scope_name) }; let scope_name = unwrap(scope_name.to_str()).to_string(); let property_sheet_json = unsafe { CStr::from_ptr(property_sheet_json) }; @@ -128,14 +130,14 @@ pub extern "C" fn ts_highlighter_add_language( #[no_mangle] pub extern "C" fn ts_highlighter_highlight( - this: *mut TSHighlighter, + this: *const TSHighlighter, scope_name: *const c_char, source_code: *const c_char, source_code_len: u32, output: *mut TSHighlightBuffer, ) -> ErrorCode { let this = unwrap_ptr(this); - let output = unwrap_ptr(output); + let output = unwrap_mut_ptr(output); let scope_name = unwrap(unsafe { CStr::from_ptr(scope_name).to_str() }); let source_code = unsafe { slice::from_raw_parts(source_code as *const u8, source_code_len as usize) }; @@ -233,7 +235,14 @@ impl TSHighlightBuffer { } } -fn unwrap_ptr<'a, T>(result: *mut T) -> &'a mut T { +fn unwrap_ptr<'a, T>(result: *const T) -> &'a T { + unsafe { result.as_ref() }.unwrap_or_else(|| { + eprintln!("{}:{} - pointer must not be null", file!(), line!()); + abort(); + }) +} + +fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T { unsafe { result.as_mut() }.unwrap_or_else(|| { eprintln!("{}:{} - pointer must not be null", file!(), line!()); abort(); From 083e813218c7f58c6724d5c2e5634e081b611412 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 13 Mar 2019 15:28:08 -0700 Subject: [PATCH 09/33] Highlight: fix handling of root node injections like in ERB, EJS --- cli/src/tests/highlight_test.rs | 26 ++++++++++++++++++++++++ 
highlight/src/lib.rs | 36 ++++++++++++++++++++++++++------- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index 2c70f8cc..2847cb71 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -10,6 +10,8 @@ lazy_static! { get_property_sheet("javascript", "highlights.json"); static ref HTML_SHEET: PropertySheet = get_property_sheet("html", "highlights.json"); + static ref EJS_SHEET: PropertySheet = + get_property_sheet("embedded-template", "highlights-ejs.json"); static ref SCOPE_CLASS_STRINGS: Vec = { let mut result = Vec::new(); let mut i = 0; @@ -155,6 +157,30 @@ fn test_highlighting_empty_lines() { ); } +#[test] +fn test_highlighting_ejs() { + let source = vec!["
<% foo() %>
"].join("\n"); + + assert_eq!( + &to_token_vector(&source, get_language("embedded-template"), &EJS_SHEET).unwrap(), + &[[ + ("<", vec![]), + ("div", vec![Scope::Tag]), + (">", vec![]), + ("<%", vec![Scope::Keyword]), + (" ", vec![]), + ("foo", vec![Scope::Function]), + ("(", vec![Scope::PunctuationBracket]), + (")", vec![Scope::PunctuationBracket]), + (" ", vec![]), + ("%>", vec![Scope::Keyword]), + ("", vec![]) + ]], + ); +} + #[test] fn test_highlighting_via_c_api() { let js_lang = get_language("javascript"); diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index 25d8d59f..7af0efb3 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -4,11 +4,9 @@ mod escape; pub use c_lib as c; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_derive::*; -use std::cmp; use std::fmt::{self, Write}; use std::mem::transmute; -use std::str; -use std::usize; +use std::{cmp, str, usize}; use tree_sitter::{Language, Node, Parser, Point, PropertySheet, Range, Tree, TreePropertyCursor}; #[derive(Debug)] @@ -80,6 +78,7 @@ struct Layer<'a> { cursor: TreePropertyCursor<'a, Properties>, ranges: Vec, at_node_end: bool, + depth: usize, } struct Highlighter<'a, T> @@ -163,6 +162,18 @@ impl fmt::Display for PropertySheetError { } } +impl<'a> fmt::Debug for Layer<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "Layer {{ at_node_end: {}, node: {:?} }}", + self.at_node_end, + self.cursor.node() + )?; + Ok(()) + } +} + pub fn load_property_sheet( language: Language, json: &str, @@ -387,6 +398,7 @@ where start_point: Point::new(0, 0), end_point: Point::new(usize::MAX, usize::MAX), }], + 0, )], utf8_error_len: None, }) @@ -566,7 +578,7 @@ where result } - fn add_layer(&mut self, language_string: &str, ranges: Vec) { + fn add_layer(&mut self, language_string: &str, ranges: Vec, depth: usize) { if let Some((language, property_sheet)) = (self.injection_callback)(language_string) { self.parser .set_language(language) @@ -576,7 
+588,7 @@ where .parser .parse(self.source, None) .expect("Failed to parse"); - let layer = Layer::new(self.source, tree, property_sheet, ranges); + let layer = Layer::new(self.source, tree, property_sheet, ranges, depth); match self.layers.binary_search_by(|l| l.cmp(&layer)) { Ok(i) | Err(i) => self.layers.insert(i, layer), }; @@ -617,8 +629,9 @@ impl<'a, T: Fn(&str) -> Option<(Language, &'a PropertySheet)>> Itera }) .collect::>(); + let depth = first_layer.depth + 1; for (language, ranges) in injections { - self.add_layer(&language, ranges); + self.add_layer(&language, ranges, depth); } } @@ -647,7 +660,13 @@ impl<'a, T: Fn(&str) -> Option<(Language, &'a PropertySheet)>> Itera // to re-sort the layers. If the cursor is already at the end of its syntax tree, // remove it. if self.layers[0].advance() { - self.layers.sort_unstable_by(|a, b| a.cmp(&b)); + let mut index = 0; + while self.layers.get(index + 1).map_or(false, |next| { + self.layers[index].cmp(next) == cmp::Ordering::Greater + }) { + self.layers.swap(index, index + 1); + index += 1; + } } else { self.layers.remove(0); } @@ -671,6 +690,7 @@ impl<'a> Layer<'a> { tree: Tree, sheet: &'a PropertySheet, ranges: Vec, + depth: usize, ) -> Self { // The cursor's lifetime parameter indicates that the tree must outlive the cursor. 
// But because the tree is really a pointer to the heap, the cursor can remain @@ -681,6 +701,7 @@ impl<'a> Layer<'a> { _tree: tree, cursor, ranges, + depth, at_node_end: false, } } @@ -692,6 +713,7 @@ impl<'a> Layer<'a> { self.offset() .cmp(&other.offset()) .then_with(|| other.at_node_end.cmp(&self.at_node_end)) + .then_with(|| self.depth.cmp(&other.depth)) } fn offset(&self) -> usize { From abcac40f2d8c4aeb1b87eb58747e0eb024d90e0f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 13 Mar 2019 15:51:27 -0700 Subject: [PATCH 10/33] cli: Add a --time flag to highlight subcommand --- cli/src/highlight.rs | 11 +++++++++++ cli/src/main.rs | 6 ++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/cli/src/highlight.rs b/cli/src/highlight.rs index 703c4053..e7bb8818 100644 --- a/cli/src/highlight.rs +++ b/cli/src/highlight.rs @@ -6,6 +6,7 @@ use serde::ser::SerializeMap; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_json::{json, Value}; use std::collections::HashMap; +use std::time::Instant; use std::{fmt, fs, io, path}; use tree_sitter::{Language, PropertySheet}; use tree_sitter_highlight::{highlight, highlight_html, HighlightEvent, Properties, Scope}; @@ -254,10 +255,13 @@ pub fn ansi( source: &[u8], language: Language, property_sheet: &PropertySheet, + print_time: bool, ) -> Result<()> { use std::io::Write; let stdout = io::stdout(); let mut stdout = stdout.lock(); + + let time = Instant::now(); let mut scope_stack = Vec::new(); for event in highlight(source, language, property_sheet, |s| { language_for_injection_string(loader, s) @@ -278,6 +282,13 @@ pub fn ansi( } } } + + if print_time { + let duration = time.elapsed(); + let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000; + eprintln!("{} ms", duration_ms); + } + Ok(()) } diff --git a/cli/src/main.rs b/cli/src/main.rs index 3769efa0..15499622 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -76,7 +76,8 @@ fn run() -> 
error::Result<()> { .required(true), ) .arg(Arg::with_name("scope").long("scope").takes_value(true)) - .arg(Arg::with_name("html").long("html").short("h")), + .arg(Arg::with_name("html").long("html").short("h")) + .arg(Arg::with_name("time").long("time").short("t")), ) .get_matches(); @@ -167,6 +168,7 @@ fn run() -> error::Result<()> { } else if let Some(matches) = matches.subcommand_matches("highlight") { let paths = matches.values_of("path").unwrap().into_iter(); let html_mode = matches.is_present("html"); + let time = matches.is_present("time"); loader.find_all_languages(&config.parser_directories)?; if html_mode { @@ -201,7 +203,7 @@ fn run() -> error::Result<()> { if html_mode { highlight::html(&loader, &config.theme, &source, language, sheet)?; } else { - highlight::ansi(&loader, &config.theme, &source, language, sheet)?; + highlight::ansi(&loader, &config.theme, &source, language, sheet, time)?; } } else { return Err(error::Error(format!( From e0f0043edd0064136d889f454d85eb2c2f146443 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 13 Mar 2019 16:31:08 -0700 Subject: [PATCH 11/33] Highlight: 0.1.5 --- Cargo.lock | 4 ++-- highlight/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 52e2c3c3..896f22f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -661,12 +661,12 @@ dependencies = [ "spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "tempfile 3.0.7 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter 0.3.9", - "tree-sitter-highlight 0.1.4", + "tree-sitter-highlight 0.1.5", ] [[package]] name = "tree-sitter-highlight" -version = "0.1.4" +version = "0.1.5" dependencies = [ "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/highlight/Cargo.toml b/highlight/Cargo.toml index cf807d9f..09b20774 100644 --- a/highlight/Cargo.toml +++ b/highlight/Cargo.toml @@ -1,7 
+1,7 @@ [package] name = "tree-sitter-highlight" description = "Library for performing syntax highlighting with Tree-sitter" -version = "0.1.4" +version = "0.1.5" authors = [ "Max Brunsfeld ", "Tim Clem " From cddb3e416d4014cfad833335879ccdfffdb56d5e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Mar 2019 11:13:38 -0700 Subject: [PATCH 12/33] Replace operation limit API with a clock-based timeout API --- cli/src/tests/parser_test.rs | 102 +++++++++++++++++++++------------- lib/binding/bindings.rs | 4 +- lib/binding/lib.rs | 18 ++++-- lib/include/tree_sitter/api.h | 4 +- lib/src/parser.c | 45 +++++++++------ 5 files changed, 109 insertions(+), 64 deletions(-) diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 7947463a..6b7228dc 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -1,7 +1,7 @@ use super::helpers::edits::{perform_edit, Edit, ReadRecorder}; use super::helpers::fixtures::{get_language, get_test_language}; use crate::generate::generate_parser_for_grammar; -use std::{thread, usize}; +use std::{thread, time}; use tree_sitter::{InputEdit, LogType, Parser, Point, Range}; #[test] @@ -269,84 +269,108 @@ fn test_parsing_on_multiple_threads() { assert_eq!(child_count_differences, &[1, 2, 3, 4]); } -// Operation limits +// Timeouts #[test] -fn test_parsing_with_an_operation_limit() { +fn test_parsing_with_a_timeout() { let mut parser = Parser::new(); parser.set_language(get_language("json")).unwrap(); - // Start parsing from an infinite input. Parsing should abort after 5 "operations". - parser.set_operation_limit(5); - let mut call_count = 0; + // Parse an infinitely-long string, but pause after 100 microseconds of processing. 
+ parser.set_timeout_micros(200); + let start_time = time::Instant::now(); let tree = parser.parse_with( - &mut |_, _| { - if call_count == 0 { - call_count += 1; - b"[0" + &mut |offset, _| { + if offset == 0 { + b"\"" } else { - call_count += 1; - b", 0" + b"x" } }, None, ); assert!(tree.is_none()); - assert!(call_count >= 3); - assert!(call_count <= 8); + assert!(start_time.elapsed().as_micros() > 100); + assert!(start_time.elapsed().as_micros() < 300); - // Resume parsing from the previous state. - call_count = 0; - parser.set_operation_limit(20); + // Continue parsing, but pause after 300 microseconds of processing. + parser.set_timeout_micros(400); + let start_time = time::Instant::now(); + let tree = parser.parse_with( + &mut |offset, _| { + if offset == 0 { + b"\"" + } else { + b"x" + } + }, + None, + ); + assert!(tree.is_none()); + assert!(start_time.elapsed().as_micros() > 300); + assert!(start_time.elapsed().as_micros() < 500); + + // Finish parsing + parser.set_timeout_micros(1_000_000); let tree = parser .parse_with( - &mut |_, _| { - if call_count == 0 { - call_count += 1; - b"]" - } else { + &mut |offset, _| { + if offset > 1000 { b"" + } else if offset == 1000 { + b"\"" + } else { + b"y" } }, None, ) .unwrap(); - assert_eq!( - tree.root_node().to_sexp(), - "(value (array (number) (number) (number)))" - ); + assert_eq!(tree.root_node().to_sexp(), "(value (string))"); } #[test] -fn test_parsing_with_a_reset_after_reaching_an_operation_limit() { +fn test_parsing_with_a_timeout_and_a_reset() { let mut parser = Parser::new(); parser.set_language(get_language("json")).unwrap(); - parser.set_operation_limit(3); - let tree = parser.parse("[1234, 5, 6, 7, 8]", None); + parser.set_timeout_micros(30); + let tree = parser.parse( + "[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", + None, + ); assert!(tree.is_none()); // Without calling reset, the parser continues from where it left 
off, so // it does not see the changes to the beginning of the source code. - parser.set_operation_limit(usize::MAX); - let tree = parser.parse("[null, 5, 6, 4, 5]", None).unwrap(); + parser.set_timeout_micros(0); + let tree = parser.parse( + "[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", + None, + ).unwrap(); assert_eq!( - tree.root_node().to_sexp(), - "(value (array (number) (number) (number) (number) (number)))" + tree.root_node().named_child(0).unwrap().named_child(0).unwrap().kind(), + "string" ); - parser.set_operation_limit(3); - let tree = parser.parse("[1234, 5, 6, 7, 8]", None); + parser.set_timeout_micros(30); + let tree = parser.parse( + "[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", + None, + ); assert!(tree.is_none()); // By calling reset, we force the parser to start over from scratch so // that it sees the changes to the beginning of the source code. 
- parser.set_operation_limit(usize::MAX); + parser.set_timeout_micros(0); parser.reset(); - let tree = parser.parse("[null, 5, 6, 4, 5]", None).unwrap(); + let tree = parser.parse( + "[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", + None, + ).unwrap(); assert_eq!( - tree.root_node().to_sexp(), - "(value (array (null) (number) (number) (number) (number)))" + tree.root_node().named_child(0).unwrap().named_child(0).unwrap().kind(), + "null" ); } diff --git a/lib/binding/bindings.rs b/lib/binding/bindings.rs index 9d1f3490..41999088 100644 --- a/lib/binding/bindings.rs +++ b/lib/binding/bindings.rs @@ -143,10 +143,10 @@ extern "C" { pub fn ts_parser_set_enabled(arg1: *mut TSParser, arg2: bool); } extern "C" { - pub fn ts_parser_operation_limit(arg1: *const TSParser) -> usize; + pub fn ts_parser_timeout_micros(arg1: *const TSParser) -> usize; } extern "C" { - pub fn ts_parser_set_operation_limit(arg1: *mut TSParser, arg2: usize); + pub fn ts_parser_set_timeout_micros(arg1: *mut TSParser, arg2: usize); } extern "C" { pub fn ts_parser_reset(arg1: *mut TSParser); diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index 3703e299..9f8f1dec 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -230,7 +230,10 @@ impl Parser { pub fn parse(&mut self, input: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option { let bytes = input.as_ref(); let len = bytes.len(); - self.parse_with(&mut |i, _| if i < len { &bytes[i..] } else { &[] }, old_tree) + self.parse_with( + &mut |i, _| if i < len { &bytes[i..] } else { &[] }, + old_tree, + ) } pub fn parse_utf16( @@ -240,7 +243,10 @@ impl Parser { ) -> Option { let code_points = input.as_ref(); let len = code_points.len(); - self.parse_utf16_with(&mut |i, _| if i < len { &code_points[i..] } else { &[] }, old_tree) + self.parse_utf16_with( + &mut |i, _| if i < len { &code_points[i..] 
} else { &[] }, + old_tree, + ) } pub fn parse_with<'a, T: FnMut(usize, Point) -> &'a [u8]>( @@ -317,8 +323,12 @@ impl Parser { unsafe { ffi::ts_parser_reset(self.0) } } - pub fn set_operation_limit(&mut self, limit: usize) { - unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) } + pub fn timeout_micros(&self) -> usize { + unsafe { ffi::ts_parser_timeout_micros(self.0) } + } + + pub fn set_timeout_micros(&mut self, timeout_micros: usize) { + unsafe { ffi::ts_parser_set_timeout_micros(self.0, timeout_micros) } } pub fn set_included_ranges(&mut self, ranges: &[Range]) { diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 16841c8e..5c72e7b1 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -90,8 +90,8 @@ TSTree *ts_parser_parse_string(TSParser *, const TSTree *, const char *, uint32_ TSTree *ts_parser_parse_string_encoding(TSParser *, const TSTree *, const char *, uint32_t, TSInputEncoding); bool ts_parser_enabled(const TSParser *); void ts_parser_set_enabled(TSParser *, bool); -size_t ts_parser_operation_limit(const TSParser *); -void ts_parser_set_operation_limit(TSParser *, size_t); +size_t ts_parser_timeout_micros(const TSParser *); +void ts_parser_set_timeout_micros(TSParser *, size_t); void ts_parser_reset(TSParser *); void ts_parser_set_included_ranges(TSParser *, const TSRange *, uint32_t); const TSRange *ts_parser_included_ranges(const TSParser *, uint32_t *); diff --git a/lib/src/parser.c b/lib/src/parser.c index 85452f8d..0808b786 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -42,6 +43,7 @@ static const unsigned MAX_VERSION_COUNT = 6; static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4; static const unsigned MAX_SUMMARY_DEPTH = 16; static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; +static const unsigned CLOCKS_PER_MICROSECOND = CLOCKS_PER_SEC / 1000000; typedef struct { Subtree token; @@ -63,7 +65,8 
@@ struct TSParser { void *external_scanner_payload; FILE *dot_graph_file; unsigned accept_count; - size_t operation_limit; + clock_t clock_limit; + clock_t start_clock; volatile bool enabled; bool halt_on_error; Subtree old_tree; @@ -1242,7 +1245,11 @@ static void ts_parser__recover(TSParser *self, StackVersion version, Subtree loo } } -static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_node_reuse) { +static bool ts_parser__advance( + TSParser *self, + StackVersion version, + bool allow_node_reuse +) { TSStateId state = ts_stack_state(self->stack, version); uint32_t position = ts_stack_position(self->stack, version).bytes; Subtree last_external_token = ts_stack_last_external_token(self->stack, version); @@ -1274,6 +1281,11 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ } for (;;) { + if ((size_t)(clock() - self->start_clock) > self->clock_limit || !self->enabled) { + ts_subtree_release(&self->tree_pool, lookahead); + return false; + } + StackVersion last_reduction_version = STACK_VERSION_NONE; for (uint32_t i = 0; i < table_entry.action_count; i++) { @@ -1302,7 +1314,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ ts_parser__shift(self, version, next_state, lookahead, action.params.extra); if (did_reuse) reusable_node_advance(&self->reusable_node); - return; + return true; } case TSParseActionTypeReduce: { @@ -1322,7 +1334,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ case TSParseActionTypeAccept: { LOG("accept"); ts_parser__accept(self, version, lookahead); - return; + return true; } case TSParseActionTypeRecover: { @@ -1332,7 +1344,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ ts_parser__recover(self, version, lookahead); if (did_reuse) reusable_node_advance(&self->reusable_node); - return; + return true; } } } @@ -1371,7 +1383,7 @@ static void ts_parser__advance(TSParser *self, 
StackVersion version, bool allow_ if (state == ERROR_STATE) { ts_parser__recover(self, version, lookahead); - return; + return true; } if (ts_parser__breakdown_top_of_stack(self, version)) { @@ -1381,7 +1393,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ LOG("detect_error"); ts_stack_pause(self->stack, version, ts_subtree_leaf_symbol(lookahead)); ts_subtree_release(&self->tree_pool, lookahead); - return; + return true; } } @@ -1492,7 +1504,8 @@ TSParser *ts_parser_new() { self->dot_graph_file = NULL; self->halt_on_error = false; self->enabled = true; - self->operation_limit = SIZE_MAX; + self->clock_limit = SIZE_MAX; + self->start_clock = 0; self->old_tree = NULL_SUBTREE; self->scratch_tree.ptr = &self->scratch_tree_data; self->included_range_differences = (TSRangeArray) array_new(); @@ -1574,12 +1587,13 @@ void ts_parser_set_enabled(TSParser *self, bool enabled) { self->enabled = enabled; } -size_t ts_parser_operation_limit(const TSParser *self) { - return self->operation_limit; +size_t ts_parser_timeout_micros(const TSParser *self) { + return self->clock_limit / CLOCKS_PER_MICROSECOND; } -void ts_parser_set_operation_limit(TSParser *self, size_t limit) { - self->operation_limit = limit; +void ts_parser_set_timeout_micros(TSParser *self, size_t timeout_micros) { + self->clock_limit = timeout_micros * CLOCKS_PER_MICROSECOND; + if (self->clock_limit == 0) self->clock_limit = SIZE_MAX; } void ts_parser_set_included_ranges(TSParser *self, const TSRange *ranges, uint32_t count) { @@ -1642,15 +1656,12 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { } uint32_t position = 0, last_position = 0, version_count = 0; - size_t operation_count = 0; + self->start_clock = clock(); do { for (StackVersion version = 0; version_count = ts_stack_version_count(self->stack), version < version_count; version++) { - if (operation_count > self->operation_limit || !self->enabled) return NULL; - operation_count++; - 
bool allow_node_reuse = version_count == 1; while (ts_stack_is_active(self->stack, version)) { LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u", @@ -1659,7 +1670,7 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { ts_stack_position(self->stack, version).extent.row, ts_stack_position(self->stack, version).extent.column); - ts_parser__advance(self, version, allow_node_reuse); + if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL; LOG_STACK(); position = ts_stack_position(self->stack, version).bytes; From 430f8874eab63cd8bf1856468a4114a5ce58386f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Mar 2019 11:52:25 -0700 Subject: [PATCH 13/33] Lib: reduce frequency of clock calls during parsing --- lib/src/parser.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/src/parser.c b/lib/src/parser.c index 0808b786..5f850c72 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -44,6 +44,7 @@ static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4; static const unsigned MAX_SUMMARY_DEPTH = 16; static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; static const unsigned CLOCKS_PER_MICROSECOND = CLOCKS_PER_SEC / 1000000; +static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100; typedef struct { Subtree token; @@ -67,6 +68,7 @@ struct TSParser { unsigned accept_count; clock_t clock_limit; clock_t start_clock; + unsigned operation_count; volatile bool enabled; bool halt_on_error; Subtree old_tree; @@ -1281,9 +1283,12 @@ static bool ts_parser__advance( } for (;;) { - if ((size_t)(clock() - self->start_clock) > self->clock_limit || !self->enabled) { - ts_subtree_release(&self->tree_pool, lookahead); - return false; + if (!self->enabled || ++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) { + self->operation_count = 0; + if (clock() - self->start_clock > self->clock_limit) { + ts_subtree_release(&self->tree_pool, lookahead); + return false; + 
} } StackVersion last_reduction_version = STACK_VERSION_NONE; @@ -1506,6 +1511,7 @@ TSParser *ts_parser_new() { self->enabled = true; self->clock_limit = SIZE_MAX; self->start_clock = 0; + self->operation_count = 0; self->old_tree = NULL_SUBTREE; self->scratch_tree.ptr = &self->scratch_tree_data; self->included_range_differences = (TSRangeArray) array_new(); @@ -1656,6 +1662,7 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { } uint32_t position = 0, last_position = 0, version_count = 0; + self->operation_count = 0; self->start_clock = clock(); do { From e30e827c5ffd9db2ec5e5876535c2492d09a0639 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Mar 2019 11:52:50 -0700 Subject: [PATCH 14/33] CLI: Add timeout flag to parse command --- cli/src/main.rs | 7 +- cli/src/parse.rs | 176 +++++++++++++++++++++++++---------------------- 2 files changed, 100 insertions(+), 83 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 15499622..5ad072c6 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -52,7 +52,8 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("debug").long("debug").short("d")) .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")) .arg(Arg::with_name("quiet").long("quiet").short("q")) - .arg(Arg::with_name("time").long("time").short("t")), + .arg(Arg::with_name("time").long("time").short("t")) + .arg(Arg::with_name("timeout").long("timeout").takes_value(true)), ) .subcommand( SubCommand::with_name("test") @@ -132,6 +133,9 @@ fn run() -> error::Result<()> { let debug_graph = matches.is_present("debug-graph"); let quiet = matches.is_present("quiet"); let time = matches.is_present("time"); + let timeout = matches + .value_of("timeout") + .map_or(0, |t| usize::from_str_radix(t, 10).unwrap()); loader.find_all_languages(&config.parser_directories)?; let paths = matches .values_of("path") @@ -157,6 +161,7 @@ fn run() -> error::Result<()> { max_path_length, quiet, time, + timeout, debug, 
debug_graph, )?; diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 2e8b3e4c..27b96c38 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -12,6 +12,7 @@ pub fn parse_file_at_path( max_path_length: usize, quiet: bool, print_time: bool, + timeout: usize, debug: bool, debug_graph: bool, ) -> Result { @@ -32,111 +33,122 @@ pub fn parse_file_at_path( }))); } + parser.set_timeout_micros(timeout); let time = Instant::now(); - let tree = parser - .parse(&source_code, None) - .expect("Incompatible language version"); + let tree = parser.parse(&source_code, None); let duration = time.elapsed(); let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000; - let mut cursor = tree.walk(); - let stdout = io::stdout(); let mut stdout = stdout.lock(); - if !quiet { - let mut needs_newline = false; - let mut indent_level = 0; - let mut did_visit_children = false; + if let Some(tree) = tree { + let mut cursor = tree.walk(); + + if !quiet { + let mut needs_newline = false; + let mut indent_level = 0; + let mut did_visit_children = false; + loop { + let node = cursor.node(); + let is_named = node.is_named(); + if did_visit_children { + if is_named { + stdout.write(b")")?; + needs_newline = true; + } + if cursor.goto_next_sibling() { + did_visit_children = false; + } else if cursor.goto_parent() { + did_visit_children = true; + indent_level -= 1; + } else { + break; + } + } else { + if is_named { + if needs_newline { + stdout.write(b"\n")?; + } + for _ in 0..indent_level { + stdout.write(b" ")?; + } + let start = node.start_position(); + let end = node.end_position(); + write!( + &mut stdout, + "({} [{}, {}] - [{}, {}]", + node.kind(), + start.row, + start.column, + end.row, + end.column + )?; + needs_newline = true; + } + if cursor.goto_first_child() { + did_visit_children = false; + indent_level += 1; + } else { + did_visit_children = true; + } + } + } + cursor.reset(tree.root_node()); + println!(""); + } + + let mut first_error = None; loop { let 
node = cursor.node(); - let is_named = node.is_named(); - if did_visit_children { - if is_named { - stdout.write(b")")?; - needs_newline = true; - } - if cursor.goto_next_sibling() { - did_visit_children = false; - } else if cursor.goto_parent() { - did_visit_children = true; - indent_level -= 1; + if node.has_error() { + if node.is_error() || node.is_missing() { + first_error = Some(node); + break; } else { + cursor.goto_first_child(); + } + } else if !cursor.goto_next_sibling() { + if !cursor.goto_parent() { break; } - } else { - if is_named { - if needs_newline { - stdout.write(b"\n")?; - } - for _ in 0..indent_level { - stdout.write(b" ")?; - } - let start = node.start_position(); - let end = node.end_position(); - write!( - &mut stdout, - "({} [{}, {}] - [{}, {}]", - node.kind(), - start.row, - start.column, - end.row, - end.column - )?; - needs_newline = true; - } - if cursor.goto_first_child() { - did_visit_children = false; - indent_level += 1; - } else { - did_visit_children = true; - } } } - cursor.reset(tree.root_node()); - println!(""); - } - let mut first_error = None; - loop { - let node = cursor.node(); - if node.has_error() { - if node.is_error() || node.is_missing() { - first_error = Some(node); - break; - } else { - cursor.goto_first_child(); - } - } else if !cursor.goto_next_sibling() { - if !cursor.goto_parent() { - break; + if first_error.is_some() || print_time { + write!( + &mut stdout, + "{:width$}\t{} ms", + path.to_str().unwrap(), + duration_ms, + width = max_path_length + )?; + if let Some(node) = first_error { + let start = node.start_position(); + let end = node.end_position(); + write!( + &mut stdout, + "\t({} [{}, {}] - [{}, {}])", + node.kind(), + start.row, + start.column, + end.row, + end.column + )?; } + write!(&mut stdout, "\n")?; } - } - if first_error.is_some() || print_time { - write!( + return Ok(first_error.is_some()) + } else if print_time { + writeln!( &mut stdout, - "{:width$}\t{} ms", + "{:width$}\t{} ms (timed out)", 
path.to_str().unwrap(), duration_ms, width = max_path_length )?; - if let Some(node) = first_error { - let start = node.start_position(); - let end = node.end_position(); - write!( - &mut stdout, - "\t({} [{}, {}] - [{}, {}])", - node.kind(), - start.row, - start.column, - end.row, - end.column - )?; - } - write!(&mut stdout, "\n")?; } - Ok(first_error.is_some()) + Ok(false) } From 88e3907cc08c999f55cf8d01dd4d83953c75ace0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Mar 2019 12:39:04 -0700 Subject: [PATCH 15/33] Use QueryPerformanceFrequency as clock on windows --- cli/src/main.rs | 26 +++++++++++++++--------- cli/src/parse.rs | 2 +- lib/binding/bindings.rs | 5 ++--- lib/binding/lib.rs | 4 ++-- lib/include/tree_sitter/api.h | 4 ++-- lib/src/clock.h | 34 +++++++++++++++++++++++++++++++ lib/src/parser.c | 38 +++++++++++++++++------------------ 7 files changed, 76 insertions(+), 37 deletions(-) create mode 100644 lib/src/clock.h diff --git a/cli/src/main.rs b/cli/src/main.rs index 5ad072c6..dc4b5ae6 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -3,7 +3,7 @@ use std::env; use std::fs; use std::path::Path; use std::process::exit; -use std::usize; +use std::{u64, usize}; use tree_sitter_cli::{ config, error, generate, highlight, loader, logger, parse, properties, test, }; @@ -49,6 +49,7 @@ fn run() -> error::Result<()> { .multiple(true) .required(true), ) + .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("debug").long("debug").short("d")) .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")) .arg(Arg::with_name("quiet").long("quiet").short("q")) @@ -135,7 +136,7 @@ fn run() -> error::Result<()> { let time = matches.is_present("time"); let timeout = matches .value_of("timeout") - .map_or(0, |t| usize::from_str_radix(t, 10).unwrap()); + .map_or(0, |t| u64::from_str_radix(t, 10).unwrap()); loader.find_all_languages(&config.parser_directories)?; let paths = matches .values_of("path") @@ -146,15 +147,20 @@ 
fn run() -> error::Result<()> { let mut has_error = false; for path in paths { let path = Path::new(path); - let language = - if let Some((l, _)) = loader.language_configuration_for_file_name(path)? { - l - } else if let Some(l) = loader.language_at_path(&current_dir)? { - l + let language = if let Some(scope) = matches.value_of("scope") { + if let Some(config) = loader.language_configuration_for_scope(scope)? { + config.0 } else { - eprintln!("No language found"); - return Ok(()); - }; + return Err(error::Error(format!("Unknown scope '{}'", scope))); + } + } else if let Some((l, _)) = loader.language_configuration_for_file_name(path)? { + l + } else if let Some(l) = loader.language_at_path(&current_dir)? { + l + } else { + eprintln!("No language found"); + return Ok(()); + }; has_error |= parse::parse_file_at_path( language, path, diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 27b96c38..f4002233 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -12,7 +12,7 @@ pub fn parse_file_at_path( max_path_length: usize, quiet: bool, print_time: bool, - timeout: usize, + timeout: u64, debug: bool, debug_graph: bool, ) -> Result<bool> { diff --git a/lib/binding/bindings.rs b/lib/binding/bindings.rs index 41999088..7c8c704a 100644 --- a/lib/binding/bindings.rs +++ b/lib/binding/bindings.rs @@ -1,6 +1,5 @@ /* automatically generated by rust-bindgen */ -pub type __darwin_size_t = ::std::os::raw::c_ulong; pub type FILE = [u64; 19usize]; pub type TSSymbol = u16; #[repr(C)] @@ -143,10 +142,10 @@ extern "C" { pub fn ts_parser_set_enabled(arg1: *mut TSParser, arg2: bool); } extern "C" { - pub fn ts_parser_timeout_micros(arg1: *const TSParser) -> usize; + pub fn ts_parser_timeout_micros(arg1: *const TSParser) -> u64; } extern "C" { - pub fn ts_parser_set_timeout_micros(arg1: *mut TSParser, arg2: usize); + pub fn ts_parser_set_timeout_micros(arg1: *mut TSParser, arg2: u64); } extern "C" { pub fn ts_parser_reset(arg1: *mut TSParser); diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index
9f8f1dec..f4f161a6 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -323,11 +323,11 @@ impl Parser { unsafe { ffi::ts_parser_reset(self.0) } } - pub fn timeout_micros(&self) -> usize { + pub fn timeout_micros(&self) -> u64 { unsafe { ffi::ts_parser_timeout_micros(self.0) } } - pub fn set_timeout_micros(&mut self, timeout_micros: usize) { + pub fn set_timeout_micros(&mut self, timeout_micros: u64) { unsafe { ffi::ts_parser_set_timeout_micros(self.0, timeout_micros) } } diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 5c72e7b1..e16ca576 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -90,8 +90,8 @@ TSTree *ts_parser_parse_string(TSParser *, const TSTree *, const char *, uint32_ TSTree *ts_parser_parse_string_encoding(TSParser *, const TSTree *, const char *, uint32_t, TSInputEncoding); bool ts_parser_enabled(const TSParser *); void ts_parser_set_enabled(TSParser *, bool); -size_t ts_parser_timeout_micros(const TSParser *); -void ts_parser_set_timeout_micros(TSParser *, size_t); +uint64_t ts_parser_timeout_micros(const TSParser *); +void ts_parser_set_timeout_micros(TSParser *, uint64_t); void ts_parser_reset(TSParser *); void ts_parser_set_included_ranges(TSParser *, const TSRange *, uint32_t); const TSRange *ts_parser_included_ranges(const TSParser *, uint32_t *); diff --git a/lib/src/clock.h b/lib/src/clock.h new file mode 100644 index 00000000..3193a6b3 --- /dev/null +++ b/lib/src/clock.h @@ -0,0 +1,34 @@ +#ifndef TREE_SITTER_CLOCK_H_ +#define TREE_SITTER_CLOCK_H_ + +#include + +#ifdef _WIN32 + +#include + +static inline uint64_t get_clock() { + LARGE_INTEGER result; + QueryPerformanceCounter(&result); + return (uint64_t)result.QuadPart; +} + +static inline uint64_t get_clocks_per_second() { + LARGE_INTEGER result; + QueryPerformanceFrequency(&result); + return (uint64_t)result.QuadPart; +} + +#else + +static inline uint64_t get_clock() { + return (uint64_t)clock(); +} + +static inline 
uint64_t get_clocks_per_second() { + return (uint64_t)CLOCKS_PER_SEC; +} + +#endif + +#endif // TREE_SITTER_CLOCK_H_ diff --git a/lib/src/parser.c b/lib/src/parser.c index 5f850c72..15b33d54 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -4,17 +4,18 @@ #include #include #include "tree_sitter/api.h" -#include "./subtree.h" -#include "./lexer.h" -#include "./length.h" -#include "./array.h" -#include "./language.h" #include "./alloc.h" -#include "./stack.h" -#include "./reusable_node.h" -#include "./reduce_action.h" +#include "./array.h" +#include "./clock.h" #include "./error_costs.h" #include "./get_changed_ranges.h" +#include "./language.h" +#include "./length.h" +#include "./lexer.h" +#include "./reduce_action.h" +#include "./reusable_node.h" +#include "./stack.h" +#include "./subtree.h" #include "./tree.h" #define LOG(...) \ @@ -43,7 +44,6 @@ static const unsigned MAX_VERSION_COUNT = 6; static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4; static const unsigned MAX_SUMMARY_DEPTH = 16; static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; -static const unsigned CLOCKS_PER_MICROSECOND = CLOCKS_PER_SEC / 1000000; static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100; typedef struct { @@ -66,8 +66,8 @@ struct TSParser { void *external_scanner_payload; FILE *dot_graph_file; unsigned accept_count; - clock_t clock_limit; - clock_t start_clock; + uint64_t clock_limit; + uint64_t start_clock; unsigned operation_count; volatile bool enabled; bool halt_on_error; @@ -1285,7 +1285,7 @@ static bool ts_parser__advance( for (;;) { if (!self->enabled || ++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) { self->operation_count = 0; - if (clock() - self->start_clock > self->clock_limit) { + if ((uint64_t)(get_clock() - self->start_clock) > self->clock_limit) { ts_subtree_release(&self->tree_pool, lookahead); return false; } @@ -1509,7 +1509,7 @@ TSParser *ts_parser_new() { self->dot_graph_file = NULL; self->halt_on_error = false; self->enabled 
= true; - self->clock_limit = SIZE_MAX; + self->clock_limit = UINT64_MAX; self->start_clock = 0; self->operation_count = 0; self->old_tree = NULL_SUBTREE; @@ -1593,13 +1593,13 @@ void ts_parser_set_enabled(TSParser *self, bool enabled) { self->enabled = enabled; } -size_t ts_parser_timeout_micros(const TSParser *self) { - return self->clock_limit / CLOCKS_PER_MICROSECOND; +uint64_t ts_parser_timeout_micros(const TSParser *self) { + return self->clock_limit / (get_clocks_per_second() / 1000000); } -void ts_parser_set_timeout_micros(TSParser *self, size_t timeout_micros) { - self->clock_limit = timeout_micros * CLOCKS_PER_MICROSECOND; - if (self->clock_limit == 0) self->clock_limit = SIZE_MAX; +void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) { + self->clock_limit = timeout_micros * (get_clocks_per_second() / 1000000); + if (self->clock_limit == 0) self->clock_limit = UINT64_MAX; } void ts_parser_set_included_ranges(TSParser *self, const TSRange *ranges, uint32_t count) { @@ -1663,7 +1663,7 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { uint32_t position = 0, last_position = 0, version_count = 0; self->operation_count = 0; - self->start_clock = clock(); + self->start_clock = get_clock(); do { for (StackVersion version = 0; From 006a931ab8967f6c9c89f668d4608ec119065b24 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Mar 2019 13:59:09 -0700 Subject: [PATCH 16/33] Tests: Prevent array reallocations during ts_stack_print_dot_graph When debugging a test with 'script/test -D', the DOT-graph generation code was sometimes causing reallocations that were not captured by the allocation tracker, because we explicitly disable allocation-tracking for that method in order to reduce noise when debugging memory leaks. By growing the relevant array *prior* to turning off allocation tracking, we can ensure that it is not reallocated within that function, avoiding false positive memory leak errors. 
Fixes #302 --- cli/src/tests/helpers/allocations.rs | 6 ++++-- lib/src/alloc.h | 6 +----- lib/src/stack.c | 4 ++-- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/cli/src/tests/helpers/allocations.rs b/cli/src/tests/helpers/allocations.rs index ae246c40..c64762bd 100644 --- a/cli/src/tests/helpers/allocations.rs +++ b/cli/src/tests/helpers/allocations.rs @@ -98,7 +98,9 @@ extern "C" fn ts_record_free(ptr: *mut c_void) { } #[no_mangle] -extern "C" fn ts_record_allocations_toggle() { +extern "C" fn ts_toggle_allocation_recording(enabled: bool) -> bool { let mut recorder = RECORDER.lock(); - recorder.enabled = !recorder.enabled; + let was_enabled = recorder.enabled; + recorder.enabled = enabled; + was_enabled } diff --git a/lib/src/alloc.h b/lib/src/alloc.h index 8e027a99..c8fe6c6e 100644 --- a/lib/src/alloc.h +++ b/lib/src/alloc.h @@ -15,7 +15,7 @@ void *ts_record_malloc(size_t); void *ts_record_calloc(size_t, size_t); void *ts_record_realloc(void *, size_t); void ts_record_free(void *); -bool ts_record_allocations_toggle(bool); +bool ts_toggle_allocation_recording(bool); static inline void *ts_malloc(size_t size) { return ts_record_malloc(size); @@ -33,10 +33,6 @@ static inline void ts_free(void *buffer) { ts_record_free(buffer); } -static inline bool ts_toggle_allocation_recording(bool value) { - return ts_record_allocations_toggle(value); -} - #else #include diff --git a/lib/src/stack.c b/lib/src/stack.c index e3a1f22d..9e351d4e 100644 --- a/lib/src/stack.c +++ b/lib/src/stack.c @@ -712,9 +712,9 @@ void ts_stack_clear(Stack *self) { } bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) { + array_reserve(&self->iterators, 32); bool was_recording_allocations = ts_toggle_allocation_recording(false); - if (!f) - f = stderr; + if (!f) f = stderr; fprintf(f, "digraph stack {\n"); fprintf(f, "rankdir=\"RL\";\n"); From 9ae594a50761fc7d5255d3767acb452e686eb085 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Mar 2019 
14:09:14 -0700 Subject: [PATCH 17/33] Be more loose with timeout unit test assertions --- cli/src/tests/parser_test.rs | 69 ++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 6b7228dc..afa86167 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -276,57 +276,56 @@ fn test_parsing_with_a_timeout() { let mut parser = Parser::new(); parser.set_language(get_language("json")).unwrap(); - // Parse an infinitely-long string, but pause after 100 microseconds of processing. - parser.set_timeout_micros(200); + // Parse an infinitely-long array, but pause after 100 microseconds of processing. + parser.set_timeout_micros(100); let start_time = time::Instant::now(); let tree = parser.parse_with( &mut |offset, _| { if offset == 0 { - b"\"" + b" [" } else { - b"x" + b",0" } }, None, ); assert!(tree.is_none()); - assert!(start_time.elapsed().as_micros() > 100); - assert!(start_time.elapsed().as_micros() < 300); - - // Continue parsing, but pause after 300 microseconds of processing. - parser.set_timeout_micros(400); - let start_time = time::Instant::now(); - let tree = parser.parse_with( - &mut |offset, _| { - if offset == 0 { - b"\"" - } else { - b"x" - } - }, - None, - ); - assert!(tree.is_none()); - assert!(start_time.elapsed().as_micros() > 300); assert!(start_time.elapsed().as_micros() < 500); + // Continue parsing, but pause after 300 microseconds of processing. 
+ parser.set_timeout_micros(1000); + let start_time = time::Instant::now(); + let tree = parser.parse_with( + &mut |offset, _| { + if offset == 0 { + b" [" + } else { + b",0" + } + }, + None, + ); + assert!(tree.is_none()); + assert!(start_time.elapsed().as_micros() > 500); + assert!(start_time.elapsed().as_micros() < 1500); + // Finish parsing - parser.set_timeout_micros(1_000_000); + parser.set_timeout_micros(0); let tree = parser .parse_with( &mut |offset, _| { - if offset > 1000 { + if offset > 5000 { b"" - } else if offset == 1000 { - b"\"" + } else if offset == 5000 { + b"]" } else { - b"y" + b",0" } }, None, ) .unwrap(); - assert_eq!(tree.root_node().to_sexp(), "(value (string))"); + assert_eq!(tree.root_node().child(0).unwrap().kind(), "array"); } #[test] @@ -349,7 +348,12 @@ fn test_parsing_with_a_timeout_and_a_reset() { None, ).unwrap(); assert_eq!( - tree.root_node().named_child(0).unwrap().named_child(0).unwrap().kind(), + tree.root_node() + .named_child(0) + .unwrap() + .named_child(0) + .unwrap() + .kind(), "string" ); @@ -369,7 +373,12 @@ fn test_parsing_with_a_timeout_and_a_reset() { None, ).unwrap(); assert_eq!( - tree.root_node().named_child(0).unwrap().named_child(0).unwrap().kind(), + tree.root_node() + .named_child(0) + .unwrap() + .named_child(0) + .unwrap() + .kind(), "null" ); } From 59fd8528d408f3abd22e32e3695649317ac5b5d8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Mar 2019 15:21:03 -0700 Subject: [PATCH 18/33] Avoid division rounding errors w/ clock counts --- lib/src/parser.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/src/parser.c b/lib/src/parser.c index 15b33d54..7125faa9 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -1594,11 +1594,11 @@ void ts_parser_set_enabled(TSParser *self, bool enabled) { } uint64_t ts_parser_timeout_micros(const TSParser *self) { - return self->clock_limit / (get_clocks_per_second() / 1000000); + return self->clock_limit * 1000000 / 
get_clocks_per_second(); } void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) { - self->clock_limit = timeout_micros * (get_clocks_per_second() / 1000000); + self->clock_limit = timeout_micros * get_clocks_per_second() / 1000000; if (self->clock_limit == 0) self->clock_limit = UINT64_MAX; } From 23dfde067ec28836fc5753370c99b020b9a086cf Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Mar 2019 16:50:10 -0700 Subject: [PATCH 19/33] Get disabled debugging code compiling again Fixes #303 --- lib/src/get_changed_ranges.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/src/get_changed_ranges.c b/lib/src/get_changed_ranges.c index 8eb89d46..b1df72cc 100644 --- a/lib/src/get_changed_ranges.c +++ b/lib/src/get_changed_ranges.c @@ -326,7 +326,7 @@ static inline void iterator_print_state(Iterator *self) { TreeCursorEntry entry = *array_back(&self->cursor.stack); TSPoint start = iterator_start_position(self).extent; TSPoint end = iterator_end_position(self).extent; - const char *name = ts_language_symbol_name(self->language, entry.subtree->symbol); + const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree)); printf( "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])", name, self->in_padding ? 
"(p)" : " ", From c8d040ca26e50de818059cbd707085d5f53713de Mon Sep 17 00:00:00 2001 From: Jacob Mitchell Date: Thu, 14 Mar 2019 12:59:29 -0700 Subject: [PATCH 20/33] Use 1-indexed rows in CLI and log output (resolves #287) --- cli/src/tests/parser_test.rs | 4 ++++ cli/src/util.rs | 19 +++++++++++++++++++ docs/section-3-creating-parsers.md | 2 +- lib/src/get_changed_ranges.c | 10 +++++----- lib/src/parser.c | 6 +++--- lib/src/stack.c | 2 +- 6 files changed, 33 insertions(+), 10 deletions(-) diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index afa86167..01724daf 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -56,6 +56,10 @@ fn test_parsing_with_logging() { "reduce sym:struct_item, child_count:3".to_string() ))); assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string()))); + + for (_, m) in &messages { + assert!(!m.contains("row:0")); + } } #[test] diff --git a/cli/src/util.rs b/cli/src/util.rs index e880bea1..c4d4f9c9 100644 --- a/cli/src/util.rs +++ b/cli/src/util.rs @@ -55,6 +55,9 @@ impl Drop for LogSession { { Command::new("open").arg(&self.0).output().unwrap(); } + + #[cfg(any(debug_assertions, test))] + validate_graph_log(&self); } else { eprintln!( "Dot failed: {} {}", @@ -64,3 +67,19 @@ impl Drop for LogSession { } } } + +#[cfg(all(unix, any(debug_assertions, test)))] +fn validate_graph_log(session: &LogSession) { + use std::io::{BufRead, BufReader}; + + let has_zero_indexed_row = |s: &str| s.contains("position: 0,"); + + let graph_log = std::fs::File::open(&session.0) + .expect("Failed to open graph log"); + let log_reader = BufReader::new(graph_log) + .lines() + .map(|l| l.expect("Failed to read line from graph log")); + for line in log_reader { + assert!(!has_zero_indexed_row(&line), "Graph log output includes zero-indexed row: {}", line); + } +} diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index 1e7989fa..37bccb65 100644 --- 
a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -86,7 +86,7 @@ tree-sitter parse ./the-file This should print the following: ``` -(source_file [0, 0] - [0, 5]) +(source_file [1, 0] - [1, 5]) ``` You might notice that the first time you run `tree-sitter parse`, it takes a few seconds. This is because Tree-sitter automatically compiles your C code into a dynamically-loadable library. Whenever you make changes to your grammar, you can update the parser simply by re-running `tree-sitter generate`. When the parser changes, Tree-sitter will recompile it as needed. diff --git a/lib/src/get_changed_ranges.c b/lib/src/get_changed_ranges.c index b1df72cc..c618dc9e 100644 --- a/lib/src/get_changed_ranges.c +++ b/lib/src/get_changed_ranges.c @@ -331,8 +331,8 @@ static inline void iterator_print_state(Iterator *self) { "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])", name, self->in_padding ? "(p)" : " ", self->visible_depth, - start.row, start.column, - end.row, end.column + start.row + 1, start.column, + end.row + 1, end.column ); } #endif @@ -361,7 +361,7 @@ unsigned ts_subtree_get_changed_ranges(const Subtree *old_tree, const Subtree *n do { #ifdef DEBUG_GET_CHANGED_RANGES - printf("At [%-2u, %-2u] Compare ", position.extent.row, position.extent.column); + printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column); iterator_print_state(&old_iter); printf("\tvs\t"); iterator_print_state(&new_iter); @@ -443,8 +443,8 @@ unsigned ts_subtree_get_changed_ranges(const Subtree *old_tree, const Subtree *n #ifdef DEBUG_GET_CHANGED_RANGES printf( " change: [[%u, %u] - [%u, %u]]\n", - position.extent.row, position.extent.column, - next_position.extent.row, next_position.extent.column + position.extent.row + 1, position.extent.column, + next_position.extent.row + 1, next_position.extent.column ); #endif diff --git a/lib/src/parser.c b/lib/src/parser.c index 7125faa9..f5cdb3cc 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ 
-332,7 +332,7 @@ static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId pa LOG( "lex_external state:%d, row:%u, column:%u", lex_mode.external_lex_state, - current_position.extent.row, + current_position.extent.row + 1, current_position.extent.column ); ts_lexer_start(&self->lexer); @@ -370,7 +370,7 @@ static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId pa LOG( "lex_internal state:%d, row:%u, column:%u", lex_mode.lex_state, - current_position.extent.row, + current_position.extent.row + 1, current_position.extent.column ); ts_lexer_start(&self->lexer); @@ -1674,7 +1674,7 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u", version, ts_stack_version_count(self->stack), ts_stack_state(self->stack, version), - ts_stack_position(self->stack, version).extent.row, + ts_stack_position(self->stack, version).extent.row + 1, ts_stack_position(self->stack, version).extent.column); if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL; diff --git a/lib/src/stack.c b/lib/src/stack.c index 9e351d4e..73c06454 100644 --- a/lib/src/stack.c +++ b/lib/src/stack.c @@ -785,7 +785,7 @@ bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) fprintf( f, " tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n", - node->position.extent.row, + node->position.extent.row + 1, node->position.extent.column, node->node_count, node->error_cost, From 15b096d6951860fae40a04a25225e382f69007fd Mon Sep 17 00:00:00 2001 From: Jacob Mitchell Date: Thu, 14 Mar 2019 21:48:29 -0700 Subject: [PATCH 21/33] Extract graph log validation into a specialized test --- cli/src/tests/parser_test.rs | 23 +++++++++++++++++++++++ cli/src/util.rs | 19 ------------------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 
01724daf..6afac6ab 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -62,6 +62,29 @@ fn test_parsing_with_logging() { } } +#[test] +#[cfg(unix)] +fn test_parsing_with_debug_graph_enabled() { + use std::io::{BufRead, BufReader, Seek}; + + let has_zero_indexed_row = |s: &str| s.contains("position: 0,"); + + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + + let mut debug_graph_file = tempfile::tempfile().unwrap(); + parser.print_dot_graphs(&debug_graph_file); + parser.parse("const zero = 0", None).unwrap(); + + debug_graph_file.seek(std::io::SeekFrom::Start(0)).unwrap(); + let log_reader = BufReader::new(debug_graph_file) + .lines() + .map(|l| l.expect("Failed to read line from graph log")); + for line in log_reader { + assert!(!has_zero_indexed_row(&line), "Graph log output includes zero-indexed row: {}", line); + } +} + #[test] fn test_parsing_with_custom_utf8_input() { let mut parser = Parser::new(); diff --git a/cli/src/util.rs b/cli/src/util.rs index c4d4f9c9..e880bea1 100644 --- a/cli/src/util.rs +++ b/cli/src/util.rs @@ -55,9 +55,6 @@ impl Drop for LogSession { { Command::new("open").arg(&self.0).output().unwrap(); } - - #[cfg(any(debug_assertions, test))] - validate_graph_log(&self); } else { eprintln!( "Dot failed: {} {}", @@ -67,19 +64,3 @@ impl Drop for LogSession { } } } - -#[cfg(all(unix, any(debug_assertions, test)))] -fn validate_graph_log(session: &LogSession) { - use std::io::{BufRead, BufReader}; - - let has_zero_indexed_row = |s: &str| s.contains("position: 0,"); - - let graph_log = std::fs::File::open(&session.0) - .expect("Failed to open graph log"); - let log_reader = BufReader::new(graph_log) - .lines() - .map(|l| l.expect("Failed to read line from graph log")); - for line in log_reader { - assert!(!has_zero_indexed_row(&line), "Graph log output includes zero-indexed row: {}", line); - } -} From 0ae304f582bc1a5d96f4c1e33903d1f75e1b4553 Mon Sep 17 00:00:00 2001 From: Max 
Brunsfeld Date: Fri, 15 Mar 2019 16:10:45 -0700 Subject: [PATCH 22/33] Lib: Rework the API for cancelling a parse Also, use beta on CI until atomic::AtomicU32 lands in stable. --- .appveyor.yml | 4 ++-- .travis.yml | 2 +- Cargo.lock | 2 ++ cli/src/tests/parser_test.rs | 39 ++++++++++++++++++++++++++++++++++- lib/binding/bindings.rs | 4 ++-- lib/binding/lib.rs | 19 ++++++++++++----- lib/include/tree_sitter/api.h | 4 ++-- lib/src/atomic.h | 8 +++++++ lib/src/parser.c | 23 ++++++++++++--------- 9 files changed, 82 insertions(+), 23 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 610ac134..a2787be7 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -5,8 +5,8 @@ install: # Install rust - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe - - IF "%PLATFORM%" == "x86" rustup-init -y --default-toolchain stable --default-host i686-pc-windows-msvc - - IF "%PLATFORM%" == "x64" rustup-init -y --default-toolchain stable --default-host x86_64-pc-windows-msvc + - IF "%PLATFORM%" == "x86" rustup-init -y --default-toolchain beta --default-host i686-pc-windows-msvc + - IF "%PLATFORM%" == "x64" rustup-init -y --default-toolchain beta --default-host x86_64-pc-windows-msvc - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin - rustc -vV - cargo -vV diff --git a/.travis.yml b/.travis.yml index 06c71b34..b92cd1f2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: rust rust: - - stable + - beta os: - linux diff --git a/Cargo.lock b/Cargo.lock index 896f22f2..688a2ac4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,3 +1,5 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
[[package]] name = "aho-corasick" version = "0.6.9" diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 6afac6ab..c13bdd50 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -1,6 +1,7 @@ use super::helpers::edits::{perform_edit, Edit, ReadRecorder}; use super::helpers::fixtures::{get_language, get_test_language}; use crate::generate::generate_parser_for_grammar; +use std::sync::atomic::{AtomicU32, Ordering}; use std::{thread, time}; use tree_sitter::{InputEdit, LogType, Parser, Point, Range}; @@ -81,7 +82,11 @@ fn test_parsing_with_debug_graph_enabled() { .lines() .map(|l| l.expect("Failed to read line from graph log")); for line in log_reader { - assert!(!has_zero_indexed_row(&line), "Graph log output includes zero-indexed row: {}", line); + assert!( + !has_zero_indexed_row(&line), + "Graph log output includes zero-indexed row: {}", + line + ); } } @@ -296,6 +301,38 @@ fn test_parsing_on_multiple_threads() { assert_eq!(child_count_differences, &[1, 2, 3, 4]); } +#[test] +fn test_parsing_cancelled_by_another_thread() { + let cancellation_flag = AtomicU32::new(0); + + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + unsafe { parser.set_cancellation_flag(Some(&cancellation_flag)) }; + + let parse_thread = thread::spawn(move || { + // Infinite input + parser.parse_with( + &mut |offset, _| { + if offset == 0 { + b" [" + } else { + b"0," + } + }, + None, + ) + }); + + let cancel_thread = thread::spawn(move || { + thread::sleep(time::Duration::from_millis(80)); + cancellation_flag.store(1, Ordering::Relaxed); + }); + + cancel_thread.join().unwrap(); + let tree = parse_thread.join().unwrap(); + assert!(tree.is_none()); +} + // Timeouts #[test] diff --git a/lib/binding/bindings.rs b/lib/binding/bindings.rs index 7c8c704a..b828c9e6 100644 --- a/lib/binding/bindings.rs +++ b/lib/binding/bindings.rs @@ -136,10 +136,10 @@ extern "C" { ) -> *mut TSTree; } extern "C" { - pub fn 
ts_parser_enabled(arg1: *const TSParser) -> bool; + pub fn ts_parser_cancellation_flag(arg1: *const TSParser) -> *const u32; } extern "C" { - pub fn ts_parser_set_enabled(arg1: *mut TSParser, arg2: bool); + pub fn ts_parser_set_cancellation_flag(arg1: *mut TSParser, arg2: *const u32); } extern "C" { pub fn ts_parser_timeout_micros(arg1: *const TSParser) -> u64; diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index f4f161a6..cc514c22 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -13,13 +13,10 @@ use regex::Regex; use serde::de::DeserializeOwned; use std::collections::HashMap; use std::ffi::CStr; -use std::fmt; use std::marker::PhantomData; use std::os::raw::{c_char, c_void}; -use std::ptr; -use std::slice; -use std::str; -use std::u16; +use std::sync::atomic::AtomicU32; +use std::{fmt, ptr, slice, str, u16}; pub const PARSER_HEADER: &'static str = include_str!("../include/tree_sitter/parser.h"); @@ -338,6 +335,18 @@ impl Parser { ffi::ts_parser_set_included_ranges(self.0, ts_ranges.as_ptr(), ts_ranges.len() as u32) }; } + + pub unsafe fn cancellation_flag(&self) -> Option<&AtomicU32> { + (ffi::ts_parser_cancellation_flag(self.0) as *const AtomicU32).as_ref() + } + + pub unsafe fn set_cancellation_flag(&self, flag: Option<&AtomicU32>) { + if let Some(flag) = flag { + ffi::ts_parser_set_cancellation_flag(self.0, flag as *const AtomicU32 as *const u32); + } else { + ffi::ts_parser_set_cancellation_flag(self.0, ptr::null()); + } + } } impl Drop for Parser { diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index e16ca576..cfc5393d 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -88,8 +88,8 @@ void ts_parser_halt_on_error(TSParser *, bool); TSTree *ts_parser_parse(TSParser *, const TSTree *, TSInput); TSTree *ts_parser_parse_string(TSParser *, const TSTree *, const char *, uint32_t); TSTree *ts_parser_parse_string_encoding(TSParser *, const TSTree *, const char *, uint32_t, TSInputEncoding); 
-bool ts_parser_enabled(const TSParser *); -void ts_parser_set_enabled(TSParser *, bool); +const uint32_t *ts_parser_cancellation_flag(const TSParser *); +void ts_parser_set_cancellation_flag(TSParser *, const uint32_t *); uint64_t ts_parser_timeout_micros(const TSParser *); void ts_parser_set_timeout_micros(TSParser *, uint64_t); void ts_parser_reset(TSParser *); diff --git a/lib/src/atomic.h b/lib/src/atomic.h index 78a4d7d8..89f40e48 100644 --- a/lib/src/atomic.h +++ b/lib/src/atomic.h @@ -7,6 +7,10 @@ #include +static inline uint32_t atomic_load(const volatile uint32_t *p) { + return *p; +} + static inline uint32_t atomic_inc(volatile uint32_t *p) { return InterlockedIncrement(p); } @@ -17,6 +21,10 @@ static inline uint32_t atomic_dec(volatile uint32_t *p) { #else +static inline uint32_t atomic_load(const volatile uint32_t *p) { + return __atomic_load_n(p, __ATOMIC_RELAXED); +} + static inline uint32_t atomic_inc(volatile uint32_t *p) { return __sync_add_and_fetch(p, 1u); } diff --git a/lib/src/parser.c b/lib/src/parser.c index f5cdb3cc..4bc455d4 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -6,6 +6,7 @@ #include "tree_sitter/api.h" #include "./alloc.h" #include "./array.h" +#include "./atomic.h" #include "./clock.h" #include "./error_costs.h" #include "./get_changed_ranges.h" @@ -69,7 +70,7 @@ struct TSParser { uint64_t clock_limit; uint64_t start_clock; unsigned operation_count; - volatile bool enabled; + const volatile uint32_t *cancellation_flag; bool halt_on_error; Subtree old_tree; TSRangeArray included_range_differences; @@ -1283,9 +1284,12 @@ static bool ts_parser__advance( } for (;;) { - if (!self->enabled || ++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) { + if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) { self->operation_count = 0; - if ((uint64_t)(get_clock() - self->start_clock) > self->clock_limit) { + if ( + (self->cancellation_flag && !atomic_load(self->cancellation_flag)) || + (self->clock_limit && get_clock() - 
self->start_clock > self->clock_limit) + ) { ts_subtree_release(&self->tree_pool, lookahead); return false; } @@ -1508,8 +1512,8 @@ TSParser *ts_parser_new() { self->reusable_node = reusable_node_new(); self->dot_graph_file = NULL; self->halt_on_error = false; - self->enabled = true; - self->clock_limit = UINT64_MAX; + self->cancellation_flag = NULL; + self->clock_limit = 0; self->start_clock = 0; self->operation_count = 0; self->old_tree = NULL_SUBTREE; @@ -1585,12 +1589,12 @@ void ts_parser_halt_on_error(TSParser *self, bool should_halt_on_error) { self->halt_on_error = should_halt_on_error; } -bool ts_parser_enabled(const TSParser *self) { - return self->enabled; +const uint32_t *ts_parser_cancellation_flag(const TSParser *self) { + return (const uint32_t *)self->cancellation_flag; } -void ts_parser_set_enabled(TSParser *self, bool enabled) { - self->enabled = enabled; +void ts_parser_set_cancellation_flag(TSParser *self, const uint32_t *flag) { + self->cancellation_flag = (const volatile uint32_t *)flag; } uint64_t ts_parser_timeout_micros(const TSParser *self) { @@ -1599,7 +1603,6 @@ uint64_t ts_parser_timeout_micros(const TSParser *self) { void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) { self->clock_limit = timeout_micros * get_clocks_per_second() / 1000000; - if (self->clock_limit == 0) self->clock_limit = UINT64_MAX; } void ts_parser_set_included_ranges(TSParser *self, const TSRange *ranges, uint32_t count) { From 8941dc1dda8f418b974b9e4778a09c2d72f9c7ca Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 18 Mar 2019 09:52:02 -0700 Subject: [PATCH 23/33] Add cancellation flag parameter to highlight API --- cli/src/tests/highlight_test.rs | 1 + highlight/include/tree_sitter/highlight.h | 3 ++- highlight/src/c_lib.rs | 7 ++++- highlight/src/lib.rs | 31 ++++++++++++++++++----- 4 files changed, 34 insertions(+), 8 deletions(-) diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index 2847cb71..c4e86c6f 
100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -229,6 +229,7 @@ fn test_highlighting_via_c_api() { source_code.as_ptr(), source_code.as_bytes().len() as u32, buffer, + ptr::null_mut(), ); let output_bytes = c::ts_highlight_buffer_content(buffer); diff --git a/highlight/include/tree_sitter/highlight.h b/highlight/include/tree_sitter/highlight.h index 7b34aef9..458862b8 100644 --- a/highlight/include/tree_sitter/highlight.h +++ b/highlight/include/tree_sitter/highlight.h @@ -79,7 +79,8 @@ int ts_highlighter_highlight( const char *scope_name, const char *source_code, uint32_t source_code_len, - TSHighlightBuffer *output + TSHighlightBuffer *output, + const uint32_t *cancellation_flag ); // TSHighlightBuffer: This struct stores the HTML output of syntax diff --git a/highlight/src/c_lib.rs b/highlight/src/c_lib.rs index ce9f3936..a283b0f6 100644 --- a/highlight/src/c_lib.rs +++ b/highlight/src/c_lib.rs @@ -5,6 +5,7 @@ use std::ffi::CStr; use std::io::Write; use std::os::raw::c_char; use std::process::abort; +use std::sync::atomic::AtomicU32; use std::{fmt, slice}; use tree_sitter::{Language, PropertySheet}; @@ -135,13 +136,15 @@ pub extern "C" fn ts_highlighter_highlight( source_code: *const c_char, source_code_len: u32, output: *mut TSHighlightBuffer, + cancellation_flag: *const AtomicU32, ) -> ErrorCode { let this = unwrap_ptr(this); let output = unwrap_mut_ptr(output); let scope_name = unwrap(unsafe { CStr::from_ptr(scope_name).to_str() }); let source_code = unsafe { slice::from_raw_parts(source_code as *const u8, source_code_len as usize) }; - this.highlight(source_code, scope_name, output) + let cancellation_flag = unsafe { cancellation_flag.as_ref() }; + this.highlight(source_code, scope_name, output, cancellation_flag) } impl TSHighlighter { @@ -150,6 +153,7 @@ impl TSHighlighter { source_code: &[u8], scope_name: &str, output: &mut TSHighlightBuffer, + cancellation_flag: Option<&AtomicU32>, ) -> ErrorCode { let configuration 
= self.languages.get(scope_name); if configuration.is_none() { @@ -173,6 +177,7 @@ impl TSHighlighter { }) }) }, + cancellation_flag, )); output.html.clear(); diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index 7af0efb3..66c52c86 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -6,9 +6,12 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_derive::*; use std::fmt::{self, Write}; use std::mem::transmute; +use std::sync::atomic::{AtomicU32, Ordering}; use std::{cmp, str, usize}; use tree_sitter::{Language, Node, Parser, Point, PropertySheet, Range, Tree, TreePropertyCursor}; +const CANCELLATION_CHECK_INTERVAL: usize = 100; + #[derive(Debug)] enum TreeStep { Child { @@ -91,6 +94,8 @@ where parser: Parser, layers: Vec>, utf8_error_len: Option, + operation_count: usize, + cancellation_flag: Option<&'a AtomicU32>, } #[derive(Copy, Clone, Debug)] @@ -377,17 +382,22 @@ where language: Language, property_sheet: &'a PropertySheet, injection_callback: F, + cancellation_flag: Option<&'a AtomicU32>, ) -> Result { let mut parser = Parser::new(); + unsafe { parser.set_cancellation_flag(cancellation_flag.clone()) }; parser.set_language(language)?; let tree = parser .parse(source, None) .ok_or_else(|| format!("Tree-sitter: failed to parse"))?; Ok(Self { - injection_callback, - source, - source_offset: 0, parser, + source, + cancellation_flag, + injection_callback, + source_offset: 0, + operation_count: 0, + utf8_error_len: None, layers: vec![Layer::new( source, tree, @@ -400,7 +410,6 @@ where }], 0, )], - utf8_error_len: None, }) } @@ -602,6 +611,16 @@ impl<'a, T: Fn(&str) -> Option<(Language, &'a PropertySheet)>> Itera type Item = HighlightEvent<'a>; fn next(&mut self) -> Option { + if let Some(cancellation_flag) = self.cancellation_flag { + self.operation_count += 1; + if self.operation_count >= CANCELLATION_CHECK_INTERVAL { + self.operation_count = 0; + if cancellation_flag.load(Ordering::Relaxed) != 0 { + return None; + } + } + 
} + if let Some(utf8_error_len) = self.utf8_error_len.take() { self.source_offset += utf8_error_len; return Some(HighlightEvent::Source("\u{FFFD}")); @@ -824,7 +843,7 @@ pub fn highlight<'a, F>( where F: Fn(&str) -> Option<(Language, &'a PropertySheet)> + 'a, { - Highlighter::new(source, language, property_sheet, injection_callback) + Highlighter::new(source, language, property_sheet, injection_callback, None) } pub fn highlight_html<'a, F1, F2>( @@ -838,7 +857,7 @@ where F1: Fn(&str) -> Option<(Language, &'a PropertySheet)>, F2: Fn(Scope) -> &'a str, { - let highlighter = Highlighter::new(source, language, property_sheet, injection_callback)?; + let highlighter = Highlighter::new(source, language, property_sheet, injection_callback, None)?; let mut renderer = HtmlRenderer::new(attribute_callback); let mut scopes = Vec::new(); for event in highlighter { From 3340168097bd0375958ecdf2fce8ad694bfb69be Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 20 Mar 2019 13:14:02 -0700 Subject: [PATCH 24/33] Fix backwards logic for cancellation flag --- cli/src/tests/parser_test.rs | 49 +++++++++++++++-------- highlight/include/tree_sitter/highlight.h | 1 + highlight/src/c_lib.rs | 48 ++++++++++++---------- lib/src/atomic.h | 2 +- lib/src/parser.c | 2 +- 5 files changed, 61 insertions(+), 41 deletions(-) diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index c13bdd50..63a4f3e8 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -303,33 +303,48 @@ fn test_parsing_on_multiple_threads() { #[test] fn test_parsing_cancelled_by_another_thread() { - let cancellation_flag = AtomicU32::new(0); + let cancellation_flag = Box::new(AtomicU32::new(0)); let mut parser = Parser::new(); parser.set_language(get_language("javascript")).unwrap(); unsafe { parser.set_cancellation_flag(Some(&cancellation_flag)) }; - let parse_thread = thread::spawn(move || { - // Infinite input - parser.parse_with( - &mut |offset, _| { - if offset == 0 { - 
b" [" - } else { - b"0," - } - }, - None, - ) - }); + // Long input - parsing succeeds + let tree = parser.parse_with( + &mut |offset, _| { + if offset == 0 { + b" [" + } else if offset >= 20000 { + b"" + } else { + b"0," + } + }, + None, + ); + assert!(tree.is_some()); let cancel_thread = thread::spawn(move || { - thread::sleep(time::Duration::from_millis(80)); - cancellation_flag.store(1, Ordering::Relaxed); + thread::sleep(time::Duration::from_millis(100)); + cancellation_flag.store(1, Ordering::SeqCst); }); + // Infinite input + let tree = parser.parse_with( + &mut |offset, _| { + thread::yield_now(); + thread::sleep(time::Duration::from_millis(10)); + if offset == 0 { + b" [" + } else { + b"0," + } + }, + None, + ); + + // Parsing returns None because it was cancelled. cancel_thread.join().unwrap(); - let tree = parse_thread.join().unwrap(); assert!(tree.is_none()); } diff --git a/highlight/include/tree_sitter/highlight.h b/highlight/include/tree_sitter/highlight.h index 458862b8..19f09fbd 100644 --- a/highlight/include/tree_sitter/highlight.h +++ b/highlight/include/tree_sitter/highlight.h @@ -10,6 +10,7 @@ extern "C" { typedef enum { TSHighlightOk, TSHighlightUnknownScope, + TSHighlightTimeout, } TSHighlightError; // The list of scopes which can be styled for syntax highlighting. 
diff --git a/highlight/src/c_lib.rs b/highlight/src/c_lib.rs index a283b0f6..38596007 100644 --- a/highlight/src/c_lib.rs +++ b/highlight/src/c_lib.rs @@ -29,6 +29,7 @@ pub struct TSHighlightBuffer { pub enum ErrorCode { Ok, UnknownScope, + Timeout, } #[no_mangle] @@ -162,7 +163,7 @@ impl TSHighlighter { let configuration = configuration.unwrap(); let languages = &self.languages; - let highlighter = unwrap(Highlighter::new( + let highlighter = Highlighter::new( source_code, configuration.language, &configuration.property_sheet, @@ -178,29 +179,32 @@ impl TSHighlighter { }) }, cancellation_flag, - )); + ); - output.html.clear(); - output.line_offsets.clear(); - output.line_offsets.push(0); - let mut scopes = Vec::new(); - for event in highlighter { - match event { - HighlightEvent::ScopeStart(s) => { - scopes.push(s); - output.start_scope(s, &self.attribute_strings); - } - HighlightEvent::ScopeEnd => { - scopes.pop(); - output.end_scope(); - } - HighlightEvent::Source(src) => { - output.add_text(src, &scopes, &self.attribute_strings); - } - }; + if let Ok(highlighter) = highlighter { + output.html.clear(); + output.line_offsets.clear(); + output.line_offsets.push(0); + let mut scopes = Vec::new(); + for event in highlighter { + match event { + HighlightEvent::ScopeStart(s) => { + scopes.push(s); + output.start_scope(s, &self.attribute_strings); + } + HighlightEvent::ScopeEnd => { + scopes.pop(); + output.end_scope(); + } + HighlightEvent::Source(src) => { + output.add_text(src, &scopes, &self.attribute_strings); + } + }; + } + ErrorCode::Ok + } else { + ErrorCode::Timeout } - - ErrorCode::Ok } } diff --git a/lib/src/atomic.h b/lib/src/atomic.h index 89f40e48..51abccf8 100644 --- a/lib/src/atomic.h +++ b/lib/src/atomic.h @@ -22,7 +22,7 @@ static inline uint32_t atomic_dec(volatile uint32_t *p) { #else static inline uint32_t atomic_load(const volatile uint32_t *p) { - return __atomic_load_n(p, __ATOMIC_RELAXED); + return __atomic_load_n(p, __ATOMIC_SEQ_CST); } static 
inline uint32_t atomic_inc(volatile uint32_t *p) { diff --git a/lib/src/parser.c b/lib/src/parser.c index 4bc455d4..39699866 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -1287,7 +1287,7 @@ static bool ts_parser__advance( if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) { self->operation_count = 0; if ( - (self->cancellation_flag && !atomic_load(self->cancellation_flag)) || + (self->cancellation_flag && atomic_load(self->cancellation_flag)) || (self->clock_limit && get_clock() - self->start_clock > self->clock_limit) ) { ts_subtree_release(&self->tree_pool, lookahead); From 2fd9ffa8e5d8283b39d40761a8eff21348f7857b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 21 Mar 2019 10:56:32 -0700 Subject: [PATCH 25/33] Add `--cancel` flag to parse command to allow command-line testing --- cli/src/main.rs | 3 +++ cli/src/parse.rs | 28 ++++++++++++++++++++++++---- lib/src/atomic.h | 2 +- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index dc4b5ae6..db403fea 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -54,6 +54,7 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")) .arg(Arg::with_name("quiet").long("quiet").short("q")) .arg(Arg::with_name("time").long("time").short("t")) + .arg(Arg::with_name("allow-cancellation").long("cancel")) .arg(Arg::with_name("timeout").long("timeout").takes_value(true)), ) .subcommand( @@ -134,6 +135,7 @@ fn run() -> error::Result<()> { let debug_graph = matches.is_present("debug-graph"); let quiet = matches.is_present("quiet"); let time = matches.is_present("time"); + let allow_cancellation = matches.is_present("allow-cancellation"); let timeout = matches .value_of("timeout") .map_or(0, |t| u64::from_str_radix(t, 10).unwrap()); @@ -170,6 +172,7 @@ fn run() -> error::Result<()> { timeout, debug, debug_graph, + allow_cancellation, )?; } diff --git a/cli/src/parse.rs b/cli/src/parse.rs index f4002233..b5230e77 100644 
--- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -1,9 +1,10 @@ use super::error::{Error, Result}; use super::util; -use std::fs; use std::io::{self, Write}; use std::path::Path; +use std::sync::atomic::{AtomicU32, Ordering}; use std::time::Instant; +use std::{fs, thread}; use tree_sitter::{Language, LogType, Parser}; pub fn parse_file_at_path( @@ -15,6 +16,7 @@ pub fn parse_file_at_path( timeout: u64, debug: bool, debug_graph: bool, + allow_cancellation: bool, ) -> Result { let mut _log_session = None; let mut parser = Parser::new(); @@ -22,9 +24,28 @@ pub fn parse_file_at_path( let source_code = fs::read(path) .map_err(|e| Error(format!("Error reading source file {:?}: {}", path, e)))?; + // If the `--cancel` flag was passed, then cancel the parse + // when the user types a newline. + if allow_cancellation { + let flag = Box::new(AtomicU32::new(0)); + unsafe { parser.set_cancellation_flag(Some(&flag)) }; + thread::spawn(move || { + let mut line = String::new(); + io::stdin().read_line(&mut line).unwrap(); + eprintln!("Cancelling"); + flag.store(1, Ordering::Relaxed); + }); + } + + // Set a timeout based on the `--timeout` flag. 
+ parser.set_timeout_micros(timeout); + + // Render an HTML graph if `--debug-graph` was passed if debug_graph { _log_session = Some(util::log_graphs(&mut parser, "log.html")?); - } else if debug { + } + // Log to stderr if `--debug` was passed + else if debug { parser.set_logger(Some(Box::new(|log_type, message| { if log_type == LogType::Lex { io::stderr().write(b" ").unwrap(); @@ -33,7 +54,6 @@ pub fn parse_file_at_path( }))); } - parser.set_timeout_micros(timeout); let time = Instant::now(); let tree = parser.parse(&source_code, None); let duration = time.elapsed(); @@ -139,7 +159,7 @@ pub fn parse_file_at_path( write!(&mut stdout, "\n")?; } - return Ok(first_error.is_some()) + return Ok(first_error.is_some()); } else if print_time { writeln!( &mut stdout, diff --git a/lib/src/atomic.h b/lib/src/atomic.h index 51abccf8..89f40e48 100644 --- a/lib/src/atomic.h +++ b/lib/src/atomic.h @@ -22,7 +22,7 @@ static inline uint32_t atomic_dec(volatile uint32_t *p) { #else static inline uint32_t atomic_load(const volatile uint32_t *p) { - return __atomic_load_n(p, __ATOMIC_SEQ_CST); + return __atomic_load_n(p, __ATOMIC_RELAXED); } static inline uint32_t atomic_inc(volatile uint32_t *p) { From cfa474b82a55eca5ba3905c27d9ebbe99fcfb823 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 21 Mar 2019 11:23:30 -0700 Subject: [PATCH 26/33] test script: Find test binary correctly when `-g` is passed --- script/test | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/script/test b/script/test index 1ec50e30..5fda7cb2 100755 --- a/script/test +++ b/script/test @@ -67,7 +67,7 @@ while getopts "dDghl:e:s:t:" option; do esac done -shift $(expr $OPTIND - 1 ) +shift $(expr $OPTIND - 1) if [[ -n $TREE_SITTER_TEST_LANGUAGE_FILTER || -n $TREE_SITTER_TEST_EXAMPLE_FILTER || -n $TREE_SITTER_TEST_TRIAL_FILTER ]]; then top_level_filter=corpus @@ -76,7 +76,10 @@ else fi if [[ "${mode}" == "debug" ]]; then - test_binary=$(cargo test --no-run --message-format=json 2> 
/dev/null | jq -rs '.[-1].filenames[0]') + test_binary=$( + cargo test -p tree-sitter-cli --no-run --message-format=json 2> /dev/null |\ + jq -rs 'map(select(.target.name == "tree-sitter-cli" and .executable))[0].executable' + ) lldb "${test_binary}" -- $top_level_filter else cargo test -p tree-sitter-cli --jobs 1 $top_level_filter -- --nocapture From 0ccb910922218f7ee5f6ce25b16b870f762e27f6 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 21 Mar 2019 11:26:05 -0700 Subject: [PATCH 27/33] Use a size_t instead of a uint32_t for cancellation flag --- cli/src/parse.rs | 4 ++-- cli/src/tests/parser_test.rs | 4 ++-- highlight/include/tree_sitter/highlight.h | 2 +- highlight/src/c_lib.rs | 6 +++--- highlight/src/lib.rs | 6 +++--- lib/binding/bindings.rs | 5 +++-- lib/binding/lib.rs | 10 +++++----- lib/include/tree_sitter/api.h | 4 ++-- lib/src/atomic.h | 4 ++-- lib/src/parser.c | 10 +++++----- 10 files changed, 28 insertions(+), 27 deletions(-) diff --git a/cli/src/parse.rs b/cli/src/parse.rs index b5230e77..1a0a73cc 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -2,7 +2,7 @@ use super::error::{Error, Result}; use super::util; use std::io::{self, Write}; use std::path::Path; -use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::atomic::{AtomicUsize, Ordering}; use std::time::Instant; use std::{fs, thread}; use tree_sitter::{Language, LogType, Parser}; @@ -27,7 +27,7 @@ pub fn parse_file_at_path( // If the `--cancel` flag was passed, then cancel the parse // when the user types a newline. 
if allow_cancellation { - let flag = Box::new(AtomicU32::new(0)); + let flag = Box::new(AtomicUsize::new(0)); unsafe { parser.set_cancellation_flag(Some(&flag)) }; thread::spawn(move || { let mut line = String::new(); diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 63a4f3e8..51b44fde 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -1,7 +1,7 @@ use super::helpers::edits::{perform_edit, Edit, ReadRecorder}; use super::helpers::fixtures::{get_language, get_test_language}; use crate::generate::generate_parser_for_grammar; -use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::atomic::{AtomicUsize, Ordering}; use std::{thread, time}; use tree_sitter::{InputEdit, LogType, Parser, Point, Range}; @@ -303,7 +303,7 @@ fn test_parsing_on_multiple_threads() { #[test] fn test_parsing_cancelled_by_another_thread() { - let cancellation_flag = Box::new(AtomicU32::new(0)); + let cancellation_flag = Box::new(AtomicUsize::new(0)); let mut parser = Parser::new(); parser.set_language(get_language("javascript")).unwrap(); diff --git a/highlight/include/tree_sitter/highlight.h b/highlight/include/tree_sitter/highlight.h index 19f09fbd..347999d2 100644 --- a/highlight/include/tree_sitter/highlight.h +++ b/highlight/include/tree_sitter/highlight.h @@ -81,7 +81,7 @@ int ts_highlighter_highlight( const char *source_code, uint32_t source_code_len, TSHighlightBuffer *output, - const uint32_t *cancellation_flag + const size_t *cancellation_flag ); // TSHighlightBuffer: This struct stores the HTML output of syntax diff --git a/highlight/src/c_lib.rs b/highlight/src/c_lib.rs index 38596007..b8098980 100644 --- a/highlight/src/c_lib.rs +++ b/highlight/src/c_lib.rs @@ -5,7 +5,7 @@ use std::ffi::CStr; use std::io::Write; use std::os::raw::c_char; use std::process::abort; -use std::sync::atomic::AtomicU32; +use std::sync::atomic::AtomicUsize; use std::{fmt, slice}; use tree_sitter::{Language, PropertySheet}; @@ -137,7 +137,7 @@ 
pub extern "C" fn ts_highlighter_highlight( source_code: *const c_char, source_code_len: u32, output: *mut TSHighlightBuffer, - cancellation_flag: *const AtomicU32, + cancellation_flag: *const AtomicUsize, ) -> ErrorCode { let this = unwrap_ptr(this); let output = unwrap_mut_ptr(output); @@ -154,7 +154,7 @@ impl TSHighlighter { source_code: &[u8], scope_name: &str, output: &mut TSHighlightBuffer, - cancellation_flag: Option<&AtomicU32>, + cancellation_flag: Option<&AtomicUsize>, ) -> ErrorCode { let configuration = self.languages.get(scope_name); if configuration.is_none() { diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index 66c52c86..967b6466 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -6,7 +6,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_derive::*; use std::fmt::{self, Write}; use std::mem::transmute; -use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::atomic::{AtomicUsize, Ordering}; use std::{cmp, str, usize}; use tree_sitter::{Language, Node, Parser, Point, PropertySheet, Range, Tree, TreePropertyCursor}; @@ -95,7 +95,7 @@ where layers: Vec>, utf8_error_len: Option, operation_count: usize, - cancellation_flag: Option<&'a AtomicU32>, + cancellation_flag: Option<&'a AtomicUsize>, } #[derive(Copy, Clone, Debug)] @@ -382,7 +382,7 @@ where language: Language, property_sheet: &'a PropertySheet, injection_callback: F, - cancellation_flag: Option<&'a AtomicU32>, + cancellation_flag: Option<&'a AtomicUsize>, ) -> Result { let mut parser = Parser::new(); unsafe { parser.set_cancellation_flag(cancellation_flag.clone()) }; diff --git a/lib/binding/bindings.rs b/lib/binding/bindings.rs index b828c9e6..6dcbc09d 100644 --- a/lib/binding/bindings.rs +++ b/lib/binding/bindings.rs @@ -1,5 +1,6 @@ /* automatically generated by rust-bindgen */ +pub type __darwin_size_t = ::std::os::raw::c_ulong; pub type FILE = [u64; 19usize]; pub type TSSymbol = u16; #[repr(C)] @@ -136,10 +137,10 @@ extern "C" { ) -> *mut 
TSTree; } extern "C" { - pub fn ts_parser_cancellation_flag(arg1: *const TSParser) -> *const u32; + pub fn ts_parser_cancellation_flag(arg1: *const TSParser) -> *const usize; } extern "C" { - pub fn ts_parser_set_cancellation_flag(arg1: *mut TSParser, arg2: *const u32); + pub fn ts_parser_set_cancellation_flag(arg1: *mut TSParser, arg2: *const usize); } extern "C" { pub fn ts_parser_timeout_micros(arg1: *const TSParser) -> u64; diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index cc514c22..088a50c2 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -15,7 +15,7 @@ use std::collections::HashMap; use std::ffi::CStr; use std::marker::PhantomData; use std::os::raw::{c_char, c_void}; -use std::sync::atomic::AtomicU32; +use std::sync::atomic::AtomicUsize; use std::{fmt, ptr, slice, str, u16}; pub const PARSER_HEADER: &'static str = include_str!("../include/tree_sitter/parser.h"); @@ -336,13 +336,13 @@ impl Parser { }; } - pub unsafe fn cancellation_flag(&self) -> Option<&AtomicU32> { - (ffi::ts_parser_cancellation_flag(self.0) as *const AtomicU32).as_ref() + pub unsafe fn cancellation_flag(&self) -> Option<&AtomicUsize> { + (ffi::ts_parser_cancellation_flag(self.0) as *const AtomicUsize).as_ref() } - pub unsafe fn set_cancellation_flag(&self, flag: Option<&AtomicU32>) { + pub unsafe fn set_cancellation_flag(&self, flag: Option<&AtomicUsize>) { if let Some(flag) = flag { - ffi::ts_parser_set_cancellation_flag(self.0, flag as *const AtomicU32 as *const u32); + ffi::ts_parser_set_cancellation_flag(self.0, flag as *const AtomicUsize as *const usize); } else { ffi::ts_parser_set_cancellation_flag(self.0, ptr::null()); } diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index cfc5393d..380a1d50 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -88,8 +88,8 @@ void ts_parser_halt_on_error(TSParser *, bool); TSTree *ts_parser_parse(TSParser *, const TSTree *, TSInput); TSTree *ts_parser_parse_string(TSParser 
*, const TSTree *, const char *, uint32_t); TSTree *ts_parser_parse_string_encoding(TSParser *, const TSTree *, const char *, uint32_t, TSInputEncoding); -const uint32_t *ts_parser_cancellation_flag(const TSParser *); -void ts_parser_set_cancellation_flag(TSParser *, const uint32_t *); +const size_t *ts_parser_cancellation_flag(const TSParser *); +void ts_parser_set_cancellation_flag(TSParser *, const size_t *); uint64_t ts_parser_timeout_micros(const TSParser *); void ts_parser_set_timeout_micros(TSParser *, uint64_t); void ts_parser_reset(TSParser *); diff --git a/lib/src/atomic.h b/lib/src/atomic.h index 89f40e48..1df4ce08 100644 --- a/lib/src/atomic.h +++ b/lib/src/atomic.h @@ -7,7 +7,7 @@ #include -static inline uint32_t atomic_load(const volatile uint32_t *p) { +static inline size_t atomic_load(const volatile size_t *p) { return *p; } @@ -21,7 +21,7 @@ static inline uint32_t atomic_dec(volatile uint32_t *p) { #else -static inline uint32_t atomic_load(const volatile uint32_t *p) { +static inline size_t atomic_load(const volatile size_t *p) { return __atomic_load_n(p, __ATOMIC_RELAXED); } diff --git a/lib/src/parser.c b/lib/src/parser.c index 39699866..f2993a51 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -70,7 +70,7 @@ struct TSParser { uint64_t clock_limit; uint64_t start_clock; unsigned operation_count; - const volatile uint32_t *cancellation_flag; + const volatile size_t *cancellation_flag; bool halt_on_error; Subtree old_tree; TSRangeArray included_range_differences; @@ -1589,12 +1589,12 @@ void ts_parser_halt_on_error(TSParser *self, bool should_halt_on_error) { self->halt_on_error = should_halt_on_error; } -const uint32_t *ts_parser_cancellation_flag(const TSParser *self) { - return (const uint32_t *)self->cancellation_flag; +const size_t *ts_parser_cancellation_flag(const TSParser *self) { + return (const size_t *)self->cancellation_flag; } -void ts_parser_set_cancellation_flag(TSParser *self, const uint32_t *flag) { - self->cancellation_flag 
= (const volatile uint32_t *)flag; +void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag) { + self->cancellation_flag = (const volatile size_t *)flag; } uint64_t ts_parser_timeout_micros(const TSParser *self) { From 501708623294b369f86770892bc133f42f1f5f51 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 21 Mar 2019 11:26:13 -0700 Subject: [PATCH 28/33] Switch back to rust stable on CI --- .appveyor.yml | 4 ++-- .travis.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index a2787be7..610ac134 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -5,8 +5,8 @@ install: # Install rust - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe - - IF "%PLATFORM%" == "x86" rustup-init -y --default-toolchain beta --default-host i686-pc-windows-msvc - - IF "%PLATFORM%" == "x64" rustup-init -y --default-toolchain beta --default-host x86_64-pc-windows-msvc + - IF "%PLATFORM%" == "x86" rustup-init -y --default-toolchain stable --default-host i686-pc-windows-msvc + - IF "%PLATFORM%" == "x64" rustup-init -y --default-toolchain stable --default-host x86_64-pc-windows-msvc - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin - rustc -vV - cargo -vV diff --git a/.travis.yml b/.travis.yml index b92cd1f2..06c71b34 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: rust rust: - - beta + - stable os: - linux From 74d154c706a1bf1266f7b1b2a2ff63fff12d82f9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 21 Mar 2019 14:13:42 -0700 Subject: [PATCH 29/33] Use CLOCK_MONOTONIC on platforms that support it This way, timeouts will apply even if the current process is starved for CPU. 
--- lib/src/clock.h | 125 +++++++++++++++++++++++++++++++++++++++++++---- lib/src/parser.c | 20 +++++--- 2 files changed, 128 insertions(+), 17 deletions(-) diff --git a/lib/src/clock.h b/lib/src/clock.h index 3193a6b3..2792febb 100644 --- a/lib/src/clock.h +++ b/lib/src/clock.h @@ -3,30 +3,137 @@ #include +typedef uint64_t TSDuration; + #ifdef _WIN32 -#include +// Windows: +// * Represent a time as a performance counter value. +// * Represent a duration as a number of performance counter ticks. -static inline uint64_t get_clock() { +#include +typedef uint64_t TSClock; + +static inline TSDuration duration_from_micros(uint64_t micros) { + LARGE_INTEGER frequency; + QueryPerformanceFrequency(&frequency); + return micros * (uint64_t)frequency.QuadPart / 1000000; +} + +static inline uint64_t duration_to_micros(TSDuration self) { + LARGE_INTEGER frequency; + QueryPerformanceFrequency(&frequency); + return self * 1000000 / (uint64_t)frequency.QuadPart; +} + +static inline TSClock clock_null() { + return 0; +} + +static inline TSClock clock_now() { LARGE_INTEGER result; QueryPerformanceCounter(&result); return (uint64_t)result.QuadPart; } -static inline uint64_t get_clocks_per_second() { - LARGE_INTEGER result; - QueryPerformanceFrequency(&result); - return (uint64_t)result.QuadPart; +static inline TSClock clock_after(TSClock base, TSDuration duration) { + return base + duration; +} + +static inline bool clock_is_null(TSClock self) { + return !self; +} + +static inline bool clock_is_gt(TSClock self, TSClock other) { + return self > other; +} + +#elif defined(CLOCK_MONOTONIC) + +// POSIX with monotonic clock support (Linux, macOS >= 10.12) +// * Represent a time as a monotonic (seconds, nanoseconds) pair. +// * Represent a duration as a number of microseconds. +// +// On these platforms, parse timeouts will correspond accurately to +// real time, regardless of what other processes are running. 
+ +#include +typedef struct timespec TSClock; + +static inline TSDuration duration_from_micros(uint64_t micros) { + return micros; +} + +static inline uint64_t duration_to_micros(TSDuration self) { + return self; +} + +static inline TSClock clock_now() { + TSClock result; + clock_gettime(CLOCK_MONOTONIC, &result); + return result; +} + +static inline TSClock clock_null() { + return (TSClock) {0, 0}; +} + +static inline TSClock clock_after(TSClock base, TSDuration duration) { + TSClock result = base; + result.tv_sec += duration / 1000000; + result.tv_nsec += (duration % 1000000) * 1000; + return result; +} + +static inline bool clock_is_null(TSClock self) { + return !self.tv_sec; +} + +static inline bool clock_is_gt(TSClock self, TSClock other) { + if (self.tv_sec > other.tv_sec) return true; + if (self.tv_sec < other.tv_sec) return false; + return self.tv_nsec > other.tv_nsec; } #else -static inline uint64_t get_clock() { +// POSIX without monotonic clock support +// * Represent a time as a process clock value. +// * Represent a duration as a number of process clock ticks. +// +// On these platforms, parse timeouts may be affected by other processes, +// which is not ideal, but is better than using a non-monotonic time API +// like `gettimeofday`. 
+ +#include +typedef uint64_t TSClock; + +static inline TSDuration duration_from_micros(uint64_t micros) { + return micros * (uint64_t)CLOCKS_PER_SEC / 1000000; +} + +static inline uint64_t duration_to_micros(TSDuration self) { + return self * 1000000 / (uint64_t)CLOCKS_PER_SEC; +} + +static inline TSClock clock_null() { + return 0; +} + +static inline TSClock clock_now() { return (uint64_t)clock(); } -static inline uint64_t get_clocks_per_second() { - return (uint64_t)CLOCKS_PER_SEC; +static inline TSClock clock_after(TSClock base, TSDuration duration) { + return base + duration; +} + +static inline bool clock_is_null(TSClock self) { + return !self; +} + +static inline bool clock_is_gt(TSClock self, TSClock other) { + return self > other; } #endif diff --git a/lib/src/parser.c b/lib/src/parser.c index 39699866..3937bd64 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -66,9 +66,9 @@ struct TSParser { ReusableNode reusable_node; void *external_scanner_payload; FILE *dot_graph_file; + TSClock end_clock; + TSDuration timeout_duration; unsigned accept_count; - uint64_t clock_limit; - uint64_t start_clock; unsigned operation_count; const volatile uint32_t *cancellation_flag; bool halt_on_error; @@ -1288,7 +1288,7 @@ static bool ts_parser__advance( self->operation_count = 0; if ( (self->cancellation_flag && atomic_load(self->cancellation_flag)) || - (self->clock_limit && get_clock() - self->start_clock > self->clock_limit) + (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)) ) { ts_subtree_release(&self->tree_pool, lookahead); return false; @@ -1513,8 +1513,8 @@ TSParser *ts_parser_new() { self->dot_graph_file = NULL; self->halt_on_error = false; self->cancellation_flag = NULL; - self->clock_limit = 0; - self->start_clock = 0; + self->timeout_duration = 0; + self->end_clock = clock_null(); self->operation_count = 0; self->old_tree = NULL_SUBTREE; self->scratch_tree.ptr = &self->scratch_tree_data; @@ -1598,11 +1598,11 @@ void 
ts_parser_set_cancellation_flag(TSParser *self, const uint32_t *flag) { } uint64_t ts_parser_timeout_micros(const TSParser *self) { - return self->clock_limit * 1000000 / get_clocks_per_second(); + return duration_to_micros(self->timeout_duration); } void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) { - self->clock_limit = timeout_micros * get_clocks_per_second() / 1000000; + self->timeout_duration = duration_from_micros(timeout_micros); } void ts_parser_set_included_ranges(TSParser *self, const TSRange *ranges, uint32_t count) { @@ -1666,7 +1666,11 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { uint32_t position = 0, last_position = 0, version_count = 0; self->operation_count = 0; - self->start_clock = get_clock(); + if (self->timeout_duration) { + self->end_clock = clock_after(clock_now(), self->timeout_duration); + } else { + self->end_clock = clock_null(); + } do { for (StackVersion version = 0; From 5a59f19b694993dbe841220e6b56508322201236 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 21 Mar 2019 16:06:06 -0700 Subject: [PATCH 30/33] Use explicit syntax for functions with no parameters --- cli/src/generate/render.rs | 4 ++-- lib/include/tree_sitter/api.h | 2 +- lib/include/tree_sitter/parser.h | 2 +- lib/src/clock.h | 12 ++++++------ lib/src/length.h | 2 +- lib/src/node.c | 2 +- lib/src/parser.c | 2 +- lib/src/reusable_node.h | 2 +- 8 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 86ed3dc7..644e74d3 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -713,7 +713,7 @@ impl Generator { let external_scanner_name = format!("{}_external_scanner", language_function_name); if !self.syntax_grammar.external_tokens.is_empty() { - add_line!(self, "void *{}_create();", external_scanner_name); + add_line!(self, "void *{}_create(void);", external_scanner_name); add_line!(self, "void {}_destroy(void *);", 
external_scanner_name); add_line!( self, @@ -740,7 +740,7 @@ impl Generator { add_line!( self, - "extern const TSLanguage *{}() {{", + "extern const TSLanguage *{}(void) {{", language_function_name ); indent!(self); diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index cfc5393d..2f576d87 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -77,7 +77,7 @@ typedef struct { uint32_t context[2]; } TSTreeCursor; -TSParser *ts_parser_new(); +TSParser *ts_parser_new(void); void ts_parser_delete(TSParser *); const TSLanguage *ts_parser_language(const TSParser *); bool ts_parser_set_language(TSParser *, const TSLanguage *); diff --git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index e5037062..d7365c11 100644 --- a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -92,7 +92,7 @@ struct TSLanguage { struct { const bool *states; const TSSymbol *symbol_map; - void *(*create)(); + void *(*create)(void); void (*destroy)(void *); bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); unsigned (*serialize)(void *, char *); diff --git a/lib/src/clock.h b/lib/src/clock.h index 2792febb..50656273 100644 --- a/lib/src/clock.h +++ b/lib/src/clock.h @@ -26,11 +26,11 @@ static inline uint64_t duration_to_micros(TSDuration self) { return self * 1000000 / (uint64_t)frequency.QuadPart; } -static inline TSClock clock_null() { +static inline TSClock clock_null(void) { return 0; } -static inline TSClock clock_now() { +static inline TSClock clock_now(void) { LARGE_INTEGER result; QueryPerformanceCounter(&result); return (uint64_t)result.QuadPart; @@ -68,13 +68,13 @@ static inline uint64_t duration_to_micros(TSDuration self) { return self; } -static inline TSClock clock_now() { +static inline TSClock clock_now(void) { TSClock result; clock_gettime(CLOCK_MONOTONIC, &result); return result; } -static inline TSClock clock_null() { +static inline TSClock clock_null(void) { return 
(TSClock) {0, 0}; } @@ -116,11 +116,11 @@ static inline uint64_t duration_to_micros(TSDuration self) { return self * 1000000 / (uint64_t)CLOCKS_PER_SEC; } -static inline TSClock clock_null() { +static inline TSClock clock_null(void) { return 0; } -static inline TSClock clock_now() { +static inline TSClock clock_now(void) { return (uint64_t)clock(); } diff --git a/lib/src/length.h b/lib/src/length.h index ffe0c7f4..61de9fc1 100644 --- a/lib/src/length.h +++ b/lib/src/length.h @@ -36,7 +36,7 @@ static inline Length length_sub(Length len1, Length len2) { return result; } -static inline Length length_zero() { +static inline Length length_zero(void) { Length result = {0, {0, 0}}; return result; } diff --git a/lib/src/node.c b/lib/src/node.c index eb4a3121..b4048811 100644 --- a/lib/src/node.c +++ b/lib/src/node.c @@ -22,7 +22,7 @@ TSNode ts_node_new(const TSTree *tree, const Subtree *subtree, Length position, }; } -static inline TSNode ts_node__null() { +static inline TSNode ts_node__null(void) { return ts_node_new(NULL, NULL, length_zero(), 0); } diff --git a/lib/src/parser.c b/lib/src/parser.c index 3937bd64..f83ae6da 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -1501,7 +1501,7 @@ static bool ts_parser_has_outstanding_parse(TSParser *self) { // Parser - Public -TSParser *ts_parser_new() { +TSParser *ts_parser_new(void) { TSParser *self = ts_calloc(1, sizeof(TSParser)); ts_lexer_init(&self->lexer); array_init(&self->reduce_actions); diff --git a/lib/src/reusable_node.h b/lib/src/reusable_node.h index ab91cb36..9cba9519 100644 --- a/lib/src/reusable_node.h +++ b/lib/src/reusable_node.h @@ -11,7 +11,7 @@ typedef struct { Subtree last_external_token; } ReusableNode; -static inline ReusableNode reusable_node_new() { +static inline ReusableNode reusable_node_new(void) { return (ReusableNode) {array_new(), NULL_SUBTREE}; } From d51cd86a3a12ba8730c3e1251c63c32000adf4ca Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 22 Mar 2019 14:20:31 -0700 Subject: 
[PATCH 31/33] Update available bindings section of docs --- docs/index.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/index.md b/docs/index.md index 0a64e1dc..3c3e0d16 100644 --- a/docs/index.md +++ b/docs/index.md @@ -16,9 +16,10 @@ Tree-sitter is a parser generator tool and an incremental parsing library. It ca There are currently bindings that allow Tree-sitter to be used from the following languages: * [JavaScript](https://github.com/tree-sitter/node-tree-sitter) -* [Rust](https://github.com/tree-sitter/rust-tree-sitter) -* [Haskell](https://github.com/tree-sitter/haskell-tree-sitter) +* [Python](https://github.com/tree-sitter/py-tree-sitter) +* [Rust](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding) * [Ruby](https://github.com/tree-sitter/ruby-tree-sitter) +* [Haskell](https://github.com/tree-sitter/haskell-tree-sitter) ### Available Parsers From 6f804fd2e2dbf7af5186d58e041cd97c3a7fad40 Mon Sep 17 00:00:00 2001 From: Justin Woo Date: Sun, 24 Mar 2019 14:35:39 +0200 Subject: [PATCH 32/33] fix example snippet in creating parsers docs --- docs/section-3-creating-parsers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index 37bccb65..6ad8d5c5 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -77,7 +77,7 @@ Then run the the following command: tree-sitter generate ``` -This will generate the C code required to parse this trivial language, as well as all of the files needed to compile and load this native parser as a Node.js module. You can test this parser by creating a source file with the contents `hello;` and parsing it: +This will generate the C code required to parse this trivial language, as well as all of the files needed to compile and load this native parser as a Node.js module. 
You can test this parser by creating a source file with the contents `hello` and parsing it: ```sh tree-sitter parse ./the-file From fe5aa46d8d64e351f7a711acb76bea6718f3dbde Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Tue, 26 Mar 2019 16:53:35 +0100 Subject: [PATCH 33/33] Swap two incorrectly placed comments --- lib/src/stack.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/src/stack.h b/lib/src/stack.h index 1ccd98cd..ec7a69d2 100644 --- a/lib/src/stack.h +++ b/lib/src/stack.h @@ -92,11 +92,11 @@ StackSummary *ts_stack_get_summary(Stack *, StackVersion); // Get the total cost of all errors on the given version of the stack. unsigned ts_stack_error_cost(const Stack *, StackVersion version); -// Determine whether the given two stack versions can be merged. -bool ts_stack_merge(Stack *, StackVersion, StackVersion); - // Merge the given two stack versions if possible, returning true // if they were successfully merged and false otherwise. +bool ts_stack_merge(Stack *, StackVersion, StackVersion); + +// Determine whether the given two stack versions can be merged. bool ts_stack_can_merge(Stack *, StackVersion, StackVersion); TSSymbol ts_stack_resume(Stack *, StackVersion);