From 6e0c49305e003d69cb6418a0cb8a9b1744035afe Mon Sep 17 00:00:00 2001 From: Amin Yahyaabadi Date: Fri, 24 Sep 2021 12:55:17 -0500 Subject: [PATCH 001/347] Use ISO C and C++ conformant name: _fdopen on windows Fixes ``` warning: In file included from src\lib.c:12: warning: src/./parser.c:1781:28: warning: 'fdopen' is deprecated: The POSIX name for this item is deprecated. Instead, use the ISO C and C++ conformant name: _fdopen. See online help for details. [-Wdeprecated-declarations] warning: self->dot_graph_file = fdopen(fd, "a"); warning: ^ warning: C:\Program Files (x86)\Windows Kits\10\Include\10.0.19041.0\ucrt\stdio.h:2431:28: note: 'fdopen' has been explicitly marked deprecated here warning: _Check_return_ _CRT_NONSTDC_DEPRECATE(_fdopen) _ACRTIMP FILE* __cdecl fdopen(_In_ int _FileHandle, _In_z_ char const* _Format); warning: ^ warning: C:\Program Files (x86)\Windows Kits\10\Include\10.0.19041.0\ucrt\corecrt.h:414:50: note: expanded from macro '_CRT_NONSTDC_DEPRECATE' warning: #define _CRT_NONSTDC_DEPRECATE(_NewName) _CRT_DEPRECATE_TEXT( \ warning: ^ warning: C:\Program Files (x86)\Microsoft Visual Studio\2019\Preview\VC\Tools\MSVC\14.29.30133\include\vcruntime.h:310:47: note: expanded from macro '_CRT_DEPRECATE_TEXT' warning: #define _CRT_DEPRECATE_TEXT(_Text) __declspec(deprecated(_Text)) ``` --- lib/src/parser.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/src/parser.c b/lib/src/parser.c index 7db1aceb..ebeb4b37 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -1776,7 +1776,11 @@ void ts_parser_print_dot_graphs(TSParser *self, int fd) { } if (fd >= 0) { + #ifdef _WIN32 + self->dot_graph_file = _fdopen(fd, "a"); + #else self->dot_graph_file = fdopen(fd, "a"); + #endif } else { self->dot_graph_file = NULL; } From 94ffcdadf3abc3890e4f1e7d4c8ec9cdc8543668 Mon Sep 17 00:00:00 2001 From: Amin Yahyaabadi Date: Fri, 24 Sep 2021 15:20:34 -0500 Subject: [PATCH 002/347] Mark stack__iter as static Fixes ``` warning: In file included from 
src\lib.c:14: warning: src/./stack.c:311:9: warning: static function 'ts_stack__add_slice' is used in an inline function with external linkage [-Wstatic-in-inline] warning: ts_stack__add_slice( warning: ^ warning: src/./stack.c:274:1: note: use 'static' to give inline function 'stack__iter' internal linkage warning: inline StackSliceArray stack__iter(Stack *self, StackVersion version, warning: ^ warning: static warning: src/./stack.c:15:16: note: expanded from macro 'inline' warning: #define inline __forceinline warning: ^ warning: src/./stack.c:258:13: note: 'ts_stack__add_slice' declared here warning: static void ts_stack__add_slice(Stack *self, StackVersion original_version, warning: ^ warning: 1 warning generated. Finished dev [unoptimized + debuginfo] target(s) in 2.01s ``` --- lib/src/stack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/src/stack.c b/lib/src/stack.c index 1dc6895f..e49289cf 100644 --- a/lib/src/stack.c +++ b/lib/src/stack.c @@ -302,7 +302,7 @@ static void ts_stack__add_slice( array_push(&self->slices, slice); } -inline StackSliceArray stack__iter( +static StackSliceArray stack__iter( Stack *self, StackVersion version, StackCallback callback, From a33e9141a8e0e3eeb08aeae228bb8b0214ea7d0b Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Wed, 9 Nov 2022 04:55:52 +0200 Subject: [PATCH 003/347] fix(cli): Racing on playground webserver port binding --- cli/Cargo.toml | 2 +- cli/src/playground.rs | 39 ++++++++++++++++++++------------------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 3c2bd171..5403075d 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -35,7 +35,7 @@ rustc-hash = "1" semver = "1.0" serde = { version = "1.0.130", features = ["derive"] } smallbitvec = "2.5.1" -tiny_http = "0.8" +tiny_http = "0.12.0" walkdir = "2.3" webbrowser = "0.5.1" which = "4.1.0" diff --git a/cli/src/playground.rs b/cli/src/playground.rs index be804b50..a2dbef96 100644 --- 
a/cli/src/playground.rs +++ b/cli/src/playground.rs @@ -45,20 +45,7 @@ fn get_main_html(tree_sitter_dir: &Option) -> Cow<'static, [u8]> { } pub fn serve(grammar_path: &Path, open_in_browser: bool) { - let port = env::var("TREE_SITTER_PLAYGROUND_PORT") - .map(|v| v.parse::().expect("Invalid port specification")) - .unwrap_or_else( - |_| get_available_port().expect( - "Couldn't find an available port, try providing a port number via the TREE_SITTER_PLAYGROUND_PORT \ - environment variable" - ) - ); - let addr = format!( - "{}:{}", - env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or("127.0.0.1".to_owned()), - port - ); - let server = Server::http(&addr).expect("Failed to start web server"); + let server = get_server(); let grammar_name = wasm::get_grammar_name(&grammar_path.join("src")) .with_context(|| "Failed to get wasm filename") .unwrap(); @@ -71,7 +58,7 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) { ) }) .unwrap(); - let url = format!("http://{}", addr); + let url = format!("http://{}", server.server_addr()); println!("Started playground on: {}", url); if open_in_browser { if let Err(_) = webbrowser::open(&url) { @@ -135,10 +122,24 @@ fn response<'a>(data: &'a [u8], header: &Header) -> Response<&'a [u8]> { .with_header(header.clone()) } -fn get_available_port() -> Option { - (8000..12000).find(port_is_available) +fn get_server() -> Server { + let addr = env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or("127.0.0.1".to_owned()); + let port = env::var("TREE_SITTER_PLAYGROUND_PORT") + .map(|v| v.parse::().expect("Invalid port specification")) + .ok(); + let listener = match port { + Some(port) => bind_to(&*addr, port).expect("Can't bind to the specified port"), + None => { + get_listener_on_available_port(&*addr).expect("Can't find a free port to bind to it") + } + }; + Server::from_listener(listener, None).expect("Failed to start web server") } -fn port_is_available(port: &u16) -> bool { - TcpListener::bind(("127.0.0.1", *port)).is_ok() +fn 
get_listener_on_available_port(addr: &str) -> Option { + (8000..12000).find_map(|port| bind_to(addr, port)) +} + +fn bind_to(addr: &str, port: u16) -> Option { + TcpListener::bind(format!("{addr}:{port}")).ok() } From c669e5ee159e0c59a3f094327a01dd688bc67c56 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sun, 11 Dec 2022 19:11:58 +0200 Subject: [PATCH 004/347] Fix permanent rebuild triggering in a git worktree due to wrong git branch file path --- Cargo.lock | 508 +++++++++++++++------------------------------------ cli/build.rs | 16 +- 2 files changed, 166 insertions(+), 358 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 49f95269..f5c4e7e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,22 +4,13 @@ version = 3 [[package]] name = "aho-corasick" -version = "0.7.15" +version = "0.7.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" dependencies = [ "memchr", ] -[[package]] -name = "ansi_term" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" -dependencies = [ - "winapi", -] - [[package]] name = "ansi_term" version = "0.12.1" @@ -31,27 +22,15 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.40" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28b2cd92db5cbd74e8e5028f7e27dd7aa3090e89e4f2a197cc7c8dfb69c7063b" - -[[package]] -name = "arrayref" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" - -[[package]] -name = "arrayvec" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" +checksum = 
"216261ddc8289130e551ddcd5ce8a064710c0d064a4d2895c67151c92b5443f6" [[package]] name = "ascii" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbf56136a5198c7b01a49e3afcbef6cf84597273d298f54432926024107b0109" +checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16" [[package]] name = "atty" @@ -66,44 +45,27 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.0.1" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" - -[[package]] -name = "base64" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "bitflags" -version = "1.2.1" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" - -[[package]] -name = "blake2b_simd" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afa748e348ad3be8263be728124b24a24f268266f6f5d58af9d75f6a40b5c587" -dependencies = [ - "arrayref", - "arrayvec", - "constant_time_eq", -] +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bumpalo" -version = "3.6.1" +version = "3.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63396b8a4b9de3f4fdfb320ab6080762242f66a8ef174c49d8e19b674db4cdbe" +checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" [[package]] name = "cc" -version = "1.0.67" +version = "1.0.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3c69b077ad434294d3ce9f1f6143a2a4b89a8a2d54ef813d85003a4fd1137fd" +checksum = 
"e9f73505338f7d905b19d18738976aae232eb46b8efc15554ffc56deb5d9ebe4" [[package]] name = "cfg-if" @@ -111,19 +73,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "chrono" -version = "0.4.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" -dependencies = [ - "libc", - "num-integer", - "num-traits", - "time", - "winapi", -] - [[package]] name = "chunked_transfer" version = "1.4.0" @@ -132,11 +81,11 @@ checksum = "fff857943da45f546682664a79488be82e69e43c1a7a2307679ab9afb3a66d2e" [[package]] name = "clap" -version = "2.33.3" +version = "2.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" dependencies = [ - "ansi_term 0.11.0", + "ansi_term", "atty", "bitflags", "strsim", @@ -145,28 +94,11 @@ dependencies = [ "vec_map", ] -[[package]] -name = "constant_time_eq" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" - -[[package]] -name = "crossbeam-utils" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7e9d99fa91428effe99c5c6d4634cdeba32b8cf784fc428a2a687f61a952c49" -dependencies = [ - "autocfg", - "cfg-if", - "lazy_static", -] - [[package]] name = "ctor" -version = "0.1.20" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e98e2ad1a782e33928b96fc3948e7c355e5af34ba4de7670fe8bac2a3b2006d" +checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" dependencies = [ "quote", "syn", @@ -174,9 +106,9 @@ dependencies = [ [[package]] name = "diff" 
-version = "0.1.12" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e25ea47919b1560c4e3b7fe0aaab9becf5b84a10325ddf7db0f0ba5e1026499" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" [[package]] name = "difference" @@ -186,18 +118,18 @@ checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" [[package]] name = "dirs" -version = "3.0.1" +version = "3.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "142995ed02755914747cc6ca76fc7e4583cd18578746716d0508ea6ed558b9ff" +checksum = "30baa043103c9d0c2a57cf537cc2f35623889dc0d405e6c3cccfadbc81c71309" dependencies = [ "dirs-sys", ] [[package]] name = "dirs-sys" -version = "0.3.5" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e93d7f5705de3e49895a2b5e0b8855a1c27f080192ae9c32a6432d50741a57a" +checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" dependencies = [ "libc", "redox_users", @@ -206,40 +138,28 @@ dependencies = [ [[package]] name = "either" -version = "1.6.1" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" +checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" [[package]] -name = "form_urlencoded" -version = "1.0.1" +name = "fastrand" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" +checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499" dependencies = [ - "matches", - "percent-encoding", + "instant", ] [[package]] name = "getrandom" -version = "0.1.16" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +checksum = 
"c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" dependencies = [ "cfg-if", "libc", - "wasi 0.9.0+wasi-snapshot-preview1", -] - -[[package]] -name = "getrandom" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9495705279e7140bf035dde1f6e750c162df8b625267cd52cc44e0b156732c8" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.10.2+wasi-snapshot-preview1", + "wasi", ] [[package]] @@ -250,60 +170,64 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" [[package]] name = "hashbrown" -version = "0.9.1" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" [[package]] name = "hermit-abi" -version = "0.1.18" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "322f4de77956e22ed0e5032c359a0f1273f1f7f0d79bfa3b8ffbc730d7fbcc5c" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" dependencies = [ "libc", ] [[package]] name = "html-escape" -version = "0.2.6" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d348900ce941b7474395ba922ed3735a517df4546a2939ddb416ce85eeaa988e" +checksum = "15315cfa9503e9aa85a477138eff76a1b203a430703548052c330b69d8d8c205" dependencies = [ "utf8-width", ] [[package]] -name = "idna" -version = "0.2.2" +name = "httpdate" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89829a5d69c23d348314a7ac337fe39173b61149a9864deabd260983aed48c21" -dependencies = [ - "matches", - "unicode-bidi", - "unicode-normalization", -] +checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" [[package]] name = "indexmap" -version = "1.6.1" +version = "1.9.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb1fa934250de4de8aef298d81c729a7d33d8c239daa3a7575e6b92bfc7313b" +checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" dependencies = [ "autocfg", "hashbrown", ] [[package]] -name = "itoa" -version = "0.4.7" +name = "instant" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "itoa" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc" [[package]] name = "js-sys" -version = "0.3.48" +version = "0.3.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc9f84f9b115ce7843d60706df1422a916680bfdfcbdb0447c5614ff9d7e4d78" +checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" dependencies = [ "wasm-bindgen", ] @@ -316,15 +240,15 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.86" +version = "0.2.138" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7282d924be3275cec7f6756ff4121987bc6481325397dde6ba3e7802b1a8b1c" +checksum = "db6d7e329c562c5dfab7a46a2afabc8b987ab9a4834c9d1ca04dc54c1546cef8" [[package]] name = "libloading" -version = "0.7.0" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f84d96438c15fcd6c3f244c8fce01d1e2b9c6b5623e9c711dc9286d8fc92d6a" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" dependencies = [ "cfg-if", "winapi", @@ -332,70 +256,39 @@ dependencies = [ [[package]] name = "log" -version = "0.4.14" +version = "0.4.17" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" dependencies = [ "cfg-if", ] -[[package]] -name = "matches" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" - [[package]] name = "memchr" -version = "2.3.4" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" - -[[package]] -name = "num-integer" -version = "0.1.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" -dependencies = [ - "autocfg", - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" -dependencies = [ - "autocfg", -] +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "once_cell" -version = "1.7.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10acf907b94fc1b1a152d08ef97e7759650268cf986bf127f387e602b02c7e5a" +checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" [[package]] name = "output_vt100" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53cdc5b785b7a58c5aad8216b3dfa114df64b0b06ae6e1501cef91df2fbdf8f9" +checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66" dependencies = [ "winapi", ] -[[package]] -name = "percent-encoding" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" - [[package]] name = "ppv-lite86" -version = "0.2.10" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "pretty_assertions" @@ -403,7 +296,7 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1cab0e7c02cf376875e9335e0ba1da535775beb5450d21e1dffca068818ed98b" dependencies = [ - "ansi_term 0.12.1", + "ansi_term", "ctor", "diff", "output_vt100", @@ -411,39 +304,38 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.24" +version = "1.0.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" dependencies = [ - "unicode-xid", + "unicode-ident", ] [[package]] name = "quote" -version = "1.0.9" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" dependencies = [ "proc-macro2", ] [[package]] name = "rand" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", "rand_chacha", "rand_core", - "rand_hc", ] [[package]] name = "rand_chacha" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d" +checksum = 
"e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", "rand_core", @@ -451,65 +343,49 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.6.2" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.2", -] - -[[package]] -name = "rand_hc" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73" -dependencies = [ - "rand_core", + "getrandom", ] [[package]] name = "redox_syscall" -version = "0.1.57" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" - -[[package]] -name = "redox_syscall" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94341e4e44e24f6b591b59e47a8a027df12e008d73fd5672dbea9cc22f4507d9" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ "bitflags", ] [[package]] name = "redox_users" -version = "0.3.5" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de0737333e7a9502c789a36d7c7fa6092a49895d4faa31ca5df163857ded2e9d" +checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" dependencies = [ - "getrandom 0.1.16", - "redox_syscall 0.1.57", - "rust-argon2", + "getrandom", + "redox_syscall", + "thiserror", ] [[package]] name = "regex" -version = "1.4.3" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a" +checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a" dependencies = [ 
"aho-corasick", "memchr", "regex-syntax", - "thread_local", ] [[package]] name = "regex-syntax" -version = "0.6.22" +version = "0.6.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581" +checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" [[package]] name = "remove_dir_all" @@ -520,18 +396,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "rust-argon2" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b18820d944b33caa75a71378964ac46f58517c92b6ae5f762636247c09e78fb" -dependencies = [ - "base64", - "blake2b_simd", - "constant_time_eq", - "crossbeam-utils", -] - [[package]] name = "rustc-hash" version = "1.1.0" @@ -540,9 +404,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "ryu" -version = "1.0.5" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" [[package]] name = "same-file" @@ -555,24 +419,24 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.5" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0486718e92ec9a68fbed73bb5ef687d71103b142595b406835649bebd33f72c7" +checksum = "e25dfac463d778e353db5be2449d1cce89bd6fd23c9f1ea21310ce6e5a1b29c4" [[package]] name = "serde" -version = "1.0.130" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913" +checksum = "256b9932320c590e707b94576e3cc1f7c9024d0ee6612dfbcf1cb106cbe8e055" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.130" +version = "1.0.149" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b" +checksum = "b4eae9b04cbffdfd550eb462ed33bc6a1b68c935127d008b27444d08380f94e4" dependencies = [ "proc-macro2", "quote", @@ -581,9 +445,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.63" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43535db9747a4ba938c0ce0a98cc631a46ebf943c9e1d604e091df6007620bf6" +checksum = "020ff22c755c2ed3f8cf162dbb41a7268d934702f3ed3631656ea597e08fc3db" dependencies = [ "indexmap", "itoa", @@ -605,25 +469,25 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" [[package]] name = "syn" -version = "1.0.67" +version = "1.0.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6498a9efc342871f91cc2d0d694c674368b4ceb40f62b65a7a08c3792935e702" +checksum = "60b9b43d45702de4c839cb9b51d9f529c5dd26a4aff255b42b1ebc03e88ee908" dependencies = [ "proc-macro2", "quote", - "unicode-xid", + "unicode-ident", ] [[package]] name = "tempfile" -version = "3.2.0" +version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" +checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" dependencies = [ "cfg-if", + "fastrand", "libc", - "rand", - "redox_syscall 0.2.5", + "redox_syscall", "remove_dir_all", "winapi", ] @@ -639,76 +503,41 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.25" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa6f76457f59514c7eeb4e59d891395fab0b2fd1d40723ae737d64153392e9c6" +checksum = "10deb33631e3c9018b9baf9dcbbc4f737320d2b576bac10f6aefa048fa407e3e" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.25" +version = "1.0.37" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a36768c0fbf1bb15eca10defa29526bda730a2376c2ab4393ccfa16fb1a318d" +checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb" dependencies = [ "proc-macro2", "quote", "syn", ] -[[package]] -name = "thread_local" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd" -dependencies = [ - "once_cell", -] - -[[package]] -name = "time" -version = "0.1.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "tiny_http" -version = "0.8.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eded47106b8e52d8ed8119f0ea6e8c0f5881e69783e0297b5a8462958f334bc1" +checksum = "389915df6413a2e74fb181895f933386023c71110878cd0825588928e64cdc82" dependencies = [ "ascii", - "chrono", "chunked_transfer", + "httpdate", "log", - "url", ] -[[package]] -name = "tinyvec" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "317cca572a0e89c3ce0ca1f1bdc9369547fe318a683418e42ac8f59d14701023" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" - [[package]] name = "toml" -version = "0.5.8" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa" +checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7" dependencies = [ "serde", ] @@ -726,7 +555,7 @@ dependencies = [ name = "tree-sitter-cli" version = "0.20.7" dependencies = [ - "ansi_term 0.12.1", 
+ "ansi_term", "anyhow", "atty", "clap", @@ -807,52 +636,22 @@ dependencies = [ ] [[package]] -name = "unicode-bidi" -version = "0.3.4" +name = "unicode-ident" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" -dependencies = [ - "matches", -] - -[[package]] -name = "unicode-normalization" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07fbfce1c8a97d547e8b5334978438d9d6ec8c20e38f56d4a4374d181493eaef" -dependencies = [ - "tinyvec", -] +checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" [[package]] name = "unicode-width" -version = "0.1.8" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" - -[[package]] -name = "unicode-xid" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" - -[[package]] -name = "url" -version = "2.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ccd964113622c8e9322cfac19eb1004a07e636c545f325da085d5cdde6f1f8b" -dependencies = [ - "form_urlencoded", - "idna", - "matches", - "percent-encoding", -] +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" [[package]] name = "utf8-width" -version = "0.1.4" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9071ac216321a4470a69fb2b28cfc68dcd1a39acd877c8be8e014df6772d8efa" +checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" [[package]] name = "vec_map" @@ -862,9 +661,9 @@ checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" [[package]] name = "walkdir" -version = "2.3.1" +version = "2.3.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" dependencies = [ "same-file", "winapi", @@ -873,21 +672,15 @@ dependencies = [ [[package]] name = "wasi" -version = "0.9.0+wasi-snapshot-preview1" +version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" - -[[package]] -name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.71" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ee1280240b7c461d6a0071313e08f34a60b0365f14260362e5a2b17d1d31aa7" +checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -895,13 +688,13 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.71" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b7d8b6942b8bb3a9b0e73fc79b98095a27de6fa247615e59d096754a3bc2aa8" +checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" dependencies = [ "bumpalo", - "lazy_static", "log", + "once_cell", "proc-macro2", "quote", "syn", @@ -910,9 +703,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.71" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ac38da8ef716661f0f36c0d8320b89028efe10c7c0afde65baffb496ce0d3b" +checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" dependencies = [ "quote", 
"wasm-bindgen-macro-support", @@ -920,9 +713,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.71" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc053ec74d454df287b9374ee8abb36ffd5acb95ba87da3ba5b7d3fe20eb401e" +checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" dependencies = [ "proc-macro2", "quote", @@ -933,15 +726,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.71" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d6f8ec44822dd71f5f221a5847fb34acd9060535c1211b70a05844c0f6383b1" +checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" [[package]] name = "web-sys" -version = "0.3.48" +version = "0.3.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec600b26223b2948cedfde2a0aa6756dcf1fef616f43d7b3097aaf53a6c4d92b" +checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f" dependencies = [ "js-sys", "wasm-bindgen", @@ -960,12 +753,13 @@ dependencies = [ [[package]] name = "which" -version = "4.1.0" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b55551e42cbdf2ce2bedd2203d0cc08dba002c27510f86dab6d0ce304cba3dfe" +checksum = "1c831fbbee9e129a8cf93e7747a82da9d95ba8e16621cae60ec2cdc849bacb7b" dependencies = [ "either", "libc", + "once_cell", ] [[package]] diff --git a/cli/build.rs b/cli/build.rs index 1986e023..0ff98a81 100644 --- a/cli/build.rs +++ b/cli/build.rs @@ -66,7 +66,21 @@ fn read_git_sha() -> Option { // If we're on a branch, read the SHA from the ref file. 
if head_content.starts_with("ref: ") { head_content.replace_range(0.."ref: ".len(), ""); - let ref_filename = git_dir_path.join(&head_content); + let ref_filename = { + let file = git_dir_path.join(&head_content); + if file.is_file() { + file + } else { + let file = git_dir_path + .parent() // worktrees subfolder + .unwrap() + .parent() // original gitdir + .unwrap() + .join(&head_content); + assert!(file.is_file()); + file + } + }; if let Some(path) = ref_filename.to_str() { println!("cargo:rerun-if-changed={}", path); } From eb549a89ad33923f4680ddb9281be79186a32db9 Mon Sep 17 00:00:00 2001 From: Phil Freeman Date: Wed, 14 Dec 2022 09:43:58 -0800 Subject: [PATCH 005/347] fix: possible rollover of nanoseconds in clock.h --- lib/src/clock.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/src/clock.h b/lib/src/clock.h index 94545f35..e6faa040 100644 --- a/lib/src/clock.h +++ b/lib/src/clock.h @@ -82,6 +82,10 @@ static inline TSClock clock_after(TSClock base, TSDuration duration) { TSClock result = base; result.tv_sec += duration / 1000000; result.tv_nsec += (duration % 1000000) * 1000; + if (result.tv_nsec >= 1000000000) { + result.tv_nsec -= 1000000000; + ++(result.tv_sec); + } return result; } From dddbc2d4f741597e212175c19644a473ec0170d2 Mon Sep 17 00:00:00 2001 From: Andrew Helwer Date: Sat, 20 Jun 2020 17:51:01 -0400 Subject: [PATCH 006/347] Added working commands for Windows users Windows users are likely to be using either Windows Terminal (which uses PowerShell) or PowerShell directly. 
--- docs/section-3-creating-parsers.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index 07f9d865..d4c05f25 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -80,6 +80,11 @@ You can test this parser by creating a source file with the contents "hello" and echo 'hello' > example-file tree-sitter parse example-file ``` +Alternatively, in Windows PowerShell: +```pwsh +"hello" | Out-File example-file -Encoding utf8 +tree-sitter parse example-file +``` This should print the following: From b1024a14937efc2c3b63cdb5e13bc33141a2c0b6 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Mon, 2 Jan 2023 22:56:51 +0200 Subject: [PATCH 007/347] cli: Fix build.rs in case of the current branch ref was packed Support SHA lookup in .git/packed-refs --- cli/build.rs | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/cli/build.rs b/cli/build.rs index 0ff98a81..74c6d833 100644 --- a/cli/build.rs +++ b/cli/build.rs @@ -1,3 +1,4 @@ +use std::ffi::OsStr; use std::path::{Path, PathBuf}; use std::{env, fs}; @@ -67,18 +68,36 @@ fn read_git_sha() -> Option { if head_content.starts_with("ref: ") { head_content.replace_range(0.."ref: ".len(), ""); let ref_filename = { + // Go to real non-worktree gitdir + let git_dir_path = git_dir_path + .parent() + .map(|p| { + p.file_name() + .map(|n| n == OsStr::new("worktrees")) + .and_then(|x| x.then(|| p.parent())) + }) + .flatten() + .flatten() + .unwrap_or(&git_dir_path); + let file = git_dir_path.join(&head_content); if file.is_file() { file } else { - let file = git_dir_path - .parent() // worktrees subfolder - .unwrap() - .parent() // original gitdir - .unwrap() - .join(&head_content); - assert!(file.is_file()); - file + let packed_refs = git_dir_path.join("packed-refs"); + if let Ok(packed_refs_content) = fs::read_to_string(&packed_refs) { + for line in 
packed_refs_content.lines() { + if let Some((hash, r#ref)) = line.split_once(' ') { + if r#ref == head_content { + if let Some(path) = packed_refs.to_str() { + println!("cargo:rerun-if-changed={}", path); + } + return Some(hash.to_string()); + } + } + } + } + return None; } }; if let Some(path) = ref_filename.to_str() { From 87a0517f3ce1d0eef47333dd50e8b25d932564c1 Mon Sep 17 00:00:00 2001 From: Andrew Helwer Date: Sat, 22 Jan 2022 10:16:32 -0500 Subject: [PATCH 008/347] Various updates to Creating Parsers docs * Documented test separator suffixes * Documented partial precedences field * Added external scanning to conflicting tokens rules * Added earliest starting position to conflicting tokens rules * Added note about lexical precedence vs. parse precedence * Added note about tree-sitter calling external scanner with all tokens marked valid during error recovery --- docs/section-3-creating-parsers.md | 49 ++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index d4c05f25..747238b4 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -157,6 +157,26 @@ func x() int { (return_statement (number))))) ``` +* If your language's syntax conflicts with the `===` and `---` test separators, you can optionally add an arbitrary identical suffix (in the below example, `|||`) to disambiguate them: + +``` +==================||| +Basic module +==================||| + +---- MODULE Test ---- +increment(n) == n + 1 +==== + +---||| + +(source_file + (module (identifier) + (operator (identifier) + (parameter_list (identifier)) + (plus (identifier_ref) (number))))) +``` + These tests are important. They serve as the parser's API documentation, and they can be run every time you change the grammar to verify that everything still parses correctly. 
By default, the `tree-sitter test` command runs all of the tests in your `corpus` or `test/corpus/` folder. To run a particular test, you can use the `-f` flag: @@ -227,6 +247,7 @@ In addition to the `name` and `rules` fields, grammars have a few other optional * **`inline`** - an array of rule names that should be automatically *removed* from the grammar by replacing all of their usages with a copy of their definition. This is useful for rules that are used in multiple places but for which you *don't* want to create syntax tree nodes at runtime. * **`conflicts`** - an array of arrays of rule names. Each inner array represents a set of rules that's involved in an *LR(1) conflict* that is *intended to exist* in the grammar. When these conflicts occur at runtime, Tree-sitter will use the GLR algorithm to explore all of the possible interpretations. If *multiple* parses end up succeeding, Tree-sitter will pick the subtree whose corresponding rule has the highest total *dynamic precedence*. * **`externals`** - an array of token names which can be returned by an [*external scanner*](#external-scanners). External scanners allow you to write custom C code which runs during the lexing process in order to handle lexical rules (e.g. Python's indentation tokens) that cannot be described by regular expressions. +* **`precedences`** - an array of array of strings, where each array of strings defines named precedence levels in descending order. These names can be used in the `prec` functions to define precedence relative only to other names in the array, rather than globally. Can only be used with parse precedence, not lexical precedence. * **`word`** - the name of a token that will match keywords for the purpose of the [keyword extraction](#keyword-extraction) optimization. * **`supertypes`** an array of hidden rule names which should be considered to be 'supertypes' in the generated [*node types* file][static-node-types]. 
@@ -503,17 +524,31 @@ Tree-sitter's parsing process is divided into two phases: parsing (which is desc ### Conflicting Tokens -Grammars often contain multiple tokens that can match the same characters. For example, a grammar might contain the tokens (`"if"` and `/[a-z]+/`). Tree-sitter differentiates between these conflicting tokens in a few ways: +Grammars often contain multiple tokens that can match the same characters. For example, a grammar might contain the tokens (`"if"` and `/[a-z]+/`). Tree-sitter differentiates between these conflicting tokens in a few ways. -1. **Context-aware Lexing** - Tree-sitter performs lexing on-demand, during the parsing process. At any given position in a source document, the lexer only tries to recognize tokens that are *valid* at that position in the document. +1. **External Scanning** - If your grammar has an external scanner and one or more tokens in your `externals` array are valid at the current location, your external scanner will always be called first to determine whether those tokens are present. -2. **Lexical Precedence** - When the precedence functions described [above](#the-grammar-dsl) are used within the `token` function, the given precedence values serve as instructions to the lexer. If there are two valid tokens that match the characters at a given position in the document, Tree-sitter will select the one with the higher precedence. +1. **Context-Aware Lexing** - Tree-sitter performs lexing on-demand, during the parsing process. At any given position in a source document, the lexer only tries to recognize tokens that are *valid* at that position in the document. -3. **Match Length** - If multiple valid tokens with the same precedence match the characters at a given position in a document, Tree-sitter will select the token that matches the [longest sequence of characters][longest-match]. +1. **Earliest Starting Position** - Tree-sitter will prefer tokens with an earlier starting position. 
This is most often seen with very permissive regular expressions similar to `/.*/`, which are greedy and will consume as much text as possible. In this example the regex would consume all text until hitting a newline - even if text on that line could be interpreted as a different token. -4. **Match Specificity** - If there are two valid tokens with the same precedence and which both match the same number of characters, Tree-sitter will prefer a token that is specified in the grammar as a `String` over a token specified as a `RegExp`. +1. **Explicit Lexical Precedence** - When the precedence functions described [above](#the-grammar-dsl) are used within the `token` function, the given precedence values serve as instructions to the lexer. If there are two valid tokens that match the characters at a given position in the document, Tree-sitter will select the one with the higher precedence. -5. **Rule Order** - If none of the above criteria can be used to select one token over another, Tree-sitter will prefer the token that appears earlier in the grammar. +1. **Match Length** - If multiple valid tokens with the same precedence match the characters at a given position in a document, Tree-sitter will select the token that matches the [longest sequence of characters][longest-match]. + +1. **Match Specificity** - If there are two valid tokens with the same precedence and which both match the same number of characters, Tree-sitter will prefer a token that is specified in the grammar as a `String` over a token specified as a `RegExp`. + +1. **Rule Order** - If none of the above criteria can be used to select one token over another, Tree-sitter will prefer the token that appears earlier in the grammar. + +### Lexical Precedence vs. Parse Precedence + +One common mistake involves not distinguishing lexical precedence from parse precedence. +Parse precedence determines which rule is chosen to interpret a given sequence of tokens. 
+Lexical precedence determines which token is chosen to interpret a given section of text. +It is a lower-level operation that is done first. +The above list fully capture tree-sitter's lexical precedence rules, and you will probably refer back to this section of the documentation more often than any other. +Most of the time when you really get stuck, you're dealing with a lexical precedence problem. +Pay particular attention to the difference in meaning between using `prec` inside the `token` function vs. outside of it. ### Keywords @@ -699,6 +734,8 @@ if (valid_symbols[INDENT] || valid_symbol[DEDENT]) { } ``` +Note that if a syntax error is encountered during regular parsing, tree-sitter's first action during error recovery will be to call your external scanner with all tokens marked valid. Your scanner should detect this case and handle it appropriately. + [ambiguous-grammar]: https://en.wikipedia.org/wiki/Ambiguous_grammar [antlr]: http://www.antlr.org/ [bison-dprec]: https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html From 278ff01e71a8d798a19f85a61c10dd29e3e681c1 Mon Sep 17 00:00:00 2001 From: Andrew Helwer Date: Wed, 26 Jan 2022 18:57:32 -0500 Subject: [PATCH 009/347] Added additional documentation details * External scanners and infinite loops * Terminal keywords in externals array * Using error sentinel external token * Good practice to erase state variables in deserialize func * Emphasize external scanner is called first --- docs/section-3-creating-parsers.md | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index 747238b4..5677292f 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -548,7 +548,7 @@ Lexical precedence determines which token is chosen to interpret a given section It is a lower-level operation that is done first. 
The above list fully capture tree-sitter's lexical precedence rules, and you will probably refer back to this section of the documentation more often than any other. Most of the time when you really get stuck, you're dealing with a lexical precedence problem. -Pay particular attention to the difference in meaning between using `prec` inside the `token` function vs. outside of it. +Pay particular attention to the difference in meaning between using `prec` inside the `token` function versus outside of it. ### Keywords @@ -632,7 +632,7 @@ grammar({ Then, add another C or C++ source file to your project. Currently, its path must be `src/scanner.c` or `src/scanner.cc` for the CLI to recognize it. Be sure to add this file to the `sources` section of your `binding.gyp` file so that it will be included when your project is compiled by Node.js and uncomment the appropriate block in your `bindings/rust/build.rs` file so that it will be included in your Rust crate. -In this new source file, define an [`enum`][enum] type containing the names of all of your external tokens. The ordering of this enum must match the order in your grammar's `externals` array. +In this new source file, define an [`enum`][enum] type containing the names of all of your external tokens. The ordering of this enum must match the order in your grammar's `externals` array; the actual names do not matter. ```c #include @@ -695,6 +695,7 @@ void tree_sitter_my_language_external_scanner_deserialize( ``` This function should *restore* the state of your scanner based the bytes that were previously written by the `serialize` function. It is called with a pointer to your scanner, a pointer to the buffer of bytes, and the number of bytes that should be read. +It is good practice to explicitly erase your scanner state variables at the start of this function, before restoring their values from the byte buffer. #### Scan @@ -712,10 +713,11 @@ This function is responsible for recognizing external tokens. 
It should return ` * **`int32_t lookahead`** - The current next character in the input stream, represented as a 32-bit unicode code point. * **`TSSymbol result_symbol`** - The symbol that was recognized. Your scan function should *assign* to this field one of the values from the `TokenType` enum, described above. -* **`void (*advance)(TSLexer *, bool skip)`** - A function for advancing to the next character. If you pass `true` for the second argument, the current character will be treated as whitespace. +* **`void (*advance)(TSLexer *, bool skip)`** - A function for advancing to the next character. If you pass `true` for the second argument, the current character will be treated as whitespace; whitespace won't be included in the text range associated with tokens emitted by the external scanner. * **`void (*mark_end)(TSLexer *)`** - A function for marking the end of the recognized token. This allows matching tokens that require multiple characters of lookahead. By default (if you don't call `mark_end`), any character that you moved past using the `advance` function will be included in the size of the token. But once you call `mark_end`, then any later calls to `advance` will *not* increase the size of the returned token. You can call `mark_end` multiple times to increase the size of the token. * **`uint32_t (*get_column)(TSLexer *)`** - A function for querying the current column position of the lexer. It returns the number of codepoints since the start of the current line. The codepoint position is recalculated on every call to this function by reading from the start of the line. -* **`bool (*is_at_included_range_start)(TSLexer *)`** - A function for checking if the parser has just skipped some characters in the document. 
When parsing an embedded document using the `ts_parser_set_included_ranges` function (described in the [multi-language document section][multi-language-section]), your scanner may want to apply some special behavior when moving to a disjoint part of the document. For example, in [EJS documents][ejs], the JavaScript parser uses this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`. +* **`bool (*is_at_included_range_start)(const TSLexer *)`** - A function for checking whether the parser has just skipped some characters in the document. When parsing an embedded document using the `ts_parser_set_included_ranges` function (described in the [multi-language document section][multi-language-section]), your scanner may want to apply some special behavior when moving to a disjoint part of the document. For example, in [EJS documents][ejs], the JavaScript parser uses this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`. +* **`bool (*eof)(const TSLexer *)`** - A function for determining whether the lexer is at the end of the file. The value of `lookahead` will be `0` at the end of a file, but this function should be used instead of checking for that value because the `0` or "NUL" value is also a valid character that could be present in the file being parsed. The third argument to the `scan` function is an array of booleans that indicates which of your external tokens are currently expected by the parser. You should only look for a given token if it is valid according to this array. At the same time, you cannot backtrack, so you may need to combine certain pieces of logic. @@ -734,7 +736,20 @@ if (valid_symbols[INDENT] || valid_symbol[DEDENT]) { } ``` -Note that if a syntax error is encountered during regular parsing, tree-sitter's first action during error recovery will be to call your external scanner with all tokens marked valid. 
Your scanner should detect this case and handle it appropriately. +#### Other External Scanner Details + +If a token in your `externals` array is valid at the current position in the parse, your external scanner will be called first before anything else is done. +This means your external scanner functions as a powerful override of tree-sitter's lexing behavior, and can be used to solve problems that can't be cracked with ordinary lexical, parse, or dynamic precedence. + +If a syntax error is encountered during regular parsing, tree-sitter's first action during error recovery will be to call your external scanner's `scan` function with all tokens marked valid. +Your scanner should detect this case and handle it appropriately. +One simple method of detection is to add an unused token to the end of your `externals` array, for example `externals: $ => [$.token1, $.token2, $.error_sentinel]`, then check whether that token is marked valid to determine whether tree-sitter is in error correction mode. + +If you put terminal keywords in your `externals` array, for example `externals: $ => ['if', 'then', 'else']`, then any time those terminals are present in your grammar they will be tokenized by your external scanner. +It is equivalent to writing `externals: [$.if_keyword, $.then_keyword, $.else_keyword]` then using `alias($.if_keyword, 'if')` in your grammar. + +External scanners are a common cause of infinite loops. +Be very careful when emitting zero-width tokens from your external scanner, and if you consume characters in a loop be sure use the `eof` function to check whether you are at the end of the file. 
[ambiguous-grammar]: https://en.wikipedia.org/wiki/Ambiguous_grammar [antlr]: http://www.antlr.org/ From 19bd5868eb72ea3e72e29359a0f06056aa609140 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Fri, 6 Jan 2023 08:32:26 +0200 Subject: [PATCH 010/347] Fix wasm tests --- lib/binding_web/exports.json | 7 ++++--- lib/binding_web/test/parser-test.js | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/binding_web/exports.json b/lib/binding_web/exports.json index 3fe11cdf..d4b6eb5e 100644 --- a/lib/binding_web/exports.json +++ b/lib/binding_web/exports.json @@ -4,13 +4,14 @@ "_malloc", "_realloc", - "__ZNKSt3__212basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE4copyEPcmm", "__ZNSt3__212basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE6__initEPKcm", - "__ZNSt3__212basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE7reserveEm", "__ZNSt3__212basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE9__grow_byEmmmmmm", "__ZNSt3__212basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE9push_backEc", - "__ZNSt3__212basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEED2Ev", "__ZNSt3__212basic_stringIwNS_11char_traitsIwEENS_9allocatorIwEEE9push_backEw", + "__ZNKSt3__212basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE4copyEPcmm", + "__ZNSt3__212basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE7reserveEm", + "__ZNSt3__212basic_stringIwNS_11char_traitsIwEENS_9allocatorIwEEE6resizeEmw", + "__ZNSt3__212basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEED2Ev", "__ZNSt3__212basic_stringIwNS_11char_traitsIwEENS_9allocatorIwEEED2Ev", "__ZdlPv", "__Znwm", diff --git a/lib/binding_web/test/parser-test.js b/lib/binding_web/test/parser-test.js index c76b49c7..fcd714b2 100644 --- a/lib/binding_web/test/parser-test.js +++ b/lib/binding_web/test/parser-test.js @@ -153,7 +153,7 @@ describe("Parser", () => { 'type: (primitive_type) ' + 'declarator: (init_declarator ' + 'declarator: (pointer_declarator declarator: (identifier)) ' + - 'value: 
(raw_string_literal))))' + 'value: (raw_string_literal delimiter: (raw_string_delimiter) (raw_string_content) (raw_string_delimiter)))))' ); }).timeout(5000); From 88fe1d00c42760beda7cc01f5259da3d7fc5265e Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Fri, 6 Jan 2023 08:15:15 +0200 Subject: [PATCH 011/347] Bump Emscripten version to 3.1.29 --- cli/emscripten-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/emscripten-version b/cli/emscripten-version index 199eda56..05b41fb6 100644 --- a/cli/emscripten-version +++ b/cli/emscripten-version @@ -1 +1 @@ -3.1.25 +3.1.29 From e733a07a5c0848e813a825a885f0c0dc0f04664b Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Fri, 6 Jan 2023 08:46:43 +0200 Subject: [PATCH 012/347] Update CI action versions --- .github/workflows/ci.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d494e236..71f4be63 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,7 +26,7 @@ jobs: - ubuntu-latest steps: - name: Checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 # Work around https://github.com/actions/cache/issues/403. 
- name: Use GNU tar @@ -40,7 +40,7 @@ jobs: - name: Cache artifacts id: cache - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: | ~/.cargo/registry @@ -59,7 +59,7 @@ jobs: run: cargo fmt -- --check - name: Install emscripten - uses: mymindstorm/setup-emsdk@v10 + uses: mymindstorm/setup-emsdk@v11 with: version: ${{ env.EMSCRIPTEN_VERSION }} @@ -113,11 +113,11 @@ jobs: runs-on: windows-latest steps: - name: Checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Cache artifacts id: cache - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: | ~/.cargo/registry From ad45f5cd2cabda2de406a480f6d660aa5c96a85b Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Fri, 6 Jan 2023 06:05:07 +0200 Subject: [PATCH 013/347] Remove unused no-minimize arg for the generate command --- cli/src/main.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index ce6743bf..9e3331c0 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -111,8 +111,7 @@ fn run() -> Result<()> { .long("report-states-for-rule") .value_name("rule-name") .takes_value(true), - ) - .arg(Arg::with_name("no-minimize").long("no-minimize")), + ), ) .subcommand( SubCommand::with_name("parse") From 5088781ef965c5cd7187c5308e3cb45f8f892860 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Fri, 6 Jan 2023 06:13:08 +0200 Subject: [PATCH 014/347] cli: add -b, --build flags for `tree-sitter generate` --- cli/src/main.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/cli/src/main.rs b/cli/src/main.rs index 9e3331c0..a6e75bf5 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -106,6 +106,11 @@ fn run() -> Result<()> { )), ) .arg(Arg::with_name("no-bindings").long("no-bindings")) + .arg( + Arg::with_name("build").long("build").short("b") + .help("Compile all defined languages in the current dir") + ) + .arg(&debug_build_arg) .arg( Arg::with_name("report-states-for-rule") .long("report-states-for-rule") @@ -269,6 
+274,8 @@ fn run() -> Result<()> { ("generate", Some(matches)) => { let grammar_path = matches.value_of("grammar-path"); + let debug_build = matches.is_present("debug-build"); + let build = matches.is_present("build"); let report_symbol_name = matches.value_of("report-states-for-rule").or_else(|| { if matches.is_present("report-states") { Some("") @@ -297,6 +304,10 @@ fn run() -> Result<()> { generate_bindings, report_symbol_name, )?; + if build { + loader.use_debug_build(debug_build); + loader.languages_at_path(¤t_dir)?; + } } ("test", Some(matches)) => { From 108d0ecede9312e88ac12475ffac62af9fba5dbf Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Fri, 6 Jan 2023 06:37:22 +0200 Subject: [PATCH 015/347] loader: add TREE_SITTER_LIBDIR; cli: add --libdir to `tree-sitter generate` Closes #1336 --- cli/loader/src/lib.rs | 11 +++++++---- cli/src/main.rs | 18 +++++++++++++++--- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/cli/loader/src/lib.rs b/cli/loader/src/lib.rs index 6f8605f4..0f92b051 100644 --- a/cli/loader/src/lib.rs +++ b/cli/loader/src/lib.rs @@ -10,7 +10,7 @@ use std::path::{Path, PathBuf}; use std::process::Command; use std::sync::Mutex; use std::time::SystemTime; -use std::{fs, mem}; +use std::{env, fs, mem}; use tree_sitter::{Language, QueryError, QueryErrorKind}; use tree_sitter_highlight::HighlightConfiguration; use tree_sitter_tags::{Error as TagsError, TagsConfiguration}; @@ -108,9 +108,12 @@ unsafe impl Sync for Loader {} impl Loader { pub fn new() -> Result { - let parser_lib_path = dirs::cache_dir() - .ok_or(anyhow!("Cannot determine cache directory"))? - .join("tree-sitter/lib"); + let parser_lib_path = match env::var("TREE_SITTER_LIBDIR") { + Ok(path) => PathBuf::from(path), + _ => dirs::cache_dir() + .ok_or(anyhow!("Cannot determine cache directory"))? 
+ .join("tree-sitter/lib"), + }; Ok(Self::with_parser_lib_path(parser_lib_path)) } diff --git a/cli/src/main.rs b/cli/src/main.rs index a6e75bf5..c1dd2501 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,7 +1,7 @@ use anyhow::{anyhow, Context, Result}; use clap::{App, AppSettings, Arg, SubCommand}; use glob::glob; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::{env, fs, u64}; use tree_sitter_cli::{ generate, highlight, logger, parse, playground, query, tags, test, test_highlight, test_tags, @@ -107,10 +107,18 @@ fn run() -> Result<()> { ) .arg(Arg::with_name("no-bindings").long("no-bindings")) .arg( - Arg::with_name("build").long("build").short("b") - .help("Compile all defined languages in the current dir") + Arg::with_name("build") + .long("build") + .short("b") + .help("Compile all defined languages in the current dir"), ) .arg(&debug_build_arg) + .arg( + Arg::with_name("libdir") + .long("libdir") + .takes_value(true) + .value_name("path"), + ) .arg( Arg::with_name("report-states-for-rule") .long("report-states-for-rule") @@ -276,6 +284,7 @@ fn run() -> Result<()> { let grammar_path = matches.value_of("grammar-path"); let debug_build = matches.is_present("debug-build"); let build = matches.is_present("build"); + let libdir = matches.value_of("libdir"); let report_symbol_name = matches.value_of("report-states-for-rule").or_else(|| { if matches.is_present("report-states") { Some("") @@ -305,6 +314,9 @@ fn run() -> Result<()> { report_symbol_name, )?; if build { + if let Some(path) = libdir { + loader = loader::Loader::with_parser_lib_path(PathBuf::from(path)); + } loader.use_debug_build(debug_build); loader.languages_at_path(¤t_dir)?; } From 9ac55f79d191f6fa200b1894ddac449fa3df70c1 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sun, 8 Jan 2023 04:39:46 +0200 Subject: [PATCH 016/347] Update Makefile VERSION to 0.20.9 from lib/Cargo.toml Closes #1158 Closes #1608 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/Makefile b/Makefile index 764f411a..69f6f590 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -VERSION := 0.6.3 +VERSION := 0.20.9 # install directory layout PREFIX ?= /usr/local From 61b85b2664488bee81e36b1d145bea2d24b871c5 Mon Sep 17 00:00:00 2001 From: Boris Verkhovskiy Date: Sun, 8 Jan 2023 08:10:14 -0700 Subject: [PATCH 017/347] Make error message more specific --- cli/src/wasm.rs | 5 +++++ script/build-wasm | 6 +++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/cli/src/wasm.rs b/cli/src/wasm.rs index f31ac2d2..467fef71 100644 --- a/cli/src/wasm.rs +++ b/cli/src/wasm.rs @@ -65,6 +65,11 @@ pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Resu // Run `emcc` in a container using the `emscripten-slim` image command.args(&[EMSCRIPTEN_TAG, "emcc"]); } else { + if force_docker { + return Err(anyhow!( + "You must have docker on your PATH to run this command with --docker" + )); + } return Err(anyhow!( "You must have either emcc or docker on your PATH to run this command" )); diff --git a/script/build-wasm b/script/build-wasm index f95c3f15..dc42895a 100755 --- a/script/build-wasm +++ b/script/build-wasm @@ -71,7 +71,11 @@ elif which docker > /dev/null; then emscripten/emsdk:$emscripen_version \ emcc" else - echo 'You must have either `docker` or `emcc` on your PATH to run this script' + if [[ "$force_docker" == "1" ]]; then + echo 'You must have `docker` on your PATH to run this script with --docker' + else + echo 'You must have either `docker` or `emcc` on your PATH to run this script' + fi exit 1 fi From c7d431b53ed6e7b08d73d237cba960da1d437e62 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Tue, 10 Jan 2023 10:44:20 +0200 Subject: [PATCH 018/347] Add setting TREE_SITTER_DEBUG in the test subcommand Were asked in #1218 --- cli/src/main.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cli/src/main.rs b/cli/src/main.rs index c1dd2501..e0625708 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ 
-329,6 +329,11 @@ fn run() -> Result<()> { let update = matches.is_present("update"); let filter = matches.value_of("filter"); + if debug { + // For augmenting debug logging in external scanners + env::set_var("TREE_SITTER_DEBUG", "1"); + } + loader.use_debug_build(debug_build); let languages = loader.languages_at_path(&current_dir)?; From f627a97d24b8908110fe536f13b2c08b7a59efc2 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Wed, 11 Jan 2023 04:11:04 +0200 Subject: [PATCH 019/347] loader: use portable way of path joining --- cli/config/src/lib.rs | 6 ++++-- cli/loader/src/lib.rs | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/cli/config/src/lib.rs b/cli/config/src/lib.rs index 0a5eba54..678688fe 100644 --- a/cli/config/src/lib.rs +++ b/cli/config/src/lib.rs @@ -37,7 +37,8 @@ impl Config { let legacy_path = dirs::home_dir() .ok_or(anyhow!("Cannot determine home directory"))? - .join(".tree-sitter/config.json"); + .join(".tree-sitter") + .join("config.json"); if legacy_path.is_file() { return Ok(Some(legacy_path)); } @@ -48,7 +49,8 @@ impl Config { fn xdg_config_file() -> Result { let xdg_path = dirs::config_dir() .ok_or(anyhow!("Cannot determine config directory"))? - .join("tree-sitter/config.json"); + .join("tree-sitter") + .join("config.json"); Ok(xdg_path) } diff --git a/cli/loader/src/lib.rs b/cli/loader/src/lib.rs index 0f92b051..029da451 100644 --- a/cli/loader/src/lib.rs +++ b/cli/loader/src/lib.rs @@ -112,7 +112,8 @@ impl Loader { Ok(path) => PathBuf::from(path), _ => dirs::cache_dir() .ok_or(anyhow!("Cannot determine cache directory"))?
- .join("tree-sitter/lib"), + .join("tree-sitter") + .join("lib"), }; Ok(Self::with_parser_lib_path(parser_lib_path)) } From ca38c32d368ffd84e686b0cf41fef754044351a9 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sat, 14 Jan 2023 08:58:25 +0200 Subject: [PATCH 020/347] Exclude huge generated files from `git diff` output --- .gitattributes | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitattributes b/.gitattributes index 4fcce330..44bf45c7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,5 @@ /lib/src/unicode/*.h linguist-vendored /lib/src/unicode/LICENSE linguist-vendored + +/cli/src/generate/prepare_grammar/*.json -diff +Cargo.lock -diff From 5c1ca5adb0d007d6e68885601127fcbce0288c3f Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sun, 15 Jan 2023 03:07:53 +0200 Subject: [PATCH 021/347] cli: produce smaller release binary by stripping debug info --- Cargo.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 38830584..d1e57f89 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,3 +2,6 @@ default-members = ["cli"] members = ["cli", "lib"] + +[profile.release] +strip = true From 7a56f0815436fadb93ad8f3e277aa4c6c44e7954 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sun, 15 Jan 2023 04:08:03 +0200 Subject: [PATCH 022/347] ci: bump setup-emsdk action version to @v12 --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 71f4be63..1f5edc96 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,7 +59,7 @@ jobs: run: cargo fmt -- --check - name: Install emscripten - uses: mymindstorm/setup-emsdk@v11 + uses: mymindstorm/setup-emsdk@v12 with: version: ${{ env.EMSCRIPTEN_VERSION }} From ced103b519ea31eca559f4252798901a03e27fa3 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sun, 15 Jan 2023 04:07:29 +0200 Subject: [PATCH 023/347] ci: rustup install rustfmt and clippy --- 
.github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1f5edc96..e6ef591a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,6 +54,7 @@ jobs: with: toolchain: stable profile: minimal + components: rustfmt, clippy - name: Check Rust code formatting run: cargo fmt -- --check From adeb8d0aa9af9d4472316d74dcc0c809ac3654b6 Mon Sep 17 00:00:00 2001 From: sogaiu <983021772@users.noreply.github.com> Date: Mon, 16 Jan 2023 14:34:19 +0900 Subject: [PATCH 024/347] cli: Make init-config respect TREE_SITTER_DIR --- cli/config/src/lib.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cli/config/src/lib.rs b/cli/config/src/lib.rs index 678688fe..27b2dfb6 100644 --- a/cli/config/src/lib.rs +++ b/cli/config/src/lib.rs @@ -81,7 +81,13 @@ impl Config { /// /// (Note that this is typically only done by the `tree-sitter init-config` command.) pub fn initial() -> Result { - let location = Self::xdg_config_file()?; + let location = if let Ok(path) = env::var("TREE_SITTER_DIR") { + let mut path = PathBuf::from(path); + path.push("config.json"); + path + } else { + Self::xdg_config_file()? 
+ }; let config = serde_json::json!({}); Ok(Config { location, config }) } From 23ce370fa3607ab1fed4e60aebd6c0d2e6744376 Mon Sep 17 00:00:00 2001 From: sogaiu <983021772@users.noreply.github.com> Date: Mon, 16 Jan 2023 14:24:16 +0900 Subject: [PATCH 025/347] cli: Stop config.json search sooner if TREE_SITTER_DIR set --- cli/config/src/lib.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cli/config/src/lib.rs b/cli/config/src/lib.rs index 27b2dfb6..3cd09b8d 100644 --- a/cli/config/src/lib.rs +++ b/cli/config/src/lib.rs @@ -25,6 +25,9 @@ impl Config { if let Ok(path) = env::var("TREE_SITTER_DIR") { let mut path = PathBuf::from(path); path.push("config.json"); + if !path.exists() { + return Ok(None); + } if path.is_file() { return Ok(Some(path)); } From ae3b60bd29e301a99568f5c20d46e83bbccfda68 Mon Sep 17 00:00:00 2001 From: Matt <85322+mattmassicotte@users.noreply.github.com> Date: Tue, 17 Jan 2023 09:14:57 -0500 Subject: [PATCH 026/347] Include many new parsers in a unified list --- docs/index.md | 75 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 28 deletions(-) diff --git a/docs/index.md b/docs/index.md index 1c4f958a..6602c25e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -27,77 +27,96 @@ There are currently bindings that allow Tree-sitter to be used from the followin * [Kotlin](https://github.com/oxisto/kotlintree) * [Java](https://github.com/serenadeai/java-tree-sitter) -### Available Parsers - -Parsers for these languages are fairly complete: +### Parsers +* [Agda](https://github.com/tree-sitter/tree-sitter-agda) * [Bash](https://github.com/tree-sitter/tree-sitter-bash) -* [C](https://github.com/tree-sitter/tree-sitter-c) * [C#](https://github.com/tree-sitter/tree-sitter-c-sharp) * [C++](https://github.com/tree-sitter/tree-sitter-cpp) +* [Clojure](https://github.com/sogaiu/tree-sitter-clojure) +* [CMake](https://github.com/uyha/tree-sitter-cmake) +* [Comment](https://github.com/stsewd/tree-sitter-comment) * 
[Common Lisp](https://github.com/theHamsta/tree-sitter-commonlisp) * [CSS](https://github.com/tree-sitter/tree-sitter-css) * [CUDA](https://github.com/theHamsta/tree-sitter-cuda) -* [D](https://github.com/gdamore/tree-sitter-d) +* [C](https://github.com/tree-sitter/tree-sitter-c) +* [Dart](https://github.com/UserNobody14/tree-sitter-dart) +* [Dockerfile](https://github.com/camdencheek/tree-sitter-dockerfile) +* [Dockerfile](https://github.com/camdencheek/tree-sitter-dockerfile) * [DOT](https://github.com/rydesun/tree-sitter-dot) +* [D](https://github.com/gdamore/tree-sitter-d) +* [Elixir](https://github.com/elixir-lang/tree-sitter-elixir) * [Elm](https://github.com/elm-tooling/tree-sitter-elm) * [Emacs Lisp](https://github.com/Wilfred/tree-sitter-elisp) * [Eno](https://github.com/eno-lang/tree-sitter-eno) * [ERB / EJS](https://github.com/tree-sitter/tree-sitter-embedded-template) * [Erlang](https://github.com/WhatsApp/tree-sitter-erlang/) * [Fennel](https://github.com/travonted/tree-sitter-fennel) +* [Fish](https://github.com/ram02z/tree-sitter-fish) +* [Fortran](https://github.com/stadelmanma/tree-sitter-fortran) +* [gitattributes](https://github.com/ObserverOfTime/tree-sitter-gitattributes) +* [gitignore](https://github.com/shunsambongi/tree-sitter-gitignore) * [GLSL (OpenGL Shading Language)](https://github.com/theHamsta/tree-sitter-glsl) +* [Go mod](https://github.com/camdencheek/tree-sitter-go-mod) +* [Go work](https://github.com/omertuc/tree-sitter-go-work) * [Go](https://github.com/tree-sitter/tree-sitter-go) +* [Graphql](https://github.com/bkegley/tree-sitter-graphql) +* [Hack](https://github.com/slackhq/tree-sitter-hack) +* [Haskell](https://github.com/tree-sitter/tree-sitter-haskell) * [HCL](https://github.com/MichaHoffmann/tree-sitter-hcl) * [HTML](https://github.com/tree-sitter/tree-sitter-html) -* [Java](https://github.com/tree-sitter/tree-sitter-java) * [JavaScript](https://github.com/tree-sitter/tree-sitter-javascript) +* 
[Java](https://github.com/tree-sitter/tree-sitter-java) +* [JSON5](https://github.com/Joakker/tree-sitter-json5) * [JSON](https://github.com/tree-sitter/tree-sitter-json) +* [Julia](https://github.com/tree-sitter/tree-sitter-julia) +* [Kotlin](https://github.com/fwcd/tree-sitter-kotlin) +* [Latex](https://github.com/latex-lsp/tree-sitter-latex) +* [LLVM](https://github.com/benwilliamgraham/tree-sitter-llvm) * [Lua](https://github.com/Azganoth/tree-sitter-lua) * [Make](https://github.com/alemuller/tree-sitter-make) * [Markdown](https://github.com/ikatyang/tree-sitter-markdown) +* [Markdown](https://github.com/MDeiml/tree-sitter-markdown) +* [Nix](https://github.com/cstrahan/tree-sitter-nix) +* [Objective-C](https://github.com/jiyee/tree-sitter-objc) * [OCaml](https://github.com/tree-sitter/tree-sitter-ocaml) +* [Org](https://github.com/milisims/tree-sitter-org) +* [Pascal](https://github.com/Isopod/tree-sitter-pascal) +* [Perl](https://github.com/ganezdragon/tree-sitter-perl) * [PHP](https://github.com/tree-sitter/tree-sitter-php) +* [PowerShell](https://github.com/PowerShell/tree-sitter-PowerShell) +* [Protocol Buffers](https://github.com/mitchellh/tree-sitter-proto) * [Python](https://github.com/tree-sitter/tree-sitter-python) +* [Racket](https://github.com/6cdh/tree-sitter-racket) +* [Regex](https://github.com/tree-sitter/tree-sitter-regex) +* [reStructuredText](https://github.com/stsewd/tree-sitter-rst) * [Ruby](https://github.com/tree-sitter/tree-sitter-ruby) * [Rust](https://github.com/tree-sitter/tree-sitter-rust) * [R](https://github.com/r-lib/tree-sitter-r) * [S-expressions](https://github.com/AbstractMachinesLab/tree-sitter-sexp) +* [Scala](https://github.com/tree-sitter/tree-sitter-scala) +* [Scheme](https://github.com/6cdh/tree-sitter-scheme) +* [Scss](https://github.com/serenadeai/tree-sitter-scss) +* [Sourcepawn](https://github.com/nilshelmig/tree-sitter-sourcepawn) * [SPARQL](https://github.com/BonaBeavis/tree-sitter-sparql) +* 
[SQL-PostgreSQL](https://github.com/m-novikov/tree-sitter-sql) +* [SQL-Sqlite](https://github.com/dhcmrlchtdj/tree-sitter-sqlite) +* [SSH](https://github.com/metio/tree-sitter-ssh-client-config) * [Svelte](https://github.com/Himujjal/tree-sitter-svelte) * [Swift](https://github.com/alex-pinkus/tree-sitter-swift) * [SystemRDL](https://github.com/SystemRDL/tree-sitter-systemrdl) * [TOML](https://github.com/ikatyang/tree-sitter-toml) +* [Tree-sitter query](https://github.com/nvim-treesitter/tree-sitter-query) * [Turtle](https://github.com/BonaBeavis/tree-sitter-turtle) * [Twig](https://github.com/gbprod/tree-sitter-twig) * [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript) * [Verilog](https://github.com/tree-sitter/tree-sitter-verilog) * [VHDL](https://github.com/alemuller/tree-sitter-vhdl) * [Vue](https://github.com/ikatyang/tree-sitter-vue) -* [YAML](https://github.com/ikatyang/tree-sitter-yaml) * [WASM](https://github.com/wasm-lsp/tree-sitter-wasm) * [WGSL WebGPU Shading Language](https://github.com/mehmetoguzderin/tree-sitter-wgsl) - -Parsers for these languages are in development: - -* [Agda](https://github.com/tree-sitter/tree-sitter-agda) -* [Elixir](https://github.com/elixir-lang/tree-sitter-elixir) -* [Dockerfile](https://github.com/camdencheek/tree-sitter-dockerfile) -* [Go mod](https://github.com/camdencheek/tree-sitter-go-mod) -* [Hack](https://github.com/slackhq/tree-sitter-hack) -* [Haskell](https://github.com/tree-sitter/tree-sitter-haskell) -* [Julia](https://github.com/tree-sitter/tree-sitter-julia) -* [Kotlin](https://github.com/fwcd/tree-sitter-kotlin) -* [Nix](https://github.com/cstrahan/tree-sitter-nix) -* [Objective-C](https://github.com/jiyee/tree-sitter-objc) -* [Org](https://github.com/milisims/tree-sitter-org) -* [Perl](https://github.com/ganezdragon/tree-sitter-perl) -* [Protocol Buffers](https://github.com/mitchellh/tree-sitter-proto) -* [Racket](https://github.com/6cdh/tree-sitter-racket) -* 
[Scala](https://github.com/tree-sitter/tree-sitter-scala) -* [Sourcepawn](https://github.com/nilshelmig/tree-sitter-sourcepawn) -* [SQL](https://github.com/m-novikov/tree-sitter-sql) - +* [YAML](https://github.com/ikatyang/tree-sitter-yaml) +* [Zig](https://github.com/maxxnino/tree-sitter-zig) ### Talks on Tree-sitter From cca626afe786dd1c1c0bf3609f81621165dabac1 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Fri, 27 Jan 2023 07:49:07 +0200 Subject: [PATCH 027/347] docs: remove duplicated link --- docs/index.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index 6602c25e..097f0241 100644 --- a/docs/index.md +++ b/docs/index.md @@ -42,7 +42,6 @@ There are currently bindings that allow Tree-sitter to be used from the followin * [C](https://github.com/tree-sitter/tree-sitter-c) * [Dart](https://github.com/UserNobody14/tree-sitter-dart) * [Dockerfile](https://github.com/camdencheek/tree-sitter-dockerfile) -* [Dockerfile](https://github.com/camdencheek/tree-sitter-dockerfile) * [DOT](https://github.com/rydesun/tree-sitter-dot) * [D](https://github.com/gdamore/tree-sitter-d) * [Elixir](https://github.com/elixir-lang/tree-sitter-elixir) From 2e732d17ddfa0360668c029fac0f3c0b9bc083a5 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Fri, 27 Jan 2023 10:15:23 +0200 Subject: [PATCH 028/347] docs: merge of all binding and grammar link PRs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Joined commit messages: --- Closes: #2048 docs: add capnp, smali, thrift Closes: #2026 Add Perl binding Closes: #1989 Add Ada to the list of available parsers Closes: #1980 Add clojure to list of available parsers After some discussion in https://github.com/sogaiu/tree-sitter-clojure/issues/28 I decided to submit an issue to the tree-sitter organization to include tree-sitter-clojure in their list of available parsers. 
This repository is used by a couple of editors and tools, and I am using it to build a tree-sitter based programming mode for Clojure in Emacs. Since there are a couple of tools that depend on it, it seems worthy of inclusion in an official list of tree-sitter grammars. Closes: #1943 Add meson grammar to list. Closes: #1926 docs(meson): add parse for meson add meson Closes: #1912 Merge branch 'master' into add-heex-parser Closes: #1908 Add Apex to available parsers Closes: #1876 Add tree-sitter-gleam to list of available parsers Closes: #1862 DOCS: Add entries various SQL flavor parsers Closes: #1857 Add Go to the available language bindings Closes: #1856 docs: add jq grammar Closes: #1828 Add tree sitter lalrpop This is a parser for https://github.com/lalrpop/lalrpop, a parser generator for use in Rust. It is still in development because it should support some kind of highlighting of the pseudo rust code used in rules. Closes: #1733 Add YANG parser to parser list I wrote this parser over summer vacation last year and I've been using it at work every day since. Closes: #1728 Add link to m68k grammar Link to m68k (Motorola 68000 assembly) grammar in docs. Closes: #1697 Added rego language Closes: #1694 docs: add Formula parser Closes: #1672 docs: Add link to QML (Qt UI spec/programming language) parser Closes: #1671 parser: add beancount parser link Closes: #1623 Add link to llvm grammars Add links to LLVM IR, MIR and TableGen parsers. Closes: #1579 Add my Smali parser to the WIP list Closes: #1575 Update index.md Closes: #1563 Add re2c and sort alphabetically Added re2c parser (lexer generator). Sorted the list alphabetically case-insensitively. Closes: #1209 add link to chess Portable Game Notation grammar Closes: #1176 Add tree-sitter-lean. 
Closes: #1123 Add parsers to list --- Co-authored-by: Amaan Qureshi Co-authored-by: Felipe Gasper Co-authored-by: Emmanuel Briot Co-authored-by: Danny Freeman Co-authored-by: Garrett D'Amore Co-authored-by: ShootingStarDragons Co-authored-by: Clay Co-authored-by: Anthony Heber Co-authored-by: Jonathan Arnett Co-authored-by: takegue Co-authored-by: Cédric Fabianski Co-authored-by: flurie Co-authored-by: traxys Co-authored-by: Tomas Sandven <597206+Hubro@users.noreply.github.com> Co-authored-by: Graham Bates Co-authored-by: Lex Co-authored-by: Ben Siraphob Co-authored-by: Yuya Nishihara Co-authored-by: WeiZhang Co-authored-by: Flakebi Co-authored-by: Yotam Co-authored-by: Fymyte <34305318+Fymyte@users.noreply.github.com> Co-authored-by: Alexandre A. Muller Co-authored-by: Roland Walker Co-authored-by: Julian Berman Co-authored-by: Santos Gallegos --- docs/index.md | 51 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/docs/index.md b/docs/index.md index 097f0241..2e3b59ed 100644 --- a/docs/index.md +++ b/docs/index.md @@ -9,19 +9,22 @@ Tree-sitter is a parser generator tool and an incremental parsing library. 
It ca * **General** enough to parse any programming language * **Fast** enough to parse on every keystroke in a text editor * **Robust** enough to provide useful results even in the presence of syntax errors -* **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application +* **Dependency-free** so that the runtime library (which is written in pure [C](https://github.com/tree-sitter/tree-sitter/tree/master/lib)) can be embedded in any application ### Language Bindings There are currently bindings that allow Tree-sitter to be used from the following languages: +* [Go](https://github.com/smacker/go-tree-sitter) * [Haskell](https://github.com/tree-sitter/haskell-tree-sitter) * [JavaScript (Node.js)](https://github.com/tree-sitter/node-tree-sitter) * [JavaScript (Wasm)](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) * [Lua](https://github.com/euclidianAce/ltreesitter) * [OCaml](https://github.com/returntocorp/ocaml-tree-sitter-core) +* [Perl](https://metacpan.org/pod/Text::Treesitter) * [Python](https://github.com/tree-sitter/py-tree-sitter) * [Ruby](https://github.com/tree-sitter/ruby-tree-sitter) +* [Ruby](https://github.com/calicoday/ruby-tree-sitter-ffi) * [Rust](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) * [Swift](https://github.com/ChimeHQ/SwiftTreeSitter) * [Kotlin](https://github.com/oxisto/kotlintree) @@ -29,21 +32,25 @@ There are currently bindings that allow Tree-sitter to be used from the followin ### Parsers +* [Ada](https://github.com/briot/tree-sitter-ada) * [Agda](https://github.com/tree-sitter/tree-sitter-agda) +* [Apex](https://github.com/aheber/tree-sitter-sfapex) * [Bash](https://github.com/tree-sitter/tree-sitter-bash) -* [C#](https://github.com/tree-sitter/tree-sitter-c-sharp) +* [Beancount](https://github.com/zwpaper/tree-sitter-beancount) +* [Cap'n Proto](https://github.com/amaanq/tree-sitter-capnp) +* 
[C](https://github.com/tree-sitter/tree-sitter-c) * [C++](https://github.com/tree-sitter/tree-sitter-cpp) +* [C#](https://github.com/tree-sitter/tree-sitter-c-sharp) * [Clojure](https://github.com/sogaiu/tree-sitter-clojure) * [CMake](https://github.com/uyha/tree-sitter-cmake) * [Comment](https://github.com/stsewd/tree-sitter-comment) * [Common Lisp](https://github.com/theHamsta/tree-sitter-commonlisp) * [CSS](https://github.com/tree-sitter/tree-sitter-css) * [CUDA](https://github.com/theHamsta/tree-sitter-cuda) -* [C](https://github.com/tree-sitter/tree-sitter-c) * [Dart](https://github.com/UserNobody14/tree-sitter-dart) +* [D](https://github.com/gdamore/tree-sitter-d) * [Dockerfile](https://github.com/camdencheek/tree-sitter-dockerfile) * [DOT](https://github.com/rydesun/tree-sitter-dot) -* [D](https://github.com/gdamore/tree-sitter-d) * [Elixir](https://github.com/elixir-lang/tree-sitter-elixir) * [Elm](https://github.com/elm-tooling/tree-sitter-elm) * [Emacs Lisp](https://github.com/Wilfred/tree-sitter-elisp) @@ -52,60 +59,81 @@ There are currently bindings that allow Tree-sitter to be used from the followin * [Erlang](https://github.com/WhatsApp/tree-sitter-erlang/) * [Fennel](https://github.com/travonted/tree-sitter-fennel) * [Fish](https://github.com/ram02z/tree-sitter-fish) +* [Formula](https://github.com/siraben/tree-sitter-formula) * [Fortran](https://github.com/stadelmanma/tree-sitter-fortran) * [gitattributes](https://github.com/ObserverOfTime/tree-sitter-gitattributes) * [gitignore](https://github.com/shunsambongi/tree-sitter-gitignore) +* [Gleam](https://github.com/gleam-lang/tree-sitter-gleam) * [GLSL (OpenGL Shading Language)](https://github.com/theHamsta/tree-sitter-glsl) +* [Go](https://github.com/tree-sitter/tree-sitter-go) * [Go mod](https://github.com/camdencheek/tree-sitter-go-mod) * [Go work](https://github.com/omertuc/tree-sitter-go-work) -* [Go](https://github.com/tree-sitter/tree-sitter-go) * 
[Graphql](https://github.com/bkegley/tree-sitter-graphql) * [Hack](https://github.com/slackhq/tree-sitter-hack) * [Haskell](https://github.com/tree-sitter/tree-sitter-haskell) * [HCL](https://github.com/MichaHoffmann/tree-sitter-hcl) * [HTML](https://github.com/tree-sitter/tree-sitter-html) -* [JavaScript](https://github.com/tree-sitter/tree-sitter-javascript) * [Java](https://github.com/tree-sitter/tree-sitter-java) +* [JavaScript](https://github.com/tree-sitter/tree-sitter-javascript) +* [jq](https://github.com/flurie/tree-sitter-jq) * [JSON5](https://github.com/Joakker/tree-sitter-json5) * [JSON](https://github.com/tree-sitter/tree-sitter-json) * [Julia](https://github.com/tree-sitter/tree-sitter-julia) * [Kotlin](https://github.com/fwcd/tree-sitter-kotlin) +* [LALRPOP](https://github.com/traxys/tree-sitter-lalrpop) * [Latex](https://github.com/latex-lsp/tree-sitter-latex) +* [Lean](https://github.com/Julian/tree-sitter-lean) * [LLVM](https://github.com/benwilliamgraham/tree-sitter-llvm) +* [LLVM MachineIR](https://github.com/Flakebi/tree-sitter-llvm-mir) +* [LLVM TableGen](https://github.com/Flakebi/tree-sitter-tablegen) * [Lua](https://github.com/Azganoth/tree-sitter-lua) * [Make](https://github.com/alemuller/tree-sitter-make) * [Markdown](https://github.com/ikatyang/tree-sitter-markdown) * [Markdown](https://github.com/MDeiml/tree-sitter-markdown) +* [Meson](https://github.com/Decodetalkers/tree-sitter-meson) +* [Meson](https://github.com/staysail/tree-sitter-meson) +* [Motorola 68000 Assembly](https://github.com/grahambates/tree-sitter-m68k) * [Nix](https://github.com/cstrahan/tree-sitter-nix) * [Objective-C](https://github.com/jiyee/tree-sitter-objc) * [OCaml](https://github.com/tree-sitter/tree-sitter-ocaml) * [Org](https://github.com/milisims/tree-sitter-org) * [Pascal](https://github.com/Isopod/tree-sitter-pascal) * [Perl](https://github.com/ganezdragon/tree-sitter-perl) +* [Perl](https://github.com/tree-sitter-perl/tree-sitter-perl) +* [Perl 
POD](https://github.com/tree-sitter-perl/tree-sitter-pod) * [PHP](https://github.com/tree-sitter/tree-sitter-php) +* [Portable Game Notation](https://github.com/rolandwalker/tree-sitter-pgn) * [PowerShell](https://github.com/PowerShell/tree-sitter-PowerShell) * [Protocol Buffers](https://github.com/mitchellh/tree-sitter-proto) * [Python](https://github.com/tree-sitter/tree-sitter-python) +* [QML](https://github.com/yuja/tree-sitter-qmljs) * [Racket](https://github.com/6cdh/tree-sitter-racket) +* [Rasi](https://github.com/Fymyte/tree-sitter-rasi) +* [re2c](https://github.com/alemuller/tree-sitter-re2c) * [Regex](https://github.com/tree-sitter/tree-sitter-regex) +* [Rego](https://github.com/FallenAngel97/tree-sitter-rego) * [reStructuredText](https://github.com/stsewd/tree-sitter-rst) +* [R](https://github.com/r-lib/tree-sitter-r) * [Ruby](https://github.com/tree-sitter/tree-sitter-ruby) * [Rust](https://github.com/tree-sitter/tree-sitter-rust) -* [R](https://github.com/r-lib/tree-sitter-r) -* [S-expressions](https://github.com/AbstractMachinesLab/tree-sitter-sexp) * [Scala](https://github.com/tree-sitter/tree-sitter-scala) * [Scheme](https://github.com/6cdh/tree-sitter-scheme) * [Scss](https://github.com/serenadeai/tree-sitter-scss) +* [S-expressions](https://github.com/AbstractMachinesLab/tree-sitter-sexp) +* [Smali](https://github.com/amaanq/tree-sitter-smali) +* [Smali](https://git.sr.ht/~yotam/tree-sitter-smali) * [Sourcepawn](https://github.com/nilshelmig/tree-sitter-sourcepawn) * [SPARQL](https://github.com/BonaBeavis/tree-sitter-sparql) -* [SQL-PostgreSQL](https://github.com/m-novikov/tree-sitter-sql) -* [SQL-Sqlite](https://github.com/dhcmrlchtdj/tree-sitter-sqlite) +* [SQL - BigQuery](https://github.com/takegue/tree-sitter-sql-bigquery) +* [SQL - PostgreSQL](https://github.com/m-novikov/tree-sitter-sql) +* [SQL - SQLite](https://github.com/dhcmrlchtdj/tree-sitter-sqlite) * [SSH](https://github.com/metio/tree-sitter-ssh-client-config) * 
[Svelte](https://github.com/Himujjal/tree-sitter-svelte) * [Swift](https://github.com/alex-pinkus/tree-sitter-swift) * [SystemRDL](https://github.com/SystemRDL/tree-sitter-systemrdl) +* [Thrift](https://github.com/duskmoon314/tree-sitter-thrift) * [TOML](https://github.com/ikatyang/tree-sitter-toml) -* [Tree-sitter query](https://github.com/nvim-treesitter/tree-sitter-query) +* [Tree-sitter Query](https://github.com/nvim-treesitter/tree-sitter-query) * [Turtle](https://github.com/BonaBeavis/tree-sitter-turtle) * [Twig](https://github.com/gbprod/tree-sitter-twig) * [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript) @@ -115,6 +143,7 @@ There are currently bindings that allow Tree-sitter to be used from the followin * [WASM](https://github.com/wasm-lsp/tree-sitter-wasm) * [WGSL WebGPU Shading Language](https://github.com/mehmetoguzderin/tree-sitter-wgsl) * [YAML](https://github.com/ikatyang/tree-sitter-yaml) +* [YANG](https://github.com/Hubro/tree-sitter-yang) * [Zig](https://github.com/maxxnino/tree-sitter-zig) ### Talks on Tree-sitter From 5766b8a0a785ea34fceb479a94f7fe24c9daae2f Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Mon, 6 Feb 2023 21:33:50 +0200 Subject: [PATCH 029/347] docs: apply `scheme` marker for all query syntax snippets --- docs/section-2-using-parsers.md | 40 ++++++++++++++++----------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/section-2-using-parsers.md b/docs/section-2-using-parsers.md index ac3f968f..ea32f4f5 100644 --- a/docs/section-2-using-parsers.md +++ b/docs/section-2-using-parsers.md @@ -442,13 +442,13 @@ Many code analysis tasks involve searching for patterns in syntax trees. Tree-si A _query_ consists of one or more _patterns_, where each pattern is an [S-expression](https://en.wikipedia.org/wiki/S-expression) that matches a certain set of nodes in a syntax tree. 
The expression to match a given node consists of a pair of parentheses containing two things: the node's type, and optionally, a series of other S-expressions that match the node's children. For example, this pattern would match any `binary_expression` node whose children are both `number_literal` nodes: -``` +``` scheme (binary_expression (number_literal) (number_literal)) ``` Children can also be omitted. For example, this would match any `binary_expression` where at least _one_ of child is a `string_literal` node: -``` +``` scheme (binary_expression (string_literal)) ``` @@ -456,7 +456,7 @@ Children can also be omitted. For example, this would match any `binary_expressi In general, it's a good idea to make patterns more specific by specifying [field names](#node-field-names) associated with child nodes. You do this by prefixing a child pattern with a field name followed by a colon. For example, this pattern would match an `assignment_expression` node where the `left` child is a `member_expression` whose `object` is a `call_expression`. -``` +``` scheme (assignment_expression left: (member_expression object: (call_expression))) @@ -466,7 +466,7 @@ In general, it's a good idea to make patterns more specific by specifying [field You can also constrain a pattern so that it only matches nodes that *lack* a certain field. To do this, add a field name prefixed by a `!` within the parent pattern. For example, this pattern would match a class declaration with no type parameters: -``` +``` scheme (class_declaration name: (identifier) @class_name !type_parameters) @@ -476,7 +476,7 @@ You can also constrain a pattern so that it only matches nodes that *lack* a cer The parenthesized syntax for writing nodes only applies to [named nodes](#named-vs-anonymous-nodes). To match specific anonymous nodes, you write their name between double quotes. 
For example, this pattern would match any `binary_expression` where the operator is `!=` and the right side is `null`: -``` +``` scheme (binary_expression operator: "!=" right: (null)) @@ -488,7 +488,7 @@ When matching patterns, you may want to process specific nodes within the patter For example, this pattern would match any assignment of a `function` to an `identifier`, and it would associate the name `the-function-name` with the identifier: -``` +``` scheme (assignment_expression left: (identifier) @the-function-name right: (function)) @@ -496,7 +496,7 @@ For example, this pattern would match any assignment of a `function` to an `iden And this pattern would match all method definitions, associating the name `the-method-name` with the method name, `the-class-name` with the containing class name: -``` +``` scheme (class_declaration name: (identifier) @the-class-name body: (class_body @@ -510,13 +510,13 @@ You can match a repeating sequence of sibling nodes using the postfix `+` and `* For example, this pattern would match a sequence of one or more comments: -``` +``` scheme (comment)+ ``` This pattern would match a class declaration, capturing all of the decorators if any were present: -``` +``` scheme (class_declaration (decorator)* @the-decorator name: (identifier) @the-name) @@ -524,7 +524,7 @@ This pattern would match a class declaration, capturing all of the decorators if You can also mark a node as optional using the `?` operator. For example, this pattern would match all function calls, capturing a string argument if one was present: -``` +``` scheme (call_expression function: (identifier) @the-function arguments: (arguments (string)? @the-string-arg)) @@ -534,7 +534,7 @@ You can also mark a node as optional using the `?` operator. For example, this p You can also use parentheses for grouping a sequence of _sibling_ nodes. 
For example, this pattern would match a comment followed by a function declaration: -``` +``` scheme ( (comment) (function_declaration) @@ -543,7 +543,7 @@ You can also use parentheses for grouping a sequence of _sibling_ nodes. For exa Any of the quantification operators mentioned above (`+`, `*`, and `?`) can also be applied to groups. For example, this pattern would match a comma-separated series of numbers: -``` +``` scheme ( (number) ("," (number))* @@ -558,7 +558,7 @@ This is similar to _character classes_ from regular expressions (`[abc]` matches For example, this pattern would match a call to either a variable or an object property. In the case of a variable, capture it as `@function`, and in the case of a property, capture it as `@method`: -``` +``` scheme (call_expression function: [ (identifier) @function @@ -569,7 +569,7 @@ In the case of a variable, capture it as `@function`, and in the case of a prope This pattern would match a set of possible keyword tokens, capturing them as `@keyword`: -``` +``` scheme [ "break" "delete" @@ -592,7 +592,7 @@ and `_` will match any named or anonymous node. For example, this pattern would match any node inside a call: -``` +``` scheme (call (_) @call.inner) ``` @@ -602,7 +602,7 @@ The anchor operator, `.`, is used to constrain the ways in which child patterns When `.` is placed before the _first_ child within a parent pattern, the child will only match when it is the first named node in the parent. For example, the below pattern matches a given `array` node at most once, assigning the `@the-element` capture to the first `identifier` node in the parent `array`: -``` +``` scheme (array . (identifier) @the-element) ``` @@ -610,13 +610,13 @@ Without this anchor, the pattern would match once for every identifier in the ar Similarly, an anchor placed after a pattern's _last_ child will cause that child pattern to only match nodes that are the last named child of their parent. 
The below pattern matches only nodes that are the last named child within a `block`. -``` +``` scheme (block (_) @last-expression .) ``` Finally, an anchor _between_ two child patterns will cause the patterns to only match nodes that are immediate siblings. The pattern below, given a long dotted name like `a.b.c.d`, will only match pairs of consecutive identifiers: `a, b`, `b, c`, and `c, d`. -``` +``` scheme (dotted_name (identifier) @prev-id . @@ -633,7 +633,7 @@ You can also specify arbitrary metadata and conditions associated with a pattern For example, this pattern would match identifier whose names is written in `SCREAMING_SNAKE_CASE`: -``` +``` scheme ( (identifier) @constant (#match? @constant "^[A-Z][A-Z_]+") @@ -642,7 +642,7 @@ For example, this pattern would match identifier whose names is written in `SCRE And this pattern would match key-value pairs where the `value` is an identifier with the same name as the key: -``` +``` scheme ( (pair key: (property_identifier) @key-name From 3964651fc69cdf4ac060751464268fad41aca421 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Gaspard?= Date: Thu, 9 Feb 2023 03:35:42 +0100 Subject: [PATCH 030/347] Derive Hash for Language Given Language implements Eq, it can probably implement Hash too. This makes it easy to derive a random identifier from a `Language` --- lib/binding_rust/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 870d177b..84471d06 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -38,7 +38,7 @@ pub const PARSER_HEADER: &'static str = include_str!("../include/tree_sitter/par /// An opaque object that defines how to parse a particular language. The code for each /// `Language` is generated by the Tree-sitter CLI. 
#[doc(alias = "TSLanguage")] -#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] #[repr(transparent)] pub struct Language(*const ffi::TSLanguage); From 97fd990822deeb3c288f4999a8410fba69f230b6 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 13 Feb 2023 12:33:34 -0800 Subject: [PATCH 031/347] Add --dot flag to parse subcommand, for printing tree as DOT graph --- cli/src/main.rs | 20 ++++++--- cli/src/parse.rs | 19 +++++++-- cli/src/util.rs | 80 ++++++++++++++++++++++------------- lib/binding_rust/bindings.rs | 7 +++ lib/binding_rust/lib.rs | 10 +++++ lib/include/tree_sitter/api.h | 2 +- lib/src/clock.h | 1 + lib/src/tree.c | 5 ++- lib/src/tree.h | 2 + script/generate-bindings | 3 -- 10 files changed, 106 insertions(+), 43 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index e0625708..47e7597b 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -3,6 +3,7 @@ use clap::{App, AppSettings, Arg, SubCommand}; use glob::glob; use std::path::{Path, PathBuf}; use std::{env, fs, u64}; +use tree_sitter_cli::parse::ParseOutput; use tree_sitter_cli::{ generate, highlight, logger, parse, playground, query, tags, test, test_highlight, test_tags, util, wasm, @@ -136,7 +137,8 @@ fn run() -> Result<()> { .arg(&debug_arg) .arg(&debug_build_arg) .arg(&debug_graph_arg) - .arg(Arg::with_name("debug-xml").long("xml").short("x")) + .arg(Arg::with_name("output-dot").long("dot")) + .arg(Arg::with_name("output-xml").long("xml").short("x")) .arg( Arg::with_name("stat") .help("Show parsing statistic") @@ -377,8 +379,17 @@ fn run() -> Result<()> { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph"); let debug_build = matches.is_present("debug-build"); - let debug_xml = matches.is_present("debug-xml"); - let quiet = matches.is_present("quiet"); + + let output = if matches.is_present("output-dot") { + ParseOutput::Dot + } else if matches.is_present("output-xml") { + ParseOutput::Xml + } 
else if matches.is_present("quiet") { + ParseOutput::Quiet + } else { + ParseOutput::Normal + }; + let time = matches.is_present("time"); let edits = matches .values_of("edits") @@ -416,12 +427,11 @@ fn run() -> Result<()> { path, &edits, max_path_length, - quiet, + output, time, timeout, debug, debug_graph, - debug_xml, Some(&cancellation_flag), )?; diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 15a9d4c9..3e28e51a 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -30,17 +30,24 @@ impl fmt::Display for Stats { } } +#[derive(Copy, Clone)] +pub enum ParseOutput { + Normal, + Quiet, + Xml, + Dot, +} + pub fn parse_file_at_path( language: Language, path: &Path, edits: &Vec<&str>, max_path_length: usize, - quiet: bool, + output: ParseOutput, print_time: bool, timeout: u64, debug: bool, debug_graph: bool, - debug_xml: bool, cancellation_flag: Option<&AtomicUsize>, ) -> Result { let mut _log_session = None; @@ -95,7 +102,7 @@ pub fn parse_file_at_path( let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000; let mut cursor = tree.walk(); - if !quiet { + if matches!(output, ParseOutput::Normal) { let mut needs_newline = false; let mut indent_level = 0; let mut did_visit_children = false; @@ -151,7 +158,7 @@ pub fn parse_file_at_path( println!(""); } - if debug_xml { + if matches!(output, ParseOutput::Xml) { let mut needs_newline = false; let mut indent_level = 0; let mut did_visit_children = false; @@ -206,6 +213,10 @@ pub fn parse_file_at_path( println!(""); } + if matches!(output, ParseOutput::Dot) { + util::print_tree_graph(&tree, "log.html").unwrap(); + } + let mut first_error = None; loop { let node = cursor.node(); diff --git a/cli/src/util.rs b/cli/src/util.rs index acd8acbf..2b7405fd 100644 --- a/cli/src/util.rs +++ b/cli/src/util.rs @@ -3,7 +3,7 @@ use std::io; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use std::thread; -use tree_sitter::Parser; +use tree_sitter::{Parser, Tree}; #[cfg(unix)] use 
anyhow::{anyhow, Context}; @@ -29,39 +29,61 @@ pub fn cancel_on_stdin() -> Arc { } result } + #[cfg(windows)] -pub struct LogSession(); +pub struct LogSession; #[cfg(unix)] -pub struct LogSession(PathBuf, Option, Option); +pub struct LogSession { + path: PathBuf, + dot_process: Option, + dot_process_stdin: Option, +} + +pub fn print_tree_graph(tree: &Tree, path: &str) -> Result<()> { + let session = LogSession::new(path)?; + tree.print_dot_graph(session.dot_process_stdin.as_ref().unwrap()); + Ok(()) +} + +pub fn log_graphs(parser: &mut Parser, path: &str) -> Result { + let session = LogSession::new(path)?; + parser.print_dot_graphs(session.dot_process_stdin.as_ref().unwrap()); + Ok(session) +} #[cfg(windows)] -pub fn log_graphs(_parser: &mut Parser, _path: &str) -> Result { - Ok(LogSession()) +impl LogSession { + fn new(path: &str) -> Result { + Ok(Self) + } } #[cfg(unix)] -pub fn log_graphs(parser: &mut Parser, path: &str) -> Result { - use std::io::Write; +impl LogSession { + fn new(path: &str) -> Result { + use std::io::Write; - let mut dot_file = std::fs::File::create(path)?; - dot_file.write(HTML_HEADER)?; - let mut dot_process = Command::new("dot") - .arg("-Tsvg") - .stdin(Stdio::piped()) - .stdout(dot_file) - .spawn() - .with_context(|| "Failed to run the `dot` command. Check that graphviz is installed.")?; - let dot_stdin = dot_process - .stdin - .take() - .ok_or_else(|| anyhow!("Failed to open stdin for `dot` process."))?; - parser.print_dot_graphs(&dot_stdin); - Ok(LogSession( - PathBuf::from(path), - Some(dot_process), - Some(dot_stdin), - )) + let mut dot_file = std::fs::File::create(path)?; + dot_file.write(HTML_HEADER)?; + let mut dot_process = Command::new("dot") + .arg("-Tsvg") + .stdin(Stdio::piped()) + .stdout(dot_file) + .spawn() + .with_context(|| { + "Failed to run the `dot` command. Check that graphviz is installed." 
+ })?; + let dot_stdin = dot_process + .stdin + .take() + .ok_or_else(|| anyhow!("Failed to open stdin for `dot` process."))?; + Ok(Self { + path: PathBuf::from(path), + dot_process: Some(dot_process), + dot_process_stdin: Some(dot_stdin), + }) + } } #[cfg(unix)] @@ -69,13 +91,13 @@ impl Drop for LogSession { fn drop(&mut self) { use std::fs; - drop(self.2.take().unwrap()); - let output = self.1.take().unwrap().wait_with_output().unwrap(); + drop(self.dot_process_stdin.take().unwrap()); + let output = self.dot_process.take().unwrap().wait_with_output().unwrap(); if output.status.success() { if cfg!(target_os = "macos") - && fs::metadata(&self.0).unwrap().len() > HTML_HEADER.len() as u64 + && fs::metadata(&self.path).unwrap().len() > HTML_HEADER.len() as u64 { - Command::new("open").arg(&self.0).output().unwrap(); + Command::new("open").arg(&self.path).output().unwrap(); } } else { eprintln!( diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index b4ec9bed..4591a380 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -346,6 +346,9 @@ extern "C" { pub fn ts_tree_language(arg1: *const TSTree) -> *const TSLanguage; } extern "C" { + #[doc = " Get the array of included ranges that was used to parse the syntax tree."] + #[doc = ""] + #[doc = " The returned pointer must be freed by the caller."] pub fn ts_tree_included_ranges(arg1: *const TSTree, length: *mut u32) -> *mut TSRange; } extern "C" { @@ -375,6 +378,10 @@ extern "C" { length: *mut u32, ) -> *mut TSRange; } +extern "C" { + #[doc = " Write a DOT graph describing the syntax tree to the given file."] + pub fn ts_tree_print_dot_graph(arg1: *const TSTree, file_descriptor: ::std::os::raw::c_int); +} extern "C" { #[doc = " Get the node's type as a null-terminated string."] pub fn ts_node_type(arg1: TSNode) -> *const ::std::os::raw::c_char; diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 84471d06..6f044cca 100644 --- a/lib/binding_rust/lib.rs +++ 
b/lib/binding_rust/lib.rs @@ -775,6 +775,16 @@ impl Tree { result } } + + /// Print a graph of the tree to the given file descriptor. + /// The graph is formatted in the DOT language. You may want to pipe this graph + /// directly to a `dot(1)` process in order to generate SVG output. + #[cfg(unix)] + #[doc(alias = "ts_tree_print_dot_graph")] + pub fn print_dot_graph(&self, file: &impl AsRawFd) { + let fd = file.as_raw_fd(); + unsafe { ffi::ts_tree_print_dot_graph(self.0.as_ptr(), fd) } + } } impl fmt::Debug for Tree { diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index bc05bc3c..5b48cf60 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -420,7 +420,7 @@ TSRange *ts_tree_get_changed_ranges( /** * Write a DOT graph describing the syntax tree to the given file. */ -void ts_tree_print_dot_graph(const TSTree *, FILE *); +void ts_tree_print_dot_graph(const TSTree *, int file_descriptor); /******************/ /* Section - Node */ diff --git a/lib/src/clock.h b/lib/src/clock.h index e6faa040..6e75729e 100644 --- a/lib/src/clock.h +++ b/lib/src/clock.h @@ -1,6 +1,7 @@ #ifndef TREE_SITTER_CLOCK_H_ #define TREE_SITTER_CLOCK_H_ +#include #include typedef uint64_t TSDuration; diff --git a/lib/src/tree.c b/lib/src/tree.c index f6bd2c72..bee2a3de 100644 --- a/lib/src/tree.c +++ b/lib/src/tree.c @@ -1,3 +1,4 @@ +#include #include "tree_sitter/api.h" #include "./array.h" #include "./get_changed_ranges.h" @@ -123,6 +124,8 @@ TSRange *ts_tree_get_changed_ranges(const TSTree *self, const TSTree *other, uin return result; } -void ts_tree_print_dot_graph(const TSTree *self, FILE *file) { +void ts_tree_print_dot_graph(const TSTree *self, int fd) { + FILE *file = fdopen(dup(fd), "a"); ts_subtree_print_dot_graph(self->root, self->language, file); + fclose(file); } diff --git a/lib/src/tree.h b/lib/src/tree.h index 0334b824..f012f888 100644 --- a/lib/src/tree.h +++ b/lib/src/tree.h @@ -1,6 +1,8 @@ #ifndef TREE_SITTER_TREE_H_ 
#define TREE_SITTER_TREE_H_ +#include "./subtree.h" + #ifdef __cplusplus extern "C" { #endif diff --git a/script/generate-bindings b/script/generate-bindings index 4b3fb951..54abac06 100755 --- a/script/generate-bindings +++ b/script/generate-bindings @@ -7,10 +7,7 @@ bindgen \ --no-layout-tests \ --whitelist-type '^TS.*' \ --whitelist-function '^ts_.*' \ - --opaque-type FILE \ - --blocklist-type FILE \ --blocklist-type '^__.*' \ - --blocklist-function ts_tree_print_dot_graph \ --size_t-is-usize \ $header_path > $output_path From 0b817a609f7cd3d7309a81dbfe96287c6945a085 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 13 Feb 2023 13:45:12 -0800 Subject: [PATCH 032/347] Clear the parse stack when terminating parsing early due to error cost This fixes a bug where the parse tree would not be rebalanced if this code path was taken. --- lib/src/parser.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/src/parser.c b/lib/src/parser.c index f186ce33..f84b3c8d 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -1941,8 +1941,16 @@ TSTree *ts_parser_parse( } } + // After advancing each version of the stack, re-sort the versions by their cost, + // removing any versions that are no longer worth pursuing. unsigned min_error_cost = ts_parser__condense_stack(self); + + // If there's already a finished parse tree that's better than any in-progress version, + // then terminate parsing. Clear the parse stack to remove any extra references to subtrees + // within the finished tree, ensuring that these subtrees can be safely mutated in-place + // for rebalancing. 
if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) < min_error_cost) { + ts_stack_clear(self->stack); break; } From 450c67484bcf2836807b4c6a9f128df2149d47d6 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 13 Feb 2023 23:18:35 -0800 Subject: [PATCH 033/347] For now, don't implement ts_tree_print_dot_graph on windows --- lib/src/tree.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/src/tree.c b/lib/src/tree.c index bee2a3de..79e1d1ae 100644 --- a/lib/src/tree.c +++ b/lib/src/tree.c @@ -1,4 +1,3 @@ -#include #include "tree_sitter/api.h" #include "./array.h" #include "./get_changed_ranges.h" @@ -124,8 +123,21 @@ TSRange *ts_tree_get_changed_ranges(const TSTree *self, const TSTree *other, uin return result; } +#ifdef _WIN32 + +void ts_tree_print_dot_graph(const TSTree *self, int fd) { + (void)self; + (void)fd; +} + +#else + +#include + void ts_tree_print_dot_graph(const TSTree *self, int fd) { FILE *file = fdopen(dup(fd), "a"); ts_subtree_print_dot_graph(self->root, self->language, file); fclose(file); } + +#endif From 125503ff3b613b08233fc1e06292be9ddd9dd448 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 13 Feb 2023 23:23:47 -0800 Subject: [PATCH 034/347] Fix CLI build on windows --- cli/src/util.rs | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/cli/src/util.rs b/cli/src/util.rs index 2b7405fd..d180cd54 100644 --- a/cli/src/util.rs +++ b/cli/src/util.rs @@ -13,7 +13,14 @@ use std::path::PathBuf; use std::process::{Child, ChildStdin, Command, Stdio}; #[cfg(unix)] -const HTML_HEADER: &[u8] = b"\n\n\n"; +const HTML_HEADER: &[u8] = b" + + + + +"; pub fn cancel_on_stdin() -> Arc { let result = Arc::new(AtomicUsize::new(0)); @@ -40,25 +47,30 @@ pub struct LogSession { dot_process_stdin: Option, } +#[cfg(windows)] +pub fn print_tree_graph(_tree: &Tree, _path: &str) -> Result<()> { + Ok(()) +} + +#[cfg(windows)] +pub fn log_graphs(_parser: &mut Parser, 
_path: &str) -> Result { + Ok(LogSession) +} + +#[cfg(unix)] pub fn print_tree_graph(tree: &Tree, path: &str) -> Result<()> { let session = LogSession::new(path)?; tree.print_dot_graph(session.dot_process_stdin.as_ref().unwrap()); Ok(()) } +#[cfg(unix)] pub fn log_graphs(parser: &mut Parser, path: &str) -> Result { let session = LogSession::new(path)?; parser.print_dot_graphs(session.dot_process_stdin.as_ref().unwrap()); Ok(session) } -#[cfg(windows)] -impl LogSession { - fn new(path: &str) -> Result { - Ok(Self) - } -} - #[cfg(unix)] impl LogSession { fn new(path: &str) -> Result { From ff2436a6f8639b290e4395ca2b44491472647a2b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 14 Feb 2023 14:41:25 -0800 Subject: [PATCH 035/347] Add --row-range, --quiet, and --time flags to query subcommand --- cli/src/main.rs | 30 ++++++++++++++++--- cli/src/query.rs | 77 +++++++++++++++++++++++++++++------------------- 2 files changed, 73 insertions(+), 34 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 47e7597b..fb2a6327 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -3,6 +3,7 @@ use clap::{App, AppSettings, Arg, SubCommand}; use glob::glob; use std::path::{Path, PathBuf}; use std::{env, fs, u64}; +use tree_sitter::Point; use tree_sitter_cli::parse::ParseOutput; use tree_sitter_cli::{ generate, highlight, logger, parse, playground, query, tags, test, test_highlight, test_tags, @@ -173,6 +174,8 @@ fn run() -> Result<()> { .index(1) .required(true), ) + .arg(&time_arg) + .arg(&quiet_arg) .arg(&paths_file_arg) .arg(&paths_arg.clone().index(2)) .arg( @@ -181,6 +184,12 @@ fn run() -> Result<()> { .long("byte-range") .takes_value(true), ) + .arg( + Arg::with_name("row-range") + .help("The range of rows in which the query will be executed") + .long("row-range") + .takes_value(true), + ) .arg(&scope_arg) .arg(Arg::with_name("captures").long("captures").short("c")) .arg(Arg::with_name("test").long("test")), @@ -456,6 +465,8 @@ fn run() -> Result<()> { 
("query", Some(matches)) => { let ordered_captures = matches.values_of("captures").is_some(); + let quiet = matches.values_of("quiet").is_some(); + let time = matches.values_of("time").is_some(); let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; let loader_config = config.get()?; loader.find_all_languages(&loader_config)?; @@ -465,9 +476,17 @@ fn run() -> Result<()> { matches.value_of("scope"), )?; let query_path = Path::new(matches.value_of("query-path").unwrap()); - let range = matches.value_of("byte-range").map(|br| { - let r: Vec<&str> = br.split(":").collect(); - r[0].parse().unwrap()..r[1].parse().unwrap() + let byte_range = matches.value_of("byte-range").and_then(|arg| { + let mut parts = arg.split(":"); + let start = parts.next()?.parse().ok()?; + let end = parts.next().unwrap().parse().ok()?; + Some(start..end) + }); + let point_range = matches.value_of("row-range").and_then(|arg| { + let mut parts = arg.split(":"); + let start = parts.next()?.parse().ok()?; + let end = parts.next().unwrap().parse().ok()?; + Some(Point::new(start, 0)..Point::new(end, 0)) }); let should_test = matches.is_present("test"); query::query_files_at_paths( @@ -475,8 +494,11 @@ fn run() -> Result<()> { paths, query_path, ordered_captures, - range, + byte_range, + point_range, should_test, + quiet, + time, )?; } diff --git a/cli/src/query.rs b/cli/src/query.rs index 73d6dd28..fc24cb05 100644 --- a/cli/src/query.rs +++ b/cli/src/query.rs @@ -5,16 +5,20 @@ use std::{ io::{self, Write}, ops::Range, path::Path, + time::Instant, }; -use tree_sitter::{Language, Parser, Query, QueryCursor}; +use tree_sitter::{Language, Parser, Point, Query, QueryCursor}; pub fn query_files_at_paths( language: Language, paths: Vec, query_path: &Path, ordered_captures: bool, - range: Option>, + byte_range: Option>, + point_range: Option>, should_test: bool, + quiet: bool, + print_time: bool, ) -> Result<()> { let stdout = io::stdout(); let mut stdout = stdout.lock(); @@ 
-24,9 +28,12 @@ pub fn query_files_at_paths( let query = Query::new(language, &query_source).with_context(|| "Query compilation failed")?; let mut query_cursor = QueryCursor::new(); - if let Some(range) = range { + if let Some(range) = byte_range { query_cursor.set_byte_range(range); } + if let Some(range) = point_range { + query_cursor.set_point_range(range); + } let mut parser = Parser::new(); parser.set_language(language)?; @@ -40,22 +47,25 @@ pub fn query_files_at_paths( fs::read(&path).with_context(|| format!("Error reading source file {:?}", path))?; let tree = parser.parse(&source_code, None).unwrap(); + let start = Instant::now(); if ordered_captures { for (mat, capture_index) in query_cursor.captures(&query, tree.root_node(), source_code.as_slice()) { let capture = mat.captures[capture_index]; let capture_name = &query.capture_names()[capture.index as usize]; - writeln!( - &mut stdout, - " pattern: {:>2}, capture: {} - {}, start: {}, end: {}, text: `{}`", - mat.pattern_index, - capture.index, - capture_name, - capture.node.start_position(), - capture.node.end_position(), - capture.node.utf8_text(&source_code).unwrap_or("") - )?; + if !quiet { + writeln!( + &mut stdout, + " pattern: {:>2}, capture: {} - {}, start: {}, end: {}, text: `{}`", + mat.pattern_index, + capture.index, + capture_name, + capture.node.start_position(), + capture.node.end_position(), + capture.node.utf8_text(&source_code).unwrap_or("") + )?; + } results.push(query_testing::CaptureInfo { name: capture_name.to_string(), start: capture.node.start_position(), @@ -64,27 +74,31 @@ pub fn query_files_at_paths( } } else { for m in query_cursor.matches(&query, tree.root_node(), source_code.as_slice()) { - writeln!(&mut stdout, " pattern: {}", m.pattern_index)?; + if !quiet { + writeln!(&mut stdout, " pattern: {}", m.pattern_index)?; + } for capture in m.captures { let start = capture.node.start_position(); let end = capture.node.end_position(); let capture_name = 
&query.capture_names()[capture.index as usize]; - if end.row == start.row { - writeln!( - &mut stdout, - " capture: {} - {}, start: {}, end: {}, text: `{}`", - capture.index, - capture_name, - start, - end, - capture.node.utf8_text(&source_code).unwrap_or("") - )?; - } else { - writeln!( - &mut stdout, - " capture: {}, start: {}, end: {}", - capture_name, start, end, - )?; + if !quiet { + if end.row == start.row { + writeln!( + &mut stdout, + " capture: {} - {}, start: {}, end: {}, text: `{}`", + capture.index, + capture_name, + start, + end, + capture.node.utf8_text(&source_code).unwrap_or("") + )?; + } else { + writeln!( + &mut stdout, + " capture: {}, start: {}, end: {}", + capture_name, start, end, + )?; + } } results.push(query_testing::CaptureInfo { name: capture_name.to_string(), @@ -103,6 +117,9 @@ pub fn query_files_at_paths( if should_test { query_testing::assert_expected_captures(results, path, &mut parser, language)? } + if print_time { + writeln!(&mut stdout, "{:?}", start.elapsed())?; + } } Ok(()) From 32ce1fccd05efdf91dd8d99fba0fc91b46b18b81 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 14 Feb 2023 14:42:26 -0800 Subject: [PATCH 036/347] Precompute the set of repetition symbols that can match rootless patterns --- lib/src/query.c | 733 ++++++++++++++++++++++++++++-------------------- 1 file changed, 427 insertions(+), 306 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index 710a9209..a756c089 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -228,12 +228,15 @@ typedef struct { AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH]; uint16_t depth; uint16_t step_index; + TSSymbol root_symbol; } AnalysisState; typedef Array(AnalysisState *) AnalysisStateSet; typedef Array(AnalysisState *) AnalysisStatePool; +typedef Array(uint16_t) StepIndexArray; + /* * AnalysisSubgraph - A subset of the states in the parse table that are used * in constructing nodes with a certain symbol. 
Each state is accompanied by @@ -253,6 +256,8 @@ typedef struct { Array(AnalysisSubgraphNode) nodes; } AnalysisSubgraph; +typedef Array(AnalysisSubgraph) AnalysisSubgraphArray; + /* * StatePredecessorMap - A map that stores the predecessors of each parse state. * This is used during query analysis to determine which parse states can lead @@ -269,8 +274,8 @@ typedef struct { */ struct TSQuery { SymbolTable captures; - Array(CaptureQuantifiers) capture_quantifiers; SymbolTable predicate_values; + Array(CaptureQuantifiers) capture_quantifiers; Array(QueryStep) steps; Array(PatternEntry) pattern_map; Array(TSQueryPredicateStep) predicate_steps; @@ -278,6 +283,7 @@ struct TSQuery { Array(StepOffset) step_offsets; Array(TSFieldId) negated_fields; Array(char) string_buffer; + Array(TSSymbol) repeat_symbols_with_rootless_patterns; const TSLanguage *language; uint16_t wildcard_root_pattern_count; }; @@ -1113,7 +1119,324 @@ static inline void ts_query__pattern_map_insert( array_insert(&self->pattern_map, index, new_entry); } +static void ts_query__analyze_patterns_from_states( + TSQuery *self, + const AnalysisSubgraphArray *subgraphs, + AnalysisStateSet *states, + AnalysisStateSet *next_states, + AnalysisStateSet *deeper_states, + AnalysisStatePool *state_pool, + StepIndexArray *finished_parent_symbols, + StepIndexArray *final_step_indices, + bool *did_abort_analysis +) { + unsigned recursion_depth_limit = 0; + unsigned prev_final_step_count = 0; + + for (unsigned iteration = 0;; iteration++) { + if (iteration == MAX_ANALYSIS_ITERATION_COUNT) { + *did_abort_analysis = true; + break; + } + + #ifdef DEBUG_ANALYZE_QUERY + printf("Iteration: %u. 
Final step indices:", iteration); + for (unsigned j = 0; j < final_step_indices->size; j++) { + printf(" %4u", final_step_indices->contents[j]); + } + printf("\n"); + for (unsigned j = 0; j < states->size; j++) { + AnalysisState *state = states->contents[j]; + printf(" %3u: step: %u, stack: [", j, state->step_index); + for (unsigned k = 0; k < state->depth; k++) { + printf( + " {%s, child: %u, state: %4u", + self->language->symbol_names[state->stack[k].parent_symbol], + state->stack[k].child_index, + state->stack[k].parse_state + ); + if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]); + if (state->stack[k].done) printf(", DONE"); + printf("}"); + } + printf(" ]\n"); + } + #endif + + // If no further progress can be made within the current recursion depth limit, then + // bump the depth limit by one, and continue to process the states the exceeded the + // limit. But only allow this if progress has been made since the last time the depth + // limit was increased. + if (states->size == 0) { + if ( + deeper_states->size > 0 + && final_step_indices->size > prev_final_step_count + ) { + #ifdef DEBUG_ANALYZE_QUERY + printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1); + #endif + + prev_final_step_count = final_step_indices->size; + recursion_depth_limit++; + AnalysisStateSet _states = *states; + *states = *deeper_states; + *deeper_states = _states; + continue; + } + + break; + } + + analysis_state_set__clear(next_states, state_pool); + for (unsigned j = 0; j < states->size; j++) { + AnalysisState * const state = states->contents[j]; + + // For efficiency, it's important to avoid processing the same analysis state more + // than once. To achieve this, keep the states in order of ascending position within + // their hypothetical syntax trees. In each iteration of this loop, start by advancing + // the states that have made the least progress. 
Avoid advancing states that have already + // made more progress. + if (next_states->size > 0) { + int comparison = analysis_state__compare_position( + &state, + array_back(next_states) + ); + if (comparison == 0) { + analysis_state_set__insert_sorted_by_clone(next_states, state_pool, state); + continue; + } else if (comparison > 0) { + #ifdef DEBUG_ANALYZE_QUERY + printf("Terminate iteration at state %u\n", j); + #endif + while (j < states->size) { + analysis_state_set__push_by_clone( + next_states, + state_pool, + states->contents[j] + ); + j++; + } + break; + } + } + + const TSStateId parse_state = analysis_state__top(state)->parse_state; + const TSSymbol parent_symbol = analysis_state__top(state)->parent_symbol; + const TSFieldId parent_field_id = analysis_state__top(state)->field_id; + const unsigned child_index = analysis_state__top(state)->child_index; + const QueryStep * const step = &self->steps.contents[state->step_index]; + + unsigned subgraph_index, exists; + array_search_sorted_by(subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); + if (!exists) continue; + const AnalysisSubgraph *subgraph = &subgraphs->contents[subgraph_index]; + + // Follow every possible path in the parse table, but only visit states that + // are part of the subgraph for the current symbol. 
+ LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state); + while (ts_lookahead_iterator_next(&lookahead_iterator)) { + TSSymbol sym = lookahead_iterator.symbol; + + AnalysisSubgraphNode successor = { + .state = parse_state, + .child_index = child_index, + }; + if (lookahead_iterator.action_count) { + const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1]; + if (action->type == TSParseActionTypeShift) { + if (!action->shift.extra) { + successor.state = action->shift.state; + successor.child_index++; + } + } else { + continue; + } + } else if (lookahead_iterator.next_state != 0) { + successor.state = lookahead_iterator.next_state; + successor.child_index++; + } else { + continue; + } + + unsigned node_index; + array_search_sorted_with( + &subgraph->nodes, + analysis_subgraph_node__compare, &successor, + &node_index, &exists + ); + while (node_index < subgraph->nodes.size) { + AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++]; + if (node->state != successor.state || node->child_index != successor.child_index) break; + + // Use the subgraph to determine what alias and field will eventually be applied + // to this child node. + TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index); + TSSymbol visible_symbol = alias + ? alias + : self->language->symbol_metadata[sym].visible + ? self->language->public_symbol_map[sym] + : 0; + TSFieldId field_id = parent_field_id; + if (!field_id) { + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end); + for (; field_map != field_map_end; field_map++) { + if (!field_map->inherited && field_map->child_index == child_index) { + field_id = field_map->field_id; + break; + } + } + } + + // Create a new state that has advanced past this hypothetical subtree. 
+ AnalysisState next_state = *state; + AnalysisStateEntry *next_state_top = analysis_state__top(&next_state); + next_state_top->child_index = successor.child_index; + next_state_top->parse_state = successor.state; + if (node->done) next_state_top->done = true; + + // Determine if this hypothetical child node would match the current step + // of the query pattern. + bool does_match = false; + if (visible_symbol) { + does_match = true; + if (step->symbol == WILDCARD_SYMBOL) { + if ( + step->is_named && + !self->language->symbol_metadata[visible_symbol].named + ) does_match = false; + } else if (step->symbol != visible_symbol) { + does_match = false; + } + if (step->field && step->field != field_id) { + does_match = false; + } + if ( + step->supertype_symbol && + !analysis_state__has_supertype(state, step->supertype_symbol) + ) does_match = false; + } + + // If this child is hidden, then descend into it and walk through its children. + // If the top entry of the stack is at the end of its rule, then that entry can + // be replaced. Otherwise, push a new entry onto the stack. + else if (sym >= self->language->token_count) { + if (!next_state_top->done) { + if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) { + #ifdef DEBUG_ANALYZE_QUERY + printf("Exceeded depth limit for state %u\n", j); + #endif + + *did_abort_analysis = true; + continue; + } + + next_state.depth++; + next_state_top = analysis_state__top(&next_state); + } + + *next_state_top = (AnalysisStateEntry) { + .parse_state = parse_state, + .parent_symbol = sym, + .child_index = 0, + .field_id = field_id, + .done = false, + }; + + if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) { + analysis_state_set__insert_sorted_by_clone( + deeper_states, + state_pool, + &next_state + ); + continue; + } + } + + // Pop from the stack when this state reached the end of its current syntax node. 
+ while (next_state.depth > 0 && next_state_top->done) { + next_state.depth--; + next_state_top = analysis_state__top(&next_state); + } + + // If this hypothetical child did match the current step of the query pattern, + // then advance to the next step at the current depth. This involves skipping + // over any descendant steps of the current child. + const QueryStep *next_step = step; + if (does_match) { + for (;;) { + next_state.step_index++; + next_step = &self->steps.contents[next_state.step_index]; + if ( + next_step->depth == PATTERN_DONE_MARKER || + next_step->depth <= step->depth + ) break; + } + } else if (successor.state == parse_state) { + continue; + } + + for (;;) { + // Skip pass-through states. Although these states have alternatives, they are only + // used to implement repetitions, and query analysis does not need to process + // repetitions in order to determine whether steps are possible and definite. + if (next_step->is_pass_through) { + next_state.step_index++; + next_step++; + continue; + } + + // If the pattern is finished or hypothetical parent node is complete, then + // record that matching can terminate at this step of the pattern. Otherwise, + // add this state to the list of states to process on the next iteration. + if (!next_step->is_dead_end) { + bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != step->depth; + if (did_finish_pattern) { + array_insert_sorted_by(finished_parent_symbols, , state->root_symbol); + } else if (next_state.depth == 0) { + array_insert_sorted_by(final_step_indices, , next_state.step_index); + } else { + analysis_state_set__insert_sorted_by_clone(next_states, state_pool, &next_state); + } + } + + // If the state has advanced to a step with an alternative step, then add another state + // at that alternative step. 
This process is simpler than the process of actually matching a + // pattern during query execution, because for the purposes of query analysis, there is no + // need to process repetitions. + if ( + does_match && + next_step->alternative_index != NONE && + next_step->alternative_index > next_state.step_index + ) { + next_state.step_index = next_step->alternative_index; + next_step = &self->steps.contents[next_state.step_index]; + } else { + break; + } + } + } + } + } + + AnalysisStateSet _states = *states; + *states = *next_states; + *next_states = _states; + } +} + static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { + Array(uint16_t) non_rooted_pattern_start_steps = array_new(); + for (unsigned i = 0; i < self->pattern_map.size; i++) { + PatternEntry *pattern = &self->pattern_map.contents[i]; + if (!pattern->is_rooted) { + QueryStep *step = &self->steps.contents[pattern->step_index]; + if (step->symbol != WILDCARD_SYMBOL) { + array_push(&non_rooted_pattern_start_steps, pattern->step_index); + } + } + } + // Walk forward through all of the steps in the query, computing some // basic information about each step. Mark all of the steps that contain // captures, and record the indices of all of the steps that have child steps. @@ -1158,7 +1481,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // of the hidden symbols in the grammar, because these might occur within // one of the parent nodes, such that their children appear to belong to the // parent. 
- Array(AnalysisSubgraph) subgraphs = array_new(); + AnalysisSubgraphArray subgraphs = array_new(); for (unsigned i = 0; i < parent_step_indices.size; i++) { uint32_t parent_step_index = parent_step_indices.contents[i]; TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; @@ -1324,7 +1647,8 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { AnalysisStateSet next_states = array_new(); AnalysisStateSet deeper_states = array_new(); AnalysisStatePool state_pool = array_new(); - Array(uint16_t) final_step_indices = array_new(); + StepIndexArray final_step_indices = array_new(); + StepIndexArray finished_parent_symbols = array_new(); for (unsigned i = 0; i < parent_step_indices.size; i++) { uint16_t parent_step_index = parent_step_indices.contents[i]; uint16_t parent_depth = self->steps.contents[parent_step_index].depth; @@ -1364,308 +1688,31 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { }, }, .depth = 1, + .root_symbol = parent_symbol, })); } // Walk the subgraph for this non-terminal, tracking all of the possible // sequences of progress within the pattern. - bool can_finish_pattern = false; bool did_abort_analysis = false; - unsigned recursion_depth_limit = 0; - unsigned prev_final_step_count = 0; array_clear(&final_step_indices); - for (unsigned iteration = 0;; iteration++) { - if (iteration == MAX_ANALYSIS_ITERATION_COUNT) { - did_abort_analysis = true; - break; - } + array_clear(&finished_parent_symbols); - #ifdef DEBUG_ANALYZE_QUERY - printf("Iteration: %u. 
Final step indices:", iteration); - for (unsigned j = 0; j < final_step_indices.size; j++) { - printf(" %4u", final_step_indices.contents[j]); - } - printf("\nWalk states for %u %s:\n", i, ts_language_symbol_name(self->language, parent_symbol)); - for (unsigned j = 0; j < states.size; j++) { - AnalysisState *state = states.contents[j]; - printf(" %3u: step: %u, stack: [", j, state->step_index); - for (unsigned k = 0; k < state->depth; k++) { - printf( - " {%s, child: %u, state: %4u", - self->language->symbol_names[state->stack[k].parent_symbol], - state->stack[k].child_index, - state->stack[k].parse_state - ); - if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]); - if (state->stack[k].done) printf(", DONE"); - printf("}"); - } - printf(" ]\n"); - } - #endif + #ifdef DEBUG_ANALYZE_QUERY + printf("\nWalk states for %s:\n", ts_language_symbol_name(self->language, states.contents[0]->stack[0].parent_symbol)); + #endif - // If no further progress can be made within the current recursion depth limit, then - // bump the depth limit by one, and continue to process the states the exceeded the - // limit. But only allow this if progress has been made since the last time the depth - // limit was increased. - if (states.size == 0) { - if ( - deeper_states.size > 0 - && final_step_indices.size > prev_final_step_count - ) { - #ifdef DEBUG_ANALYZE_QUERY - printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1); - #endif - - prev_final_step_count = final_step_indices.size; - recursion_depth_limit++; - AnalysisStateSet _states = states; - states = deeper_states; - deeper_states = _states; - continue; - } - - break; - } - - analysis_state_set__clear(&next_states, &state_pool); - for (unsigned j = 0; j < states.size; j++) { - AnalysisState * const state = states.contents[j]; - - // For efficiency, it's important to avoid processing the same analysis state more - // than once. 
To achieve this, keep the states in order of ascending position within - // their hypothetical syntax trees. In each iteration of this loop, start by advancing - // the states that have made the least progress. Avoid advancing states that have already - // made more progress. - if (next_states.size > 0) { - int comparison = analysis_state__compare_position( - &state, - array_back(&next_states) - ); - if (comparison == 0) { - #ifdef DEBUG_ANALYZE_QUERY - printf("Skip iteration for state %u\n", j); - #endif - analysis_state_set__insert_sorted_by_clone(&next_states, &state_pool, state); - continue; - } else if (comparison > 0) { - #ifdef DEBUG_ANALYZE_QUERY - printf("Terminate iteration at state %u\n", j); - #endif - while (j < states.size) { - analysis_state_set__push_by_clone( - &next_states, - &state_pool, - states.contents[j] - ); - j++; - } - break; - } - } - - const TSStateId parse_state = analysis_state__top(state)->parse_state; - const TSSymbol parent_symbol = analysis_state__top(state)->parent_symbol; - const TSFieldId parent_field_id = analysis_state__top(state)->field_id; - const unsigned child_index = analysis_state__top(state)->child_index; - const QueryStep * const step = &self->steps.contents[state->step_index]; - - unsigned subgraph_index, exists; - array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); - if (!exists) continue; - const AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - - // Follow every possible path in the parse table, but only visit states that - // are part of the subgraph for the current symbol. 
- LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state); - while (ts_lookahead_iterator_next(&lookahead_iterator)) { - TSSymbol sym = lookahead_iterator.symbol; - - AnalysisSubgraphNode successor = { - .state = parse_state, - .child_index = child_index, - }; - if (lookahead_iterator.action_count) { - const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1]; - if (action->type == TSParseActionTypeShift) { - if (!action->shift.extra) { - successor.state = action->shift.state; - successor.child_index++; - } - } else { - continue; - } - } else if (lookahead_iterator.next_state != 0) { - successor.state = lookahead_iterator.next_state; - successor.child_index++; - } else { - continue; - } - - unsigned node_index; - array_search_sorted_with( - &subgraph->nodes, - analysis_subgraph_node__compare, &successor, - &node_index, &exists - ); - while (node_index < subgraph->nodes.size) { - AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++]; - if (node->state != successor.state || node->child_index != successor.child_index) break; - - // Use the subgraph to determine what alias and field will eventually be applied - // to this child node. - TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index); - TSSymbol visible_symbol = alias - ? alias - : self->language->symbol_metadata[sym].visible - ? self->language->public_symbol_map[sym] - : 0; - TSFieldId field_id = parent_field_id; - if (!field_id) { - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end); - for (; field_map != field_map_end; field_map++) { - if (!field_map->inherited && field_map->child_index == child_index) { - field_id = field_map->field_id; - break; - } - } - } - - // Create a new state that has advanced past this hypothetical subtree. 
- AnalysisState next_state = *state; - AnalysisStateEntry *next_state_top = analysis_state__top(&next_state); - next_state_top->child_index = successor.child_index; - next_state_top->parse_state = successor.state; - if (node->done) next_state_top->done = true; - - // Determine if this hypothetical child node would match the current step - // of the query pattern. - bool does_match = false; - if (visible_symbol) { - does_match = true; - if (step->symbol == WILDCARD_SYMBOL) { - if ( - step->is_named && - !self->language->symbol_metadata[visible_symbol].named - ) does_match = false; - } else if (step->symbol != visible_symbol) { - does_match = false; - } - if (step->field && step->field != field_id) { - does_match = false; - } - if ( - step->supertype_symbol && - !analysis_state__has_supertype(state, step->supertype_symbol) - ) does_match = false; - } - - // If this child is hidden, then descend into it and walk through its children. - // If the top entry of the stack is at the end of its rule, then that entry can - // be replaced. Otherwise, push a new entry onto the stack. - else if (sym >= self->language->token_count) { - if (!next_state_top->done) { - if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) { - #ifdef DEBUG_ANALYZE_QUERY - printf("Exceeded depth limit for state %u\n", j); - #endif - - did_abort_analysis = true; - continue; - } - - next_state.depth++; - next_state_top = analysis_state__top(&next_state); - } - - *next_state_top = (AnalysisStateEntry) { - .parse_state = parse_state, - .parent_symbol = sym, - .child_index = 0, - .field_id = field_id, - .done = false, - }; - - if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) { - analysis_state_set__insert_sorted_by_clone( - &deeper_states, - &state_pool, - &next_state - ); - continue; - } - } - - // Pop from the stack when this state reached the end of its current syntax node. 
- while (next_state.depth > 0 && next_state_top->done) { - next_state.depth--; - next_state_top = analysis_state__top(&next_state); - } - - // If this hypothetical child did match the current step of the query pattern, - // then advance to the next step at the current depth. This involves skipping - // over any descendant steps of the current child. - const QueryStep *next_step = step; - if (does_match) { - for (;;) { - next_state.step_index++; - next_step = &self->steps.contents[next_state.step_index]; - if ( - next_step->depth == PATTERN_DONE_MARKER || - next_step->depth <= parent_depth + 1 - ) break; - } - } else if (successor.state == parse_state) { - continue; - } - - for (;;) { - // Skip pass-through states. Although these states have alternatives, they are only - // used to implement repetitions, and query analysis does not need to process - // repetitions in order to determine whether steps are possible and definite. - if (next_step->is_pass_through) { - next_state.step_index++; - next_step++; - continue; - } - - // If the pattern is finished or hypothetical parent node is complete, then - // record that matching can terminate at this step of the pattern. Otherwise, - // add this state to the list of states to process on the next iteration. - if (!next_step->is_dead_end) { - bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != parent_depth + 1; - if (did_finish_pattern) can_finish_pattern = true; - if (did_finish_pattern || next_state.depth == 0) { - array_insert_sorted_by(&final_step_indices, , next_state.step_index); - } else { - analysis_state_set__insert_sorted_by_clone(&next_states, &state_pool, &next_state); - } - } - - // If the state has advanced to a step with an alternative step, then add another state - // at that alternative step. This process is simpler than the process of actually matching a - // pattern during query execution, because for the purposes of query analysis, there is no - // need to process repetitions. 
- if ( - does_match && - next_step->alternative_index != NONE && - next_step->alternative_index > next_state.step_index - ) { - next_state.step_index = next_step->alternative_index; - next_step = &self->steps.contents[next_state.step_index]; - } else { - break; - } - } - } - } - } - - AnalysisStateSet _states = states; - states = next_states; - next_states = _states; - } + ts_query__analyze_patterns_from_states( + self, + &subgraphs, + &states, + &next_states, + &deeper_states, + &state_pool, + &finished_parent_symbols, + &final_step_indices, + &did_abort_analysis + ); // If this pattern could not be fully analyzed, then every step should // be considered fallible. @@ -1686,7 +1733,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // If this pattern cannot match, store the pattern index so that it can be // returned to the caller. - if (!can_finish_pattern) { + if (finished_parent_symbols.size == 0) { assert(final_step_indices.size > 0); uint16_t impossible_step_index = *array_back(&final_step_indices); uint32_t i, exists; @@ -1810,6 +1857,75 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } #endif + // Determine which repetition symbols in this language have the possibility + // of matching non-rooted patterns in this query. These repetition symbols + // prevent certain optimizations with range restrictions. 
+ bool did_abort_analysis = false; + for (uint32_t i = 0; i < non_rooted_pattern_start_steps.size; i++) { + uint16_t step_index = non_rooted_pattern_start_steps.contents[i]; + + analysis_state_set__clear(&states, &state_pool); + analysis_state_set__clear(&deeper_states, &state_pool); + + for (unsigned j = 0; j < subgraphs.size; j++) { + AnalysisSubgraph *subgraph = &subgraphs.contents[j]; + TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, subgraph->symbol); + if (metadata.visible || metadata.named) continue; + for (uint32_t k = 0; k < subgraph->start_states.size; k++) { + TSStateId parse_state = subgraph->start_states.contents[k]; + analysis_state_set__push_by_clone(&states, &state_pool, &((AnalysisState) { + .step_index = step_index, + .stack = { + [0] = { + .parse_state = parse_state, + .parent_symbol = subgraph->symbol, + .child_index = 0, + .field_id = 0, + .done = false, + }, + }, + .root_symbol = subgraph->symbol, + .depth = 1, + })); + } + } + + #ifdef DEBUG_ANALYZE_QUERY + printf("\nWalk states for rootless pattern step %u:\n", step_index); + #endif + + array_clear(&final_step_indices); + array_clear(&finished_parent_symbols); + ts_query__analyze_patterns_from_states( + self, + &subgraphs, + &states, + &next_states, + &deeper_states, + &state_pool, + &finished_parent_symbols, + &final_step_indices, + &did_abort_analysis + ); + + for (unsigned k = 0; k < finished_parent_symbols.size; k++) { + TSSymbol symbol = finished_parent_symbols.contents[k]; + array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, , symbol); + } + } + + #ifdef DEBUG_ANALYZE_QUERY + if (self->repeat_symbols_with_rootless_patterns.size > 0) { + printf("\nRepetition symbols with rootless patterns:\n"); + printf("aborted analysis: %d\n", did_abort_analysis); + for (unsigned i = 0; i < self->repeat_symbols_with_rootless_patterns.size; i++) { + TSSymbol symbol = self->repeat_symbols_with_rootless_patterns.contents[i]; + printf(" %u, %s\n", symbol, 
ts_language_symbol_name(self->language, symbol)); + } + printf("\n"); + } + #endif + // Cleanup for (unsigned i = 0; i < subgraphs.size; i++) { array_delete(&subgraphs.contents[i].start_states); @@ -1821,9 +1937,11 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } array_delete(&state_pool); array_delete(&next_nodes); + array_delete(&non_rooted_pattern_start_steps); analysis_state_set__delete(&states); analysis_state_set__delete(&next_states); analysis_state_set__delete(&deeper_states); + array_delete(&finished_parent_symbols); array_delete(&final_step_indices); array_delete(&parent_step_indices); array_delete(&predicate_capture_ids); @@ -2571,6 +2689,7 @@ TSQuery *ts_query_new( .step_offsets = array_new(), .string_buffer = array_new(), .negated_fields = array_new(), + .repeat_symbols_with_rootless_patterns = array_new(), .wildcard_root_pattern_count = 0, .language = language, }; @@ -2685,6 +2804,7 @@ void ts_query_delete(TSQuery *self) { array_delete(&self->step_offsets); array_delete(&self->string_buffer); array_delete(&self->negated_fields); + array_delete(&self->repeat_symbols_with_rootless_patterns); symbol_table_delete(&self->captures); symbol_table_delete(&self->predicate_values); for (uint32_t index = 0; index < self->capture_quantifiers.size; index++) { @@ -3327,18 +3447,18 @@ static inline bool ts_query_cursor__advance( self->finished_states.size ); - bool node_intersects_range = ( - ts_node_end_byte(node) > self->start_byte && - ts_node_start_byte(node) < self->end_byte && - point_gt(ts_node_end_point(node), self->start_point) && - point_lt(ts_node_start_point(node), self->end_point) - ); bool parent_intersects_range = ts_node_is_null(parent_node) || ( ts_node_end_byte(parent_node) > self->start_byte && ts_node_start_byte(parent_node) < self->end_byte && point_gt(ts_node_end_point(parent_node), self->start_point) && point_lt(ts_node_start_point(parent_node), self->end_point) ); + bool node_intersects_range = 
parent_intersects_range && ( + ts_node_end_byte(node) > self->start_byte && + ts_node_start_byte(node) < self->end_byte && + point_gt(ts_node_end_point(node), self->start_point) && + point_lt(ts_node_start_point(node), self->end_point) + ); bool node_is_error = symbol == ts_builtin_sym_error; bool parent_is_error = !ts_node_is_null(parent_node) && @@ -3679,8 +3799,8 @@ static inline bool ts_query_cursor__advance( // When the current node ends prior to the desired start offset, // only descend for the purpose of continuing in-progress matches. - bool should_descend = node_intersects_range; - if (!should_descend) { + bool has_in_progress_matches = false; + if (!node_intersects_range) { for (unsigned i = 0; i < self->states.size; i++) { QueryState *state = &self->states.contents[i];; QueryStep *next_step = &self->query->steps.contents[state->step_index]; @@ -3688,12 +3808,13 @@ static inline bool ts_query_cursor__advance( next_step->depth != PATTERN_DONE_MARKER && state->start_depth + next_step->depth > self->depth ) { - should_descend = true; + has_in_progress_matches = true; break; } } } + bool should_descend = node_intersects_range || has_in_progress_matches; if (!should_descend) { LOG( " not descending. 
node end byte: %u, start byte: %u\n", From 189cf6d59daa7861f504c74d0a775b8f53cf98e2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 14 Feb 2023 15:18:00 -0800 Subject: [PATCH 037/347] Group analysis state sets into QueryAnalysis struct --- lib/src/query.c | 218 ++++++++++++++++++++++-------------------------- 1 file changed, 102 insertions(+), 116 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index a756c089..cbc9add6 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -233,9 +233,15 @@ typedef struct { typedef Array(AnalysisState *) AnalysisStateSet; -typedef Array(AnalysisState *) AnalysisStatePool; - -typedef Array(uint16_t) StepIndexArray; +typedef struct { + AnalysisStateSet states; + AnalysisStateSet next_states; + AnalysisStateSet deeper_states; + AnalysisStateSet state_pool; + Array(uint16_t) final_step_indices; + Array(TSSymbol) finished_parent_symbols; + bool did_abort; +} QueryAnalysis; /* * AnalysisSubgraph - A subset of the states in the parse table that are used @@ -940,30 +946,23 @@ static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol s return false; } -static inline AnalysisState *analysis_state__clone(AnalysisState const *self) { - AnalysisState *new_state = ts_malloc(sizeof(AnalysisState)); - *new_state = *self; - return new_state; -} - -/**************** +/****************** * AnalysisStateSet - ****************/ + ******************/ // Obtains an `AnalysisState` instance, either by consuming one from this set's object pool, or by // cloning one from scratch. 
static inline AnalysisState *analysis_state_pool__clone_or_reuse( - AnalysisStatePool *self, + AnalysisStateSet *self, AnalysisState *borrowed_item ) { AnalysisState *new_item; if (self->size) { new_item = array_pop(self); - *new_item = *borrowed_item; } else { - new_item = analysis_state__clone(borrowed_item); + new_item = ts_malloc(sizeof(AnalysisState)); } - + *new_item = *borrowed_item; return new_item; } @@ -973,9 +972,9 @@ static inline AnalysisState *analysis_state_pool__clone_or_reuse( // // The caller retains ownership of the passed-in memory. However, the clone that is created by this // function will be managed by the state set. -static inline void analysis_state_set__insert_sorted_by_clone( +static inline void analysis_state_set__insert_sorted( AnalysisStateSet *self, - AnalysisStatePool *pool, + AnalysisStateSet *pool, AnalysisState *borrowed_item ) { unsigned index, exists; @@ -994,9 +993,9 @@ static inline void analysis_state_set__insert_sorted_by_clone( // // The caller retains ownership of the passed-in memory. However, the clone that is created by this // function will be managed by the state set. -static inline void analysis_state_set__push_by_clone( +static inline void analysis_state_set__push( AnalysisStateSet *self, - AnalysisStatePool *pool, + AnalysisStateSet *pool, AnalysisState *borrowed_item ) { AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); @@ -1004,7 +1003,7 @@ static inline void analysis_state_set__push_by_clone( } // Removes all items from this set, returning it to an empty state. 
-static inline void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStatePool *pool) { +static inline void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStateSet *pool) { array_push_all(pool, self); array_clear(self); } @@ -1018,6 +1017,31 @@ static inline void analysis_state_set__delete(AnalysisStateSet *self) { array_delete(self); } +/**************** + * QueryAnalyzer + ****************/ + +static inline QueryAnalysis query_analysis__new() { + return (QueryAnalysis) { + .states = array_new(), + .next_states = array_new(), + .deeper_states = array_new(), + .state_pool = array_new(), + .final_step_indices = array_new(), + .finished_parent_symbols = array_new(), + .did_abort = false, + }; +} + +static inline void query_analysis__delete(QueryAnalysis *self) { + analysis_state_set__delete(&self->states); + analysis_state_set__delete(&self->next_states); + analysis_state_set__delete(&self->deeper_states); + analysis_state_set__delete(&self->state_pool); + array_delete(&self->final_step_indices); + array_delete(&self->finished_parent_symbols); +} + /*********************** * AnalysisSubgraphNode ***********************/ @@ -1119,23 +1143,21 @@ static inline void ts_query__pattern_map_insert( array_insert(&self->pattern_map, index, new_entry); } -static void ts_query__analyze_patterns_from_states( +// Walk the subgraph for this non-terminal, tracking all of the possible +// sequences of progress within the pattern. 
+static void ts_query__perform_analysis( TSQuery *self, const AnalysisSubgraphArray *subgraphs, - AnalysisStateSet *states, - AnalysisStateSet *next_states, - AnalysisStateSet *deeper_states, - AnalysisStatePool *state_pool, - StepIndexArray *finished_parent_symbols, - StepIndexArray *final_step_indices, - bool *did_abort_analysis + QueryAnalysis *analysis ) { unsigned recursion_depth_limit = 0; unsigned prev_final_step_count = 0; + array_clear(&analysis->final_step_indices); + array_clear(&analysis->finished_parent_symbols); for (unsigned iteration = 0;; iteration++) { if (iteration == MAX_ANALYSIS_ITERATION_COUNT) { - *did_abort_analysis = true; + analysis->did_abort = true; break; } @@ -1167,52 +1189,52 @@ static void ts_query__analyze_patterns_from_states( // bump the depth limit by one, and continue to process the states the exceeded the // limit. But only allow this if progress has been made since the last time the depth // limit was increased. - if (states->size == 0) { + if (analysis->states.size == 0) { if ( - deeper_states->size > 0 - && final_step_indices->size > prev_final_step_count + analysis->deeper_states.size > 0 && + analysis->final_step_indices.size > prev_final_step_count ) { #ifdef DEBUG_ANALYZE_QUERY printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1); #endif - prev_final_step_count = final_step_indices->size; + prev_final_step_count = analysis->final_step_indices.size; recursion_depth_limit++; - AnalysisStateSet _states = *states; - *states = *deeper_states; - *deeper_states = _states; + AnalysisStateSet _states = analysis->states; + analysis->states = analysis->deeper_states; + analysis->deeper_states = _states; continue; } break; } - analysis_state_set__clear(next_states, state_pool); - for (unsigned j = 0; j < states->size; j++) { - AnalysisState * const state = states->contents[j]; + analysis_state_set__clear(&analysis->next_states, &analysis->state_pool); + for (unsigned j = 0; j < analysis->states.size; j++) { + 
AnalysisState * const state = analysis->states.contents[j]; // For efficiency, it's important to avoid processing the same analysis state more // than once. To achieve this, keep the states in order of ascending position within // their hypothetical syntax trees. In each iteration of this loop, start by advancing // the states that have made the least progress. Avoid advancing states that have already // made more progress. - if (next_states->size > 0) { + if (analysis->next_states.size > 0) { int comparison = analysis_state__compare_position( &state, - array_back(next_states) + array_back(&analysis->next_states) ); if (comparison == 0) { - analysis_state_set__insert_sorted_by_clone(next_states, state_pool, state); + analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, state); continue; } else if (comparison > 0) { #ifdef DEBUG_ANALYZE_QUERY printf("Terminate iteration at state %u\n", j); #endif - while (j < states->size) { - analysis_state_set__push_by_clone( - next_states, - state_pool, - states->contents[j] + while (j < analysis->states.size) { + analysis_state_set__push( + &analysis->next_states, + &analysis->state_pool, + analysis->states.contents[j] ); j++; } @@ -1327,7 +1349,7 @@ static void ts_query__analyze_patterns_from_states( printf("Exceeded depth limit for state %u\n", j); #endif - *did_abort_analysis = true; + analysis->did_abort = true; continue; } @@ -1344,9 +1366,9 @@ static void ts_query__analyze_patterns_from_states( }; if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) { - analysis_state_set__insert_sorted_by_clone( - deeper_states, - state_pool, + analysis_state_set__insert_sorted( + &analysis->deeper_states, + &analysis->state_pool, &next_state ); continue; @@ -1392,11 +1414,11 @@ static void ts_query__analyze_patterns_from_states( if (!next_step->is_dead_end) { bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != step->depth; if (did_finish_pattern) { - 
array_insert_sorted_by(finished_parent_symbols, , state->root_symbol); + array_insert_sorted_by(&analysis->finished_parent_symbols, , state->root_symbol); } else if (next_state.depth == 0) { - array_insert_sorted_by(final_step_indices, , next_state.step_index); + array_insert_sorted_by(&analysis->final_step_indices, , next_state.step_index); } else { - analysis_state_set__insert_sorted_by_clone(next_states, state_pool, &next_state); + analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, &next_state); } } @@ -1419,9 +1441,9 @@ static void ts_query__analyze_patterns_from_states( } } - AnalysisStateSet _states = *states; - *states = *next_states; - *next_states = _states; + AnalysisStateSet _states = analysis->states; + analysis->states = analysis->next_states; + analysis->next_states = _states; } } @@ -1643,12 +1665,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // For each non-terminal pattern, determine if the pattern can successfully match, // and identify all of the possible children within the pattern where matching could fail. bool all_patterns_are_valid = true; - AnalysisStateSet states = array_new(); - AnalysisStateSet next_states = array_new(); - AnalysisStateSet deeper_states = array_new(); - AnalysisStatePool state_pool = array_new(); - StepIndexArray final_step_indices = array_new(); - StepIndexArray finished_parent_symbols = array_new(); + QueryAnalysis analysis = query_analysis__new(); for (unsigned i = 0; i < parent_step_indices.size; i++) { uint16_t parent_step_index = parent_step_indices.contents[i]; uint16_t parent_depth = self->steps.contents[parent_step_index].depth; @@ -1672,11 +1689,11 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // Initialize an analysis state at every parse state in the table where // this parent symbol can occur. 
AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - analysis_state_set__clear(&states, &state_pool); - analysis_state_set__clear(&deeper_states, &state_pool); + analysis_state_set__clear(&analysis.states, &analysis.state_pool); + analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool); for (unsigned j = 0; j < subgraph->start_states.size; j++) { TSStateId parse_state = subgraph->start_states.contents[j]; - analysis_state_set__push_by_clone(&states, &state_pool, &((AnalysisState) { + analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { .step_index = parent_step_index + 1, .stack = { [0] = { @@ -1692,31 +1709,16 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { })); } - // Walk the subgraph for this non-terminal, tracking all of the possible - // sequences of progress within the pattern. - bool did_abort_analysis = false; - array_clear(&final_step_indices); - array_clear(&finished_parent_symbols); - #ifdef DEBUG_ANALYZE_QUERY printf("\nWalk states for %s:\n", ts_language_symbol_name(self->language, states.contents[0]->stack[0].parent_symbol)); #endif - ts_query__analyze_patterns_from_states( - self, - &subgraphs, - &states, - &next_states, - &deeper_states, - &state_pool, - &finished_parent_symbols, - &final_step_indices, - &did_abort_analysis - ); + analysis.did_abort = false; + ts_query__perform_analysis(self, &subgraphs, &analysis); // If this pattern could not be fully analyzed, then every step should // be considered fallible. - if (did_abort_analysis) { + if (analysis.did_abort) { for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) { QueryStep *step = &self->steps.contents[j]; if ( @@ -1733,9 +1735,9 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // If this pattern cannot match, store the pattern index so that it can be // returned to the caller. 
- if (finished_parent_symbols.size == 0) { - assert(final_step_indices.size > 0); - uint16_t impossible_step_index = *array_back(&final_step_indices); + if (analysis.finished_parent_symbols.size == 0) { + assert(analysis.final_step_indices.size > 0); + uint16_t impossible_step_index = *array_back(&analysis.final_step_indices); uint32_t i, exists; array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &i, &exists); if (i >= self->step_offsets.size) i = self->step_offsets.size - 1; @@ -1746,8 +1748,8 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // Mark as fallible any step where a match terminated. // Later, this property will be propagated to all of the step's predecessors. - for (unsigned j = 0; j < final_step_indices.size; j++) { - uint32_t final_step_index = final_step_indices.contents[j]; + for (unsigned j = 0; j < analysis.final_step_indices.size; j++) { + uint32_t final_step_index = analysis.final_step_indices.contents[j]; QueryStep *step = &self->steps.contents[final_step_index]; if ( step->depth != PATTERN_DONE_MARKER && @@ -1860,20 +1862,20 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // Determine which repetition symbols in this language have the possibility // of matching non-rooted patterns in this query. These repetition symbols // prevent certain optimizations with range restrictions. 
- bool did_abort_analysis = false; + analysis.did_abort = false; for (uint32_t i = 0; i < non_rooted_pattern_start_steps.size; i++) { uint16_t step_index = non_rooted_pattern_start_steps.contents[i]; - analysis_state_set__clear(&states, &state_pool); - analysis_state_set__clear(&deeper_states, &state_pool); - + analysis_state_set__clear(&analysis.states, &analysis.state_pool); + analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool); for (unsigned j = 0; j < subgraphs.size; j++) { AnalysisSubgraph *subgraph = &subgraphs.contents[j]; TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, subgraph->symbol); if (metadata.visible || metadata.named) continue; + for (uint32_t k = 0; k < subgraph->start_states.size; k++) { TSStateId parse_state = subgraph->start_states.contents[k]; - analysis_state_set__push_by_clone(&states, &state_pool, &((AnalysisState) { + analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { .step_index = step_index, .stack = { [0] = { @@ -1894,22 +1896,14 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { printf("\nWalk states for rootless pattern step %u:\n", step_index); #endif - array_clear(&final_step_indices); - array_clear(&finished_parent_symbols); - ts_query__analyze_patterns_from_states( + ts_query__perform_analysis( self, &subgraphs, - &states, - &next_states, - &deeper_states, - &state_pool, - &finished_parent_symbols, - &final_step_indices, - &did_abort_analysis + &analysis ); - for (unsigned k = 0; k < finished_parent_symbols.size; k++) { - TSSymbol symbol = finished_parent_symbols.contents[k]; + for (unsigned k = 0; k < analysis.finished_parent_symbols.size; k++) { + TSSymbol symbol = analysis.finished_parent_symbols.contents[k]; array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, , symbol); } } @@ -1917,7 +1911,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { #ifdef DEBUG_ANALYZE_QUERY 
if (self->repeat_symbols_with_rootless_patterns.size > 0) { printf("\nRepetition symbols with rootless patterns:\n"); - printf("aborted analysis: %d\n", did_abort_analysis); + printf("aborted analysis: %d\n", analyzer.did_abort); for (unsigned i = 0; i < self->repeat_symbols_with_rootless_patterns.size; i++) { TSSymbol symbol = self->repeat_symbols_with_rootless_patterns.contents[i]; printf(" %u, %s\n", symbol, ts_language_symbol_name(self->language, symbol)); @@ -1932,17 +1926,9 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { array_delete(&subgraphs.contents[i].nodes); } array_delete(&subgraphs); - for (unsigned i = 0; i < state_pool.size; i++) { - ts_free(state_pool.contents[i]); - } - array_delete(&state_pool); + query_analysis__delete(&analysis); array_delete(&next_nodes); array_delete(&non_rooted_pattern_start_steps); - analysis_state_set__delete(&states); - analysis_state_set__delete(&next_states); - analysis_state_set__delete(&deeper_states); - array_delete(&finished_parent_symbols); - array_delete(&final_step_indices); array_delete(&parent_step_indices); array_delete(&predicate_capture_ids); state_predecessor_map_delete(&predecessor_map); From 29c9073177d4e5f750daa9619feab75701a9a286 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 15 Feb 2023 14:01:59 -0800 Subject: [PATCH 038/347] Extract 'internal' versions of tree cursor movement fns that allow visiting hidden nodes --- lib/src/tree_cursor.c | 126 +++++++++++++++++++----------------------- lib/src/tree_cursor.h | 15 +++++ 2 files changed, 72 insertions(+), 69 deletions(-) diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index e8dc98a9..98930250 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -98,34 +98,43 @@ void ts_tree_cursor_delete(TSTreeCursor *_self) { // TSTreeCursor - walking the tree -bool ts_tree_cursor_goto_first_child(TSTreeCursor *_self) { +TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self) { 
TreeCursor *self = (TreeCursor *)_self; - - bool did_descend; - do { - did_descend = false; - - bool visible; - TreeCursorEntry entry; - CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - if (visible) { - array_push(&self->stack, entry); - return true; - } - - if (ts_subtree_visible_child_count(*entry.subtree) > 0) { - array_push(&self->stack, entry); - did_descend = true; - break; - } + bool visible; + TreeCursorEntry entry; + CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); + while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { + if (visible) { + array_push(&self->stack, entry); + return TreeCursorStepVisible; } - } while (did_descend); + if (ts_subtree_visible_child_count(*entry.subtree) > 0) { + array_push(&self->stack, entry); + return TreeCursorStepHidden; + } + } + return TreeCursorStepNone; +} +bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) { + for (;;) { + switch (ts_tree_cursor_goto_first_child_internal(self)) { + case TreeCursorStepHidden: + continue; + case TreeCursorStepVisible: + return true; + default: + return false; + } + } return false; } -int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *_self, uint32_t goal_byte) { +static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point( + TSTreeCursor *_self, + uint32_t goal_byte, + TSPoint goal_point +) { TreeCursor *self = (TreeCursor *)_self; uint32_t initial_size = self->stack.size; uint32_t visible_child_index = 0; @@ -138,48 +147,8 @@ int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *_self, uint32_t g TreeCursorEntry entry; CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - uint32_t end_byte = entry.position.bytes + ts_subtree_size(*entry.subtree).bytes; - bool at_goal = end_byte >= goal_byte; - uint32_t 
visible_child_count = ts_subtree_visible_child_count(*entry.subtree); - - if (at_goal) { - if (visible) { - array_push(&self->stack, entry); - return visible_child_index; - } - - if (visible_child_count > 0) { - array_push(&self->stack, entry); - did_descend = true; - break; - } - } else if (visible) { - visible_child_index++; - } else { - visible_child_index += visible_child_count; - } - } - } while (did_descend); - - self->stack.size = initial_size; - return -1; -} - -int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *_self, TSPoint goal_point) { - TreeCursor *self = (TreeCursor *)_self; - uint32_t initial_size = self->stack.size; - uint32_t visible_child_index = 0; - - bool did_descend; - do { - did_descend = false; - - bool visible; - TreeCursorEntry entry; - CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - TSPoint end_point = point_add(entry.position.extent, ts_subtree_size(*entry.subtree).extent); - bool at_goal = point_gte(end_point, goal_point); + Length entry_end = length_add(entry.position, ts_subtree_size(*entry.subtree)); + bool at_goal = entry_end.bytes >= goal_byte && point_gte(entry_end.extent, goal_point); uint32_t visible_child_count = ts_subtree_visible_child_count(*entry.subtree); if (at_goal) { if (visible) { @@ -203,7 +172,15 @@ int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *_self, TSPoint g return -1; } -bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *_self) { +int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte) { + return ts_tree_cursor_goto_first_child_for_byte_and_point(self, goal_byte, POINT_ZERO); +} + +int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point) { + return ts_tree_cursor_goto_first_child_for_byte_and_point(self, 0, goal_point); +} + +TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) { TreeCursor 
*self = (TreeCursor *)_self; uint32_t initial_size = self->stack.size; @@ -221,19 +198,30 @@ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *_self) { while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { if (visible) { array_push(&self->stack, entry); - return true; + return TreeCursorStepVisible; } if (ts_subtree_visible_child_count(*entry.subtree)) { array_push(&self->stack, entry); - ts_tree_cursor_goto_first_child(_self); - return true; + return TreeCursorStepHidden; } } } self->stack.size = initial_size; - return false; + return TreeCursorStepNone; +} + +bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) { + switch (ts_tree_cursor_goto_next_sibling_internal(self)) { + case TreeCursorStepHidden: + ts_tree_cursor_goto_first_child(self); + return true; + case TreeCursorStepVisible: + return true; + default: + return false; + } } bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) { diff --git a/lib/src/tree_cursor.h b/lib/src/tree_cursor.h index 69647d1d..7b94db6b 100644 --- a/lib/src/tree_cursor.h +++ b/lib/src/tree_cursor.h @@ -15,6 +15,12 @@ typedef struct { Array(TreeCursorEntry) stack; } TreeCursor; +typedef enum { + TreeCursorStepNone, + TreeCursorStepHidden, + TreeCursorStepVisible, +} TreeCursorStep; + void ts_tree_cursor_init(TreeCursor *, TSNode); void ts_tree_cursor_current_status( const TSTreeCursor *, @@ -26,6 +32,15 @@ void ts_tree_cursor_current_status( unsigned * ); +TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *); +TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *); + +static inline Subtree ts_tree_cursor_current_subtree(const TSTreeCursor *_self) { + const TreeCursor *self = (const TreeCursor *)_self; + TreeCursorEntry *last_entry = array_back(&self->stack); + return *last_entry->subtree; +} + TSNode ts_tree_cursor_parent_node(const TSTreeCursor *); #endif // TREE_SITTER_TREE_CURSOR_H_ From fa869cf3eddac07d82bfd48f7fda0a0705087a51 Mon Sep 17 00:00:00 2001 From: Max 
Brunsfeld Date: Wed, 15 Feb 2023 14:03:15 -0800 Subject: [PATCH 039/347] Restructure query_cursor_advance to explicitly control which hidden nodes it descends into --- lib/src/query.c | 914 ++++++++++++++++++++++++---------------------- lib/src/subtree.h | 6 + 2 files changed, 492 insertions(+), 428 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index cbc9add6..04a59f9a 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -309,6 +309,7 @@ struct TSQueryCursor { TSPoint start_point; TSPoint end_point; uint32_t next_state_id; + bool on_visible_node; bool ascending; bool halted; bool did_exceed_match_limit; @@ -1163,12 +1164,12 @@ static void ts_query__perform_analysis( #ifdef DEBUG_ANALYZE_QUERY printf("Iteration: %u. Final step indices:", iteration); - for (unsigned j = 0; j < final_step_indices->size; j++) { - printf(" %4u", final_step_indices->contents[j]); + for (unsigned j = 0; j < analysis->final_step_indices.size; j++) { + printf(" %4u", analysis->final_step_indices.contents[j]); } printf("\n"); - for (unsigned j = 0; j < states->size; j++) { - AnalysisState *state = states->contents[j]; + for (unsigned j = 0; j < analysis->states.size; j++) { + AnalysisState *state = analysis->states.contents[j]; printf(" %3u: step: %u, stack: [", j, state->step_index); for (unsigned k = 0; k < state->depth; k++) { printf( @@ -1710,7 +1711,10 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } #ifdef DEBUG_ANALYZE_QUERY - printf("\nWalk states for %s:\n", ts_language_symbol_name(self->language, states.contents[0]->stack[0].parent_symbol)); + printf( + "\nWalk states for %s:\n", + ts_language_symbol_name(self->language, analysis.states.contents[0]->stack[0].parent_symbol) + ); #endif analysis.did_abort = false; @@ -1911,7 +1915,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { #ifdef DEBUG_ANALYZE_QUERY if (self->repeat_symbols_with_rootless_patterns.size > 0) { printf("\nRepetition symbols with 
rootless patterns:\n"); - printf("aborted analysis: %d\n", analyzer.did_abort); + printf("aborted analysis: %d\n", analysis.did_abort); for (unsigned i = 0; i < self->repeat_symbols_with_rootless_patterns.size; i++) { TSSymbol symbol = self->repeat_symbols_with_rootless_patterns.contents[i]; printf(" %u, %s\n", symbol, ts_language_symbol_name(self->language, symbol)); @@ -2986,6 +2990,7 @@ void ts_query_cursor_exec( array_clear(&self->finished_states); ts_tree_cursor_reset(&self->cursor, node); capture_list_pool_reset(&self->capture_list_pool); + self->on_visible_node = true; self->next_state_id = 0; self->depth = 0; self->ascending = false; @@ -3320,6 +3325,50 @@ static QueryState *ts_query_cursor__copy_state( return &self->states.contents[state_index + 1]; } +static inline bool ts_query_cursor__should_descend_outside_of_range( + TSQueryCursor *self +) { + // If there are in-progress matches whose remaining steps occur + // deeper in the tree, then descend. + for (unsigned i = 0; i < self->states.size; i++) { + QueryState *state = &self->states.contents[i];; + QueryStep *next_step = &self->query->steps.contents[state->step_index]; + if ( + next_step->depth != PATTERN_DONE_MARKER && + state->start_depth + next_step->depth > self->depth + ) { + return true; + } + } + + // If the current node is hidden, then a non-rooted pattern might match + // one if its roots inside of this node, and match another of its roots + // as part of a sibling node, so we may need to descend. + if (!self->on_visible_node) { + // Descending into a repetition node outside of the range can be + // expensive, because these nodes can have many visible children. + // Avoid descending into repetition nodes unless we have already + // determined that this query can match rootless patterns inside + // of this type of repetition node. 
+ Subtree subtree = ts_tree_cursor_current_subtree(&self->cursor); + if (ts_subtree_is_repetition(subtree)) { + bool exists; + uint32_t index; + array_search_sorted_by( + &self->query->repeat_symbols_with_rootless_patterns,, + ts_subtree_symbol(subtree), + &index, + &exists + ); + return exists; + } + + return true; + } + + return false; +} + // Walk the tree, processing patterns until at least one pattern finishes, // If one or more patterns finish, return `true` and store their states in the // `finished_states` array. Multiple patterns can finish on the same node. If @@ -3351,219 +3400,49 @@ static inline bool ts_query_cursor__advance( ); // Leave this node by stepping to its next sibling or to its parent. - if (ts_tree_cursor_goto_next_sibling(&self->cursor)) { - self->ascending = false; - } else if (ts_tree_cursor_goto_parent(&self->cursor)) { - self->depth--; - } else { - LOG("halt at root\n"); - self->halted = true; - } - - // After leaving a node, remove any states that cannot make further progress. - uint32_t deleted_count = 0; - for (unsigned i = 0, n = self->states.size; i < n; i++) { - QueryState *state = &self->states.contents[i]; - QueryStep *step = &self->query->steps.contents[state->step_index]; - - // If a state completed its pattern inside of this node, but was deferred from finishing - // in order to search for longer matches, mark it as finished. - if (step->depth == PATTERN_DONE_MARKER) { - if (state->start_depth > self->depth || self->halted) { - LOG(" finish pattern %u\n", state->pattern_index); - array_push(&self->finished_states, *state); - did_match = true; - deleted_count++; - continue; - } - } - - // If a state needed to match something within this node, then remove that state - // as it has failed to match. - else if ((uint32_t)state->start_depth + (uint32_t)step->depth > self->depth) { - LOG( - " failed to match. 
pattern:%u, step:%u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release( - &self->capture_list_pool, - state->capture_list_id - ); - deleted_count++; - continue; - } - - if (deleted_count > 0) { - self->states.contents[i - deleted_count] = *state; - } - } - self->states.size -= deleted_count; - } - - // Enter a new node. - else { - // Get the properties of the current node. - TSNode node = ts_tree_cursor_current_node(&self->cursor); - TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor); - TSSymbol symbol = ts_node_symbol(node); - bool is_named = ts_node_is_named(node); - bool has_later_siblings; - bool has_later_named_siblings; - bool can_have_later_siblings_with_this_field; - TSFieldId field_id = 0; - TSSymbol supertypes[8] = {0}; - unsigned supertype_count = 8; - ts_tree_cursor_current_status( - &self->cursor, - &field_id, - &has_later_siblings, - &has_later_named_siblings, - &can_have_later_siblings_with_this_field, - supertypes, - &supertype_count - ); - LOG( - "enter node. 
depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n", - self->depth, - ts_node_type(node), - ts_language_field_name_for_id(self->query->language, field_id), - ts_node_start_point(node).row, - self->states.size, - self->finished_states.size - ); - - bool parent_intersects_range = ts_node_is_null(parent_node) || ( - ts_node_end_byte(parent_node) > self->start_byte && - ts_node_start_byte(parent_node) < self->end_byte && - point_gt(ts_node_end_point(parent_node), self->start_point) && - point_lt(ts_node_start_point(parent_node), self->end_point) - ); - bool node_intersects_range = parent_intersects_range && ( - ts_node_end_byte(node) > self->start_byte && - ts_node_start_byte(node) < self->end_byte && - point_gt(ts_node_end_point(node), self->start_point) && - point_lt(ts_node_start_point(node), self->end_point) - ); - bool node_is_error = symbol == ts_builtin_sym_error; - bool parent_is_error = - !ts_node_is_null(parent_node) && - ts_node_symbol(parent_node) == ts_builtin_sym_error; - - // Add new states for any patterns whose root node is a wildcard. - if (!node_is_error) { - for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) { - PatternEntry *pattern = &self->query->pattern_map.contents[i]; - - // If this node matches the first step of the pattern, then add a new - // state at the start of this pattern. - QueryStep *step = &self->query->steps.contents[pattern->step_index]; - if ( - (pattern->is_rooted ? - node_intersects_range : - (parent_intersects_range && !parent_is_error)) && - (!step->field || field_id == step->field) && - (!step->supertype_symbol || supertype_count > 0) - ) { - ts_query_cursor__add_state(self, pattern); - } - } - } - - // Add new states for any patterns whose root node matches this node. 
- unsigned i; - if (ts_query__pattern_map_search(self->query, symbol, &i)) { - PatternEntry *pattern = &self->query->pattern_map.contents[i]; - - QueryStep *step = &self->query->steps.contents[pattern->step_index]; - do { - // If this node matches the first step of the pattern, then add a new - // state at the start of this pattern. - if ( - (pattern->is_rooted ? - node_intersects_range : - (parent_intersects_range && !parent_is_error)) && - (!step->field || field_id == step->field) - ) { - ts_query_cursor__add_state(self, pattern); - } - - // Advance to the next pattern whose root node matches this node. - i++; - if (i == self->query->pattern_map.size) break; - pattern = &self->query->pattern_map.contents[i]; - step = &self->query->steps.contents[pattern->step_index]; - } while (step->symbol == symbol); - } - - // Update all of the in-progress states with current node. - for (unsigned i = 0, copy_count = 0; i < self->states.size; i += 1 + copy_count) { - QueryState *state = &self->states.contents[i]; - QueryStep *step = &self->query->steps.contents[state->step_index]; - state->has_in_progress_alternatives = false; - copy_count = 0; - - // Check that the node matches all of the criteria for the next - // step of the pattern. - if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue; - - // Determine if this node matches this step of the pattern, and also - // if this node can have later siblings that match this step of the - // pattern. 
- bool node_does_match = false; - if (step->symbol == WILDCARD_SYMBOL) { - node_does_match = !node_is_error && (is_named || !step->is_named); - } else { - node_does_match = symbol == step->symbol; - } - bool later_sibling_can_match = has_later_siblings; - if ((step->is_immediate && is_named) || state->seeking_immediate_match) { - later_sibling_can_match = false; - } - if (step->is_last_child && has_later_named_siblings) { - node_does_match = false; - } - if (step->supertype_symbol) { - bool has_supertype = false; - for (unsigned j = 0; j < supertype_count; j++) { - if (supertypes[j] == step->supertype_symbol) { - has_supertype = true; - break; - } - } - if (!has_supertype) node_does_match = false; - } - if (step->field) { - if (step->field == field_id) { - if (!can_have_later_siblings_with_this_field) { - later_sibling_can_match = false; - } + switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) { + case TreeCursorStepVisible: + self->on_visible_node = true; + self->ascending = false; + break; + case TreeCursorStepHidden: + self->depth--; + self->on_visible_node = false; + self->ascending = false; + break; + default: + if (ts_tree_cursor_goto_parent(&self->cursor)) { + self->depth--; } else { - node_does_match = false; + LOG("halt at root\n"); + self->halted = true; } - } + } - if (step->negated_field_list_id) { - TSFieldId *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id]; - for (;;) { - TSFieldId negated_field_id = *negated_field_ids; - if (negated_field_id) { - negated_field_ids++; - if (ts_node_child_by_field_id(node, negated_field_id).id) { - node_does_match = false; - break; - } - } else { - break; + if (self->on_visible_node) { + // After leaving a node, remove any states that cannot make further progress. 
+ uint32_t deleted_count = 0; + for (unsigned i = 0, n = self->states.size; i < n; i++) { + QueryState *state = &self->states.contents[i]; + QueryStep *step = &self->query->steps.contents[state->step_index]; + + // If a state completed its pattern inside of this node, but was deferred from finishing + // in order to search for longer matches, mark it as finished. + if (step->depth == PATTERN_DONE_MARKER) { + if (state->start_depth > self->depth || self->halted) { + LOG(" finish pattern %u\n", state->pattern_index); + array_push(&self->finished_states, *state); + did_match = true; + deleted_count++; + continue; } } - } - // Remove states immediately if it is ever clear that they cannot match. - if (!node_does_match) { - if (!later_sibling_can_match) { + // If a state needed to match something within this node, then remove that state + // as it has failed to match. + else if ((uint32_t)state->start_depth + (uint32_t)step->depth > self->depth) { LOG( - " discard state. pattern:%u, step:%u\n", + " failed to match. pattern:%u, step:%u\n", state->pattern_index, state->step_index ); @@ -3571,249 +3450,428 @@ static inline bool ts_query_cursor__advance( &self->capture_list_pool, state->capture_list_id ); - array_erase(&self->states, i); - i--; + deleted_count++; + continue; } - continue; - } - // Some patterns can match their root node in multiple ways, capturing different - // children. If this pattern step could match later children within the same - // parent, then this query state cannot simply be updated in place. It must be - // split into two states: one that matches this node, and one which skips over - // this node, to preserve the possibility of matching later siblings. - if (later_sibling_can_match && ( - step->contains_captures || - ts_query__step_is_fallible(self->query, state->step_index) - )) { - if (ts_query_cursor__copy_state(self, &state)) { - LOG( - " split state for capture. 
pattern:%u, step:%u\n", - state->pattern_index, - state->step_index - ); - copy_count++; + if (deleted_count > 0) { + self->states.contents[i - deleted_count] = *state; } } + self->states.size -= deleted_count; + } + } - // If this pattern started with a wildcard, such that the pattern map - // actually points to the *second* step of the pattern, then check - // that the node has a parent, and capture the parent node if necessary. - if (state->needs_parent) { - TSNode parent = ts_tree_cursor_parent_node(&self->cursor); - if (ts_node_is_null(parent)) { - LOG(" missing parent node\n"); - state->dead = true; - } else { - state->needs_parent = false; - QueryStep *skipped_wildcard_step = step; - do { - skipped_wildcard_step--; - } while ( - skipped_wildcard_step->is_dead_end || - skipped_wildcard_step->is_pass_through || - skipped_wildcard_step->depth > 0 - ); - if (skipped_wildcard_step->capture_ids[0] != NONE) { - LOG(" capture wildcard parent\n"); - ts_query_cursor__capture( - self, - state, - skipped_wildcard_step, - parent - ); - } - } - } + // Enter a new node. + else { + // Get the properties of the current node. + TSNode node = ts_tree_cursor_current_node(&self->cursor); + TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor); - // If the current node is captured in this pattern, add it to the capture list. 
- if (step->capture_ids[0] != NONE) { - ts_query_cursor__capture(self, state, step, node); - } + bool parent_precedes_range = !ts_node_is_null(parent_node) && ( + ts_node_end_byte(parent_node) <= self->start_byte || + point_lte(ts_node_end_point(parent_node), self->start_point) + ); + bool parent_follows_range = !ts_node_is_null(parent_node) && ( + ts_node_start_byte(parent_node) >= self->end_byte || + point_gte(ts_node_start_point(parent_node), self->end_point) + ); + bool node_precedes_range = parent_precedes_range || ( + ts_node_end_byte(node) <= self->start_byte || + point_lte(ts_node_end_point(node), self->start_point) + ); + bool node_follows_range = parent_follows_range || ( + ts_node_start_byte(node) >= self->end_byte || + point_gte(ts_node_start_point(node), self->end_point) + ); + bool parent_intersects_range = !parent_precedes_range && !parent_follows_range; + bool node_intersects_range = !node_precedes_range && !node_follows_range; - if (state->dead) { - array_erase(&self->states, i); - i--; - continue; - } - - // Advance this state to the next step of its pattern. - state->step_index++; - state->seeking_immediate_match = false; + if (self->on_visible_node) { + TSSymbol symbol = ts_node_symbol(node); + bool is_named = ts_node_is_named(node); + bool has_later_siblings; + bool has_later_named_siblings; + bool can_have_later_siblings_with_this_field; + TSFieldId field_id = 0; + TSSymbol supertypes[8] = {0}; + unsigned supertype_count = 8; + ts_tree_cursor_current_status( + &self->cursor, + &field_id, + &has_later_siblings, + &has_later_named_siblings, + &can_have_later_siblings_with_this_field, + supertypes, + &supertype_count + ); LOG( - " advance state. pattern:%u, step:%u\n", - state->pattern_index, - state->step_index + "enter node. 
depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n", + self->depth, + ts_node_type(node), + ts_language_field_name_for_id(self->query->language, field_id), + ts_node_start_point(node).row, + self->states.size, + self->finished_states.size ); - QueryStep *next_step = &self->query->steps.contents[state->step_index]; - if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true; + bool node_is_error = symbol == ts_builtin_sym_error; + bool parent_is_error = + !ts_node_is_null(parent_node) && + ts_node_symbol(parent_node) == ts_builtin_sym_error; - // If this state's next step has an alternative step, then copy the state in order - // to pursue both alternatives. The alternative step itself may have an alternative, - // so this is an interactive process. - unsigned end_index = i + 1; - for (unsigned j = i; j < end_index; j++) { - QueryState *state = &self->states.contents[j]; - QueryStep *next_step = &self->query->steps.contents[state->step_index]; - if (next_step->alternative_index != NONE) { - // A "dead-end" step exists only to add a non-sequential jump into the step sequence, - // via its alternative index. When a state reaches a dead-end step, it jumps straight - // to the step's alternative. - if (next_step->is_dead_end) { - state->step_index = next_step->alternative_index; - j--; - continue; + // Add new states for any patterns whose root node is a wildcard. + if (!node_is_error) { + for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) { + PatternEntry *pattern = &self->query->pattern_map.contents[i]; + + // If this node matches the first step of the pattern, then add a new + // state at the start of this pattern. + QueryStep *step = &self->query->steps.contents[pattern->step_index]; + if ( + (pattern->is_rooted ? 
+ node_intersects_range : + (parent_intersects_range && !parent_is_error)) && + (!step->field || field_id == step->field) && + (!step->supertype_symbol || supertype_count > 0) + ) { + ts_query_cursor__add_state(self, pattern); + } + } + } + + // Add new states for any patterns whose root node matches this node. + unsigned i; + if (ts_query__pattern_map_search(self->query, symbol, &i)) { + PatternEntry *pattern = &self->query->pattern_map.contents[i]; + + QueryStep *step = &self->query->steps.contents[pattern->step_index]; + do { + // If this node matches the first step of the pattern, then add a new + // state at the start of this pattern. + if ( + (pattern->is_rooted ? + node_intersects_range : + (parent_intersects_range && !parent_is_error)) && + (!step->field || field_id == step->field) + ) { + ts_query_cursor__add_state(self, pattern); } - // A "pass-through" step exists only to add a branch into the step sequence, - // via its alternative_index. When a state reaches a pass-through step, it splits - // in order to process the alternative step, and then it advances to the next step. - if (next_step->is_pass_through) { - state->step_index++; - j--; - } + // Advance to the next pattern whose root node matches this node. + i++; + if (i == self->query->pattern_map.size) break; + pattern = &self->query->pattern_map.contents[i]; + step = &self->query->steps.contents[pattern->step_index]; + } while (step->symbol == symbol); + } - QueryState *copy = ts_query_cursor__copy_state(self, &state); - if (copy) { + // Update all of the in-progress states with current node. + for (unsigned i = 0, copy_count = 0; i < self->states.size; i += 1 + copy_count) { + QueryState *state = &self->states.contents[i]; + QueryStep *step = &self->query->steps.contents[state->step_index]; + state->has_in_progress_alternatives = false; + copy_count = 0; + + // Check that the node matches all of the criteria for the next + // step of the pattern. 
+ if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue; + + // Determine if this node matches this step of the pattern, and also + // if this node can have later siblings that match this step of the + // pattern. + bool node_does_match = false; + if (step->symbol == WILDCARD_SYMBOL) { + node_does_match = !node_is_error && (is_named || !step->is_named); + } else { + node_does_match = symbol == step->symbol; + } + bool later_sibling_can_match = has_later_siblings; + if ((step->is_immediate && is_named) || state->seeking_immediate_match) { + later_sibling_can_match = false; + } + if (step->is_last_child && has_later_named_siblings) { + node_does_match = false; + } + if (step->supertype_symbol) { + bool has_supertype = false; + for (unsigned j = 0; j < supertype_count; j++) { + if (supertypes[j] == step->supertype_symbol) { + has_supertype = true; + break; + } + } + if (!has_supertype) node_does_match = false; + } + if (step->field) { + if (step->field == field_id) { + if (!can_have_later_siblings_with_this_field) { + later_sibling_can_match = false; + } + } else { + node_does_match = false; + } + } + + if (step->negated_field_list_id) { + TSFieldId *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id]; + for (;;) { + TSFieldId negated_field_id = *negated_field_ids; + if (negated_field_id) { + negated_field_ids++; + if (ts_node_child_by_field_id(node, negated_field_id).id) { + node_does_match = false; + break; + } + } else { + break; + } + } + } + + // Remove states immediately if it is ever clear that they cannot match. + if (!node_does_match) { + if (!later_sibling_can_match) { LOG( - " split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n", - copy->pattern_index, - copy->step_index, - next_step->alternative_index, - next_step->alternative_is_immediate, - capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size + " discard state. 
pattern:%u, step:%u\n", + state->pattern_index, + state->step_index + ); + capture_list_pool_release( + &self->capture_list_pool, + state->capture_list_id + ); + array_erase(&self->states, i); + i--; + } + continue; + } + + // Some patterns can match their root node in multiple ways, capturing different + // children. If this pattern step could match later children within the same + // parent, then this query state cannot simply be updated in place. It must be + // split into two states: one that matches this node, and one which skips over + // this node, to preserve the possibility of matching later siblings. + if (later_sibling_can_match && ( + step->contains_captures || + ts_query__step_is_fallible(self->query, state->step_index) + )) { + if (ts_query_cursor__copy_state(self, &state)) { + LOG( + " split state for capture. pattern:%u, step:%u\n", + state->pattern_index, + state->step_index ); - end_index++; copy_count++; - copy->step_index = next_step->alternative_index; - if (next_step->alternative_is_immediate) { - copy->seeking_immediate_match = true; + } + } + + // If this pattern started with a wildcard, such that the pattern map + // actually points to the *second* step of the pattern, then check + // that the node has a parent, and capture the parent node if necessary. 
+ if (state->needs_parent) { + TSNode parent = ts_tree_cursor_parent_node(&self->cursor); + if (ts_node_is_null(parent)) { + LOG(" missing parent node\n"); + state->dead = true; + } else { + state->needs_parent = false; + QueryStep *skipped_wildcard_step = step; + do { + skipped_wildcard_step--; + } while ( + skipped_wildcard_step->is_dead_end || + skipped_wildcard_step->is_pass_through || + skipped_wildcard_step->depth > 0 + ); + if (skipped_wildcard_step->capture_ids[0] != NONE) { + LOG(" capture wildcard parent\n"); + ts_query_cursor__capture( + self, + state, + skipped_wildcard_step, + parent + ); + } + } + } + + // If the current node is captured in this pattern, add it to the capture list. + if (step->capture_ids[0] != NONE) { + ts_query_cursor__capture(self, state, step, node); + } + + if (state->dead) { + array_erase(&self->states, i); + i--; + continue; + } + + // Advance this state to the next step of its pattern. + state->step_index++; + state->seeking_immediate_match = false; + LOG( + " advance state. pattern:%u, step:%u\n", + state->pattern_index, + state->step_index + ); + + QueryStep *next_step = &self->query->steps.contents[state->step_index]; + if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true; + + // If this state's next step has an alternative step, then copy the state in order + // to pursue both alternatives. The alternative step itself may have an alternative, + // so this is an interactive process. + unsigned end_index = i + 1; + for (unsigned j = i; j < end_index; j++) { + QueryState *state = &self->states.contents[j]; + QueryStep *next_step = &self->query->steps.contents[state->step_index]; + if (next_step->alternative_index != NONE) { + // A "dead-end" step exists only to add a non-sequential jump into the step sequence, + // via its alternative index. When a state reaches a dead-end step, it jumps straight + // to the step's alternative. 
+ if (next_step->is_dead_end) { + state->step_index = next_step->alternative_index; + j--; + continue; + } + + // A "pass-through" step exists only to add a branch into the step sequence, + // via its alternative_index. When a state reaches a pass-through step, it splits + // in order to process the alternative step, and then it advances to the next step. + if (next_step->is_pass_through) { + state->step_index++; + j--; + } + + QueryState *copy = ts_query_cursor__copy_state(self, &state); + if (copy) { + LOG( + " split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n", + copy->pattern_index, + copy->step_index, + next_step->alternative_index, + next_step->alternative_is_immediate, + capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size + ); + end_index++; + copy_count++; + copy->step_index = next_step->alternative_index; + if (next_step->alternative_is_immediate) { + copy->seeking_immediate_match = true; + } + } + } + } + } + + for (unsigned i = 0; i < self->states.size; i++) { + QueryState *state = &self->states.contents[i]; + if (state->dead) { + array_erase(&self->states, i); + i--; + continue; + } + + // Enfore the longest-match criteria. When a query pattern contains optional or + // repeated nodes, this is necessary to avoid multiple redundant states, where + // one state has a strict subset of another state's captures. + bool did_remove = false; + for (unsigned j = i + 1; j < self->states.size; j++) { + QueryState *other_state = &self->states.contents[j]; + + // Query states are kept in ascending order of start_depth and pattern_index. + // Since the longest-match criteria is only used for deduping matches of the same + // pattern and root node, we only need to perform pairwise comparisons within a + // small slice of the states array. 
+ if ( + other_state->start_depth != state->start_depth || + other_state->pattern_index != state->pattern_index + ) break; + + bool left_contains_right, right_contains_left; + ts_query_cursor__compare_captures( + self, + state, + other_state, + &left_contains_right, + &right_contains_left + ); + if (left_contains_right) { + if (state->step_index == other_state->step_index) { + LOG( + " drop shorter state. pattern: %u, step_index: %u\n", + state->pattern_index, + state->step_index + ); + capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id); + array_erase(&self->states, j); + j--; + continue; + } + other_state->has_in_progress_alternatives = true; + } + if (right_contains_left) { + if (state->step_index == other_state->step_index) { + LOG( + " drop shorter state. pattern: %u, step_index: %u\n", + state->pattern_index, + state->step_index + ); + capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); + array_erase(&self->states, i); + i--; + did_remove = true; + break; + } + state->has_in_progress_alternatives = true; + } + } + + // If the state is at the end of its pattern, remove it from the list + // of in-progress states and add it to the list of finished states. + if (!did_remove) { + LOG( + " keep state. 
pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n", + state->pattern_index, + state->start_depth, + state->step_index, + capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size + ); + QueryStep *next_step = &self->query->steps.contents[state->step_index]; + if (next_step->depth == PATTERN_DONE_MARKER) { + if (state->has_in_progress_alternatives) { + LOG(" defer finishing pattern %u\n", state->pattern_index); + } else { + LOG(" finish pattern %u\n", state->pattern_index); + array_push(&self->finished_states, *state); + array_erase(&self->states, (uint32_t)(state - self->states.contents)); + did_match = true; + i--; } } } } } - for (unsigned i = 0; i < self->states.size; i++) { - QueryState *state = &self->states.contents[i]; - if (state->dead) { - array_erase(&self->states, i); - i--; - continue; - } - - // Enfore the longest-match criteria. When a query pattern contains optional or - // repeated nodes, this is necessary to avoid multiple redundant states, where - // one state has a strict subset of another state's captures. - bool did_remove = false; - for (unsigned j = i + 1; j < self->states.size; j++) { - QueryState *other_state = &self->states.contents[j]; - - // Query states are kept in ascending order of start_depth and pattern_index. - // Since the longest-match criteria is only used for deduping matches of the same - // pattern and root node, we only need to perform pairwise comparisons within a - // small slice of the states array. - if ( - other_state->start_depth != state->start_depth || - other_state->pattern_index != state->pattern_index - ) break; - - bool left_contains_right, right_contains_left; - ts_query_cursor__compare_captures( - self, - state, - other_state, - &left_contains_right, - &right_contains_left - ); - if (left_contains_right) { - if (state->step_index == other_state->step_index) { - LOG( - " drop shorter state. 
pattern: %u, step_index: %u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id); - array_erase(&self->states, j); - j--; - continue; - } - other_state->has_in_progress_alternatives = true; - } - if (right_contains_left) { - if (state->step_index == other_state->step_index) { - LOG( - " drop shorter state. pattern: %u, step_index: %u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); - array_erase(&self->states, i); - i--; - did_remove = true; - break; - } - state->has_in_progress_alternatives = true; - } - } - - // If the state is at the end of its pattern, remove it from the list - // of in-progress states and add it to the list of finished states. - if (!did_remove) { - LOG( - " keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n", - state->pattern_index, - state->start_depth, - state->step_index, - capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size - ); - QueryStep *next_step = &self->query->steps.contents[state->step_index]; - if (next_step->depth == PATTERN_DONE_MARKER) { - if (state->has_in_progress_alternatives) { - LOG(" defer finishing pattern %u\n", state->pattern_index); - } else { - LOG(" finish pattern %u\n", state->pattern_index); - array_push(&self->finished_states, *state); - array_erase(&self->states, (uint32_t)(state - self->states.contents)); - did_match = true; - i--; - } - } - } - } - - // When the current node ends prior to the desired start offset, - // only descend for the purpose of continuing in-progress matches. 
- bool has_in_progress_matches = false; - if (!node_intersects_range) { - for (unsigned i = 0; i < self->states.size; i++) { - QueryState *state = &self->states.contents[i];; - QueryStep *next_step = &self->query->steps.contents[state->step_index]; - if ( - next_step->depth != PATTERN_DONE_MARKER && - state->start_depth + next_step->depth > self->depth - ) { - has_in_progress_matches = true; + bool should_descend = + node_intersects_range || + ts_query_cursor__should_descend_outside_of_range(self); + if (should_descend) { + switch (ts_tree_cursor_goto_first_child_internal(&self->cursor)) { + case TreeCursorStepVisible: + self->depth++; + self->on_visible_node = true; + continue; + case TreeCursorStepHidden: + self->on_visible_node = false; + continue; + default: break; - } } } - bool should_descend = node_intersects_range || has_in_progress_matches; - if (!should_descend) { - LOG( - " not descending. node end byte: %u, start byte: %u\n", - ts_node_end_byte(node), - self->start_byte - ); - } - - if (should_descend && ts_tree_cursor_goto_first_child(&self->cursor)) { - self->depth++; - } else { - self->ascending = true; - } + self->ascending = true; } } } diff --git a/lib/src/subtree.h b/lib/src/subtree.h index 8456d2f1..a0e838eb 100644 --- a/lib/src/subtree.h +++ b/lib/src/subtree.h @@ -291,6 +291,12 @@ static inline uint32_t ts_subtree_repeat_depth(Subtree self) { return self.data.is_inline ? 0 : self.ptr->repeat_depth; } +static inline uint32_t ts_subtree_is_repetition(Subtree self) { + return self.data.is_inline + ? 0 + : !self.ptr->named && !self.ptr->visible && self.ptr->child_count != 0; +} + static inline uint32_t ts_subtree_node_count(Subtree self) { return (self.data.is_inline || self.ptr->child_count == 0) ? 
1 : self.ptr->node_count; } From bd63fb2a0d837bb5ae254ce7749d63c58ebac945 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 15 Feb 2023 14:03:36 -0800 Subject: [PATCH 040/347] Tweak query tests --- Cargo.lock | 7 +++++ cli/Cargo.toml | 3 ++- cli/src/tests/query_test.rs | 51 ++++++++++++++++++++++++------------- lib/Cargo.toml | 2 +- 4 files changed, 44 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f5c4e7e4..ca773788 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -584,6 +584,7 @@ dependencies = [ "tree-sitter-highlight", "tree-sitter-loader", "tree-sitter-tags", + "unindent", "walkdir", "webbrowser", "which", @@ -647,6 +648,12 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +[[package]] +name = "unindent" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa30f5ea51ff7edfc797c6d3f9ec8cbd8cfedef5371766b7181d33977f4814f" + [[package]] name = "utf8-width" version = "0.1.6" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 5403075d..77cf52e4 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -3,7 +3,7 @@ name = "tree-sitter-cli" description = "CLI tool for developing, testing, and using Tree-sitter parsers" version = "0.20.7" authors = ["Max Brunsfeld "] -edition = "2018" +edition = "2021" license = "MIT" readme = "README.md" keywords = ["incremental", "parsing"] @@ -73,6 +73,7 @@ rand = "0.8" tempfile = "3" pretty_assertions = "0.7.2" ctor = "0.1" +unindent = "0.2" [build-dependencies] toml = "0.5" diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 31cb8035..63dea5a6 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -2,6 +2,7 @@ use super::helpers::{ allocations, fixtures::get_language, query_helpers::{Match, Pattern}, + ITERATION_COUNT, }; use lazy_static::lazy_static; use rand::{prelude::StdRng, SeedableRng}; @@ -10,6 
+11,7 @@ use tree_sitter::{ CaptureQuantifier, Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryError, QueryErrorKind, QueryMatch, QueryPredicate, QueryPredicateArg, QueryProperty, }; +use unindent::Unindent; lazy_static! { static ref EXAMPLE_FILTER: Option = env::var("TREE_SITTER_TEST_EXAMPLE_FILTER").ok(); @@ -1920,20 +1922,28 @@ fn test_query_matches_within_point_range() { let language = get_language("javascript"); let query = Query::new(language, "(identifier) @element").unwrap(); - let source = "[a, b,\n c, d,\n e, f,\n g]"; + let source = " + [ + a, b, + c, d, + e, f, + g, h, + i, j, + k, l, + ] + " + .unindent(); let mut parser = Parser::new(); parser.set_language(language).unwrap(); let tree = parser.parse(&source, None).unwrap(); - let mut cursor = QueryCursor::new(); let matches = cursor - .set_point_range(Point::new(0, 0)..Point::new(1, 3)) + .set_point_range(Point::new(1, 0)..Point::new(2, 3)) .matches(&query, tree.root_node(), source.as_bytes()); - assert_eq!( - collect_matches(matches, &query, source), + collect_matches(matches, &query, &source), &[ (0, vec![("element", "a")]), (0, vec![("element", "b")]), @@ -1942,11 +1952,10 @@ fn test_query_matches_within_point_range() { ); let matches = cursor - .set_point_range(Point::new(1, 0)..Point::new(2, 3)) + .set_point_range(Point::new(2, 0)..Point::new(3, 3)) .matches(&query, tree.root_node(), source.as_bytes()); - assert_eq!( - collect_matches(matches, &query, source), + collect_matches(matches, &query, &source), &[ (0, vec![("element", "c")]), (0, vec![("element", "d")]), @@ -1954,16 +1963,19 @@ fn test_query_matches_within_point_range() { ] ); + // Zero end point is treated like no end point. 
let matches = cursor - .set_point_range(Point::new(2, 1)..Point::new(0, 0)) + .set_point_range(Point::new(4, 1)..Point::new(0, 0)) .matches(&query, tree.root_node(), source.as_bytes()); - assert_eq!( - collect_matches(matches, &query, source), + collect_matches(matches, &query, &source), &[ - (0, vec![("element", "e")]), - (0, vec![("element", "f")]), (0, vec![("element", "g")]), + (0, vec![("element", "h")]), + (0, vec![("element", "i")]), + (0, vec![("element", "j")]), + (0, vec![("element", "k")]), + (0, vec![("element", "l")]), ] ); }); @@ -3634,17 +3646,22 @@ fn test_query_random() { .parse(include_str!("helpers/query_helpers.rs"), None) .unwrap(); - // let start_seed = *SEED; let start_seed = 0; + let end_seed = start_seed + *ITERATION_COUNT; - for i in 0..100 { - let seed = (start_seed + i) as u64; + for seed in start_seed..(start_seed + end_seed) { + let seed = seed as u64; let mut rand = StdRng::seed_from_u64(seed); let (pattern_ast, _) = Pattern::random_pattern_in_tree(&pattern_tree, &mut rand); let pattern = pattern_ast.to_string(); let expected_matches = pattern_ast.matches_in_tree(&test_tree); - let query = Query::new(language, &pattern).unwrap(); + let query = match Query::new(language, &pattern) { + Ok(query) => query, + Err(e) => { + panic!("failed to build query for pattern {pattern} - {e}. 
seed: {seed}"); + } + }; let mut actual_matches = cursor .matches( &query, diff --git a/lib/Cargo.toml b/lib/Cargo.toml index d096efdc..c2d35685 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -3,7 +3,7 @@ name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" version = "0.20.9" authors = ["Max Brunsfeld "] -edition = "2018" +edition = "2021" license = "MIT" readme = "binding_rust/README.md" keywords = ["incremental", "parsing"] From 40703f110c7f16650b686fc4c56ab128cf61e449 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 15 Feb 2023 14:40:36 -0800 Subject: [PATCH 041/347] Fix bug in maintenance of query cursor's tree depth --- cli/src/tests/query_test.rs | 3 --- lib/src/query.c | 24 +++++++++++++++--------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 63dea5a6..c691df30 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1876,7 +1876,6 @@ fn test_query_matches_within_byte_range() { cursor .set_byte_range(0..8) .matches(&query, tree.root_node(), source.as_bytes()); - assert_eq!( collect_matches(matches, &query, source), &[ @@ -1890,7 +1889,6 @@ fn test_query_matches_within_byte_range() { cursor .set_byte_range(5..15) .matches(&query, tree.root_node(), source.as_bytes()); - assert_eq!( collect_matches(matches, &query, source), &[ @@ -1904,7 +1902,6 @@ fn test_query_matches_within_byte_range() { cursor .set_byte_range(12..0) .matches(&query, tree.root_node(), source.as_bytes()); - assert_eq!( collect_matches(matches, &query, source), &[ diff --git a/lib/src/query.c b/lib/src/query.c index 04a59f9a..b2450ce2 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -3393,21 +3393,28 @@ static inline bool ts_query_cursor__advance( // Exit the current node. if (self->ascending) { - LOG( - "leave node. 
depth:%u, type:%s\n", - self->depth, - ts_node_type(ts_tree_cursor_current_node(&self->cursor)) - ); + if (self->on_visible_node) { + LOG( + "leave node. depth:%u, type:%s\n", + self->depth, + ts_node_type(ts_tree_cursor_current_node(&self->cursor)) + ); + } // Leave this node by stepping to its next sibling or to its parent. switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) { case TreeCursorStepVisible: - self->on_visible_node = true; + if (!self->on_visible_node) { + self->depth++; + self->on_visible_node = true; + } self->ascending = false; break; case TreeCursorStepHidden: - self->depth--; - self->on_visible_node = false; + if (self->on_visible_node) { + self->depth--; + self->on_visible_node = false; + } self->ascending = false; break; default: @@ -3467,7 +3474,6 @@ static inline bool ts_query_cursor__advance( // Get the properties of the current node. TSNode node = ts_tree_cursor_current_node(&self->cursor); TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor); - bool parent_precedes_range = !ts_node_is_null(parent_node) && ( ts_node_end_byte(parent_node) <= self->start_byte || point_lte(ts_node_end_point(parent_node), self->start_point) From 837899e456202c6d112679c03e7e989451973a6d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 15 Feb 2023 18:24:07 -0800 Subject: [PATCH 042/347] Add API for checking if a pattern in a query is non-local --- cli/src/tests/query_test.rs | 62 +++++++++++++++++++++++++++++++++++ lib/binding_rust/bindings.rs | 3 ++ lib/binding_rust/lib.rs | 8 ++++- lib/include/tree_sitter/api.h | 27 ++++++++++----- lib/src/query.c | 24 ++++++++++++-- 5 files changed, 112 insertions(+), 12 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index c691df30..e99fe06e 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -4084,6 +4084,68 @@ fn test_query_is_pattern_rooted() { }); } +#[test] +fn test_query_is_pattern_non_local() { + struct Row { + description: 
&'static str, + pattern: &'static str, + is_non_local: bool, + } + + let rows = [ + Row { + description: "simple token", + pattern: r#"(identifier)"#, + is_non_local: false, + }, + Row { + description: "siblings that can occur in an argument list", + pattern: r#"((identifier) (identifier))"#, + is_non_local: true, + }, + Row { + description: "siblings that can occur in a statement block", + pattern: r#"((return_statement) (return_statement))"#, + is_non_local: true, + }, + Row { + description: "siblings that can occur in a source file", + pattern: r#"((function_definition) (class_definition))"#, + is_non_local: true, + }, + Row { + description: "siblings that can't occur in any repetition", + pattern: r#"("{" "}")"#, + is_non_local: false, + }, + ]; + + allocations::record(|| { + eprintln!(""); + + let language = get_language("python"); + for row in &rows { + if let Some(filter) = EXAMPLE_FILTER.as_ref() { + if !row.description.contains(filter.as_str()) { + continue; + } + } + eprintln!(" query example: {:?}", row.description); + let query = Query::new(language, row.pattern).unwrap(); + assert_eq!( + query.is_pattern_non_local(0), + row.is_non_local, + "Description: {}, Pattern: {:?}", + row.description, + row.pattern + .split_ascii_whitespace() + .collect::>() + .join(" "), + ) + } + }); +} + #[test] fn test_capture_quantifiers() { struct Row { diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 4591a380..be117f83 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -677,6 +677,9 @@ extern "C" { length: *mut u32, ) -> *const TSQueryPredicateStep; } +extern "C" { + pub fn ts_query_is_pattern_non_local(self_: *const TSQuery, pattern_index: u32) -> bool; +} extern "C" { pub fn ts_query_is_pattern_rooted(self_: *const TSQuery, pattern_index: u32) -> bool; } diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 6f044cca..579bf8e2 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ 
-1736,11 +1736,17 @@ impl Query { } /// Check if a given pattern within a query has a single root node. - #[doc(alias = "ts_query_is_pattern_guaranteed_at_step")] + #[doc(alias = "ts_query_is_pattern_rooted")] pub fn is_pattern_rooted(&self, index: usize) -> bool { unsafe { ffi::ts_query_is_pattern_rooted(self.ptr.as_ptr(), index as u32) } } + /// Check if a given pattern within a query has a single root node. + #[doc(alias = "ts_query_is_pattern_non_local")] + pub fn is_pattern_non_local(&self, index: usize) -> bool { + unsafe { ffi::ts_query_is_pattern_non_local(self.ptr.as_ptr(), index as u32) } + } + /// Check if a given step in a query is 'definite'. /// /// A query step is 'definite' if its parent pattern will be guaranteed to match diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 5b48cf60..edc1c36a 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -750,15 +750,26 @@ const TSQueryPredicateStep *ts_query_predicates_for_pattern( uint32_t *length ); -bool ts_query_is_pattern_rooted( - const TSQuery *self, - uint32_t pattern_index -); +/* + * Check if the given pattern in the query has a single root node. + */ +bool ts_query_is_pattern_rooted(const TSQuery *self, uint32_t pattern_index); -bool ts_query_is_pattern_guaranteed_at_step( - const TSQuery *self, - uint32_t byte_offset -); +/* + * Check if the given pattern in the query is 'non local'. + * + * A non-local pattern has multiple root nodes and can match within a + * repeating sequence of nodes, as specified by the grammar. Non-local + * patterns disable certain optimizations that would otherwise be possible + * when executing a query on a specific range of a syntax tree. + */ +bool ts_query_is_pattern_non_local(const TSQuery *self, uint32_t pattern_index); + +/* + * Check if a given pattern is guaranteed to match once a given step is reached. + * The step is specified by its byte offset in the query's source code. 
+ */ +bool ts_query_is_pattern_guaranteed_at_step(const TSQuery *self, uint32_t byte_offset); /** * Get the name and length of one of the query's captures, or one of the diff --git a/lib/src/query.c b/lib/src/query.c index b2450ce2..cfe11438 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -146,6 +146,7 @@ typedef struct { Slice steps; Slice predicate_steps; uint32_t start_byte; + bool is_non_local; } QueryPattern; typedef struct { @@ -1455,7 +1456,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { if (!pattern->is_rooted) { QueryStep *step = &self->steps.contents[pattern->step_index]; if (step->symbol != WILDCARD_SYMBOL) { - array_push(&non_rooted_pattern_start_steps, pattern->step_index); + array_push(&non_rooted_pattern_start_steps, i); } } } @@ -1868,7 +1869,8 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // prevent certain optimizations with range restrictions. analysis.did_abort = false; for (uint32_t i = 0; i < non_rooted_pattern_start_steps.size; i++) { - uint16_t step_index = non_rooted_pattern_start_steps.contents[i]; + uint16_t pattern_entry_index = non_rooted_pattern_start_steps.contents[i]; + PatternEntry *pattern_entry = &self->pattern_map.contents[pattern_entry_index]; analysis_state_set__clear(&analysis.states, &analysis.state_pool); analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool); @@ -1880,7 +1882,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { for (uint32_t k = 0; k < subgraph->start_states.size; k++) { TSStateId parse_state = subgraph->start_states.contents[k]; analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { - .step_index = step_index, + .step_index = pattern_entry->step_index, .stack = { [0] = { .parse_state = parse_state, @@ -1906,6 +1908,10 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { &analysis ); + if 
(analysis.finished_parent_symbols.size > 0) { + self->patterns.contents[pattern_entry->pattern_index].is_non_local = true; + } + for (unsigned k = 0; k < analysis.finished_parent_symbols.size; k++) { TSSymbol symbol = analysis.finished_parent_symbols.contents[k]; array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, , symbol); @@ -2697,6 +2703,7 @@ TSQuery *ts_query_new( .steps = (Slice) {.offset = start_step_index}, .predicate_steps = (Slice) {.offset = start_predicate_step_index}, .start_byte = stream_offset(&stream), + .is_non_local = false, })); CaptureQuantifiers capture_quantifiers = capture_quantifiers_new(); *error_type = ts_query__parse_pattern(self, &stream, 0, false, &capture_quantifiers); @@ -2876,6 +2883,17 @@ bool ts_query_is_pattern_rooted( return true; } +bool ts_query_is_pattern_non_local( + const TSQuery *self, + uint32_t pattern_index +) { + if (pattern_index < self->patterns.size) { + return self->patterns.contents[pattern_index].is_non_local; + } else { + return false; + } +} + bool ts_query_is_pattern_guaranteed_at_step( const TSQuery *self, uint32_t byte_offset From 8dcf8517399d83ad7cfd2f046c8ea441827ebde2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 16 Feb 2023 12:03:51 -0800 Subject: [PATCH 043/347] Add unit test for querying within a range of a long top-level repetition --- cli/src/tests/query_test.rs | 90 ++++++++++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 2 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index e99fe06e..7d01c26e 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -2219,6 +2219,57 @@ fn test_query_captures_within_byte_range_assigned_after_iterating() { }); } +#[test] +fn test_query_matches_within_range_of_long_repetition() { + allocations::record(|| { + let language = get_language("rust"); + let query = Query::new( + language, + " + (function_item name: (identifier) @fn-name) + ", + ) + .unwrap(); + + let source = " 
+ fn zero() {} + fn one() {} + fn two() {} + fn three() {} + fn four() {} + fn five() {} + fn six() {} + fn seven() {} + fn eight() {} + fn nine() {} + fn ten() {} + fn eleven() {} + fn twelve() {} + " + .unindent(); + + let mut parser = Parser::new(); + let mut cursor = QueryCursor::new(); + + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let matches = cursor + .set_point_range(Point::new(8, 0)..Point::new(20, 0)) + .matches(&query, tree.root_node(), source.as_bytes()); + assert_eq!( + collect_matches(matches, &query, &source), + &[ + (0, vec![("fn-name", "eight")]), + (0, vec![("fn-name", "nine")]), + (0, vec![("fn-name", "ten")]), + (0, vec![("fn-name", "eleven")]), + (0, vec![("fn-name", "twelve")]), + ] + ); + }); +} + #[test] fn test_query_matches_different_queries_same_cursor() { allocations::record(|| { @@ -4089,6 +4140,7 @@ fn test_query_is_pattern_non_local() { struct Row { description: &'static str, pattern: &'static str, + language: Language, is_non_local: bool, } @@ -4096,26 +4148,61 @@ fn test_query_is_pattern_non_local() { Row { description: "simple token", pattern: r#"(identifier)"#, + language: get_language("python"), is_non_local: false, }, Row { description: "siblings that can occur in an argument list", pattern: r#"((identifier) (identifier))"#, + language: get_language("python"), is_non_local: true, }, Row { description: "siblings that can occur in a statement block", pattern: r#"((return_statement) (return_statement))"#, + language: get_language("python"), is_non_local: true, }, Row { description: "siblings that can occur in a source file", pattern: r#"((function_definition) (class_definition))"#, + language: get_language("python"), is_non_local: true, }, Row { description: "siblings that can't occur in any repetition", pattern: r#"("{" "}")"#, + language: get_language("python"), + is_non_local: false, + }, + Row { + description: "siblings that can't occur in any repetition, wildcard root", + 
pattern: r#"(_ "{" "}") @foo"#, + language: get_language("javascript"), + is_non_local: false, + }, + Row { + description: "siblings that can occur in a class body, wildcard root", + pattern: r#"(_ (method_definition) (method_definition)) @foo"#, + language: get_language("javascript"), + is_non_local: true, + }, + Row { + description: "top-level repetitions that can occur in a class body", + pattern: r#"(method_definition)+ @foo"#, + language: get_language("javascript"), + is_non_local: true, + }, + Row { + description: "top-level repetitions that can occur in a statement block", + pattern: r#"(return_statement)+ @foo"#, + language: get_language("javascript"), + is_non_local: true, + }, + Row { + description: "rooted pattern that can occur in a statement block", + pattern: r#"(return_statement) @foo"#, + language: get_language("javascript"), is_non_local: false, }, ]; @@ -4123,7 +4210,6 @@ fn test_query_is_pattern_non_local() { allocations::record(|| { eprintln!(""); - let language = get_language("python"); for row in &rows { if let Some(filter) = EXAMPLE_FILTER.as_ref() { if !row.description.contains(filter.as_str()) { @@ -4131,7 +4217,7 @@ fn test_query_is_pattern_non_local() { } } eprintln!(" query example: {:?}", row.description); - let query = Query::new(language, row.pattern).unwrap(); + let query = Query::new(row.language, row.pattern).unwrap(); assert_eq!( query.is_pattern_non_local(0), row.is_non_local, From 811bc8256def2ddff65a1e46003263d7f49cd08e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 13 Mar 2023 12:21:19 -0700 Subject: [PATCH 044/347] lib: 0.20.10 --- Cargo.lock | 2 +- lib/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ca773788..40b4afab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -544,7 +544,7 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.20.9" +version = "0.20.10" dependencies = [ "cc", "lazy_static", diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 
c2d35685..16fd0254 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.20.9" +version = "0.20.10" authors = ["Max Brunsfeld "] edition = "2021" license = "MIT" From b1ad55909d1539bfbf6ae66407e33250770f0a88 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 13 Mar 2023 12:22:10 -0700 Subject: [PATCH 045/347] 0.20.8 --- Cargo.lock | 2 +- cli/Cargo.toml | 2 +- cli/npm/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 40b4afab..4a844dc8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -553,7 +553,7 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.20.7" +version = "0.20.8" dependencies = [ "ansi_term", "anyhow", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 77cf52e4..ad21f0e7 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-cli" description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.20.7" +version = "0.20.8" authors = ["Max Brunsfeld "] edition = "2021" license = "MIT" diff --git a/cli/npm/package.json b/cli/npm/package.json index dfa53ab4..02309193 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.20.7", + "version": "0.20.8", "author": "Max Brunsfeld", "license": "MIT", "repository": { From 25d9c989ebdc53dbad3852317c5b45228ff012e4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 13 Mar 2023 14:25:24 -0700 Subject: [PATCH 046/347] Update python error corpus to reflect grammar changes --- test/fixtures/error_corpus/python_errors.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/fixtures/error_corpus/python_errors.txt b/test/fixtures/error_corpus/python_errors.txt index edabb510..bd3101a9 100644 --- a/test/fixtures/error_corpus/python_errors.txt +++ 
b/test/fixtures/error_corpus/python_errors.txt @@ -89,7 +89,8 @@ def a(): parameters: (parameters) (ERROR (identifier)) body: (block - (expression_statement (string))))) + (expression_statement (string + string_content: (string_content)))))) =========================================== incomplete definition in class definition @@ -108,4 +109,4 @@ b (ERROR) body: (block)) (expression_statement - (identifier))) \ No newline at end of file + (identifier))) From ca152a93dd7f7dbea2e6eb84072b53dc74816443 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Tue, 14 Mar 2023 20:06:31 +0200 Subject: [PATCH 047/347] ci: fix tests --- cli/src/tests/parser_test.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index cf3b6437..30b12336 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -505,7 +505,7 @@ fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() { let tree = parser.parse(&source, None).unwrap(); assert_eq!( tree.root_node().to_sexp(), - "(module (expression_statement (assignment left: (identifier) right: (expression_list (identifier) (string)))))" + "(module (expression_statement (assignment left: (identifier) right: (expression_list (identifier) (string string_content: (string_content))))))" ); // Delete a suffix of the source code, starting in the middle of the string From 47c1e858efb3fc29f9a539378ff40c00caabac40 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Tue, 14 Mar 2023 20:37:12 +0200 Subject: [PATCH 048/347] Update deps: lib/binding_web/package.json --- lib/binding_web/package.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/binding_web/package.json b/lib/binding_web/package.json index 2cfcf72e..52f72d5b 100644 --- a/lib/binding_web/package.json +++ b/lib/binding_web/package.json @@ -27,8 +27,8 @@ }, "homepage": "https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web", 
"devDependencies": { - "chai": "^4.2.0", - "mocha": "^6.1.4", - "terser": "^3.17.0" + "chai": "^4.3.7", + "mocha": "^10.2.0", + "terser": "^5.16.6" } } From 1b2a3e3c73674e12144fb619083df48deb958716 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Wed, 15 Mar 2023 12:31:54 +0200 Subject: [PATCH 049/347] docs: add `webrick` to Gemfile to fix a compat issue --- docs/Gemfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/Gemfile b/docs/Gemfile index 91ceacd3..ee114290 100644 --- a/docs/Gemfile +++ b/docs/Gemfile @@ -1,2 +1,3 @@ source 'https://rubygems.org' -gem 'github-pages', group: :jekyll_plugins \ No newline at end of file +gem 'github-pages', group: :jekyll_plugins +gem "webrick" From 2ce06b623ede810682fb58ffa126854efe14dd60 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Wed, 15 Mar 2023 12:37:58 +0200 Subject: [PATCH 050/347] docs: update Gemfile.lock to fix GH dependabot complains Updated with commands like: > gem install bundler > bundle config set --local path 'vendor/bundle' > bundle update --bundler > #bundle add webrick > bundle update > #bundle exec jekyll serve --- docs/Gemfile.lock | 368 +++++++++++++++++++++++----------------------- 1 file changed, 185 insertions(+), 183 deletions(-) diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock index 44aff756..3b2801be 100644 --- a/docs/Gemfile.lock +++ b/docs/Gemfile.lock @@ -1,258 +1,260 @@ GEM remote: https://rubygems.org/ specs: - activesupport (4.2.9) - i18n (~> 0.7) - minitest (~> 5.1) - thread_safe (~> 0.3, >= 0.3.4) - tzinfo (~> 1.1) - addressable (2.8.0) - public_suffix (>= 2.0.2, < 5.0) + activesupport (7.0.4.3) + concurrent-ruby (~> 1.0, >= 1.0.2) + i18n (>= 1.6, < 2) + minitest (>= 5.1) + tzinfo (~> 2.0) + addressable (2.8.1) + public_suffix (>= 2.0.2, < 6.0) coffee-script (2.4.1) coffee-script-source execjs coffee-script-source (1.11.1) colorator (1.1.0) - commonmarker (0.17.8) - ruby-enum (~> 0.5) - concurrent-ruby (1.0.5) - ethon (0.14.0) + commonmarker (0.23.8) 
+ concurrent-ruby (1.2.2) + dnsruby (1.61.9) + simpleidn (~> 0.1) + em-websocket (0.5.3) + eventmachine (>= 0.12.9) + http_parser.rb (~> 0) + ethon (0.16.0) ffi (>= 1.15.0) - execjs (2.7.0) - faraday (1.5.1) - faraday-em_http (~> 1.0) - faraday-em_synchrony (~> 1.0) - faraday-excon (~> 1.1) - faraday-httpclient (~> 1.0.1) - faraday-net_http (~> 1.0) - faraday-net_http_persistent (~> 1.1) - faraday-patron (~> 1.0) - multipart-post (>= 1.2, < 3) + eventmachine (1.2.7) + execjs (2.8.1) + faraday (2.7.4) + faraday-net_http (>= 2.0, < 3.1) ruby2_keywords (>= 0.0.4) - faraday-em_http (1.0.0) - faraday-em_synchrony (1.0.0) - faraday-excon (1.1.0) - faraday-httpclient (1.0.1) - faraday-net_http (1.0.1) - faraday-net_http_persistent (1.2.0) - faraday-patron (1.0.0) - ffi (1.15.3) + faraday-net_http (3.0.2) + ffi (1.15.5) forwardable-extended (2.6.0) - gemoji (3.0.0) - github-pages (177) - activesupport (= 4.2.9) - github-pages-health-check (= 1.3.5) - jekyll (= 3.6.2) - jekyll-avatar (= 0.5.0) - jekyll-coffeescript (= 1.0.2) - jekyll-commonmark-ghpages (= 0.1.5) + gemoji (3.0.1) + github-pages (228) + github-pages-health-check (= 1.17.9) + jekyll (= 3.9.3) + jekyll-avatar (= 0.7.0) + jekyll-coffeescript (= 1.1.1) + jekyll-commonmark-ghpages (= 0.4.0) jekyll-default-layout (= 0.1.4) - jekyll-feed (= 0.9.2) - jekyll-gist (= 1.4.1) - jekyll-github-metadata (= 2.9.3) - jekyll-mentions (= 1.2.0) - jekyll-optional-front-matter (= 0.3.0) + jekyll-feed (= 0.15.1) + jekyll-gist (= 1.5.0) + jekyll-github-metadata (= 2.13.0) + jekyll-include-cache (= 0.2.1) + jekyll-mentions (= 1.6.0) + jekyll-optional-front-matter (= 0.3.2) jekyll-paginate (= 1.1.0) - jekyll-readme-index (= 0.2.0) - jekyll-redirect-from (= 0.12.1) - jekyll-relative-links (= 0.5.2) - jekyll-remote-theme (= 0.2.3) - jekyll-sass-converter (= 1.5.0) - jekyll-seo-tag (= 2.3.0) - jekyll-sitemap (= 1.1.1) - jekyll-swiss (= 0.4.0) - jekyll-theme-architect (= 0.1.0) - jekyll-theme-cayman (= 0.1.0) - jekyll-theme-dinky (= 
0.1.0) - jekyll-theme-hacker (= 0.1.0) - jekyll-theme-leap-day (= 0.1.0) - jekyll-theme-merlot (= 0.1.0) - jekyll-theme-midnight (= 0.1.0) - jekyll-theme-minimal (= 0.1.0) - jekyll-theme-modernist (= 0.1.0) - jekyll-theme-primer (= 0.5.2) - jekyll-theme-slate (= 0.1.0) - jekyll-theme-tactile (= 0.1.0) - jekyll-theme-time-machine (= 0.1.0) - jekyll-titles-from-headings (= 0.5.0) - jemoji (= 0.8.1) - kramdown (= 1.16.2) - liquid (= 4.0.0) - listen (= 3.0.6) + jekyll-readme-index (= 0.3.0) + jekyll-redirect-from (= 0.16.0) + jekyll-relative-links (= 0.6.1) + jekyll-remote-theme (= 0.4.3) + jekyll-sass-converter (= 1.5.2) + jekyll-seo-tag (= 2.8.0) + jekyll-sitemap (= 1.4.0) + jekyll-swiss (= 1.0.0) + jekyll-theme-architect (= 0.2.0) + jekyll-theme-cayman (= 0.2.0) + jekyll-theme-dinky (= 0.2.0) + jekyll-theme-hacker (= 0.2.0) + jekyll-theme-leap-day (= 0.2.0) + jekyll-theme-merlot (= 0.2.0) + jekyll-theme-midnight (= 0.2.0) + jekyll-theme-minimal (= 0.2.0) + jekyll-theme-modernist (= 0.2.0) + jekyll-theme-primer (= 0.6.0) + jekyll-theme-slate (= 0.2.0) + jekyll-theme-tactile (= 0.2.0) + jekyll-theme-time-machine (= 0.2.0) + jekyll-titles-from-headings (= 0.5.3) + jemoji (= 0.12.0) + kramdown (= 2.3.2) + kramdown-parser-gfm (= 1.1.0) + liquid (= 4.0.4) mercenary (~> 0.3) - minima (= 2.1.1) - nokogiri (>= 1.8.1, < 2.0) - rouge (= 2.2.1) + minima (= 2.5.1) + nokogiri (>= 1.13.6, < 2.0) + rouge (= 3.26.0) terminal-table (~> 1.4) - github-pages-health-check (1.3.5) + github-pages-health-check (1.17.9) addressable (~> 2.3) - net-dns (~> 0.8) + dnsruby (~> 1.60) octokit (~> 4.0) - public_suffix (~> 2.0) - typhoeus (~> 0.7) - html-pipeline (2.7.1) + public_suffix (>= 3.0, < 5.0) + typhoeus (~> 1.3) + html-pipeline (2.14.3) activesupport (>= 2) nokogiri (>= 1.4) - i18n (0.9.5) + http_parser.rb (0.8.0) + i18n (1.12.0) concurrent-ruby (~> 1.0) - jekyll (3.6.2) + jekyll (3.9.3) addressable (~> 2.4) colorator (~> 1.0) + em-websocket (~> 0.5) + i18n (>= 0.7, < 2) 
jekyll-sass-converter (~> 1.0) - jekyll-watch (~> 1.1) - kramdown (~> 1.14) + jekyll-watch (~> 2.0) + kramdown (>= 1.17, < 3) liquid (~> 4.0) mercenary (~> 0.3.3) pathutil (~> 0.9) - rouge (>= 1.7, < 3) + rouge (>= 1.7, < 4) safe_yaml (~> 1.0) - jekyll-avatar (0.5.0) - jekyll (~> 3.0) - jekyll-coffeescript (1.0.2) + jekyll-avatar (0.7.0) + jekyll (>= 3.0, < 5.0) + jekyll-coffeescript (1.1.1) coffee-script (~> 2.2) coffee-script-source (~> 1.11.1) - jekyll-commonmark (1.1.0) - commonmarker (~> 0.14) - jekyll (>= 3.0, < 4.0) - jekyll-commonmark-ghpages (0.1.5) - commonmarker (~> 0.17.6) - jekyll-commonmark (~> 1) - rouge (~> 2) + jekyll-commonmark (1.4.0) + commonmarker (~> 0.22) + jekyll-commonmark-ghpages (0.4.0) + commonmarker (~> 0.23.7) + jekyll (~> 3.9.0) + jekyll-commonmark (~> 1.4.0) + rouge (>= 2.0, < 5.0) jekyll-default-layout (0.1.4) jekyll (~> 3.0) - jekyll-feed (0.9.2) - jekyll (~> 3.3) - jekyll-gist (1.4.1) + jekyll-feed (0.15.1) + jekyll (>= 3.7, < 5.0) + jekyll-gist (1.5.0) octokit (~> 4.2) - jekyll-github-metadata (2.9.3) - jekyll (~> 3.1) + jekyll-github-metadata (2.13.0) + jekyll (>= 3.4, < 5.0) octokit (~> 4.0, != 4.4.0) - jekyll-mentions (1.2.0) - activesupport (~> 4.0) + jekyll-include-cache (0.2.1) + jekyll (>= 3.7, < 5.0) + jekyll-mentions (1.6.0) html-pipeline (~> 2.3) - jekyll (~> 3.0) - jekyll-optional-front-matter (0.3.0) - jekyll (~> 3.0) + jekyll (>= 3.7, < 5.0) + jekyll-optional-front-matter (0.3.2) + jekyll (>= 3.0, < 5.0) jekyll-paginate (1.1.0) - jekyll-readme-index (0.2.0) - jekyll (~> 3.0) - jekyll-redirect-from (0.12.1) - jekyll (~> 3.3) - jekyll-relative-links (0.5.2) - jekyll (~> 3.3) - jekyll-remote-theme (0.2.3) - jekyll (~> 3.5) - rubyzip (>= 1.2.1, < 3.0) - typhoeus (>= 0.7, < 2.0) - jekyll-sass-converter (1.5.0) + jekyll-readme-index (0.3.0) + jekyll (>= 3.0, < 5.0) + jekyll-redirect-from (0.16.0) + jekyll (>= 3.3, < 5.0) + jekyll-relative-links (0.6.1) + jekyll (>= 3.3, < 5.0) + jekyll-remote-theme (0.4.3) + addressable 
(~> 2.0) + jekyll (>= 3.5, < 5.0) + jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0) + rubyzip (>= 1.3.0, < 3.0) + jekyll-sass-converter (1.5.2) sass (~> 3.4) - jekyll-seo-tag (2.3.0) - jekyll (~> 3.3) - jekyll-sitemap (1.1.1) - jekyll (~> 3.3) - jekyll-swiss (0.4.0) - jekyll-theme-architect (0.1.0) - jekyll (~> 3.5) + jekyll-seo-tag (2.8.0) + jekyll (>= 3.8, < 5.0) + jekyll-sitemap (1.4.0) + jekyll (>= 3.7, < 5.0) + jekyll-swiss (1.0.0) + jekyll-theme-architect (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-cayman (0.1.0) - jekyll (~> 3.5) + jekyll-theme-cayman (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-dinky (0.1.0) - jekyll (~> 3.5) + jekyll-theme-dinky (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-hacker (0.1.0) - jekyll (~> 3.5) + jekyll-theme-hacker (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-leap-day (0.1.0) - jekyll (~> 3.5) + jekyll-theme-leap-day (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-merlot (0.1.0) - jekyll (~> 3.5) + jekyll-theme-merlot (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-midnight (0.1.0) - jekyll (~> 3.5) + jekyll-theme-midnight (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-minimal (0.1.0) - jekyll (~> 3.5) + jekyll-theme-minimal (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-modernist (0.1.0) - jekyll (~> 3.5) + jekyll-theme-modernist (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-primer (0.5.2) - jekyll (~> 3.5) + jekyll-theme-primer (0.6.0) + jekyll (> 3.5, < 5.0) jekyll-github-metadata (~> 2.9) - jekyll-seo-tag (~> 2.2) - jekyll-theme-slate (0.1.0) - jekyll (~> 3.5) jekyll-seo-tag (~> 2.0) - jekyll-theme-tactile (0.1.0) - jekyll (~> 3.5) + jekyll-theme-slate (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-time-machine (0.1.0) - jekyll (~> 3.5) + jekyll-theme-tactile (0.2.0) + jekyll 
(> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-titles-from-headings (0.5.0) - jekyll (~> 3.3) - jekyll-watch (1.5.1) + jekyll-theme-time-machine (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-titles-from-headings (0.5.3) + jekyll (>= 3.3, < 5.0) + jekyll-watch (2.2.1) listen (~> 3.0) - jemoji (0.8.1) - activesupport (~> 4.0, >= 4.2.9) + jemoji (0.12.0) gemoji (~> 3.0) html-pipeline (~> 2.2) - jekyll (>= 3.0) - kramdown (1.16.2) - liquid (4.0.0) - listen (3.0.6) - rb-fsevent (>= 0.9.3) - rb-inotify (>= 0.9.7) + jekyll (>= 3.0, < 5.0) + kramdown (2.3.2) + rexml + kramdown-parser-gfm (1.1.0) + kramdown (~> 2.0) + liquid (4.0.4) + listen (3.8.0) + rb-fsevent (~> 0.10, >= 0.10.3) + rb-inotify (~> 0.9, >= 0.9.10) mercenary (0.3.6) - mini_portile2 (2.8.0) - minima (2.1.1) - jekyll (~> 3.3) - minitest (5.11.3) - multipart-post (2.1.1) - net-dns (0.9.0) - nokogiri (1.13.3) - mini_portile2 (~> 2.8.0) + minima (2.5.1) + jekyll (>= 3.5, < 5.0) + jekyll-feed (~> 0.9) + jekyll-seo-tag (~> 2.1) + minitest (5.18.0) + nokogiri (1.14.2-x86_64-linux) racc (~> 1.4) - octokit (4.21.0) - faraday (>= 0.9) - sawyer (~> 0.8.0, >= 0.5.3) + octokit (4.25.1) + faraday (>= 1, < 3) + sawyer (~> 0.9) pathutil (0.16.2) forwardable-extended (~> 2.6) - public_suffix (2.0.5) - racc (1.6.0) - rb-fsevent (0.11.0) + public_suffix (4.0.7) + racc (1.6.2) + rb-fsevent (0.11.2) rb-inotify (0.10.1) ffi (~> 1.0) - rouge (2.2.1) - ruby-enum (0.7.2) - i18n - ruby2_keywords (0.0.4) - rubyzip (2.0.0) + rexml (3.2.5) + rouge (3.26.0) + ruby2_keywords (0.0.5) + rubyzip (2.3.2) safe_yaml (1.0.5) sass (3.7.4) sass-listen (~> 4.0.0) sass-listen (4.0.0) rb-fsevent (~> 0.9, >= 0.9.4) rb-inotify (~> 0.9, >= 0.9.7) - sawyer (0.8.2) + sawyer (0.9.2) addressable (>= 2.3.5) - faraday (> 0.8, < 2.0) + faraday (>= 0.17.3, < 3) + simpleidn (0.2.1) + unf (~> 0.1.4) terminal-table (1.8.0) unicode-display_width (~> 1.1, >= 1.1.1) - thread_safe (0.3.6) - typhoeus (0.8.0) - ethon (>= 0.8.0) - tzinfo (1.2.5) - 
thread_safe (~> 0.1) - unicode-display_width (1.3.0) + typhoeus (1.4.0) + ethon (>= 0.9.0) + tzinfo (2.0.6) + concurrent-ruby (~> 1.0) + unf (0.1.4) + unf_ext + unf_ext (0.0.8.2) + unicode-display_width (1.8.0) + webrick (1.8.1) PLATFORMS ruby DEPENDENCIES github-pages + webrick BUNDLED WITH - 1.16.1 + 2.4.8 From 23faf59f162b881eee634ba8df2b65d6b67a4113 Mon Sep 17 00:00:00 2001 From: James McCoy Date: Wed, 15 Mar 2023 09:57:25 -0400 Subject: [PATCH 051/347] cli: Bump tree-sitter dependency to 0.20.10 tree-sitter/tree-sitter#2085 added the ts_query_is_pattern_non_local API and its usage in tree-sitter-cli, so bump version accordingly. --- cli/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/Cargo.toml b/cli/Cargo.toml index ad21f0e7..f228951d 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -41,7 +41,7 @@ webbrowser = "0.5.1" which = "4.1.0" [dependencies.tree-sitter] -version = "0.20.3" +version = "0.20.10" path = "../lib" [dependencies.tree-sitter-config] From a1388d9be152996492c425515187b6bc743fe9ca Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 15 Mar 2023 16:10:11 +0000 Subject: [PATCH 052/347] Bump webbrowser from 0.5.5 to 0.8.3 Bumps [webbrowser](https://github.com/amodm/webbrowser-rs) from 0.5.5 to 0.8.3. - [Release notes](https://github.com/amodm/webbrowser-rs/releases) - [Changelog](https://github.com/amodm/webbrowser-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/amodm/webbrowser-rs/compare/v0.5.5...v0.8.3) --- updated-dependencies: - dependency-name: webbrowser dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 185 +++++++++++++++++++++++++++++++++++++++++++++++-- cli/Cargo.toml | 2 +- 2 files changed, 179 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4a844dc8..a863b14f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -61,12 +61,24 @@ version = "3.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" +[[package]] +name = "bytes" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" + [[package]] name = "cc" version = "1.0.77" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9f73505338f7d905b19d18738976aae232eb46b8efc15554ffc56deb5d9ebe4" +[[package]] +name = "cesu8" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" + [[package]] name = "cfg-if" version = "1.0.0" @@ -94,6 +106,32 @@ dependencies = [ "vec_map", ] +[[package]] +name = "combine" +version = "4.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4" +dependencies = [ + "bytes", + "memchr", +] + +[[package]] +name = "core-foundation" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" + [[package]] name = "ctor" version = "0.1.26" @@ -125,6 +163,15 @@ dependencies = [ "dirs-sys", ] +[[package]] +name = "dirs" +version = "4.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" +dependencies = [ + "dirs-sys", +] + [[package]] name = "dirs-sys" version = "0.3.7" @@ -151,6 +198,15 @@ dependencies = [ "instant", ] +[[package]] +name = "form_urlencoded" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" +dependencies = [ + "percent-encoding", +] + [[package]] name = "getrandom" version = "0.2.8" @@ -198,6 +254,16 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" +[[package]] +name = "idna" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "indexmap" version = "1.9.2" @@ -223,6 +289,26 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc" +[[package]] +name = "jni" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "039022cdf4d7b1cf548d31f60ae783138e5fd42013f6271049d7df7afadef96c" +dependencies = [ + "cesu8", + "combine", + "jni-sys", + "log", + "thiserror", + "walkdir", +] + +[[package]] +name = "jni-sys" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" + [[package]] name = "js-sys" version = "0.3.60" @@ -263,12 +349,36 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "malloc_buf" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb" +dependencies = [ + "libc", +] + [[package]] name = "memchr" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "ndk-context" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27b02d87554356db9e9a873add8782d4ea6e3e58ea071a9adb9a2e8ddb884a8b" + +[[package]] +name = "objc" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1" +dependencies = [ + "malloc_buf", +] + [[package]] name = "once_cell" version = "1.16.0" @@ -284,6 +394,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "percent-encoding" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -350,6 +466,12 @@ dependencies = [ "getrandom", ] +[[package]] +name = "raw-window-handle" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f851a03551ceefd30132e447f07f96cb7011d6b658374f3aed847333adb5559" + [[package]] name = "redox_syscall" version = "0.2.16" @@ -533,6 +655,21 @@ dependencies = [ "log", ] +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "toml" version = "0.5.9" @@ -561,7 +698,7 @@ dependencies = [ "clap", "ctor", "difference", - 
"dirs", + "dirs 3.0.2", "glob", "html-escape", "indexmap", @@ -595,7 +732,7 @@ name = "tree-sitter-config" version = "0.19.0" dependencies = [ "anyhow", - "dirs", + "dirs 3.0.2", "serde", "serde_json", ] @@ -615,7 +752,7 @@ version = "0.20.0" dependencies = [ "anyhow", "cc", - "dirs", + "dirs 3.0.2", "libloading", "once_cell", "regex", @@ -636,12 +773,27 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "unicode-bidi" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524b68aca1d05e03fdf03fcdce2c6c94b6daf6d16861ddaa7e4f2b6638a9052c" + [[package]] name = "unicode-ident" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + [[package]] name = "unicode-width" version = "0.1.10" @@ -654,6 +806,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5aa30f5ea51ff7edfc797c6d3f9ec8cbd8cfedef5371766b7181d33977f4814f" +[[package]] +name = "url" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + [[package]] name = "utf8-width" version = "0.1.6" @@ -749,10 +912,18 @@ dependencies = [ [[package]] name = "webbrowser" -version = "0.5.5" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecad156490d6b620308ed411cfee90d280b3cbd13e189ea0d3fada8acc89158a" +checksum = "aa61ff77f695a94d9c8558e0bb5c362a8fd1f27c74663770fbc633acbafedbb6" dependencies = [ + "core-foundation", + "dirs 4.0.0", + "jni", + "log", + 
"ndk-context", + "objc", + "raw-window-handle", + "url", "web-sys", "widestring", "winapi", @@ -771,9 +942,9 @@ dependencies = [ [[package]] name = "widestring" -version = "0.4.3" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c168940144dd21fd8046987c16a46a33d5fc84eec29ef9dcddc2ac9e31526b7c" +checksum = "653f141f39ec16bba3c5abe400a0c60da7468261cc2cbf36805022876bc721a8" [[package]] name = "winapi" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index f228951d..6d48e8b9 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -37,7 +37,7 @@ serde = { version = "1.0.130", features = ["derive"] } smallbitvec = "2.5.1" tiny_http = "0.12.0" walkdir = "2.3" -webbrowser = "0.5.1" +webbrowser = "0.8.3" which = "4.1.0" [dependencies.tree-sitter] From 457eb2fe761f1e4d0b6346154345007e040fa487 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Wed, 15 Mar 2023 14:48:47 +0200 Subject: [PATCH 053/347] Update Cargo.lock --- Cargo.lock | 294 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 204 insertions(+), 90 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a863b14f..7a675454 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -22,9 +22,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.66" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "216261ddc8289130e551ddcd5ce8a064710c0d064a4d2895c67151c92b5443f6" +checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800" [[package]] name = "ascii" @@ -57,9 +57,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bumpalo" -version = "3.11.1" +version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" +checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" [[package]] name = "bytes" @@ -69,9 +69,9 @@ checksum = 
"89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" [[package]] name = "cc" -version = "1.0.77" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9f73505338f7d905b19d18738976aae232eb46b8efc15554ffc56deb5d9ebe4" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" [[package]] name = "cesu8" @@ -87,9 +87,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chunked_transfer" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fff857943da45f546682664a79488be82e69e43c1a7a2307679ab9afb3a66d2e" +checksum = "cca491388666e04d7248af3f60f0c40cfb0991c72205595d7c396e3510207d1a" [[package]] name = "clap" @@ -185,15 +185,36 @@ dependencies = [ [[package]] name = "either" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" +checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" + +[[package]] +name = "errno" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +dependencies = [ + "errno-dragonfly", + "libc", + "winapi", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] [[package]] name = "fastrand" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" dependencies = [ "instant", ] @@ -220,9 +241,9 @@ dependencies = [ 
[[package]] name = "glob" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "hashbrown" @@ -241,9 +262,9 @@ dependencies = [ [[package]] name = "html-escape" -version = "0.2.12" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15315cfa9503e9aa85a477138eff76a1b203a430703548052c330b69d8d8c205" +checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476" dependencies = [ "utf8-width", ] @@ -284,10 +305,20 @@ dependencies = [ ] [[package]] -name = "itoa" -version = "1.0.4" +name = "io-lifetimes" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc" +checksum = "cfa919a82ea574332e2de6e74b4c36e74d41982b335080fa59d4ef31be20fdf3" +dependencies = [ + "libc", + "windows-sys 0.45.0", +] + +[[package]] +name = "itoa" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" [[package]] name = "jni" @@ -311,9 +342,9 @@ checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" [[package]] name = "js-sys" -version = "0.3.60" +version = "0.3.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" +checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" dependencies = [ "wasm-bindgen", ] @@ -326,9 +357,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.138" +version = "0.2.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"db6d7e329c562c5dfab7a46a2afabc8b987ab9a4834c9d1ca04dc54c1546cef8" +checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" [[package]] name = "libloading" @@ -340,6 +371,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "linux-raw-sys" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" + [[package]] name = "log" version = "0.4.17" @@ -381,9 +418,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.16.0" +version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" [[package]] name = "output_vt100" @@ -420,18 +457,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.47" +version = "1.0.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" +checksum = "1d0e1ae9e836cc3beddd63db0df682593d7e2d3d891ae8c9083d2113e1744224" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.21" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" dependencies = [ "proc-macro2", ] @@ -494,9 +531,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.7.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a" +checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" dependencies = [ "aho-corasick", "memchr", @@ -509,15 +546,6 @@ version = "0.6.28" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" -[[package]] -name = "remove_dir_all" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" -dependencies = [ - "winapi", -] - [[package]] name = "rustc-hash" version = "1.1.0" @@ -525,10 +553,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] -name = "ryu" -version = "1.0.11" +name = "rustix" +version = "0.36.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" +checksum = "fd5c6ff11fecd55b40746d1995a02f2eb375bf8c00d192d521ee09f42bef37bc" +dependencies = [ + "bitflags", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys", + "windows-sys 0.45.0", +] + +[[package]] +name = "ryu" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" [[package]] name = "same-file" @@ -541,24 +583,24 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.14" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e25dfac463d778e353db5be2449d1cce89bd6fd23c9f1ea21310ce6e5a1b29c4" +checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" [[package]] name = "serde" -version = "1.0.149" +version = "1.0.156" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "256b9932320c590e707b94576e3cc1f7c9024d0ee6612dfbcf1cb106cbe8e055" +checksum = "314b5b092c0ade17c00142951e50ced110ec27cea304b1037c6969246c2469a4" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.149" +version = "1.0.156" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4eae9b04cbffdfd550eb462ed33bc6a1b68c935127d008b27444d08380f94e4" +checksum = "d7e29c4601e36bcec74a223228dce795f4cd3616341a4af93520ca1a837c087d" dependencies = [ "proc-macro2", "quote", @@ -567,9 +609,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.89" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "020ff22c755c2ed3f8cf162dbb41a7268d934702f3ed3631656ea597e08fc3db" +checksum = "1c533a59c9d8a93a09c6ab31f0fd5e5f4dd1b8fc9434804029839884765d04ea" dependencies = [ "indexmap", "itoa", @@ -591,9 +633,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" [[package]] name = "syn" -version = "1.0.105" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b9b43d45702de4c839cb9b51d9f529c5dd26a4aff255b42b1ebc03e88ee908" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ "proc-macro2", "quote", @@ -602,16 +644,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.3.0" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +checksum = "af18f7ae1acd354b992402e9ec5864359d693cd8a79dcbef59f76891701c1e95" dependencies = [ "cfg-if", "fastrand", - "libc", "redox_syscall", - "remove_dir_all", - "winapi", + "rustix", + "windows-sys 0.42.0", ] [[package]] @@ -625,18 +666,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.37" +version = "1.0.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10deb33631e3c9018b9baf9dcbbc4f737320d2b576bac10f6aefa048fa407e3e" +checksum = "a5ab016db510546d856297882807df8da66a16fb8c4101cb8b30054b0d5b2d9c" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.37" +version = "1.0.39" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb" +checksum = "5420d42e90af0c38c3290abcca25b9b3bdf379fc9f55c528f53a269d9c9a267e" dependencies = [ "proc-macro2", "quote", @@ -672,9 +713,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "toml" -version = "0.5.9" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" dependencies = [ "serde", ] @@ -781,9 +822,9 @@ checksum = "524b68aca1d05e03fdf03fcdce2c6c94b6daf6d16861ddaa7e4f2b6638a9052c" [[package]] name = "unicode-ident" -version = "1.0.5" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" +checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" [[package]] name = "unicode-normalization" @@ -848,9 +889,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.83" +version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" +checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -858,9 +899,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.83" +version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" +checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" dependencies = [ "bumpalo", "log", @@ -873,9 +914,9 @@ dependencies = [ [[package]] name = 
"wasm-bindgen-macro" -version = "0.2.83" +version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" +checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -883,9 +924,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.83" +version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" +checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" dependencies = [ "proc-macro2", "quote", @@ -896,15 +937,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.83" +version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" +checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" [[package]] name = "web-sys" -version = "0.3.60" +version = "0.3.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f" +checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" dependencies = [ "js-sys", "wasm-bindgen", @@ -912,9 +953,9 @@ dependencies = [ [[package]] name = "webbrowser" -version = "0.8.3" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa61ff77f695a94d9c8558e0bb5c362a8fd1f27c74663770fbc633acbafedbb6" +checksum = "97d1fa1e5c829b2bf9eb1e28fb950248b797cd6a04866fbdfa8bc31e5eef4c78" dependencies = [ "core-foundation", "dirs 4.0.0", @@ -925,27 +966,19 @@ dependencies = [ "raw-window-handle", "url", "web-sys", - "widestring", - "winapi", ] [[package]] name = "which" -version = "4.3.0" +version = "4.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c831fbbee9e129a8cf93e7747a82da9d95ba8e16621cae60ec2cdc849bacb7b" +checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269" dependencies = [ "either", "libc", "once_cell", ] -[[package]] -name = "widestring" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "653f141f39ec16bba3c5abe400a0c60da7468261cc2cbf36805022876bc721a8" - [[package]] name = "winapi" version = "0.3.9" @@ -976,3 +1009,84 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + 
+[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" From 62f8c431aeee91894c79a5cf507399a6f764a4ba Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Thu, 16 Mar 2023 15:02:04 +0200 Subject: [PATCH 054/347] test: add `retry` and `test_with_seed` proc macros --- Cargo.lock | 11 +++ cli/Cargo.toml | 2 + cli/src/tests/proc_macro/Cargo.toml | 14 +++ cli/src/tests/proc_macro/src/lib.rs | 137 ++++++++++++++++++++++++++++ 4 files changed, 164 insertions(+) create mode 100644 cli/src/tests/proc_macro/Cargo.toml create mode 100644 cli/src/tests/proc_macro/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 7a675454..404c269f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -464,6 +464,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proc_macro" +version = "0.1.0" +dependencies = [ 
+ "proc-macro2", + "quote", + "rand", + "syn", +] + [[package]] name = "quote" version = "1.0.26" @@ -746,6 +756,7 @@ dependencies = [ "lazy_static", "log", "pretty_assertions", + "proc_macro", "rand", "regex", "regex-syntax", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 6d48e8b9..47e03284 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -69,6 +69,8 @@ version = "0.4.6" features = ["std"] [dev-dependencies] +proc_macro = { path = "src/tests/proc_macro" } + rand = "0.8" tempfile = "3" pretty_assertions = "0.7.2" diff --git a/cli/src/tests/proc_macro/Cargo.toml b/cli/src/tests/proc_macro/Cargo.toml new file mode 100644 index 00000000..a9a2b146 --- /dev/null +++ b/cli/src/tests/proc_macro/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "proc_macro" +version = "0.1.0" +edition = "2021" +publish = false + +[lib] +proc-macro = true + +[dependencies] +proc-macro2 = "1" +quote = "1" +rand = "0.8.5" +syn = { version = "1", features = ["full"] } diff --git a/cli/src/tests/proc_macro/src/lib.rs b/cli/src/tests/proc_macro/src/lib.rs new file mode 100644 index 00000000..d831a75b --- /dev/null +++ b/cli/src/tests/proc_macro/src/lib.rs @@ -0,0 +1,137 @@ +use proc_macro::TokenStream; +use proc_macro2::Span; +use quote::quote; +use syn::{ + parse::{Parse, ParseStream}, + parse_macro_input, Error, Expr, Ident, ItemFn, LitInt, Token, +}; + +#[proc_macro_attribute] +pub fn retry(args: TokenStream, input: TokenStream) -> TokenStream { + let count = parse_macro_input!(args as LitInt); + let input = parse_macro_input!(input as ItemFn); + let attrs = input.attrs.clone(); + let name = input.sig.ident.clone(); + + TokenStream::from(quote! 
{ + #(#attrs),* + fn #name() { + #input + + for i in 0..=#count { + let result = std::panic::catch_unwind(|| { + #name(); + }); + + if result.is_ok() { + return; + } + + if i == #count { + std::panic::resume_unwind(result.unwrap_err()); + } + } + } + }) +} + +#[proc_macro_attribute] +pub fn test_with_seed(args: TokenStream, input: TokenStream) -> TokenStream { + struct Args { + retry: LitInt, + seed: Expr, + seed_fn: Option, + } + + impl Parse for Args { + fn parse(input: ParseStream) -> syn::Result { + let mut retry = None; + let mut seed = None; + let mut seed_fn = None; + + while !input.is_empty() { + let name = input.parse::()?; + match name.to_string().as_str() { + "retry" => { + input.parse::()?; + retry.replace(input.parse()?); + } + "seed" => { + input.parse::()?; + seed.replace(input.parse()?); + } + "seed_fn" => { + input.parse::()?; + seed_fn.replace(input.parse()?); + } + x => { + return Err(Error::new( + name.span(), + format!("Unsupported parameter `{x}`"), + )) + } + } + + if !input.is_empty() { + input.parse::()?; + } + } + + if retry.is_none() { + retry.replace(LitInt::new("0", Span::mixed_site())); + } + + Ok(Args { + retry: retry.expect("`retry` parameter is requred"), + seed: seed.expect("`initial_seed` parameter is required"), + seed_fn, + }) + } + } + + let Args { + retry, + seed, + seed_fn, + } = parse_macro_input!(args as Args); + + let seed_fn = seed_fn.iter(); + + let func = parse_macro_input!(input as ItemFn); + let attrs = func.attrs.clone(); + let name = func.sig.ident.clone(); + + // dbg!(quote::ToTokens::into_token_stream(&func)); + + TokenStream::from(quote! 
{ + #[test] + #(#attrs),* + fn #name() { + #func + + let mut seed = #seed; + + for i in 0..=#retry { + let result = std::panic::catch_unwind(|| { + #name(seed); + }); + + if result.is_ok() { + return; + } + + if i == #retry { + std::panic::resume_unwind(result.unwrap_err()); + } + + #( + seed = #seed_fn(); + )* + + if i < #retry { + println!("\nRetry {}/{} with a new seed {}", i + 1, #retry, seed); + } + } + } + }) +} From 588549c09388dd6586cfbb06c06f8eeba436f38f Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Thu, 16 Mar 2023 15:02:59 +0200 Subject: [PATCH 055/347] test: run `test_parsing_with_a_timeout` with 10 retries --- cli/src/tests/parser_test.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 30b12336..78c6cda4 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -8,6 +8,7 @@ use crate::{ generate::generate_parser_for_grammar, parse::{perform_edit, Edit}, }; +use proc_macro::retry; use std::{ sync::atomic::{AtomicUsize, Ordering}, thread, time, @@ -638,6 +639,7 @@ fn test_parsing_cancelled_by_another_thread() { // Timeouts #[test] +#[retry(10)] fn test_parsing_with_a_timeout() { let mut parser = Parser::new(); parser.set_language(get_language("json")).unwrap(); From 3aeef44eb651b77b7392d5ee89fa4d92273b8e0c Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Thu, 16 Mar 2023 15:08:23 +0200 Subject: [PATCH 056/347] test: run all corpus tests with 10 retries --- cli/src/tests/corpus_test.rs | 95 +++++++++++++++++++----------------- 1 file changed, 50 insertions(+), 45 deletions(-) diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 401a99a5..b818b2c1 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -2,6 +2,7 @@ use super::helpers::{ allocations, edits::{get_random_edit, invert_edit}, fixtures::{fixtures_dir, get_language, get_test_language}, + new_seed, random::Rand, scope_sequence::ScopeSequence, 
EDIT_COUNT, EXAMPLE_FILTER, ITERATION_COUNT, LANGUAGE_FILTER, LOG_ENABLED, LOG_GRAPH_ENABLED, @@ -13,70 +14,71 @@ use crate::{ test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry}, util, }; -use std::fs; +use proc_macro::test_with_seed; +use std::{env, fs}; use tree_sitter::{LogType, Node, Parser, Point, Range, Tree}; -#[test] -fn test_bash_corpus() { - test_language_corpus("bash"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_bash(seed: usize) { + test_language_corpus(seed, "bash"); } -#[test] -fn test_c_corpus() { - test_language_corpus("c"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_c(seed: usize) { + test_language_corpus(seed, "c"); } -#[test] -fn test_cpp_corpus() { - test_language_corpus("cpp"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_cpp(seed: usize) { + test_language_corpus(seed, "cpp"); } -#[test] -fn test_embedded_template_corpus() { - test_language_corpus("embedded-template"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_embedded_template(seed: usize) { + test_language_corpus(seed, "embedded-template"); } -#[test] -fn test_go_corpus() { - test_language_corpus("go"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_go(seed: usize) { + test_language_corpus(seed, "go"); } -#[test] -fn test_html_corpus() { - test_language_corpus("html"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_html(seed: usize) { + test_language_corpus(seed, "html"); } -#[test] -fn test_javascript_corpus() { - test_language_corpus("javascript"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_javascript(seed: usize) { + test_language_corpus(seed, "javascript"); } -#[test] -fn test_json_corpus() { - test_language_corpus("json"); +#[test_with_seed(retry=10, seed=*START_SEED, 
seed_fn=new_seed)] +fn test_corpus_for_json(seed: usize) { + test_language_corpus(seed, "json"); } -#[test] -fn test_php_corpus() { - test_language_corpus("php"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_php(seed: usize) { + test_language_corpus(seed, "php"); } -#[test] -fn test_python_corpus() { - test_language_corpus("python"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_python(seed: usize) { + test_language_corpus(seed, "python"); } -#[test] -fn test_ruby_corpus() { - test_language_corpus("ruby"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_ruby(seed: usize) { + test_language_corpus(seed, "ruby"); } -#[test] -fn test_rust_corpus() { - test_language_corpus("rust"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_rust(seed: usize) { + test_language_corpus(seed, "rust"); } -fn test_language_corpus(language_name: &str) { +fn test_language_corpus(start_seed: usize, language_name: &str) { let grammars_dir = fixtures_dir().join("grammars"); let error_corpus_dir = fixtures_dir().join("error_corpus"); let template_corpus_dir = fixtures_dir().join("template_corpus"); @@ -100,6 +102,10 @@ fn test_language_corpus(language_name: &str) { let language = get_language(language_name); let mut failure_count = 0; + + let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok(); + + println!(); for test in tests { println!(" {} example - {}", language_name, test.name); @@ -140,7 +146,7 @@ fn test_language_corpus(language_name: &str) { drop(parser); for trial in 0..*ITERATION_COUNT { - let seed = *START_SEED + trial; + let seed = start_seed + trial; let passed = allocations::record(|| { let mut rand = Rand::new(seed); let mut log_session = None; @@ -161,7 +167,9 @@ fn test_language_corpus(language_name: &str) { perform_edit(&mut tree, &mut input, &edit); } - // println!(" seed: {}", seed); + if log_seed { + println!(" seed: {}", 
seed); + } if *LOG_GRAPH_ENABLED { eprintln!("{}\n", String::from_utf8_lossy(&input)); @@ -173,10 +181,7 @@ fn test_language_corpus(language_name: &str) { // Check that the new tree is consistent. check_consistent_sizes(&tree2, &input); if let Err(message) = check_changed_ranges(&tree, &tree2, &input) { - println!( - "\nUnexpected scope change in seed {}\n{}\n\n", - seed, message - ); + println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",); return false; } @@ -211,7 +216,7 @@ fn test_language_corpus(language_name: &str) { // Check that the edited tree is consistent. check_consistent_sizes(&tree3, &input); if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) { - eprintln!("Unexpected scope change in seed {}\n{}\n\n", seed, message); + println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n"); return false; } From ddb0af95098cf04a307d5ea6e93d77a00643c5d7 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Thu, 16 Mar 2023 15:08:56 +0200 Subject: [PATCH 057/347] test: use random SEED numbers This is needed to omit occurrences of the same seed in a sequence of following seeds due to the reason of that two initial seed are very close if based on unix epoch seconds. --- cli/src/tests/helpers/mod.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/cli/src/tests/helpers/mod.rs b/cli/src/tests/helpers/mod.rs index def0ea3e..54df8809 100644 --- a/cli/src/tests/helpers/mod.rs +++ b/cli/src/tests/helpers/mod.rs @@ -6,7 +6,8 @@ pub(super) mod random; pub(super) mod scope_sequence; use lazy_static::lazy_static; -use std::{env, time, usize}; +use rand::Rng; +use std::env; lazy_static! { pub static ref LOG_ENABLED: bool = env::var("TREE_SITTER_LOG").is_ok(); @@ -16,11 +17,7 @@ lazy_static! { } lazy_static! 
{ - pub static ref START_SEED: usize = - int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| time::SystemTime::now() - .duration_since(time::UNIX_EPOCH) - .unwrap() - .as_secs() as usize,); + pub static ref START_SEED: usize = new_seed(); pub static ref EDIT_COUNT: usize = int_env_var("TREE_SITTER_EDITS").unwrap_or(3); pub static ref ITERATION_COUNT: usize = int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10); } @@ -28,3 +25,10 @@ lazy_static! { fn int_env_var(name: &'static str) -> Option { env::var(name).ok().and_then(|e| e.parse().ok()) } + +pub(crate) fn new_seed() -> usize { + int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| { + let mut rng = rand::thread_rng(); + rng.gen::() + }) +} From 78fd9196f7cda76d105ddbd34641e010272efc8b Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Thu, 16 Mar 2023 17:29:27 +0200 Subject: [PATCH 058/347] ci: drop AppVeyor and its badge --- .appveyor.yml | 50 -------------------------------------------------- README.md | 1 - 2 files changed, 51 deletions(-) delete mode 100644 .appveyor.yml diff --git a/.appveyor.yml b/.appveyor.yml deleted file mode 100644 index d463b7a2..00000000 --- a/.appveyor.yml +++ /dev/null @@ -1,50 +0,0 @@ -build: false -install: - # Terminate early unless building either a tag or a PR. 
- - if "%APPVEYOR_REPO_TAG%" == "false" if not "%APPVEYOR_REPO_BRANCH%" == "master" appveyor exit - - # Install rust - - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe - - IF "%PLATFORM%" == "x86" rustup-init -y --default-toolchain stable --default-host i686-pc-windows-msvc - - IF "%PLATFORM%" == "x64" rustup-init -y --default-toolchain stable --default-host x86_64-pc-windows-msvc - - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin - - rustc -vV - - cargo -vV - - # Install dependencies - - git submodule update --init - -platform: - - x64 - - x86 - -test_script: - # Fetch and regenerate the fixture parsers - - script\fetch-fixtures.cmd - - cargo build --release - - script\generate-fixtures.cmd - - # Run tests - - script\test.cmd - - script\benchmark.cmd - -before_deploy: - - move target\release\tree-sitter.exe tree-sitter.exe - - 7z a -tgzip tree-sitter-windows-%PLATFORM%.gz tree-sitter.exe - - appveyor PushArtifact tree-sitter-windows-%PLATFORM%.gz - -deploy: - description: '' - provider: GitHub - auth_token: - secure: VC9ntV5+inKoNteZyLQksKzWMKXF46P+Jx3JHKVSfF+o1rWtZn2iIHAVsQv5LaUi - artifact: /tree-sitter-windows-.*/ - draft: true - force_update: true - on: - APPVEYOR_REPO_TAG: true - -cache: - - target - - test\fixtures\grammars - - C:\Users\appveyor\.cargo diff --git a/README.md b/README.md index 2d1e911d..f3c4abc2 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ # tree-sitter [![Build Status](https://github.com/tree-sitter/tree-sitter/workflows/CI/badge.svg)](https://github.com/tree-sitter/tree-sitter/actions) -[![Build status](https://ci.appveyor.com/api/projects/status/vtmbd6i92e97l55w/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/tree-sitter/branch/master) [![DOI](https://zenodo.org/badge/14164618.svg)](https://zenodo.org/badge/latestdoi/14164618) Tree-sitter is a parser generator tool and an incremental parsing library. 
It can build a concrete syntax tree for a source file and efficiently update the syntax tree as the source file is edited. Tree-sitter aims to be: From ec2af46f6fa8fc73204270397051f9e06fb5e32c Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Thu, 16 Mar 2023 18:00:02 +0200 Subject: [PATCH 059/347] chore: remove all dead badges --- cli/README.md | 2 -- highlight/README.md | 2 -- lib/binding_rust/README.md | 2 -- lib/binding_web/README.md | 2 -- 4 files changed, 8 deletions(-) diff --git a/cli/README.md b/cli/README.md index fe45b17b..8cdda9c0 100644 --- a/cli/README.md +++ b/cli/README.md @@ -1,8 +1,6 @@ Tree-sitter CLI =============== -[![Build Status](https://travis-ci.org/tree-sitter/tree-sitter.svg?branch=master)](https://travis-ci.org/tree-sitter/tree-sitter) -[![Build status](https://ci.appveyor.com/api/projects/status/vtmbd6i92e97l55w/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/tree-sitter/branch/master) [![Crates.io](https://img.shields.io/crates/v/tree-sitter-cli.svg)](https://crates.io/crates/tree-sitter-cli) The Tree-sitter CLI allows you to develop, test, and use Tree-sitter grammars from the command line. It works on MacOS, Linux, and Windows. 
diff --git a/highlight/README.md b/highlight/README.md index c04c7c08..e8a5d063 100644 --- a/highlight/README.md +++ b/highlight/README.md @@ -1,7 +1,5 @@ # `tree-sitter-highlight` -[![Build Status](https://travis-ci.org/tree-sitter/tree-sitter.svg?branch=master)](https://travis-ci.org/tree-sitter/tree-sitter) -[![Build status](https://ci.appveyor.com/api/projects/status/vtmbd6i92e97l55w/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/tree-sitter/branch/master) [![Crates.io](https://img.shields.io/crates/v/tree-sitter-highlight.svg)](https://crates.io/crates/tree-sitter-highlight) ### Usage diff --git a/lib/binding_rust/README.md b/lib/binding_rust/README.md index 6b48630a..dffe7661 100644 --- a/lib/binding_rust/README.md +++ b/lib/binding_rust/README.md @@ -1,7 +1,5 @@ # Rust Tree-sitter -[![Build Status](https://travis-ci.org/tree-sitter/tree-sitter.svg?branch=master)](https://travis-ci.org/tree-sitter/tree-sitter) -[![Build status](https://ci.appveyor.com/api/projects/status/vtmbd6i92e97l55w/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/tree-sitter/branch/master) [![Crates.io](https://img.shields.io/crates/v/tree-sitter.svg)](https://crates.io/crates/tree-sitter) Rust bindings to the [Tree-sitter][] parsing library. diff --git a/lib/binding_web/README.md b/lib/binding_web/README.md index 0c0f5246..a75cd9f0 100644 --- a/lib/binding_web/README.md +++ b/lib/binding_web/README.md @@ -1,8 +1,6 @@ Web Tree-sitter =============== -[![Build Status](https://travis-ci.org/tree-sitter/tree-sitter.svg?branch=master)](https://travis-ci.org/tree-sitter/tree-sitter) - WebAssembly bindings to the [Tree-sitter](https://github.com/tree-sitter/tree-sitter) parsing library. 
### Setup From cc4f932d175b65e6fc9a55df5c55fc0b3a7d9ada Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Tue, 4 Apr 2023 02:38:29 +0300 Subject: [PATCH 060/347] cicd: new workflow --- .github/scripts/cross.sh | 10 ++ .github/scripts/make.sh | 19 +++ .github/scripts/tree-sitter.sh | 12 ++ .github/workflows/CICD.yml | 69 ++++++++++ .github/workflows/build.yml | 171 +++++++++++++++++++++++++ .github/workflows/ci.yml | 150 ---------------------- .github/workflows/fast_checks.yml | 31 +++++ .github/workflows/full_rust_checks.yml | 32 +++++ .github/workflows/msrv.yml | 42 ++++++ .github/workflows/publish.yml | 21 +++ .github/workflows/release.yml | 101 +++++++++++++++ Cargo.lock | 157 ++++++++++++----------- script/generate-fixtures | 12 +- script/generate-fixtures-wasm | 12 +- 14 files changed, 611 insertions(+), 228 deletions(-) create mode 100755 .github/scripts/cross.sh create mode 100755 .github/scripts/make.sh create mode 100755 .github/scripts/tree-sitter.sh create mode 100644 .github/workflows/CICD.yml create mode 100644 .github/workflows/build.yml delete mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/fast_checks.yml create mode 100644 .github/workflows/full_rust_checks.yml create mode 100644 .github/workflows/msrv.yml create mode 100644 .github/workflows/publish.yml create mode 100644 .github/workflows/release.yml diff --git a/.github/scripts/cross.sh b/.github/scripts/cross.sh new file mode 100755 index 00000000..07017192 --- /dev/null +++ b/.github/scripts/cross.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -x +set -e + +if [ "$CROSS" != 1 ]; then + exit 111 +fi + +docker run --rm -v /home/runner:/home/runner -w "$PWD" "$CROSS_IMAGE" "$@" diff --git a/.github/scripts/make.sh b/.github/scripts/make.sh new file mode 100755 index 00000000..62aa0c06 --- /dev/null +++ b/.github/scripts/make.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +set -x +set -e + +if [ "$CROSS" = 1 ]; then + if [ -z "$CC" ]; then + echo "make.sh: CC is not set" >&2 + exit 111 + 
fi + if [ -z "$AR" ]; then + echo "make.sh: AR is not set" >&2 + exit 111 + fi + + cross.sh make CC=$CC AR=$AR "$@" +else + make "$@" +fi diff --git a/.github/scripts/tree-sitter.sh b/.github/scripts/tree-sitter.sh new file mode 100755 index 00000000..2e6e31c2 --- /dev/null +++ b/.github/scripts/tree-sitter.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -x +set -e + +tree_sitter="$ROOT"/target/"$TARGET"/release/tree-sitter + +if [ "$CROSS" = 1 ]; then + cross.sh $CROSS_RUNNER "$tree_sitter" "$@" +else + "$tree_sitter" "$@" +fi diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml new file mode 100644 index 00000000..7c2351a8 --- /dev/null +++ b/.github/workflows/CICD.yml @@ -0,0 +1,69 @@ +name: CICD + +on: + workflow_dispatch: + pull_request: + push: + branches: + - master + - check/* + +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' + cancel-in-progress: true + +jobs: + init: + name: Init + runs-on: ubuntu-latest + steps: + - name: Get PR head ref + if: ${{ github.event_name == 'pull_request' }} + id: ref + run: | + echo "ref=refs/pull/${{ github.event.pull_request.number }}/head" >> $GITHUB_OUTPUT + outputs: + ref: >- + ${{ + (github.event_name == 'pull_request' && startsWith(github.head_ref, 'release/v')) + && steps.ref.outputs.ref + || github.ref + }} + + fast_checks: + name: Fast checks + uses: ./.github/workflows/fast_checks.yml + + full_checks: + name: Full Rust checks + needs: fast_checks + uses: ./.github/workflows/full_rust_checks.yml + + min_version: + name: Minimum supported rust version + needs: fast_checks + uses: ./.github/workflows/msrv.yml + with: + package: tree-sitter-cli + + build: + name: Build & Test + needs: [init, fast_checks] + uses: ./.github/workflows/build.yml + with: + ref: ${{ needs.init.outputs.ref }} + + release: + name: Release + needs: [init, fast_checks, full_checks, min_version, build] + if: > + github.event.pull_request.head.repo.full_name == 
github.repository && + startsWith(github.head_ref, 'release/v') + uses: ./.github/workflows/release.yml + with: + ref: ${{ needs.init.outputs.ref }} + + publish: + name: Publish + needs: release + uses: ./.github/workflows/publish.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..27b31085 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,171 @@ +name: Build & Test + +env: + CARGO_TERM_COLOR: always + RUSTFLAGS: "-D warnings" + CROSS_DEBUG: 1 + +on: + workflow_call: + inputs: + ref: + default: ${{ github.ref }} + type: string + +jobs: + build: + name: ${{ matrix.job.name }} (${{ matrix.job.target }}) (${{ matrix.job.os }}) + runs-on: ${{ matrix.job.os }} + strategy: + fail-fast: false + matrix: + job: + - { name: linux-aarch64 , target: aarch64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { name: linux-arm , target: arm-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true } + - { name: linux-x64 , target: x86_64-unknown-linux-gnu , os: ubuntu-latest } + - { name: linux-x86 , target: i686-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { name: windows-x64 , target: x86_64-pc-windows-msvc , os: windows-latest } + - { name: windows-x86 , target: i686-pc-windows-msvc , os: windows-latest } + - { name: macos-x64 , target: x86_64-apple-darwin , os: macos-latest } + + env: + BUILD_CMD: cargo + + defaults: + run: + shell: bash + + steps: + - name: Checkout source code + uses: actions/checkout@v3 + with: + ref: ${{ inputs.ref }} + + - name: Read Emscripten version + run: | + echo "EMSCRIPTEN_VERSION=$(cat cli/emscripten-version)" >> $GITHUB_ENV + + - name: Install Emscripten + uses: mymindstorm/setup-emsdk@v12 + with: + version: ${{ env.EMSCRIPTEN_VERSION }} + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + targets: ${{ matrix.job.target }} + + - name: Install cross + if: matrix.job.use-cross + uses: taiki-e/install-action@v2 + with: 
+ tool: cross + + - name: Build custom cross image + if: ${{ matrix.job.use-cross && matrix.job.os == 'ubuntu-latest' }} + run: | + cd .. + + target="${{ matrix.job.target }}" + image=ghcr.io/cross-rs/$target:custom + echo "CROSS_IMAGE=$image" >> $GITHUB_ENV + + echo "[target.$target]" >> Cross.toml + echo "image = \"$image\"" >> Cross.toml + echo "CROSS_CONFIG=$PWD/Cross.toml" >> $GITHUB_ENV + + echo "FROM ghcr.io/cross-rs/$target:edge" >> Dockerfile + echo "ENV DEBIAN_FRONTEND=noninteractive" >> Dockerfile + echo "RUN apt-get update && apt-get install -y nodejs" >> Dockerfile + docker build -t $image . + docker images + docker run --rm $image env + + cd - + + - name: Setup extra env + run: | + PATH="$PWD/.github/scripts:$PATH" + echo "PATH=$PATH" >> $GITHUB_ENV + echo "ROOT=$PWD" >> $GITHUB_ENV + echo "TREE_SITTER=tree-sitter.sh" >> $GITHUB_ENV + + export TARGET=${{ matrix.job.target }} + echo "TARGET=$TARGET" >> $GITHUB_ENV + + USE_CROSS="${{ matrix.job.use-cross }}" + + if [ "$USE_CROSS" == "true" ]; then + echo "BUILD_CMD=cross" >> $GITHUB_ENV + + export CROSS=1; echo "CROSS=$CROSS" >> $GITHUB_ENV + + runner=$(cross.sh bash -c "env | sed -nr '/^CARGO_TARGET_.*_RUNNER=/s///p'") + [ -n "$runner" ] && echo "CROSS_RUNNER=$runner" >> $GITHUB_ENV + echo "runner: $runner" + + case "$TARGET" in + i686-unknown-linux-gnu) CC=i686-linux-gnu-gcc AR=i686-linux-gnu-ar ;; + aarch64-unknown-linux-gnu) CC=aarch64-linux-gnu-gcc AR=aarch64-linux-gnu-ar ;; + arm-unknown-linux-gnueabihf) CC=arm-unknown-linux-gnueabihf-gcc AR=arm-unknown-linux-gnueabihf-gcc-ar ;; + esac + + [ -n "$CC" ] && echo "CC=$CC" >> $GITHUB_ENV + [ -n "$AR" ] && echo "AR=$AR" >> $GITHUB_ENV + fi + + case "$TARGET" in + *-windows-*) + echo "RUST_TEST_THREADS=1" >> $GITHUB_ENV # See #2041 tree-sitter issue + ;; + esac + + - name: Build C library + if: "!contains(matrix.job.os, 'windows')" # Requires an additional adapted Makefile for `cl.exe` compiler + run: make.sh CFLAGS="-Werror" -j + + - name: Build wasm 
library + run: script/build-wasm + + - name: Build CLI + run: $BUILD_CMD build --release --target=${{ matrix.job.target }} + + - name: Fetch fixtures + run: script/fetch-fixtures + + - name: Generate fixtures + run: script/generate-fixtures + + - name: Generate WASM fixtures + if: "!matrix.job.use-cross" + run: script/generate-fixtures-wasm + + - name: Run main tests + run: $BUILD_CMD test --target=${{ matrix.job.target }} + + - name: Run wasm tests + if: "!matrix.job.use-cross" # TODO: Install Emscripten into custom cross images + run: script/test-wasm + + - name: Run benchmarks + if: "!matrix.job.use-cross" # It doesn't make sense to benchmark something in an emulator + run: $BUILD_CMD bench benchmark -p tree-sitter-cli --target=${{ matrix.job.target }} + + - name: Upload CLI artifact + uses: actions/upload-artifact@v3 + with: + name: tree-sitter.${{ matrix.job.name }} + path: target/${{ matrix.job.target }}/release/tree-sitter${{ contains(matrix.job.target, 'windows') && '.exe' || '' }} + if-no-files-found: error + retention-days: 7 + + - name: Upload WASM artifacts + if: ${{ matrix.job.name == 'linux-x64' }} + uses: actions/upload-artifact@v3 + with: + name: tree-sitter.wasm + path: | + lib/binding_web/tree-sitter.js + lib/binding_web/tree-sitter.wasm + if-no-files-found: error + retention-days: 7 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index e6ef591a..00000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,150 +0,0 @@ -name: CI - -on: - push: - branches: - - master - tags: - - v* - pull_request: - branches: - - "**" - -env: - CARGO_TERM_COLOR: always - CARGO_INCREMENTAL: 0 - -jobs: - unix-tests: - name: Unix tests - runs-on: ${{ matrix.os }} - strategy: - fail-fast: true - matrix: - os: - - macos-latest - - ubuntu-latest - steps: - - name: Checkout repo - uses: actions/checkout@v3 - - # Work around https://github.com/actions/cache/issues/403. 
- - name: Use GNU tar - if: matrix.os == 'macos-latest' - run: | - echo PATH="/usr/local/opt/gnu-tar/libexec/gnubin:$PATH" >> $GITHUB_ENV - - - name: Read Emscripten version - run: | - printf 'EMSCRIPTEN_VERSION=%s\n' "$(cat cli/emscripten-version)" >> $GITHUB_ENV - - - name: Cache artifacts - id: cache - uses: actions/cache@v3 - with: - path: | - ~/.cargo/registry - ~/.cargo/git - target - key: ${{ runner.os }}-cargo-${{ hashFiles('Cargo.lock') }}-emscripten-${{ env.EMSCRIPTEN_VERSION }} - - - name: Install rust - if: steps.cache.outputs.cache-hit != 'true' - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - profile: minimal - components: rustfmt, clippy - - - name: Check Rust code formatting - run: cargo fmt -- --check - - - name: Install emscripten - uses: mymindstorm/setup-emsdk@v12 - with: - version: ${{ env.EMSCRIPTEN_VERSION }} - - - name: Build C library - run: make - - - name: Build wasm library - run: script/build-wasm - - - name: Build CLI - run: | - RUSTFLAGS="-D warnings" - cargo build --release - - - name: Set up fixture parsers - run: | - script/fetch-fixtures - script/generate-fixtures - script/generate-fixtures-wasm - - - name: Run main tests - run: cargo test - - - name: Run wasm tests - run: script/test-wasm - - - name: Run benchmarks - run: script/benchmark - - - name: Compress CLI binary - if: startsWith(github.ref, 'refs/tags/v') - run: | - cp target/release/tree-sitter . 
- export platform=$(echo ${{ runner.os }} | awk '{print tolower($0)}') - gzip --suffix "-${platform}-x64.gz" tree-sitter - - - name: Release - uses: softprops/action-gh-release@v1 - if: startsWith(github.ref, 'refs/tags/v') - with: - draft: true - files: | - tree-sitter-*.gz - lib/binding_web/tree-sitter.js - lib/binding_web/tree-sitter.wasm - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - windows-tests: - name: Windows tests - runs-on: windows-latest - steps: - - name: Checkout repo - uses: actions/checkout@v3 - - - name: Cache artifacts - id: cache - uses: actions/cache@v3 - with: - path: | - ~/.cargo/registry - ~/.cargo/git - target - key: ${{ runner.os }}-cargo-${{ hashFiles('Cargo.lock') }} - - - name: Install rust - if: steps.cache.outputs.cache-hit != 'true' - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - profile: minimal - - - name: Check Rust code formatting - run: cargo fmt -- --check - - - name: Build CLI - run: | - $env:RUSTFLAGS="-D warnings" - cargo build --release - - - name: Set up fixture parsers - run: | - script/fetch-fixtures.cmd - script/generate-fixtures.cmd - - - name: Run main tests - run: script/test diff --git a/.github/workflows/fast_checks.yml b/.github/workflows/fast_checks.yml new file mode 100644 index 00000000..ea474799 --- /dev/null +++ b/.github/workflows/fast_checks.yml @@ -0,0 +1,31 @@ +name: Fast checks to fail fast on any simple code issues + +env: + CARGO_TERM_COLOR: always + RUSTFLAGS: "-D warnings" + +on: + workflow_call: + +jobs: + check_rust_formatting: + name: Check Rust formating + runs-on: ubuntu-latest + steps: + + - name: Checkout source code + uses: actions/checkout@v3 + + - name: Run cargo fmt + run: cargo fmt -- --check + + check_c_warnings: + name: Check C warnings + runs-on: ubuntu-latest + steps: + + - name: Checkout source code + uses: actions/checkout@v3 + + - name: Make C library to check that it's able to compile without warnings + run: make -j CFLAGS="-Werror" diff --git 
a/.github/workflows/full_rust_checks.yml b/.github/workflows/full_rust_checks.yml new file mode 100644 index 00000000..2cc5f77d --- /dev/null +++ b/.github/workflows/full_rust_checks.yml @@ -0,0 +1,32 @@ +name: Full Rust codebase checks + +env: + CARGO_TERM_COLOR: always + RUSTFLAGS: "-D warnings" + +on: + workflow_call: + +jobs: + run: + name: Run checks + runs-on: ubuntu-latest + steps: + + - name: Checkout source code + uses: actions/checkout@v3 + + - name: Install rust toolchain + uses: dtolnay/rust-toolchain@master + with: + toolchain: stable + components: clippy, rustfmt + + - name: Run cargo fmt + run: cargo fmt -- --check + + # - name: Run clippy + # run: cargo clippy --all-targets + + - name: Run cargo check + run: cargo check --workspace --examples --tests --benches --bins diff --git a/.github/workflows/msrv.yml b/.github/workflows/msrv.yml new file mode 100644 index 00000000..3697930e --- /dev/null +++ b/.github/workflows/msrv.yml @@ -0,0 +1,42 @@ +name: Minimum supported rust version + +env: + CARGO_TERM_COLOR: always + RUSTFLAGS: "-D warnings" + +on: + workflow_call: + inputs: + package: + description: Target cargo package name + required: true + type: string + + +jobs: + run: + name: Run checks + runs-on: ubuntu-latest + steps: + + - name: Checkout source code + uses: actions/checkout@v3 + + - name: Get the MSRV from the package metadata + id: msrv + run: cargo metadata --no-deps --format-version 1 | jq -r '"version=" + (.packages[] | select(.name == "${{ inputs.package }}").rust_version)' >> $GITHUB_OUTPUT + + - name: Install rust toolchain (v${{ steps.msrv.outputs.version }}) + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ steps.msrv.outputs.version }} + components: clippy, rustfmt + + - name: Run cargo fmt + run: cargo fmt -- --check + + # - name: Run clippy (on minimum supported rust version to prevent warnings we can't fix) + # run: cargo clippy --all-targets + + # - name: Run main tests + # run: cargo test diff --git 
a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 00000000..e1ad3e05 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,21 @@ +name: Publish to registries + +on: + workflow_call: + +jobs: + crates_io: + name: Publish to Crates.io + runs-on: ubuntu-latest + steps: + - name: Publish packages + run: | + echo "::warning::TODO: add a Crates.io publish logic" + + npm: + name: Publish to npmjs.com + runs-on: ubuntu-latest + steps: + - name: Publish packages + run: | + echo "::warning::TODO: add a npmjs.com publish logic" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..27e969e7 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,101 @@ +name: Release + +on: + workflow_call: + inputs: + ref: + default: ${{ github.ref }} + type: string + +jobs: + permissions: + name: Check permissions + runs-on: ubuntu-latest + outputs: + release_allowed: ${{ steps.maintainer.outputs.is_maintainer == 'true' }} + steps: + + - name: Is maintainer + id: maintainer + env: + GH_TOKEN: ${{ github.token }} + repo: ${{ github.repository }} + actor: ${{ github.actor }} + run: | + maintainer=$( + gh api "/repos/${repo}/collaborators" | + jq ".[] | {login, maintainer: .permissions | .maintain} | select(.login == \"${actor}\") | .maintainer" + ); + if [ "$maintainer" == "true" ]; then + echo "@${actor} has maintainer level permissions :rocket:" >> $GITHUB_STEP_SUMMARY; + echo "is_maintainer=true" >> $GITHUB_OUTPUT + fi + + release: + name: Release + needs: permissions + if: needs.permissions.outputs.release_allowed + runs-on: ubuntu-latest + permissions: + contents: write + steps: + + - name: Checkout source code + uses: actions/checkout@v3 + with: + ref: ${{ inputs.ref }} + + - name: Download build artifacts + uses: actions/download-artifact@v3 + with: + path: artifacts + + - name: Display structure of downloaded files + run: ls -lR + working-directory: artifacts + + - name: 
Prepare release artifacts + run: | + mkdir -p target + mv artifacts/tree-sitter.wasm/* target/ + rm -r artifacts/tree-sitter.wasm + for platform in $(cd artifacts; ls); do + exe=$(ls artifacts/$platform/tree-sitter*) + gzip --stdout --name $exe > target/tree-sitter-$platform.gz + done + rm -rf artifacts + ls -l target/ + + - name: Get tag name from a release/v* branch name + id: tag_name + env: + tag: ${{ github.head_ref }} + run: echo "tag=${tag#release/}" >> $GITHUB_OUTPUT + + - name: Add a release tag + env: + ref: ${{ inputs.ref }} + tag: ${{ steps.tag_name.outputs.tag }} + message: "Release ${{ steps.tag_name.outputs.tag }}" + run: | + git config user.name "${GITHUB_ACTOR}" + git config user.email "${GITHUB_ACTOR}@users.noreply.github.com" + git tag -a "$tag" HEAD -m "$message" + git push origin "$tag" + + - name: Create release + uses: softprops/action-gh-release@v1 + with: + name: ${{ steps.tag_name.outputs.tag }} + tag_name: ${{ steps.tag_name.outputs.tag }} + fail_on_unmatched_files: true + files: | + tree-sitter-*.gz + tree-sitter.wasm + tree-sitter.js + + - name: Merge release PR + env: + GH_TOKEN: ${{ github.token }} + run: | + gh pr merge ${{ github.event.pull_request.html_url }} --match-head-commit $(git rev-parse HEAD) --merge --delete-branch diff --git a/Cargo.lock b/Cargo.lock index 404c269f..4b4437e6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -22,9 +22,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.69" +version = "1.0.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800" +checksum = "7de8ce5e0f9f8d88245311066a578d72b7af3e7088f32783804676302df237e4" [[package]] name = "ascii" @@ -38,7 +38,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", "winapi", ] @@ -128,9 
+128,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "ctor" @@ -139,7 +139,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" dependencies = [ "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -191,13 +191,13 @@ checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" [[package]] name = "errno" -version = "0.2.8" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +checksum = "50d6a0976c999d473fe89ad888d5a284e55366d9dc9038b1ba2aa15128c4afa0" dependencies = [ "errno-dragonfly", "libc", - "winapi", + "windows-sys", ] [[package]] @@ -260,6 +260,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + [[package]] name = "html-escape" version = "0.2.13" @@ -287,9 +293,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.9.2" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown", @@ -306,12 +312,13 @@ dependencies = [ [[package]] name = "io-lifetimes" -version = "1.0.6" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfa919a82ea574332e2de6e74b4c36e74d41982b335080fa59d4ef31be20fdf3" +checksum = 
"09270fd4fa1111bc614ed2246c7ef56239a3063d5be0d1ec3b589c505d400aeb" dependencies = [ + "hermit-abi 0.3.1", "libc", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] @@ -322,16 +329,18 @@ checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" [[package]] name = "jni" -version = "0.20.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "039022cdf4d7b1cf548d31f60ae783138e5fd42013f6271049d7df7afadef96c" +checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" dependencies = [ "cesu8", + "cfg-if", "combine", "jni-sys", "log", "thiserror", "walkdir", + "windows-sys", ] [[package]] @@ -373,9 +382,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.1.4" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" +checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" [[package]] name = "log" @@ -457,9 +466,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.52" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d0e1ae9e836cc3beddd63db0df682593d7e2d3d891ae8c9083d2113e1744224" +checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" dependencies = [ "unicode-ident", ] @@ -471,7 +480,7 @@ dependencies = [ "proc-macro2", "quote", "rand", - "syn", + "syn 1.0.109", ] [[package]] @@ -515,9 +524,9 @@ dependencies = [ [[package]] name = "raw-window-handle" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f851a03551ceefd30132e447f07f96cb7011d6b658374f3aed847333adb5559" +checksum = "f2ff9a1f06a88b01621b7ae906ef0211290d1c8a168a15542486a8f61c0833b9" [[package]] name = "redox_syscall" @@ -528,6 +537,15 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_syscall" +version = "0.3.5" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags", +] + [[package]] name = "redox_users" version = "0.4.3" @@ -535,15 +553,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" dependencies = [ "getrandom", - "redox_syscall", + "redox_syscall 0.2.16", "thiserror", ] [[package]] name = "regex" -version = "1.7.1" +version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" +checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" dependencies = [ "aho-corasick", "memchr", @@ -552,9 +570,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.28" +version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "rustc-hash" @@ -564,16 +582,16 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version = "0.36.9" +version = "0.37.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd5c6ff11fecd55b40746d1995a02f2eb375bf8c00d192d521ee09f42bef37bc" +checksum = "2aae838e49b3d63e9274e1c01833cc8139d3fec468c3b84688c628f44b1ae11d" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] @@ -599,29 +617,29 @@ checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" [[package]] name = "serde" -version = "1.0.156" +version = "1.0.159" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"314b5b092c0ade17c00142951e50ced110ec27cea304b1037c6969246c2469a4" +checksum = "3c04e8343c3daeec41f58990b9d77068df31209f2af111e059e9fe9646693065" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.156" +version = "1.0.159" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7e29c4601e36bcec74a223228dce795f4cd3616341a4af93520ca1a837c087d" +checksum = "4c614d17805b093df4b147b51339e7e44bf05ef59fba1e45d83500bcfb4d8585" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.13", ] [[package]] name = "serde_json" -version = "1.0.94" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c533a59c9d8a93a09c6ab31f0fd5e5f4dd1b8fc9434804029839884765d04ea" +checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744" dependencies = [ "indexmap", "itoa", @@ -653,16 +671,27 @@ dependencies = [ ] [[package]] -name = "tempfile" -version = "3.4.0" +name = "syn" +version = "2.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af18f7ae1acd354b992402e9ec5864359d693cd8a79dcbef59f76891701c1e95" +checksum = "4c9da457c5285ac1f936ebd076af6dac17a61cfe7826f2076b4d015cf47bc8ec" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" dependencies = [ "cfg-if", "fastrand", - "redox_syscall", + "redox_syscall 0.3.5", "rustix", - "windows-sys 0.42.0", + "windows-sys", ] [[package]] @@ -676,22 +705,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.39" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5ab016db510546d856297882807df8da66a16fb8c4101cb8b30054b0d5b2d9c" +checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" dependencies = [ 
"thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.39" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5420d42e90af0c38c3290abcca25b9b3bdf379fc9f55c528f53a269d9c9a267e" +checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.13", ] [[package]] @@ -827,9 +856,9 @@ dependencies = [ [[package]] name = "unicode-bidi" -version = "0.3.11" +version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524b68aca1d05e03fdf03fcdce2c6c94b6daf6d16861ddaa7e4f2b6638a9052c" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-ident" @@ -883,12 +912,11 @@ checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" [[package]] name = "walkdir" -version = "2.3.2" +version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" dependencies = [ "same-file", - "winapi", "winapi-util", ] @@ -919,7 +947,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 1.0.109", "wasm-bindgen-shared", ] @@ -941,7 +969,7 @@ checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -964,9 +992,9 @@ dependencies = [ [[package]] name = "webbrowser" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97d1fa1e5c829b2bf9eb1e28fb950248b797cd6a04866fbdfa8bc31e5eef4c78" +checksum = "579cc485bd5ce5bfa0d738e4921dd0b956eca9800be1fd2e5257ebe95bc4617e" dependencies = [ "core-foundation", "dirs 4.0.0", @@ -1021,21 +1049,6 @@ version = "0.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows-sys" -version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - [[package]] name = "windows-sys" version = "0.45.0" diff --git a/script/generate-fixtures b/script/generate-fixtures index 85298c46..2c3b178a 100755 --- a/script/generate-fixtures +++ b/script/generate-fixtures @@ -2,12 +2,18 @@ set -e -cargo build --release +root_dir=$PWD + +if [ "$CI" == true ]; then + set -x + tree_sitter="$TREE_SITTER" +else + cargo build --release + tree_sitter=${root_dir}/target/release/tree-sitter +fi filter_grammar_name=$1 -root_dir=$PWD -tree_sitter=${root_dir}/target/release/tree-sitter grammars_dir=${root_dir}/test/fixtures/grammars grammar_files=$(find $grammars_dir -name grammar.js | grep -v node_modules) diff --git a/script/generate-fixtures-wasm b/script/generate-fixtures-wasm index 9d44b58c..4bba56ae 100755 --- a/script/generate-fixtures-wasm +++ b/script/generate-fixtures-wasm @@ -2,7 +2,15 @@ set -e -cargo build --release +root_dir=$PWD + +if [ "$CI" == true ]; then + set -x + tree_sitter="$TREE_SITTER" +else + cargo build --release + tree_sitter=${root_dir}/target/release/tree-sitter +fi build_wasm_args= if [[ $1 == "--docker" ]]; then @@ -12,8 +20,6 @@ fi filter_grammar_name=$1 -root_dir=$PWD -tree_sitter=${root_dir}/target/release/tree-sitter grammars_dir=${root_dir}/test/fixtures/grammars grammar_files=$(find $grammars_dir -name grammar.js | grep -v node_modules) From da894afef59e1aefa23470c7db7445096f8f0e65 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Tue, 4 Apr 2023 02:41:42 +0300 Subject: 
[PATCH 061/347] cargo: specify minimum supported Rust version --- Cargo.toml | 5 ++++- cli/Cargo.toml | 1 + cli/config/Cargo.toml | 1 + cli/loader/Cargo.toml | 1 + cli/src/tests/proc_macro/Cargo.toml | 1 + highlight/Cargo.toml | 1 + lib/Cargo.toml | 1 + tags/Cargo.toml | 1 + 8 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index d1e57f89..f69dbc4f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,10 @@ [workspace] default-members = ["cli"] - members = ["cli", "lib"] +resolver = "2" + +[workspace.package] +rust-version = "1.65" [profile.release] strip = true diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 47e03284..f9f8ca4b 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -9,6 +9,7 @@ readme = "README.md" keywords = ["incremental", "parsing"] categories = ["command-line-utilities", "parsing"] repository = "https://github.com/tree-sitter/tree-sitter" +rust-version.workspace = true [[bin]] name = "tree-sitter" diff --git a/cli/config/Cargo.toml b/cli/config/Cargo.toml index ab3808ad..114d6ce8 100644 --- a/cli/config/Cargo.toml +++ b/cli/config/Cargo.toml @@ -9,6 +9,7 @@ readme = "README.md" keywords = ["incremental", "parsing"] categories = ["command-line-utilities", "parsing"] repository = "https://github.com/tree-sitter/tree-sitter" +rust-version.workspace = true [dependencies] anyhow = "1.0" diff --git a/cli/loader/Cargo.toml b/cli/loader/Cargo.toml index 3a665ea9..6af28f30 100644 --- a/cli/loader/Cargo.toml +++ b/cli/loader/Cargo.toml @@ -9,6 +9,7 @@ readme = "README.md" keywords = ["incremental", "parsing"] categories = ["command-line-utilities", "parsing"] repository = "https://github.com/tree-sitter/tree-sitter" +rust-version.workspace = true [dependencies] anyhow = "1.0" diff --git a/cli/src/tests/proc_macro/Cargo.toml b/cli/src/tests/proc_macro/Cargo.toml index a9a2b146..e6900d10 100644 --- a/cli/src/tests/proc_macro/Cargo.toml +++ b/cli/src/tests/proc_macro/Cargo.toml @@ -3,6 +3,7 @@ name = "proc_macro" version = 
"0.1.0" edition = "2021" publish = false +rust-version.workspace = true [lib] proc-macro = true diff --git a/highlight/Cargo.toml b/highlight/Cargo.toml index 0b0de18e..e85ced8e 100644 --- a/highlight/Cargo.toml +++ b/highlight/Cargo.toml @@ -12,6 +12,7 @@ edition = "2018" keywords = ["incremental", "parsing", "syntax", "highlighting"] categories = ["parsing", "text-editors"] repository = "https://github.com/tree-sitter/tree-sitter" +rust-version.workspace = true [lib] crate-type = ["lib", "staticlib"] diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 16fd0254..39e07916 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -9,6 +9,7 @@ readme = "binding_rust/README.md" keywords = ["incremental", "parsing"] categories = ["api-bindings", "parsing", "text-editors"] repository = "https://github.com/tree-sitter/tree-sitter" +rust-version.workspace = true build = "binding_rust/build.rs" diff --git a/tags/Cargo.toml b/tags/Cargo.toml index d954a573..99d053e8 100644 --- a/tags/Cargo.toml +++ b/tags/Cargo.toml @@ -12,6 +12,7 @@ edition = "2018" keywords = ["incremental", "parsing", "syntax", "tagging"] categories = ["parsing", "text-editors"] repository = "https://github.com/tree-sitter/tree-sitter" +rust-version.workspace = true [lib] crate-type = ["lib", "staticlib"] From 34a83dfe695ff80a9d610df2fcbc1925b037d092 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Tue, 4 Apr 2023 03:08:08 +0300 Subject: [PATCH 062/347] cicd: update status badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f3c4abc2..34390187 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # tree-sitter -[![Build Status](https://github.com/tree-sitter/tree-sitter/workflows/CI/badge.svg)](https://github.com/tree-sitter/tree-sitter/actions) +[![CICD](https://github.com/tree-sitter/tree-sitter/actions/workflows/CICD.yml/badge.svg)](https://github.com/tree-sitter/tree-sitter/actions/workflows/CICD.yml) 
[![DOI](https://zenodo.org/badge/14164618.svg)](https://zenodo.org/badge/latestdoi/14164618) Tree-sitter is a parser generator tool and an incremental parsing library. It can build a concrete syntax tree for a source file and efficiently update the syntax tree as the source file is edited. Tree-sitter aims to be: From 404ae5f49adb52fc1d3f2369e0b744c2e4ade2a7 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Tue, 4 Apr 2023 05:03:52 +0300 Subject: [PATCH 063/347] cicd: fix bug in release workflow --- .github/workflows/release.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 27e969e7..87a06761 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -59,8 +59,8 @@ jobs: mkdir -p target mv artifacts/tree-sitter.wasm/* target/ rm -r artifacts/tree-sitter.wasm - for platform in $(cd artifacts; ls); do - exe=$(ls artifacts/$platform/tree-sitter*) + for platform in $(cd artifacts; ls | sed 's/^tree-sitter\.//'); do + exe=$(ls artifacts/tree-sitter.$platform/tree-sitter*) gzip --stdout --name $exe > target/tree-sitter-$platform.gz done rm -rf artifacts @@ -90,9 +90,9 @@ jobs: tag_name: ${{ steps.tag_name.outputs.tag }} fail_on_unmatched_files: true files: | - tree-sitter-*.gz - tree-sitter.wasm - tree-sitter.js + target/tree-sitter-*.gz + target/tree-sitter.wasm + target/tree-sitter.js - name: Merge release PR env: From 0c49d6745b3fc4822ab02e0018770cd6383a779c Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Tue, 4 Apr 2023 12:10:31 +0300 Subject: [PATCH 064/347] 0.20.8 - recovered --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4b4437e6..956a3f41 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -366,9 +366,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.140" +version = "0.2.141" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" +checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" [[package]] name = "libloading" From c1f784498f35f2d69e8b53824e1fd5a5d97eed65 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Tue, 4 Apr 2023 03:34:03 +0300 Subject: [PATCH 065/347] chore: nit picking in internal proc_macro crate --- Cargo.lock | 22 +++++++++++----------- cli/Cargo.toml | 2 +- cli/src/tests/proc_macro/Cargo.toml | 4 ++-- cli/src/tests/proc_macro/src/lib.rs | 4 ++-- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 956a3f41..52013e49 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -473,16 +473,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "proc_macro" -version = "0.1.0" -dependencies = [ - "proc-macro2", - "quote", - "rand", - "syn 1.0.109", -] - [[package]] name = "quote" version = "1.0.26" @@ -785,7 +775,6 @@ dependencies = [ "lazy_static", "log", "pretty_assertions", - "proc_macro", "rand", "regex", "regex-syntax", @@ -802,6 +791,7 @@ dependencies = [ "tree-sitter-highlight", "tree-sitter-loader", "tree-sitter-tags", + "tree-sitter-tests-proc-macro", "unindent", "walkdir", "webbrowser", @@ -854,6 +844,16 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "tree-sitter-tests-proc-macro" +version = "0.0.0" +dependencies = [ + "proc-macro2", + "quote", + "rand", + "syn 1.0.109", +] + [[package]] name = "unicode-bidi" version = "0.3.13" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index f9f8ca4b..48473095 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -70,7 +70,7 @@ version = "0.4.6" features = ["std"] [dev-dependencies] -proc_macro = { path = "src/tests/proc_macro" } +proc_macro = { path = "src/tests/proc_macro", package = "tree-sitter-tests-proc-macro" } rand = "0.8" tempfile = "3" diff --git a/cli/src/tests/proc_macro/Cargo.toml b/cli/src/tests/proc_macro/Cargo.toml 
index e6900d10..9db4025b 100644 --- a/cli/src/tests/proc_macro/Cargo.toml +++ b/cli/src/tests/proc_macro/Cargo.toml @@ -1,6 +1,6 @@ [package] -name = "proc_macro" -version = "0.1.0" +name = "tree-sitter-tests-proc-macro" +version = "0.0.0" edition = "2021" publish = false rust-version.workspace = true diff --git a/cli/src/tests/proc_macro/src/lib.rs b/cli/src/tests/proc_macro/src/lib.rs index d831a75b..4d92479f 100644 --- a/cli/src/tests/proc_macro/src/lib.rs +++ b/cli/src/tests/proc_macro/src/lib.rs @@ -82,8 +82,8 @@ pub fn test_with_seed(args: TokenStream, input: TokenStream) -> TokenStream { } Ok(Args { - retry: retry.expect("`retry` parameter is requred"), - seed: seed.expect("`initial_seed` parameter is required"), + retry: retry.expect("`retry` parameter is required"), + seed: seed.expect("`seed` parameter is required"), seed_fn, }) } From 3d396e120b6cd574cdfbaf72a6cd0d14ae79bf38 Mon Sep 17 00:00:00 2001 From: Vhyrro Date: Mon, 13 Mar 2023 18:52:39 +0100 Subject: [PATCH 066/347] feat(rust_bindings): provide `into_raw()` functions for treesitter structs --- lib/binding_rust/lib.rs | 42 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 579bf8e2..568bad25 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1,4 +1,4 @@ -mod ffi; +pub mod ffi; mod util; #[cfg(unix)] @@ -9,7 +9,7 @@ use std::{ ffi::CStr, fmt, hash, iter, marker::PhantomData, - mem::MaybeUninit, + mem::{ManuallyDrop, MaybeUninit}, ops, os::raw::{c_char, c_void}, ptr::{self, NonNull}, @@ -334,6 +334,11 @@ impl Language { Some(id) } } + + /// Consumes the [Language], returning a raw pointer to the underlying C structure. 
+ pub fn into_raw(self) -> *const ffi::TSLanguage { + self.0 + } } impl Parser { @@ -691,6 +696,14 @@ impl Parser { ffi::ts_parser_set_cancellation_flag(self.0.as_ptr(), ptr::null()); } } + + /// Consumes the [Parser], returning a raw pointer to the underlying C structure. + pub fn into_raw(mut self) -> *mut ffi::TSParser { + self.stop_printing_dot_graphs(); + self.set_logger(None); + + ManuallyDrop::new(self).0.as_ptr() + } } impl Drop for Parser { @@ -785,6 +798,11 @@ impl Tree { let fd = file.as_raw_fd(); unsafe { ffi::ts_tree_print_dot_graph(self.0.as_ptr(), fd) } } + + /// Consumes the [Tree], returning a raw pointer to the underlying C structure. + pub fn into_raw(self) -> *mut ffi::TSTree { + ManuallyDrop::new(self).0.as_ptr() + } } impl fmt::Debug for Tree { @@ -1190,6 +1208,11 @@ impl<'tree> Node<'tree> { let edit = edit.into(); unsafe { ffi::ts_node_edit(&mut self.0 as *mut ffi::TSNode, &edit) } } + + /// Consumes the [Node], returning a raw pointer to the underlying C structure. + pub fn into_raw(self) -> *mut ffi::TSNode { + &mut ManuallyDrop::new(self).0 + } } impl<'a> PartialEq for Node<'a> { @@ -1324,6 +1347,11 @@ impl<'a> TreeCursor<'a> { pub fn reset(&mut self, node: Node<'a>) { unsafe { ffi::ts_tree_cursor_reset(&mut self.0, node.0) }; } + + /// Consumes the [TreeCursor], returning a raw pointer to the underlying C structure. + pub fn into_raw(self) -> *mut ffi::TSTreeCursor { + &mut ManuallyDrop::new(self).0 + } } impl<'a> Clone for TreeCursor<'a> { @@ -1819,6 +1847,11 @@ impl Query { )); } } + + /// Consumes the [Query], returning a raw pointer to the underlying C structure. + pub fn into_raw(self) -> *mut ffi::TSQuery { + ManuallyDrop::new(self).ptr.as_ptr() + } } impl QueryCursor { @@ -1926,6 +1959,11 @@ impl QueryCursor { } self } + + /// Consumes the [QueryCursor], returning a raw pointer to the underlying C structure. 
+ pub fn into_raw(self) -> *mut ffi::TSQueryCursor { + ManuallyDrop::new(self).ptr.as_ptr() + } } impl<'a, 'tree> QueryMatch<'a, 'tree> { From 576e4c7d0682103870acee87075716f424f71d90 Mon Sep 17 00:00:00 2001 From: Vhyrro Date: Mon, 13 Mar 2023 19:06:12 +0100 Subject: [PATCH 067/347] fix(rust_bindings): use-after-free for `Language::into_raw()` --- lib/binding_rust/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 568bad25..16e3cd4b 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -337,7 +337,7 @@ impl Language { /// Consumes the [Language], returning a raw pointer to the underlying C structure. pub fn into_raw(self) -> *const ffi::TSLanguage { - self.0 + ManuallyDrop::new(self).0 } } From 6c2957c8d3c5311f30b3468f55fbb2573ce1c9ff Mon Sep 17 00:00:00 2001 From: Vhyrro Date: Tue, 14 Mar 2023 19:34:18 +0100 Subject: [PATCH 068/347] ref(rust_bindings): move `into_raw()` functions into the `ffi` module --- lib/binding_rust/ffi.rs | 55 +++++++++++++++++++++++++++++++++++++++++ lib/binding_rust/lib.rs | 40 +----------------------------- 2 files changed, 56 insertions(+), 39 deletions(-) diff --git a/lib/binding_rust/ffi.rs b/lib/binding_rust/ffi.rs index 685ed765..9f900c35 100644 --- a/lib/binding_rust/ffi.rs +++ b/lib/binding_rust/ffi.rs @@ -7,3 +7,58 @@ include!("./bindings.rs"); extern "C" { pub(crate) fn dup(fd: std::os::raw::c_int) -> std::os::raw::c_int; } + +use crate::{Language, Node, Parser, Query, QueryCursor, Tree, TreeCursor}; +use std::mem::ManuallyDrop; + +impl Language { + /// Consumes the [Language], returning a raw pointer to the underlying C structure. + pub fn into_raw(self) -> *const TSLanguage { + ManuallyDrop::new(self).0 + } +} + +impl Parser { + /// Consumes the [Parser], returning a raw pointer to the underlying C structure. 
+ pub fn into_raw(mut self) -> *mut TSParser { + self.stop_printing_dot_graphs(); + self.set_logger(None); + + ManuallyDrop::new(self).0.as_ptr() + } +} + +impl Tree { + /// Consumes the [Tree], returning a raw pointer to the underlying C structure. + pub fn into_raw(self) -> *mut TSTree { + ManuallyDrop::new(self).0.as_ptr() + } +} + +impl<'tree> Node<'tree> { + /// Consumes the [Node], returning a raw pointer to the underlying C structure. + pub fn into_raw(self) -> *mut TSNode { + &mut ManuallyDrop::new(self).0 + } +} + +impl<'a> TreeCursor<'a> { + /// Consumes the [TreeCursor], returning a raw pointer to the underlying C structure. + pub fn into_raw(self) -> *mut TSTreeCursor { + &mut ManuallyDrop::new(self).0 + } +} + +impl Query { + /// Consumes the [Query], returning a raw pointer to the underlying C structure. + pub fn into_raw(self) -> *mut TSQuery { + ManuallyDrop::new(self).ptr.as_ptr() + } +} + +impl QueryCursor { + /// Consumes the [QueryCursor], returning a raw pointer to the underlying C structure. + pub fn into_raw(self) -> *mut TSQueryCursor { + ManuallyDrop::new(self).ptr.as_ptr() + } +} diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 16e3cd4b..3746bcdd 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -9,7 +9,7 @@ use std::{ ffi::CStr, fmt, hash, iter, marker::PhantomData, - mem::{ManuallyDrop, MaybeUninit}, + mem::MaybeUninit, ops, os::raw::{c_char, c_void}, ptr::{self, NonNull}, @@ -334,11 +334,6 @@ impl Language { Some(id) } } - - /// Consumes the [Language], returning a raw pointer to the underlying C structure. - pub fn into_raw(self) -> *const ffi::TSLanguage { - ManuallyDrop::new(self).0 - } } impl Parser { @@ -696,14 +691,6 @@ impl Parser { ffi::ts_parser_set_cancellation_flag(self.0.as_ptr(), ptr::null()); } } - - /// Consumes the [Parser], returning a raw pointer to the underlying C structure. 
- pub fn into_raw(mut self) -> *mut ffi::TSParser { - self.stop_printing_dot_graphs(); - self.set_logger(None); - - ManuallyDrop::new(self).0.as_ptr() - } } impl Drop for Parser { @@ -798,11 +785,6 @@ impl Tree { let fd = file.as_raw_fd(); unsafe { ffi::ts_tree_print_dot_graph(self.0.as_ptr(), fd) } } - - /// Consumes the [Tree], returning a raw pointer to the underlying C structure. - pub fn into_raw(self) -> *mut ffi::TSTree { - ManuallyDrop::new(self).0.as_ptr() - } } impl fmt::Debug for Tree { @@ -1208,11 +1190,6 @@ impl<'tree> Node<'tree> { let edit = edit.into(); unsafe { ffi::ts_node_edit(&mut self.0 as *mut ffi::TSNode, &edit) } } - - /// Consumes the [Node], returning a raw pointer to the underlying C structure. - pub fn into_raw(self) -> *mut ffi::TSNode { - &mut ManuallyDrop::new(self).0 - } } impl<'a> PartialEq for Node<'a> { @@ -1347,11 +1324,6 @@ impl<'a> TreeCursor<'a> { pub fn reset(&mut self, node: Node<'a>) { unsafe { ffi::ts_tree_cursor_reset(&mut self.0, node.0) }; } - - /// Consumes the [TreeCursor], returning a raw pointer to the underlying C structure. - pub fn into_raw(self) -> *mut ffi::TSTreeCursor { - &mut ManuallyDrop::new(self).0 - } } impl<'a> Clone for TreeCursor<'a> { @@ -1847,11 +1819,6 @@ impl Query { )); } } - - /// Consumes the [Query], returning a raw pointer to the underlying C structure. - pub fn into_raw(self) -> *mut ffi::TSQuery { - ManuallyDrop::new(self).ptr.as_ptr() - } } impl QueryCursor { @@ -1959,11 +1926,6 @@ impl QueryCursor { } self } - - /// Consumes the [QueryCursor], returning a raw pointer to the underlying C structure. 
- pub fn into_raw(self) -> *mut ffi::TSQueryCursor { - ManuallyDrop::new(self).ptr.as_ptr() - } } impl<'a, 'tree> QueryMatch<'a, 'tree> { From e5357dc5adfecb94923e6a2a0a2d7170eb647cc2 Mon Sep 17 00:00:00 2001 From: Vhyrro Date: Tue, 14 Mar 2023 21:32:48 +0100 Subject: [PATCH 069/347] ref(rust_bindings): add `from_raw()` functions --- lib/binding_rust/ffi.rs | 282 +++++++++++++++++++++++++++++++++++++++- lib/binding_rust/lib.rs | 214 +----------------------------- 2 files changed, 282 insertions(+), 214 deletions(-) diff --git a/lib/binding_rust/ffi.rs b/lib/binding_rust/ffi.rs index 9f900c35..804c1cfc 100644 --- a/lib/binding_rust/ffi.rs +++ b/lib/binding_rust/ffi.rs @@ -8,10 +8,22 @@ extern "C" { pub(crate) fn dup(fd: std::os::raw::c_int) -> std::os::raw::c_int; } -use crate::{Language, Node, Parser, Query, QueryCursor, Tree, TreeCursor}; -use std::mem::ManuallyDrop; +use crate::{ + predicate_error, Language, Node, Parser, Query, QueryCursor, QueryError, QueryPredicate, + QueryPredicateArg, TextPredicate, Tree, TreeCursor, +}; +use std::{marker::PhantomData, mem::ManuallyDrop, ptr::NonNull, slice, str}; impl Language { + /// Reconstructs a [Language] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. + pub unsafe fn from_raw(ptr: *mut TSLanguage) -> Language { + Language(ptr) + } + /// Consumes the [Language], returning a raw pointer to the underlying C structure. pub fn into_raw(self) -> *const TSLanguage { ManuallyDrop::new(self).0 @@ -19,6 +31,15 @@ impl Language { } impl Parser { + /// Reconstructs a [Parser] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. + pub unsafe fn from_raw(ptr: *mut TSParser) -> Parser { + Parser(NonNull::new_unchecked(ptr)) + } + /// Consumes the [Parser], returning a raw pointer to the underlying C structure. pub fn into_raw(mut self) -> *mut TSParser { self.stop_printing_dot_graphs(); @@ -29,6 +50,15 @@ impl Parser { } impl Tree { + /// Reconstructs a [Tree] from a raw pointer. 
+ /// + /// # Safety + /// + /// `ptr` must be non-null. + pub unsafe fn from_raw(ptr: *mut TSTree) -> Tree { + Tree(NonNull::new_unchecked(ptr)) + } + /// Consumes the [Tree], returning a raw pointer to the underlying C structure. pub fn into_raw(self) -> *mut TSTree { ManuallyDrop::new(self).0.as_ptr() @@ -36,6 +66,15 @@ impl Tree { } impl<'tree> Node<'tree> { + /// Reconstructs a [Node] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. + pub unsafe fn from_raw(ptr: *mut TSNode) -> Node<'tree> { + Node(*ptr, PhantomData) + } + /// Consumes the [Node], returning a raw pointer to the underlying C structure. pub fn into_raw(self) -> *mut TSNode { &mut ManuallyDrop::new(self).0 @@ -43,6 +82,15 @@ impl<'tree> Node<'tree> { } impl<'a> TreeCursor<'a> { + /// Reconstructs a [TreeCursor] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. + pub unsafe fn from_raw(ptr: *mut TSTreeCursor) -> TreeCursor<'a> { + TreeCursor(*ptr, PhantomData) + } + /// Consumes the [TreeCursor], returning a raw pointer to the underlying C structure. pub fn into_raw(self) -> *mut TSTreeCursor { &mut ManuallyDrop::new(self).0 @@ -50,6 +98,225 @@ impl<'a> TreeCursor<'a> { } impl Query { + /// Reconstructs a [Query] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. 
+ pub unsafe fn from_raw(ptr: *mut TSQuery, source: &str) -> Result { + let string_count = unsafe { ts_query_string_count(ptr) }; + let capture_count = unsafe { ts_query_capture_count(ptr) }; + let pattern_count = unsafe { ts_query_pattern_count(ptr) as usize }; + let mut result = Query { + ptr: unsafe { NonNull::new_unchecked(ptr) }, + capture_names: Vec::with_capacity(capture_count as usize), + capture_quantifiers: Vec::with_capacity(pattern_count as usize), + text_predicates: Vec::with_capacity(pattern_count), + property_predicates: Vec::with_capacity(pattern_count), + property_settings: Vec::with_capacity(pattern_count), + general_predicates: Vec::with_capacity(pattern_count), + }; + + // Build a vector of strings to store the capture names. + for i in 0..capture_count { + unsafe { + let mut length = 0u32; + let name = + ts_query_capture_name_for_id(ptr, i, &mut length as *mut u32) as *const u8; + let name = slice::from_raw_parts(name, length as usize); + let name = str::from_utf8_unchecked(name); + result.capture_names.push(name.to_string()); + } + } + + // Build a vector to store capture qunatifiers. + for i in 0..pattern_count { + let mut capture_quantifiers = Vec::with_capacity(capture_count as usize); + for j in 0..capture_count { + unsafe { + let quantifier = ts_query_capture_quantifier_for_id(ptr, i as u32, j); + capture_quantifiers.push(quantifier.into()); + } + } + result.capture_quantifiers.push(capture_quantifiers); + } + + // Build a vector of strings to represent literal values used in predicates. + let string_values = (0..string_count) + .map(|i| unsafe { + let mut length = 0u32; + let value = ts_query_string_value_for_id(ptr, i as u32, &mut length as *mut u32) + as *const u8; + let value = slice::from_raw_parts(value, length as usize); + let value = str::from_utf8_unchecked(value); + value.to_string() + }) + .collect::>(); + + // Build a vector of predicates for each pattern. 
+ for i in 0..pattern_count { + let predicate_steps = unsafe { + let mut length = 0u32; + let raw_predicates = + ts_query_predicates_for_pattern(ptr, i as u32, &mut length as *mut u32); + if length > 0 { + slice::from_raw_parts(raw_predicates, length as usize) + } else { + &[] + } + }; + + let byte_offset = unsafe { ts_query_start_byte_for_pattern(ptr, i as u32) }; + let row = source + .char_indices() + .take_while(|(i, _)| *i < byte_offset as usize) + .filter(|(_, c)| *c == '\n') + .count(); + + let type_done = TSQueryPredicateStepType_TSQueryPredicateStepTypeDone; + let type_capture = TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture; + let type_string = TSQueryPredicateStepType_TSQueryPredicateStepTypeString; + + let mut text_predicates = Vec::new(); + let mut property_predicates = Vec::new(); + let mut property_settings = Vec::new(); + let mut general_predicates = Vec::new(); + for p in predicate_steps.split(|s| s.type_ == type_done) { + if p.is_empty() { + continue; + } + + if p[0].type_ != type_string { + return Err(predicate_error( + row, + format!( + "Expected predicate to start with a function name. Got @{}.", + result.capture_names[p[0].value_id as usize], + ), + )); + } + + // Build a predicate for each of the known predicate function names. + let operator_name = &string_values[p[0].value_id as usize]; + match operator_name.as_str() { + "eq?" | "not-eq?" => { + if p.len() != 3 { + return Err(predicate_error( + row, + format!( + "Wrong number of arguments to #eq? predicate. Expected 2, got {}.", + p.len() - 1 + ), + )); + } + if p[1].type_ != type_capture { + return Err(predicate_error(row, format!( + "First argument to #eq? predicate must be a capture name. 
Got literal \"{}\".", + string_values[p[1].value_id as usize], + ))); + } + + let is_positive = operator_name == "eq?"; + text_predicates.push(if p[2].type_ == type_capture { + TextPredicate::CaptureEqCapture( + p[1].value_id, + p[2].value_id, + is_positive, + ) + } else { + TextPredicate::CaptureEqString( + p[1].value_id, + string_values[p[2].value_id as usize].clone(), + is_positive, + ) + }); + } + + "match?" | "not-match?" => { + if p.len() != 3 { + return Err(predicate_error(row, format!( + "Wrong number of arguments to #match? predicate. Expected 2, got {}.", + p.len() - 1 + ))); + } + if p[1].type_ != type_capture { + return Err(predicate_error(row, format!( + "First argument to #match? predicate must be a capture name. Got literal \"{}\".", + string_values[p[1].value_id as usize], + ))); + } + if p[2].type_ == type_capture { + return Err(predicate_error(row, format!( + "Second argument to #match? predicate must be a literal. Got capture @{}.", + result.capture_names[p[2].value_id as usize], + ))); + } + + let is_positive = operator_name == "match?"; + let regex = &string_values[p[2].value_id as usize]; + text_predicates.push(TextPredicate::CaptureMatchString( + p[1].value_id, + regex::bytes::Regex::new(regex).map_err(|_| { + predicate_error(row, format!("Invalid regex '{}'", regex)) + })?, + is_positive, + )); + } + + "set!" => property_settings.push(Self::parse_property( + row, + &operator_name, + &result.capture_names, + &string_values, + &p[1..], + )?), + + "is?" | "is-not?" => property_predicates.push(( + Self::parse_property( + row, + &operator_name, + &result.capture_names, + &string_values, + &p[1..], + )?, + operator_name == "is?", + )), + + _ => general_predicates.push(QueryPredicate { + operator: operator_name.clone().into_boxed_str(), + args: p[1..] 
+ .iter() + .map(|a| { + if a.type_ == type_capture { + QueryPredicateArg::Capture(a.value_id) + } else { + QueryPredicateArg::String( + string_values[a.value_id as usize].clone().into_boxed_str(), + ) + } + }) + .collect(), + }), + } + } + + result + .text_predicates + .push(text_predicates.into_boxed_slice()); + result + .property_predicates + .push(property_predicates.into_boxed_slice()); + result + .property_settings + .push(property_settings.into_boxed_slice()); + result + .general_predicates + .push(general_predicates.into_boxed_slice()); + } + + Ok(result) + } + /// Consumes the [Query], returning a raw pointer to the underlying C structure. pub fn into_raw(self) -> *mut TSQuery { ManuallyDrop::new(self).ptr.as_ptr() @@ -57,6 +324,17 @@ impl Query { } impl QueryCursor { + /// Reconstructs a [QueryCursor] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. + pub unsafe fn from_raw(ptr: *mut TSQueryCursor) -> QueryCursor { + QueryCursor { + ptr: NonNull::new_unchecked(ptr), + } + } + /// Consumes the [QueryCursor], returning a raw pointer to the underlying C structure. 
pub fn into_raw(self) -> *mut TSQueryCursor { ManuallyDrop::new(self).ptr.as_ptr() diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 3746bcdd..c2a0ead0 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1434,217 +1434,7 @@ impl Query { }); } - let string_count = unsafe { ffi::ts_query_string_count(ptr) }; - let capture_count = unsafe { ffi::ts_query_capture_count(ptr) }; - let pattern_count = unsafe { ffi::ts_query_pattern_count(ptr) as usize }; - let mut result = Query { - ptr: unsafe { NonNull::new_unchecked(ptr) }, - capture_names: Vec::with_capacity(capture_count as usize), - capture_quantifiers: Vec::with_capacity(pattern_count as usize), - text_predicates: Vec::with_capacity(pattern_count), - property_predicates: Vec::with_capacity(pattern_count), - property_settings: Vec::with_capacity(pattern_count), - general_predicates: Vec::with_capacity(pattern_count), - }; - - // Build a vector of strings to store the capture names. - for i in 0..capture_count { - unsafe { - let mut length = 0u32; - let name = - ffi::ts_query_capture_name_for_id(ptr, i, &mut length as *mut u32) as *const u8; - let name = slice::from_raw_parts(name, length as usize); - let name = str::from_utf8_unchecked(name); - result.capture_names.push(name.to_string()); - } - } - - // Build a vector to store capture qunatifiers. - for i in 0..pattern_count { - let mut capture_quantifiers = Vec::with_capacity(capture_count as usize); - for j in 0..capture_count { - unsafe { - let quantifier = ffi::ts_query_capture_quantifier_for_id(ptr, i as u32, j); - capture_quantifiers.push(quantifier.into()); - } - } - result.capture_quantifiers.push(capture_quantifiers); - } - - // Build a vector of strings to represent literal values used in predicates. 
- let string_values = (0..string_count) - .map(|i| unsafe { - let mut length = 0u32; - let value = - ffi::ts_query_string_value_for_id(ptr, i as u32, &mut length as *mut u32) - as *const u8; - let value = slice::from_raw_parts(value, length as usize); - let value = str::from_utf8_unchecked(value); - value.to_string() - }) - .collect::>(); - - // Build a vector of predicates for each pattern. - for i in 0..pattern_count { - let predicate_steps = unsafe { - let mut length = 0u32; - let raw_predicates = - ffi::ts_query_predicates_for_pattern(ptr, i as u32, &mut length as *mut u32); - if length > 0 { - slice::from_raw_parts(raw_predicates, length as usize) - } else { - &[] - } - }; - - let byte_offset = unsafe { ffi::ts_query_start_byte_for_pattern(ptr, i as u32) }; - let row = source - .char_indices() - .take_while(|(i, _)| *i < byte_offset as usize) - .filter(|(_, c)| *c == '\n') - .count(); - - let type_done = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeDone; - let type_capture = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture; - let type_string = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeString; - - let mut text_predicates = Vec::new(); - let mut property_predicates = Vec::new(); - let mut property_settings = Vec::new(); - let mut general_predicates = Vec::new(); - for p in predicate_steps.split(|s| s.type_ == type_done) { - if p.is_empty() { - continue; - } - - if p[0].type_ != type_string { - return Err(predicate_error( - row, - format!( - "Expected predicate to start with a function name. Got @{}.", - result.capture_names[p[0].value_id as usize], - ), - )); - } - - // Build a predicate for each of the known predicate function names. - let operator_name = &string_values[p[0].value_id as usize]; - match operator_name.as_str() { - "eq?" | "not-eq?" => { - if p.len() != 3 { - return Err(predicate_error( - row, - format!( - "Wrong number of arguments to #eq? predicate. 
Expected 2, got {}.", - p.len() - 1 - ), - )); - } - if p[1].type_ != type_capture { - return Err(predicate_error(row, format!( - "First argument to #eq? predicate must be a capture name. Got literal \"{}\".", - string_values[p[1].value_id as usize], - ))); - } - - let is_positive = operator_name == "eq?"; - text_predicates.push(if p[2].type_ == type_capture { - TextPredicate::CaptureEqCapture( - p[1].value_id, - p[2].value_id, - is_positive, - ) - } else { - TextPredicate::CaptureEqString( - p[1].value_id, - string_values[p[2].value_id as usize].clone(), - is_positive, - ) - }); - } - - "match?" | "not-match?" => { - if p.len() != 3 { - return Err(predicate_error(row, format!( - "Wrong number of arguments to #match? predicate. Expected 2, got {}.", - p.len() - 1 - ))); - } - if p[1].type_ != type_capture { - return Err(predicate_error(row, format!( - "First argument to #match? predicate must be a capture name. Got literal \"{}\".", - string_values[p[1].value_id as usize], - ))); - } - if p[2].type_ == type_capture { - return Err(predicate_error(row, format!( - "Second argument to #match? predicate must be a literal. Got capture @{}.", - result.capture_names[p[2].value_id as usize], - ))); - } - - let is_positive = operator_name == "match?"; - let regex = &string_values[p[2].value_id as usize]; - text_predicates.push(TextPredicate::CaptureMatchString( - p[1].value_id, - regex::bytes::Regex::new(regex).map_err(|_| { - predicate_error(row, format!("Invalid regex '{}'", regex)) - })?, - is_positive, - )); - } - - "set!" => property_settings.push(Self::parse_property( - row, - &operator_name, - &result.capture_names, - &string_values, - &p[1..], - )?), - - "is?" | "is-not?" => property_predicates.push(( - Self::parse_property( - row, - &operator_name, - &result.capture_names, - &string_values, - &p[1..], - )?, - operator_name == "is?", - )), - - _ => general_predicates.push(QueryPredicate { - operator: operator_name.clone().into_boxed_str(), - args: p[1..] 
- .iter() - .map(|a| { - if a.type_ == type_capture { - QueryPredicateArg::Capture(a.value_id) - } else { - QueryPredicateArg::String( - string_values[a.value_id as usize].clone().into_boxed_str(), - ) - } - }) - .collect(), - }), - } - } - - result - .text_predicates - .push(text_predicates.into_boxed_slice()); - result - .property_predicates - .push(property_predicates.into_boxed_slice()); - result - .property_settings - .push(property_settings.into_boxed_slice()); - result - .general_predicates - .push(general_predicates.into_boxed_slice()); - } - Ok(result) + unsafe { Query::from_raw(ptr, source) } } /// Get the byte offset where the given pattern starts in the query's source. @@ -2291,7 +2081,7 @@ impl<'a> Iterator for LossyUtf8<'a> { } } -fn predicate_error(row: usize, message: String) -> QueryError { +pub(crate) fn predicate_error(row: usize, message: String) -> QueryError { QueryError { kind: QueryErrorKind::Predicate, row, From d56506b72714d0f57814866e670ccf0440e48d86 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Wed, 15 Mar 2023 11:24:53 +0200 Subject: [PATCH 070/347] ref(rust_bindings): keep Query construction logic in a one place --- lib/binding_rust/ffi.rs | 219 +-------------------------------------- lib/binding_rust/lib.rs | 220 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 221 insertions(+), 218 deletions(-) diff --git a/lib/binding_rust/ffi.rs b/lib/binding_rust/ffi.rs index 804c1cfc..53032db3 100644 --- a/lib/binding_rust/ffi.rs +++ b/lib/binding_rust/ffi.rs @@ -8,11 +8,8 @@ extern "C" { pub(crate) fn dup(fd: std::os::raw::c_int) -> std::os::raw::c_int; } -use crate::{ - predicate_error, Language, Node, Parser, Query, QueryCursor, QueryError, QueryPredicate, - QueryPredicateArg, TextPredicate, Tree, TreeCursor, -}; -use std::{marker::PhantomData, mem::ManuallyDrop, ptr::NonNull, slice, str}; +use crate::{Language, Node, Parser, Query, QueryCursor, QueryError, Tree, TreeCursor}; +use std::{marker::PhantomData, mem::ManuallyDrop, 
ptr::NonNull, str}; impl Language { /// Reconstructs a [Language] from a raw pointer. @@ -104,217 +101,7 @@ impl Query { /// /// `ptr` must be non-null. pub unsafe fn from_raw(ptr: *mut TSQuery, source: &str) -> Result { - let string_count = unsafe { ts_query_string_count(ptr) }; - let capture_count = unsafe { ts_query_capture_count(ptr) }; - let pattern_count = unsafe { ts_query_pattern_count(ptr) as usize }; - let mut result = Query { - ptr: unsafe { NonNull::new_unchecked(ptr) }, - capture_names: Vec::with_capacity(capture_count as usize), - capture_quantifiers: Vec::with_capacity(pattern_count as usize), - text_predicates: Vec::with_capacity(pattern_count), - property_predicates: Vec::with_capacity(pattern_count), - property_settings: Vec::with_capacity(pattern_count), - general_predicates: Vec::with_capacity(pattern_count), - }; - - // Build a vector of strings to store the capture names. - for i in 0..capture_count { - unsafe { - let mut length = 0u32; - let name = - ts_query_capture_name_for_id(ptr, i, &mut length as *mut u32) as *const u8; - let name = slice::from_raw_parts(name, length as usize); - let name = str::from_utf8_unchecked(name); - result.capture_names.push(name.to_string()); - } - } - - // Build a vector to store capture qunatifiers. - for i in 0..pattern_count { - let mut capture_quantifiers = Vec::with_capacity(capture_count as usize); - for j in 0..capture_count { - unsafe { - let quantifier = ts_query_capture_quantifier_for_id(ptr, i as u32, j); - capture_quantifiers.push(quantifier.into()); - } - } - result.capture_quantifiers.push(capture_quantifiers); - } - - // Build a vector of strings to represent literal values used in predicates. 
- let string_values = (0..string_count) - .map(|i| unsafe { - let mut length = 0u32; - let value = ts_query_string_value_for_id(ptr, i as u32, &mut length as *mut u32) - as *const u8; - let value = slice::from_raw_parts(value, length as usize); - let value = str::from_utf8_unchecked(value); - value.to_string() - }) - .collect::>(); - - // Build a vector of predicates for each pattern. - for i in 0..pattern_count { - let predicate_steps = unsafe { - let mut length = 0u32; - let raw_predicates = - ts_query_predicates_for_pattern(ptr, i as u32, &mut length as *mut u32); - if length > 0 { - slice::from_raw_parts(raw_predicates, length as usize) - } else { - &[] - } - }; - - let byte_offset = unsafe { ts_query_start_byte_for_pattern(ptr, i as u32) }; - let row = source - .char_indices() - .take_while(|(i, _)| *i < byte_offset as usize) - .filter(|(_, c)| *c == '\n') - .count(); - - let type_done = TSQueryPredicateStepType_TSQueryPredicateStepTypeDone; - let type_capture = TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture; - let type_string = TSQueryPredicateStepType_TSQueryPredicateStepTypeString; - - let mut text_predicates = Vec::new(); - let mut property_predicates = Vec::new(); - let mut property_settings = Vec::new(); - let mut general_predicates = Vec::new(); - for p in predicate_steps.split(|s| s.type_ == type_done) { - if p.is_empty() { - continue; - } - - if p[0].type_ != type_string { - return Err(predicate_error( - row, - format!( - "Expected predicate to start with a function name. Got @{}.", - result.capture_names[p[0].value_id as usize], - ), - )); - } - - // Build a predicate for each of the known predicate function names. - let operator_name = &string_values[p[0].value_id as usize]; - match operator_name.as_str() { - "eq?" | "not-eq?" => { - if p.len() != 3 { - return Err(predicate_error( - row, - format!( - "Wrong number of arguments to #eq? predicate. 
Expected 2, got {}.", - p.len() - 1 - ), - )); - } - if p[1].type_ != type_capture { - return Err(predicate_error(row, format!( - "First argument to #eq? predicate must be a capture name. Got literal \"{}\".", - string_values[p[1].value_id as usize], - ))); - } - - let is_positive = operator_name == "eq?"; - text_predicates.push(if p[2].type_ == type_capture { - TextPredicate::CaptureEqCapture( - p[1].value_id, - p[2].value_id, - is_positive, - ) - } else { - TextPredicate::CaptureEqString( - p[1].value_id, - string_values[p[2].value_id as usize].clone(), - is_positive, - ) - }); - } - - "match?" | "not-match?" => { - if p.len() != 3 { - return Err(predicate_error(row, format!( - "Wrong number of arguments to #match? predicate. Expected 2, got {}.", - p.len() - 1 - ))); - } - if p[1].type_ != type_capture { - return Err(predicate_error(row, format!( - "First argument to #match? predicate must be a capture name. Got literal \"{}\".", - string_values[p[1].value_id as usize], - ))); - } - if p[2].type_ == type_capture { - return Err(predicate_error(row, format!( - "Second argument to #match? predicate must be a literal. Got capture @{}.", - result.capture_names[p[2].value_id as usize], - ))); - } - - let is_positive = operator_name == "match?"; - let regex = &string_values[p[2].value_id as usize]; - text_predicates.push(TextPredicate::CaptureMatchString( - p[1].value_id, - regex::bytes::Regex::new(regex).map_err(|_| { - predicate_error(row, format!("Invalid regex '{}'", regex)) - })?, - is_positive, - )); - } - - "set!" => property_settings.push(Self::parse_property( - row, - &operator_name, - &result.capture_names, - &string_values, - &p[1..], - )?), - - "is?" | "is-not?" => property_predicates.push(( - Self::parse_property( - row, - &operator_name, - &result.capture_names, - &string_values, - &p[1..], - )?, - operator_name == "is?", - )), - - _ => general_predicates.push(QueryPredicate { - operator: operator_name.clone().into_boxed_str(), - args: p[1..] 
- .iter() - .map(|a| { - if a.type_ == type_capture { - QueryPredicateArg::Capture(a.value_id) - } else { - QueryPredicateArg::String( - string_values[a.value_id as usize].clone().into_boxed_str(), - ) - } - }) - .collect(), - }), - } - } - - result - .text_predicates - .push(text_predicates.into_boxed_slice()); - result - .property_predicates - .push(property_predicates.into_boxed_slice()); - result - .property_settings - .push(property_settings.into_boxed_slice()); - result - .general_predicates - .push(general_predicates.into_boxed_slice()); - } - - Ok(result) + Query::from_raw_parts(ptr, source) } /// Consumes the [Query], returning a raw pointer to the underlying C structure. diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index c2a0ead0..9d470457 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1434,7 +1434,223 @@ impl Query { }); } - unsafe { Query::from_raw(ptr, source) } + unsafe { Query::from_raw_parts(ptr, source) } + } + + #[doc(hidden)] + unsafe fn from_raw_parts(ptr: *mut ffi::TSQuery, source: &str) -> Result { + let string_count = unsafe { ffi::ts_query_string_count(ptr) }; + let capture_count = unsafe { ffi::ts_query_capture_count(ptr) }; + let pattern_count = unsafe { ffi::ts_query_pattern_count(ptr) as usize }; + let mut result = Query { + ptr: unsafe { NonNull::new_unchecked(ptr) }, + capture_names: Vec::with_capacity(capture_count as usize), + capture_quantifiers: Vec::with_capacity(pattern_count as usize), + text_predicates: Vec::with_capacity(pattern_count), + property_predicates: Vec::with_capacity(pattern_count), + property_settings: Vec::with_capacity(pattern_count), + general_predicates: Vec::with_capacity(pattern_count), + }; + + // Build a vector of strings to store the capture names. 
+ for i in 0..capture_count { + unsafe { + let mut length = 0u32; + let name = + ffi::ts_query_capture_name_for_id(ptr, i, &mut length as *mut u32) as *const u8; + let name = slice::from_raw_parts(name, length as usize); + let name = str::from_utf8_unchecked(name); + result.capture_names.push(name.to_string()); + } + } + + // Build a vector to store capture qunatifiers. + for i in 0..pattern_count { + let mut capture_quantifiers = Vec::with_capacity(capture_count as usize); + for j in 0..capture_count { + unsafe { + let quantifier = ffi::ts_query_capture_quantifier_for_id(ptr, i as u32, j); + capture_quantifiers.push(quantifier.into()); + } + } + result.capture_quantifiers.push(capture_quantifiers); + } + + // Build a vector of strings to represent literal values used in predicates. + let string_values = (0..string_count) + .map(|i| unsafe { + let mut length = 0u32; + let value = + ffi::ts_query_string_value_for_id(ptr, i as u32, &mut length as *mut u32) + as *const u8; + let value = slice::from_raw_parts(value, length as usize); + let value = str::from_utf8_unchecked(value); + value.to_string() + }) + .collect::>(); + + // Build a vector of predicates for each pattern. 
+ for i in 0..pattern_count { + let predicate_steps = unsafe { + let mut length = 0u32; + let raw_predicates = + ffi::ts_query_predicates_for_pattern(ptr, i as u32, &mut length as *mut u32); + if length > 0 { + slice::from_raw_parts(raw_predicates, length as usize) + } else { + &[] + } + }; + + let byte_offset = unsafe { ffi::ts_query_start_byte_for_pattern(ptr, i as u32) }; + let row = source + .char_indices() + .take_while(|(i, _)| *i < byte_offset as usize) + .filter(|(_, c)| *c == '\n') + .count(); + + let type_done = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeDone; + let type_capture = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture; + let type_string = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeString; + + let mut text_predicates = Vec::new(); + let mut property_predicates = Vec::new(); + let mut property_settings = Vec::new(); + let mut general_predicates = Vec::new(); + for p in predicate_steps.split(|s| s.type_ == type_done) { + if p.is_empty() { + continue; + } + + if p[0].type_ != type_string { + return Err(predicate_error( + row, + format!( + "Expected predicate to start with a function name. Got @{}.", + result.capture_names[p[0].value_id as usize], + ), + )); + } + + // Build a predicate for each of the known predicate function names. + let operator_name = &string_values[p[0].value_id as usize]; + match operator_name.as_str() { + "eq?" | "not-eq?" => { + if p.len() != 3 { + return Err(predicate_error( + row, + format!( + "Wrong number of arguments to #eq? predicate. Expected 2, got {}.", + p.len() - 1 + ), + )); + } + if p[1].type_ != type_capture { + return Err(predicate_error(row, format!( + "First argument to #eq? predicate must be a capture name. 
Got literal \"{}\".", + string_values[p[1].value_id as usize], + ))); + } + + let is_positive = operator_name == "eq?"; + text_predicates.push(if p[2].type_ == type_capture { + TextPredicate::CaptureEqCapture( + p[1].value_id, + p[2].value_id, + is_positive, + ) + } else { + TextPredicate::CaptureEqString( + p[1].value_id, + string_values[p[2].value_id as usize].clone(), + is_positive, + ) + }); + } + + "match?" | "not-match?" => { + if p.len() != 3 { + return Err(predicate_error(row, format!( + "Wrong number of arguments to #match? predicate. Expected 2, got {}.", + p.len() - 1 + ))); + } + if p[1].type_ != type_capture { + return Err(predicate_error(row, format!( + "First argument to #match? predicate must be a capture name. Got literal \"{}\".", + string_values[p[1].value_id as usize], + ))); + } + if p[2].type_ == type_capture { + return Err(predicate_error(row, format!( + "Second argument to #match? predicate must be a literal. Got capture @{}.", + result.capture_names[p[2].value_id as usize], + ))); + } + + let is_positive = operator_name == "match?"; + let regex = &string_values[p[2].value_id as usize]; + text_predicates.push(TextPredicate::CaptureMatchString( + p[1].value_id, + regex::bytes::Regex::new(regex).map_err(|_| { + predicate_error(row, format!("Invalid regex '{}'", regex)) + })?, + is_positive, + )); + } + + "set!" => property_settings.push(Self::parse_property( + row, + &operator_name, + &result.capture_names, + &string_values, + &p[1..], + )?), + + "is?" | "is-not?" => property_predicates.push(( + Self::parse_property( + row, + &operator_name, + &result.capture_names, + &string_values, + &p[1..], + )?, + operator_name == "is?", + )), + + _ => general_predicates.push(QueryPredicate { + operator: operator_name.clone().into_boxed_str(), + args: p[1..] 
+ .iter() + .map(|a| { + if a.type_ == type_capture { + QueryPredicateArg::Capture(a.value_id) + } else { + QueryPredicateArg::String( + string_values[a.value_id as usize].clone().into_boxed_str(), + ) + } + }) + .collect(), + }), + } + } + + result + .text_predicates + .push(text_predicates.into_boxed_slice()); + result + .property_predicates + .push(property_predicates.into_boxed_slice()); + result + .property_settings + .push(property_settings.into_boxed_slice()); + result + .general_predicates + .push(general_predicates.into_boxed_slice()); + } + + Ok(result) } /// Get the byte offset where the given pattern starts in the query's source. @@ -2081,7 +2297,7 @@ impl<'a> Iterator for LossyUtf8<'a> { } } -pub(crate) fn predicate_error(row: usize, message: String) -> QueryError { +fn predicate_error(row: usize, message: String) -> QueryError { QueryError { kind: QueryErrorKind::Predicate, row, From 0261a13984cad1740a6a1ddf53bce978156bd001 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sat, 25 Feb 2023 19:54:23 +0200 Subject: [PATCH 071/347] lib: fix `ts_node_field_name_for_child` implementation With hidden subtrees correct incorporation and field inheritance support. 
--- lib/src/node.c | 60 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 13 deletions(-) diff --git a/lib/src/node.c b/lib/src/node.c index de59504e..36bfb24c 100644 --- a/lib/src/node.c +++ b/lib/src/node.c @@ -569,24 +569,58 @@ recur: return ts_node__null(); } -const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index) { - const TSFieldMapEntry *field_map_start = NULL, *field_map_end = NULL; - if (!ts_node_child_count(self)) { +static inline const char *ts_node__field_name_from_language(TSNode self, uint32_t structural_child_index) { + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map( + self.tree->language, + ts_node__subtree(self).ptr->production_id, + &field_map, + &field_map_end + ); + for (; field_map != field_map_end; field_map++) { + if (!field_map->inherited && field_map->child_index == structural_child_index) { + return self.tree->language->field_names[field_map->field_id]; + } + } return NULL; - } +} - ts_language_field_map( - self.tree->language, - ts_node__subtree(self).ptr->production_id, - &field_map_start, - &field_map_end - ); +const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index) { + TSNode result = self; + bool did_descend = true; + const char *inherited_field_name = NULL; - for (const TSFieldMapEntry *i = field_map_start; i < field_map_end; i++) { - if (i->child_index == child_index) { - return self.tree->language->field_names[i->field_id]; + while (did_descend) { + did_descend = false; + + TSNode child; + uint32_t index = 0; + NodeChildIterator iterator = ts_node_iterate_children(&result); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (ts_node__is_relevant(child, true)) { + if (index == child_index) { + const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1); + if (field_name) return field_name; + return inherited_field_name; + } + index++; + } else { + uint32_t grandchild_index = 
child_index - index; + uint32_t grandchild_count = ts_node__relevant_child_count(child, true); + if (grandchild_index < grandchild_count) { + const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1); + if (field_name) inherited_field_name = field_name; + + did_descend = true; + result = child; + child_index = grandchild_index; + break; + } + index += grandchild_count; + } } } + return NULL; } From cde45268b6b5de8ab5894ec4313c16b46e5de443 Mon Sep 17 00:00:00 2001 From: Matt <85322+mattmassicotte@users.noreply.github.com> Date: Thu, 22 Sep 2022 06:39:52 -0400 Subject: [PATCH 072/347] subtree casts --- lib/src/subtree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/src/subtree.c b/lib/src/subtree.c index f8f82951..fd76402f 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -348,7 +348,7 @@ void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *langu Subtree child2 = ts_subtree_children(tree)[tree.ptr->child_count - 1]; long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2); if (repeat_delta > 0) { - unsigned n = repeat_delta; + unsigned n = (unsigned)repeat_delta; for (unsigned i = n / 2; i > 0; i /= 2) { ts_subtree__compress(tree, i, language, &pool->tree_stack); n -= i; @@ -513,7 +513,7 @@ MutableSubtree ts_subtree_new_node( size_t new_byte_size = ts_subtree_alloc_size(children->size); if (children->capacity * sizeof(Subtree) < new_byte_size) { children->contents = ts_realloc(children->contents, new_byte_size); - children->capacity = new_byte_size / sizeof(Subtree); + children->capacity = (uint32_t)(new_byte_size / sizeof(Subtree)); } SubtreeHeapData *data = (SubtreeHeapData *)&children->contents[children->size]; From 65c16bfb17021b09a516c460f7a973b3325a5221 Mon Sep 17 00:00:00 2001 From: Matt <85322+mattmassicotte@users.noreply.github.com> Date: Thu, 22 Sep 2022 06:47:53 -0400 Subject: [PATCH 073/347] query casts --- 
lib/src/query.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index cfe11438..18bd7fd3 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -331,7 +331,7 @@ static bool stream_advance(Stream *self) { if (self->input < self->end) { uint32_t size = ts_decode_utf8( (const uint8_t *)self->input, - self->end - self->input, + (uint32_t)(self->end - self->input), &self->next ); if (size > 0) { @@ -398,7 +398,7 @@ static void stream_scan_identifier(Stream *stream) { } static uint32_t stream_offset(Stream *self) { - return self->input - self->start; + return (uint32_t)(self->input - self->start); } /****************** @@ -2064,7 +2064,7 @@ static TSQueryError ts_query__parse_predicate( if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; const char *predicate_name = stream->input; stream_scan_identifier(stream); - uint32_t length = stream->input - predicate_name; + uint32_t length = (uint32_t)(stream->input - predicate_name); uint16_t id = symbol_table_insert_name( &self->predicate_values, predicate_name, @@ -2095,7 +2095,7 @@ static TSQueryError ts_query__parse_predicate( if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; const char *capture_name = stream->input; stream_scan_identifier(stream); - uint32_t length = stream->input - capture_name; + uint32_t length = (uint32_t)(stream->input - capture_name); // Add the capture id to the first step of the pattern int capture_id = symbol_table_id_for_name( @@ -2133,7 +2133,7 @@ static TSQueryError ts_query__parse_predicate( else if (stream_is_ident_start(stream)) { const char *symbol_start = stream->input; stream_scan_identifier(stream); - uint32_t length = stream->input - symbol_start; + uint32_t length = (uint32_t)(stream->input - symbol_start); uint16_t id = symbol_table_insert_name( &self->predicate_values, symbol_start, @@ -2302,7 +2302,7 @@ static TSQueryError ts_query__parse_pattern( if (stream_is_ident_start(stream)) { 
const char *node_name = stream->input; stream_scan_identifier(stream); - uint32_t length = stream->input - node_name; + uint32_t length = (uint32_t)(stream->input - node_name); // TODO - remove. // For temporary backward compatibility, handle predicates without the leading '#' sign. @@ -2353,7 +2353,7 @@ static TSQueryError ts_query__parse_pattern( const char *node_name = stream->input; stream_scan_identifier(stream); - uint32_t length = stream->input - node_name; + uint32_t length = (uint32_t)(stream->input - node_name); step->symbol = ts_language_symbol_for_name( self->language, @@ -2386,7 +2386,7 @@ static TSQueryError ts_query__parse_pattern( } const char *field_name = stream->input; stream_scan_identifier(stream); - uint32_t length = stream->input - field_name; + uint32_t length = (uint32_t)(stream->input - field_name); stream_skip_whitespace(stream); TSFieldId field_id = ts_language_field_id_for_name( @@ -2497,7 +2497,7 @@ static TSQueryError ts_query__parse_pattern( // Parse the field name const char *field_name = stream->input; stream_scan_identifier(stream); - uint32_t length = stream->input - field_name; + uint32_t length = (uint32_t)(stream->input - field_name); stream_skip_whitespace(stream); if (stream->next != ':') { @@ -2616,7 +2616,7 @@ static TSQueryError ts_query__parse_pattern( if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; const char *capture_name = stream->input; stream_scan_identifier(stream); - uint32_t length = stream->input - capture_name; + uint32_t length = (uint32_t)(stream->input - capture_name); stream_skip_whitespace(stream); // Add the capture id to the first step of the pattern @@ -3323,7 +3323,7 @@ static QueryState *ts_query_cursor__copy_state( QueryState **state_ref ) { const QueryState *state = *state_ref; - uint32_t state_index = state - self->states.contents; + uint32_t state_index = (uint32_t)(state - self->states.contents); QueryState copy = *state; copy.capture_list_id = NONE; From 
0751736d1709d738b462ecd878b75c0fab536e3a Mon Sep 17 00:00:00 2001 From: Andreas Deininger Date: Fri, 31 Mar 2023 21:02:32 +0200 Subject: [PATCH 074/347] docs: convert various links to https protocol --- cli/README.md | 4 ++-- cli/npm/dsl.d.ts | 2 +- cli/npm/package.json | 2 +- cli/src/generate/grammar-schema.json | 2 +- cli/src/playground.html | 4 ++-- docs/index.md | 6 +++--- docs/section-2-using-parsers.md | 2 +- docs/section-3-creating-parsers.md | 2 +- docs/section-5-implementation.md | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cli/README.md b/cli/README.md index 8cdda9c0..eff3608c 100644 --- a/cli/README.md +++ b/cli/README.md @@ -30,8 +30,8 @@ The `tree-sitter` binary itself has no dependencies, but specific commands have ### Commands -* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. See [the documentation](http://tree-sitter.github.io/tree-sitter/creating-parsers) for more information. +* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. See [the documentation](https://tree-sitter.github.io/tree-sitter/creating-parsers) for more information. -* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation](http://tree-sitter.github.io/tree-sitter/creating-parsers) for more information. +* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation](https://tree-sitter.github.io/tree-sitter/creating-parsers) for more information. * `parse` - The `tree-sitter parse` command will parse a file (or list of files) using Tree-sitter parsers. 
diff --git a/cli/npm/dsl.d.ts b/cli/npm/dsl.d.ts index f2ee57f1..fdf43b55 100644 --- a/cli/npm/dsl.d.ts +++ b/cli/npm/dsl.d.ts @@ -134,7 +134,7 @@ interface Grammar< * * @param $ grammar rules * - * @see http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types + * @see https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types */ supertypes?: ( $: GrammarSymbols, diff --git a/cli/npm/package.json b/cli/npm/package.json index 02309193..44d1327f 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -5,7 +5,7 @@ "license": "MIT", "repository": { "type": "git", - "url": "http://github.com/tree-sitter/tree-sitter.git" + "url": "https://github.com/tree-sitter/tree-sitter.git" }, "description": "CLI for generating fast incremental parsers", "keywords": [ diff --git a/cli/src/generate/grammar-schema.json b/cli/src/generate/grammar-schema.json index 5ca35370..952aac80 100644 --- a/cli/src/generate/grammar-schema.json +++ b/cli/src/generate/grammar-schema.json @@ -63,7 +63,7 @@ }, "supertypes": { - "description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.", + "description": "A list of hidden rule names that should be considered supertypes in the generated node types file. 
See https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.", "type": "array", "items": { "description": "the name of a rule in `rules` or `extras`", diff --git a/cli/src/playground.html b/cli/src/playground.html index 22c874df..b69f9351 100644 --- a/cli/src/playground.html +++ b/cli/src/playground.html @@ -3,8 +3,8 @@ tree-sitter THE_LANGUAGE_NAME - - + + diff --git a/docs/index.md b/docs/index.md index 2e3b59ed..9a15db06 100644 --- a/docs/index.md +++ b/docs/index.md @@ -158,7 +158,7 @@ The design of Tree-sitter was greatly influenced by the following research paper - [Practical Algorithms for Incremental Software Development Environments](https://www2.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-946.pdf) - [Context Aware Scanning for Parsing Extensible Languages](https://www-users.cse.umn.edu/~evw/pubs/vanwyk07gpce/vanwyk07gpce.pdf) -- [Efficient and Flexible Incremental Parsing](http://harmonia.cs.berkeley.edu/papers/twagner-parsing.pdf) -- [Incremental Analysis of Real Programming Languages](http://harmonia.cs.berkeley.edu/papers/twagner-glr.pdf) -- [Error Detection and Recovery in LR Parsers](http://what-when-how.com/compiler-writing/bottom-up-parsing-compiler-writing-part-13) +- [Efficient and Flexible Incremental Parsing](https://harmonia.cs.berkeley.edu/papers/twagner-parsing.pdf) +- [Incremental Analysis of Real Programming Languages](https://harmonia.cs.berkeley.edu/papers/twagner-glr.pdf) +- [Error Detection and Recovery in LR Parsers](https://what-when-how.com/compiler-writing/bottom-up-parsing-compiler-writing-part-13) - [Error Recovery for LR Parsers](https://apps.dtic.mil/sti/pdfs/ADA043470.pdf) diff --git a/docs/section-2-using-parsers.md b/docs/section-2-using-parsers.md index ea32f4f5..0d683dc1 100644 --- a/docs/section-2-using-parsers.md +++ b/docs/section-2-using-parsers.md @@ -290,7 +290,7 @@ This `ts_node_edit` function is _only_ needed in the case where you have retriev ### Multi-language Documents -Sometimes, different 
parts of a file may be written in different languages. For example, templating languages like [EJS](http://ejs.co) and [ERB](https://ruby-doc.org/stdlib-2.5.1/libdoc/erb/rdoc/ERB.html) allow you to generate HTML by writing a mixture of HTML and another language like JavaScript or Ruby. +Sometimes, different parts of a file may be written in different languages. For example, templating languages like [EJS](https://ejs.co) and [ERB](https://ruby-doc.org/stdlib-2.5.1/libdoc/erb/rdoc/ERB.html) allow you to generate HTML by writing a mixture of HTML and another language like JavaScript or Ruby. Tree-sitter handles these types of documents by allowing you to create a syntax tree based on the text in certain _ranges_ of a file. diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index 5677292f..0842edbb 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -752,7 +752,7 @@ External scanners are a common cause of infinite loops. Be very careful when emitting zero-width tokens from your external scanner, and if you consume characters in a loop be sure use the `eof` function to check whether you are at the end of the file. [ambiguous-grammar]: https://en.wikipedia.org/wiki/Ambiguous_grammar -[antlr]: http://www.antlr.org/ +[antlr]: https://www.antlr.org [bison-dprec]: https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html [bison]: https://en.wikipedia.org/wiki/GNU_bison [c-linkage]: https://en.cppreference.com/w/cpp/language/language_linkage diff --git a/docs/section-5-implementation.md b/docs/section-5-implementation.md index 532f1046..e0fa9661 100644 --- a/docs/section-5-implementation.md +++ b/docs/section-5-implementation.md @@ -13,7 +13,7 @@ syntax trees up-to-date as the source code changes. 
`libtree-sitter` is designed The CLI is used to generate a parser for a language by supplying a [context-free grammar](https://en.wikipedia.org/wiki/Context-free_grammar) describing the -language. The CLI is a build tool; it is no longer needed once a parser has been generated. It is written in Rust, and is available on [crates.io](https://crates.io), [npm](http://npmjs.com), and as a pre-built binary [on GitHub](https://github.com/tree-sitter/tree-sitter/releases/latest). +language. The CLI is a build tool; it is no longer needed once a parser has been generated. It is written in Rust, and is available on [crates.io](https://crates.io), [npm](https://npmjs.com), and as a pre-built binary [on GitHub](https://github.com/tree-sitter/tree-sitter/releases/latest). ## The CLI From 5ce60ef9e5183c7cbc91d2fd2c3f50a1c63ced3e Mon Sep 17 00:00:00 2001 From: M Hickford Date: Sat, 5 Nov 2022 19:46:27 +0000 Subject: [PATCH 075/347] docs: clarify naming convention for bindings and parsers --- docs/index.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/index.md b/docs/index.md index 2e3b59ed..cc4f5850 100644 --- a/docs/index.md +++ b/docs/index.md @@ -30,6 +30,8 @@ There are currently bindings that allow Tree-sitter to be used from the followin * [Kotlin](https://github.com/oxisto/kotlintree) * [Java](https://github.com/serenadeai/java-tree-sitter) +By convention, bindings are named with the language first, eg. ruby-tree-sitter. + ### Parsers * [Ada](https://github.com/briot/tree-sitter-ada) @@ -146,6 +148,8 @@ There are currently bindings that allow Tree-sitter to be used from the followin * [YANG](https://github.com/Hubro/tree-sitter-yang) * [Zig](https://github.com/maxxnino/tree-sitter-zig) +By convention, parsers are named with the language last, eg. tree-sitter-ruby. 
+ ### Talks on Tree-sitter * [Strange Loop 2018](https://www.thestrangeloop.com/2018/tree-sitter---a-new-parsing-system-for-programming-tools.html) From c38f78345e6bcd511fd0b1e7dfa1645ca69173b1 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Tue, 4 Apr 2023 22:16:27 +0300 Subject: [PATCH 076/347] binding(rust): update script/generate-bindings to use latest rust-bindgen 0.64.0 version --- lib/binding_rust/bindings.rs | 357 ++++++----------------------------- script/generate-bindings | 4 +- 2 files changed, 56 insertions(+), 305 deletions(-) diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index be117f83..863b1df5 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -1,4 +1,4 @@ -/* automatically generated by rust-bindgen 0.59.2 */ +/* automatically generated by rust-bindgen 0.64.0 */ pub type TSSymbol = u16; pub type TSFieldId = u16; @@ -148,14 +148,7 @@ extern "C" { pub fn ts_parser_delete(parser: *mut TSParser); } extern "C" { - #[doc = " Set the language that the parser should use for parsing."] - #[doc = ""] - #[doc = " Returns a boolean indicating whether or not the language was successfully"] - #[doc = " assigned. True means assignment succeeded. False means there was a version"] - #[doc = " mismatch: the language was generated with an incompatible version of the"] - #[doc = " Tree-sitter CLI. Check the language's version using `ts_language_version`"] - #[doc = " and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION` and"] - #[doc = " `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants."] + #[doc = " Set the language that the parser should use for parsing.\n\n Returns a boolean indicating whether or not the language was successfully\n assigned. True means assignment succeeded. False means there was a version\n mismatch: the language was generated with an incompatible version of the\n Tree-sitter CLI. 
Check the language's version using `ts_language_version`\n and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION` and\n `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants."] pub fn ts_parser_set_language(self_: *mut TSParser, language: *const TSLanguage) -> bool; } extern "C" { @@ -163,25 +156,7 @@ extern "C" { pub fn ts_parser_language(self_: *const TSParser) -> *const TSLanguage; } extern "C" { - #[doc = " Set the ranges of text that the parser should include when parsing."] - #[doc = ""] - #[doc = " By default, the parser will always include entire documents. This function"] - #[doc = " allows you to parse only a *portion* of a document but still return a syntax"] - #[doc = " tree whose ranges match up with the document as a whole. You can also pass"] - #[doc = " multiple disjoint ranges."] - #[doc = ""] - #[doc = " The second and third parameters specify the location and length of an array"] - #[doc = " of ranges. The parser does *not* take ownership of these ranges; it copies"] - #[doc = " the data, so it doesn't matter how these ranges are allocated."] - #[doc = ""] - #[doc = " If `length` is zero, then the entire document will be parsed. Otherwise,"] - #[doc = " the given ranges must be ordered from earliest to latest in the document,"] - #[doc = " and they must not overlap. That is, the following must hold for all"] - #[doc = " `i` < `length - 1`: ranges[i].end_byte <= ranges[i + 1].start_byte"] - #[doc = ""] - #[doc = " If this requirement is not satisfied, the operation will fail, the ranges"] - #[doc = " will not be assigned, and this function will return `false`. On success,"] - #[doc = " this function returns `true`"] + #[doc = " Set the ranges of text that the parser should include when parsing.\n\n By default, the parser will always include entire documents. This function\n allows you to parse only a *portion* of a document but still return a syntax\n tree whose ranges match up with the document as a whole. 
You can also pass\n multiple disjoint ranges.\n\n The second and third parameters specify the location and length of an array\n of ranges. The parser does *not* take ownership of these ranges; it copies\n the data, so it doesn't matter how these ranges are allocated.\n\n If `length` is zero, then the entire document will be parsed. Otherwise,\n the given ranges must be ordered from earliest to latest in the document,\n and they must not overlap. That is, the following must hold for all\n `i` < `length - 1`: ranges[i].end_byte <= ranges[i + 1].start_byte\n\n If this requirement is not satisfied, the operation will fail, the ranges\n will not be assigned, and this function will return `false`. On success,\n this function returns `true`"] pub fn ts_parser_set_included_ranges( self_: *mut TSParser, ranges: *const TSRange, @@ -189,50 +164,11 @@ extern "C" { ) -> bool; } extern "C" { - #[doc = " Get the ranges of text that the parser will include when parsing."] - #[doc = ""] - #[doc = " The returned pointer is owned by the parser. The caller should not free it"] - #[doc = " or write to it. The length of the array will be written to the given"] - #[doc = " `length` pointer."] + #[doc = " Get the ranges of text that the parser will include when parsing.\n\n The returned pointer is owned by the parser. The caller should not free it\n or write to it. The length of the array will be written to the given\n `length` pointer."] pub fn ts_parser_included_ranges(self_: *const TSParser, length: *mut u32) -> *const TSRange; } extern "C" { - #[doc = " Use the parser to parse some source code and create a syntax tree."] - #[doc = ""] - #[doc = " If you are parsing this document for the first time, pass `NULL` for the"] - #[doc = " `old_tree` parameter. 
Otherwise, if you have already parsed an earlier"] - #[doc = " version of this document and the document has since been edited, pass the"] - #[doc = " previous syntax tree so that the unchanged parts of it can be reused."] - #[doc = " This will save time and memory. For this to work correctly, you must have"] - #[doc = " already edited the old syntax tree using the `ts_tree_edit` function in a"] - #[doc = " way that exactly matches the source code changes."] - #[doc = ""] - #[doc = " The `TSInput` parameter lets you specify how to read the text. It has the"] - #[doc = " following three fields:"] - #[doc = " 1. `read`: A function to retrieve a chunk of text at a given byte offset"] - #[doc = " and (row, column) position. The function should return a pointer to the"] - #[doc = " text and write its length to the `bytes_read` pointer. The parser does"] - #[doc = " not take ownership of this buffer; it just borrows it until it has"] - #[doc = " finished reading it. The function should write a zero value to the"] - #[doc = " `bytes_read` pointer to indicate the end of the document."] - #[doc = " 2. `payload`: An arbitrary pointer that will be passed to each invocation"] - #[doc = " of the `read` function."] - #[doc = " 3. `encoding`: An indication of how the text is encoded. Either"] - #[doc = " `TSInputEncodingUTF8` or `TSInputEncodingUTF16`."] - #[doc = ""] - #[doc = " This function returns a syntax tree on success, and `NULL` on failure. There"] - #[doc = " are three possible reasons for failure:"] - #[doc = " 1. The parser does not have a language assigned. Check for this using the"] - #[doc = "`ts_parser_language` function."] - #[doc = " 2. Parsing was cancelled due to a timeout that was set by an earlier call to"] - #[doc = " the `ts_parser_set_timeout_micros` function. You can resume parsing from"] - #[doc = " where the parser left out by calling `ts_parser_parse` again with the"] - #[doc = " same arguments. 
Or you can start parsing from scratch by first calling"] - #[doc = " `ts_parser_reset`."] - #[doc = " 3. Parsing was cancelled using a cancellation flag that was set by an"] - #[doc = " earlier call to `ts_parser_set_cancellation_flag`. You can resume parsing"] - #[doc = " from where the parser left out by calling `ts_parser_parse` again with"] - #[doc = " the same arguments."] + #[doc = " Use the parser to parse some source code and create a syntax tree.\n\n If you are parsing this document for the first time, pass `NULL` for the\n `old_tree` parameter. Otherwise, if you have already parsed an earlier\n version of this document and the document has since been edited, pass the\n previous syntax tree so that the unchanged parts of it can be reused.\n This will save time and memory. For this to work correctly, you must have\n already edited the old syntax tree using the `ts_tree_edit` function in a\n way that exactly matches the source code changes.\n\n The `TSInput` parameter lets you specify how to read the text. It has the\n following three fields:\n 1. `read`: A function to retrieve a chunk of text at a given byte offset\n and (row, column) position. The function should return a pointer to the\n text and write its length to the `bytes_read` pointer. The parser does\n not take ownership of this buffer; it just borrows it until it has\n finished reading it. The function should write a zero value to the\n `bytes_read` pointer to indicate the end of the document.\n 2. `payload`: An arbitrary pointer that will be passed to each invocation\n of the `read` function.\n 3. `encoding`: An indication of how the text is encoded. Either\n `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.\n\n This function returns a syntax tree on success, and `NULL` on failure. There\n are three possible reasons for failure:\n 1. The parser does not have a language assigned. Check for this using the\n`ts_parser_language` function.\n 2. 
Parsing was cancelled due to a timeout that was set by an earlier call to\n the `ts_parser_set_timeout_micros` function. You can resume parsing from\n where the parser left out by calling `ts_parser_parse` again with the\n same arguments. Or you can start parsing from scratch by first calling\n `ts_parser_reset`.\n 3. Parsing was cancelled using a cancellation flag that was set by an\n earlier call to `ts_parser_set_cancellation_flag`. You can resume parsing\n from where the parser left out by calling `ts_parser_parse` again with\n the same arguments."] pub fn ts_parser_parse( self_: *mut TSParser, old_tree: *const TSTree, @@ -240,10 +176,7 @@ extern "C" { ) -> *mut TSTree; } extern "C" { - #[doc = " Use the parser to parse some source code stored in one contiguous buffer."] - #[doc = " The first two parameters are the same as in the `ts_parser_parse` function"] - #[doc = " above. The second two parameters indicate the location of the buffer and its"] - #[doc = " length in bytes."] + #[doc = " Use the parser to parse some source code stored in one contiguous buffer.\n The first two parameters are the same as in the `ts_parser_parse` function\n above. The second two parameters indicate the location of the buffer and its\n length in bytes."] pub fn ts_parser_parse_string( self_: *mut TSParser, old_tree: *const TSTree, @@ -252,10 +185,7 @@ extern "C" { ) -> *mut TSTree; } extern "C" { - #[doc = " Use the parser to parse some source code stored in one contiguous buffer with"] - #[doc = " a given encoding. The first four parameters work the same as in the"] - #[doc = " `ts_parser_parse_string` method above. The final parameter indicates whether"] - #[doc = " the text is encoded as UTF8 or UTF16."] + #[doc = " Use the parser to parse some source code stored in one contiguous buffer with\n a given encoding. The first four parameters work the same as in the\n `ts_parser_parse_string` method above. 
The final parameter indicates whether\n the text is encoded as UTF8 or UTF16."] pub fn ts_parser_parse_string_encoding( self_: *mut TSParser, old_tree: *const TSTree, @@ -265,21 +195,11 @@ extern "C" { ) -> *mut TSTree; } extern "C" { - #[doc = " Instruct the parser to start the next parse from the beginning."] - #[doc = ""] - #[doc = " If the parser previously failed because of a timeout or a cancellation, then"] - #[doc = " by default, it will resume where it left off on the next call to"] - #[doc = " `ts_parser_parse` or other parsing functions. If you don't want to resume,"] - #[doc = " and instead intend to use this parser to parse some other document, you must"] - #[doc = " call `ts_parser_reset` first."] + #[doc = " Instruct the parser to start the next parse from the beginning.\n\n If the parser previously failed because of a timeout or a cancellation, then\n by default, it will resume where it left off on the next call to\n `ts_parser_parse` or other parsing functions. If you don't want to resume,\n and instead intend to use this parser to parse some other document, you must\n call `ts_parser_reset` first."] pub fn ts_parser_reset(self_: *mut TSParser); } extern "C" { - #[doc = " Set the maximum duration in microseconds that parsing should be allowed to"] - #[doc = " take before halting."] - #[doc = ""] - #[doc = " If parsing takes longer than this, it will halt early, returning NULL."] - #[doc = " See `ts_parser_parse` for more information."] + #[doc = " Set the maximum duration in microseconds that parsing should be allowed to\n take before halting.\n\n If parsing takes longer than this, it will halt early, returning NULL.\n See `ts_parser_parse` for more information."] pub fn ts_parser_set_timeout_micros(self_: *mut TSParser, timeout: u64); } extern "C" { @@ -287,11 +207,7 @@ extern "C" { pub fn ts_parser_timeout_micros(self_: *const TSParser) -> u64; } extern "C" { - #[doc = " Set the parser's current cancellation flag pointer."] - #[doc = ""] - #[doc 
= " If a non-null pointer is assigned, then the parser will periodically read"] - #[doc = " from this pointer during parsing. If it reads a non-zero value, it will"] - #[doc = " halt early, returning NULL. See `ts_parser_parse` for more information."] + #[doc = " Set the parser's current cancellation flag pointer.\n\n If a non-null pointer is assigned, then the parser will periodically read\n from this pointer during parsing. If it reads a non-zero value, it will\n halt early, returning NULL. See `ts_parser_parse` for more information."] pub fn ts_parser_set_cancellation_flag(self_: *mut TSParser, flag: *const usize); } extern "C" { @@ -299,11 +215,7 @@ extern "C" { pub fn ts_parser_cancellation_flag(self_: *const TSParser) -> *const usize; } extern "C" { - #[doc = " Set the logger that a parser should use during parsing."] - #[doc = ""] - #[doc = " The parser does not take ownership over the logger payload. If a logger was"] - #[doc = " previously assigned, the caller is responsible for releasing any memory"] - #[doc = " owned by the previous logger."] + #[doc = " Set the logger that a parser should use during parsing.\n\n The parser does not take ownership over the logger payload. If a logger was\n previously assigned, the caller is responsible for releasing any memory\n owned by the previous logger."] pub fn ts_parser_set_logger(self_: *mut TSParser, logger: TSLogger); } extern "C" { @@ -311,17 +223,11 @@ extern "C" { pub fn ts_parser_logger(self_: *const TSParser) -> TSLogger; } extern "C" { - #[doc = " Set the file descriptor to which the parser should write debugging graphs"] - #[doc = " during parsing. The graphs are formatted in the DOT language. You may want"] - #[doc = " to pipe these graphs directly to a `dot(1)` process in order to generate"] - #[doc = " SVG output. You can turn off this logging by passing a negative number."] + #[doc = " Set the file descriptor to which the parser should write debugging graphs\n during parsing. 
The graphs are formatted in the DOT language. You may want\n to pipe these graphs directly to a `dot(1)` process in order to generate\n SVG output. You can turn off this logging by passing a negative number."] pub fn ts_parser_print_dot_graphs(self_: *mut TSParser, file: ::std::os::raw::c_int); } extern "C" { - #[doc = " Create a shallow copy of the syntax tree. This is very fast."] - #[doc = ""] - #[doc = " You need to copy a syntax tree in order to use it on more than one thread at"] - #[doc = " a time, as syntax trees are not thread safe."] + #[doc = " Create a shallow copy of the syntax tree. This is very fast.\n\n You need to copy a syntax tree in order to use it on more than one thread at\n a time, as syntax trees are not thread safe."] pub fn ts_tree_copy(self_: *const TSTree) -> *mut TSTree; } extern "C" { @@ -333,8 +239,7 @@ extern "C" { pub fn ts_tree_root_node(self_: *const TSTree) -> TSNode; } extern "C" { - #[doc = " Get the root node of the syntax tree, but with its position"] - #[doc = " shifted forward by the given offset."] + #[doc = " Get the root node of the syntax tree, but with its position\n shifted forward by the given offset."] pub fn ts_tree_root_node_with_offset( self_: *const TSTree, offset_bytes: u32, @@ -346,32 +251,15 @@ extern "C" { pub fn ts_tree_language(arg1: *const TSTree) -> *const TSLanguage; } extern "C" { - #[doc = " Get the array of included ranges that was used to parse the syntax tree."] - #[doc = ""] - #[doc = " The returned pointer must be freed by the caller."] + #[doc = " Get the array of included ranges that was used to parse the syntax tree.\n\n The returned pointer must be freed by the caller."] pub fn ts_tree_included_ranges(arg1: *const TSTree, length: *mut u32) -> *mut TSRange; } extern "C" { - #[doc = " Edit the syntax tree to keep it in sync with source code that has been"] - #[doc = " edited."] - #[doc = ""] - #[doc = " You must describe the edit both in terms of byte offsets and in terms of"] - #[doc = " (row, 
column) coordinates."] + #[doc = " Edit the syntax tree to keep it in sync with source code that has been\n edited.\n\n You must describe the edit both in terms of byte offsets and in terms of\n (row, column) coordinates."] pub fn ts_tree_edit(self_: *mut TSTree, edit: *const TSInputEdit); } extern "C" { - #[doc = " Compare an old edited syntax tree to a new syntax tree representing the same"] - #[doc = " document, returning an array of ranges whose syntactic structure has changed."] - #[doc = ""] - #[doc = " For this to work correctly, the old syntax tree must have been edited such"] - #[doc = " that its ranges match up to the new tree. Generally, you'll want to call"] - #[doc = " this function right after calling one of the `ts_parser_parse` functions."] - #[doc = " You need to pass the old tree that was passed to parse, as well as the new"] - #[doc = " tree that was returned from that function."] - #[doc = ""] - #[doc = " The returned array is allocated using `malloc` and the caller is responsible"] - #[doc = " for freeing it using `free`. The length of the array will be written to the"] - #[doc = " given `length` pointer."] + #[doc = " Compare an old edited syntax tree to a new syntax tree representing the same\n document, returning an array of ranges whose syntactic structure has changed.\n\n For this to work correctly, the old syntax tree must have been edited such\n that its ranges match up to the new tree. Generally, you'll want to call\n this function right after calling one of the `ts_parser_parse` functions.\n You need to pass the old tree that was passed to parse, as well as the new\n tree that was returned from that function.\n\n The returned array is allocated using `malloc` and the caller is responsible\n for freeing it using `free`. 
The length of the array will be written to the\n given `length` pointer."] pub fn ts_tree_get_changed_ranges( old_tree: *const TSTree, new_tree: *const TSTree, @@ -407,32 +295,23 @@ extern "C" { pub fn ts_node_end_point(arg1: TSNode) -> TSPoint; } extern "C" { - #[doc = " Get an S-expression representing the node as a string."] - #[doc = ""] - #[doc = " This string is allocated with `malloc` and the caller is responsible for"] - #[doc = " freeing it using `free`."] + #[doc = " Get an S-expression representing the node as a string.\n\n This string is allocated with `malloc` and the caller is responsible for\n freeing it using `free`."] pub fn ts_node_string(arg1: TSNode) -> *mut ::std::os::raw::c_char; } extern "C" { - #[doc = " Check if the node is null. Functions like `ts_node_child` and"] - #[doc = " `ts_node_next_sibling` will return a null node to indicate that no such node"] - #[doc = " was found."] + #[doc = " Check if the node is null. Functions like `ts_node_child` and\n `ts_node_next_sibling` will return a null node to indicate that no such node\n was found."] pub fn ts_node_is_null(arg1: TSNode) -> bool; } extern "C" { - #[doc = " Check if the node is *named*. Named nodes correspond to named rules in the"] - #[doc = " grammar, whereas *anonymous* nodes correspond to string literals in the"] - #[doc = " grammar."] + #[doc = " Check if the node is *named*. Named nodes correspond to named rules in the\n grammar, whereas *anonymous* nodes correspond to string literals in the\n grammar."] pub fn ts_node_is_named(arg1: TSNode) -> bool; } extern "C" { - #[doc = " Check if the node is *missing*. Missing nodes are inserted by the parser in"] - #[doc = " order to recover from certain kinds of syntax errors."] + #[doc = " Check if the node is *missing*. Missing nodes are inserted by the parser in\n order to recover from certain kinds of syntax errors."] pub fn ts_node_is_missing(arg1: TSNode) -> bool; } extern "C" { - #[doc = " Check if the node is *extra*. 
Extra nodes represent things like comments,"] - #[doc = " which are not required the grammar, but can appear anywhere."] + #[doc = " Check if the node is *extra*. Extra nodes represent things like comments,\n which are not required the grammar, but can appear anywhere."] pub fn ts_node_is_extra(arg1: TSNode) -> bool; } extern "C" { @@ -448,13 +327,11 @@ extern "C" { pub fn ts_node_parent(arg1: TSNode) -> TSNode; } extern "C" { - #[doc = " Get the node's child at the given index, where zero represents the first"] - #[doc = " child."] + #[doc = " Get the node's child at the given index, where zero represents the first\n child."] pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { - #[doc = " Get the field name for node's child at the given index, where zero represents"] - #[doc = " the first child. Returns NULL, if no field is found."] + #[doc = " Get the field name for node's child at the given index, where zero represents\n the first child. Returns NULL, if no field is found."] pub fn ts_node_field_name_for_child(arg1: TSNode, arg2: u32) -> *const ::std::os::raw::c_char; } extern "C" { @@ -462,15 +339,11 @@ extern "C" { pub fn ts_node_child_count(arg1: TSNode) -> u32; } extern "C" { - #[doc = " Get the node's *named* child at the given index."] - #[doc = ""] - #[doc = " See also `ts_node_is_named`."] + #[doc = " Get the node's *named* child at the given index.\n\n See also `ts_node_is_named`."] pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { - #[doc = " Get the node's number of *named* children."] - #[doc = ""] - #[doc = " See also `ts_node_is_named`."] + #[doc = " Get the node's number of *named* children.\n\n See also `ts_node_is_named`."] pub fn ts_node_named_child_count(arg1: TSNode) -> u32; } extern "C" { @@ -482,10 +355,7 @@ extern "C" { ) -> TSNode; } extern "C" { - #[doc = " Get the node's child with the given numerical field id."] - #[doc = ""] - #[doc = " You can convert a field name to an id using the"] - 
#[doc = " `ts_language_field_id_for_name` function."] + #[doc = " Get the node's child with the given numerical field id.\n\n You can convert a field name to an id using the\n `ts_language_field_id_for_name` function."] pub fn ts_node_child_by_field_id(arg1: TSNode, arg2: TSFieldId) -> TSNode; } extern "C" { @@ -511,8 +381,7 @@ extern "C" { pub fn ts_node_first_named_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { - #[doc = " Get the smallest node within this node that spans the given range of bytes"] - #[doc = " or (row, column) positions."] + #[doc = " Get the smallest node within this node that spans the given range of bytes\n or (row, column) positions."] pub fn ts_node_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; } extern "C" { @@ -520,8 +389,7 @@ extern "C" { -> TSNode; } extern "C" { - #[doc = " Get the smallest named node within this node that spans the given range of"] - #[doc = " bytes or (row, column) positions."] + #[doc = " Get the smallest named node within this node that spans the given range of\n bytes or (row, column) positions."] pub fn ts_node_named_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; } extern "C" { @@ -532,13 +400,7 @@ extern "C" { ) -> TSNode; } extern "C" { - #[doc = " Edit the node to keep it in-sync with source code that has been edited."] - #[doc = ""] - #[doc = " This function is only rarely needed. When you edit a syntax tree with the"] - #[doc = " `ts_tree_edit` function, all of the nodes that you retrieve from the tree"] - #[doc = " afterward will already reflect the edit. You only need to use `ts_node_edit`"] - #[doc = " when you have a `TSNode` instance that you want to keep and continue to use"] - #[doc = " after an edit."] + #[doc = " Edit the node to keep it in-sync with source code that has been edited.\n\n This function is only rarely needed. 
When you edit a syntax tree with the\n `ts_tree_edit` function, all of the nodes that you retrieve from the tree\n afterward will already reflect the edit. You only need to use `ts_node_edit`\n when you have a `TSNode` instance that you want to keep and continue to use\n after an edit."] pub fn ts_node_edit(arg1: *mut TSNode, arg2: *const TSInputEdit); } extern "C" { @@ -546,11 +408,7 @@ extern "C" { pub fn ts_node_eq(arg1: TSNode, arg2: TSNode) -> bool; } extern "C" { - #[doc = " Create a new tree cursor starting from the given node."] - #[doc = ""] - #[doc = " A tree cursor allows you to walk a syntax tree more efficiently than is"] - #[doc = " possible using the `TSNode` functions. It is a mutable object that is always"] - #[doc = " on a certain syntax node, and can be moved imperatively to different nodes."] + #[doc = " Create a new tree cursor starting from the given node.\n\n A tree cursor allows you to walk a syntax tree more efficiently than is\n possible using the `TSNode` functions. 
It is a mutable object that is always\n on a certain syntax node, and can be moved imperatively to different nodes."] pub fn ts_tree_cursor_new(arg1: TSNode) -> TSTreeCursor; } extern "C" { @@ -566,48 +424,29 @@ extern "C" { pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode; } extern "C" { - #[doc = " Get the field name of the tree cursor's current node."] - #[doc = ""] - #[doc = " This returns `NULL` if the current node doesn't have a field."] - #[doc = " See also `ts_node_child_by_field_name`."] + #[doc = " Get the field name of the tree cursor's current node.\n\n This returns `NULL` if the current node doesn't have a field.\n See also `ts_node_child_by_field_name`."] pub fn ts_tree_cursor_current_field_name( arg1: *const TSTreeCursor, ) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = " Get the field id of the tree cursor's current node."] - #[doc = ""] - #[doc = " This returns zero if the current node doesn't have a field."] - #[doc = " See also `ts_node_child_by_field_id`, `ts_language_field_id_for_name`."] + #[doc = " Get the field id of the tree cursor's current node.\n\n This returns zero if the current node doesn't have a field.\n See also `ts_node_child_by_field_id`, `ts_language_field_id_for_name`."] pub fn ts_tree_cursor_current_field_id(arg1: *const TSTreeCursor) -> TSFieldId; } extern "C" { - #[doc = " Move the cursor to the parent of its current node."] - #[doc = ""] - #[doc = " This returns `true` if the cursor successfully moved, and returns `false`"] - #[doc = " if there was no parent node (the cursor was already on the root node)."] + #[doc = " Move the cursor to the parent of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false`\n if there was no parent node (the cursor was already on the root node)."] pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool; } extern "C" { - #[doc = " Move the cursor to the next sibling of its current node."] - #[doc = ""] - 
#[doc = " This returns `true` if the cursor successfully moved, and returns `false`"] - #[doc = " if there was no next sibling node."] + #[doc = " Move the cursor to the next sibling of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false`\n if there was no next sibling node."] pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool; } extern "C" { - #[doc = " Move the cursor to the first child of its current node."] - #[doc = ""] - #[doc = " This returns `true` if the cursor successfully moved, and returns `false`"] - #[doc = " if there were no children."] + #[doc = " Move the cursor to the first child of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false`\n if there were no children."] pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool; } extern "C" { - #[doc = " Move the cursor to the first child of its current node that extends beyond"] - #[doc = " the given byte offset or point."] - #[doc = ""] - #[doc = " This returns the index of the child node if one was found, and returns -1"] - #[doc = " if no such child was found."] + #[doc = " Move the cursor to the first child of its current node that extends beyond\n the given byte offset or point.\n\n This returns the index of the child node if one was found, and returns -1\n if no such child was found."] pub fn ts_tree_cursor_goto_first_child_for_byte(arg1: *mut TSTreeCursor, arg2: u32) -> i64; } extern "C" { @@ -618,15 +457,7 @@ extern "C" { pub fn ts_tree_cursor_copy(arg1: *const TSTreeCursor) -> TSTreeCursor; } extern "C" { - #[doc = " Create a new query from a string containing one or more S-expression"] - #[doc = " patterns. 
The query is associated with a particular language, and can"] - #[doc = " only be run on syntax nodes parsed with that language."] - #[doc = ""] - #[doc = " If all of the given patterns are valid, this returns a `TSQuery`."] - #[doc = " If a pattern is invalid, this returns `NULL`, and provides two pieces"] - #[doc = " of information about the problem:"] - #[doc = " 1. The byte offset of the error is written to the `error_offset` parameter."] - #[doc = " 2. The type of error is written to the `error_type` parameter."] + #[doc = " Create a new query from a string containing one or more S-expression\n patterns. The query is associated with a particular language, and can\n only be run on syntax nodes parsed with that language.\n\n If all of the given patterns are valid, this returns a `TSQuery`.\n If a pattern is invalid, this returns `NULL`, and provides two pieces\n of information about the problem:\n 1. The byte offset of the error is written to the `error_offset` parameter.\n 2. The type of error is written to the `error_type` parameter."] pub fn ts_query_new( language: *const TSLanguage, source: *const ::std::os::raw::c_char, @@ -650,27 +481,11 @@ extern "C" { pub fn ts_query_string_count(arg1: *const TSQuery) -> u32; } extern "C" { - #[doc = " Get the byte offset where the given pattern starts in the query's source."] - #[doc = ""] - #[doc = " This can be useful when combining queries by concatenating their source"] - #[doc = " code strings."] + #[doc = " Get the byte offset where the given pattern starts in the query's source.\n\n This can be useful when combining queries by concatenating their source\n code strings."] pub fn ts_query_start_byte_for_pattern(arg1: *const TSQuery, arg2: u32) -> u32; } extern "C" { - #[doc = " Get all of the predicates for the given pattern in the query."] - #[doc = ""] - #[doc = " The predicates are represented as a single array of steps. 
There are three"] - #[doc = " types of steps in this array, which correspond to the three legal values for"] - #[doc = " the `type` field:"] - #[doc = " - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names"] - #[doc = " of captures. Their `value_id` can be used with the"] - #[doc = " `ts_query_capture_name_for_id` function to obtain the name of the capture."] - #[doc = " - `TSQueryPredicateStepTypeString` - Steps with this type represent literal"] - #[doc = " strings. Their `value_id` can be used with the"] - #[doc = " `ts_query_string_value_for_id` function to obtain their string value."] - #[doc = " - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels*"] - #[doc = " that represent the end of an individual predicate. If a pattern has two"] - #[doc = " predicates, then there will be two steps with this `type` in the array."] + #[doc = " Get all of the predicates for the given pattern in the query.\n\n The predicates are represented as a single array of steps. There are three\n types of steps in this array, which correspond to the three legal values for\n the `type` field:\n - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names\n of captures. Their `value_id` can be used with the\n `ts_query_capture_name_for_id` function to obtain the name of the capture.\n - `TSQueryPredicateStepTypeString` - Steps with this type represent literal\n strings. Their `value_id` can be used with the\n `ts_query_string_value_for_id` function to obtain their string value.\n - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels*\n that represent the end of an individual predicate. 
If a pattern has two\n predicates, then there will be two steps with this `type` in the array."] pub fn ts_query_predicates_for_pattern( self_: *const TSQuery, pattern_index: u32, @@ -678,18 +493,16 @@ extern "C" { ) -> *const TSQueryPredicateStep; } extern "C" { - pub fn ts_query_is_pattern_non_local(self_: *const TSQuery, pattern_index: u32) -> bool; + pub fn ts_query_is_pattern_rooted(self_: *const TSQuery, pattern_index: u32) -> bool; } extern "C" { - pub fn ts_query_is_pattern_rooted(self_: *const TSQuery, pattern_index: u32) -> bool; + pub fn ts_query_is_pattern_non_local(self_: *const TSQuery, pattern_index: u32) -> bool; } extern "C" { pub fn ts_query_is_pattern_guaranteed_at_step(self_: *const TSQuery, byte_offset: u32) -> bool; } extern "C" { - #[doc = " Get the name and length of one of the query's captures, or one of the"] - #[doc = " query's string literals. Each capture and string is associated with a"] - #[doc = " numeric id based on the order that it appeared in the query's source."] + #[doc = " Get the name and length of one of the query's captures, or one of the\n query's string literals. Each capture and string is associated with a\n numeric id based on the order that it appeared in the query's source."] pub fn ts_query_capture_name_for_id( arg1: *const TSQuery, id: u32, @@ -697,8 +510,7 @@ extern "C" { ) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = " Get the quantifier of the query's captures. Each capture is * associated"] - #[doc = " with a numeric id based on the order that it appeared in the query's source."] + #[doc = " Get the quantifier of the query's captures. 
Each capture is * associated\n with a numeric id based on the order that it appeared in the query's source."] pub fn ts_query_capture_quantifier_for_id( arg1: *const TSQuery, pattern_id: u32, @@ -713,11 +525,7 @@ extern "C" { ) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = " Disable a certain capture within a query."] - #[doc = ""] - #[doc = " This prevents the capture from being returned in matches, and also avoids"] - #[doc = " any resource usage associated with recording the capture. Currently, there"] - #[doc = " is no way to undo this."] + #[doc = " Disable a certain capture within a query.\n\n This prevents the capture from being returned in matches, and also avoids\n any resource usage associated with recording the capture. Currently, there\n is no way to undo this."] pub fn ts_query_disable_capture( arg1: *mut TSQuery, arg2: *const ::std::os::raw::c_char, @@ -725,33 +533,11 @@ extern "C" { ); } extern "C" { - #[doc = " Disable a certain pattern within a query."] - #[doc = ""] - #[doc = " This prevents the pattern from matching and removes most of the overhead"] - #[doc = " associated with the pattern. Currently, there is no way to undo this."] + #[doc = " Disable a certain pattern within a query.\n\n This prevents the pattern from matching and removes most of the overhead\n associated with the pattern. Currently, there is no way to undo this."] pub fn ts_query_disable_pattern(arg1: *mut TSQuery, arg2: u32); } extern "C" { - #[doc = " Create a new cursor for executing a given query."] - #[doc = ""] - #[doc = " The cursor stores the state that is needed to iteratively search"] - #[doc = " for matches. To use the query cursor, first call `ts_query_cursor_exec`"] - #[doc = " to start running a given query on a given syntax node. Then, there are"] - #[doc = " two options for consuming the results of the query:"] - #[doc = " 1. 
Repeatedly call `ts_query_cursor_next_match` to iterate over all of the"] - #[doc = " *matches* in the order that they were found. Each match contains the"] - #[doc = " index of the pattern that matched, and an array of captures. Because"] - #[doc = " multiple patterns can match the same set of nodes, one match may contain"] - #[doc = " captures that appear *before* some of the captures from a previous match."] - #[doc = " 2. Repeatedly call `ts_query_cursor_next_capture` to iterate over all of the"] - #[doc = " individual *captures* in the order that they appear. This is useful if"] - #[doc = " don't care about which pattern matched, and just want a single ordered"] - #[doc = " sequence of captures."] - #[doc = ""] - #[doc = " If you don't care about consuming all of the results, you can stop calling"] - #[doc = " `ts_query_cursor_next_match` or `ts_query_cursor_next_capture` at any point."] - #[doc = " You can then start executing another query on another node by calling"] - #[doc = " `ts_query_cursor_exec` again."] + #[doc = " Create a new cursor for executing a given query.\n\n The cursor stores the state that is needed to iteratively search\n for matches. To use the query cursor, first call `ts_query_cursor_exec`\n to start running a given query on a given syntax node. Then, there are\n two options for consuming the results of the query:\n 1. Repeatedly call `ts_query_cursor_next_match` to iterate over all of the\n *matches* in the order that they were found. Each match contains the\n index of the pattern that matched, and an array of captures. Because\n multiple patterns can match the same set of nodes, one match may contain\n captures that appear *before* some of the captures from a previous match.\n 2. Repeatedly call `ts_query_cursor_next_capture` to iterate over all of the\n individual *captures* in the order that they appear. 
This is useful if\n don't care about which pattern matched, and just want a single ordered\n sequence of captures.\n\n If you don't care about consuming all of the results, you can stop calling\n `ts_query_cursor_next_match` or `ts_query_cursor_next_capture` at any point.\n You can then start executing another query on another node by calling\n `ts_query_cursor_exec` again."] pub fn ts_query_cursor_new() -> *mut TSQueryCursor; } extern "C" { @@ -763,15 +549,7 @@ extern "C" { pub fn ts_query_cursor_exec(arg1: *mut TSQueryCursor, arg2: *const TSQuery, arg3: TSNode); } extern "C" { - #[doc = " Manage the maximum number of in-progress matches allowed by this query"] - #[doc = " cursor."] - #[doc = ""] - #[doc = " Query cursors have an optional maximum capacity for storing lists of"] - #[doc = " in-progress captures. If this capacity is exceeded, then the"] - #[doc = " earliest-starting match will silently be dropped to make room for further"] - #[doc = " matches. This maximum capacity is optional — by default, query cursors allow"] - #[doc = " any number of pending matches, dynamically allocating new space for them as"] - #[doc = " needed as the query is executed."] + #[doc = " Manage the maximum number of in-progress matches allowed by this query\n cursor.\n\n Query cursors have an optional maximum capacity for storing lists of\n in-progress captures. If this capacity is exceeded, then the\n earliest-starting match will silently be dropped to make room for further\n matches. 
This maximum capacity is optional — by default, query cursors allow\n any number of pending matches, dynamically allocating new space for them as\n needed as the query is executed."] pub fn ts_query_cursor_did_exceed_match_limit(arg1: *const TSQueryCursor) -> bool; } extern "C" { @@ -781,28 +559,21 @@ extern "C" { pub fn ts_query_cursor_set_match_limit(arg1: *mut TSQueryCursor, arg2: u32); } extern "C" { - #[doc = " Set the range of bytes or (row, column) positions in which the query"] - #[doc = " will be executed."] + #[doc = " Set the range of bytes or (row, column) positions in which the query\n will be executed."] pub fn ts_query_cursor_set_byte_range(arg1: *mut TSQueryCursor, arg2: u32, arg3: u32); } extern "C" { pub fn ts_query_cursor_set_point_range(arg1: *mut TSQueryCursor, arg2: TSPoint, arg3: TSPoint); } extern "C" { - #[doc = " Advance to the next match of the currently running query."] - #[doc = ""] - #[doc = " If there is a match, write it to `*match` and return `true`."] - #[doc = " Otherwise, return `false`."] + #[doc = " Advance to the next match of the currently running query.\n\n If there is a match, write it to `*match` and return `true`.\n Otherwise, return `false`."] pub fn ts_query_cursor_next_match(arg1: *mut TSQueryCursor, match_: *mut TSQueryMatch) -> bool; } extern "C" { pub fn ts_query_cursor_remove_match(arg1: *mut TSQueryCursor, id: u32); } extern "C" { - #[doc = " Advance to the next capture of the currently running query."] - #[doc = ""] - #[doc = " If there is a capture, write its match to `*match` and its index within"] - #[doc = " the matche's capture list to `*capture_index`. Otherwise, return `false`."] + #[doc = " Advance to the next capture of the currently running query.\n\n If there is a capture, write its match to `*match` and its index within\n the matche's capture list to `*capture_index`. 
Otherwise, return `false`."] pub fn ts_query_cursor_next_capture( arg1: *mut TSQueryCursor, match_: *mut TSQueryMatch, @@ -849,35 +620,15 @@ extern "C" { ) -> TSFieldId; } extern "C" { - #[doc = " Check whether the given node type id belongs to named nodes, anonymous nodes,"] - #[doc = " or a hidden nodes."] - #[doc = ""] - #[doc = " See also `ts_node_is_named`. Hidden nodes are never returned from the API."] + #[doc = " Check whether the given node type id belongs to named nodes, anonymous nodes,\n or a hidden nodes.\n\n See also `ts_node_is_named`. Hidden nodes are never returned from the API."] pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType; } extern "C" { - #[doc = " Get the ABI version number for this language. This version number is used"] - #[doc = " to ensure that languages were generated by a compatible version of"] - #[doc = " Tree-sitter."] - #[doc = ""] - #[doc = " See also `ts_parser_set_language`."] + #[doc = " Get the ABI version number for this language. This version number is used\n to ensure that languages were generated by a compatible version of\n Tree-sitter.\n\n See also `ts_parser_set_language`."] pub fn ts_language_version(arg1: *const TSLanguage) -> u32; } extern "C" { - #[doc = " Set the allocation functions used by the library."] - #[doc = ""] - #[doc = " By default, Tree-sitter uses the standard libc allocation functions,"] - #[doc = " but aborts the process when an allocation fails. This function lets"] - #[doc = " you supply alternative allocation functions at runtime."] - #[doc = ""] - #[doc = " If you pass `NULL` for any parameter, Tree-sitter will switch back to"] - #[doc = " its default implementation of that function."] - #[doc = ""] - #[doc = " If you call this function after the library has already been used, then"] - #[doc = " you must ensure that either:"] - #[doc = " 1. All the existing objects have been freed."] - #[doc = " 2. 
The new allocator shares its state with the old one, so it is capable"] - #[doc = " of freeing memory that was allocated by the old allocator."] + #[doc = " Set the allocation functions used by the library.\n\n By default, Tree-sitter uses the standard libc allocation functions,\n but aborts the process when an allocation fails. This function lets\n you supply alternative allocation functions at runtime.\n\n If you pass `NULL` for any parameter, Tree-sitter will switch back to\n its default implementation of that function.\n\n If you call this function after the library has already been used, then\n you must ensure that either:\n 1. All the existing objects have been freed.\n 2. The new allocator shares its state with the old one, so it is capable\n of freeing memory that was allocated by the old allocator."] pub fn ts_set_allocator( new_malloc: ::std::option::Option< unsafe extern "C" fn(arg1: usize) -> *mut ::std::os::raw::c_void, diff --git a/script/generate-bindings b/script/generate-bindings index 54abac06..19975d37 100755 --- a/script/generate-bindings +++ b/script/generate-bindings @@ -5,8 +5,8 @@ header_path='lib/include/tree_sitter/api.h' bindgen \ --no-layout-tests \ - --whitelist-type '^TS.*' \ - --whitelist-function '^ts_.*' \ + --allowlist-type '^TS.*' \ + --allowlist-function '^ts_.*' \ --blocklist-type '^__.*' \ --size_t-is-usize \ $header_path > $output_path From 0376533c04e07784b69590daab5684ac51730c11 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Wed, 5 Apr 2023 20:41:47 +0300 Subject: [PATCH 077/347] cicd: add a test job with enabled UBSAN --- .github/workflows/build.yml | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 27b31085..cf3628a1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -169,3 +169,34 @@ jobs: lib/binding_web/tree-sitter.wasm if-no-files-found: error retention-days: 7 + + check_undefined_behaviour: 
+ name: Undefined behaviour checks + runs-on: ubuntu-latest + env: + TREE_SITTER: ${{ github.workspace }}/target/release/tree-sitter + steps: + - name: Checkout source code + uses: actions/checkout@v3 + + - name: Install UBSAN library + run: sudo apt-get install -y libubsan1 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - name: Build CLI + run: cargo build --release + + - name: Fetch fixtures + run: script/fetch-fixtures + + - name: Generate fixtures + run: script/generate-fixtures + + - name: Run main tests with undefined behaviour sanitizer (UBSAN) + env: + UBSAN_OPTIONS: halt_on_error=1 + CFLAGS: -fsanitize=undefined + RUSTFLAGS: -lubsan + run: cargo test -- --test-threads 1 From 4c2a36302bec7495626250e3dd018252ec4309da Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Thu, 6 Apr 2023 03:59:55 +0300 Subject: [PATCH 078/347] lib: fix OOB in query engine reported in #2162 --- lib/src/query.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/src/query.c b/lib/src/query.c index 18bd7fd3..da7a4166 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -938,6 +938,9 @@ static inline int analysis_state__compare( } static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) { + if (self->depth == 0) { + return &self->stack[0]; + } return &self->stack[self->depth - 1]; } From 0d326824d25912ab0fb558e54980554111f71e64 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Thu, 6 Apr 2023 01:49:50 +0300 Subject: [PATCH 079/347] test: add a reproducing test for #2162 --- cli/src/tests/github_issue_test.rs | 16 ++++++++++++++++ cli/src/tests/mod.rs | 1 + 2 files changed, 17 insertions(+) create mode 100644 cli/src/tests/github_issue_test.rs diff --git a/cli/src/tests/github_issue_test.rs b/cli/src/tests/github_issue_test.rs new file mode 100644 index 00000000..42fe3e9a --- /dev/null +++ b/cli/src/tests/github_issue_test.rs @@ -0,0 +1,16 @@ +// Tests in this mod need be executed with enabled UBSAN library: +// ``` +// 
UBSAN_OPTIONS="halt_on_error=1" \ +// CFLAGS="-fsanitize=undefined" \ +// RUSTFLAGS="-lubsan" \ +// cargo test --target $(rustc -vV | sed -nr 's/^host: //p') -- --test-threads 1 +// ``` + +use crate::tests::helpers::fixtures::get_language; +use tree_sitter::Query; + +#[test] +fn issue_2162_out_of_bound() { + let language = get_language("java"); + assert!(Query::new(language, "(package_declaration _ (_) @name _)").is_ok()); +} diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index 1b804450..03a588b9 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -1,4 +1,5 @@ mod corpus_test; +mod github_issue_test; mod helpers; mod highlight_test; mod node_test; From 52f22a65eb7153aca6563372425c4c79a84242b9 Mon Sep 17 00:00:00 2001 From: Christian Clason Date: Thu, 6 Apr 2023 14:24:19 +0200 Subject: [PATCH 080/347] cicd: build CLI for macos-arm64 --- .github/workflows/build.yml | 12 +++++++++--- cli/npm/install.js | 8 ++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index cf3628a1..d4a68a06 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,6 +6,7 @@ env: CROSS_DEBUG: 1 on: + workflow_dispatch: workflow_call: inputs: ref: @@ -27,6 +28,7 @@ jobs: - { name: windows-x64 , target: x86_64-pc-windows-msvc , os: windows-latest } - { name: windows-x86 , target: i686-pc-windows-msvc , os: windows-latest } - { name: macos-x64 , target: x86_64-apple-darwin , os: macos-latest } + - { name: macos-arm64 , target: aarch64-apple-darwin , os: macos-latest } env: BUILD_CMD: cargo @@ -125,30 +127,34 @@ jobs: run: make.sh CFLAGS="-Werror" -j - name: Build wasm library + if: ${{ !matrix.job.use-cross && matrix.job.name != 'macos-arm64' }} # Not used run: script/build-wasm - name: Build CLI run: $BUILD_CMD build --release --target=${{ matrix.job.target }} - name: Fetch fixtures + if: ${{ matrix.job.name != 'macos-arm64' }} # Not used run: script/fetch-fixtures - name: 
Generate fixtures + if: ${{ matrix.job.name != 'macos-arm64' }} # Can't run CLI on host run: script/generate-fixtures - name: Generate WASM fixtures - if: "!matrix.job.use-cross" + if: ${{ !matrix.job.use-cross && matrix.job.name != 'macos-arm64' }} # Not used run: script/generate-fixtures-wasm - name: Run main tests + if: ${{ matrix.job.name != 'macos-arm64' }} # Can't run CLI on host run: $BUILD_CMD test --target=${{ matrix.job.target }} - name: Run wasm tests - if: "!matrix.job.use-cross" # TODO: Install Emscripten into custom cross images + if: ${{ !matrix.job.use-cross && matrix.job.name != 'macos-arm64' }} # Not used run: script/test-wasm - name: Run benchmarks - if: "!matrix.job.use-cross" # It doesn't make sense to benchmark something in an emulator + if: ${{ !matrix.job.use-cross && matrix.job.name != 'macos-arm64' }} # Cross-compiled benchmarks make no sense run: $BUILD_CMD bench benchmark -p tree-sitter-cli --target=${{ matrix.job.target }} - name: Upload CLI artifact diff --git a/cli/npm/install.js b/cli/npm/install.js index 2790b47d..9e572c3a 100755 --- a/cli/npm/install.js +++ b/cli/npm/install.js @@ -16,14 +16,10 @@ const platformName = { let archName = { 'x64': 'x64', 'x86': 'x86', - 'ia32': 'x86' + 'ia32': 'x86', + 'arm64': 'arm64' }[process.arch]; -// ARM macs can run x64 binaries via Rosetta. Rely on that for now. 
-if (platformName === 'macos' && process.arch === 'arm64') { - archName = 'x64'; -} - if (!platformName || !archName) { console.error( `Cannot install tree-sitter-cli for platform ${process.platform}, architecture ${process.arch}` From 71f32a21664502ea08eb2e406c08680650dddba9 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Fri, 7 Apr 2023 10:22:51 +0300 Subject: [PATCH 081/347] cicd: additional tweaks --- .github/workflows/build.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d4a68a06..61543552 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,7 +6,6 @@ env: CROSS_DEBUG: 1 on: - workflow_dispatch: workflow_call: inputs: ref: @@ -27,8 +26,8 @@ jobs: - { name: linux-x86 , target: i686-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - { name: windows-x64 , target: x86_64-pc-windows-msvc , os: windows-latest } - { name: windows-x86 , target: i686-pc-windows-msvc , os: windows-latest } - - { name: macos-x64 , target: x86_64-apple-darwin , os: macos-latest } - { name: macos-arm64 , target: aarch64-apple-darwin , os: macos-latest } + - { name: macos-x64 , target: x86_64-apple-darwin , os: macos-latest } env: BUILD_CMD: cargo @@ -123,7 +122,7 @@ jobs: esac - name: Build C library - if: "!contains(matrix.job.os, 'windows')" # Requires an additional adapted Makefile for `cl.exe` compiler + if: ${{ !contains(matrix.job.os, 'windows') }} # Requires an additional adapted Makefile for `cl.exe` compiler run: make.sh CFLAGS="-Werror" -j - name: Build wasm library @@ -138,7 +137,7 @@ jobs: run: script/fetch-fixtures - name: Generate fixtures - if: ${{ matrix.job.name != 'macos-arm64' }} # Can't run CLI on host + if: ${{ matrix.job.name != 'macos-arm64' }} # Can't natively run CLI on runner's host run: script/generate-fixtures - name: Generate WASM fixtures @@ -146,7 +145,7 @@ jobs: run: script/generate-fixtures-wasm - name: Run main tests - 
if: ${{ matrix.job.name != 'macos-arm64' }} # Can't run CLI on host + if: ${{ matrix.job.name != 'macos-arm64' }} # Can't natively run CLI on runner's host run: $BUILD_CMD test --target=${{ matrix.job.target }} - name: Run wasm tests From 10178ade356f33f7603a217fe39680ae6c1c08ad Mon Sep 17 00:00:00 2001 From: Joel Spadin Date: Fri, 7 Apr 2023 12:57:50 -0500 Subject: [PATCH 082/347] fix: Use / paths when building WASM Changed the build-wasm command to always use forward slashes in paths, since using Windows style paths breaks if the build is run with Docker. Fixes #532 --- Cargo.lock | 7 +++++++ cli/Cargo.toml | 1 + cli/src/wasm.rs | 15 ++++++++++----- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 52013e49..7c3fa7eb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -440,6 +440,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "path-slash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e91099d4268b0e11973f036e885d652fb0b21fedcf69738c627f94db6a44f42" + [[package]] name = "percent-encoding" version = "2.2.0" @@ -774,6 +780,7 @@ dependencies = [ "indexmap", "lazy_static", "log", + "path-slash", "pretty_assertions", "rand", "regex", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 48473095..ee6d52af 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -30,6 +30,7 @@ glob = "0.3.0" html-escape = "0.2.6" indexmap = "1" lazy_static = "1.2.0" +path-slash = "0.2.1" regex = "1" regex-syntax = "0.6.4" rustc-hash = "1" diff --git a/cli/src/wasm.rs b/cli/src/wasm.rs index 467fef71..35f09b55 100644 --- a/cli/src/wasm.rs +++ b/cli/src/wasm.rs @@ -1,5 +1,6 @@ use super::generate::parse_grammar::GrammarJSON; use anyhow::{anyhow, Context, Result}; +use path_slash::PathExt as _; use std::ffi::{OsStr, OsString}; use std::fs; use std::path::Path; @@ -41,7 +42,7 @@ pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Resu volume_string = OsString::from(parent); 
volume_string.push(":/src:Z"); command.arg("--workdir"); - command.arg(&Path::new("/src").join(filename)); + command.arg(Path::new("/src").join(filename).to_slash_lossy().as_ref()); } else { volume_string = OsString::from(language_dir); volume_string.push(":/src:Z"); @@ -103,14 +104,18 @@ pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Resu let scanner_cpp_path = src.join("scanner.cpp"); if language_dir.join(&scanner_cc_path).exists() { - command.arg("-xc++").arg(&scanner_cc_path); + command + .arg("-xc++") + .arg(scanner_cc_path.to_slash_lossy().as_ref()); } else if language_dir.join(&scanner_cpp_path).exists() { - command.arg("-xc++").arg(&scanner_cpp_path); + command + .arg("-xc++") + .arg(scanner_cpp_path.to_slash_lossy().as_ref()); } else if language_dir.join(&scanner_c_path).exists() { - command.arg(&scanner_c_path); + command.arg(scanner_c_path.to_slash_lossy().as_ref()); } - command.arg(&parser_c_path); + command.arg(parser_c_path.to_slash_lossy().as_ref()); let output = command .output() From bb122d6d4788205f72ad3acaa95157dc63d15f4b Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sat, 8 Apr 2023 01:47:57 +0300 Subject: [PATCH 083/347] cicd: separate sanitize workflow + manual trigger for build workflow --- .github/workflows/CICD.yml | 7 +++++- .github/workflows/build.yml | 32 +-------------------------- .github/workflows/sanitize.yml | 40 ++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 32 deletions(-) create mode 100644 .github/workflows/sanitize.yml diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index 7c2351a8..e675781f 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -46,6 +46,11 @@ jobs: with: package: tree-sitter-cli + sanitize: + name: Sanitize + needs: [init, fast_checks] + uses: ./.github/workflows/sanitize.yml + build: name: Build & Test needs: [init, fast_checks] @@ -55,7 +60,7 @@ jobs: release: name: Release - needs: [init, fast_checks, 
full_checks, min_version, build] + needs: [init, fast_checks, full_checks, min_version, build, sanitize] if: > github.event.pull_request.head.repo.full_name == github.repository && startsWith(github.head_ref, 'release/v') diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 61543552..6cfa0d48 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,6 +6,7 @@ env: CROSS_DEBUG: 1 on: + workflow_dispatch: workflow_call: inputs: ref: @@ -174,34 +175,3 @@ jobs: lib/binding_web/tree-sitter.wasm if-no-files-found: error retention-days: 7 - - check_undefined_behaviour: - name: Undefined behaviour checks - runs-on: ubuntu-latest - env: - TREE_SITTER: ${{ github.workspace }}/target/release/tree-sitter - steps: - - name: Checkout source code - uses: actions/checkout@v3 - - - name: Install UBSAN library - run: sudo apt-get install -y libubsan1 - - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@stable - - - name: Build CLI - run: cargo build --release - - - name: Fetch fixtures - run: script/fetch-fixtures - - - name: Generate fixtures - run: script/generate-fixtures - - - name: Run main tests with undefined behaviour sanitizer (UBSAN) - env: - UBSAN_OPTIONS: halt_on_error=1 - CFLAGS: -fsanitize=undefined - RUSTFLAGS: -lubsan - run: cargo test -- --test-threads 1 diff --git a/.github/workflows/sanitize.yml b/.github/workflows/sanitize.yml new file mode 100644 index 00000000..ebfb477c --- /dev/null +++ b/.github/workflows/sanitize.yml @@ -0,0 +1,40 @@ +name: Sunitize + +env: + CARGO_TERM_COLOR: always + RUSTFLAGS: "-D warnings" + +on: + workflow_call: + +jobs: + check_undefined_behaviour: + name: Undefined behaviour checks + runs-on: ubuntu-latest + env: + TREE_SITTER: ${{ github.workspace }}/target/release/tree-sitter + steps: + - name: Checkout source code + uses: actions/checkout@v3 + + - name: Install UBSAN library + run: sudo apt-get install -y libubsan1 + + - name: Install Rust toolchain + uses: 
dtolnay/rust-toolchain@stable + + - name: Build CLI + run: cargo build --release + + - name: Fetch fixtures + run: script/fetch-fixtures + + - name: Generate fixtures + run: script/generate-fixtures + + - name: Run main tests with undefined behaviour sanitizer (UBSAN) + env: + UBSAN_OPTIONS: halt_on_error=1 + CFLAGS: -fsanitize=undefined + RUSTFLAGS: -lubsan + run: cargo test -- --test-threads 1 From 96086806d56404e65e82fc17a144506e98d30e43 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sat, 8 Apr 2023 05:14:12 +0300 Subject: [PATCH 084/347] cicd: change author configuring for a tag --- .github/workflows/release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 87a06761..979d95a3 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -78,8 +78,8 @@ jobs: tag: ${{ steps.tag_name.outputs.tag }} message: "Release ${{ steps.tag_name.outputs.tag }}" run: | - git config user.name "${GITHUB_ACTOR}" - git config user.email "${GITHUB_ACTOR}@users.noreply.github.com" + git config user.name "$(git log -1 --pretty='%cn')" + git config user.email "$(git log -1 --pretty='%ce')" git tag -a "$tag" HEAD -m "$message" git push origin "$tag" From f03f024ec45c092f4a314565d65f641b5f13741c Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sat, 8 Apr 2023 07:35:34 +0300 Subject: [PATCH 085/347] cicd: change approach for matrix shaping with an anchor name plus extras --- .github/workflows/build.yml | 112 ++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 51 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6cfa0d48..2df9bc27 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,20 +15,39 @@ on: jobs: build: - name: ${{ matrix.job.name }} (${{ matrix.job.target }}) (${{ matrix.job.os }}) - runs-on: ${{ matrix.job.os }} + name: ${{ matrix.name }} (${{ matrix.target }}) (${{ 
matrix.os }}) + runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - job: - - { name: linux-aarch64 , target: aarch64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - - { name: linux-arm , target: arm-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true } - - { name: linux-x64 , target: x86_64-unknown-linux-gnu , os: ubuntu-latest } - - { name: linux-x86 , target: i686-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - - { name: windows-x64 , target: x86_64-pc-windows-msvc , os: windows-latest } - - { name: windows-x86 , target: i686-pc-windows-msvc , os: windows-latest } - - { name: macos-arm64 , target: aarch64-apple-darwin , os: macos-latest } - - { name: macos-x64 , target: x86_64-apple-darwin , os: macos-latest } + name: + - linux-aarch64 # + - linux-arm # + - linux-x64 # + - linux-x86 # + - windows-x64 # <-- No C library build - requires an additional adapted Makefile for `cl.exe` compiler + - windows-x86 # -- // -- + - macos-arm64 # <-- MacOS M1/M2 - no tests, only CLI build to be published on release artifacts + - macos-x64 # + + include: + - { name: linux-aarch64 , target: aarch64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { name: linux-arm , target: arm-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true } + - { name: linux-x64 , target: x86_64-unknown-linux-gnu , os: ubuntu-latest } + - { name: linux-x86 , target: i686-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { name: windows-x64 , target: x86_64-pc-windows-msvc , os: windows-latest } + - { name: windows-x86 , target: i686-pc-windows-msvc , os: windows-latest } + - { name: macos-arm64 , target: aarch64-apple-darwin , os: macos-latest } + - { name: macos-x64 , target: x86_64-apple-darwin , os: macos-latest } + + # Cross compilers for C library + - { name: linux-aarch64 , cc: aarch64-linux-gnu-gcc , ar: aarch64-linux-gnu-ar } + - { name: linux-arm , cc: arm-unknown-linux-gnueabihf-gcc , ar: 
arm-unknown-linux-gnueabihf-gcc-ar } + - { name: linux-x86 , cc: i686-linux-gnu-gcc , ar: i686-linux-gnu-ar } + + # See #2041 tree-sitter issue + - { name: windows-x64 , rust_test_threads: 1 } + - { name: windows-x86 , rust_test_threads: 1 } env: BUILD_CMD: cargo @@ -55,20 +74,20 @@ jobs: - name: Install Rust toolchain uses: dtolnay/rust-toolchain@stable with: - targets: ${{ matrix.job.target }} + targets: ${{ matrix.target }} - name: Install cross - if: matrix.job.use-cross + if: matrix.use-cross uses: taiki-e/install-action@v2 with: tool: cross - name: Build custom cross image - if: ${{ matrix.job.use-cross && matrix.job.os == 'ubuntu-latest' }} + if: ${{ matrix.use-cross && matrix.os == 'ubuntu-latest' }} run: | cd .. - target="${{ matrix.job.target }}" + target="${{ matrix.target }}" image=ghcr.io/cross-rs/$target:custom echo "CROSS_IMAGE=$image" >> $GITHUB_ENV @@ -85,88 +104,79 @@ jobs: cd - - - name: Setup extra env + - name: Setup env extras + env: + RUST_TEST_THREADS: ${{ matrix.rust_test_threads }} + USE_CROSS: ${{ matrix.use-cross }} + CC: ${{ matrix.cc }} + AR: ${{ matrix.ar }} run: | PATH="$PWD/.github/scripts:$PATH" echo "PATH=$PATH" >> $GITHUB_ENV echo "ROOT=$PWD" >> $GITHUB_ENV echo "TREE_SITTER=tree-sitter.sh" >> $GITHUB_ENV - export TARGET=${{ matrix.job.target }} + export TARGET=${{ matrix.target }} echo "TARGET=$TARGET" >> $GITHUB_ENV - USE_CROSS="${{ matrix.job.use-cross }}" + [ -n "$RUST_TEST_THREADS" ] && \ + echo "RUST_TEST_THREADS=$RUST_TEST_THREADS" >> $GITHUB_ENV + + [ -n "$CC" ] && echo "CC=$CC" >> $GITHUB_ENV + [ -n "$AR" ] && echo "AR=$AR" >> $GITHUB_ENV if [ "$USE_CROSS" == "true" ]; then echo "BUILD_CMD=cross" >> $GITHUB_ENV - - export CROSS=1; echo "CROSS=$CROSS" >> $GITHUB_ENV - + export CROSS=1; echo "CROSS=1" >> $GITHUB_ENV runner=$(cross.sh bash -c "env | sed -nr '/^CARGO_TARGET_.*_RUNNER=/s///p'") [ -n "$runner" ] && echo "CROSS_RUNNER=$runner" >> $GITHUB_ENV - echo "runner: $runner" - - case "$TARGET" in - 
i686-unknown-linux-gnu) CC=i686-linux-gnu-gcc AR=i686-linux-gnu-ar ;; - aarch64-unknown-linux-gnu) CC=aarch64-linux-gnu-gcc AR=aarch64-linux-gnu-ar ;; - arm-unknown-linux-gnueabihf) CC=arm-unknown-linux-gnueabihf-gcc AR=arm-unknown-linux-gnueabihf-gcc-ar ;; - esac - - [ -n "$CC" ] && echo "CC=$CC" >> $GITHUB_ENV - [ -n "$AR" ] && echo "AR=$AR" >> $GITHUB_ENV fi - case "$TARGET" in - *-windows-*) - echo "RUST_TEST_THREADS=1" >> $GITHUB_ENV # See #2041 tree-sitter issue - ;; - esac - - name: Build C library - if: ${{ !contains(matrix.job.os, 'windows') }} # Requires an additional adapted Makefile for `cl.exe` compiler + if: ${{ !contains(matrix.os, 'windows') }} # Requires an additional adapted Makefile for `cl.exe` compiler run: make.sh CFLAGS="-Werror" -j - name: Build wasm library - if: ${{ !matrix.job.use-cross && matrix.job.name != 'macos-arm64' }} # Not used + if: ${{ !matrix.use-cross && matrix.name != 'macos-arm64' }} # Not used run: script/build-wasm - name: Build CLI - run: $BUILD_CMD build --release --target=${{ matrix.job.target }} + run: $BUILD_CMD build --release --target=${{ matrix.target }} - name: Fetch fixtures - if: ${{ matrix.job.name != 'macos-arm64' }} # Not used + if: ${{ matrix.name != 'macos-arm64' }} # Not used run: script/fetch-fixtures - name: Generate fixtures - if: ${{ matrix.job.name != 'macos-arm64' }} # Can't natively run CLI on runner's host + if: ${{ matrix.name != 'macos-arm64' }} # Can't natively run CLI on runner's host run: script/generate-fixtures - name: Generate WASM fixtures - if: ${{ !matrix.job.use-cross && matrix.job.name != 'macos-arm64' }} # Not used + if: ${{ !matrix.use-cross && matrix.name != 'macos-arm64' }} # Not used run: script/generate-fixtures-wasm - name: Run main tests - if: ${{ matrix.job.name != 'macos-arm64' }} # Can't natively run CLI on runner's host - run: $BUILD_CMD test --target=${{ matrix.job.target }} + if: ${{ matrix.name != 'macos-arm64' }} # Can't natively run CLI on runner's host + run: 
$BUILD_CMD test --target=${{ matrix.target }} - name: Run wasm tests - if: ${{ !matrix.job.use-cross && matrix.job.name != 'macos-arm64' }} # Not used + if: ${{ !matrix.use-cross && matrix.name != 'macos-arm64' }} # Not used run: script/test-wasm - name: Run benchmarks - if: ${{ !matrix.job.use-cross && matrix.job.name != 'macos-arm64' }} # Cross-compiled benchmarks make no sense - run: $BUILD_CMD bench benchmark -p tree-sitter-cli --target=${{ matrix.job.target }} + if: ${{ !matrix.use-cross && matrix.name != 'macos-arm64' }} # Cross-compiled benchmarks make no sense + run: $BUILD_CMD bench benchmark -p tree-sitter-cli --target=${{ matrix.target }} - name: Upload CLI artifact uses: actions/upload-artifact@v3 with: - name: tree-sitter.${{ matrix.job.name }} - path: target/${{ matrix.job.target }}/release/tree-sitter${{ contains(matrix.job.target, 'windows') && '.exe' || '' }} + name: tree-sitter.${{ matrix.name }} + path: target/${{ matrix.target }}/release/tree-sitter${{ contains(matrix.target, 'windows') && '.exe' || '' }} if-no-files-found: error retention-days: 7 - name: Upload WASM artifacts - if: ${{ matrix.job.name == 'linux-x64' }} + if: ${{ matrix.name == 'linux-x64' }} uses: actions/upload-artifact@v3 with: name: tree-sitter.wasm From d8caf6f8c530bef60f5ee419e0f4e96cb8452c63 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sat, 8 Apr 2023 08:42:17 +0300 Subject: [PATCH 086/347] chore(cicd): change formatting --- .github/workflows/build.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2df9bc27..b5c7ab60 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -89,15 +89,15 @@ jobs: target="${{ matrix.target }}" image=ghcr.io/cross-rs/$target:custom - echo "CROSS_IMAGE=$image" >> $GITHUB_ENV + echo "CROSS_IMAGE=$image" >> $GITHUB_ENV - echo "[target.$target]" >> Cross.toml - echo "image = \"$image\"" >> Cross.toml - echo 
"CROSS_CONFIG=$PWD/Cross.toml" >> $GITHUB_ENV + echo "[target.$target]" >> Cross.toml + echo "image = \"$image\"" >> Cross.toml + echo "CROSS_CONFIG=$PWD/Cross.toml" >> $GITHUB_ENV - echo "FROM ghcr.io/cross-rs/$target:edge" >> Dockerfile - echo "ENV DEBIAN_FRONTEND=noninteractive" >> Dockerfile - echo "RUN apt-get update && apt-get install -y nodejs" >> Dockerfile + echo "FROM ghcr.io/cross-rs/$target:edge" >> Dockerfile + echo "ENV DEBIAN_FRONTEND=noninteractive" >> Dockerfile + echo "RUN apt-get update && apt-get install -y nodejs" >> Dockerfile docker build -t $image . docker images docker run --rm $image env From 34e0ab696ce156843a76ab7f52b83ad2007dfe75 Mon Sep 17 00:00:00 2001 From: Carlo Teubner <435950+c4rlo@users.noreply.github.com> Date: Sat, 8 Apr 2023 18:10:44 +0100 Subject: [PATCH 087/347] docs: remove mention of Atom Atom is dead, so this mention seems unnecessary now. --- docs/section-4-syntax-highlighting.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/section-4-syntax-highlighting.md b/docs/section-4-syntax-highlighting.md index a6e5d74c..0cf7890f 100644 --- a/docs/section-4-syntax-highlighting.md +++ b/docs/section-4-syntax-highlighting.md @@ -9,8 +9,6 @@ Syntax highlighting is a very common feature in applications that deal with code This document explains how the Tree-sitter syntax highlighting system works, using the command line interface. If you are using `tree-sitter-highlight` library (either from C or from Rust), all of these concepts are still applicable, but the configuration data is provided using in-memory objects, rather than files. -**Note - If you are working on syntax highlighting in the [Atom](https://atom.io/) text editor, you should consult [the grammar-creation page](https://flight-manual.atom.io/hacking-atom/sections/creating-a-grammar/) of the Atom Flight Manual, *not* this document. 
Atom currently uses a different syntax highlighting system that is also based on Tree-sitter, but is older than the one described here.** - ## Overview All of the files needed to highlight a given language are normally included in the same git repository as the Tree-sitter grammar for that language (for example, [`tree-sitter-javascript`](https://github.com/tree-sitter/tree-sitter-javascript), [`tree-sitter-ruby`](https://github.com/tree-sitter/tree-sitter-ruby)). In order to run syntax highlighting from the command-line, three types of files are needed: From e3ea048db1cfd184594c92e5cd9869462e164ea2 Mon Sep 17 00:00:00 2001 From: Kait Lam Date: Tue, 21 Mar 2023 15:36:09 +1000 Subject: [PATCH 088/347] docs: alphabetise language bindings on front page This helps with readability when scanning for a particular language. It is, strangely, almost sorted with the exception of Java and Kotlin. --- docs/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/index.md b/docs/index.md index 8c3f9e52..9fb1fd2a 100644 --- a/docs/index.md +++ b/docs/index.md @@ -17,8 +17,10 @@ There are currently bindings that allow Tree-sitter to be used from the followin * [Go](https://github.com/smacker/go-tree-sitter) * [Haskell](https://github.com/tree-sitter/haskell-tree-sitter) +* [Java](https://github.com/serenadeai/java-tree-sitter) * [JavaScript (Node.js)](https://github.com/tree-sitter/node-tree-sitter) * [JavaScript (Wasm)](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) +* [Kotlin](https://github.com/oxisto/kotlintree) * [Lua](https://github.com/euclidianAce/ltreesitter) * [OCaml](https://github.com/returntocorp/ocaml-tree-sitter-core) * [Perl](https://metacpan.org/pod/Text::Treesitter) @@ -27,8 +29,6 @@ There are currently bindings that allow Tree-sitter to be used from the followin * [Ruby](https://github.com/calicoday/ruby-tree-sitter-ffi) * [Rust](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) * 
[Swift](https://github.com/ChimeHQ/SwiftTreeSitter) -* [Kotlin](https://github.com/oxisto/kotlintree) -* [Java](https://github.com/serenadeai/java-tree-sitter) By convention, bindings are named with the language first, eg. ruby-tree-sitter. From ae738c3c0f02557960c9c45b3bbac6f8ccb292f3 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Tue, 24 Jan 2023 14:25:29 +0200 Subject: [PATCH 089/347] fix(dsl): add support for rule refs to externals --- cli/npm/dsl.d.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/npm/dsl.d.ts b/cli/npm/dsl.d.ts index fdf43b55..316400f8 100644 --- a/cli/npm/dsl.d.ts +++ b/cli/npm/dsl.d.ts @@ -102,7 +102,7 @@ interface Grammar< externals?: ( $: Record>, previous: Rule[], - ) => SymbolRule[]; + ) => (SymbolRule | RegExp | string)[]; /** * An array of tokens that may appear anywhere in the language. This From 8c6d157ca571460bef2220512e9721e65261a12f Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sat, 11 Mar 2023 11:08:38 +0200 Subject: [PATCH 090/347] fix(dsl): fix formatting --- cli/npm/dsl.d.ts | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/cli/npm/dsl.d.ts b/cli/npm/dsl.d.ts index 316400f8..78b654f3 100644 --- a/cli/npm/dsl.d.ts +++ b/cli/npm/dsl.d.ts @@ -1,19 +1,19 @@ -type AliasRule = {type: 'ALIAS'; named: boolean; content: Rule; value: string}; -type BlankRule = {type: 'BLANK'}; -type ChoiceRule = {type: 'CHOICE'; members: Rule[]}; -type FieldRule = {type: 'FIELD'; name: string; content: Rule}; -type ImmediateTokenRule = {type: 'IMMEDIATE_TOKEN'; content: Rule}; -type PatternRule = {type: 'PATTERN'; value: string}; -type PrecDynamicRule = {type: 'PREC_DYNAMIC'; content: Rule; value: number}; -type PrecLeftRule = {type: 'PREC_LEFT'; content: Rule; value: number}; -type PrecRightRule = {type: 'PREC_RIGHT'; content: Rule; value: number}; -type PrecRule = {type: 'PREC'; content: Rule; value: number}; -type Repeat1Rule = {type: 'REPEAT1'; content: 
Rule}; -type RepeatRule = {type: 'REPEAT'; content: Rule}; -type SeqRule = {type: 'SEQ'; members: Rule[]}; -type StringRule = {type: 'STRING'; value: string}; -type SymbolRule = {type: 'SYMBOL'; name: Name}; -type TokenRule = {type: 'TOKEN'; content: Rule}; +type AliasRule = { type: 'ALIAS'; named: boolean; content: Rule; value: string }; +type BlankRule = { type: 'BLANK' }; +type ChoiceRule = { type: 'CHOICE'; members: Rule[] }; +type FieldRule = { type: 'FIELD'; name: string; content: Rule }; +type ImmediateTokenRule = { type: 'IMMEDIATE_TOKEN'; content: Rule }; +type PatternRule = { type: 'PATTERN'; value: string }; +type PrecDynamicRule = { type: 'PREC_DYNAMIC'; content: Rule; value: number }; +type PrecLeftRule = { type: 'PREC_LEFT'; content: Rule; value: number }; +type PrecRightRule = { type: 'PREC_RIGHT'; content: Rule; value: number }; +type PrecRule = { type: 'PREC'; content: Rule; value: number }; +type Repeat1Rule = { type: 'REPEAT1'; content: Rule }; +type RepeatRule = { type: 'REPEAT'; content: Rule }; +type SeqRule = { type: 'SEQ'; members: Rule[] }; +type StringRule = { type: 'STRING'; value: string }; +type SymbolRule = { type: 'SYMBOL'; name: Name }; +type TokenRule = { type: 'TOKEN'; content: Rule }; type Rule = | AliasRule @@ -48,8 +48,8 @@ type RuleBuilders< RuleName extends string, BaseGrammarRuleName extends string > = { - [name in RuleName]: RuleBuilder; -}; + [name in RuleName]: RuleBuilder; + }; interface Grammar< RuleName extends string, @@ -153,8 +153,8 @@ interface Grammar< type GrammarSchema = { [K in keyof Grammar]: K extends 'rules' - ? Record - : Grammar[K]; + ? 
Record + : Grammar[K]; }; /** From 1f051d339c2639aa6c062bfc504034d427b71a90 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sun, 9 Apr 2023 20:40:18 +0300 Subject: [PATCH 091/347] chore(docs): fix misprint repetitions and remove dangling spaces --- docs/section-3-creating-parsers.md | 2 +- docs/section-5-implementation.md | 2 +- lib/binding_web/README.md | 16 ++++++++-------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index 0842edbb..842b87eb 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -46,7 +46,7 @@ npm install --save nan npm install --save-dev tree-sitter-cli ``` -The last command will install the CLI into the `node_modules` folder in your working directory. An executable program called `tree-sitter` will be created inside of `node_modules/.bin/`. You may want to follow the Node.js convention of adding that folder to your your `PATH` so that you can easily run this program when working in this directory. +The last command will install the CLI into the `node_modules` folder in your working directory. An executable program called `tree-sitter` will be created inside of `node_modules/.bin/`. You may want to follow the Node.js convention of adding that folder to your `PATH` so that you can easily run this program when working in this directory. ```sh # In your shell profile script diff --git a/docs/section-5-implementation.md b/docs/section-5-implementation.md index e0fa9661..4f97d760 100644 --- a/docs/section-5-implementation.md +++ b/docs/section-5-implementation.md @@ -21,7 +21,7 @@ The `tree-sitter` CLI's most important feature is the `generate` subcommand. Thi ### Parsing a Grammar -First, Tree-sitter must must evaluate the JavaScript code in `grammar.js` and convert the grammar to a JSON format. It does this by shelling out to `node`. 
The format of the grammars is formally specified by the JSON schema in [grammar-schema.json](https://github.com/tree-sitter/tree-sitter/blob/master/cli/src/generate/grammar-schema.json). The parsing is implemented in [parse_grammar.rs](https://github.com/tree-sitter/tree-sitter/blob/master/cli/src/generate/parse_grammar.rs). +First, Tree-sitter must evaluate the JavaScript code in `grammar.js` and convert the grammar to a JSON format. It does this by shelling out to `node`. The format of the grammars is formally specified by the JSON schema in [grammar-schema.json](https://github.com/tree-sitter/tree-sitter/blob/master/cli/src/generate/grammar-schema.json). The parsing is implemented in [parse_grammar.rs](https://github.com/tree-sitter/tree-sitter/blob/master/cli/src/generate/parse_grammar.rs). ### Grammar Rules diff --git a/lib/binding_web/README.md b/lib/binding_web/README.md index a75cd9f0..1d645a33 100644 --- a/lib/binding_web/README.md +++ b/lib/binding_web/README.md @@ -5,7 +5,7 @@ WebAssembly bindings to the [Tree-sitter](https://github.com/tree-sitter/tree-si ### Setup -You can download the the `tree-sitter.js` and `tree-sitter.wasm` files from [the latest GitHub release](https://github.com/tree-sitter/tree-sitter/releases/latest) and load them using a standalone script: +You can download the `tree-sitter.js` and `tree-sitter.wasm` files from [the latest GitHub release](https://github.com/tree-sitter/tree-sitter/releases/latest) and load them using a standalone script: ```html "); @@ -512,20 +514,22 @@ fn test_highlighting_via_c_api() { let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap(); let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap(); let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap(); - c::ts_highlighter_add_language( - highlighter, - lang_name.as_ptr(), - js_scope.as_ptr(), - js_injection_regex.as_ptr(), - language, - highlights_query.as_ptr() as *const c_char, - 
injections_query.as_ptr() as *const c_char, - locals_query.as_ptr() as *const c_char, - highlights_query.len() as u32, - injections_query.len() as u32, - locals_query.len() as u32, - false, - ); + unsafe { + c::ts_highlighter_add_language( + highlighter, + lang_name.as_ptr(), + js_scope.as_ptr(), + js_injection_regex.as_ptr(), + language, + highlights_query.as_ptr() as *const c_char, + injections_query.as_ptr() as *const c_char, + locals_query.as_ptr() as *const c_char, + highlights_query.len() as u32, + injections_query.len() as u32, + locals_query.len() as u32, + false, + ); + } let html_scope = c_string("text.html.basic"); let html_injection_regex = c_string("^html"); @@ -534,31 +538,35 @@ fn test_highlighting_via_c_api() { let queries = get_language_queries_path("html"); let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap(); let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap(); - c::ts_highlighter_add_language( - highlighter, - lang_name.as_ptr(), - html_scope.as_ptr(), - html_injection_regex.as_ptr(), - language, - highlights_query.as_ptr() as *const c_char, - injections_query.as_ptr() as *const c_char, - ptr::null(), - highlights_query.len() as u32, - injections_query.len() as u32, - 0, - false, - ); + unsafe { + c::ts_highlighter_add_language( + highlighter, + lang_name.as_ptr(), + html_scope.as_ptr(), + html_injection_regex.as_ptr(), + language, + highlights_query.as_ptr() as *const c_char, + injections_query.as_ptr() as *const c_char, + ptr::null(), + highlights_query.len() as u32, + injections_query.len() as u32, + 0, + false, + ); + } let buffer = c::ts_highlight_buffer_new(); - c::ts_highlighter_highlight( - highlighter, - html_scope.as_ptr(), - source_code.as_ptr(), - source_code.as_bytes().len() as u32, - buffer, - ptr::null_mut(), - ); + unsafe { + c::ts_highlighter_highlight( + highlighter, + html_scope.as_ptr(), + source_code.as_ptr(), + source_code.as_bytes().len() as u32, + buffer, + 
ptr::null_mut(), + ); + } let output_bytes = c::ts_highlight_buffer_content(buffer); let output_line_offsets = c::ts_highlight_buffer_line_offsets(buffer); @@ -589,8 +597,10 @@ fn test_highlighting_via_c_api() { ] ); - c::ts_highlighter_delete(highlighter); - c::ts_highlight_buffer_delete(buffer); + unsafe { + c::ts_highlighter_delete(highlighter); + c::ts_highlight_buffer_delete(buffer); + } } #[test] diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index 07e5d1de..20392749 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -9,7 +9,7 @@ use std::{ use tree_sitter::Point; use tree_sitter_tags::{c_lib as c, Error, TagsConfiguration, TagsContext}; -const PYTHON_TAG_QUERY: &'static str = r#" +const PYTHON_TAG_QUERY: &str = r#" ( (function_definition name: (identifier) @name @@ -39,7 +39,7 @@ const PYTHON_TAG_QUERY: &'static str = r#" attribute: (identifier) @name)) @reference.call "#; -const JS_TAG_QUERY: &'static str = r#" +const JS_TAG_QUERY: &str = r#" ( (comment)* @doc . 
(class_declaration @@ -68,7 +68,7 @@ const JS_TAG_QUERY: &'static str = r#" function: (identifier) @name) @reference.call "#; -const RUBY_TAG_QUERY: &'static str = r#" +const RUBY_TAG_QUERY: &str = r#" (method name: (_) @name) @definition.method @@ -359,25 +359,29 @@ fn test_tags_via_c_api() { ); let c_scope_name = CString::new(scope_name).unwrap(); - let result = c::ts_tagger_add_language( - tagger, - c_scope_name.as_ptr(), - language, - JS_TAG_QUERY.as_ptr(), - ptr::null(), - JS_TAG_QUERY.len() as u32, - 0, - ); + let result = unsafe { + c::ts_tagger_add_language( + tagger, + c_scope_name.as_ptr(), + language, + JS_TAG_QUERY.as_ptr(), + ptr::null(), + JS_TAG_QUERY.len() as u32, + 0, + ) + }; assert_eq!(result, c::TSTagsError::Ok); - let result = c::ts_tagger_tag( - tagger, - c_scope_name.as_ptr(), - source_code.as_ptr(), - source_code.len() as u32, - buffer, - ptr::null(), - ); + let result = unsafe { + c::ts_tagger_tag( + tagger, + c_scope_name.as_ptr(), + source_code.as_ptr(), + source_code.len() as u32, + buffer, + ptr::null(), + ) + }; assert_eq!(result, c::TSTagsError::Ok); let tags = unsafe { slice::from_raw_parts( @@ -419,8 +423,10 @@ fn test_tags_via_c_api() { ] ); - c::ts_tags_buffer_delete(buffer); - c::ts_tagger_delete(tagger); + unsafe { + c::ts_tags_buffer_delete(buffer); + c::ts_tagger_delete(tagger); + } }); } diff --git a/highlight/src/c_lib.rs b/highlight/src/c_lib.rs index 33197088..78cdd8c2 100644 --- a/highlight/src/c_lib.rs +++ b/highlight/src/c_lib.rs @@ -32,8 +32,14 @@ pub enum ErrorCode { InvalidLanguageName, } +/// Create a new [`TSHighlighter`] instance. +/// +/// # Safety +/// +/// The caller must ensure that the `highlight_names` and `attribute_strings` arrays are valid for +/// the lifetime of the returned [`TSHighlighter`] instance, and are non-null. 
#[no_mangle] -pub extern "C" fn ts_highlighter_new( +pub unsafe extern "C" fn ts_highlighter_new( highlight_names: *const *const c_char, attribute_strings: *const *const c_char, highlight_count: u32, @@ -43,11 +49,11 @@ pub extern "C" fn ts_highlighter_new( let attribute_strings = unsafe { slice::from_raw_parts(attribute_strings, highlight_count as usize) }; let highlight_names = highlight_names - .into_iter() + .iter() .map(|s| unsafe { CStr::from_ptr(*s).to_string_lossy().to_string() }) .collect::>(); let attribute_strings = attribute_strings - .into_iter() + .iter() .map(|s| unsafe { CStr::from_ptr(*s).to_bytes() }) .collect(); let carriage_return_index = highlight_names.iter().position(|s| s == "carriage-return"); @@ -59,8 +65,14 @@ pub extern "C" fn ts_highlighter_new( })) } +/// Add a language to a [`TSHighlighter`] instance. +/// +/// # Safety +/// +/// The caller must ensure that any `*const c_char` parameters are valid for the lifetime of +/// the [`TSHighlighter`] instance, and are non-null. #[no_mangle] -pub extern "C" fn ts_highlighter_add_language( +pub unsafe extern "C" fn ts_highlighter_add_language( this: *mut TSHighlighter, language_name: *const c_char, scope_name: *const c_char, @@ -125,7 +137,7 @@ pub extern "C" fn ts_highlighter_add_language( apply_all_captures, ) .or(Err(ErrorCode::InvalidQuery))?; - config.configure(&this.highlight_names.as_slice()); + config.configure(this.highlight_names.as_slice()); this.languages.insert(scope_name, (injection_regex, config)); Ok(()) @@ -145,13 +157,23 @@ pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer { })) } +/// Deleteis a [`TSHighlighter`] instance. +/// +/// # Safety +/// +/// `this` must be non-null. #[no_mangle] -pub extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) { +pub unsafe extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) { drop(unsafe { Box::from_raw(this) }) } +/// Deleteis a [`TSHighlightBuffer`] instance. 
+/// +/// # Safety +/// +/// `this` must be non-null. #[no_mangle] -pub extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) { +pub unsafe extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) { drop(unsafe { Box::from_raw(this) }) } @@ -179,8 +201,14 @@ pub extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) this.renderer.line_offsets.len() as u32 } +/// Highlight a string of source code. +/// +/// # Safety +/// +/// The caller must ensure that `scope_name`, `source_code`, and `cancellation_flag` are valid for +/// the lifetime of the [`TSHighlighter`] instance, and are non-null. #[no_mangle] -pub extern "C" fn ts_highlighter_highlight( +pub unsafe extern "C" fn ts_highlighter_highlight( this: *const TSHighlighter, scope_name: *const c_char, source_code: *const c_char, @@ -238,15 +266,8 @@ impl TSHighlighter { .renderer .render(highlights, source_code, &|s| self.attribute_strings[s.0]); match result { - Err(Error::Cancelled) => { - return ErrorCode::Timeout; - } - Err(Error::InvalidLanguage) => { - return ErrorCode::InvalidLanguage; - } - Err(Error::Unknown) => { - return ErrorCode::Timeout; - } + Err(Error::Cancelled) | Err(Error::Unknown) => ErrorCode::Timeout, + Err(Error::InvalidLanguage) => ErrorCode::InvalidLanguage, Ok(()) => ErrorCode::Ok, } } else { diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index c8f39d2c..0952d851 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -66,13 +66,23 @@ pub extern "C" fn ts_tagger_new() -> *mut TSTagger { })) } +/// Delete a TSTagger. +/// +/// # Safety +/// +/// `this` must be non-null #[no_mangle] -pub extern "C" fn ts_tagger_delete(this: *mut TSTagger) { +pub unsafe extern "C" fn ts_tagger_delete(this: *mut TSTagger) { drop(unsafe { Box::from_raw(this) }) } +/// Add a language to a TSTagger. 
+/// +/// # Safety +/// +/// `this` must be non-null #[no_mangle] -pub extern "C" fn ts_tagger_add_language( +pub unsafe extern "C" fn ts_tagger_add_language( this: *mut TSTagger, scope_name: *const c_char, language: Language, @@ -84,7 +94,7 @@ pub extern "C" fn ts_tagger_add_language( let tagger = unwrap_mut_ptr(this); let scope_name = unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) }; let tags_query = unsafe { slice::from_raw_parts(tags_query, tags_query_len as usize) }; - let locals_query = if locals_query != std::ptr::null() { + let locals_query = if !locals_query.is_null() { unsafe { slice::from_raw_parts(locals_query, locals_query_len as usize) } } else { &[] @@ -111,8 +121,13 @@ pub extern "C" fn ts_tagger_add_language( } } +/// Tag some source code. +/// +/// # Safety +/// +/// `this` must be non-null #[no_mangle] -pub extern "C" fn ts_tagger_tag( +pub unsafe extern "C" fn ts_tagger_tag( this: *mut TSTagger, scope_name: *const c_char, source_code: *const u8, @@ -201,8 +216,13 @@ pub extern "C" fn ts_tags_buffer_new() -> *mut TSTagsBuffer { })) } +/// Delete a TSTagsBuffer. +/// +/// # Safety +/// +/// `this` must be non-null #[no_mangle] -pub extern "C" fn ts_tags_buffer_delete(this: *mut TSTagsBuffer) { +pub unsafe extern "C" fn ts_tags_buffer_delete(this: *mut TSTagsBuffer) { drop(unsafe { Box::from_raw(this) }) } @@ -236,8 +256,13 @@ pub extern "C" fn ts_tags_buffer_found_parse_error(this: *const TSTagsBuffer) -> buffer.errors_present } +/// Get the syntax kinds for a given scope name. 
+/// +/// # Safety +/// +/// `this` must be non-null #[no_mangle] -pub extern "C" fn ts_tagger_syntax_kinds_for_scope_name( +pub unsafe extern "C" fn ts_tagger_syntax_kinds_for_scope_name( this: *mut TSTagger, scope_name: *const c_char, len: *mut u32, From ffae7d611563f0a7e6fcfafbcb34e14f0c722a9d Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sun, 20 Aug 2023 01:48:17 +0300 Subject: [PATCH 287/347] fix: mark helper Rust funcs that receive raw pointers as unsafe --- cli/src/tests/highlight_test.rs | 8 ++--- highlight/src/c_lib.rs | 62 +++++++++++++++------------------ tags/src/c_lib.rs | 34 +++++++++--------- 3 files changed, 50 insertions(+), 54 deletions(-) diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index c788c177..c4ca0b49 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -568,10 +568,10 @@ fn test_highlighting_via_c_api() { ); } - let output_bytes = c::ts_highlight_buffer_content(buffer); - let output_line_offsets = c::ts_highlight_buffer_line_offsets(buffer); - let output_len = c::ts_highlight_buffer_len(buffer); - let output_line_count = c::ts_highlight_buffer_line_count(buffer); + let output_bytes = unsafe { c::ts_highlight_buffer_content(buffer) }; + let output_line_offsets = unsafe { c::ts_highlight_buffer_line_offsets(buffer) }; + let output_len = unsafe { c::ts_highlight_buffer_len(buffer) }; + let output_line_count = unsafe { c::ts_highlight_buffer_line_count(buffer) }; let output_bytes = unsafe { slice::from_raw_parts(output_bytes, output_len as usize) }; let output_line_offsets = diff --git a/highlight/src/c_lib.rs b/highlight/src/c_lib.rs index 78cdd8c2..2fc6934e 100644 --- a/highlight/src/c_lib.rs +++ b/highlight/src/c_lib.rs @@ -44,17 +44,15 @@ pub unsafe extern "C" fn ts_highlighter_new( attribute_strings: *const *const c_char, highlight_count: u32, ) -> *mut TSHighlighter { - let highlight_names = - unsafe { slice::from_raw_parts(highlight_names, highlight_count as 
usize) }; - let attribute_strings = - unsafe { slice::from_raw_parts(attribute_strings, highlight_count as usize) }; + let highlight_names = slice::from_raw_parts(highlight_names, highlight_count as usize); + let attribute_strings = slice::from_raw_parts(attribute_strings, highlight_count as usize); let highlight_names = highlight_names - .iter() - .map(|s| unsafe { CStr::from_ptr(*s).to_string_lossy().to_string() }) + .into_iter() + .map(|s| CStr::from_ptr(*s).to_string_lossy().to_string()) .collect::>(); let attribute_strings = attribute_strings - .iter() - .map(|s| unsafe { CStr::from_ptr(*s).to_bytes() }) + .into_iter() + .map(|s| CStr::from_ptr(*s).to_bytes()) .collect(); let carriage_return_index = highlight_names.iter().position(|s| s == "carriage-return"); Box::into_raw(Box::new(TSHighlighter { @@ -88,7 +86,7 @@ pub unsafe extern "C" fn ts_highlighter_add_language( ) -> ErrorCode { let f = move || { let this = unwrap_mut_ptr(this); - let scope_name = unsafe { CStr::from_ptr(scope_name) }; + let scope_name = CStr::from_ptr(scope_name); let scope_name = scope_name .to_str() .or(Err(ErrorCode::InvalidUtf8))? @@ -96,29 +94,26 @@ pub unsafe extern "C" fn ts_highlighter_add_language( let injection_regex = if injection_regex.is_null() { None } else { - let pattern = unsafe { CStr::from_ptr(injection_regex) }; + let pattern = CStr::from_ptr(injection_regex); let pattern = pattern.to_str().or(Err(ErrorCode::InvalidUtf8))?; Some(Regex::new(pattern).or(Err(ErrorCode::InvalidRegex))?) 
}; - let highlight_query = unsafe { - slice::from_raw_parts(highlight_query as *const u8, highlight_query_len as usize) - }; + let highlight_query = + slice::from_raw_parts(highlight_query as *const u8, highlight_query_len as usize); + let highlight_query = str::from_utf8(highlight_query).or(Err(ErrorCode::InvalidUtf8))?; let injection_query = if injection_query_len > 0 { - let query = unsafe { - slice::from_raw_parts(injection_query as *const u8, injection_query_len as usize) - }; + let query = + slice::from_raw_parts(injection_query as *const u8, injection_query_len as usize); str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))? } else { "" }; let locals_query = if locals_query_len > 0 { - let query = unsafe { - slice::from_raw_parts(locals_query as *const u8, locals_query_len as usize) - }; + let query = slice::from_raw_parts(locals_query as *const u8, locals_query_len as usize); str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))? } else { "" @@ -164,7 +159,7 @@ pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer { /// `this` must be non-null. #[no_mangle] pub unsafe extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) { - drop(unsafe { Box::from_raw(this) }) + drop(Box::from_raw(this)) } /// Deleteis a [`TSHighlightBuffer`] instance. @@ -174,29 +169,31 @@ pub unsafe extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) { /// `this` must be non-null. 
#[no_mangle] pub unsafe extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) { - drop(unsafe { Box::from_raw(this) }) + drop(Box::from_raw(this)) } #[no_mangle] -pub extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 { +pub unsafe extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 { let this = unwrap_ptr(this); this.renderer.html.as_slice().as_ptr() } #[no_mangle] -pub extern "C" fn ts_highlight_buffer_line_offsets(this: *const TSHighlightBuffer) -> *const u32 { +pub unsafe extern "C" fn ts_highlight_buffer_line_offsets( + this: *const TSHighlightBuffer, +) -> *const u32 { let this = unwrap_ptr(this); this.renderer.line_offsets.as_slice().as_ptr() } #[no_mangle] -pub extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 { +pub unsafe extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 { let this = unwrap_ptr(this); this.renderer.html.len() as u32 } #[no_mangle] -pub extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 { +pub unsafe extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 { let this = unwrap_ptr(this); this.renderer.line_offsets.len() as u32 } @@ -218,10 +215,9 @@ pub unsafe extern "C" fn ts_highlighter_highlight( ) -> ErrorCode { let this = unwrap_ptr(this); let output = unwrap_mut_ptr(output); - let scope_name = unwrap(unsafe { CStr::from_ptr(scope_name).to_str() }); - let source_code = - unsafe { slice::from_raw_parts(source_code as *const u8, source_code_len as usize) }; - let cancellation_flag = unsafe { cancellation_flag.as_ref() }; + let scope_name = unwrap(CStr::from_ptr(scope_name).to_str()); + let source_code = slice::from_raw_parts(source_code as *const u8, source_code_len as usize); + let cancellation_flag = cancellation_flag.as_ref(); this.highlight(source_code, scope_name, output, cancellation_flag) } @@ -276,15 +272,15 @@ impl TSHighlighter { 
} } -fn unwrap_ptr<'a, T>(result: *const T) -> &'a T { - unsafe { result.as_ref() }.unwrap_or_else(|| { +unsafe fn unwrap_ptr<'a, T>(result: *const T) -> &'a T { + result.as_ref().unwrap_or_else(|| { eprintln!("{}:{} - pointer must not be null", file!(), line!()); abort(); }) } -fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T { - unsafe { result.as_mut() }.unwrap_or_else(|| { +unsafe fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T { + result.as_mut().unwrap_or_else(|| { eprintln!("{}:{} - pointer must not be null", file!(), line!()); abort(); }) diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 0952d851..df21f181 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -73,7 +73,7 @@ pub extern "C" fn ts_tagger_new() -> *mut TSTagger { /// `this` must be non-null #[no_mangle] pub unsafe extern "C" fn ts_tagger_delete(this: *mut TSTagger) { - drop(unsafe { Box::from_raw(this) }) + drop(Box::from_raw(this)) } /// Add a language to a TSTagger. @@ -92,10 +92,10 @@ pub unsafe extern "C" fn ts_tagger_add_language( locals_query_len: u32, ) -> TSTagsError { let tagger = unwrap_mut_ptr(this); - let scope_name = unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) }; - let tags_query = unsafe { slice::from_raw_parts(tags_query, tags_query_len as usize) }; - let locals_query = if !locals_query.is_null() { - unsafe { slice::from_raw_parts(locals_query, locals_query_len as usize) } + let scope_name = unwrap(CStr::from_ptr(scope_name).to_str()); + let tags_query = slice::from_raw_parts(tags_query, tags_query_len as usize); + let locals_query = if locals_query != std::ptr::null() { + slice::from_raw_parts(locals_query, locals_query_len as usize) } else { &[] }; @@ -137,14 +137,14 @@ pub unsafe extern "C" fn ts_tagger_tag( ) -> TSTagsError { let tagger = unwrap_mut_ptr(this); let buffer = unwrap_mut_ptr(output); - let scope_name = unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) }; + let scope_name = unwrap(CStr::from_ptr(scope_name).to_str()); if let 
Some(config) = tagger.languages.get(scope_name) { shrink_and_clear(&mut buffer.tags, BUFFER_TAGS_RESERVE_CAPACITY); shrink_and_clear(&mut buffer.docs, BUFFER_DOCS_RESERVE_CAPACITY); - let source_code = unsafe { slice::from_raw_parts(source_code, source_code_len as usize) }; - let cancellation_flag = unsafe { cancellation_flag.as_ref() }; + let source_code = slice::from_raw_parts(source_code, source_code_len as usize); + let cancellation_flag = cancellation_flag.as_ref(); let tags = match buffer .context @@ -223,35 +223,35 @@ pub extern "C" fn ts_tags_buffer_new() -> *mut TSTagsBuffer { /// `this` must be non-null #[no_mangle] pub unsafe extern "C" fn ts_tags_buffer_delete(this: *mut TSTagsBuffer) { - drop(unsafe { Box::from_raw(this) }) + drop(Box::from_raw(this)) } #[no_mangle] -pub extern "C" fn ts_tags_buffer_tags(this: *const TSTagsBuffer) -> *const TSTag { +pub unsafe extern "C" fn ts_tags_buffer_tags(this: *const TSTagsBuffer) -> *const TSTag { let buffer = unwrap_ptr(this); buffer.tags.as_ptr() } #[no_mangle] -pub extern "C" fn ts_tags_buffer_tags_len(this: *const TSTagsBuffer) -> u32 { +pub unsafe extern "C" fn ts_tags_buffer_tags_len(this: *const TSTagsBuffer) -> u32 { let buffer = unwrap_ptr(this); buffer.tags.len() as u32 } #[no_mangle] -pub extern "C" fn ts_tags_buffer_docs(this: *const TSTagsBuffer) -> *const c_char { +pub unsafe extern "C" fn ts_tags_buffer_docs(this: *const TSTagsBuffer) -> *const c_char { let buffer = unwrap_ptr(this); buffer.docs.as_ptr() as *const c_char } #[no_mangle] -pub extern "C" fn ts_tags_buffer_docs_len(this: *const TSTagsBuffer) -> u32 { +pub unsafe extern "C" fn ts_tags_buffer_docs_len(this: *const TSTagsBuffer) -> u32 { let buffer = unwrap_ptr(this); buffer.docs.len() as u32 } #[no_mangle] -pub extern "C" fn ts_tags_buffer_found_parse_error(this: *const TSTagsBuffer) -> bool { +pub unsafe extern "C" fn ts_tags_buffer_found_parse_error(this: *const TSTagsBuffer) -> bool { let buffer = unwrap_ptr(this); 
buffer.errors_present } @@ -268,7 +268,7 @@ pub unsafe extern "C" fn ts_tagger_syntax_kinds_for_scope_name( len: *mut u32, ) -> *const *const c_char { let tagger = unwrap_mut_ptr(this); - let scope_name = unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) }; + let scope_name = unwrap(CStr::from_ptr(scope_name).to_str()); let len = unwrap_mut_ptr(len); *len = 0; @@ -279,14 +279,14 @@ pub unsafe extern "C" fn ts_tagger_syntax_kinds_for_scope_name( std::ptr::null() } -fn unwrap_ptr<'a, T>(result: *const T) -> &'a T { +unsafe fn unwrap_ptr<'a, T>(result: *const T) -> &'a T { unsafe { result.as_ref() }.unwrap_or_else(|| { eprintln!("{}:{} - pointer must not be null", file!(), line!()); abort(); }) } -fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T { +unsafe fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T { unsafe { result.as_mut() }.unwrap_or_else(|| { eprintln!("{}:{} - pointer must not be null", file!(), line!()); abort(); From c332066666b1bced7575aacd8f469d13b1e63437 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Sat, 19 Aug 2023 19:42:18 -0400 Subject: [PATCH 288/347] fix(safety): improve docs for unsafe C functions --- highlight/src/c_lib.rs | 65 +++++++++++++++++++++++++++++----- tags/src/c_lib.rs | 79 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 126 insertions(+), 18 deletions(-) diff --git a/highlight/src/c_lib.rs b/highlight/src/c_lib.rs index 2fc6934e..1ab4903a 100644 --- a/highlight/src/c_lib.rs +++ b/highlight/src/c_lib.rs @@ -47,11 +47,11 @@ pub unsafe extern "C" fn ts_highlighter_new( let highlight_names = slice::from_raw_parts(highlight_names, highlight_count as usize); let attribute_strings = slice::from_raw_parts(attribute_strings, highlight_count as usize); let highlight_names = highlight_names - .into_iter() + .iter() .map(|s| CStr::from_ptr(*s).to_string_lossy().to_string()) .collect::>(); let attribute_strings = attribute_strings - .into_iter() + .iter() .map(|s| CStr::from_ptr(*s).to_bytes()) .collect(); let 
carriage_return_index = highlight_names.iter().position(|s| s == "carriage-return"); @@ -65,9 +65,14 @@ pub unsafe extern "C" fn ts_highlighter_new( /// Add a language to a [`TSHighlighter`] instance. /// +/// Returns an [`ErrorCode`] indicating whether the language was added successfully or not. +/// /// # Safety /// -/// The caller must ensure that any `*const c_char` parameters are valid for the lifetime of +/// `this` must be non-null and must be a valid pointer to a [`TSHighlighter`] instance +/// created by [`ts_highlighter_new`]. +/// +/// The caller must ensure that any `*const c_char` (C-style string) parameters are valid for the lifetime of /// the [`TSHighlighter`] instance, and are non-null. #[no_mangle] pub unsafe extern "C" fn ts_highlighter_add_language( @@ -119,7 +124,7 @@ pub unsafe extern "C" fn ts_highlighter_add_language( "" }; - let lang = unsafe { CStr::from_ptr(language_name) } + let lang = CStr::from_ptr(language_name) .to_str() .or(Err(ErrorCode::InvalidLanguageName))?; @@ -152,32 +157,60 @@ pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer { })) } -/// Deleteis a [`TSHighlighter`] instance. +/// Deletes a [`TSHighlighter`] instance. /// /// # Safety /// -/// `this` must be non-null. +/// `this` must be non-null and must be a valid pointer to a [`TSHighlighter`] instance +/// created by [`ts_highlighter_new`]. +/// +/// It cannot be used after this function is called. #[no_mangle] pub unsafe extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) { drop(Box::from_raw(this)) } -/// Deleteis a [`TSHighlightBuffer`] instance. +/// Deletes a [`TSHighlightBuffer`] instance. /// /// # Safety /// -/// `this` must be non-null. +/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance +/// created by [`ts_highlight_buffer_new`] +/// +/// It cannot be used after this function is called. 
#[no_mangle] pub unsafe extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) { drop(Box::from_raw(this)) } +/// Get the HTML content of a [`TSHighlightBuffer`] instance as a raw pointer. +/// +/// # Safety +/// +/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance +/// created by [`ts_highlight_buffer_new`]. +/// +/// The returned pointer, a C-style string, must not outlive the [`TSHighlightBuffer`] instance, else the +/// data will point to garbage. +/// +/// To get the length of the HTML content, use [`ts_highlight_buffer_len`]. #[no_mangle] pub unsafe extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 { let this = unwrap_ptr(this); this.renderer.html.as_slice().as_ptr() } +/// Get the line offsets of a [`TSHighlightBuffer`] instance as a C-style array. +/// +/// # Safety +/// +/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance +/// created by [`ts_highlight_buffer_new`]. +/// +/// The returned pointer, a C-style array of [`u32`]s, must not outlive the [`TSHighlightBuffer`] instance, else the +/// data will point to garbage. +/// +/// To get the length of the array, use [`ts_highlight_buffer_line_count`]. #[no_mangle] pub unsafe extern "C" fn ts_highlight_buffer_line_offsets( this: *const TSHighlightBuffer, @@ -186,12 +219,24 @@ pub unsafe extern "C" fn ts_highlight_buffer_line_offsets( this.renderer.line_offsets.as_slice().as_ptr() } +/// Get the length of the HTML content of a [`TSHighlightBuffer`] instance. +/// +/// # Safety +/// +/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance +/// created by [`ts_highlight_buffer_new`]. #[no_mangle] pub unsafe extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 { let this = unwrap_ptr(this); this.renderer.html.len() as u32 } +/// Get the number of lines in a [`TSHighlightBuffer`] instance. 
+/// +/// # Safety +/// +/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance +/// created by [`ts_highlight_buffer_new`]. #[no_mangle] pub unsafe extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 { let this = unwrap_ptr(this); @@ -202,8 +247,10 @@ pub unsafe extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlight /// /// # Safety /// -/// The caller must ensure that `scope_name`, `source_code`, and `cancellation_flag` are valid for +/// The caller must ensure that `scope_name`, `source_code`, `output`, and `cancellation_flag` are valid for /// the lifetime of the [`TSHighlighter`] instance, and are non-null. +/// +/// `this` must be a non-null pointer to a [`TSHighlighter`] instance created by [`ts_highlighter_new`] #[no_mangle] pub unsafe extern "C" fn ts_highlighter_highlight( this: *const TSHighlighter, diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index df21f181..915b0220 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -70,7 +70,7 @@ pub extern "C" fn ts_tagger_new() -> *mut TSTagger { /// /// # Safety /// -/// `this` must be non-null +/// `this` must be non-null and a valid pointer to a [`TSTagger`] instance. #[no_mangle] pub unsafe extern "C" fn ts_tagger_delete(this: *mut TSTagger) { drop(Box::from_raw(this)) @@ -78,9 +78,15 @@ pub unsafe extern "C" fn ts_tagger_delete(this: *mut TSTagger) { /// Add a language to a TSTagger. /// +/// Returns a [`TSTagsError`] indicating whether the operation was successful or not. +/// /// # Safety /// -/// `this` must be non-null +/// `this` must be non-null and a valid pointer to a [`TSTagger`] instance. +/// `scope_name` must be non-null and a valid pointer to a null-terminated string. +/// `tags_query` and `locals_query` must be non-null and valid pointers to strings. +/// +/// The caller must ensure that the lengths of `tags_query` and `locals_query` are correct. 
#[no_mangle] pub unsafe extern "C" fn ts_tagger_add_language( this: *mut TSTagger, @@ -94,7 +100,7 @@ pub unsafe extern "C" fn ts_tagger_add_language( let tagger = unwrap_mut_ptr(this); let scope_name = unwrap(CStr::from_ptr(scope_name).to_str()); let tags_query = slice::from_raw_parts(tags_query, tags_query_len as usize); - let locals_query = if locals_query != std::ptr::null() { + let locals_query = if !locals_query.is_null() { slice::from_raw_parts(locals_query, locals_query_len as usize) } else { &[] @@ -121,11 +127,17 @@ pub unsafe extern "C" fn ts_tagger_add_language( } } -/// Tag some source code. +/// Tags some source code. +/// +/// Returns a [`TSTagsError`] indicating whether the operation was successful or not. /// /// # Safety /// -/// `this` must be non-null +/// `this` must be a non-null valid pointer to a [`TSTagger`] instance. +/// `scope_name` must be a non-null valid pointer to a null-terminated string. +/// `source_code` must be a non-null valid pointer to a slice of bytes. +/// `output` must be a non-null valid pointer to a [`TSTagsBuffer`] instance. +/// `cancellation_flag` must be a non-null valid pointer to an [`AtomicUsize`] instance. #[no_mangle] pub unsafe extern "C" fn ts_tagger_tag( this: *mut TSTagger, @@ -220,36 +232,75 @@ pub extern "C" fn ts_tags_buffer_new() -> *mut TSTagsBuffer { /// /// # Safety /// -/// `this` must be non-null +/// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance created by +/// [`ts_tags_buffer_new`]. #[no_mangle] pub unsafe extern "C" fn ts_tags_buffer_delete(this: *mut TSTagsBuffer) { drop(Box::from_raw(this)) } +/// Get the tags from a TSTagsBuffer. +/// +/// # Safety +/// +/// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance created by +/// [`ts_tags_buffer_new`]. +/// +/// The caller must ensure that the returned pointer is not used after the [`TSTagsBuffer`] +/// is deleted with [`ts_tags_buffer_delete`], else the data will point to garbage. 
#[no_mangle] pub unsafe extern "C" fn ts_tags_buffer_tags(this: *const TSTagsBuffer) -> *const TSTag { let buffer = unwrap_ptr(this); buffer.tags.as_ptr() } +/// Get the number of tags in a TSTagsBuffer. +/// +/// # Safety +/// +/// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance. #[no_mangle] pub unsafe extern "C" fn ts_tags_buffer_tags_len(this: *const TSTagsBuffer) -> u32 { let buffer = unwrap_ptr(this); buffer.tags.len() as u32 } +/// Get the documentation strings from a TSTagsBuffer. +/// +/// # Safety +/// +/// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance created by +/// [`ts_tags_buffer_new`]. +/// +/// The caller must ensure that the returned pointer is not used after the [`TSTagsBuffer`] +/// is deleted with [`ts_tags_buffer_delete`], else the data will point to garbage. +/// +/// The returned pointer points to a C-style string. +/// To get the length of the string, use [`ts_tags_buffer_docs_len`]. #[no_mangle] pub unsafe extern "C" fn ts_tags_buffer_docs(this: *const TSTagsBuffer) -> *const c_char { let buffer = unwrap_ptr(this); buffer.docs.as_ptr() as *const c_char } +/// Get the length of the documentation strings in a TSTagsBuffer. +/// +/// # Safety +/// +/// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance created by +/// [`ts_tags_buffer_new`]. #[no_mangle] pub unsafe extern "C" fn ts_tags_buffer_docs_len(this: *const TSTagsBuffer) -> u32 { let buffer = unwrap_ptr(this); buffer.docs.len() as u32 } +/// Get whether or not a TSTagsBuffer contains any parse errors. +/// +/// # Safety +/// +/// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance created by +/// [`ts_tags_buffer_new`]. 
#[no_mangle] pub unsafe extern "C" fn ts_tags_buffer_found_parse_error(this: *const TSTagsBuffer) -> bool { let buffer = unwrap_ptr(this); @@ -258,9 +309,19 @@ pub unsafe extern "C" fn ts_tags_buffer_found_parse_error(this: *const TSTagsBuf /// Get the syntax kinds for a given scope name. /// +/// Returns a pointer to a null-terminated array of null-terminated strings. +/// /// # Safety /// -/// `this` must be non-null +/// `this` must be non-null and a valid pointer to a [`TSTagger`] instance created by +/// [`ts_tagger_new`]. +/// `scope_name` must be non-null and a valid pointer to a null-terminated string. +/// `len` must be non-null and a valid pointer to a `u32`. +/// +/// The caller must ensure that the returned pointer is not used after the [`TSTagger`] +/// is deleted with [`ts_tagger_delete`], else the data will point to garbage. +/// +/// The returned pointer points to a C-style string array. #[no_mangle] pub unsafe extern "C" fn ts_tagger_syntax_kinds_for_scope_name( this: *mut TSTagger, @@ -280,14 +341,14 @@ pub unsafe extern "C" fn ts_tagger_syntax_kinds_for_scope_name( } unsafe fn unwrap_ptr<'a, T>(result: *const T) -> &'a T { - unsafe { result.as_ref() }.unwrap_or_else(|| { + result.as_ref().unwrap_or_else(|| { eprintln!("{}:{} - pointer must not be null", file!(), line!()); abort(); }) } unsafe fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T { - unsafe { result.as_mut() }.unwrap_or_else(|| { + result.as_mut().unwrap_or_else(|| { eprintln!("{}:{} - pointer must not be null", file!(), line!()); abort(); }) From 4278e03b1138d0171a40145cd89bd0486fc733c1 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sun, 20 Aug 2023 16:00:33 +0300 Subject: [PATCH 289/347] feat: generate Rust bindings during build process It can be used as: > cargo build -p tree-sitter -F bindgen --- Cargo.lock | 88 +++++++++++++++++++++++++++++++++++++++ lib/Cargo.toml | 1 + lib/binding_rust/build.rs | 41 ++++++++++++++++++ lib/binding_rust/ffi.rs | 4 ++ 
lib/binding_rust/lib.rs | 5 ++- 5 files changed, 137 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3c444391..80a4e28d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -43,6 +43,29 @@ dependencies = [ "winapi", ] +[[package]] +name = "bindgen" +version = "0.66.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b84e06fc203107bfbad243f4aba2af864eb7db3b1cf46ea0a023b0b433d2a7" +dependencies = [ + "bitflags 2.4.0", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.29", + "which", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -82,6 +105,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -94,6 +126,17 @@ version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cca491388666e04d7248af3f60f0c40cfb0991c72205595d7c396e3510207d1a" +[[package]] +name = "clang-sys" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clap" version = "2.34.0" @@ -381,6 +424,12 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "libc" version = "0.2.147" @@ -424,6 +473,12 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "ndk-context" version = "0.1.1" @@ -442,6 +497,16 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "objc" version = "0.2.7" @@ -469,6 +534,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e91099d4268b0e11973f036e885d652fb0b21fedcf69738c627f94db6a44f42" +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + [[package]] name = "percent-encoding" version = "2.3.0" @@ -491,6 +562,16 @@ dependencies = [ "yansi", ] +[[package]] +name = "prettyplease" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c64d9ba0963cdcea2e1b2230fbae2bab30eb25a174be395c41e764bfb65dd62" +dependencies = [ + "proc-macro2", + "syn 2.0.29", +] + [[package]] name = "proc-macro2" version = "1.0.66" @@ -684,6 +765,12 @@ dependencies = [ "serde", ] +[[package]] +name = "shlex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" + [[package]] name = "smallbitvec" version = 
"2.5.1" @@ -831,6 +918,7 @@ dependencies = [ name = "tree-sitter" version = "0.20.10" dependencies = [ + "bindgen", "cc", "regex", ] diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 5e1f3559..592521fd 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -26,6 +26,7 @@ include = [ regex = "1.9.1" [build-dependencies] +bindgen = { version = "^0.66.1", optional = true } cc = "^1.0.79" [lib] diff --git a/lib/binding_rust/build.rs b/lib/binding_rust/build.rs index ec367980..a74bdb27 100644 --- a/lib/binding_rust/build.rs +++ b/lib/binding_rust/build.rs @@ -17,6 +17,9 @@ fn main() { } } + #[cfg(feature = "bindgen")] + generate_bindings(); + let src_path = Path::new("src"); for entry in fs::read_dir(&src_path).unwrap() { let entry = entry.unwrap(); @@ -34,6 +37,44 @@ fn main() { .compile("tree-sitter"); } +#[cfg(feature = "bindgen")] +fn generate_bindings() { + const HEADER_PATH: &str = "include/tree_sitter/api.h"; + + println!("cargo:rerun-if-changed={}", HEADER_PATH); + + let no_copy = [ + "TSInput", + "TSLanguage", + "TSLogger", + "TSLookaheadIterator", + "TSParser", + "TSTree", + "TSQuery", + "TSQueryCursor", + "TSQueryCapture", + "TSQueryMatch", + "TSQueryPredicateStep", + ]; + + let bindings = bindgen::Builder::default() + .header(HEADER_PATH) + .layout_tests(false) + .allowlist_type("^TS.*") + .allowlist_function("^ts_.*") + .allowlist_var("^TREE_SITTER.*") + .no_copy(no_copy.join("|")) + .generate() + .expect("Failed to generate bindings"); + + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); + let bindings_rs = out_dir.join("bindings.rs"); + + bindings.write_to_file(&bindings_rs).expect(&*format!( + "Failed to write bindings into path: {bindings_rs:?}" + )); +} + fn which(exe_name: impl AsRef) -> Option { env::var_os("PATH").and_then(|paths| { env::split_paths(&paths).find_map(|dir| { diff --git a/lib/binding_rust/ffi.rs b/lib/binding_rust/ffi.rs index ac4da98b..a99d2afe 100644 --- a/lib/binding_rust/ffi.rs +++ b/lib/binding_rust/ffi.rs @@ -2,6 +2,10 
@@ #![allow(non_upper_case_globals)] #![allow(non_camel_case_types)] +#[cfg(feature = "bindgen")] +include!(concat!(env!("OUT_DIR"), "/bindings.rs")); + +#[cfg(not(feature = "bindgen"))] include!("./bindings.rs"); extern "C" { diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 932fc452..de3065d2 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -27,12 +27,13 @@ use std::{ /// The Tree-sitter library is generally backwards-compatible with languages /// generated using older CLI versions, but is not forwards-compatible. #[doc(alias = "TREE_SITTER_LANGUAGE_VERSION")] -pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION; +pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION as usize; /// The earliest ABI version that is supported by the current version of the /// library. #[doc(alias = "TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION")] -pub const MIN_COMPATIBLE_LANGUAGE_VERSION: usize = ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION; +pub const MIN_COMPATIBLE_LANGUAGE_VERSION: usize = + ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION as usize; pub const PARSER_HEADER: &'static str = include_str!("../include/tree_sitter/parser.h"); From abd57bc69b4ec9af9d4e5f76c2f4f63273a35444 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sun, 20 Aug 2023 16:10:13 +0300 Subject: [PATCH 290/347] chore: simplify script/generate-bindings --- lib/binding_rust/bindings.rs | 5 ++--- script/generate-bindings | 13 +------------ 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index e7168fb5..225fbc11 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -1,5 +1,7 @@ /* automatically generated by rust-bindgen 0.66.1 */ +pub const TREE_SITTER_LANGUAGE_VERSION: u32 = 14; +pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: u32 = 13; pub type TSStateId = u16; pub type TSSymbol = u16; pub type TSFieldId = u16; @@ -783,6 
+785,3 @@ extern "C" { new_free: ::std::option::Option, ); } - -pub const TREE_SITTER_LANGUAGE_VERSION: usize = 14; -pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: usize = 13; diff --git a/script/generate-bindings b/script/generate-bindings index fb47e247..52fc43f3 100755 --- a/script/generate-bindings +++ b/script/generate-bindings @@ -33,18 +33,7 @@ bindgen \ --no-layout-tests \ --allowlist-type '^TS.*' \ --allowlist-function '^ts_.*' \ + --allowlist-var "^TREE_SITTER.*" \ --blocklist-type '^__.*' \ --no-copy "$no_copy" \ $header_path > $output_path - -echo "" >> $output_path - -defines=( - TREE_SITTER_LANGUAGE_VERSION - TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION -) - -for define in "${defines[@]}"; do - define_value=$(grep -E "#define $define (.*)" $header_path | cut -d' ' -f3) - echo "pub const $define: usize = $define_value;" >> $output_path -done From 897c187786a00b353dc0333e00b6b570d245f0ef Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Mon, 21 Aug 2023 05:19:20 +0300 Subject: [PATCH 291/347] fix: `make install` should install files with default perms --- .gitignore | 1 + Makefile | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 5a17dd9b..53550dd7 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ log*.html fuzz-results +/tree-sitter.pc test/fixtures/grammars/* !test/fixtures/grammars/.gitkeep package-lock.json diff --git a/Makefile b/Makefile index be915fa3..a11214d5 100644 --- a/Makefile +++ b/Makefile @@ -55,18 +55,22 @@ ifneq ($(STRIP),) endif install: all - install -d '$(DESTDIR)$(LIBDIR)' - install -m755 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a - install -m755 libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER) - ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR) - ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) - install -d 
'$(DESTDIR)$(INCLUDEDIR)'/tree_sitter - install -m644 lib/include/tree_sitter/*.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/ - install -d '$(DESTDIR)$(PCLIBDIR)' sed -e 's|@LIBDIR@|$(LIBDIR)|;s|@INCLUDEDIR@|$(INCLUDEDIR)|;s|@VERSION@|$(VERSION)|' \ -e 's|=$(PREFIX)|=$${prefix}|' \ -e 's|@PREFIX@|$(PREFIX)|' \ - tree-sitter.pc.in > '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc + tree-sitter.pc.in > tree-sitter.pc + + install -d '$(DESTDIR)$(LIBDIR)' + install -m644 -t '$(DESTDIR)$(LIBDIR)' libtree-sitter.a + install -m755 -t '$(DESTDIR)$(LIBDIR)' libtree-sitter.$(SOEXTVER) + ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR) + ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) + + install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter + install -m644 -t '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter lib/include/tree_sitter/*.h + + install -d '$(DESTDIR)$(PCLIBDIR)' + install -m644 -t '$(DESTDIR)$(PCLIBDIR)' tree-sitter.pc clean: rm -f lib/src/*.o libtree-sitter.a libtree-sitter.$(SOEXT) libtree-sitter.$(SOEXTVER_MAJOR) libtree-sitter.$(SOEXTVER) From da6affaf940509e09f2a38262ae617fd244fbe95 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Mon, 21 Aug 2023 00:53:04 -0400 Subject: [PATCH 292/347] feat: allow `@injection.self` to inject the node w/ itself --- highlight/src/lib.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index 20ac5edf..9f4e5b8e 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -112,6 +112,7 @@ pub struct HighlightConfiguration { non_local_variable_patterns: Vec, injection_content_capture_index: Option, injection_language_capture_index: Option, + injection_self_capture_index: Option, local_scope_capture_index: Option, local_def_capture_index: Option, local_def_value_capture_index: Option, @@ -309,6 +310,7 @@ impl HighlightConfiguration { // Store the numeric ids for all of the special captures. 
let mut injection_content_capture_index = None; let mut injection_language_capture_index = None; + let mut injection_self_capture_index = None; let mut local_def_capture_index = None; let mut local_def_value_capture_index = None; let mut local_ref_capture_index = None; @@ -318,6 +320,7 @@ impl HighlightConfiguration { match name.as_str() { "injection.content" => injection_content_capture_index = i, "injection.language" => injection_language_capture_index = i, + "injection.self" => injection_self_capture_index = i, "local.definition" => local_def_capture_index = i, "local.definition-value" => local_def_value_capture_index = i, "local.reference" => local_ref_capture_index = i, @@ -339,6 +342,7 @@ impl HighlightConfiguration { non_local_variable_patterns, injection_content_capture_index, injection_language_capture_index, + injection_self_capture_index, local_def_capture_index, local_def_value_capture_index, local_ref_capture_index, @@ -1120,6 +1124,7 @@ fn injection_for_match<'a>( ) -> (Option<&'a str>, Option>, bool) { let content_capture_index = config.injection_content_capture_index; let language_capture_index = config.injection_language_capture_index; + let self_capture_index = config.injection_self_capture_index; let mut language_name = None; let mut content_node = None; @@ -1129,6 +1134,11 @@ fn injection_for_match<'a>( language_name = capture.node.utf8_text(source).ok(); } else if index == content_capture_index { content_node = Some(capture.node); + } else if index == self_capture_index { + if let Ok(name) = capture.node.utf8_text(source) { + language_name = Some(name); + content_node = Some(capture.node); + } } } @@ -1144,6 +1154,9 @@ fn injection_for_match<'a>( } } + // Setting the `injection.self` key can be used to specify that the + // language name should be the same as the language of the current + // layer. 
"injection.self" => { if language_name.is_none() { language_name = Some(config.language_name.as_str()); From e3a5863287e1da35f41bd764a96de1b06222129e Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Mon, 21 Aug 2023 00:53:46 -0400 Subject: [PATCH 293/347] feat: add `@injection.parent` to inject an injection's node with the parent language --- cli/src/tests/highlight_test.rs | 1 + highlight/src/lib.rs | 37 ++++++++++++++++++++++++++++----- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index c4ca0b49..e400b047 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -24,6 +24,7 @@ lazy_static! { get_highlight_config("rust", Some("injections.scm"), &HIGHLIGHT_NAMES); static ref HIGHLIGHT_NAMES: Vec = [ "attribute", + "boolean", "carriage-return", "comment", "constant", diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index 9f4e5b8e..e118530f 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -19,6 +19,7 @@ const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000; lazy_static! 
{ static ref STANDARD_CAPTURE_NAMES: HashSet<&'static str> = vec![ "attribute", + "boolean", "carriage-return", "comment", "comment.documentation", @@ -112,6 +113,7 @@ pub struct HighlightConfiguration { non_local_variable_patterns: Vec, injection_content_capture_index: Option, injection_language_capture_index: Option, + injection_parent_capture_index: Option, injection_self_capture_index: Option, local_scope_capture_index: Option, local_def_capture_index: Option, @@ -155,6 +157,7 @@ where F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, { source: &'a [u8], + language_name: &'a str, byte_offset: usize, highlighter: &'a mut Highlighter, injection_callback: F, @@ -199,6 +202,7 @@ impl Highlighter { ) -> Result> + 'a, Error> { let layers = HighlightIterLayer::new( source, + None, self, cancellation_flag, &mut injection_callback, @@ -214,6 +218,7 @@ impl Highlighter { assert_ne!(layers.len(), 0); let mut result = HighlightIter { source, + language_name: &config.language_name, byte_offset: 0, injection_callback, cancellation_flag, @@ -310,6 +315,7 @@ impl HighlightConfiguration { // Store the numeric ids for all of the special captures. 
let mut injection_content_capture_index = None; let mut injection_language_capture_index = None; + let mut injection_parent_capture_index = None; let mut injection_self_capture_index = None; let mut local_def_capture_index = None; let mut local_def_value_capture_index = None; @@ -320,6 +326,7 @@ impl HighlightConfiguration { match name.as_str() { "injection.content" => injection_content_capture_index = i, "injection.language" => injection_language_capture_index = i, + "injection.parent" => injection_parent_capture_index = i, "injection.self" => injection_self_capture_index = i, "local.definition" => local_def_capture_index = i, "local.definition-value" => local_def_value_capture_index = i, @@ -342,6 +349,7 @@ impl HighlightConfiguration { non_local_variable_patterns, injection_content_capture_index, injection_language_capture_index, + injection_parent_capture_index, injection_self_capture_index, local_def_capture_index, local_def_value_capture_index, @@ -418,6 +426,7 @@ impl<'a> HighlightIterLayer<'a> { /// added to the returned vector. fn new Option<&'a HighlightConfiguration> + 'a>( source: &'a [u8], + parent_name: Option<&str>, highlighter: &mut Highlighter, cancellation_flag: Option<&'a AtomicUsize>, injection_callback: &mut F, @@ -450,8 +459,13 @@ impl<'a> HighlightIterLayer<'a> { cursor.matches(combined_injections_query, tree.root_node(), source); for mat in matches { let entry = &mut injections_by_pattern_index[mat.pattern_index]; - let (language_name, content_node, include_children) = - injection_for_match(config, combined_injections_query, &mat, source); + let (language_name, content_node, include_children) = injection_for_match( + config, + parent_name, + combined_injections_query, + &mat, + source, + ); if language_name.is_some() { entry.0 = language_name; } @@ -772,8 +786,13 @@ where // If this capture represents an injection, then process the injection. 
if match_.pattern_index < layer.config.locals_pattern_index { - let (language_name, content_node, include_children) = - injection_for_match(&layer.config, &layer.config.query, &match_, &self.source); + let (language_name, content_node, include_children) = injection_for_match( + layer.config, + Some(self.language_name), + &layer.config.query, + &match_, + self.source, + ); // Explicitly remove this match so that none of its other captures will remain // in the stream of captures. @@ -791,6 +810,7 @@ where if !ranges.is_empty() { match HighlightIterLayer::new( self.source, + Some(self.language_name), self.highlighter, self.cancellation_flag, &mut self.injection_callback, @@ -1118,22 +1138,29 @@ impl HtmlRenderer { fn injection_for_match<'a>( config: &'a HighlightConfiguration, + parent_name: Option<&'a str>, query: &'a Query, query_match: &QueryMatch<'a, 'a>, source: &'a [u8], ) -> (Option<&'a str>, Option>, bool) { let content_capture_index = config.injection_content_capture_index; let language_capture_index = config.injection_language_capture_index; + let parent_capture_index = config.injection_parent_capture_index; let self_capture_index = config.injection_self_capture_index; let mut language_name = None; let mut content_node = None; + let parent_name = parent_name.unwrap_or_default(); + for capture in query_match.captures { let index = Some(capture.index); if index == language_capture_index { language_name = capture.node.utf8_text(source).ok(); } else if index == content_capture_index { content_node = Some(capture.node); + } else if index == parent_capture_index && !parent_name.is_empty() { + language_name = Some(parent_name); + content_node = Some(capture.node); } else if index == self_capture_index { if let Ok(name) = capture.node.utf8_text(source) { language_name = Some(name); @@ -1150,7 +1177,7 @@ fn injection_for_match<'a>( // that sets the injection.language key. 
"injection.language" => { if language_name.is_none() { - language_name = prop.value.as_ref().map(|s| s.as_ref()) + language_name = prop.value.as_ref().map(|s| s.as_ref()); } } From f4a6134461e844796af13fa0e86d89d0f9d27e73 Mon Sep 17 00:00:00 2001 From: DennySun2100 <138833544+DennySun2100@users.noreply.github.com> Date: Mon, 21 Aug 2023 11:54:09 -0700 Subject: [PATCH 294/347] GCC pragma causes warning on non-GNU compilers --- lib/include/tree_sitter/api.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 1cc6b3e9..5003cff7 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -1,7 +1,9 @@ #ifndef TREE_SITTER_API_H_ #define TREE_SITTER_API_H_ +#ifdef __GNUC__ #pragma GCC visibility push(default) +#endif #ifdef __cplusplus extern "C" { @@ -1165,6 +1167,8 @@ void ts_set_allocator( } #endif +#ifdef __GNUC__ #pragma GCC visibility pop +#endif #endif // TREE_SITTER_API_H_ From cd0bdf586fd910f8224f0a35969145531f66a649 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Tue, 22 Aug 2023 01:27:00 -0400 Subject: [PATCH 295/347] fix(injections): only allow setting self/parent via `#set!` --- highlight/src/lib.rs | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index e118530f..fad91ad9 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -113,8 +113,6 @@ pub struct HighlightConfiguration { non_local_variable_patterns: Vec, injection_content_capture_index: Option, injection_language_capture_index: Option, - injection_parent_capture_index: Option, - injection_self_capture_index: Option, local_scope_capture_index: Option, local_def_capture_index: Option, local_def_value_capture_index: Option, @@ -315,8 +313,6 @@ impl HighlightConfiguration { // Store the numeric ids for all of the special captures. 
let mut injection_content_capture_index = None; let mut injection_language_capture_index = None; - let mut injection_parent_capture_index = None; - let mut injection_self_capture_index = None; let mut local_def_capture_index = None; let mut local_def_value_capture_index = None; let mut local_ref_capture_index = None; @@ -326,8 +322,6 @@ impl HighlightConfiguration { match name.as_str() { "injection.content" => injection_content_capture_index = i, "injection.language" => injection_language_capture_index = i, - "injection.parent" => injection_parent_capture_index = i, - "injection.self" => injection_self_capture_index = i, "local.definition" => local_def_capture_index = i, "local.definition-value" => local_def_value_capture_index = i, "local.reference" => local_ref_capture_index = i, @@ -349,8 +343,6 @@ impl HighlightConfiguration { non_local_variable_patterns, injection_content_capture_index, injection_language_capture_index, - injection_parent_capture_index, - injection_self_capture_index, local_def_capture_index, local_def_value_capture_index, local_ref_capture_index, @@ -1145,12 +1137,9 @@ fn injection_for_match<'a>( ) -> (Option<&'a str>, Option>, bool) { let content_capture_index = config.injection_content_capture_index; let language_capture_index = config.injection_language_capture_index; - let parent_capture_index = config.injection_parent_capture_index; - let self_capture_index = config.injection_self_capture_index; let mut language_name = None; let mut content_node = None; - let parent_name = parent_name.unwrap_or_default(); for capture in query_match.captures { let index = Some(capture.index); @@ -1158,14 +1147,6 @@ fn injection_for_match<'a>( language_name = capture.node.utf8_text(source).ok(); } else if index == content_capture_index { content_node = Some(capture.node); - } else if index == parent_capture_index && !parent_name.is_empty() { - language_name = Some(parent_name); - content_node = Some(capture.node); - } else if index == self_capture_index { 
- if let Ok(name) = capture.node.utf8_text(source) { - language_name = Some(name); - content_node = Some(capture.node); - } } } @@ -1190,6 +1171,15 @@ fn injection_for_match<'a>( } } + // Setting the `injection.parent` key can be used to specify that + // the language name should be the same as the language of the + // parent layer + "injection.parent" => { + if language_name.is_none() { + language_name = parent_name; + } + } + // By default, injections do not include the *children* of an // `injection.content` node - only the ranges that belong to the // node itself. This can be changed using a `#set!` predicate that From 95ab103f48c9cf22837a31174ff10b38bc00f793 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Tue, 22 Aug 2023 01:36:25 -0400 Subject: [PATCH 296/347] docs: update injection properties --- docs/section-4-syntax-highlighting.md | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/docs/section-4-syntax-highlighting.md b/docs/section-4-syntax-highlighting.md index fc2c9c17..8fd73cf6 100644 --- a/docs/section-4-syntax-highlighting.md +++ b/docs/section-4-syntax-highlighting.md @@ -362,9 +362,18 @@ All of these examples can be modeled in terms of a *parent* syntax tree and one The language injection behavior can also be configured by some properties associated with patterns: * `injection.language` - can be used to hard-code the name of a specific language. -* `injection.combined` - indicates that *all* of the matching nodes in the tree should have their content parsed as *one* nested document. -* `injection.include-children` - indicates that the `@injection.content` node's *entire* text should be re-parsed, including the text of its child nodes. By default, child nodes' text will be *excluded* from the injected document. -* `injection.self` - indicates that the `@injection.content` node should be parsed using the same language as the parent node. 
This is useful for cases where the parent node's language is not known until runtime (e.g. via inheriting another language) +* `injection.combined` - indicates that *all* of the matching nodes in the tree + should have their content parsed as *one* nested document. +* `injection.include-children` - indicates that the `@injection.content` node's + *entire* text should be re-parsed, including the text of its child nodes. By default, +child nodes' text will be *excluded* from the injected document. +* `injection.self` - indicates that the `@injection.content` node should be parsed + using the same language as the node itself. This is useful for cases where the + node's language is not known until runtime (e.g. via inheriting another language) +* `injection.parent` indicates that the `@injection.content` node should be parsed + using the same language as the node's parent language. This is only meant for injections + that need to refer back to the parent language to parse the node's text inside + the injected language. 
#### Examples From bba9809390f355e993f0a8436f83bdb125ab0e5a Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Tue, 22 Aug 2023 16:33:03 +0300 Subject: [PATCH 297/347] fix: `make install` BSDs don't have `-t` option --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index a11214d5..b73e6363 100644 --- a/Makefile +++ b/Makefile @@ -61,16 +61,16 @@ install: all tree-sitter.pc.in > tree-sitter.pc install -d '$(DESTDIR)$(LIBDIR)' - install -m644 -t '$(DESTDIR)$(LIBDIR)' libtree-sitter.a - install -m755 -t '$(DESTDIR)$(LIBDIR)' libtree-sitter.$(SOEXTVER) + install -m644 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/ + install -m755 libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/ ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR) ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter - install -m644 -t '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter lib/include/tree_sitter/*.h + install -m644 lib/include/tree_sitter/*.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/ install -d '$(DESTDIR)$(PCLIBDIR)' - install -m644 -t '$(DESTDIR)$(PCLIBDIR)' tree-sitter.pc + install -m644 tree-sitter.pc '$(DESTDIR)$(PCLIBDIR)'/ clean: rm -f lib/src/*.o libtree-sitter.a libtree-sitter.$(SOEXT) libtree-sitter.$(SOEXTVER_MAJOR) libtree-sitter.$(SOEXTVER) From 5c7098dd5684e4f5a931053f852d40a5205557d1 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Tue, 22 Aug 2023 19:22:31 -0400 Subject: [PATCH 298/347] fix(query): debug print uses wrong variable --- lib/src/query.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/src/query.c b/lib/src/query.c index f7c98375..e3fd27a0 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -1907,7 +1907,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } #ifdef DEBUG_ANALYZE_QUERY - printf("\nWalk states for rootless pattern step %u:\n", step_index); + 
printf("\nWalk states for rootless pattern step %u:\n", pattern_entry->step_index); #endif ts_query__perform_analysis( From 683fe442e49bb2c8b9e37c6a9b49ec5f7a50c2ac Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Wed, 23 Aug 2023 15:14:32 +0300 Subject: [PATCH 299/347] fix(gen): cycle between aliases and anonymous symbols An example of an error cycle in a `parser.c`: ``` static const TSSymbol ts_symbol_map[] = { ... [anon_sym_RBRACE] = anon_sym_RBRACE2, [anon_sym_RBRACE2] = anon_sym_RBRACE, ... }; ``` --- cli/src/generate/render.rs | 76 +++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 197ce364..f92bf312 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -152,49 +152,51 @@ impl Generator { self.symbol_ids[&Symbol::end()].clone(), ); - self.symbol_map = self - .parse_table - .symbols - .iter() - .map(|symbol| { - let mut mapping = symbol; + self.symbol_map = HashMap::new(); - // There can be multiple symbols in the grammar that have the same name and kind, - // due to simple aliases. When that happens, ensure that they map to the same - // public-facing symbol. If one of the symbols is not aliased, choose that one - // to be the public-facing symbol. Otherwise, pick the symbol with the lowest - // numeric value. - if let Some(alias) = self.default_aliases.get(symbol) { - let kind = alias.kind(); - for other_symbol in &self.parse_table.symbols { - if let Some(other_alias) = self.default_aliases.get(other_symbol) { - if other_symbol < mapping && other_alias == alias { - mapping = other_symbol; + for symbol in self.parse_table.symbols.iter() { + let mut mapping = symbol; + + // There can be multiple symbols in the grammar that have the same name and kind, + // due to simple aliases. When that happens, ensure that they map to the same + // public-facing symbol. 
If one of the symbols is not aliased, choose that one + // to be the public-facing symbol. Otherwise, pick the symbol with the lowest + // numeric value. + if let Some(alias) = self.default_aliases.get(symbol) { + let kind = alias.kind(); + for other_symbol in &self.parse_table.symbols { + if let Some(other_alias) = self.default_aliases.get(other_symbol) { + if other_symbol < mapping && other_alias == alias { + mapping = other_symbol; + } + } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) { + mapping = other_symbol; + break; + } + } + } + // Two anonymous tokens with different flags but the same string value + // should be represented with the same symbol in the public API. Examples: + // * "<" and token(prec(1, "<")) + // * "(" and token.immediate("(") + else if symbol.is_terminal() { + let metadata = self.metadata_for_symbol(*symbol); + for other_symbol in &self.parse_table.symbols { + let other_metadata = self.metadata_for_symbol(*other_symbol); + if other_metadata == metadata { + if let Some(mapped) = self.symbol_map.get(other_symbol) { + if mapped == symbol { + break; } - } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) { - mapping = other_symbol; - break; - } - } - } - // Two anonymous tokens with different flags but the same string value - // should be represented with the same symbol in the public API. 
Examples: - // * "<" and token(prec(1, "<")) - // * "(" and token.immediate("(") - else if symbol.is_terminal() { - let metadata = self.metadata_for_symbol(*symbol); - for other_symbol in &self.parse_table.symbols { - let other_metadata = self.metadata_for_symbol(*other_symbol); - if other_metadata == metadata { - mapping = other_symbol; - break; } + mapping = other_symbol; + break; } } + } - (*symbol, *mapping) - }) - .collect(); + self.symbol_map.insert(*symbol, *mapping); + } for production_info in &self.parse_table.production_infos { // Build a list of all field names From 1dbb986515b32308a6f0b0e42cf4c1eef56367f6 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Wed, 23 Aug 2023 08:56:14 -0400 Subject: [PATCH 300/347] chore: add a test for an aliased anonymous symbol with flags --- cli/src/tests/query_test.rs | 90 ++++++++++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 2 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index c0994d31..94d5ca97 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1,10 +1,13 @@ use super::helpers::{ allocations, - fixtures::get_language, + fixtures::{get_language, get_test_language}, query_helpers::{assert_query_matches, Match, Pattern}, ITERATION_COUNT, }; -use crate::tests::helpers::query_helpers::{collect_captures, collect_matches}; +use crate::{ + generate::generate_parser_for_grammar, + tests::helpers::query_helpers::{collect_captures, collect_matches}, +}; use indoc::indoc; use lazy_static::lazy_static; use rand::{prelude::StdRng, SeedableRng}; @@ -4812,3 +4815,86 @@ fn test_query_max_start_depth_more() { } }); } + +#[test] +fn test_grammar_with_aliased_literal_query() { + // module.exports = grammar({ + // name: 'test', + // + // rules: { + // source: $ => repeat(choice($.compound_statement, $.expansion)), + // + // compound_statement: $ => seq(alias(token(prec(-1, '}')), '}')), + // + // expansion: $ => seq('}'), + // }, + // }); + let 
(parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test", + "rules": { + "source": { + "type": "REPEAT", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "compound_statement" + }, + { + "type": "SYMBOL", + "name": "expansion" + } + ] + } + }, + "compound_statement": { + "type": "SEQ", + "members": [ + { + "type": "ALIAS", + "content": { + "type": "TOKEN", + "content": { + "type": "PREC", + "value": -1, + "content": { + "type": "STRING", + "value": "}" + } + } + }, + "named": false, + "value": "}" + } + ] + }, + "expansion": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "}" + } + ] + } + } + } + "#, + ) + .unwrap(); + + let language = get_test_language(&parser_name, &parser_code, None); + + let query = Query::new( + language, + r#" + (compound_statement "}" @bracket1) + (expansion "}" @bracket2) + "#, + ); + + assert!(query.is_ok()); +} From b22e4fe3c9d9501c6c61b93ac0c06c83435ffa50 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Fri, 25 Aug 2023 16:55:08 +0300 Subject: [PATCH 301/347] chore: fix a wasm bash test --- lib/binding_web/test/parser-test.js | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/binding_web/test/parser-test.js b/lib/binding_web/test/parser-test.js index fcd714b2..74c45d96 100644 --- a/lib/binding_web/test/parser-test.js +++ b/lib/binding_web/test/parser-test.js @@ -127,19 +127,19 @@ describe("Parser", () => { it("can use the bash parser", async () => { parser.setLanguage(await Parser.Language.load(languageURL('bash'))); - tree = parser.parse("FOO=bar echo < err.txt > hello.txt \nhello\nEOF"); + tree = parser.parse("FOO=bar echo < err.txt > hello.txt \nhello${FOO}\nEOF"); assert.equal( tree.rootNode.toString(), - '(program (redirected_statement ' + - 'body: (command ' + - '(variable_assignment ' + - 'name: (variable_name) ' + - 'value: (word)) ' + - 'name: (command_name (word))) ' + - 'redirect: 
(heredoc_redirect (heredoc_start)) ' + - 'redirect: (file_redirect descriptor: (file_descriptor) destination: (word)) ' + - 'redirect: (file_redirect destination: (word))) ' + - '(heredoc_body))' + '(program ' + + '(redirected_statement ' + + 'body: (command ' + + '(variable_assignment name: (variable_name) value: (word)) ' + + 'name: (command_name (word))) ' + + 'redirect: (heredoc_redirect (heredoc_start) ' + + 'redirect: (file_redirect descriptor: (file_descriptor) destination: (word)) ' + + 'redirect: (file_redirect destination: (word)) ' + + '(heredoc_body ' + + '(expansion (variable_name))) (heredoc_end))))' ); }).timeout(5000); From e985d0e74342c78f99ff6c12dc7b7efaf597dfd3 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Fri, 25 Aug 2023 17:12:04 +0300 Subject: [PATCH 302/347] cicd: pin rust toolchain to 1.71.1 for all mips targets --- .github/workflows/build.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 775db7de..91cc7f5f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -107,6 +107,11 @@ jobs: - { runtime: linux-sparc64 , cc: sparc64-linux-gnu-gcc , ar: sparc64-linux-gnu-ar } - { runtime: linux-thumbv7neon , cc: arm-linux-gnueabihf-gcc , ar: arm-linux-gnueabihf-ar } + # Rust toolchains + - { runtime: linux-mips , rust-toolchain: 1.71.1 } + - { runtime: linux-mips64 , rust-toolchain: 1.71.1 } + - { runtime: linux-mipsel , rust-toolchain: 1.71.1 } + - { runtime: linux-mips64el , rust-toolchain: 1.71.1 } # See #2041 tree-sitter issue - { runtime: windows-x64 , rust-test-threads: 1 } @@ -145,6 +150,7 @@ jobs: uses: dtolnay/rust-toolchain@stable with: targets: ${{ matrix.target }} + toolchain: ${{ matrix.rust-toolchain || 'stable' }} - name: Install cross if: ${{ matrix.use-cross }} From 5c5de9ca610a0f7037406cb82f3c715ba4e3d50c Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Fri, 25 Aug 2023 17:14:55 +0300 Subject: [PATCH 303/347] cicd: rename 
`runtime` to `platform` for better clarity --- .github/workflows/build.yml | 112 ++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 91cc7f5f..05c08d2c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -27,12 +27,12 @@ on: jobs: build: - name: ${{ matrix.runtime }} (${{ matrix.target }}) (${{ matrix.os }}) + name: ${{ matrix.platform }} (${{ matrix.target }}) (${{ matrix.os }}) runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - runtime: + platform: - linux-arm64 # - linux-arm # - linux-armhf # @@ -60,66 +60,66 @@ jobs: include: # When adding a new `target`: - # 1. Define a new runtime alias above + # 1. Define a new platform alias above # 2. Add a new record to a matrix map in `cli/npm/install.js` - - { runtime: linux-arm64 , target: aarch64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - - { runtime: linux-arm , target: arm-unknown-linux-gnueabi , os: ubuntu-latest , use-cross: true } - - { runtime: linux-armhf , target: arm-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true } - - { runtime: linux-armv5te , target: armv5te-unknown-linux-gnueabi , os: ubuntu-latest , use-cross: true } - - { runtime: linux-armv7l , target: armv7-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true } - - { runtime: linux-x64 , target: x86_64-unknown-linux-gnu , os: ubuntu-20.04 } #2272 - - { runtime: linux-x86 , target: i686-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - - { runtime: linux-i586 , target: i586-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - - { runtime: linux-mips , target: mips-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - - { runtime: linux-mips64 , target: mips64-unknown-linux-gnuabi64 , os: ubuntu-latest , use-cross: true } - - { runtime: linux-mipsel , target: mipsel-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - - { runtime: 
linux-mips64el , target: mips64el-unknown-linux-gnuabi64 , os: ubuntu-latest , use-cross: true } - - { runtime: linux-powerpc , target: powerpc-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - - { runtime: linux-powerpc64 , target: powerpc64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - - { runtime: linux-powerpc64el , target: powerpc64le-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - - { runtime: linux-riscv64gc , target: riscv64gc-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - - { runtime: linux-s390x , target: s390x-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - - { runtime: linux-sparc64 , target: sparc64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - - { runtime: linux-thumbv7neon , target: thumbv7neon-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true } - - { runtime: windows-arm64 , target: aarch64-pc-windows-msvc , os: windows-latest } - - { runtime: windows-x64 , target: x86_64-pc-windows-msvc , os: windows-latest } - - { runtime: windows-x86 , target: i686-pc-windows-msvc , os: windows-latest } - - { runtime: macos-arm64 , target: aarch64-apple-darwin , os: macos-latest } - - { runtime: macos-x64 , target: x86_64-apple-darwin , os: macos-latest } + - { platform: linux-arm64 , target: aarch64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-arm , target: arm-unknown-linux-gnueabi , os: ubuntu-latest , use-cross: true } + - { platform: linux-armhf , target: arm-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true } + - { platform: linux-armv5te , target: armv5te-unknown-linux-gnueabi , os: ubuntu-latest , use-cross: true } + - { platform: linux-armv7l , target: armv7-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true } + - { platform: linux-x64 , target: x86_64-unknown-linux-gnu , os: ubuntu-20.04 } #2272 + - { platform: linux-x86 , target: i686-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: 
linux-i586 , target: i586-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-mips , target: mips-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-mips64 , target: mips64-unknown-linux-gnuabi64 , os: ubuntu-latest , use-cross: true } + - { platform: linux-mipsel , target: mipsel-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-mips64el , target: mips64el-unknown-linux-gnuabi64 , os: ubuntu-latest , use-cross: true } + - { platform: linux-powerpc , target: powerpc-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-powerpc64 , target: powerpc64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-powerpc64el , target: powerpc64le-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-riscv64gc , target: riscv64gc-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-s390x , target: s390x-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-sparc64 , target: sparc64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-thumbv7neon , target: thumbv7neon-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true } + - { platform: windows-arm64 , target: aarch64-pc-windows-msvc , os: windows-latest } + - { platform: windows-x64 , target: x86_64-pc-windows-msvc , os: windows-latest } + - { platform: windows-x86 , target: i686-pc-windows-msvc , os: windows-latest } + - { platform: macos-arm64 , target: aarch64-apple-darwin , os: macos-latest } + - { platform: macos-x64 , target: x86_64-apple-darwin , os: macos-latest } # Cross compilers for C library - - { runtime: linux-arm64 , cc: aarch64-linux-gnu-gcc , ar: aarch64-linux-gnu-ar } - - { runtime: linux-arm , cc: arm-linux-gnueabi-gcc , ar: arm-linux-gnueabi-ar } - - { runtime: linux-armhf , cc: arm-unknown-linux-gnueabihf-gcc , ar: arm-unknown-linux-gnueabihf-ar } - - { 
runtime: linux-armv5te , cc: arm-linux-gnueabi-gcc , ar: arm-linux-gnueabi-ar } - - { runtime: linux-armv7l , cc: arm-linux-gnueabihf-gcc , ar: arm-linux-gnueabihf-ar } - - { runtime: linux-x86 , cc: i686-linux-gnu-gcc , ar: i686-linux-gnu-ar } - - { runtime: linux-i586 , cc: i686-linux-gnu-gcc , ar: i686-linux-gnu-ar } - - { runtime: linux-mips , cc: mips-linux-gnu-gcc , ar: mips-linux-gnu-ar } - - { runtime: linux-mips64 , cc: mips64-linux-gnuabi64-gcc , ar: mips64-linux-gnuabi64-ar } - - { runtime: linux-mipsel , cc: mipsel-linux-gnu-gcc , ar: mipsel-linux-gnu-ar } - - { runtime: linux-mips64el , cc: mips64el-linux-gnuabi64-gcc , ar: mips64el-linux-gnuabi64-ar } - - { runtime: linux-powerpc , cc: powerpc-linux-gnu-gcc , ar: powerpc-linux-gnu-ar } - - { runtime: linux-powerpc64 , cc: powerpc64-linux-gnu-gcc , ar: powerpc64-linux-gnu-ar } - - { runtime: linux-powerpc64el , cc: powerpc64le-linux-gnu-gcc , ar: powerpc64le-linux-gnu-ar } - - { runtime: linux-riscv64gc , cc: riscv64-linux-gnu-gcc , ar: riscv64-linux-gnu-ar } - - { runtime: linux-s390x , cc: s390x-linux-gnu-gcc , ar: s390x-linux-gnu-ar } - - { runtime: linux-sparc64 , cc: sparc64-linux-gnu-gcc , ar: sparc64-linux-gnu-ar } - - { runtime: linux-thumbv7neon , cc: arm-linux-gnueabihf-gcc , ar: arm-linux-gnueabihf-ar } + - { platform: linux-arm64 , cc: aarch64-linux-gnu-gcc , ar: aarch64-linux-gnu-ar } + - { platform: linux-arm , cc: arm-linux-gnueabi-gcc , ar: arm-linux-gnueabi-ar } + - { platform: linux-armhf , cc: arm-unknown-linux-gnueabihf-gcc , ar: arm-unknown-linux-gnueabihf-ar } + - { platform: linux-armv5te , cc: arm-linux-gnueabi-gcc , ar: arm-linux-gnueabi-ar } + - { platform: linux-armv7l , cc: arm-linux-gnueabihf-gcc , ar: arm-linux-gnueabihf-ar } + - { platform: linux-x86 , cc: i686-linux-gnu-gcc , ar: i686-linux-gnu-ar } + - { platform: linux-i586 , cc: i686-linux-gnu-gcc , ar: i686-linux-gnu-ar } + - { platform: linux-mips , cc: mips-linux-gnu-gcc , ar: mips-linux-gnu-ar } + - { platform: 
linux-mips64 , cc: mips64-linux-gnuabi64-gcc , ar: mips64-linux-gnuabi64-ar } + - { platform: linux-mipsel , cc: mipsel-linux-gnu-gcc , ar: mipsel-linux-gnu-ar } + - { platform: linux-mips64el , cc: mips64el-linux-gnuabi64-gcc , ar: mips64el-linux-gnuabi64-ar } + - { platform: linux-powerpc , cc: powerpc-linux-gnu-gcc , ar: powerpc-linux-gnu-ar } + - { platform: linux-powerpc64 , cc: powerpc64-linux-gnu-gcc , ar: powerpc64-linux-gnu-ar } + - { platform: linux-powerpc64el , cc: powerpc64le-linux-gnu-gcc , ar: powerpc64le-linux-gnu-ar } + - { platform: linux-riscv64gc , cc: riscv64-linux-gnu-gcc , ar: riscv64-linux-gnu-ar } + - { platform: linux-s390x , cc: s390x-linux-gnu-gcc , ar: s390x-linux-gnu-ar } + - { platform: linux-sparc64 , cc: sparc64-linux-gnu-gcc , ar: sparc64-linux-gnu-ar } + - { platform: linux-thumbv7neon , cc: arm-linux-gnueabihf-gcc , ar: arm-linux-gnueabihf-ar } # Rust toolchains - - { runtime: linux-mips , rust-toolchain: 1.71.1 } - - { runtime: linux-mips64 , rust-toolchain: 1.71.1 } - - { runtime: linux-mipsel , rust-toolchain: 1.71.1 } - - { runtime: linux-mips64el , rust-toolchain: 1.71.1 } + - { platform: linux-mips , rust-toolchain: 1.71.1 } + - { platform: linux-mips64 , rust-toolchain: 1.71.1 } + - { platform: linux-mipsel , rust-toolchain: 1.71.1 } + - { platform: linux-mips64el , rust-toolchain: 1.71.1 } # See #2041 tree-sitter issue - - { runtime: windows-x64 , rust-test-threads: 1 } - - { runtime: windows-x86 , rust-test-threads: 1 } + - { platform: windows-x64 , rust-test-threads: 1 } + - { platform: windows-x86 , rust-test-threads: 1 } # CLI only build - - { runtime: windows-arm64 , cli-only: true } - - { runtime: macos-arm64 , cli-only: true } + - { platform: windows-arm64 , cli-only: true } + - { platform: macos-arm64 , cli-only: true } env: BUILD_CMD: cargo @@ -218,7 +218,7 @@ jobs: run: $BUILD_CMD build --release --target=${{ matrix.target }} - name: Info about CLI - if: ${{ startsWith(matrix.runtime, 'linux') }} + if: ${{ 
startsWith(matrix.platform, 'linux') }} run: | min_glibc=$(objdump -p target/$TARGET/release/tree-sitter${{ env.EXE }} | sed -nr 's/.*(GLIBC_.+).*/\1/p' | sort -uV | tail -n1) echo "🔗 Minimal **glibc** version required for CLI: ${min_glibc}">> $GITHUB_STEP_SUMMARY @@ -250,13 +250,13 @@ jobs: - name: Upload CLI artifact uses: actions/upload-artifact@v3 with: - name: tree-sitter.${{ matrix.runtime }} + name: tree-sitter.${{ matrix.platform }} path: target/${{ matrix.target }}/release/tree-sitter${{ env.EXE }} if-no-files-found: error retention-days: 7 - name: Upload WASM artifacts - if: ${{ matrix.runtime == 'linux-x64' }} + if: ${{ matrix.platform == 'linux-x64' }} uses: actions/upload-artifact@v3 with: name: tree-sitter.wasm From fae3c5b5cf84fdb918e89604f00445046be13b50 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Thu, 24 Aug 2023 15:55:48 +0300 Subject: [PATCH 304/347] fix(lib): improve visibility control for clang --- lib/include/tree_sitter/api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 5003cff7..b2d8a6f5 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -1,7 +1,7 @@ #ifndef TREE_SITTER_API_H_ #define TREE_SITTER_API_H_ -#ifdef __GNUC__ +#if defined(__GNUC__) || defined(__clang__) #pragma GCC visibility push(default) #endif @@ -1167,7 +1167,7 @@ void ts_set_allocator( } #endif -#ifdef __GNUC__ +#if defined(__GNUC__) || defined(__clang__) #pragma GCC visibility pop #endif From fbfa58edc8663e9c0a825a72bef42dcfff8a1aef Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Thu, 24 Aug 2023 15:23:44 +0300 Subject: [PATCH 305/347] chore(gen): move external scanner stuff closer to the end of `parser.c` --- cli/src/generate/render.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index f92bf312..b4cf9150 100644 --- a/cli/src/generate/render.rs +++ 
b/cli/src/generate/render.rs @@ -129,6 +129,7 @@ impl Generator { } self.add_lex_modes_list(); + self.add_parse_table(); if !self.syntax_grammar.external_tokens.is_empty() { self.add_external_token_enum(); @@ -136,7 +137,6 @@ impl Generator { self.add_external_scanner_states_list(); } - self.add_parse_table(); self.add_parser_export(); self.buffer From b3fef28a1063a56c75a3ae114a6731ffb871961f Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Thu, 24 Aug 2023 19:09:10 +0300 Subject: [PATCH 306/347] chore(gen): add `parser.c` enum names to be better discoverable --- cli/src/generate/render.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index b4cf9150..f0d8ddec 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -338,7 +338,7 @@ impl Generator { } fn add_symbol_enum(&mut self) { - add_line!(self, "enum {{"); + add_line!(self, "enum ts_symbol_identifiers {{"); indent!(self); self.symbol_order.insert(Symbol::end(), 0); let mut i = 1; @@ -410,7 +410,7 @@ impl Generator { } fn add_field_name_enum(&mut self) { - add_line!(self, "enum {{"); + add_line!(self, "enum ts_field_identifiers {{"); indent!(self); for (i, field_name) in self.field_names.iter().enumerate() { add_line!(self, "{} = {},", self.field_id(field_name), i + 1); @@ -1026,7 +1026,7 @@ impl Generator { } fn add_external_token_enum(&mut self) { - add_line!(self, "enum {{"); + add_line!(self, "enum ts_external_scanner_symbol_identifiers {{"); indent!(self); for i in 0..self.syntax_grammar.external_tokens.len() { add_line!( From 13c79db3aac71281bb17ea470e9fe4664138b2fa Mon Sep 17 00:00:00 2001 From: Shem Sedrick Date: Thu, 24 Aug 2023 12:43:53 -0500 Subject: [PATCH 307/347] Updated build.zig to work with 0.11.0 --- build.zig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/build.zig b/build.zig index 05e4f2c2..8b6a7c33 100644 --- a/build.zig +++ b/build.zig @@ -8,9 +8,9 @@ pub fn 
build(b: *std.Build) void { }); lib.linkLibC(); - lib.addCSourceFile("lib/src/lib.c", &.{}); - lib.addIncludePath("lib/include"); - lib.addIncludePath("lib/src"); + lib.addCSourceFile(.{ .file = .{ .path = "lib/src/lib.c" }, .flags = &.{} }); + lib.addIncludePath(.{ .path = "lib/include" }); + lib.addIncludePath(.{ .path = "lib/src" }); b.installArtifact(lib); } From 69697666641d60fd816adb2e7329c857d68ebe25 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Thu, 24 Aug 2023 02:54:59 -0400 Subject: [PATCH 308/347] fix: ignore regex u flag It's needed for certain regex patterns in JS, and we can just silently ignore it. --- cli/src/generate/parse_grammar.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/src/generate/parse_grammar.rs b/cli/src/generate/parse_grammar.rs index 88bd57c1..e8eca095 100644 --- a/cli/src/generate/parse_grammar.rs +++ b/cli/src/generate/parse_grammar.rs @@ -150,10 +150,10 @@ fn parse_rule(json: RuleJSON) -> Rule { f.chars() .filter(|c| { if *c != 'i' { - eprintln!("Warning: unsupported flag {}", c); + eprintln!("Warning: unsupported flag {c}"); false } else { - true + *c != 'u' // silently ignore unicode flag } }) .collect() From b412d86f198573f4303c671534d7358fdb04f0f1 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sat, 26 Aug 2023 20:50:23 +0300 Subject: [PATCH 309/347] fix: makefile should install only `api.h` header --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b73e6363..eb4075f0 100644 --- a/Makefile +++ b/Makefile @@ -67,7 +67,7 @@ install: all ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter - install -m644 lib/include/tree_sitter/*.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/ + install -m644 lib/include/tree_sitter/api.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/ install -d '$(DESTDIR)$(PCLIBDIR)' install -m644 tree-sitter.pc '$(DESTDIR)$(PCLIBDIR)'/ From 
60779cc1acb9929a10a773c4dbad304c5f524f1e Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sat, 26 Aug 2023 20:57:08 +0300 Subject: [PATCH 310/347] fix(gen): `parser.c` should include `parser.h` relatively --- cli/src/generate/render.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index f0d8ddec..69fa3c48 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -256,7 +256,7 @@ impl Generator { } fn add_includes(&mut self) { - add_line!(self, "#include "); + add_line!(self, "#include \"tree_sitter/parser.h\""); add_line!(self, ""); } From a4ea4737ac13ec0e0a34a26624330010f51b27be Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Fri, 25 Aug 2023 18:28:27 -0400 Subject: [PATCH 311/347] fix: do not increment `current_included_range_index` past `included_range_count` in `__do_advance` --- cli/src/tests/mod.rs | 1 + cli/src/tests/parser_hang_test.rs | 61 +++++++++++++++++++ lib/src/lexer.c | 4 +- .../get_col_should_hang_not_crash/corpus.txt | 0 .../get_col_should_hang_not_crash/grammar.js | 13 ++++ .../get_col_should_hang_not_crash/scanner.c | 17 ++++++ 6 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 cli/src/tests/parser_hang_test.rs create mode 100644 test/fixtures/test_grammars/get_col_should_hang_not_crash/corpus.txt create mode 100644 test/fixtures/test_grammars/get_col_should_hang_not_crash/grammar.js create mode 100644 test/fixtures/test_grammars/get_col_should_hang_not_crash/scanner.c diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index e579209f..308fc2c5 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -5,6 +5,7 @@ mod helpers; mod highlight_test; mod language_test; mod node_test; +mod parser_hang_test; mod parser_test; mod pathological_test; mod query_test; diff --git a/cli/src/tests/parser_hang_test.rs b/cli/src/tests/parser_hang_test.rs new file mode 100644 index 00000000..cfa34957 --- /dev/null +++ 
b/cli/src/tests/parser_hang_test.rs @@ -0,0 +1,61 @@ +use pretty_assertions::assert_eq; +use tree_sitter::Parser; + +use crate::{ + generate::generate_parser_for_grammar, + tests::helpers::fixtures::{fixtures_dir, get_test_language}, +}; + +#[test] +fn test_grammar_that_should_hang_and_not_segfault() { + use std::sync::mpsc; + + let (tx, rx) = mpsc::channel(); + + std::thread::spawn(move || { + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "get_col_should_hang_not_crash", + "rules": { + "source_file": { + "type": "SEQ", + "members": [ { "type": "SYMBOL", "name": "test" } ] + } + }, + "extras": [ { "type": "PATTERN", "value": "\\s" } ], + "externals": [ { "type": "SYMBOL", "name": "test" } ] + } + "#, + ) + .unwrap(); + + let mut parser = Parser::new(); + parser + .set_language(get_test_language( + &parser_name, + &parser_code, + Some( + fixtures_dir() + .join("test_grammars") + .join("get_col_should_hang_not_crash") + .as_path(), + ), + )) + .unwrap(); + + let code_that_should_hang = "\nHello"; + + parser.parse(code_that_should_hang, None).unwrap(); + + // Won't be reached + let _ = tx.send(()); + }); + + // Ok signifies that it did not hang + // RecvTimeoutError::Disconnected signifies that the parser thread exited unexpectedly (crashed) + assert_eq!( + rx.recv_timeout(std::time::Duration::from_secs(5)), + Err(mpsc::RecvTimeoutError::Timeout) + ); +} diff --git a/lib/src/lexer.c b/lib/src/lexer.c index e32158b2..d108c04e 100644 --- a/lib/src/lexer.c +++ b/lib/src/lexer.c @@ -172,7 +172,9 @@ static void ts_lexer__do_advance(Lexer *self, bool skip) { self->current_position.bytes >= current_range->end_byte || current_range->end_byte == current_range->start_byte ) { - self->current_included_range_index++; + if (self->current_included_range_index < self->included_range_count) { + self->current_included_range_index++; + } if (self->current_included_range_index < self->included_range_count) { current_range++; self->current_position = 
(Length) { diff --git a/test/fixtures/test_grammars/get_col_should_hang_not_crash/corpus.txt b/test/fixtures/test_grammars/get_col_should_hang_not_crash/corpus.txt new file mode 100644 index 00000000..e69de29b diff --git a/test/fixtures/test_grammars/get_col_should_hang_not_crash/grammar.js b/test/fixtures/test_grammars/get_col_should_hang_not_crash/grammar.js new file mode 100644 index 00000000..83d57d2c --- /dev/null +++ b/test/fixtures/test_grammars/get_col_should_hang_not_crash/grammar.js @@ -0,0 +1,13 @@ +module.exports = grammar({ + name: 'get_col_should_hang_not_crash', + + externals: $ => [ + $.test, + ], + + rules: { + source_file: $ => seq( + $.test + ), + }, +}); diff --git a/test/fixtures/test_grammars/get_col_should_hang_not_crash/scanner.c b/test/fixtures/test_grammars/get_col_should_hang_not_crash/scanner.c new file mode 100644 index 00000000..d21ec6d4 --- /dev/null +++ b/test/fixtures/test_grammars/get_col_should_hang_not_crash/scanner.c @@ -0,0 +1,17 @@ +#include + +unsigned tree_sitter_get_col_should_hang_not_crash_external_scanner_serialize() { return 0; } + +void tree_sitter_get_col_should_hang_not_crash_external_scanner_deserialize() {} + +void *tree_sitter_get_col_should_hang_not_crash_external_scanner_create() { return NULL; } + +void tree_sitter_get_col_should_hang_not_crash_external_scanner_destroy() {} + +bool tree_sitter_get_col_should_hang_not_crash_external_scanner_scan(void *payload, TSLexer *lexer, + const bool *valid_symbols) { + while (true) { + lexer->advance(lexer, false); + lexer->get_column(lexer); + } +} From b205a1f5a2c8980853ee537dd34b52c8bd4de3bd Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sun, 27 Aug 2023 18:37:15 +0300 Subject: [PATCH 312/347] Run parser hang test in a killable subprocess --- cli/src/tests/parser_hang_test.rs | 114 ++++++++++++++++++------------ 1 file changed, 70 insertions(+), 44 deletions(-) diff --git a/cli/src/tests/parser_hang_test.rs b/cli/src/tests/parser_hang_test.rs index 
cfa34957..ab4273e3 100644 --- a/cli/src/tests/parser_hang_test.rs +++ b/cli/src/tests/parser_hang_test.rs @@ -1,61 +1,87 @@ -use pretty_assertions::assert_eq; -use tree_sitter::Parser; - use crate::{ - generate::generate_parser_for_grammar, + generate::{generate_parser_for_grammar, load_grammar_file}, tests::helpers::fixtures::{fixtures_dir, get_test_language}, }; +use std::{ + env::VarError, + process::{Command, Stdio}, +}; +use tree_sitter::Parser; #[test] fn test_grammar_that_should_hang_and_not_segfault() { - use std::sync::mpsc; + let parent_sleep_millis = 1000; + let test_name = "test_grammar_that_should_hang_and_not_segfault"; + let test_var = "CARGO_HANG_TEST"; - let (tx, rx) = mpsc::channel(); + eprintln!(" {test_name}"); - std::thread::spawn(move || { - let (parser_name, parser_code) = generate_parser_for_grammar( - r#" - { - "name": "get_col_should_hang_not_crash", - "rules": { - "source_file": { - "type": "SEQ", - "members": [ { "type": "SYMBOL", "name": "test" } ] + let tests_exec_path = std::env::args() + .nth(0) + .expect("Failed get get tests executable path"); + + match std::env::var(test_var) { + Ok(v) if v == test_name => { + eprintln!(" child process id {}", std::process::id()); + hang_test(); + } + + Err(VarError::NotPresent) => { + eprintln!(" parent process id {}", std::process::id()); + if true { + let mut command = Command::new(tests_exec_path); + command.arg(test_name).env(test_var, test_name); + if std::env::args().any(|x| x == "--nocapture") { + command.arg("--nocapture"); + } else { + command.stdout(Stdio::null()).stderr(Stdio::null()); + } + match command.spawn() { + Ok(mut child) => { + std::thread::sleep(std::time::Duration::from_millis(parent_sleep_millis)); + match child.try_wait() { + Ok(Some(status)) if status.success() => { + panic!("Child wasn't hang and exited successfully") + } + Ok(Some(status)) => panic!( + "Child wasn't hang and exited with status code: {:?}", + status.code() + ), + _ => (), + } + if let Err(e) = 
child.kill() { + eprintln!( + "Failed to kill hang test sub process id: {}, error: {e}", + child.id() + ); + } } - }, - "extras": [ { "type": "PATTERN", "value": "\\s" } ], - "externals": [ { "type": "SYMBOL", "name": "test" } ] + Err(e) => panic!("{e}"), + } } - "#, - ) - .unwrap(); + } + + Err(e) => panic!("Env var error: {e}"), + _ => unreachable!(), + } + + fn hang_test() { + let test_grammar_dir = fixtures_dir() + .join("test_grammars") + .join("get_col_should_hang_not_crash"); + + let grammar_json = load_grammar_file(&test_grammar_dir.join("grammar.js"), None).unwrap(); + let (parser_name, parser_code) = + generate_parser_for_grammar(grammar_json.as_str()).unwrap(); + + let language = + get_test_language(&parser_name, &parser_code, Some(test_grammar_dir.as_path())); let mut parser = Parser::new(); - parser - .set_language(get_test_language( - &parser_name, - &parser_code, - Some( - fixtures_dir() - .join("test_grammars") - .join("get_col_should_hang_not_crash") - .as_path(), - ), - )) - .unwrap(); + parser.set_language(language).unwrap(); let code_that_should_hang = "\nHello"; parser.parse(code_that_should_hang, None).unwrap(); - - // Won't be reached - let _ = tx.send(()); - }); - - // Ok signifies that it did not hang - // RecvTimeoutError::Disconnected signifies that the parser thread exited unexpectedly (crashed) - assert_eq!( - rx.recv_timeout(std::time::Duration::from_secs(5)), - Err(mpsc::RecvTimeoutError::Timeout) - ); + } } From a9c4965dd620c086e5419f71117ec46de8fb5b99 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Mon, 28 Aug 2023 04:46:07 +0300 Subject: [PATCH 313/347] cicd: exclude hang tests for exotic arches and ASAN --- .github/workflows/sanitize.yml | 4 ++-- cli/src/tests/parser_hang_test.rs | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sanitize.yml b/.github/workflows/sanitize.yml index 834c5ab4..2ece182b 100644 --- a/.github/workflows/sanitize.yml +++ b/.github/workflows/sanitize.yml 
@@ -36,14 +36,14 @@ jobs: env: UBSAN_OPTIONS: halt_on_error=1 CFLAGS: -fsanitize=undefined - RUSTFLAGS: -lubsan + RUSTFLAGS: ${{ env.RUSTFLAGS }} -lubsan run: cargo test -- --test-threads 1 - name: Run main tests with address sanitizer (ASAN) env: ASAN_OPTIONS: halt_on_error=1 CFLAGS: -fsanitize=address - RUSTFLAGS: -Zsanitizer=address + RUSTFLAGS: ${{ env.RUSTFLAGS }} -Zsanitizer=address --cfg=sanitizing run: | rustup install nightly rustup component add rust-src --toolchain nightly-x86_64-unknown-linux-gnu diff --git a/cli/src/tests/parser_hang_test.rs b/cli/src/tests/parser_hang_test.rs index ab4273e3..b8274804 100644 --- a/cli/src/tests/parser_hang_test.rs +++ b/cli/src/tests/parser_hang_test.rs @@ -1,3 +1,6 @@ +// For some reasons `Command::spawn` doesn't work in CI env for many exotic arches. +#![cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))] + use crate::{ generate::{generate_parser_for_grammar, load_grammar_file}, tests::helpers::fixtures::{fixtures_dir, get_test_language}, @@ -8,6 +11,15 @@ use std::{ }; use tree_sitter::Parser; +// The `sanitizing` cfg is required to don't run tests under specific sunitizer +// because they don't work well with subprocesses _(it's an assumption)_. +// +// Bellow are two alternative examples of how to disable tests for some arches +// if a way with excluding the whole mod from compilation would work well. 
+// +// #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))] +// #[cfg_attr(not(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing))), ignore)] +// #[test] fn test_grammar_that_should_hang_and_not_segfault() { let parent_sleep_millis = 1000; From cbce87dab11adf8a91c60c307470b18cb2d44b41 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Mon, 28 Aug 2023 14:16:36 +0300 Subject: [PATCH 314/347] chore: add an extra notes for hang tests --- cli/src/tests/parser_hang_test.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cli/src/tests/parser_hang_test.rs b/cli/src/tests/parser_hang_test.rs index b8274804..02a2689c 100644 --- a/cli/src/tests/parser_hang_test.rs +++ b/cli/src/tests/parser_hang_test.rs @@ -15,7 +15,12 @@ use tree_sitter::Parser; // because they don't work well with subprocesses _(it's an assumption)_. // // Bellow are two alternative examples of how to disable tests for some arches -// if a way with excluding the whole mod from compilation would work well. +// if a way with excluding the whole mod from compilation wouldn't work well. 
+// +// XXX: Also may be it makes sense to keep such tests as ignored by default +// to omit surprises and enable them on CI by passing an extra option explicitly: +// +// > cargo test -- --include-ignored // // #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))] // #[cfg_attr(not(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing))), ignore)] From 4fabc49066300f51e538c545f05f9196089018f7 Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Mon, 28 Aug 2023 19:26:33 +0300 Subject: [PATCH 315/347] doc: Fix broken link --- lib/binding_rust/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index de3065d2..de9dd0dc 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -342,7 +342,8 @@ impl Language { FieldId::new(id) } - /// Get the next parse state. Combine this with [lookahead_iterator] to + /// Get the next parse state. Combine this with + /// [lookahead_iterator](Language::lookahead_iterator) to /// generate completion suggestions or valid symbols in error nodes. /// /// Example: From dcfd91fc25e6e3d9cec8db213046af3329062e41 Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Mon, 28 Aug 2023 19:26:37 +0300 Subject: [PATCH 316/347] doc: Add internal links to backticked text where applicable --- lib/binding_rust/lib.rs | 42 ++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index de9dd0dc..c44dd7f5 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -78,13 +78,13 @@ pub struct InputEdit { pub new_end_position: Point, } -/// A single node within a syntax `Tree`. +/// A single node within a syntax [`Tree`]. 
#[doc(alias = "TSNode")] #[derive(Clone, Copy)] #[repr(transparent)] pub struct Node<'tree>(ffi::TSNode, PhantomData<&'tree ()>); -/// A stateful object that this is used to produce a `Tree` based on some source code. +/// A stateful object that this is used to produce a [`Tree`] based on some source code. #[doc(alias = "TSParser")] pub struct Parser(NonNull); @@ -105,7 +105,7 @@ type FieldId = NonZeroU16; /// A callback that receives log messages during parser. type Logger<'a> = Box; -/// A stateful object for walking a syntax `Tree` efficiently. +/// A stateful object for walking a syntax [Tree] efficiently. #[doc(alias = "TSTreeCursor")] pub struct TreeCursor<'cursor>(ffi::TSTreeCursor, PhantomData<&'cursor ()>); @@ -145,13 +145,13 @@ impl From for CaptureQuantifier { } } -/// A stateful object for executing a `Query` on a syntax `Tree`. +/// A stateful object for executing a [`Query`] on a syntax [`Tree`]. #[doc(alias = "TSQueryCursor")] pub struct QueryCursor { ptr: NonNull, } -/// A key-value pair associated with a particular pattern in a `Query`. +/// A key-value pair associated with a particular pattern in a [`Query`]. #[derive(Debug, PartialEq, Eq)] pub struct QueryProperty { pub key: Box, @@ -165,14 +165,14 @@ pub enum QueryPredicateArg { String(Box), } -/// A key-value pair associated with a particular pattern in a `Query`. +/// A key-value pair associated with a particular pattern in a [`Query`]. #[derive(Debug, PartialEq, Eq)] pub struct QueryPredicate { pub operator: Box, pub args: Vec, } -/// A match of a `Query` to a particular set of `Node`s. +/// A match of a [`Query`] to a particular set of [`Node`]s. pub struct QueryMatch<'cursor, 'tree> { pub pattern_index: usize, pub captures: &'cursor [QueryCapture<'tree>], @@ -180,7 +180,7 @@ pub struct QueryMatch<'cursor, 'tree> { cursor: *mut ffi::TSQueryCursor, } -/// A sequence of `QueryMatch`es associated with a given `QueryCursor`. 
+/// A sequence of [`QueryMatch`]es associated with a given [`QueryCursor`]. pub struct QueryMatches<'query, 'cursor, T: TextProvider, I: AsRef<[u8]>> { ptr: *mut ffi::TSQueryCursor, query: &'query Query, @@ -190,7 +190,7 @@ pub struct QueryMatches<'query, 'cursor, T: TextProvider, I: AsRef<[u8]>> { _phantom: PhantomData<(&'cursor (), I)>, } -/// A sequence of `QueryCapture`s associated with a given `QueryCursor`. +/// A sequence of [`QueryCapture`]s associated with a given [`QueryCursor`]. pub struct QueryCaptures<'query, 'cursor, T: TextProvider, I: AsRef<[u8]>> { ptr: *mut ffi::TSQueryCursor, query: &'query Query, @@ -208,7 +208,7 @@ where fn text(&mut self, node: Node) -> Self::I; } -/// A particular `Node` that has been captured with a particular name within a `Query`. +/// A particular [`Node`] that has been captured with a particular name within a [`Query`]. #[derive(Clone, Copy, Debug)] #[repr(C)] pub struct QueryCapture<'tree> { @@ -216,17 +216,17 @@ pub struct QueryCapture<'tree> { pub index: u32, } -/// An error that occurred when trying to assign an incompatible `Language` to a `Parser`. +/// An error that occurred when trying to assign an incompatible [`Language`] to a [`Parser`]. #[derive(Debug, PartialEq, Eq)] pub struct LanguageError { version: usize, } -/// An error that occurred in `Parser::set_included_ranges`. +/// An error that occurred in [`Parser::set_included_ranges`]. #[derive(Debug, PartialEq, Eq)] pub struct IncludedRangesError(pub usize); -/// An error that occurred when trying to create a `Query`. +/// An error that occurred when trying to create a [`Query`]. #[derive(Debug, PartialEq, Eq)] pub struct QueryError { pub row: usize, @@ -264,7 +264,7 @@ pub struct LossyUtf8<'a> { impl Language { /// Get the ABI version number that indicates which version of the Tree-sitter CLI - /// that was used to generate this `Language`. + /// that was used to generate this [`Language`]. 
#[doc(alias = "ts_language_version")] pub fn version(&self) -> usize { unsafe { ffi::ts_language_version(self.0) as usize } @@ -632,10 +632,10 @@ impl Parser { /// Instruct the parser to start the next parse from the beginning. /// - /// If the parser previously failed because of a timeout or a cancellation, then - /// by default, it will resume where it left off on the next call to `parse` or - /// other parsing functions. If you don't want to resume, and instead intend to - /// use this parser to parse some other document, you must call `reset` first. + /// If the parser previously failed because of a timeout or a cancellation, then by default, it + /// will resume where it left off on the next call to [`parse`](Parser::parse) or other parsing + /// functions. If you don't want to resume, and instead intend to use this parser to parse some + /// other document, you must call `reset` first. #[doc(alias = "ts_parser_reset")] pub fn reset(&mut self) { unsafe { ffi::ts_parser_reset(self.0.as_ptr()) } @@ -653,7 +653,7 @@ impl Parser { /// take before halting. /// /// If parsing takes longer than this, it will halt early, returning `None`. - /// See `parse` for more information. + /// See [`parse`](Parser::parse) for more information. #[doc(alias = "ts_parser_set_timeout_micros")] pub fn set_timeout_micros(&mut self, timeout_micros: u64) { unsafe { ffi::ts_parser_set_timeout_micros(self.0.as_ptr(), timeout_micros) } @@ -1068,7 +1068,7 @@ impl<'tree> Node<'tree> { /// allocations, you should reuse the same cursor for subsequent calls to /// this method. /// - /// If you're walking the tree recursively, you may want to use the `TreeCursor` + /// If you're walking the tree recursively, you may want to use the [`TreeCursor`] /// APIs directly instead. pub fn children<'cursor>( &self, @@ -1440,7 +1440,7 @@ impl<'cursor> TreeCursor<'cursor> { /// Re-initialize a tree cursor to the same position as another cursor. 
/// - /// Unlike `reset`, this will not lose parent information and + /// Unlike [`reset`](TreeCursor::reset), this will not lose parent information and /// allows reusing already created cursors. #[doc(alias = "ts_tree_cursor_reset_to")] pub fn reset_to(&mut self, cursor: TreeCursor<'cursor>) { From 8d894bb0500a3481735141983dee203084bca18a Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Mon, 28 Aug 2023 19:26:38 +0300 Subject: [PATCH 317/347] doc: Add backticks to all internal links --- lib/binding_rust/ffi.rs | 32 ++++++++--------- lib/binding_rust/lib.rs | 78 ++++++++++++++++++++--------------------- 2 files changed, 55 insertions(+), 55 deletions(-) diff --git a/lib/binding_rust/ffi.rs b/lib/binding_rust/ffi.rs index a99d2afe..a962e6c1 100644 --- a/lib/binding_rust/ffi.rs +++ b/lib/binding_rust/ffi.rs @@ -18,7 +18,7 @@ use crate::{ use std::{marker::PhantomData, mem::ManuallyDrop, ptr::NonNull, str}; impl Language { - /// Reconstructs a [Language] from a raw pointer. + /// Reconstructs a [`Language`] from a raw pointer. /// /// # Safety /// @@ -27,14 +27,14 @@ impl Language { Language(ptr) } - /// Consumes the [Language], returning a raw pointer to the underlying C structure. + /// Consumes the [`Language`], returning a raw pointer to the underlying C structure. pub fn into_raw(self) -> *const TSLanguage { ManuallyDrop::new(self).0 } } impl Parser { - /// Reconstructs a [Parser] from a raw pointer. + /// Reconstructs a [`Parser`] from a raw pointer. /// /// # Safety /// @@ -43,7 +43,7 @@ impl Parser { Parser(NonNull::new_unchecked(ptr)) } - /// Consumes the [Parser], returning a raw pointer to the underlying C structure. + /// Consumes the [`Parser`], returning a raw pointer to the underlying C structure. /// /// # Safety /// @@ -56,7 +56,7 @@ impl Parser { } impl Tree { - /// Reconstructs a [Tree] from a raw pointer. + /// Reconstructs a [`Tree`] from a raw pointer. 
/// /// # Safety /// @@ -65,14 +65,14 @@ impl Tree { Tree(NonNull::new_unchecked(ptr)) } - /// Consumes the [Tree], returning a raw pointer to the underlying C structure. + /// Consumes the [`Tree`], returning a raw pointer to the underlying C structure. pub fn into_raw(self) -> *mut TSTree { ManuallyDrop::new(self).0.as_ptr() } } impl<'tree> Node<'tree> { - /// Reconstructs a [Node] from a raw pointer. + /// Reconstructs a [`Node`] from a raw pointer. /// /// # Safety /// @@ -81,14 +81,14 @@ impl<'tree> Node<'tree> { Node(raw, PhantomData) } - /// Consumes the [Node], returning a raw pointer to the underlying C structure. + /// Consumes the [`Node`], returning a raw pointer to the underlying C structure. pub fn into_raw(self) -> TSNode { ManuallyDrop::new(self).0 } } impl<'a> TreeCursor<'a> { - /// Reconstructs a [TreeCursor] from a raw pointer. + /// Reconstructs a [`TreeCursor`] from a raw pointer. /// /// # Safety /// @@ -97,14 +97,14 @@ impl<'a> TreeCursor<'a> { TreeCursor(raw, PhantomData) } - /// Consumes the [TreeCursor], returning a raw pointer to the underlying C structure. + /// Consumes the [`TreeCursor`], returning a raw pointer to the underlying C structure. pub fn into_raw(self) -> TSTreeCursor { ManuallyDrop::new(self).0 } } impl Query { - /// Reconstructs a [Query] from a raw pointer. + /// Reconstructs a [`Query`] from a raw pointer. /// /// # Safety /// @@ -113,14 +113,14 @@ impl Query { Query::from_raw_parts(ptr, source) } - /// Consumes the [Query], returning a raw pointer to the underlying C structure. + /// Consumes the [`Query`], returning a raw pointer to the underlying C structure. pub fn into_raw(self) -> *mut TSQuery { ManuallyDrop::new(self).ptr.as_ptr() } } impl QueryCursor { - /// Reconstructs a [QueryCursor] from a raw pointer. + /// Reconstructs a [`QueryCursor`] from a raw pointer. 
/// /// # Safety /// @@ -131,14 +131,14 @@ impl QueryCursor { } } - /// Consumes the [QueryCursor], returning a raw pointer to the underlying C structure. + /// Consumes the [`QueryCursor`], returning a raw pointer to the underlying C structure. pub fn into_raw(self) -> *mut TSQueryCursor { ManuallyDrop::new(self).ptr.as_ptr() } } impl LookaheadIterator { - /// Reconstructs a [LookaheadIterator] from a raw pointer. + /// Reconstructs a [`LookaheadIterator`] from a raw pointer. /// /// # Safety /// @@ -147,7 +147,7 @@ impl LookaheadIterator { LookaheadIterator(NonNull::new_unchecked(ptr)) } - /// Consumes the [LookaheadIterator], returning a raw pointer to the underlying C structure. + /// Consumes the [`LookaheadIterator`], returning a raw pointer to the underlying C structure. pub fn into_raw(self) -> *mut TSLookaheadIterator { ManuallyDrop::new(self).0.as_ptr() } diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index c44dd7f5..3c8d7504 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -105,7 +105,7 @@ type FieldId = NonZeroU16; /// A callback that receives log messages during parser. type Logger<'a> = Box; -/// A stateful object for walking a syntax [Tree] efficiently. +/// A stateful object for walking a syntax [`Tree`] efficiently. #[doc(alias = "TSTreeCursor")] pub struct TreeCursor<'cursor>(ffi::TSTreeCursor, PhantomData<&'cursor ()>); @@ -343,7 +343,7 @@ impl Language { } /// Get the next parse state. Combine this with - /// [lookahead_iterator](Language::lookahead_iterator) to + /// [`lookahead_iterator`](Language::lookahead_iterator) to /// generate completion suggestions or valid symbols in error nodes. /// /// Example: @@ -359,9 +359,9 @@ impl Language { /// /// This returns `None` if state is invalid for this language. /// - /// Iterating [LookaheadIterator] will yield valid symbols in the given + /// Iterating [`LookaheadIterator`] will yield valid symbols in the given /// parse state. 
Newly created lookahead iterators will return the `ERROR` - /// symbol from [LookaheadIterator::current_symbol]. + /// symbol from [`LookaheadIterator::current_symbol`]. /// /// Lookahead iterators can be useful to generate suggestions and improve /// syntax error diagnostics. To get symbols valid in an ERROR node, use the @@ -389,9 +389,9 @@ impl Parser { /// Returns a Result indicating whether or not the language was successfully /// assigned. True means assignment succeeded. False means there was a version /// mismatch: the language was generated with an incompatible version of the - /// Tree-sitter CLI. Check the language's version using [Language::version] - /// and compare it to this library's [LANGUAGE_VERSION](LANGUAGE_VERSION) and - /// [MIN_COMPATIBLE_LANGUAGE_VERSION](MIN_COMPATIBLE_LANGUAGE_VERSION) constants. + /// Tree-sitter CLI. Check the language's version using [`Language::version`] + /// and compare it to this library's [`LANGUAGE_VERSION`](LANGUAGE_VERSION) and + /// [`MIN_COMPATIBLE_LANGUAGE_VERSION`](MIN_COMPATIBLE_LANGUAGE_VERSION) constants. #[doc(alias = "ts_parser_set_language")] pub fn set_language(&mut self, language: Language) -> Result<(), LanguageError> { let version = language.version(); @@ -487,12 +487,12 @@ impl Parser { /// * `old_tree` A previous syntax tree parsed from the same document. /// If the text of the document has changed since `old_tree` was /// created, then you must edit `old_tree` to match the new text using - /// [Tree::edit]. + /// [`Tree::edit`]. 
/// - /// Returns a [Tree] if parsing succeeded, or `None` if: - /// * The parser has not yet had a language assigned with [Parser::set_language] - /// * The timeout set with [Parser::set_timeout_micros] expired - /// * The cancellation flag set with [Parser::set_cancellation_flag] was flipped + /// Returns a [`Tree`] if parsing succeeded, or `None` if: + /// * The parser has not yet had a language assigned with [`Parser::set_language`] + /// * The timeout set with [`Parser::set_timeout_micros`] expired + /// * The cancellation flag set with [`Parser::set_cancellation_flag`] was flipped #[doc(alias = "ts_parser_parse")] pub fn parse(&mut self, text: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option { let bytes = text.as_ref(); @@ -510,7 +510,7 @@ impl Parser { /// * `old_tree` A previous syntax tree parsed from the same document. /// If the text of the document has changed since `old_tree` was /// created, then you must edit `old_tree` to match the new text using - /// [Tree::edit]. + /// [`Tree::edit`]. pub fn parse_utf16( &mut self, input: impl AsRef<[u16]>, @@ -534,7 +534,7 @@ impl Parser { /// * `old_tree` A previous syntax tree parsed from the same document. /// If the text of the document has changed since `old_tree` was /// created, then you must edit `old_tree` to match the new text using - /// [Tree::edit]. + /// [`Tree::edit`]. pub fn parse_with, F: FnMut(usize, Point) -> T>( &mut self, callback: &mut F, @@ -584,7 +584,7 @@ impl Parser { /// * `old_tree` A previous syntax tree parsed from the same document. /// If the text of the document has changed since `old_tree` was /// created, then you must edit `old_tree` to match the new text using - /// [Tree::edit]. + /// [`Tree::edit`]. pub fn parse_utf16_with, F: FnMut(usize, Point) -> T>( &mut self, callback: &mut F, @@ -643,7 +643,7 @@ impl Parser { /// Get the duration in microseconds that parsing is allowed to take. /// - /// This is set via [set_timeout_micros](Parser::set_timeout_micros). 
+ /// This is set via [`set_timeout_micros`](Parser::set_timeout_micros). #[doc(alias = "ts_parser_timeout_micros")] pub fn timeout_micros(&self) -> u64 { unsafe { ffi::ts_parser_timeout_micros(self.0.as_ptr()) } @@ -711,7 +711,7 @@ impl Parser { /// /// If a pointer is assigned, then the parser will periodically read from /// this pointer during parsing. If it reads a non-zero value, it will halt early, - /// returning `None`. See [parse](Parser::parse) for more information. + /// returning `None`. See [`parse`](Parser::parse) for more information. #[doc(alias = "ts_parser_set_cancellation_flag")] pub unsafe fn set_cancellation_flag(&mut self, flag: Option<&AtomicUsize>) { if let Some(flag) = flag { @@ -771,7 +771,7 @@ impl Tree { unsafe { ffi::ts_tree_edit(self.0.as_ptr(), &edit) }; } - /// Create a new [TreeCursor] starting from the root of the tree. + /// Create a new [`TreeCursor`] starting from the root of the tree. pub fn walk(&self) -> TreeCursor { self.root_node().walk() } @@ -781,7 +781,7 @@ impl Tree { /// /// For this to work correctly, this syntax tree must have been edited such that its /// ranges match up to the new tree. Generally, you'll want to call this method right - /// after calling one of the [Parser::parse] functions. Call it on the old tree that + /// after calling one of the [`Parser::parse`] functions. Call it on the old tree that /// was passed to parse, and pass the new tree that was returned from `parse`. #[doc(alias = "ts_tree_get_changed_ranges")] pub fn changed_ranges(&self, other: &Tree) -> impl ExactSizeIterator { @@ -882,7 +882,7 @@ impl<'tree> Node<'tree> { .unwrap() } - /// Get the [Language] that was used to parse this node's syntax tree. + /// Get the [`Language`] that was used to parse this node's syntax tree. 
#[doc(alias = "ts_node_language")] pub fn language(&self) -> Language { Language(unsafe { ffi::ts_node_language(self.0) }) @@ -996,7 +996,7 @@ impl<'tree> Node<'tree> { /// /// This method is fairly fast, but its cost is technically log(i), so you /// if you might be iterating over a long list of children, you should use - /// [Node::children] instead. + /// [`Node::children`] instead. #[doc(alias = "ts_node_child")] pub fn child(&self, i: usize) -> Option { Self::new(unsafe { ffi::ts_node_child(self.0, i as u32) }) @@ -1010,10 +1010,10 @@ impl<'tree> Node<'tree> { /// Get this node's *named* child at the given index. /// - /// See also [Node::is_named]. + /// See also [`Node::is_named`]. /// This method is fairly fast, but its cost is technically log(i), so you /// if you might be iterating over a long list of children, you should use - /// [Node::named_children] instead. + /// [`Node::named_children`] instead. #[doc(alias = "ts_node_named_child")] pub fn named_child(&self, i: usize) -> Option { Self::new(unsafe { ffi::ts_node_named_child(self.0, i as u32) }) @@ -1021,7 +1021,7 @@ impl<'tree> Node<'tree> { /// Get this node's number of *named* children. /// - /// See also [Node::is_named]. + /// See also [`Node::is_named`]. #[doc(alias = "ts_node_named_child_count")] pub fn named_child_count(&self) -> usize { unsafe { ffi::ts_node_named_child_count(self.0) as usize } @@ -1030,7 +1030,7 @@ impl<'tree> Node<'tree> { /// Get the first child with the given field name. /// /// If multiple children may have the same field name, access them using - /// [children_by_field_name](Node::children_by_field_name) + /// [`children_by_field_name`](Node::children_by_field_name) #[doc(alias = "ts_node_child_by_field_name")] pub fn child_by_field_name(&self, field_name: impl AsRef<[u8]>) -> Option { let field_name = field_name.as_ref(); @@ -1045,8 +1045,8 @@ impl<'tree> Node<'tree> { /// Get this node's child with the given numerical field id. 
/// - /// See also [child_by_field_name](Node::child_by_field_name). You can convert a field name to - /// an id using [Language::field_id_for_name]. + /// See also [`child_by_field_name`](Node::child_by_field_name). You can convert a field name to + /// an id using [`Language::field_id_for_name`]. #[doc(alias = "ts_node_child_by_field_id")] pub fn child_by_field_id(&self, field_id: u16) -> Option { Self::new(unsafe { ffi::ts_node_child_by_field_id(self.0, field_id) }) @@ -1063,8 +1063,8 @@ impl<'tree> Node<'tree> { /// Iterate over this node's children. /// - /// A [TreeCursor] is used to retrieve the children efficiently. Obtain - /// a [TreeCursor] by calling [Tree::walk] or [Node::walk]. To avoid unnecessary + /// A [`TreeCursor`] is used to retrieve the children efficiently. Obtain + /// a [`TreeCursor`] by calling [`Tree::walk`] or [`Node::walk`]. To avoid unnecessary /// allocations, you should reuse the same cursor for subsequent calls to /// this method. /// @@ -1085,7 +1085,7 @@ impl<'tree> Node<'tree> { /// Iterate over this node's named children. /// - /// See also [Node::children]. + /// See also [`Node::children`]. pub fn named_children<'cursor>( &self, cursor: &'cursor mut TreeCursor<'tree>, @@ -1106,7 +1106,7 @@ impl<'tree> Node<'tree> { /// Iterate over this node's children with a given field name. /// - /// See also [Node::children]. + /// See also [`Node::children`]. pub fn children_by_field_name<'cursor>( &self, field_name: &str, @@ -1137,7 +1137,7 @@ impl<'tree> Node<'tree> { /// Iterate over this node's children with a given field id. /// - /// See also [Node::children_by_field_name]. + /// See also [`Node::children_by_field_name`]. pub fn children_by_field_id<'cursor>( &self, field_id: FieldId, @@ -1250,7 +1250,7 @@ impl<'tree> Node<'tree> { &source.as_ref()[self.start_byte()..self.end_byte()] } - /// Create a new [TreeCursor] starting from this node. + /// Create a new [`TreeCursor`] starting from this node. 
#[doc(alias = "ts_tree_cursor_new")] pub fn walk(&self) -> TreeCursor<'tree> { TreeCursor(unsafe { ffi::ts_tree_cursor_new(self.0) }, PhantomData) @@ -1259,9 +1259,9 @@ impl<'tree> Node<'tree> { /// Edit this node to keep it in-sync with source code that has been edited. /// /// This function is only rarely needed. When you edit a syntax tree with the - /// [Tree::edit] method, all of the nodes that you retrieve from the tree - /// afterward will already reflect the edit. You only need to use [Node::edit] - /// when you have a specific [Node] instance that you want to keep and continue + /// [`Tree::edit`] method, all of the nodes that you retrieve from the tree + /// afterward will already reflect the edit. You only need to use [`Node::edit`] + /// when you have a specific [`Node`] instance that you want to keep and continue /// to use after an edit. #[doc(alias = "ts_node_edit")] pub fn edit(&mut self, edit: &InputEdit) { @@ -1301,7 +1301,7 @@ impl fmt::Debug for Node<'_> { } impl<'cursor> TreeCursor<'cursor> { - /// Get the tree cursor's current [Node]. + /// Get the tree cursor's current [`Node`]. #[doc(alias = "ts_tree_cursor_current_node")] pub fn node(&self) -> Node<'cursor> { Node( @@ -1312,7 +1312,7 @@ impl<'cursor> TreeCursor<'cursor> { /// Get the numerical field id of this tree cursor's current node. /// - /// See also [field_name](TreeCursor::field_name). + /// See also [`field_name`](TreeCursor::field_name). #[doc(alias = "ts_tree_cursor_current_field_id")] pub fn field_id(&self) -> Option { let id = unsafe { ffi::ts_tree_cursor_current_field_id(&self.0) }; @@ -1330,7 +1330,7 @@ impl<'cursor> TreeCursor<'cursor> { /// Get the numerical field id of this tree cursor's current node. /// - /// See also [field_name](TreeCursor::field_name). + /// See also [`field_name`](TreeCursor::field_name). 
#[doc(alias = "ts_tree_cursor_current_depth")] pub fn depth(&self) -> u32 { unsafe { ffi::ts_tree_cursor_current_depth(&self.0) } From bdc0e44d45234af7581ff362d1193cbded588cfd Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Mon, 28 Aug 2023 19:52:29 +0300 Subject: [PATCH 318/347] doc: Include README in top-level module documentation --- lib/binding_rust/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 3c8d7504..9cd04563 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1,3 +1,5 @@ +#![doc = include_str!("./README.md")] + pub mod ffi; mod util; From 08629215371fa434db92e0b1f6fadd34544bba9a Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Mon, 28 Aug 2023 21:16:02 +0300 Subject: [PATCH 319/347] doc: Include README as top-level module documentation for all crates --- cli/config/README.md | 6 ++++-- cli/config/src/lib.rs | 2 +- cli/loader/README.md | 4 ++-- cli/loader/src/lib.rs | 2 ++ cli/src/lib.rs | 2 ++ highlight/README.md | 2 +- highlight/src/lib.rs | 2 ++ tags/README.md | 2 +- tags/src/lib.rs | 2 ++ 9 files changed, 17 insertions(+), 7 deletions(-) diff --git a/cli/config/README.md b/cli/config/README.md index 8cbfbcf4..e7d7b39b 100644 --- a/cli/config/README.md +++ b/cli/config/README.md @@ -1,5 +1,7 @@ -# `tree-sitter-config` +# Tree-sitter Config + +Manages Tree-sitter's configuration file. You can use a configuration file to control the behavior of the `tree-sitter` -command-line program. This crate implements the logic for finding and the +command-line program. This crate implements the logic for finding and the parsing the contents of the configuration file. diff --git a/cli/config/src/lib.rs b/cli/config/src/lib.rs index 3cd09b8d..83b85358 100644 --- a/cli/config/src/lib.rs +++ b/cli/config/src/lib.rs @@ -1,4 +1,4 @@ -//! Manages tree-sitter's configuration file. 
+#![doc = include_str!("../README.md")] use anyhow::{anyhow, Context, Result}; use serde::{Deserialize, Serialize}; diff --git a/cli/loader/README.md b/cli/loader/README.md index 9889ec71..a3c18674 100644 --- a/cli/loader/README.md +++ b/cli/loader/README.md @@ -1,6 +1,6 @@ -# `tree-sitter-loader` +# Tree-sitter Loader The `tree-sitter` command-line program will dynamically find and build grammars at runtime, if you have cloned the grammars' repositories to your local -filesystem. This helper crate implements that logic, so that you can use it in +filesystem. This helper crate implements that logic, so that you can use it in your own program analysis tools, as well. diff --git a/cli/loader/src/lib.rs b/cli/loader/src/lib.rs index 2b3896ef..3022b6b9 100644 --- a/cli/loader/src/lib.rs +++ b/cli/loader/src/lib.rs @@ -1,3 +1,5 @@ +#![doc = include_str!("../README.md")] + use anyhow::{anyhow, Context, Error, Result}; use libloading::{Library, Symbol}; use once_cell::unsync::OnceCell; diff --git a/cli/src/lib.rs b/cli/src/lib.rs index d52b516a..549db773 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -1,3 +1,5 @@ +#![doc = include_str!("../README.md")] + pub mod generate; pub mod highlight; pub mod logger; diff --git a/highlight/README.md b/highlight/README.md index 95b229eb..459790ef 100644 --- a/highlight/README.md +++ b/highlight/README.md @@ -1,4 +1,4 @@ -# `tree-sitter-highlight` +# Tree-sitter Highlight [![crates.io badge]][crates.io] diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index fad91ad9..2903c7c5 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -1,3 +1,5 @@ +#![doc = include_str!("../README.md")] + pub mod c_lib; pub mod util; pub use c_lib as c; diff --git a/tags/README.md b/tags/README.md index f5b85cb2..d7daac79 100644 --- a/tags/README.md +++ b/tags/README.md @@ -1,4 +1,4 @@ -# `tree-sitter-tags` +# Tree-sitter Tags [![crates.io badge]][crates.io] diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 13499d86..0cf1bf96 
100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -1,3 +1,5 @@ +#![doc = include_str!("../README.md")] + pub mod c_lib; use memchr::memchr; From 756e51867d7b47616e86d4205f3cca81f06f462e Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Mon, 28 Aug 2023 22:48:46 +0300 Subject: [PATCH 320/347] doc: fix `cargo doc` warnings --- cli/Cargo.toml | 1 + lib/binding_rust/bindings.rs | 2 +- lib/include/tree_sitter/api.h | 5 +++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cli/Cargo.toml b/cli/Cargo.toml index de703c85..0edd3c3d 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -14,6 +14,7 @@ rust-version.workspace = true [[bin]] name = "tree-sitter" path = "src/main.rs" +doc = false [[bench]] name = "benchmark" diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 225fbc11..3cf38097 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -164,7 +164,7 @@ extern "C" { pub fn ts_parser_set_language(self_: *mut TSParser, language: *const TSLanguage) -> bool; } extern "C" { - #[doc = " Set the ranges of text that the parser should include when parsing.\n\n By default, the parser will always include entire documents. This function\n allows you to parse only a *portion* of a document but still return a syntax\n tree whose ranges match up with the document as a whole. You can also pass\n multiple disjoint ranges.\n\n The second and third parameters specify the location and length of an array\n of ranges. The parser does *not* take ownership of these ranges; it copies\n the data, so it doesn't matter how these ranges are allocated.\n\n If `count` is zero, then the entire document will be parsed. Otherwise,\n the given ranges must be ordered from earliest to latest in the document,\n and they must not overlap. 
That is, the following must hold for all\n `i` < `count - 1`: ranges[i].end_byte <= ranges[i + 1].start_byte\n\n If this requirement is not satisfied, the operation will fail, the ranges\n will not be assigned, and this function will return `false`. On success,\n this function returns `true`"] + #[doc = " Set the ranges of text that the parser should include when parsing.\n\n By default, the parser will always include entire documents. This function\n allows you to parse only a *portion* of a document but still return a syntax\n tree whose ranges match up with the document as a whole. You can also pass\n multiple disjoint ranges.\n\n The second and third parameters specify the location and length of an array\n of ranges. The parser does *not* take ownership of these ranges; it copies\n the data, so it doesn't matter how these ranges are allocated.\n\n If `count` is zero, then the entire document will be parsed. Otherwise,\n the given ranges must be ordered from earliest to latest in the document,\n and they must not overlap. That is, the following must hold for all:\n\n `i < count - 1`: `ranges[i].end_byte <= ranges[i + 1].start_byte`\n\n If this requirement is not satisfied, the operation will fail, the ranges\n will not be assigned, and this function will return `false`. On success,\n this function returns `true`"] pub fn ts_parser_set_included_ranges( self_: *mut TSParser, ranges: *const TSRange, diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index b2d8a6f5..9072bb60 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -192,8 +192,9 @@ bool ts_parser_set_language(TSParser *self, const TSLanguage *language); * * If `count` is zero, then the entire document will be parsed. Otherwise, * the given ranges must be ordered from earliest to latest in the document, - * and they must not overlap. 
That is, the following must hold for all - * `i` < `count - 1`: ranges[i].end_byte <= ranges[i + 1].start_byte + * and they must not overlap. That is, the following must hold for all: + * + * `i < count - 1`: `ranges[i].end_byte <= ranges[i + 1].start_byte` * * If this requirement is not satisfied, the operation will fail, the ranges * will not be assigned, and this function will return `false`. On success, From f9117a022145695b1bf083c943a384fac4b9144a Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Mon, 28 Aug 2023 23:05:36 +0300 Subject: [PATCH 321/347] doc: Add internal links for `api.h` funcs --- lib/binding_rust/bindings.rs | 54 ++++++++--------- lib/include/tree_sitter/api.h | 111 ++++++++++++++++++---------------- 2 files changed, 85 insertions(+), 80 deletions(-) diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 3cf38097..a0e52f6c 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -160,7 +160,7 @@ extern "C" { pub fn ts_parser_language(self_: *const TSParser) -> *const TSLanguage; } extern "C" { - #[doc = " Set the language that the parser should use for parsing.\n\n Returns a boolean indicating whether or not the language was successfully\n assigned. True means assignment succeeded. False means there was a version\n mismatch: the language was generated with an incompatible version of the\n Tree-sitter CLI. Check the language's version using `ts_language_version`\n and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION` and\n `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants."] + #[doc = " Set the language that the parser should use for parsing.\n\n Returns a boolean indicating whether or not the language was successfully\n assigned. True means assignment succeeded. False means there was a version\n mismatch: the language was generated with an incompatible version of the\n Tree-sitter CLI. 
Check the language's version using [`ts_language_version`]\n and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and\n [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants."] pub fn ts_parser_set_language(self_: *mut TSParser, language: *const TSLanguage) -> bool; } extern "C" { @@ -176,7 +176,7 @@ extern "C" { pub fn ts_parser_included_ranges(self_: *const TSParser, count: *mut u32) -> *const TSRange; } extern "C" { - #[doc = " Use the parser to parse some source code and create a syntax tree.\n\n If you are parsing this document for the first time, pass `NULL` for the\n `old_tree` parameter. Otherwise, if you have already parsed an earlier\n version of this document and the document has since been edited, pass the\n previous syntax tree so that the unchanged parts of it can be reused.\n This will save time and memory. For this to work correctly, you must have\n already edited the old syntax tree using the `ts_tree_edit` function in a\n way that exactly matches the source code changes.\n\n The `TSInput` parameter lets you specify how to read the text. It has the\n following three fields:\n 1. `read`: A function to retrieve a chunk of text at a given byte offset\n and (row, column) position. The function should return a pointer to the\n text and write its length to the `bytes_read` pointer. The parser does\n not take ownership of this buffer; it just borrows it until it has\n finished reading it. The function should write a zero value to the\n `bytes_read` pointer to indicate the end of the document.\n 2. `payload`: An arbitrary pointer that will be passed to each invocation\n of the `read` function.\n 3. `encoding`: An indication of how the text is encoded. Either\n `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.\n\n This function returns a syntax tree on success, and `NULL` on failure. There\n are three possible reasons for failure:\n 1. The parser does not have a language assigned. Check for this using the\n`ts_parser_language` function.\n 2. 
Parsing was cancelled due to a timeout that was set by an earlier call to\n the `ts_parser_set_timeout_micros` function. You can resume parsing from\n where the parser left out by calling `ts_parser_parse` again with the\n same arguments. Or you can start parsing from scratch by first calling\n `ts_parser_reset`.\n 3. Parsing was cancelled using a cancellation flag that was set by an\n earlier call to `ts_parser_set_cancellation_flag`. You can resume parsing\n from where the parser left out by calling `ts_parser_parse` again with\n the same arguments."] + #[doc = " Use the parser to parse some source code and create a syntax tree.\n\n If you are parsing this document for the first time, pass `NULL` for the\n `old_tree` parameter. Otherwise, if you have already parsed an earlier\n version of this document and the document has since been edited, pass the\n previous syntax tree so that the unchanged parts of it can be reused.\n This will save time and memory. For this to work correctly, you must have\n already edited the old syntax tree using the [`ts_tree_edit`] function in a\n way that exactly matches the source code changes.\n\n The [`TSInput`] parameter lets you specify how to read the text. It has the\n following three fields:\n 1. [`read`]: A function to retrieve a chunk of text at a given byte offset\n and (row, column) position. The function should return a pointer to the\n text and write its length to the [`bytes_read`] pointer. The parser does\n not take ownership of this buffer; it just borrows it until it has\n finished reading it. The function should write a zero value to the\n [`bytes_read`] pointer to indicate the end of the document.\n 2. [`payload`]: An arbitrary pointer that will be passed to each invocation\n of the [`read`] function.\n 3. [`encoding`]: An indication of how the text is encoded. Either\n `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.\n\n This function returns a syntax tree on success, and `NULL` on failure. 
There\n are three possible reasons for failure:\n 1. The parser does not have a language assigned. Check for this using the\n[`ts_parser_language`] function.\n 2. Parsing was cancelled due to a timeout that was set by an earlier call to\n the [`ts_parser_set_timeout_micros`] function. You can resume parsing from\n where the parser left out by calling [`ts_parser_parse`] again with the\n same arguments. Or you can start parsing from scratch by first calling\n [`ts_parser_reset`].\n 3. Parsing was cancelled using a cancellation flag that was set by an\n earlier call to [`ts_parser_set_cancellation_flag`]. You can resume parsing\n from where the parser left out by calling [`ts_parser_parse`] again with\n the same arguments.\n\n [`read`]: TSInput::read\n [`payload`]: TSInput::payload\n [`encoding`]: TSInput::encoding\n [`bytes_read`]: TSInput::read"] pub fn ts_parser_parse( self_: *mut TSParser, old_tree: *const TSTree, @@ -184,7 +184,7 @@ extern "C" { ) -> *mut TSTree; } extern "C" { - #[doc = " Use the parser to parse some source code stored in one contiguous buffer.\n The first two parameters are the same as in the `ts_parser_parse` function\n above. The second two parameters indicate the location of the buffer and its\n length in bytes."] + #[doc = " Use the parser to parse some source code stored in one contiguous buffer.\n The first two parameters are the same as in the [`ts_parser_parse`] function\n above. The second two parameters indicate the location of the buffer and its\n length in bytes."] pub fn ts_parser_parse_string( self_: *mut TSParser, old_tree: *const TSTree, @@ -193,7 +193,7 @@ extern "C" { ) -> *mut TSTree; } extern "C" { - #[doc = " Use the parser to parse some source code stored in one contiguous buffer with\n a given encoding. The first four parameters work the same as in the\n `ts_parser_parse_string` method above. 
The final parameter indicates whether\n the text is encoded as UTF8 or UTF16."] + #[doc = " Use the parser to parse some source code stored in one contiguous buffer with\n a given encoding. The first four parameters work the same as in the\n [`ts_parser_parse_string`] method above. The final parameter indicates whether\n the text is encoded as UTF8 or UTF16."] pub fn ts_parser_parse_string_encoding( self_: *mut TSParser, old_tree: *const TSTree, @@ -203,11 +203,11 @@ extern "C" { ) -> *mut TSTree; } extern "C" { - #[doc = " Instruct the parser to start the next parse from the beginning.\n\n If the parser previously failed because of a timeout or a cancellation, then\n by default, it will resume where it left off on the next call to\n `ts_parser_parse` or other parsing functions. If you don't want to resume,\n and instead intend to use this parser to parse some other document, you must\n call `ts_parser_reset` first."] + #[doc = " Instruct the parser to start the next parse from the beginning.\n\n If the parser previously failed because of a timeout or a cancellation, then\n by default, it will resume where it left off on the next call to\n [`ts_parser_parse`] or other parsing functions. 
If you don't want to resume,\n and instead intend to use this parser to parse some other document, you must\n call [`ts_parser_reset`] first."] pub fn ts_parser_reset(self_: *mut TSParser); } extern "C" { - #[doc = " Set the maximum duration in microseconds that parsing should be allowed to\n take before halting.\n\n If parsing takes longer than this, it will halt early, returning NULL.\n See `ts_parser_parse` for more information."] + #[doc = " Set the maximum duration in microseconds that parsing should be allowed to\n take before halting.\n\n If parsing takes longer than this, it will halt early, returning NULL.\n See [`ts_parser_parse`] for more information."] pub fn ts_parser_set_timeout_micros(self_: *mut TSParser, timeout_micros: u64); } extern "C" { @@ -215,7 +215,7 @@ extern "C" { pub fn ts_parser_timeout_micros(self_: *const TSParser) -> u64; } extern "C" { - #[doc = " Set the parser's current cancellation flag pointer.\n\n If a non-null pointer is assigned, then the parser will periodically read\n from this pointer during parsing. If it reads a non-zero value, it will\n halt early, returning NULL. See `ts_parser_parse` for more information."] + #[doc = " Set the parser's current cancellation flag pointer.\n\n If a non-null pointer is assigned, then the parser will periodically read\n from this pointer during parsing. If it reads a non-zero value, it will\n halt early, returning NULL. See [`ts_parser_parse`] for more information."] pub fn ts_parser_set_cancellation_flag(self_: *mut TSParser, flag: *const usize); } extern "C" { @@ -267,7 +267,7 @@ extern "C" { pub fn ts_tree_edit(self_: *mut TSTree, edit: *const TSInputEdit); } extern "C" { - #[doc = " Compare an old edited syntax tree to a new syntax tree representing the same\n document, returning an array of ranges whose syntactic structure has changed.\n\n For this to work correctly, the old syntax tree must have been edited such\n that its ranges match up to the new tree. 
Generally, you'll want to call\n this function right after calling one of the `ts_parser_parse` functions.\n You need to pass the old tree that was passed to parse, as well as the new\n tree that was returned from that function.\n\n The returned array is allocated using `malloc` and the caller is responsible\n for freeing it using `free`. The length of the array will be written to the\n given `length` pointer."] + #[doc = " Compare an old edited syntax tree to a new syntax tree representing the same\n document, returning an array of ranges whose syntactic structure has changed.\n\n For this to work correctly, the old syntax tree must have been edited such\n that its ranges match up to the new tree. Generally, you'll want to call\n this function right after calling one of the [`ts_parser_parse`] functions.\n You need to pass the old tree that was passed to parse, as well as the new\n tree that was returned from that function.\n\n The returned array is allocated using `malloc` and the caller is responsible\n for freeing it using `free`. The length of the array will be written to the\n given `length` pointer."] pub fn ts_tree_get_changed_ranges( old_tree: *const TSTree, new_tree: *const TSTree, @@ -295,7 +295,7 @@ extern "C" { pub fn ts_node_grammar_type(self_: TSNode) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = " Get the node's type as a numerical id as it appears in the grammar ignoring\n aliases. This should be used in `ts_language_next_state` instead of\n `ts_node_symbol`."] + #[doc = " Get the node's type as a numerical id as it appears in the grammar ignoring\n aliases. This should be used in [`ts_language_next_state`] instead of\n [`ts_node_symbol`]."] pub fn ts_node_grammar_symbol(self_: TSNode) -> TSSymbol; } extern "C" { @@ -319,7 +319,7 @@ extern "C" { pub fn ts_node_string(self_: TSNode) -> *mut ::std::os::raw::c_char; } extern "C" { - #[doc = " Check if the node is null. 
Functions like `ts_node_child` and\n `ts_node_next_sibling` will return a null node to indicate that no such node\n was found."] + #[doc = " Check if the node is null. Functions like [`ts_node_child`] and\n [`ts_node_next_sibling`] will return a null node to indicate that no such node\n was found."] pub fn ts_node_is_null(self_: TSNode) -> bool; } extern "C" { @@ -374,11 +374,11 @@ extern "C" { pub fn ts_node_child_count(self_: TSNode) -> u32; } extern "C" { - #[doc = " Get the node's *named* child at the given index.\n\n See also `ts_node_is_named`."] + #[doc = " Get the node's *named* child at the given index.\n\n See also [`ts_node_is_named`]."] pub fn ts_node_named_child(self_: TSNode, child_index: u32) -> TSNode; } extern "C" { - #[doc = " Get the node's number of *named* children.\n\n See also `ts_node_is_named`."] + #[doc = " Get the node's number of *named* children.\n\n See also [`ts_node_is_named`]."] pub fn ts_node_named_child_count(self_: TSNode) -> u32; } extern "C" { @@ -390,7 +390,7 @@ extern "C" { ) -> TSNode; } extern "C" { - #[doc = " Get the node's child with the given numerical field id.\n\n You can convert a field name to an id using the\n `ts_language_field_id_for_name` function."] + #[doc = " Get the node's child with the given numerical field id.\n\n You can convert a field name to an id using the\n [`ts_language_field_id_for_name`] function."] pub fn ts_node_child_by_field_id(self_: TSNode, field_id: TSFieldId) -> TSNode; } extern "C" { @@ -442,7 +442,7 @@ extern "C" { ) -> TSNode; } extern "C" { - #[doc = " Edit the node to keep it in-sync with source code that has been edited.\n\n This function is only rarely needed. When you edit a syntax tree with the\n `ts_tree_edit` function, all of the nodes that you retrieve from the tree\n afterward will already reflect the edit. 
You only need to use `ts_node_edit`\n when you have a `TSNode` instance that you want to keep and continue to use\n after an edit."] + #[doc = " Edit the node to keep it in-sync with source code that has been edited.\n\n This function is only rarely needed. When you edit a syntax tree with the\n [`ts_tree_edit`] function, all of the nodes that you retrieve from the tree\n afterward will already reflect the edit. You only need to use [`ts_node_edit`]\n when you have a [`TSNode`] instance that you want to keep and continue to use\n after an edit."] pub fn ts_node_edit(self_: *mut TSNode, edit: *const TSInputEdit); } extern "C" { @@ -450,7 +450,7 @@ extern "C" { pub fn ts_node_eq(self_: TSNode, other: TSNode) -> bool; } extern "C" { - #[doc = " Create a new tree cursor starting from the given node.\n\n A tree cursor allows you to walk a syntax tree more efficiently than is\n possible using the `TSNode` functions. It is a mutable object that is always\n on a certain syntax node, and can be moved imperatively to different nodes."] + #[doc = " Create a new tree cursor starting from the given node.\n\n A tree cursor allows you to walk a syntax tree more efficiently than is\n possible using the [`TSNode`] functions. 
It is a mutable object that is always\n on a certain syntax node, and can be moved imperatively to different nodes."] pub fn ts_tree_cursor_new(node: TSNode) -> TSTreeCursor; } extern "C" { @@ -462,7 +462,7 @@ extern "C" { pub fn ts_tree_cursor_reset(self_: *mut TSTreeCursor, node: TSNode); } extern "C" { - #[doc = " Re-initialize a tree cursor to the same position as another cursor.\n\n Unlike `ts_tree_cursor_reset`, this will not lose parent information and\n allows reusing already created cursors."] + #[doc = " Re-initialize a tree cursor to the same position as another cursor.\n\n Unlike [`ts_tree_cursor_reset`], this will not lose parent information and\n allows reusing already created cursors."] pub fn ts_tree_cursor_reset_to(dst: *mut TSTreeCursor, src: *const TSTreeCursor); } extern "C" { @@ -470,13 +470,13 @@ extern "C" { pub fn ts_tree_cursor_current_node(self_: *const TSTreeCursor) -> TSNode; } extern "C" { - #[doc = " Get the field name of the tree cursor's current node.\n\n This returns `NULL` if the current node doesn't have a field.\n See also `ts_node_child_by_field_name`."] + #[doc = " Get the field name of the tree cursor's current node.\n\n This returns `NULL` if the current node doesn't have a field.\n See also [`ts_node_child_by_field_name`]."] pub fn ts_tree_cursor_current_field_name( self_: *const TSTreeCursor, ) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = " Get the field id of the tree cursor's current node.\n\n This returns zero if the current node doesn't have a field.\n See also `ts_node_child_by_field_id`, `ts_language_field_id_for_name`."] + #[doc = " Get the field id of the tree cursor's current node.\n\n This returns zero if the current node doesn't have a field.\n See also [`ts_node_child_by_field_id`], [`ts_language_field_id_for_name`]."] pub fn ts_tree_cursor_current_field_id(self_: *const TSTreeCursor) -> TSFieldId; } extern "C" { @@ -488,7 +488,7 @@ extern "C" { pub fn ts_tree_cursor_goto_next_sibling(self_: *mut 
TSTreeCursor) -> bool; } extern "C" { - #[doc = " Move the cursor to the previous sibling of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false` if\n there was no previous sibling node.\n\n Note, that this function may be slower than\n `ts_tree_cursor_goto_next_sibling` due to how node positions are stored. In\n the worst case, this will need to iterate through all the children upto the\n previous sibling node to recalculate its position."] + #[doc = " Move the cursor to the previous sibling of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false` if\n there was no previous sibling node.\n\n Note, that this function may be slower than\n [`ts_tree_cursor_goto_next_sibling`] due to how node positions are stored. In\n the worst case, this will need to iterate through all the children upto the\n previous sibling node to recalculate its position."] pub fn ts_tree_cursor_goto_previous_sibling(self_: *mut TSTreeCursor) -> bool; } extern "C" { @@ -496,7 +496,7 @@ extern "C" { pub fn ts_tree_cursor_goto_first_child(self_: *mut TSTreeCursor) -> bool; } extern "C" { - #[doc = " Move the cursor to the last child of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false` if\n there were no children.\n\n Note that this function may be slower than `ts_tree_cursor_goto_first_child`\n because it needs to iterate through all the children to compute the child's\n position."] + #[doc = " Move the cursor to the last child of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false` if\n there were no children.\n\n Note that this function may be slower than [`ts_tree_cursor_goto_first_child`]\n because it needs to iterate through all the children to compute the child's\n position."] pub fn ts_tree_cursor_goto_last_child(self_: *mut TSTreeCursor) -> bool; } extern "C" { @@ -528,7 +528,7 @@ extern "C" { pub fn 
ts_tree_cursor_copy(cursor: *const TSTreeCursor) -> TSTreeCursor; } extern "C" { - #[doc = " Create a new query from a string containing one or more S-expression\n patterns. The query is associated with a particular language, and can\n only be run on syntax nodes parsed with that language.\n\n If all of the given patterns are valid, this returns a `TSQuery`.\n If a pattern is invalid, this returns `NULL`, and provides two pieces\n of information about the problem:\n 1. The byte offset of the error is written to the `error_offset` parameter.\n 2. The type of error is written to the `error_type` parameter."] + #[doc = " Create a new query from a string containing one or more S-expression\n patterns. The query is associated with a particular language, and can\n only be run on syntax nodes parsed with that language.\n\n If all of the given patterns are valid, this returns a [`TSQuery`].\n If a pattern is invalid, this returns `NULL`, and provides two pieces\n of information about the problem:\n 1. The byte offset of the error is written to the `error_offset` parameter.\n 2. The type of error is written to the `error_type` parameter."] pub fn ts_query_new( language: *const TSLanguage, source: *const ::std::os::raw::c_char, @@ -556,7 +556,7 @@ extern "C" { pub fn ts_query_start_byte_for_pattern(self_: *const TSQuery, pattern_index: u32) -> u32; } extern "C" { - #[doc = " Get all of the predicates for the given pattern in the query.\n\n The predicates are represented as a single array of steps. There are three\n types of steps in this array, which correspond to the three legal values for\n the `type` field:\n - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names\n of captures. Their `value_id` can be used with the\n `ts_query_capture_name_for_id` function to obtain the name of the capture.\n - `TSQueryPredicateStepTypeString` - Steps with this type represent literal\n strings. 
Their `value_id` can be used with the\n `ts_query_string_value_for_id` function to obtain their string value.\n - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels*\n that represent the end of an individual predicate. If a pattern has two\n predicates, then there will be two steps with this `type` in the array."] + #[doc = " Get all of the predicates for the given pattern in the query.\n\n The predicates are represented as a single array of steps. There are three\n types of steps in this array, which correspond to the three legal values for\n the `type` field:\n - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names\n of captures. Their `value_id` can be used with the\n [`ts_query_capture_name_for_id`] function to obtain the name of the capture.\n - `TSQueryPredicateStepTypeString` - Steps with this type represent literal\n strings. Their `value_id` can be used with the\n [`ts_query_string_value_for_id`] function to obtain their string value.\n - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels*\n that represent the end of an individual predicate. If a pattern has two\n predicates, then there will be two steps with this `type` in the array."] pub fn ts_query_predicates_for_pattern( self_: *const TSQuery, pattern_index: u32, @@ -608,7 +608,7 @@ extern "C" { pub fn ts_query_disable_pattern(self_: *mut TSQuery, pattern_index: u32); } extern "C" { - #[doc = " Create a new cursor for executing a given query.\n\n The cursor stores the state that is needed to iteratively search\n for matches. To use the query cursor, first call `ts_query_cursor_exec`\n to start running a given query on a given syntax node. Then, there are\n two options for consuming the results of the query:\n 1. Repeatedly call `ts_query_cursor_next_match` to iterate over all of the\n *matches* in the order that they were found. Each match contains the\n index of the pattern that matched, and an array of captures. 
Because\n multiple patterns can match the same set of nodes, one match may contain\n captures that appear *before* some of the captures from a previous match.\n 2. Repeatedly call `ts_query_cursor_next_capture` to iterate over all of the\n individual *captures* in the order that they appear. This is useful if\n don't care about which pattern matched, and just want a single ordered\n sequence of captures.\n\n If you don't care about consuming all of the results, you can stop calling\n `ts_query_cursor_next_match` or `ts_query_cursor_next_capture` at any point.\n You can then start executing another query on another node by calling\n `ts_query_cursor_exec` again."] + #[doc = " Create a new cursor for executing a given query.\n\n The cursor stores the state that is needed to iteratively search\n for matches. To use the query cursor, first call [`ts_query_cursor_exec`]\n to start running a given query on a given syntax node. Then, there are\n two options for consuming the results of the query:\n 1. Repeatedly call [`ts_query_cursor_next_match`] to iterate over all of the\n *matches* in the order that they were found. Each match contains the\n index of the pattern that matched, and an array of captures. Because\n multiple patterns can match the same set of nodes, one match may contain\n captures that appear *before* some of the captures from a previous match.\n 2. Repeatedly call [`ts_query_cursor_next_capture`] to iterate over all of the\n individual *captures* in the order that they appear. 
This is useful if\n don't care about which pattern matched, and just want a single ordered\n sequence of captures.\n\n If you don't care about consuming all of the results, you can stop calling\n [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] at any point.\n You can then start executing another query on another node by calling\n [`ts_query_cursor_exec`] again."] pub fn ts_query_cursor_new() -> *mut TSQueryCursor; } extern "C" { @@ -708,15 +708,15 @@ extern "C" { ) -> TSFieldId; } extern "C" { - #[doc = " Check whether the given node type id belongs to named nodes, anonymous nodes,\n or a hidden nodes.\n\n See also `ts_node_is_named`. Hidden nodes are never returned from the API."] + #[doc = " Check whether the given node type id belongs to named nodes, anonymous nodes,\n or a hidden nodes.\n\n See also [`ts_node_is_named`]. Hidden nodes are never returned from the API."] pub fn ts_language_symbol_type(self_: *const TSLanguage, symbol: TSSymbol) -> TSSymbolType; } extern "C" { - #[doc = " Get the ABI version number for this language. This version number is used\n to ensure that languages were generated by a compatible version of\n Tree-sitter.\n\n See also `ts_parser_set_language`."] + #[doc = " Get the ABI version number for this language. This version number is used\n to ensure that languages were generated by a compatible version of\n Tree-sitter.\n\n See also [`ts_parser_set_language`]."] pub fn ts_language_version(self_: *const TSLanguage) -> u32; } extern "C" { - #[doc = " Get the next parse state. Combine this with lookahead iterators to generate\n completion suggestions or valid symbols in error nodes. Use\n `ts_node_grammar_symbol` for valid symbols."] + #[doc = " Get the next parse state. Combine this with lookahead iterators to generate\n completion suggestions or valid symbols in error nodes. 
Use\n [`ts_node_grammar_symbol`] for valid symbols."] pub fn ts_language_next_state( self_: *const TSLanguage, state: TSStateId, @@ -724,7 +724,7 @@ extern "C" { ) -> TSStateId; } extern "C" { - #[doc = " Create a new lookahead iterator for the given language and parse state.\n\n This returns `NULL` if state is invalid for the language.\n\n Repeatedly using `ts_lookahead_iterator_next` and\n `ts_lookahead_iterator_current_symbol` will generate valid symbols in the\n given parse state. Newly created lookahead iterators will contain the `ERROR`\n symbol.\n\n Lookahead iterators can be useful to generate suggestions and improve syntax\n error diagnostics. To get symbols valid in an ERROR node, use the lookahead\n iterator on its first leaf node state. For `MISSING` nodes, a lookahead\n iterator created on the previous non-extra leaf node may be appropriate."] + #[doc = " Create a new lookahead iterator for the given language and parse state.\n\n This returns `NULL` if state is invalid for the language.\n\n Repeatedly using [`ts_lookahead_iterator_next`] and\n [`ts_lookahead_iterator_current_symbol`] will generate valid symbols in the\n given parse state. Newly created lookahead iterators will contain the `ERROR`\n symbol.\n\n Lookahead iterators can be useful to generate suggestions and improve syntax\n error diagnostics. To get symbols valid in an ERROR node, use the lookahead\n iterator on its first leaf node state. For `MISSING` nodes, a lookahead\n iterator created on the previous non-extra leaf node may be appropriate."] pub fn ts_lookahead_iterator_new( self_: *const TSLanguage, state: TSStateId, diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 9072bb60..56093d91 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -172,9 +172,9 @@ const TSLanguage *ts_parser_language(const TSParser *self); * Returns a boolean indicating whether or not the language was successfully * assigned. 
True means assignment succeeded. False means there was a version * mismatch: the language was generated with an incompatible version of the - * Tree-sitter CLI. Check the language's version using `ts_language_version` - * and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION` and - * `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants. + * Tree-sitter CLI. Check the language's version using [`ts_language_version`] + * and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and + * [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants. */ bool ts_parser_set_language(TSParser *self, const TSLanguage *language); @@ -226,35 +226,40 @@ const TSRange *ts_parser_included_ranges( * version of this document and the document has since been edited, pass the * previous syntax tree so that the unchanged parts of it can be reused. * This will save time and memory. For this to work correctly, you must have - * already edited the old syntax tree using the `ts_tree_edit` function in a + * already edited the old syntax tree using the [`ts_tree_edit`] function in a * way that exactly matches the source code changes. * - * The `TSInput` parameter lets you specify how to read the text. It has the + * The [`TSInput`] parameter lets you specify how to read the text. It has the * following three fields: - * 1. `read`: A function to retrieve a chunk of text at a given byte offset + * 1. [`read`]: A function to retrieve a chunk of text at a given byte offset * and (row, column) position. The function should return a pointer to the - * text and write its length to the `bytes_read` pointer. The parser does + * text and write its length to the [`bytes_read`] pointer. The parser does * not take ownership of this buffer; it just borrows it until it has * finished reading it. The function should write a zero value to the - * `bytes_read` pointer to indicate the end of the document. - * 2. 
`payload`: An arbitrary pointer that will be passed to each invocation - * of the `read` function. - * 3. `encoding`: An indication of how the text is encoded. Either + * [`bytes_read`] pointer to indicate the end of the document. + * 2. [`payload`]: An arbitrary pointer that will be passed to each invocation + * of the [`read`] function. + * 3. [`encoding`]: An indication of how the text is encoded. Either * `TSInputEncodingUTF8` or `TSInputEncodingUTF16`. * * This function returns a syntax tree on success, and `NULL` on failure. There * are three possible reasons for failure: * 1. The parser does not have a language assigned. Check for this using the - `ts_parser_language` function. + [`ts_parser_language`] function. * 2. Parsing was cancelled due to a timeout that was set by an earlier call to - * the `ts_parser_set_timeout_micros` function. You can resume parsing from - * where the parser left out by calling `ts_parser_parse` again with the + * the [`ts_parser_set_timeout_micros`] function. You can resume parsing from + * where the parser left out by calling [`ts_parser_parse`] again with the * same arguments. Or you can start parsing from scratch by first calling - * `ts_parser_reset`. + * [`ts_parser_reset`]. * 3. Parsing was cancelled using a cancellation flag that was set by an - * earlier call to `ts_parser_set_cancellation_flag`. You can resume parsing - * from where the parser left out by calling `ts_parser_parse` again with + * earlier call to [`ts_parser_set_cancellation_flag`]. You can resume parsing + * from where the parser left out by calling [`ts_parser_parse`] again with * the same arguments. + * + * [`read`]: TSInput::read + * [`payload`]: TSInput::payload + * [`encoding`]: TSInput::encoding + * [`bytes_read`]: TSInput::read */ TSTree *ts_parser_parse( TSParser *self, @@ -264,7 +269,7 @@ TSTree *ts_parser_parse( /** * Use the parser to parse some source code stored in one contiguous buffer. 
- * The first two parameters are the same as in the `ts_parser_parse` function + * The first two parameters are the same as in the [`ts_parser_parse`] function * above. The second two parameters indicate the location of the buffer and its * length in bytes. */ @@ -278,7 +283,7 @@ TSTree *ts_parser_parse_string( /** * Use the parser to parse some source code stored in one contiguous buffer with * a given encoding. The first four parameters work the same as in the - * `ts_parser_parse_string` method above. The final parameter indicates whether + * [`ts_parser_parse_string`] method above. The final parameter indicates whether * the text is encoded as UTF8 or UTF16. */ TSTree *ts_parser_parse_string_encoding( @@ -294,9 +299,9 @@ TSTree *ts_parser_parse_string_encoding( * * If the parser previously failed because of a timeout or a cancellation, then * by default, it will resume where it left off on the next call to - * `ts_parser_parse` or other parsing functions. If you don't want to resume, + * [`ts_parser_parse`] or other parsing functions. If you don't want to resume, * and instead intend to use this parser to parse some other document, you must - * call `ts_parser_reset` first. + * call [`ts_parser_reset`] first. */ void ts_parser_reset(TSParser *self); @@ -305,7 +310,7 @@ void ts_parser_reset(TSParser *self); * take before halting. * * If parsing takes longer than this, it will halt early, returning NULL. - * See `ts_parser_parse` for more information. + * See [`ts_parser_parse`] for more information. */ void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros); @@ -319,7 +324,7 @@ uint64_t ts_parser_timeout_micros(const TSParser *self); * * If a non-null pointer is assigned, then the parser will periodically read * from this pointer during parsing. If it reads a non-zero value, it will - * halt early, returning NULL. See `ts_parser_parse` for more information. + * halt early, returning NULL. See [`ts_parser_parse`] for more information. 
*/ void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag); @@ -409,7 +414,7 @@ void ts_tree_edit(TSTree *self, const TSInputEdit *edit); * * For this to work correctly, the old syntax tree must have been edited such * that its ranges match up to the new tree. Generally, you'll want to call - * this function right after calling one of the `ts_parser_parse` functions. + * this function right after calling one of the [`ts_parser_parse`] functions. * You need to pass the old tree that was passed to parse, as well as the new * tree that was returned from that function. * @@ -455,8 +460,8 @@ const char *ts_node_grammar_type(TSNode self); /** * Get the node's type as a numerical id as it appears in the grammar ignoring - * aliases. This should be used in `ts_language_next_state` instead of - * `ts_node_symbol`. + * aliases. This should be used in [`ts_language_next_state`] instead of + * [`ts_node_symbol`]. */ TSSymbol ts_node_grammar_symbol(TSNode self); @@ -489,8 +494,8 @@ TSPoint ts_node_end_point(TSNode self); char *ts_node_string(TSNode self); /** - * Check if the node is null. Functions like `ts_node_child` and - * `ts_node_next_sibling` will return a null node to indicate that no such node + * Check if the node is null. Functions like [`ts_node_child`] and + * [`ts_node_next_sibling`] will return a null node to indicate that no such node * was found. */ bool ts_node_is_null(TSNode self); @@ -564,14 +569,14 @@ uint32_t ts_node_child_count(TSNode self); /** * Get the node's *named* child at the given index. * - * See also `ts_node_is_named`. + * See also [`ts_node_is_named`]. */ TSNode ts_node_named_child(TSNode self, uint32_t child_index); /** * Get the node's number of *named* children. * - * See also `ts_node_is_named`. + * See also [`ts_node_is_named`]. */ uint32_t ts_node_named_child_count(TSNode self); @@ -588,7 +593,7 @@ TSNode ts_node_child_by_field_name( * Get the node's child with the given numerical field id. 
* * You can convert a field name to an id using the - * `ts_language_field_id_for_name` function. + * [`ts_language_field_id_for_name`] function. */ TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id); @@ -637,9 +642,9 @@ TSNode ts_node_named_descendant_for_point_range(TSNode self, TSPoint start, TSPo * Edit the node to keep it in-sync with source code that has been edited. * * This function is only rarely needed. When you edit a syntax tree with the - * `ts_tree_edit` function, all of the nodes that you retrieve from the tree - * afterward will already reflect the edit. You only need to use `ts_node_edit` - * when you have a `TSNode` instance that you want to keep and continue to use + * [`ts_tree_edit`] function, all of the nodes that you retrieve from the tree + * afterward will already reflect the edit. You only need to use [`ts_node_edit`] + * when you have a [`TSNode`] instance that you want to keep and continue to use * after an edit. */ void ts_node_edit(TSNode *self, const TSInputEdit *edit); @@ -657,7 +662,7 @@ bool ts_node_eq(TSNode self, TSNode other); * Create a new tree cursor starting from the given node. * * A tree cursor allows you to walk a syntax tree more efficiently than is - * possible using the `TSNode` functions. It is a mutable object that is always + * possible using the [`TSNode`] functions. It is a mutable object that is always * on a certain syntax node, and can be moved imperatively to different nodes. */ TSTreeCursor ts_tree_cursor_new(TSNode node); @@ -675,7 +680,7 @@ void ts_tree_cursor_reset(TSTreeCursor *self, TSNode node); /** * Re-initialize a tree cursor to the same position as another cursor. * - * Unlike `ts_tree_cursor_reset`, this will not lose parent information and + * Unlike [`ts_tree_cursor_reset`], this will not lose parent information and * allows reusing already created cursors. 
*/ void ts_tree_cursor_reset_to(TSTreeCursor *dst, const TSTreeCursor *src); @@ -689,7 +694,7 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *self); * Get the field name of the tree cursor's current node. * * This returns `NULL` if the current node doesn't have a field. - * See also `ts_node_child_by_field_name`. + * See also [`ts_node_child_by_field_name`]. */ const char *ts_tree_cursor_current_field_name(const TSTreeCursor *self); @@ -697,7 +702,7 @@ const char *ts_tree_cursor_current_field_name(const TSTreeCursor *self); * Get the field id of the tree cursor's current node. * * This returns zero if the current node doesn't have a field. - * See also `ts_node_child_by_field_id`, `ts_language_field_id_for_name`. + * See also [`ts_node_child_by_field_id`], [`ts_language_field_id_for_name`]. */ TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *self); @@ -724,7 +729,7 @@ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self); * there was no previous sibling node. * * Note, that this function may be slower than - * `ts_tree_cursor_goto_next_sibling` due to how node positions are stored. In + * [`ts_tree_cursor_goto_next_sibling`] due to how node positions are stored. In * the worst case, this will need to iterate through all the children upto the * previous sibling node to recalculate its position. */ @@ -744,7 +749,7 @@ bool ts_tree_cursor_goto_first_child(TSTreeCursor *self); * This returns `true` if the cursor successfully moved, and returns `false` if * there were no children. * - * Note that this function may be slower than `ts_tree_cursor_goto_first_child` + * Note that this function may be slower than [`ts_tree_cursor_goto_first_child`] * because it needs to iterate through all the children to compute the child's * position. */ @@ -790,7 +795,7 @@ TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *cursor); * patterns. The query is associated with a particular language, and can * only be run on syntax nodes parsed with that language. 
* - * If all of the given patterns are valid, this returns a `TSQuery`. + * If all of the given patterns are valid, this returns a [`TSQuery`]. * If a pattern is invalid, this returns `NULL`, and provides two pieces * of information about the problem: * 1. The byte offset of the error is written to the `error_offset` parameter. @@ -832,10 +837,10 @@ uint32_t ts_query_start_byte_for_pattern(const TSQuery *self, uint32_t pattern_i * the `type` field: * - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names * of captures. Their `value_id` can be used with the - * `ts_query_capture_name_for_id` function to obtain the name of the capture. + * [`ts_query_capture_name_for_id`] function to obtain the name of the capture. * - `TSQueryPredicateStepTypeString` - Steps with this type represent literal * strings. Their `value_id` can be used with the - * `ts_query_string_value_for_id` function to obtain their string value. + * [`ts_query_string_value_for_id`] function to obtain their string value. * - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels* * that represent the end of an individual predicate. If a pattern has two * predicates, then there will be two steps with this `type` in the array. @@ -915,23 +920,23 @@ void ts_query_disable_pattern(TSQuery *self, uint32_t pattern_index); * Create a new cursor for executing a given query. * * The cursor stores the state that is needed to iteratively search - * for matches. To use the query cursor, first call `ts_query_cursor_exec` + * for matches. To use the query cursor, first call [`ts_query_cursor_exec`] * to start running a given query on a given syntax node. Then, there are * two options for consuming the results of the query: - * 1. Repeatedly call `ts_query_cursor_next_match` to iterate over all of the + * 1. Repeatedly call [`ts_query_cursor_next_match`] to iterate over all of the * *matches* in the order that they were found. 
Each match contains the * index of the pattern that matched, and an array of captures. Because * multiple patterns can match the same set of nodes, one match may contain * captures that appear *before* some of the captures from a previous match. - * 2. Repeatedly call `ts_query_cursor_next_capture` to iterate over all of the + * 2. Repeatedly call [`ts_query_cursor_next_capture`] to iterate over all of the * individual *captures* in the order that they appear. This is useful if * don't care about which pattern matched, and just want a single ordered * sequence of captures. * * If you don't care about consuming all of the results, you can stop calling - * `ts_query_cursor_next_match` or `ts_query_cursor_next_capture` at any point. + * [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] at any point. * You can then start executing another query on another node by calling - * `ts_query_cursor_exec` again. + * [`ts_query_cursor_exec`] again. */ TSQueryCursor *ts_query_cursor_new(void); @@ -1052,7 +1057,7 @@ TSFieldId ts_language_field_id_for_name(const TSLanguage *self, const char *name * Check whether the given node type id belongs to named nodes, anonymous nodes, * or a hidden nodes. * - * See also `ts_node_is_named`. Hidden nodes are never returned from the API. + * See also [`ts_node_is_named`]. Hidden nodes are never returned from the API. */ TSSymbolType ts_language_symbol_type(const TSLanguage *self, TSSymbol symbol); @@ -1061,14 +1066,14 @@ TSSymbolType ts_language_symbol_type(const TSLanguage *self, TSSymbol symbol); * to ensure that languages were generated by a compatible version of * Tree-sitter. * - * See also `ts_parser_set_language`. + * See also [`ts_parser_set_language`]. */ uint32_t ts_language_version(const TSLanguage *self); /** * Get the next parse state. Combine this with lookahead iterators to generate * completion suggestions or valid symbols in error nodes. Use - * `ts_node_grammar_symbol` for valid symbols. 
+ * [`ts_node_grammar_symbol`] for valid symbols. */ TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol); @@ -1081,8 +1086,8 @@ TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymb * * This returns `NULL` if state is invalid for the language. * - * Repeatedly using `ts_lookahead_iterator_next` and - * `ts_lookahead_iterator_current_symbol` will generate valid symbols in the + * Repeatedly using [`ts_lookahead_iterator_next`] and + * [`ts_lookahead_iterator_current_symbol`] will generate valid symbols in the * given parse state. Newly created lookahead iterators will contain the `ERROR` * symbol. * From 09ac28c77d216964636ea054ba76bcf96a670933 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Fri, 18 Aug 2023 19:45:00 -0400 Subject: [PATCH 322/347] feat!: properly handle predicates used on quantified captures --- cli/src/tests/query_test.rs | 83 +++++++++++++++++++++++ lib/binding_rust/lib.rs | 132 ++++++++++++++++++++++-------------- lib/binding_web/binding.js | 66 +++++++++++++++--- 3 files changed, 218 insertions(+), 63 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 94d5ca97..34cf40a9 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -4574,6 +4574,89 @@ fn test_capture_quantifiers() { }); } +#[test] +fn test_query_quantified_captures() { + struct Row { + description: &'static str, + language: Language, + code: &'static str, + pattern: &'static str, + captures: &'static [(&'static str, &'static str)], + } + + // #[rustfmt::skip] + let rows = &[ + Row { + description: "doc comments where all must match the prefiix", + language: get_language("c"), + code: indoc! {" + /// foo + /// bar + /// baz + + void main() {} + + /// qux + /// quux + // quuz + "}, + pattern: r#" + ((comment)+ @comment.documentation + (#match? 
@comment.documentation "^///")) + "#, + captures: &[ + ("comment.documentation", "/// foo"), + ("comment.documentation", "/// bar"), + ("comment.documentation", "/// baz"), + ], + }, + Row { + description: "doc comments where one must match the prefix", + language: get_language("c"), + code: indoc! {" + /// foo + /// bar + /// baz + + void main() {} + + /// qux + /// quux + // quuz + "}, + pattern: r#" + ((comment)+ @comment.documentation + (#any-match? @comment.documentation "^///")) + "#, + captures: &[ + ("comment.documentation", "/// foo"), + ("comment.documentation", "/// bar"), + ("comment.documentation", "/// baz"), + ("comment.documentation", "/// qux"), + ("comment.documentation", "/// quux"), + ("comment.documentation", "// quuz"), + ], + }, + ]; + + allocations::record(|| { + for row in rows { + eprintln!(" quantified query example: {:?}", row.description); + + let mut parser = Parser::new(); + parser.set_language(row.language).unwrap(); + let tree = parser.parse(row.code, None).unwrap(); + + let query = Query::new(row.language, row.pattern).unwrap(); + + let mut cursor = QueryCursor::new(); + let matches = cursor.captures(&query, tree.root_node(), row.code.as_bytes()); + + assert_eq!(collect_captures(matches, &query, row.code), row.captures); + } + }); +} + #[test] fn test_query_max_start_depth() { struct Row { diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 9cd04563..8762c7f6 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -118,7 +118,7 @@ pub struct Query { ptr: NonNull, capture_names: Vec, capture_quantifiers: Vec>, - text_predicates: Vec>, + text_predicates: Vec>, property_settings: Vec>, property_predicates: Vec>, general_predicates: Vec>, @@ -250,11 +250,16 @@ pub enum QueryErrorKind { } #[derive(Debug)] -enum TextPredicate { - CaptureEqString(u32, String, bool), - CaptureEqCapture(u32, u32, bool), - CaptureMatchString(u32, regex::bytes::Regex, bool), - CaptureAnyString(u32, Vec, bool), +/// The first 
item is the capture index +/// The next is capture specific, depending on what item is expected +/// The first bool is if the capture is positive +/// The last item is a bool signifying whether or not it's meant to match +/// any or all captures +enum TextPredicateCapture { + EqString(u32, String, bool, bool), + EqCapture(u32, u32, bool, bool), + MatchString(u32, regex::bytes::Regex, bool, bool), + AnyString(u32, Vec, bool), } // TODO: Remove this struct at at some point. If `core::str::lossy::Utf8Lossy` @@ -1733,7 +1738,7 @@ impl Query { // Build a predicate for each of the known predicate function names. let operator_name = &string_values[p[0].value_id as usize]; match operator_name.as_str() { - "eq?" | "not-eq?" => { + "eq?" | "not-eq?" | "any-eq?" | "any-not-eq?" => { if p.len() != 3 { return Err(predicate_error( row, @@ -1750,23 +1755,30 @@ impl Query { ))); } - let is_positive = operator_name == "eq?"; + let is_positive = operator_name == "eq?" || operator_name == "any-eq?"; + let match_all = match operator_name.as_str() { + "eq?" | "not-eq?" => true, + "any-eq?" | "any-not-eq?" => false, + _ => unreachable!(), + }; text_predicates.push(if p[2].type_ == type_capture { - TextPredicate::CaptureEqCapture( + TextPredicateCapture::EqCapture( p[1].value_id, p[2].value_id, is_positive, + match_all, ) } else { - TextPredicate::CaptureEqString( + TextPredicateCapture::EqString( p[1].value_id, string_values[p[2].value_id as usize].clone(), is_positive, + match_all, ) }); } - "match?" | "not-match?" => { + "match?" | "not-match?" | "any-match?" | "any-not-match?" => { if p.len() != 3 { return Err(predicate_error(row, format!( "Wrong number of arguments to #match? predicate. Expected 2, got {}.", @@ -1786,20 +1798,27 @@ impl Query { ))); } - let is_positive = operator_name == "match?"; + let is_positive = + operator_name == "match?" || operator_name == "any-match?"; + let match_all = match operator_name.as_str() { + "match?" | "not-match?" => true, + "any-match?" 
| "any-not-match?" => false, + _ => unreachable!(), + }; let regex = &string_values[p[2].value_id as usize]; - text_predicates.push(TextPredicate::CaptureMatchString( + text_predicates.push(TextPredicateCapture::MatchString( p[1].value_id, regex::bytes::Regex::new(regex).map_err(|_| { predicate_error(row, format!("Invalid regex '{}'", regex)) })?, is_positive, + match_all, )); } "set!" => property_settings.push(Self::parse_property( row, - &operator_name, + operator_name, &result.capture_names, &string_values, &p[1..], @@ -1808,7 +1827,7 @@ impl Query { "is?" | "is-not?" => property_predicates.push(( Self::parse_property( row, - &operator_name, + operator_name, &result.capture_names, &string_values, &p[1..], @@ -1841,7 +1860,7 @@ impl Query { } values.push(string_values[arg.value_id as usize].clone()); } - text_predicates.push(TextPredicate::CaptureAnyString( + text_predicates.push(TextPredicateCapture::AnyString( p[1].value_id, values, is_positive, @@ -2203,7 +2222,7 @@ impl<'tree> QueryMatch<'_, 'tree> { ) -> impl Iterator> + '_ { self.captures .iter() - .filter_map(move |capture| (capture.index == capture_ix).then(|| capture.node)) + .filter_map(move |capture| (capture.index == capture_ix).then_some(capture.node)) } fn new(m: ffi::TSQueryMatch, cursor: *mut ffi::TSQueryCursor) -> Self { @@ -2266,52 +2285,61 @@ impl<'tree> QueryMatch<'_, 'tree> { query.text_predicates[self.pattern_index] .iter() .all(|predicate| match predicate { - TextPredicate::CaptureEqCapture(i, j, is_positive) => { - let node1 = self.nodes_for_capture_index(*i).next(); - let node2 = self.nodes_for_capture_index(*j).next(); - match (node1, node2) { - (Some(node1), Some(node2)) => { - let mut text1 = text_provider.text(node1); - let mut text2 = text_provider.text(node2); - let text1 = node_text1.get_text(&mut text1); - let text2 = node_text2.get_text(&mut text2); - (text1 == text2) == *is_positive + TextPredicateCapture::EqCapture(i, j, is_positive, match_all_nodes) => { + let mut nodes_1 = 
self.nodes_for_capture_index(*i); + let mut nodes_2 = self.nodes_for_capture_index(*j); + while let (Some(node1), Some(node2)) = (nodes_1.next(), nodes_2.next()) { + let mut text1 = text_provider.text(node1); + let mut text2 = text_provider.text(node2); + let text1 = node_text1.get_text(&mut text1); + let text2 = node_text2.get_text(&mut text2); + if (text1 == text2) != *is_positive && *match_all_nodes { + return false; + } + if (text1 == text2) == *is_positive && !*match_all_nodes { + return true; } - _ => true, } + nodes_1.next().is_none() && nodes_2.next().is_none() } - TextPredicate::CaptureEqString(i, s, is_positive) => { - let node = self.nodes_for_capture_index(*i).next(); - match node { - Some(node) => { - let mut text = text_provider.text(node); - let text = node_text1.get_text(&mut text); - (text == s.as_bytes()) == *is_positive + TextPredicateCapture::EqString(i, s, is_positive, match_all_nodes) => { + let nodes = self.nodes_for_capture_index(*i); + for node in nodes { + let mut text = text_provider.text(node); + let text = node_text1.get_text(&mut text); + if (text == s.as_bytes()) != *is_positive && *match_all_nodes { + return false; + } + if (text == s.as_bytes()) == *is_positive && !*match_all_nodes { + return true; } - None => true, } + true } - TextPredicate::CaptureMatchString(i, r, is_positive) => { - let node = self.nodes_for_capture_index(*i).next(); - match node { - Some(node) => { - let mut text = text_provider.text(node); - let text = node_text1.get_text(&mut text); - r.is_match(text) == *is_positive + TextPredicateCapture::MatchString(i, r, is_positive, match_all_nodes) => { + let nodes = self.nodes_for_capture_index(*i); + for node in nodes { + let mut text = text_provider.text(node); + let text = node_text1.get_text(&mut text); + if (r.is_match(text)) != *is_positive && *match_all_nodes { + return false; + } + if (r.is_match(text)) == *is_positive && !*match_all_nodes { + return true; } - None => true, } + true } - 
TextPredicate::CaptureAnyString(i, v, is_positive) => { - let node = self.nodes_for_capture_index(*i).next(); - match node { - Some(node) => { - let mut text = text_provider.text(node); - let text = node_text1.get_text(&mut text); - v.iter().any(|s| text == s.as_bytes()) == *is_positive + TextPredicateCapture::AnyString(i, v, is_positive) => { + let nodes = self.nodes_for_capture_index(*i); + for node in nodes { + let mut text = text_provider.text(node); + let text = node_text1.get_text(&mut text); + if (v.iter().any(|s| text == s.as_bytes())) != *is_positive { + return false; } - None => true, } + true } }) } diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index 8443bf25..0ba30106 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -841,7 +841,13 @@ class Language { } const operator = steps[0].value; let isPositive = true; + let matchAll = true; switch (operator) { + case 'any-not-eq?': + isPositive = false; + matchAll = false; + case 'any-eq?': + matchAll = false; case 'not-eq?': isPositive = false; case 'eq?': @@ -855,28 +861,36 @@ class Language { const captureName1 = steps[1].name; const captureName2 = steps[2].name; textPredicates[i].push(function(captures) { - let node1, node2 + let nodes_1 = []; + let nodes_2 = []; for (const c of captures) { - if (c.name === captureName1) node1 = c.node; - if (c.name === captureName2) node2 = c.node; + if (c.name === captureName1) nodes_1.push(c.node); + if (c.name === captureName2) nodes_2.push(c.node); } - if(node1 === undefined || node2 === undefined) return true; - return (node1.text === node2.text) === isPositive; + return matchAll + ? 
nodes_1.every(n1 => nodes_2.some(n2 => n1.text === n2.text)) === isPositive + : nodes_1.some(n1 => nodes_2.some(n2 => n1.text === n2.text)) === isPositive; }); } else { const captureName = steps[1].name; const stringValue = steps[2].value; textPredicates[i].push(function(captures) { + let nodes = []; for (const c of captures) { - if (c.name === captureName) { - return (c.node.text === stringValue) === isPositive; - }; + if (c.name === captureName) nodes.push(c.node); } - return true; + return matchAll + ? nodes.every(n => n.text === stringValue) === isPositive + : nodes.some(n => n.text === stringValue) === isPositive; }); } break; + case 'not-any-match?': + isPositive = false; + matchAll = false; + case 'any-match?': + matchAll = false; case 'not-match?': isPositive = false; case 'match?': @@ -892,10 +906,14 @@ class Language { const captureName = steps[1].name; const regex = new RegExp(steps[2].value); textPredicates[i].push(function(captures) { + const nodes = []; for (const c of captures) { - if (c.name === captureName) return regex.test(c.node.text) === isPositive; + if (c.name === captureName) nodes.push(c.node.text); } - return true; + if (nodes.length === 0) return !isPositive; + return matchAll + ? nodes.every(text => regex.test(text)) === isPositive + : nodes.some(text => regex.test(text)) === isPositive; }); break; @@ -923,6 +941,32 @@ class Language { properties[i][steps[1].value] = steps[2] ? steps[2].value : null; break; + case 'not-any-of?': + isPositive = false; + case 'any-of?': + if (steps.length < 2) throw new Error( + `Wrong number of arguments to \`#${operator}\` predicate. Expected at least 1. Got ${steps.length - 1}.` + ); + if (steps[1].type !== 'capture') throw new Error( + `First argument of \`#${operator}\` predicate must be a capture. 
Got "${steps[1].value}".` + ); + for (let i = 2; i < steps.length; i++) { + if (steps[i].type !== 'string') throw new Error( + `Arguments to \`#${operator}\` predicate must be a strings.".` + ); + } + captureName = steps[1].name; + const values = steps.slice(2).map(s => s.value); + textPredicates[i].push(function(captures) { + const nodes = []; + for (const c of captures) { + if (c.name === captureName) nodes.push(c.node.text); + } + if (nodes.length === 0) return !isPositive; + return nodes.every(text => values.includes(text)) === isPositive; + }); + break; + default: predicates[i].push({operator, operands: steps.slice(1)}); } From c5cb27e52256ac61db6324472cf2ad93c059e3b3 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Sat, 19 Aug 2023 00:15:27 -0400 Subject: [PATCH 323/347] docs: improve predicate docs --- cli/src/tests/query_test.rs | 2 +- docs/section-2-using-parsers.md | 144 +++++++++++++++++++++++++++----- 2 files changed, 122 insertions(+), 24 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 34cf40a9..ed1f9e25 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -4587,7 +4587,7 @@ fn test_query_quantified_captures() { // #[rustfmt::skip] let rows = &[ Row { - description: "doc comments where all must match the prefiix", + description: "doc comments where all must match the prefix", language: get_language("c"), code: indoc! 
{" /// foo diff --git a/docs/section-2-using-parsers.md b/docs/section-2-using-parsers.md index 87c049e7..5106a49c 100644 --- a/docs/section-2-using-parsers.md +++ b/docs/section-2-using-parsers.md @@ -21,21 +21,21 @@ Alternatively, you can incorporate the library in a larger project's build syste **source file:** -* `tree-sitter/lib/src/lib.c` +- `tree-sitter/lib/src/lib.c` **include directories:** -* `tree-sitter/lib/src` -* `tree-sitter/lib/include` +- `tree-sitter/lib/src` +- `tree-sitter/lib/include` ### The Basic Objects There are four main types of objects involved when using Tree-sitter: languages, parsers, syntax trees, and syntax nodes. In C, these are called `TSLanguage`, `TSParser`, `TSTree`, and `TSNode`. -* A `TSLanguage` is an opaque object that defines how to parse a particular programming language. The code for each `TSLanguage` is generated by Tree-sitter. Many languages are already available in separate git repositories within the [Tree-sitter GitHub organization](https://github.com/tree-sitter). See [the next page](./creating-parsers) for how to create new languages. -* A `TSParser` is a stateful object that can be assigned a `TSLanguage` and used to produce a `TSTree` based on some source code. -* A `TSTree` represents the syntax tree of an entire source code file. It contains `TSNode` instances that indicate the structure of the source code. It can also be edited and used to produce a new `TSTree` in the event that the source code changes. -* A `TSNode` represents a single node in the syntax tree. It tracks its start and end positions in the source code, as well as its relation to other nodes like its parent, siblings and children. +- A `TSLanguage` is an opaque object that defines how to parse a particular programming language. The code for each `TSLanguage` is generated by Tree-sitter. Many languages are already available in separate git repositories within the [Tree-sitter GitHub organization](https://github.com/tree-sitter). 
See [the next page](./creating-parsers) for how to create new languages. +- A `TSParser` is a stateful object that can be assigned a `TSLanguage` and used to produce a `TSTree` based on some source code. +- A `TSTree` represents the syntax tree of an entire source code file. It contains `TSNode` instances that indicate the structure of the source code. It can also be edited and used to produce a new `TSTree` in the event that the source code changes. +- A `TSNode` represents a single node in the syntax tree. It tracks its start and end positions in the source code, as well as its relation to other nodes like its parent, siblings and children. ### An Example Program @@ -629,18 +629,36 @@ The restrictions placed on a pattern by an anchor operator ignore anonymous node #### Predicates -You can also specify arbitrary metadata and conditions associated with a pattern by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions start with a _predicate name_ beginning with a `#` character. After that, they can contain an arbitrary number of `@`-prefixed capture names or strings. +You can also specify arbitrary metadata and conditions associated with a pattern +by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions +start with a _predicate name_ beginning with a `#` character. After that, they can +contain an arbitrary number of `@`-prefixed capture names or strings. -For example, this pattern would match identifier whose names is written in `SCREAMING_SNAKE_CASE`: +Tree-Sitter's CLI supports the following predicates by default: + +##### eq?, not-eq?, any-eq?, any-not-eq? + +This family of predicates allows you to match against a single capture or string +value. + +The first argument must be a capture, but the second can be either a capture to +compare the two captures' text, or a string to compare first capture's text +against. + +The base predicate is "#eq?", but its complement "#not-eq?" 
can be used to _not_ +match a value. + +Consider the following example targeting C: ```scheme -( - (identifier) @constant - (#match? @constant "^[A-Z][A-Z_]+") -) +((identifier) @variable.builtin + (#eq? @variable.builtin "self")) ``` -And this pattern would match key-value pairs where the `value` is an identifier with the same name as the key: +This pattern would match any identifier that is `self` or `this`. + +And this pattern would match key-value pairs where the `value` is an identifier +with the same name as the key: ```scheme ( @@ -651,7 +669,87 @@ And this pattern would match key-value pairs where the `value` is an identifier ) ``` -_Note_ - Predicates are not handled directly by the Tree-sitter C library. They are just exposed in a structured form so that higher-level code can perform the filtering. However, higher-level bindings to Tree-sitter like [the Rust crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) implement a few common predicates like `#eq?` and `#match?`. +The prefix "any-" is meant for use with quantified captures. Here's +an example finding a segment of empty comments + +```scheme +((comment)+ @comment.empty + (#any-eq? @comment.empty "//")) +``` + +Note that "#any-eq?" will match a quantified capture if +_any_ of the nodes match the predicate, while by default a quantified capture +will only match if _all_ the nodes match the predicate. + +##### match?, not-match?, any-match?, any-not-match? + +These predicates are similar to the eq? predicates, but they use regular expressions +to match against the capture's text. + +The first argument must be a capture, and the second must be a string containing +a regular expression. + +For example, this pattern would match identifier whose name is written in `SCREAMING_SNAKE_CASE`: + +```scheme +((identifier) @constant + (#match? 
@constant "^[A-Z][A-Z_]+")) +``` + +Here's an example finding potential documentation comments in C + +```scheme +((comment)+ @comment.documentation + (#match? @comment.documentation "^///\s+.*")) +``` + +Here's another example finding Cgo comments to potentially inject with C + +```scheme +((comment)+ @injection.content + . + (import_declaration + (import_spec path: (interpreted_string_literal) @_import_c)) + (#eq? @_import_c "\"C\"") + (#match? @injection.content "^//")) +``` + +##### any-of?, not-any-of? + +The "any-of?" predicate allows you to match a capture against multiple strings, +and will match if the capture's text is equal to any of the strings. + +Consider this example that targets JavaScript: + +```scheme +((identifier) @variable.builtin + (#any-of? @variable.builtin + "arguments" + "module" + "console" + "window" + "document")) +``` + +This will match any of the builtin variables in JavaScript. + +_Note_ — Predicates are not handled directly by the Tree-sitter C library. +They are just exposed in a structured form so that higher-level code can perform +the filtering. However, higher-level bindings to Tree-sitter like +[the Rust Crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) +or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) +do implement a few common predicates like the `#eq?`, `#match?`, and `#any-of?` +predicates explained above. 
+ +To recap about the predicates Tree-Sitter's bindings support: + +- `#eq?` checks for a direct match against a capture or string +- `#match?` checks for a match against a regular expression +- `#any-of?` checks for a match against a list of strings +- Adding `not-` to the beginning of any of these predicates will negate the match +- By default, a quantified capture will only match if _all_ of the nodes match the predicate +- Adding `any-` before the `eq` or `match` predicates will instead match if any of the nodes match the predicate + ### The Query API @@ -723,8 +821,8 @@ The node types file contains an array of objects, each of which describes a part Every object in this array has these two entries: -* `"type"` - A string that indicates which grammar rule the node represents. This corresponds to the `ts_node_type` function described [above](#syntax-nodes). -* `"named"` - A boolean that indicates whether this kind of node corresponds to a rule name in the grammar or just a string literal. See [above](#named-vs-anonymous-nodes) for more info. +- `"type"` - A string that indicates which grammar rule the node represents. This corresponds to the `ts_node_type` function described [above](#syntax-nodes). +- `"named"` - A boolean that indicates whether this kind of node corresponds to a rule name in the grammar or just a string literal. See [above](#named-vs-anonymous-nodes) for more info. Examples: @@ -745,14 +843,14 @@ Together, these two fields constitute a unique identifier for a node type; no tw Many syntax nodes can have _children_. The node type object describes the possible children that a node can have using the following entries: -* `"fields"` - An object that describes the possible [fields](#node-field-names) that the node can have. The keys of this object are field names, and the values are _child type_ objects, described below. -* `"children"` - Another _child type_ object that describes all of the node's possible _named_ children _without_ fields. 
+- `"fields"` - An object that describes the possible [fields](#node-field-names) that the node can have. The keys of this object are field names, and the values are _child type_ objects, described below. +- `"children"` - Another _child type_ object that describes all of the node's possible _named_ children _without_ fields. A _child type_ object describes a set of child nodes using the following entries: -* `"required"` - A boolean indicating whether there is always _at least one_ node in this set. -* `"multiple"` - A boolean indicating whether there can be _multiple_ nodes in this set. -* `"types"`- An array of objects that represent the possible types of nodes in this set. Each object has two keys: `"type"` and `"named"`, whose meanings are described above. +- `"required"` - A boolean indicating whether there is always _at least one_ node in this set. +- `"multiple"` - A boolean indicating whether there can be _multiple_ nodes in this set. +- `"types"`- An array of objects that represent the possible types of nodes in this set. Each object has two keys: `"type"` and `"named"`, whose meanings are described above. Example with fields: @@ -812,7 +910,7 @@ In Tree-sitter grammars, there are usually certain rules that represent abstract Normally, hidden rules are not mentioned in the node types file, since they don't appear in the syntax tree. But if you add a hidden rule to the grammar's [`supertypes` list](./creating-parsers#the-grammar-dsl), then it _will_ show up in the node types file, with the following special entry: -* `"subtypes"` - An array of objects that specify the _types_ of nodes that this 'supertype' node can wrap. +- `"subtypes"` - An array of objects that specify the _types_ of nodes that this 'supertype' node can wrap. 
Example: From 09030401d1fb0c93b34352437767b587e6697cfb Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Thu, 31 Aug 2023 17:15:38 -0400 Subject: [PATCH 324/347] fix(node): add `_isalpha` --- lib/binding_web/exports.json | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/binding_web/exports.json b/lib/binding_web/exports.json index 71151b71..ad7a6987 100644 --- a/lib/binding_web/exports.json +++ b/lib/binding_web/exports.json @@ -17,6 +17,7 @@ "__Znwm", "___cxa_atexit", "_abort", + "_isalpha", "_iswalnum", "_iswalpha", "_iswdigit", From ec88699f1c537420c3d75355c95c6c0a9022064c Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Thu, 31 Aug 2023 18:26:22 -0400 Subject: [PATCH 325/347] test(node): update bash test --- lib/binding_web/test/parser-test.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/binding_web/test/parser-test.js b/lib/binding_web/test/parser-test.js index 74c45d96..a25e5dc5 100644 --- a/lib/binding_web/test/parser-test.js +++ b/lib/binding_web/test/parser-test.js @@ -139,7 +139,7 @@ describe("Parser", () => { 'redirect: (file_redirect descriptor: (file_descriptor) destination: (word)) ' + 'redirect: (file_redirect destination: (word)) ' + '(heredoc_body ' + - '(expansion (variable_name))) (heredoc_end))))' + '(expansion (variable_name)) (heredoc_content)) (heredoc_end))))' ); }).timeout(5000); From dc5ec1cf5380910fa6eb71af7c452ddc62582f82 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Fri, 1 Sep 2023 17:01:39 -0400 Subject: [PATCH 326/347] refactor: remove `&Option` where used --- cli/loader/src/lib.rs | 34 +++++++++++++++---------------- cli/src/playground.rs | 14 ++++++------- cli/src/tests/helpers/fixtures.rs | 2 +- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/cli/loader/src/lib.rs b/cli/loader/src/lib.rs index 3022b6b9..1fbb1473 100644 --- a/cli/loader/src/lib.rs +++ b/cli/loader/src/lib.rs @@ -344,7 +344,7 @@ impl Loader { &grammar_json.name, &header_path, &parser_path, - &scanner_path, + 
scanner_path.as_deref(), ) } @@ -353,7 +353,7 @@ impl Loader { name: &str, header_path: &Path, parser_path: &Path, - scanner_path: &Option, + scanner_path: Option<&Path>, ) -> Result { let mut lib_name = name.to_string(); if self.debug_build { @@ -362,7 +362,7 @@ impl Loader { let mut library_path = self.parser_lib_path.join(lib_name); library_path.set_extension(DYLIB_EXTENSION); - let recompile = needs_recompile(&library_path, &parser_path, &scanner_path) + let recompile = needs_recompile(&library_path, &parser_path, scanner_path) .with_context(|| "Failed to compare source and binary timestamps")?; if recompile { @@ -740,21 +740,21 @@ impl<'a> LanguageConfiguration<'a> { .iter() .filter(|p| p.ends_with("highlights.scm")) .cloned() - .collect(), + .collect::>(), ), Some( paths .iter() .filter(|p| p.ends_with("tags.scm")) .cloned() - .collect(), + .collect::>(), ), Some( paths .iter() .filter(|p| p.ends_with("locals.scm")) .cloned() - .collect(), + .collect::>(), ), ), None => (None, None, None), @@ -764,25 +764,25 @@ impl<'a> LanguageConfiguration<'a> { .get_or_try_init(|| { let (highlights_query, highlight_ranges) = self.read_queries( if highlights_filenames.is_some() { - &highlights_filenames + highlights_filenames.as_deref() } else { - &self.highlights_filenames + self.highlights_filenames.as_deref() }, "highlights.scm", )?; let (injections_query, injection_ranges) = self.read_queries( if injections_filenames.is_some() { - &injections_filenames + injections_filenames.as_deref() } else { - &self.injections_filenames + self.injections_filenames.as_deref() }, "injections.scm", )?; let (locals_query, locals_ranges) = self.read_queries( if locals_filenames.is_some() { - &locals_filenames + locals_filenames.as_deref() } else { - &self.locals_filenames + self.locals_filenames.as_deref() }, "locals.scm", )?; @@ -844,9 +844,9 @@ impl<'a> LanguageConfiguration<'a> { self.tags_config .get_or_try_init(|| { let (tags_query, tags_ranges) = - 
self.read_queries(&self.tags_filenames, "tags.scm")?; + self.read_queries(self.tags_filenames.as_deref(), "tags.scm")?; let (locals_query, locals_ranges) = - self.read_queries(&self.locals_filenames, "locals.scm")?; + self.read_queries(self.locals_filenames.as_deref(), "locals.scm")?; if tags_query.is_empty() { Ok(None) } else { @@ -900,12 +900,12 @@ impl<'a> LanguageConfiguration<'a> { fn read_queries( &self, - paths: &Option>, + paths: Option<&[String]>, default_path: &str, ) -> Result<(String, Vec<(String, Range)>)> { let mut query = String::new(); let mut path_ranges = Vec::new(); - if let Some(paths) = paths.as_ref() { + if let Some(paths) = paths { for path in paths { let abs_path = self.root_path.join(path); let prev_query_len = query.len(); @@ -930,7 +930,7 @@ impl<'a> LanguageConfiguration<'a> { fn needs_recompile( lib_path: &Path, parser_c_path: &Path, - scanner_path: &Option, + scanner_path: Option<&Path>, ) -> Result { if !lib_path.exists() { return Ok(true); diff --git a/cli/src/playground.rs b/cli/src/playground.rs index 662eb8aa..35314c77 100644 --- a/cli/src/playground.rs +++ b/cli/src/playground.rs @@ -12,7 +12,7 @@ use tiny_http::{Header, Response, Server}; macro_rules! optional_resource { ($name: tt, $path: tt) => { #[cfg(TREE_SITTER_EMBED_WASM_BINDING)] - fn $name(tree_sitter_dir: &Option) -> Cow<'static, [u8]> { + fn $name(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> { if let Some(tree_sitter_dir) = tree_sitter_dir { Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap()) } else { @@ -21,7 +21,7 @@ macro_rules! 
optional_resource { } #[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))] - fn $name(tree_sitter_dir: &Option) -> Cow<'static, [u8]> { + fn $name(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> { if let Some(tree_sitter_dir) = tree_sitter_dir { Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap()) } else { @@ -35,7 +35,7 @@ optional_resource!(get_playground_js, "docs/assets/js/playground.js"); optional_resource!(get_lib_js, "lib/binding_web/tree-sitter.js"); optional_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm"); -fn get_main_html(tree_sitter_dir: &Option) -> Cow<'static, [u8]> { +fn get_main_html(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> { if let Some(tree_sitter_dir) = tree_sitter_dir { Cow::Owned(fs::read(tree_sitter_dir.join("cli/src/playground.html")).unwrap()) } else { @@ -63,13 +63,13 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) -> Result<()> { } let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok(); - let main_html = str::from_utf8(&get_main_html(&tree_sitter_dir)) + let main_html = str::from_utf8(&get_main_html(tree_sitter_dir.as_ref())) .unwrap() .replace("THE_LANGUAGE_NAME", &grammar_name) .into_bytes(); - let playground_js = get_playground_js(&tree_sitter_dir); - let lib_js = get_lib_js(&tree_sitter_dir); - let lib_wasm = get_lib_wasm(&tree_sitter_dir); + let playground_js = get_playground_js(tree_sitter_dir.as_ref()); + let lib_js = get_lib_js(tree_sitter_dir.as_ref()); + let lib_wasm = get_lib_wasm(tree_sitter_dir.as_ref()); let html_header = Header::from_str("Content-Type: text/html").unwrap(); let js_header = Header::from_str("Content-Type: application/javascript").unwrap(); diff --git a/cli/src/tests/helpers/fixtures.rs b/cli/src/tests/helpers/fixtures.rs index 5d27329b..0f45ef54 100644 --- a/cli/src/tests/helpers/fixtures.rs +++ b/cli/src/tests/helpers/fixtures.rs @@ -88,7 +88,7 @@ pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> } }); 
TEST_LOADER - .load_language_from_sources(name, &HEADER_DIR, &parser_c_path, &scanner_path) + .load_language_from_sources(name, &HEADER_DIR, &parser_c_path, scanner_path.as_deref()) .unwrap() } From 055c329a693b1f3b6efd98408e2952d5bc32958b Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Fri, 1 Sep 2023 20:41:42 +0300 Subject: [PATCH 327/347] chore(lib): fix fields naming in QueryPattern to singular --- lib/src/query.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index e3fd27a0..4e623ae7 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -143,8 +143,8 @@ typedef struct { } PatternEntry; typedef struct { - Slice steps; - Slice predicate_steps; + Slice step; + Slice predicate_step; uint32_t start_byte; bool is_non_local; } QueryPattern; @@ -1782,8 +1782,8 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // Gather all of the captures that are used in predicates for this pattern. array_clear(&predicate_capture_ids); for ( - unsigned start = pattern->predicate_steps.offset, - end = start + pattern->predicate_steps.length, + unsigned start = pattern->predicate_step.offset, + end = start + pattern->predicate_step.length, j = start; j < end; j++ ) { TSQueryPredicateStep *step = &self->predicate_steps.contents[j]; @@ -1795,8 +1795,8 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // Find all of the steps that have these captures. 
for ( - unsigned start = pattern->steps.offset, - end = start + pattern->steps.length, + unsigned start = pattern->step.offset, + end = start + pattern->step.length, j = start; j < end; j++ ) { QueryStep *step = &self->steps.contents[j]; @@ -2711,8 +2711,8 @@ TSQuery *ts_query_new( uint32_t start_step_index = self->steps.size; uint32_t start_predicate_step_index = self->predicate_steps.size; array_push(&self->patterns, ((QueryPattern) { - .steps = (Slice) {.offset = start_step_index}, - .predicate_steps = (Slice) {.offset = start_predicate_step_index}, + .step = (Slice) {.offset = start_step_index}, + .predicate_step = (Slice) {.offset = start_predicate_step_index}, .start_byte = stream_offset(&stream), .is_non_local = false, })); @@ -2721,8 +2721,8 @@ TSQuery *ts_query_new( array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false)); QueryPattern *pattern = array_back(&self->patterns); - pattern->steps.length = self->steps.size - start_step_index; - pattern->predicate_steps.length = self->predicate_steps.size - start_predicate_step_index; + pattern->step.length = self->steps.size - start_step_index; + pattern->predicate_step.length = self->predicate_steps.size - start_predicate_step_index; // If any pattern could not be parsed, then report the error information // and terminate. 
@@ -2865,7 +2865,7 @@ const TSQueryPredicateStep *ts_query_predicates_for_pattern( uint32_t pattern_index, uint32_t *step_count ) { - Slice slice = self->patterns.contents[pattern_index].predicate_steps; + Slice slice = self->patterns.contents[pattern_index].predicate_step; *step_count = slice.length; if (self->predicate_steps.contents == NULL) { return NULL; From 52f7eaff3182a726eb064a91d4e49dfbaecd4ee3 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Fri, 1 Sep 2023 05:33:52 +0300 Subject: [PATCH 328/347] chore(rust): make `Query` extra predicates state fully immutable --- cli/loader/src/lib.rs | 4 +- cli/src/tests/helpers/query_helpers.rs | 2 +- cli/src/tests/query_test.rs | 11 +- highlight/src/lib.rs | 9 +- lib/binding_rust/lib.rs | 146 ++++++++++++++----------- tags/src/lib.rs | 2 +- 6 files changed, 96 insertions(+), 78 deletions(-) diff --git a/cli/loader/src/lib.rs b/cli/loader/src/lib.rs index 1fbb1473..d260a96c 100644 --- a/cli/loader/src/lib.rs +++ b/cli/loader/src/lib.rs @@ -828,8 +828,8 @@ impl<'a> LanguageConfiguration<'a> { let mut all_highlight_names = self.highlight_names.lock().unwrap(); if self.use_all_highlight_names { for capture_name in result.query.capture_names() { - if !all_highlight_names.contains(capture_name) { - all_highlight_names.push(capture_name.clone()); + if !all_highlight_names.iter().any(|x| x == capture_name) { + all_highlight_names.push(capture_name.to_string()); } } } diff --git a/cli/src/tests/helpers/query_helpers.rs b/cli/src/tests/helpers/query_helpers.rs index a21320b1..4d71dfd0 100644 --- a/cli/src/tests/helpers/query_helpers.rs +++ b/cli/src/tests/helpers/query_helpers.rs @@ -353,7 +353,7 @@ fn format_captures<'a>( captures .map(|capture| { ( - query.capture_names()[capture.index as usize].as_str(), + query.capture_names()[capture.index as usize], capture.node.utf8_text(source.as_bytes()).unwrap(), ) }) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index ed1f9e25..51c783e7 100644 --- 
a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -2269,7 +2269,7 @@ fn test_query_captures_within_byte_range_assigned_after_iterating() { for (mat, capture_ix) in captures.by_ref().take(5) { let capture = mat.captures[capture_ix as usize]; results.push(( - query.capture_names()[capture.index as usize].as_str(), + query.capture_names()[capture.index as usize], &source[capture.node.byte_range()], )); } @@ -2292,7 +2292,7 @@ fn test_query_captures_within_byte_range_assigned_after_iterating() { for (mat, capture_ix) in captures { let capture = mat.captures[capture_ix as usize]; results.push(( - query.capture_names()[capture.index as usize].as_str(), + query.capture_names()[capture.index as usize], &source[capture.node.byte_range()], )); } @@ -2533,7 +2533,7 @@ fn test_query_matches_with_captured_wildcard_at_root() { .iter() .map(|c| { ( - query.capture_names()[c.index as usize].as_str(), + query.capture_names()[c.index as usize], c.node.kind(), c.node.start_position().row, ) @@ -2934,7 +2934,8 @@ fn test_query_captures_with_predicates() { args: vec![ QueryPredicateArg::Capture(0), QueryPredicateArg::String("omg".to_string().into_boxed_str()), - ], + ] + .into_boxed_slice(), },] ); assert_eq!(query.property_settings(1), &[]); @@ -3826,7 +3827,7 @@ fn test_query_random() { captures: mat .captures .iter() - .map(|c| (query.capture_names()[c.index as usize].as_str(), c.node)) + .map(|c| (query.capture_names()[c.index as usize], c.node)) .collect::>(), }) .collect::>(); diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index 2903c7c5..2170b07f 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -321,7 +321,7 @@ impl HighlightConfiguration { let mut local_scope_capture_index = None; for (i, name) in query.capture_names().iter().enumerate() { let i = Some(i as u32); - match name.as_str() { + match *name { "injection.content" => injection_content_capture_index = i, "injection.language" => injection_language_capture_index = i, 
"local.definition" => local_def_capture_index = i, @@ -353,7 +353,7 @@ impl HighlightConfiguration { } /// Get a slice containing all of the highlight names used in the configuration. - pub fn names(&self) -> &[String] { + pub fn names(&self) -> &[&str] { self.query.capture_names() } @@ -399,7 +399,7 @@ impl HighlightConfiguration { // Return the list of this configuration's capture names that are neither present in the // list of predefined 'canonical' names nor start with an underscore (denoting 'private' captures // used as part of capture internals). - pub fn nonconformant_capture_names(&self, capture_names: &HashSet<&str>) -> Vec<&String> { + pub fn nonconformant_capture_names(&self, capture_names: &HashSet<&str>) -> Vec<&str> { let capture_names = if capture_names.is_empty() { &*STANDARD_CAPTURE_NAMES } else { @@ -407,7 +407,8 @@ impl HighlightConfiguration { }; self.names() .iter() - .filter(|&n| !(n.starts_with('_') || capture_names.contains(n.as_str()))) + .filter(|&n| !(n.starts_with('_') || capture_names.contains(n))) + .map(|n| *n) .collect() } } diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 8762c7f6..81204625 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -116,12 +116,12 @@ pub struct TreeCursor<'cursor>(ffi::TSTreeCursor, PhantomData<&'cursor ()>); #[derive(Debug)] pub struct Query { ptr: NonNull, - capture_names: Vec, - capture_quantifiers: Vec>, - text_predicates: Vec>, - property_settings: Vec>, - property_predicates: Vec>, - general_predicates: Vec>, + capture_names: Box<[&'static str]>, + capture_quantifiers: Box<[Box<[CaptureQuantifier]>]>, + text_predicates: Box<[Box<[TextPredicateCapture]>]>, + property_settings: Box<[Box<[QueryProperty]>]>, + property_predicates: Box<[Box<[(QueryProperty, bool)]>]>, + general_predicates: Box<[Box<[QueryPredicate]>]>, } /// A quantifier for captures @@ -171,7 +171,7 @@ pub enum QueryPredicateArg { #[derive(Debug, PartialEq, Eq)] pub struct QueryPredicate { pub 
operator: Box, - pub args: Vec, + pub args: Box<[QueryPredicateArg]>, } /// A match of a [`Query`] to a particular set of [`Node`]s. @@ -256,10 +256,10 @@ pub enum QueryErrorKind { /// The last item is a bool signifying whether or not it's meant to match /// any or all captures enum TextPredicateCapture { - EqString(u32, String, bool, bool), + EqString(u32, Box, bool, bool), EqCapture(u32, u32, bool, bool), MatchString(u32, regex::bytes::Regex, bool, bool), - AnyString(u32, Vec, bool), + AnyString(u32, Box<[Box]>, bool), } // TODO: Remove this struct at at some point. If `core::str::lossy::Utf8Lossy` @@ -1643,29 +1643,37 @@ impl Query { } #[doc(hidden)] - unsafe fn from_raw_parts(ptr: *mut ffi::TSQuery, source: &str) -> Result { - let string_count = unsafe { ffi::ts_query_string_count(ptr) }; - let capture_count = unsafe { ffi::ts_query_capture_count(ptr) }; - let pattern_count = unsafe { ffi::ts_query_pattern_count(ptr) as usize }; - let mut result = Query { - ptr: unsafe { NonNull::new_unchecked(ptr) }, - capture_names: Vec::with_capacity(capture_count as usize), - capture_quantifiers: Vec::with_capacity(pattern_count as usize), - text_predicates: Vec::with_capacity(pattern_count), - property_predicates: Vec::with_capacity(pattern_count), - property_settings: Vec::with_capacity(pattern_count), - general_predicates: Vec::with_capacity(pattern_count), + unsafe fn from_raw_parts(ptr: *mut ffi::TSQuery, source: &str) -> Result { + let ptr = { + struct TSQueryDrop(*mut ffi::TSQuery); + impl Drop for TSQueryDrop { + fn drop(&mut self) { + unsafe { ffi::ts_query_delete(self.0) } + } + } + TSQueryDrop(ptr) }; + let string_count = unsafe { ffi::ts_query_string_count(ptr.0) }; + let capture_count = unsafe { ffi::ts_query_capture_count(ptr.0) }; + let pattern_count = unsafe { ffi::ts_query_pattern_count(ptr.0) as usize }; + + let mut capture_names = Vec::with_capacity(capture_count as usize); + let mut capture_quantifiers_vec = Vec::with_capacity(pattern_count as usize); + 
let mut text_predicates_vec = Vec::with_capacity(pattern_count); + let mut property_predicates_vec = Vec::with_capacity(pattern_count); + let mut property_settings_vec = Vec::with_capacity(pattern_count); + let mut general_predicates_vec = Vec::with_capacity(pattern_count); + // Build a vector of strings to store the capture names. for i in 0..capture_count { unsafe { let mut length = 0u32; - let name = - ffi::ts_query_capture_name_for_id(ptr, i, &mut length as *mut u32) as *const u8; + let name = ffi::ts_query_capture_name_for_id(ptr.0, i, &mut length as *mut u32) + as *const u8; let name = slice::from_raw_parts(name, length as usize); let name = str::from_utf8_unchecked(name); - result.capture_names.push(name.to_string()); + capture_names.push(name); } } @@ -1674,11 +1682,11 @@ impl Query { let mut capture_quantifiers = Vec::with_capacity(capture_count as usize); for j in 0..capture_count { unsafe { - let quantifier = ffi::ts_query_capture_quantifier_for_id(ptr, i as u32, j); + let quantifier = ffi::ts_query_capture_quantifier_for_id(ptr.0, i as u32, j); capture_quantifiers.push(quantifier.into()); } } - result.capture_quantifiers.push(capture_quantifiers); + capture_quantifiers_vec.push(capture_quantifiers.into()); } // Build a vector of strings to represent literal values used in predicates. 
@@ -1686,11 +1694,11 @@ impl Query { .map(|i| unsafe { let mut length = 0u32; let value = - ffi::ts_query_string_value_for_id(ptr, i as u32, &mut length as *mut u32) + ffi::ts_query_string_value_for_id(ptr.0, i as u32, &mut length as *mut u32) as *const u8; let value = slice::from_raw_parts(value, length as usize); let value = str::from_utf8_unchecked(value); - value.to_string() + value }) .collect::>(); @@ -1699,13 +1707,13 @@ impl Query { let predicate_steps = unsafe { let mut length = 0u32; let raw_predicates = - ffi::ts_query_predicates_for_pattern(ptr, i as u32, &mut length as *mut u32); + ffi::ts_query_predicates_for_pattern(ptr.0, i as u32, &mut length as *mut u32); (length > 0) .then(|| slice::from_raw_parts(raw_predicates, length as usize)) .unwrap_or_default() }; - let byte_offset = unsafe { ffi::ts_query_start_byte_for_pattern(ptr, i as u32) }; + let byte_offset = unsafe { ffi::ts_query_start_byte_for_pattern(ptr.0, i as u32) }; let row = source .char_indices() .take_while(|(i, _)| *i < byte_offset as usize) @@ -1730,14 +1738,14 @@ impl Query { row, format!( "Expected predicate to start with a function name. Got @{}.", - result.capture_names[p[0].value_id as usize], + capture_names[p[0].value_id as usize], ), )); } // Build a predicate for each of the known predicate function names. - let operator_name = &string_values[p[0].value_id as usize]; - match operator_name.as_str() { + let operator_name = string_values[p[0].value_id as usize]; + match operator_name { "eq?" | "not-eq?" | "any-eq?" | "any-not-eq?" => { if p.len() != 3 { return Err(predicate_error( @@ -1756,7 +1764,7 @@ impl Query { } let is_positive = operator_name == "eq?" || operator_name == "any-eq?"; - let match_all = match operator_name.as_str() { + let match_all = match operator_name { "eq?" | "not-eq?" => true, "any-eq?" | "any-not-eq?" 
=> false, _ => unreachable!(), @@ -1771,7 +1779,7 @@ impl Query { } else { TextPredicateCapture::EqString( p[1].value_id, - string_values[p[2].value_id as usize].clone(), + string_values[p[2].value_id as usize].to_string().into(), is_positive, match_all, ) @@ -1794,13 +1802,13 @@ impl Query { if p[2].type_ == type_capture { return Err(predicate_error(row, format!( "Second argument to #match? predicate must be a literal. Got capture @{}.", - result.capture_names[p[2].value_id as usize], + capture_names[p[2].value_id as usize], ))); } let is_positive = operator_name == "match?" || operator_name == "any-match?"; - let match_all = match operator_name.as_str() { + let match_all = match operator_name { "match?" | "not-match?" => true, "any-match?" | "any-not-match?" => false, _ => unreachable!(), @@ -1818,8 +1826,8 @@ impl Query { "set!" => property_settings.push(Self::parse_property( row, - operator_name, - &result.capture_names, + &operator_name, + &capture_names, &string_values, &p[1..], )?), @@ -1827,8 +1835,8 @@ impl Query { "is?" | "is-not?" => property_predicates.push(( Self::parse_property( row, - operator_name, - &result.capture_names, + &operator_name, + &capture_names, &string_values, &p[1..], )?, @@ -1855,20 +1863,24 @@ impl Query { if arg.type_ == type_capture { return Err(predicate_error(row, format!( "Arguments to #any-of? predicate must be literals. Got capture @{}.", - result.capture_names[arg.value_id as usize], + capture_names[arg.value_id as usize], ))); } - values.push(string_values[arg.value_id as usize].clone()); + values.push(string_values[arg.value_id as usize]); } text_predicates.push(TextPredicateCapture::AnyString( p[1].value_id, - values, + values + .iter() + .map(|x| x.to_string().into()) + .collect::>() + .into(), is_positive, )); } _ => general_predicates.push(QueryPredicate { - operator: operator_name.clone().into_boxed_str(), + operator: operator_name.to_string().into(), args: p[1..] 
.iter() .map(|a| { @@ -1876,7 +1888,7 @@ impl Query { QueryPredicateArg::Capture(a.value_id) } else { QueryPredicateArg::String( - string_values[a.value_id as usize].clone().into_boxed_str(), + string_values[a.value_id as usize].to_string().into(), ) } }) @@ -1885,20 +1897,24 @@ impl Query { } } - result - .text_predicates - .push(text_predicates.into_boxed_slice()); - result - .property_predicates - .push(property_predicates.into_boxed_slice()); - result - .property_settings - .push(property_settings.into_boxed_slice()); - result - .general_predicates - .push(general_predicates.into_boxed_slice()); + text_predicates_vec.push(text_predicates.into()); + property_predicates_vec.push(property_predicates.into()); + property_settings_vec.push(property_settings.into()); + general_predicates_vec.push(general_predicates.into()); } + let result = Query { + ptr: unsafe { NonNull::new_unchecked(ptr.0) }, + capture_names: capture_names.into(), + capture_quantifiers: capture_quantifiers_vec.into(), + text_predicates: text_predicates_vec.into(), + property_predicates: property_predicates_vec.into(), + property_settings: property_settings_vec.into(), + general_predicates: general_predicates_vec.into(), + }; + + std::mem::forget(ptr); + Ok(result) } @@ -1924,7 +1940,7 @@ impl Query { } /// Get the names of the captures used in the query. 
- pub fn capture_names(&self) -> &[String] { + pub fn capture_names(&self) -> &[&str] { &self.capture_names } @@ -1937,7 +1953,7 @@ impl Query { pub fn capture_index_for_name(&self, name: &str) -> Option { self.capture_names .iter() - .position(|n| n == name) + .position(|n| *n == name) .map(|ix| ix as u32) } @@ -2016,8 +2032,8 @@ impl Query { fn parse_property( row: usize, function_name: &str, - capture_names: &[String], - string_values: &[String], + capture_names: &[&str], + string_values: &[&str], args: &[ffi::TSQueryPredicateStep], ) -> Result { if args.len() == 0 || args.len() > 3 { @@ -2050,7 +2066,7 @@ impl Query { } else if key.is_none() { key = Some(&string_values[arg.value_id as usize]); } else if value.is_none() { - value = Some(string_values[arg.value_id as usize].as_str()); + value = Some(string_values[arg.value_id as usize]); } else { return Err(predicate_error( row, @@ -2349,8 +2365,8 @@ impl QueryProperty { pub fn new(key: &str, value: Option<&str>, capture_id: Option) -> Self { QueryProperty { capture_id, - key: key.to_string().into_boxed_str(), - value: value.map(|s| s.to_string().into_boxed_str()), + key: key.to_string().into(), + value: value.map(|s| s.to_string().into()), } } } diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 0cf1bf96..e151e3ee 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -136,7 +136,7 @@ impl TagsConfiguration { let mut local_scope_capture_index = None; let mut local_definition_capture_index = None; for (i, name) in query.capture_names().iter().enumerate() { - match name.as_str() { + match *name { "" => continue, "name" => name_capture_index = Some(i as u32), "ignore" => ignore_capture_index = Some(i as u32), From 08ac19086babd0ea7c4744e0e4b80f1c145b7e6d Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Fri, 1 Sep 2023 20:28:31 +0300 Subject: [PATCH 329/347] chore: simplify test case after query state improvements --- cli/src/tests/query_test.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff 
--git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 51c783e7..5fb33e01 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -3605,12 +3605,7 @@ fn test_query_capture_names() { assert_eq!( query.capture_names(), - &[ - "left-operand".to_string(), - "right-operand".to_string(), - "body".to_string(), - "loop-condition".to_string(), - ] + ["left-operand", "right-operand", "body", "loop-condition"] ); }); } From 67a5dbdd935d284a406e898d074b2cc820a98508 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sun, 3 Sep 2023 05:52:24 +0300 Subject: [PATCH 330/347] fix: dealloc calls on zero pointers --- cli/src/tests/helpers/allocations.rs | 4 +++- lib/binding_rust/util.rs | 4 +++- lib/src/array.h | 10 ++++++---- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/cli/src/tests/helpers/allocations.rs b/cli/src/tests/helpers/allocations.rs index 2ab80291..0d2331d3 100644 --- a/cli/src/tests/helpers/allocations.rs +++ b/cli/src/tests/helpers/allocations.rs @@ -107,7 +107,9 @@ unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void } unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void { - record_dealloc(ptr); + if !ptr.is_null() { + record_dealloc(ptr); + } let result = realloc(ptr, size); record_alloc(result); result diff --git a/lib/binding_rust/util.rs b/lib/binding_rust/util.rs index 5eda71f4..d5a73437 100644 --- a/lib/binding_rust/util.rs +++ b/lib/binding_rust/util.rs @@ -37,6 +37,8 @@ impl ExactSizeIterator for CBufferIter {} impl Drop for CBufferIter { fn drop(&mut self) { - unsafe { (FREE_FN)(self.ptr as *mut c_void) }; + if !self.ptr.is_null() { + unsafe { (FREE_FN)(self.ptr as *mut c_void) }; + } } } diff --git a/lib/src/array.h b/lib/src/array.h index e5cd361f..e026f6b2 100644 --- a/lib/src/array.h +++ b/lib/src/array.h @@ -132,10 +132,12 @@ typedef Array(void) VoidArray; #define array__elem_size(self) sizeof(*(self)->contents) static inline void 
array__delete(VoidArray *self) { - ts_free(self->contents); - self->contents = NULL; - self->size = 0; - self->capacity = 0; + if (self->contents) { + ts_free(self->contents); + self->contents = NULL; + self->size = 0; + self->capacity = 0; + } } static inline void array__erase(VoidArray *self, size_t element_size, From 7f7084c2cb64a1617746ac6c7bbdb773131593a7 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sun, 3 Sep 2023 05:29:48 +0300 Subject: [PATCH 331/347] chore(test): panic on zero pointer deallocs for alloc tracked scopes --- cli/src/tests/helpers/allocations.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/cli/src/tests/helpers/allocations.rs b/cli/src/tests/helpers/allocations.rs index 0d2331d3..43537633 100644 --- a/cli/src/tests/helpers/allocations.rs +++ b/cli/src/tests/helpers/allocations.rs @@ -83,6 +83,9 @@ fn record_alloc(ptr: *mut c_void) { } fn record_dealloc(ptr: *mut c_void) { + if ptr.is_null() { + panic!("Zero pointer deallocation!"); + } RECORDER.with(|recorder| { if recorder.enabled.load(SeqCst) { recorder @@ -107,11 +110,13 @@ unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void } unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void { - if !ptr.is_null() { - record_dealloc(ptr); - } let result = realloc(ptr, size); - record_alloc(result); + if ptr.is_null() { + record_alloc(result); + } else if ptr != result { + record_dealloc(ptr); + record_alloc(result); + } result } From 9cc1daafcab760224a767d1dc37b25a5d5344530 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sun, 3 Sep 2023 06:47:27 +0300 Subject: [PATCH 332/347] chore(ffi): remove enum name prefixes from all C enum values --- cli/src/main.rs | 4 +-- cli/src/parse.rs | 2 +- lib/binding_rust/bindings.rs | 44 ++++++++++++------------- lib/binding_rust/build.rs | 1 + lib/binding_rust/lib.rs | 63 +++++++++++++++++------------------- script/generate-bindings | 1 + 6 files changed, 57 
insertions(+), 58 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index fbdb0343..6699d764 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -448,8 +448,8 @@ fn run() -> Result<()> { matches .values_of("encoding") .map_or(Ok(None), |mut e| match e.next() { - Some("utf16") => Ok(Some(ffi::TSInputEncoding_TSInputEncodingUTF16)), - Some("utf8") => Ok(Some(ffi::TSInputEncoding_TSInputEncodingUTF8)), + Some("utf16") => Ok(Some(ffi::TSInputEncodingUTF16)), + Some("utf8") => Ok(Some(ffi::TSInputEncodingUTF8)), Some(_) => Err(anyhow!("Invalid encoding. Expected one of: utf8, utf16")), None => Ok(None), })?; diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 3647c2c4..5b1a4b31 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -88,7 +88,7 @@ pub fn parse_file_at_path(opts: ParseFileOptions) -> Result { } let tree = match opts.encoding { - Some(encoding) if encoding == ffi::TSInputEncoding_TSInputEncodingUTF16 => { + Some(encoding) if encoding == ffi::TSInputEncodingUTF16 => { let source_code_utf16 = source_code .chunks_exact(2) .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]])) diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index a0e52f6c..c25bc1e4 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -35,12 +35,12 @@ pub struct TSQueryCursor { pub struct TSLookaheadIterator { _unused: [u8; 0], } -pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0; -pub const TSInputEncoding_TSInputEncodingUTF16: TSInputEncoding = 1; +pub const TSInputEncodingUTF8: TSInputEncoding = 0; +pub const TSInputEncodingUTF16: TSInputEncoding = 1; pub type TSInputEncoding = ::std::os::raw::c_uint; -pub const TSSymbolType_TSSymbolTypeRegular: TSSymbolType = 0; -pub const TSSymbolType_TSSymbolTypeAnonymous: TSSymbolType = 1; -pub const TSSymbolType_TSSymbolTypeAuxiliary: TSSymbolType = 2; +pub const TSSymbolTypeRegular: TSSymbolType = 0; +pub const TSSymbolTypeAnonymous: TSSymbolType = 1; +pub const 
TSSymbolTypeAuxiliary: TSSymbolType = 2; pub type TSSymbolType = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug, Copy, Clone)] @@ -70,8 +70,8 @@ pub struct TSInput { >, pub encoding: TSInputEncoding, } -pub const TSLogType_TSLogTypeParse: TSLogType = 0; -pub const TSLogType_TSLogTypeLex: TSLogType = 1; +pub const TSLogTypeParse: TSLogType = 0; +pub const TSLogTypeLex: TSLogType = 1; pub type TSLogType = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug)] @@ -115,11 +115,11 @@ pub struct TSQueryCapture { pub node: TSNode, pub index: u32, } -pub const TSQuantifier_TSQuantifierZero: TSQuantifier = 0; -pub const TSQuantifier_TSQuantifierZeroOrOne: TSQuantifier = 1; -pub const TSQuantifier_TSQuantifierZeroOrMore: TSQuantifier = 2; -pub const TSQuantifier_TSQuantifierOne: TSQuantifier = 3; -pub const TSQuantifier_TSQuantifierOneOrMore: TSQuantifier = 4; +pub const TSQuantifierZero: TSQuantifier = 0; +pub const TSQuantifierZeroOrOne: TSQuantifier = 1; +pub const TSQuantifierZeroOrMore: TSQuantifier = 2; +pub const TSQuantifierOne: TSQuantifier = 3; +pub const TSQuantifierOneOrMore: TSQuantifier = 4; pub type TSQuantifier = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug)] @@ -129,9 +129,9 @@ pub struct TSQueryMatch { pub capture_count: u16, pub captures: *const TSQueryCapture, } -pub const TSQueryPredicateStepType_TSQueryPredicateStepTypeDone: TSQueryPredicateStepType = 0; -pub const TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture: TSQueryPredicateStepType = 1; -pub const TSQueryPredicateStepType_TSQueryPredicateStepTypeString: TSQueryPredicateStepType = 2; +pub const TSQueryPredicateStepTypeDone: TSQueryPredicateStepType = 0; +pub const TSQueryPredicateStepTypeCapture: TSQueryPredicateStepType = 1; +pub const TSQueryPredicateStepTypeString: TSQueryPredicateStepType = 2; pub type TSQueryPredicateStepType = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug)] @@ -139,13 +139,13 @@ pub struct TSQueryPredicateStep { pub type_: TSQueryPredicateStepType, pub 
value_id: u32, } -pub const TSQueryError_TSQueryErrorNone: TSQueryError = 0; -pub const TSQueryError_TSQueryErrorSyntax: TSQueryError = 1; -pub const TSQueryError_TSQueryErrorNodeType: TSQueryError = 2; -pub const TSQueryError_TSQueryErrorField: TSQueryError = 3; -pub const TSQueryError_TSQueryErrorCapture: TSQueryError = 4; -pub const TSQueryError_TSQueryErrorStructure: TSQueryError = 5; -pub const TSQueryError_TSQueryErrorLanguage: TSQueryError = 6; +pub const TSQueryErrorNone: TSQueryError = 0; +pub const TSQueryErrorSyntax: TSQueryError = 1; +pub const TSQueryErrorNodeType: TSQueryError = 2; +pub const TSQueryErrorField: TSQueryError = 3; +pub const TSQueryErrorCapture: TSQueryError = 4; +pub const TSQueryErrorStructure: TSQueryError = 5; +pub const TSQueryErrorLanguage: TSQueryError = 6; pub type TSQueryError = ::std::os::raw::c_uint; extern "C" { #[doc = " Create a new parser."] diff --git a/lib/binding_rust/build.rs b/lib/binding_rust/build.rs index a74bdb27..690d1527 100644 --- a/lib/binding_rust/build.rs +++ b/lib/binding_rust/build.rs @@ -64,6 +64,7 @@ fn generate_bindings() { .allowlist_function("^ts_.*") .allowlist_var("^TREE_SITTER.*") .no_copy(no_copy.join("|")) + .prepend_enum_name(false) .generate() .expect("Failed to generate bindings"); diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 81204625..fdd661dd 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -137,11 +137,11 @@ pub enum CaptureQuantifier { impl From for CaptureQuantifier { fn from(value: ffi::TSQuantifier) -> Self { match value { - ffi::TSQuantifier_TSQuantifierZero => CaptureQuantifier::Zero, - ffi::TSQuantifier_TSQuantifierZeroOrOne => CaptureQuantifier::ZeroOrOne, - ffi::TSQuantifier_TSQuantifierZeroOrMore => CaptureQuantifier::ZeroOrMore, - ffi::TSQuantifier_TSQuantifierOne => CaptureQuantifier::One, - ffi::TSQuantifier_TSQuantifierOneOrMore => CaptureQuantifier::OneOrMore, + ffi::TSQuantifierZero => CaptureQuantifier::Zero, + 
ffi::TSQuantifierZeroOrOne => CaptureQuantifier::ZeroOrOne, + ffi::TSQuantifierZeroOrMore => CaptureQuantifier::ZeroOrMore, + ffi::TSQuantifierOne => CaptureQuantifier::One, + ffi::TSQuantifierOneOrMore => CaptureQuantifier::OneOrMore, _ => panic!("Unrecognized quantifier: {}", value), } } @@ -312,14 +312,12 @@ impl Language { /// Check if the node type for the given numerical id is named (as opposed /// to an anonymous node type). pub fn node_kind_is_named(&self, id: u16) -> bool { - unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolType_TSSymbolTypeRegular } + unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolTypeRegular } } #[doc(alias = "ts_language_symbol_type")] pub fn node_kind_is_visible(&self, id: u16) -> bool { - unsafe { - ffi::ts_language_symbol_type(self.0, id) <= ffi::TSSymbolType_TSSymbolTypeAnonymous - } + unsafe { ffi::ts_language_symbol_type(self.0, id) <= ffi::TSSymbolTypeAnonymous } } /// Get the number of distinct field names in this language. 
@@ -445,7 +443,7 @@ impl Parser { ) { let callback = (payload as *mut Logger).as_mut().unwrap(); if let Ok(message) = CStr::from_ptr(c_message).to_str() { - let log_type = if c_log_type == ffi::TSLogType_TSLogTypeParse { + let log_type = if c_log_type == ffi::TSLogTypeParse { LogType::Parse } else { LogType::Lex @@ -571,7 +569,7 @@ impl Parser { let c_input = ffi::TSInput { payload: &mut payload as *mut (&mut F, Option) as *mut c_void, read: Some(read::), - encoding: ffi::TSInputEncoding_TSInputEncodingUTF8, + encoding: ffi::TSInputEncodingUTF8, }; let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0.as_ptr()); @@ -627,7 +625,7 @@ impl Parser { let c_input = ffi::TSInput { payload: &mut payload as *mut (&mut F, Option) as *mut c_void, read: Some(read::), - encoding: ffi::TSInputEncoding_TSInputEncodingUTF16, + encoding: ffi::TSInputEncodingUTF16, }; let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0.as_ptr()); @@ -1568,7 +1566,7 @@ impl Query { // On failure, build an error based on the error code and offset. 
if ptr.is_null() { - if error_type == ffi::TSQueryError_TSQueryErrorLanguage { + if error_type == ffi::TSQueryErrorLanguage { return Err(QueryError { row: 0, column: 0, @@ -1600,18 +1598,16 @@ impl Query { let message; match error_type { // Error types that report names - ffi::TSQueryError_TSQueryErrorNodeType - | ffi::TSQueryError_TSQueryErrorField - | ffi::TSQueryError_TSQueryErrorCapture => { + ffi::TSQueryErrorNodeType | ffi::TSQueryErrorField | ffi::TSQueryErrorCapture => { let suffix = source.split_at(offset).1; let end_offset = suffix .find(|c| !char::is_alphanumeric(c) && c != '_' && c != '-') .unwrap_or(suffix.len()); message = suffix.split_at(end_offset).0.to_string(); kind = match error_type { - ffi::TSQueryError_TSQueryErrorNodeType => QueryErrorKind::NodeType, - ffi::TSQueryError_TSQueryErrorField => QueryErrorKind::Field, - ffi::TSQueryError_TSQueryErrorCapture => QueryErrorKind::Capture, + ffi::TSQueryErrorNodeType => QueryErrorKind::NodeType, + ffi::TSQueryErrorField => QueryErrorKind::Field, + ffi::TSQueryErrorCapture => QueryErrorKind::Capture, _ => unreachable!(), }; } @@ -1624,7 +1620,7 @@ impl Query { "Unexpected EOF".to_string() }; kind = match error_type { - ffi::TSQueryError_TSQueryErrorStructure => QueryErrorKind::Structure, + ffi::TSQueryErrorStructure => QueryErrorKind::Structure, _ => QueryErrorKind::Syntax, }; } @@ -1720,20 +1716,21 @@ impl Query { .filter(|(_, c)| *c == '\n') .count(); - let type_done = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeDone; - let type_capture = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture; - let type_string = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeString; + use ffi::TSQueryPredicateStepType as T; + const TYPE_DONE: T = ffi::TSQueryPredicateStepTypeDone; + const TYPE_CAPTURE: T = ffi::TSQueryPredicateStepTypeCapture; + const TYPE_STRING: T = ffi::TSQueryPredicateStepTypeString; let mut text_predicates = Vec::new(); let mut property_predicates = Vec::new(); let mut 
property_settings = Vec::new(); let mut general_predicates = Vec::new(); - for p in predicate_steps.split(|s| s.type_ == type_done) { + for p in predicate_steps.split(|s| s.type_ == TYPE_DONE) { if p.is_empty() { continue; } - if p[0].type_ != type_string { + if p[0].type_ != TYPE_STRING { return Err(predicate_error( row, format!( @@ -1756,7 +1753,7 @@ impl Query { ), )); } - if p[1].type_ != type_capture { + if p[1].type_ != TYPE_CAPTURE { return Err(predicate_error(row, format!( "First argument to #eq? predicate must be a capture name. Got literal \"{}\".", string_values[p[1].value_id as usize], @@ -1769,7 +1766,7 @@ impl Query { "any-eq?" | "any-not-eq?" => false, _ => unreachable!(), }; - text_predicates.push(if p[2].type_ == type_capture { + text_predicates.push(if p[2].type_ == TYPE_CAPTURE { TextPredicateCapture::EqCapture( p[1].value_id, p[2].value_id, @@ -1793,13 +1790,13 @@ impl Query { p.len() - 1 ))); } - if p[1].type_ != type_capture { + if p[1].type_ != TYPE_CAPTURE { return Err(predicate_error(row, format!( "First argument to #match? predicate must be a capture name. Got literal \"{}\".", string_values[p[1].value_id as usize], ))); } - if p[2].type_ == type_capture { + if p[2].type_ == TYPE_CAPTURE { return Err(predicate_error(row, format!( "Second argument to #match? predicate must be a literal. Got capture @{}.", capture_names[p[2].value_id as usize], @@ -1850,7 +1847,7 @@ impl Query { p.len() - 1 ))); } - if p[1].type_ != type_capture { + if p[1].type_ != TYPE_CAPTURE { return Err(predicate_error(row, format!( "First argument to #any-of? predicate must be a capture name. Got literal \"{}\".", string_values[p[1].value_id as usize], @@ -1860,7 +1857,7 @@ impl Query { let is_positive = operator_name == "any-of?"; let mut values = Vec::new(); for arg in &p[2..] { - if arg.type_ == type_capture { + if arg.type_ == TYPE_CAPTURE { return Err(predicate_error(row, format!( "Arguments to #any-of? predicate must be literals. 
Got capture @{}.", capture_names[arg.value_id as usize], @@ -1884,7 +1881,7 @@ impl Query { args: p[1..] .iter() .map(|a| { - if a.type_ == type_capture { + if a.type_ == TYPE_CAPTURE { QueryPredicateArg::Capture(a.value_id) } else { QueryPredicateArg::String( @@ -2052,7 +2049,7 @@ impl Query { let mut value = None; for arg in args { - if arg.type_ == ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture { + if arg.type_ == ffi::TSQueryPredicateStepTypeCapture { if capture_id.is_some() { return Err(predicate_error( row, diff --git a/script/generate-bindings b/script/generate-bindings index 52fc43f3..9ced5712 100755 --- a/script/generate-bindings +++ b/script/generate-bindings @@ -35,5 +35,6 @@ bindgen \ --allowlist-function '^ts_.*' \ --allowlist-var "^TREE_SITTER.*" \ --blocklist-type '^__.*' \ + --no-prepend-enum-name \ --no-copy "$no_copy" \ $header_path > $output_path From 46965770fc554acdda414f70b4a1566e66988a5e Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Wed, 30 Aug 2023 20:03:03 +0300 Subject: [PATCH 333/347] fix(lib): segmentation fault in `ts_node_parse_state` --- lib/src/node.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/src/node.c b/lib/src/node.c index 092e96f8..546b9099 100644 --- a/lib/src/node.c +++ b/lib/src/node.c @@ -489,8 +489,10 @@ TSStateId ts_node_parse_state(TSNode self) { TSStateId ts_node_next_parse_state(TSNode self) { const TSLanguage *language = self.tree->language; uint16_t state = ts_node_parse_state(self); + if (state == TS_TREE_STATE_NONE) { + return TS_TREE_STATE_NONE; + } uint16_t symbol = ts_node_grammar_symbol(self); - return ts_language_next_state(language, state, symbol); } From 6d4aac723f7951dfecedafed5f80e5bc231a5ff0 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Tue, 19 Sep 2023 09:29:13 -0400 Subject: [PATCH 334/347] feat: add some more commonly used functions to exports.json --- lib/binding_web/exports.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git 
a/lib/binding_web/exports.json b/lib/binding_web/exports.json index ad7a6987..bc794480 100644 --- a/lib/binding_web/exports.json +++ b/lib/binding_web/exports.json @@ -18,10 +18,13 @@ "___cxa_atexit", "_abort", "_isalpha", + "_isspace", "_iswalnum", "_iswalpha", + "_iswblank", "_iswdigit", "_iswlower", + "_iswupper", "_iswspace", "_memchr", "_memcmp", @@ -31,7 +34,9 @@ "_strlen", "_strcmp", "_strncpy", + "_tolower", "_towupper", + "_stderr", "_ts_init", "_ts_language_field_count", From 8d0997d5b4f38fbd94c188bb24c1fa5c088a3a4b Mon Sep 17 00:00:00 2001 From: dstoc <539597+dstoc@users.noreply.github.com> Date: Mon, 17 Oct 2022 16:53:34 +1100 Subject: [PATCH 335/347] Add towlower to wasm exports tree-sitter/tree-sitter#1906 --- lib/binding_web/exports.json | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/binding_web/exports.json b/lib/binding_web/exports.json index bc794480..dfc9ff54 100644 --- a/lib/binding_web/exports.json +++ b/lib/binding_web/exports.json @@ -35,6 +35,7 @@ "_strcmp", "_strncpy", "_tolower", + "_towlower", "_towupper", "_stderr", From ef9cabd4b5e1eda9d5d3b9377f2bee4e781dafdf Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Wed, 20 Sep 2023 09:47:24 -0400 Subject: [PATCH 336/347] fix: update javascript tests and use cpp/javascript master for fixtures --- cli/src/tests/highlight_test.rs | 42 +++++------ cli/src/tests/parser_test.rs | 12 ++-- cli/src/tests/query_test.rs | 70 +++++++++---------- cli/src/tests/test_highlight_test.rs | 7 +- script/fetch-fixtures | 4 +- .../error_corpus/javascript_errors.txt | 8 +-- 6 files changed, 72 insertions(+), 71 deletions(-) diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index e400b047..12c120ab 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -62,7 +62,7 @@ lazy_static! 
{ fn test_highlighting_javascript() { let source = "const a = function(b) { return b + c; }"; assert_eq!( - &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), + &to_token_vector(source, &JS_HIGHLIGHT).unwrap(), &[vec![ ("const", vec!["keyword"]), (" ", vec![]), @@ -72,14 +72,14 @@ fn test_highlighting_javascript() { (" ", vec![]), ("function", vec!["keyword"]), ("(", vec!["punctuation.bracket"]), - ("b", vec!["variable.parameter"]), + ("b", vec!["variable"]), (")", vec!["punctuation.bracket"]), (" ", vec![]), ("{", vec!["punctuation.bracket"]), (" ", vec![]), ("return", vec!["keyword"]), (" ", vec![]), - ("b", vec!["variable.parameter"]), + ("b", vec!["variable"]), (" ", vec![]), ("+", vec!["operator"]), (" ", vec![]), @@ -93,7 +93,7 @@ fn test_highlighting_javascript() { #[test] fn test_highlighting_injected_html_in_javascript() { - let source = vec!["const s = html `
${a < b}
`;"].join("\n"); + let source = ["const s = html `
${a < b}
`;"].join("\n"); assert_eq!( &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), @@ -157,7 +157,7 @@ fn test_highlighting_injected_javascript_in_html_mini() { #[test] fn test_highlighting_injected_javascript_in_html() { - let source = vec![ + let source = [ "", " "].join("\n"); + let source = ["
<% foo() %>
"].join("\n"); assert_eq!( &to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(), @@ -377,7 +377,7 @@ fn test_highlighting_ejs_with_html_and_javascript() { fn test_highlighting_javascript_with_jsdoc() { // Regression test: the middle comment has no highlights. This should not prevent // later injections from highlighting properly. - let source = vec!["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n"); + let source = ["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n"); assert_eq!( &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), @@ -405,7 +405,7 @@ fn test_highlighting_javascript_with_jsdoc() { #[test] fn test_highlighting_with_content_children_included() { - let source = vec!["assert!(", " a.b.c() < D::e::()", ");"].join("\n"); + let source = ["assert!(", " a.b.c() < D::e::()", ");"].join("\n"); assert_eq!( &to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(), @@ -483,7 +483,7 @@ fn test_highlighting_cancellation() { #[test] fn test_highlighting_via_c_api() { - let highlights = vec![ + let highlights = [ "class=tag\0", "class=function\0", "class=string\0", @@ -622,11 +622,11 @@ fn test_highlighting_with_all_captures_applied() { [ \"{\" \"}\" \"(\" \")\" ] @punctuation.bracket "}; let mut rust_highlight_reverse = - HighlightConfiguration::new(language, "rust", &highlights_query, "", "", true).unwrap(); + HighlightConfiguration::new(language, "rust", highlights_query, "", "", true).unwrap(); rust_highlight_reverse.configure(&HIGHLIGHT_NAMES); assert_eq!( - &to_token_vector(&source, &rust_highlight_reverse).unwrap(), + &to_token_vector(source, &rust_highlight_reverse).unwrap(), &[[ ("fn", vec!["keyword"]), (" ", vec![]), @@ -743,20 +743,20 @@ fn to_token_vector<'a>( } HighlightEvent::Source { start, end } => { let s = str::from_utf8(&src[start..end]).unwrap(); - for (i, l) in s.split("\n").enumerate() { + for (i, l) in s.split('\n').enumerate() { let l = l.trim_end_matches('\r'); if i > 0 { lines.push(line); line = Vec::new(); } - if l.len() > 0 { 
+ if !l.is_empty() { line.push((l, highlights.clone())); } } } } } - if line.len() > 0 { + if !line.is_empty() { lines.push(line); } Ok(lines) diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index d490b78f..99616f56 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -148,7 +148,7 @@ fn test_parsing_with_custom_utf8_input() { ) ); assert_eq!(root.kind(), "source_file"); - assert_eq!(root.has_error(), false); + assert!(!root.has_error()); assert_eq!(root.child(0).unwrap().kind(), "function_item"); } @@ -187,7 +187,7 @@ fn test_parsing_with_custom_utf16_input() { "(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))" ); assert_eq!(root.kind(), "source_file"); - assert_eq!(root.has_error(), false); + assert!(!root.has_error()); assert_eq!(root.child(0).unwrap().kind(), "function_item"); } @@ -834,7 +834,7 @@ fn test_parsing_with_one_included_range() { concat!( "(program (expression_statement (call_expression ", "function: (member_expression object: (identifier) property: (property_identifier)) ", - "arguments: (arguments (string)))))", + "arguments: (arguments (string (string_fragment))))))", ) ); assert_eq!( @@ -1183,7 +1183,7 @@ fn test_parsing_with_a_newly_included_range() { .set_included_ranges(&[simple_range(range1_start, range1_end)]) .unwrap(); let tree = parser - .parse_with(&mut chunked_input(&source_code, 3), None) + .parse_with(&mut chunked_input(source_code, 3), None) .unwrap(); assert_eq!( tree.root_node().to_sexp(), @@ -1202,7 +1202,7 @@ fn test_parsing_with_a_newly_included_range() { ]) .unwrap(); let tree2 = parser - .parse_with(&mut chunked_input(&source_code, 3), Some(&tree)) + .parse_with(&mut chunked_input(source_code, 3), Some(&tree)) .unwrap(); assert_eq!( tree2.root_node().to_sexp(), @@ -1226,7 +1226,7 @@ fn test_parsing_with_a_newly_included_range() { simple_range(range3_start, range3_end), ]) .unwrap(); - let tree3 = 
parser.parse(&source_code, Some(&tree)).unwrap(); + let tree3 = parser.parse(source_code, Some(&tree)).unwrap(); assert_eq!( tree3.root_node().to_sexp(), concat!( diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 5fb33e01..13e4f8d0 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -323,16 +323,16 @@ fn test_query_errors_on_impossible_patterns() { assert_eq!( Query::new( js_lang, - "(binary_expression left: (identifier) left: (identifier))" + "(binary_expression left: (expression (identifier)) left: (expression (identifier)))" ), Err(QueryError { kind: QueryErrorKind::Structure, row: 0, - offset: 38, - column: 38, + offset: 51, + column: 51, message: [ - "(binary_expression left: (identifier) left: (identifier))", - " ^" + "(binary_expression left: (expression (identifier)) left: (expression (identifier)))", + " ^", ] .join("\n"), }) @@ -437,19 +437,19 @@ fn test_query_errors_on_impossible_patterns() { Query::new( js_lang, "(if_statement - condition: (parenthesized_expression (_expression) @cond))", + condition: (parenthesized_expression (expression) @cond))", ) .unwrap(); assert_eq!( - Query::new(js_lang, "(if_statement condition: (_expression))",), + Query::new(js_lang, "(if_statement condition: (expression))",), Err(QueryError { kind: QueryErrorKind::Structure, row: 0, offset: 14, column: 14, message: [ - "(if_statement condition: (_expression))", // + "(if_statement condition: (expression))", // " ^", ] .join("\n") @@ -1726,7 +1726,7 @@ fn test_query_matches_with_too_many_permutations_to_track() { collect_matches(matches, &query, source.as_str())[0], (0, vec![("pre", "hello"), ("post", "hello")]), ); - assert_eq!(cursor.did_exceed_match_limit(), true); + assert!(cursor.did_exceed_match_limit()); }); } @@ -1775,7 +1775,7 @@ fn test_query_sibling_patterns_dont_match_children_of_an_error() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, 
None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); assert_eq!( @@ -1825,7 +1825,7 @@ fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() { collect_matches(matches, &query, source.as_str()), vec![(1, vec![("method", "b")]); 50], ); - assert_eq!(cursor.did_exceed_match_limit(), true); + assert!(cursor.did_exceed_match_limit()); }); } @@ -1956,7 +1956,7 @@ fn test_query_matches_within_byte_range() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); @@ -2086,7 +2086,7 @@ fn test_query_captures_within_byte_range() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = @@ -2122,7 +2122,7 @@ fn test_query_matches_with_unrooted_patterns_intersecting_byte_range() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); // within the type parameter list @@ -2260,14 +2260,14 @@ fn test_query_captures_within_byte_range_assigned_after_iterating() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let mut captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); // Retrieve some captures let mut results = Vec::new(); for (mat, capture_ix) in captures.by_ref().take(5) { - let capture = mat.captures[capture_ix as usize]; + let capture = mat.captures[capture_ix]; results.push(( 
query.capture_names()[capture.index as usize], &source[capture.node.byte_range()], @@ -2290,7 +2290,7 @@ fn test_query_captures_within_byte_range_assigned_after_iterating() { results.clear(); captures.set_byte_range(source.find("Ok").unwrap()..source.len()); for (mat, capture_ix) in captures { - let capture = mat.captures[capture_ix as usize]; + let capture = mat.captures[capture_ix]; results.push(( query.capture_names()[capture.index as usize], &source[capture.node.byte_range()], @@ -2393,7 +2393,7 @@ fn test_query_matches_different_queries_same_cursor() { let mut cursor = QueryCursor::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let matches = cursor.matches(&query1, tree.root_node(), source.as_bytes()); assert_eq!( @@ -2436,7 +2436,7 @@ fn test_query_matches_with_multiple_captures_on_a_node() { let mut cursor = QueryCursor::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); assert_eq!( @@ -2524,7 +2524,7 @@ fn test_query_matches_with_captured_wildcard_at_root() { let mut parser = Parser::new(); let mut cursor = QueryCursor::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let match_capture_names_and_rows = cursor .matches(&query, tree.root_node(), source.as_bytes()) @@ -2790,7 +2790,7 @@ fn test_query_captures_basic() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); @@ -2873,7 +2873,7 @@ fn test_query_captures_with_text_conditions() { let mut parser = 
Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); @@ -3019,7 +3019,7 @@ fn test_query_captures_with_duplicates() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); @@ -3221,11 +3221,11 @@ fn test_query_captures_with_too_many_nested_results() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); cursor.set_match_limit(32); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); - let captures = collect_captures(captures, &query, &source); + let captures = collect_captures(captures, &query, source); assert_eq!( &captures[0..4], @@ -3284,7 +3284,7 @@ fn test_query_captures_with_definite_pattern_containing_many_nested_matches() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); @@ -3320,7 +3320,7 @@ fn test_query_captures_ordered_by_both_start_and_end_positions() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); @@ -3361,7 +3361,7 @@ fn 
test_query_captures_with_matches_removed() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let mut captured_strings = Vec::new(); @@ -3405,7 +3405,7 @@ fn test_query_captures_with_matches_removed_before_they_finish() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let mut captured_strings = Vec::new(); @@ -3447,7 +3447,7 @@ fn test_query_captures_and_matches_iterators_are_fused() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let mut captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); @@ -3521,7 +3521,7 @@ fn test_query_text_callback_returns_chunks() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), |node: Node| { chunks_in_range(node.byte_range()) @@ -3619,7 +3619,7 @@ fn test_query_lifetime_is_separate_from_nodes_lifetime() { let language = get_language("javascript"); let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); fn take_first_node_from_captures<'tree>( source: &str, @@ -3932,10 +3932,10 @@ fn test_query_is_pattern_guaranteed_at_step() { Row { description: "a guaranteed step with a field", language: get_language("javascript"), - pattern: r#"(binary_expression left: (identifier) right: (_))"#, + pattern: 
r#"(binary_expression left: (expression) right: (_))"#, results_by_substring: &[ ("binary_expression", false), - ("(identifier)", false), + ("(expression)", false), ("(_)", true), ], }, @@ -4002,7 +4002,7 @@ fn test_query_is_pattern_guaranteed_at_step() { "#, results_by_substring: &[ ("identifier", false), - ("property_identifier", true), + ("property_identifier", false), ("[", true), ], }, diff --git a/cli/src/tests/test_highlight_test.rs b/cli/src/tests/test_highlight_test.rs index d9b2c43a..06ad7d59 100644 --- a/cli/src/tests/test_highlight_test.rs +++ b/cli/src/tests/test_highlight_test.rs @@ -12,7 +12,7 @@ fn test_highlight_test_with_basic_test() { Some("injections.scm"), &[ "function".to_string(), - "variable.parameter".to_string(), + "variable".to_string(), "keyword".to_string(), ], ); @@ -22,7 +22,7 @@ fn test_highlight_test_with_basic_test() { " // ^ function", " // ^ keyword", " return d + e;", - " // ^ variable.parameter", + " // ^ variable", " // ^ !variable", "};", ] @@ -35,7 +35,7 @@ fn test_highlight_test_with_basic_test() { &[ Assertion::new(1, 5, false, String::from("function")), Assertion::new(1, 11, false, String::from("keyword")), - Assertion::new(4, 9, false, String::from("variable.parameter")), + Assertion::new(4, 9, false, String::from("variable")), Assertion::new(4, 11, true, String::from("variable")), ] ); @@ -53,6 +53,7 @@ fn test_highlight_test_with_basic_test() { (Point::new(1, 19), Point::new(1, 20), Highlight(1)), // "d" (Point::new(4, 2), Point::new(4, 8), Highlight(2)), // "return" (Point::new(4, 9), Point::new(4, 10), Highlight(1)), // "d" + (Point::new(4, 13), Point::new(4, 14), Highlight(1)), // "e" ] ); } diff --git a/script/fetch-fixtures b/script/fetch-fixtures index eb66d314..1eec16ee 100755 --- a/script/fetch-fixtures +++ b/script/fetch-fixtures @@ -23,12 +23,12 @@ fetch_grammar() { fetch_grammar bash master fetch_grammar c master -fetch_grammar cpp 670404d7c689be1c868a46f919ba2a3912f2b7ef +fetch_grammar cpp master 
fetch_grammar embedded-template master fetch_grammar go master fetch_grammar html master fetch_grammar java master -fetch_grammar javascript partial-order-precedences +fetch_grammar javascript master fetch_grammar jsdoc master fetch_grammar json master fetch_grammar php master diff --git a/test/fixtures/error_corpus/javascript_errors.txt b/test/fixtures/error_corpus/javascript_errors.txt index 4359ae68..e2f21176 100644 --- a/test/fixtures/error_corpus/javascript_errors.txt +++ b/test/fixtures/error_corpus/javascript_errors.txt @@ -74,8 +74,8 @@ if ({a: 'b'} {c: 'd'}) { (program (if_statement (parenthesized_expression - (ERROR (object (pair (property_identifier) (string)))) - (object (pair (property_identifier) (string)))) + (ERROR (object (pair (property_identifier) (string (string_fragment))))) + (object (pair (property_identifier) (string (string_fragment))))) (statement_block (expression_statement (assignment_expression @@ -178,12 +178,12 @@ function main(x) { (expression_statement (call_expression (member_expression (identifier) (property_identifier)) - (arguments (string)))) + (arguments (string (string_fragment))))) (expression_statement (binary_expression (identifier) (ERROR) (call_expression (member_expression (identifier) (property_identifier)) - (arguments (string))))) + (arguments (string (string_fragment)))))) (return_statement (object))))) From cc6689534323ad0f142250ed553fb6469ff34bb3 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Thu, 21 Sep 2023 00:52:50 -0400 Subject: [PATCH 337/347] perf: cache the current language configuration to lookup later on --- Cargo.toml | 4 +++ cli/loader/src/lib.rs | 64 +++++++++++++++++++++++-------------------- 2 files changed, 39 insertions(+), 29 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 03e24caf..bc2aedaa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,3 +15,7 @@ codegen-units = 1 # Maximum size reduction optimizations. [profile.size] inherits = "release" opt-level = "s" # Optimize for size. 
+ +[profile.profile] +inherits = "release" +strip = false diff --git a/cli/loader/src/lib.rs b/cli/loader/src/lib.rs index d260a96c..ed9c5f39 100644 --- a/cli/loader/src/lib.rs +++ b/cli/loader/src/lib.rs @@ -70,12 +70,12 @@ impl Config { } #[cfg(unix)] -const DYLIB_EXTENSION: &'static str = "so"; +const DYLIB_EXTENSION: &str = "so"; #[cfg(windows)] const DYLIB_EXTENSION: &'static str = "dll"; -const BUILD_TARGET: &'static str = env!("BUILD_TARGET"); +const BUILD_TARGET: &str = env!("BUILD_TARGET"); pub struct LanguageConfiguration<'a> { pub scope: Option, @@ -101,6 +101,7 @@ pub struct Loader { languages_by_id: Vec<(PathBuf, OnceCell)>, language_configurations: Vec>, language_configuration_ids_by_file_type: HashMap>, + language_configuration_in_current_path: Option, highlight_names: Box>>, use_all_highlight_names: bool, debug_build: bool, @@ -127,13 +128,14 @@ impl Loader { languages_by_id: Vec::new(), language_configurations: Vec::new(), language_configuration_ids_by_file_type: HashMap::new(), + language_configuration_in_current_path: None, highlight_names: Box::new(Mutex::new(Vec::new())), use_all_highlight_names: true, debug_build: false, } } - pub fn configure_highlights(&mut self, names: &Vec) { + pub fn configure_highlights(&mut self, names: &[String]) { self.use_all_highlight_names = false; let mut highlights = self.highlight_names.lock().unwrap(); highlights.clear(); @@ -149,8 +151,7 @@ impl Loader { eprintln!("Warning: You have not configured any parser directories!"); eprintln!("Please run `tree-sitter init-config` and edit the resulting"); eprintln!("configuration file to indicate where we should look for"); - eprintln!("language grammars."); - eprintln!(""); + eprintln!("language grammars.\n"); } for parser_container_dir in &config.parser_directories { if let Ok(entries) = fs::read_dir(parser_container_dir) { @@ -160,6 +161,7 @@ impl Loader { if parser_dir_name.starts_with("tree-sitter-") { self.find_language_configurations_at_path( 
&parser_container_dir.join(parser_dir_name), + false, ) .ok(); } @@ -171,7 +173,7 @@ impl Loader { } pub fn languages_at_path(&mut self, path: &Path) -> Result> { - if let Ok(configurations) = self.find_language_configurations_at_path(path) { + if let Ok(configurations) = self.find_language_configurations_at_path(path, true) { let mut language_ids = configurations .iter() .map(|c| c.language_id) @@ -342,7 +344,7 @@ impl Loader { self.load_language_from_sources( &grammar_json.name, - &header_path, + header_path, &parser_path, scanner_path.as_deref(), ) @@ -362,7 +364,7 @@ impl Loader { let mut library_path = self.parser_lib_path.join(lib_name); library_path.set_extension(DYLIB_EXTENSION); - let recompile = needs_recompile(&library_path, &parser_path, scanner_path) + let recompile = needs_recompile(&library_path, parser_path, scanner_path) .with_context(|| "Failed to compare source and binary timestamps")?; if recompile { @@ -382,7 +384,7 @@ impl Loader { } if compiler.is_like_msvc() { - command.args(&["/nologo", "/LD", "/I"]).arg(header_path); + command.args(["/nologo", "/LD", "/I"]).arg(header_path); if self.debug_build { command.arg("/Od"); } else { @@ -514,24 +516,20 @@ impl Loader { } } - pub fn find_language_configurations_at_path<'a>( - &'a mut self, + pub fn find_language_configurations_at_path( + &mut self, parser_path: &Path, + set_current_path_config: bool, ) -> Result<&[LanguageConfiguration]> { - #[derive(Deserialize)] + #[derive(Default, Deserialize)] #[serde(untagged)] enum PathsJSON { + #[default] Empty, Single(String), Multiple(Vec), } - impl Default for PathsJSON { - fn default() -> Self { - PathsJSON::Empty - } - } - impl PathsJSON { fn into_vec(self) -> Option> { match self { @@ -614,7 +612,7 @@ impl Loader { let configuration = LanguageConfiguration { root_path: parser_path.to_path_buf(), - language_name: grammar_json.name, + language_name: grammar_json.name.clone(), scope: config_json.scope, language_id, file_types: 
config_json.file_types.unwrap_or(Vec::new()), @@ -627,19 +625,26 @@ impl Loader { highlights_filenames: config_json.highlights.into_vec(), highlight_config: OnceCell::new(), tags_config: OnceCell::new(), - highlight_names: &*self.highlight_names, + highlight_names: &self.highlight_names, use_all_highlight_names: self.use_all_highlight_names, }; for file_type in &configuration.file_types { self.language_configuration_ids_by_file_type .entry(file_type.to_string()) - .or_insert(Vec::new()) + .or_default() .push(self.language_configurations.len()); } self.language_configurations .push(unsafe { mem::transmute(configuration) }); + + if set_current_path_config + && self.language_configuration_in_current_path.is_none() + { + self.language_configuration_in_current_path = + Some(self.language_configurations.len() - 1); + } } } } @@ -668,7 +673,7 @@ impl Loader { tags_filenames: None, highlight_config: OnceCell::new(), tags_config: OnceCell::new(), - highlight_names: &*self.highlight_names, + highlight_names: &self.highlight_names, use_all_highlight_names: self.use_all_highlight_names, }; self.language_configurations @@ -693,11 +698,11 @@ impl Loader { if let Some(scope) = scope { if let Some(config) = self .language_configuration_for_scope(scope) - .with_context(|| format!("Failed to load language for scope '{}'", scope))? + .with_context(|| format!("Failed to load language for scope '{scope}'"))? { Ok(config.0) } else { - return Err(anyhow!("Unknown scope '{}'", scope)); + Err(anyhow!("Unknown scope '{scope}'")) } } else if let Some((lang, _)) = self .language_configuration_for_file_name(path) @@ -709,8 +714,10 @@ impl Loader { })? { Ok(lang) + } else if let Some(id) = self.language_configuration_in_current_path { + Ok(self.language_for_id(self.language_configurations[id].language_id)?) } else if let Some(lang) = self - .languages_at_path(¤t_dir) + .languages_at_path(current_dir) .with_context(|| "Failed to load language in current directory")? 
.first() .cloned() @@ -833,7 +840,7 @@ impl<'a> LanguageConfiguration<'a> { } } } - result.configure(&all_highlight_names.as_slice()); + result.configure(all_highlight_names.as_slice()); Ok(Some(result)) } }) @@ -869,7 +876,6 @@ impl<'a> LanguageConfiguration<'a> { locals_query.len(), ) } - .into() } else { error.into() } @@ -879,9 +885,9 @@ impl<'a> LanguageConfiguration<'a> { .map(Option::as_ref) } - fn include_path_in_query_error<'b>( + fn include_path_in_query_error( mut error: QueryError, - ranges: &'b Vec<(String, Range)>, + ranges: &[(String, Range)], source: &str, start_offset: usize, ) -> Error { From dd52cafdd9537c93bb2d77d68385dab47934d9f5 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Thu, 21 Sep 2023 11:28:22 +0300 Subject: [PATCH 338/347] chore: switch fetch-fixtures.cmd to all master branches --- script/fetch-fixtures.cmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/script/fetch-fixtures.cmd b/script/fetch-fixtures.cmd index 5e8b6a16..32727b0c 100644 --- a/script/fetch-fixtures.cmd +++ b/script/fetch-fixtures.cmd @@ -2,12 +2,12 @@ call:fetch_grammar bash master call:fetch_grammar c master -call:fetch_grammar cpp 670404d7c689be1c868a46f919ba2a3912f2b7ef +call:fetch_grammar cpp master call:fetch_grammar embedded-template master call:fetch_grammar go master call:fetch_grammar html master call:fetch_grammar java master -call:fetch_grammar javascript partial-order-precedences +call:fetch_grammar javascript master call:fetch_grammar jsdoc master call:fetch_grammar json master call:fetch_grammar php master From 82ddb3ddcc30dd557695d02b189ca17ef1993374 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Thu, 5 Oct 2023 07:59:35 +0300 Subject: [PATCH 339/347] cicd: add skips for fragile corpus tests --- cli/src/tests/corpus_test.rs | 59 ++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 6b2f2a20..ce89743b 100644 
--- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -14,71 +14,81 @@ use crate::{ test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry}, util, }; -use std::{env, fs}; +use std::{collections::HashSet, env, fs}; use tree_sitter::{LogType, Node, Parser, Point, Range, Tree}; use tree_sitter_proc_macro::test_with_seed; #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_bash(seed: usize) { - test_language_corpus(seed, "bash"); + test_language_corpus( + "bash", + seed, + Some(&[ + // Fragile tests where edit customization changes + // lead to significant parse tree structure changes. + "bash - corpus - commands - Nested Heredocs", + "bash - corpus - commands - Quoted Heredocs", + "bash - corpus - commands - Heredocs with weird characters", + ]), + ); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_c(seed: usize) { - test_language_corpus(seed, "c"); + test_language_corpus("c", seed, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_cpp(seed: usize) { - test_language_corpus(seed, "cpp"); + test_language_corpus("cpp", seed, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_embedded_template(seed: usize) { - test_language_corpus(seed, "embedded-template"); + test_language_corpus("embedded-template", seed, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_go(seed: usize) { - test_language_corpus(seed, "go"); + test_language_corpus("go", seed, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_html(seed: usize) { - test_language_corpus(seed, "html"); + test_language_corpus("html", seed, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_javascript(seed: usize) { - test_language_corpus(seed, "javascript"); + test_language_corpus("javascript", seed, None); } 
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_json(seed: usize) { - test_language_corpus(seed, "json"); + test_language_corpus("json", seed, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_php(seed: usize) { - test_language_corpus(seed, "php"); + test_language_corpus("php", seed, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_python(seed: usize) { - test_language_corpus(seed, "python"); + test_language_corpus("python", seed, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_ruby(seed: usize) { - test_language_corpus(seed, "ruby"); + test_language_corpus("ruby", seed, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_rust(seed: usize) { - test_language_corpus(seed, "rust"); + test_language_corpus("rust", seed, None); } -fn test_language_corpus(start_seed: usize, language_name: &str) { +fn test_language_corpus(language_name: &str, start_seed: usize, skipped: Option<&[&str]>) { let grammars_dir = fixtures_dir().join("grammars"); let error_corpus_dir = fixtures_dir().join("error_corpus"); let template_corpus_dir = fixtures_dir().join("template_corpus"); @@ -100,6 +110,8 @@ fn test_language_corpus(start_seed: usize, language_name: &str) { t })); + let skipped = skipped.map(|x| HashSet::<&str>::from_iter(x.iter().map(|x| *x))); + let language = get_language(language_name); let mut failure_count = 0; @@ -112,7 +124,14 @@ fn test_language_corpus(start_seed: usize, language_name: &str) { println!(); for (test_index, test) in tests.iter().enumerate() { - let test_name = format!("{language_name} example - {}", test.name); + let test_name = format!("{language_name} - {}", test.name); + + if let Some(skipped) = skipped.as_ref() { + if skipped.contains(test_name.as_str()) { + println!(" {test_index}. {test_name} - SKIPPED"); + continue; + } + } println!(" {test_index}. 
{test_name}"); @@ -129,10 +148,7 @@ fn test_language_corpus(start_seed: usize, language_name: &str) { } if actual_output != test.output { - println!( - "Incorrect initial parse for {} - {}", - language_name, test.name, - ); + println!("Incorrect initial parse for {test_name}"); print_diff_key(); print_diff(&actual_output, &test.output); println!(""); @@ -219,10 +235,7 @@ fn test_language_corpus(start_seed: usize, language_name: &str) { } if actual_output != test.output { - println!( - "Incorrect parse for {} - {} - seed {}", - language_name, test.name, seed - ); + println!("Incorrect parse for {test_name} - seed {seed}"); print_diff_key(); print_diff(&actual_output, &test.output); println!(""); From d95836eb35872d7eec4f3b7a4e1dd709a814391a Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Thu, 5 Oct 2023 14:39:36 +0300 Subject: [PATCH 340/347] cicd: add en extra check for non matchable skips --- cli/src/tests/corpus_test.rs | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index ce89743b..8a4c54cc 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -14,7 +14,7 @@ use crate::{ test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry}, util, }; -use std::{collections::HashSet, env, fs}; +use std::{collections::HashMap, env, fs}; use tree_sitter::{LogType, Node, Parser, Point, Range, Tree}; use tree_sitter_proc_macro::test_with_seed; @@ -110,7 +110,7 @@ fn test_language_corpus(language_name: &str, start_seed: usize, skipped: Option< t })); - let skipped = skipped.map(|x| HashSet::<&str>::from_iter(x.iter().map(|x| *x))); + let mut skipped = skipped.map(|x| HashMap::<&str, usize>::from_iter(x.iter().map(|x| (*x, 0)))); let language = get_language(language_name); let mut failure_count = 0; @@ -125,10 +125,10 @@ fn test_language_corpus(language_name: &str, start_seed: usize, skipped: Option< println!(); for (test_index, 
test) in tests.iter().enumerate() { let test_name = format!("{language_name} - {}", test.name); - - if let Some(skipped) = skipped.as_ref() { - if skipped.contains(test_name.as_str()) { + if let Some(skipped) = skipped.as_mut() { + if let Some(counter) = skipped.get_mut(test_name.as_str()) { println!(" {test_index}. {test_name} - SKIPPED"); + *counter += 1; continue; } } @@ -262,6 +262,18 @@ fn test_language_corpus(language_name: &str, start_seed: usize, skipped: Option< if failure_count > 0 { panic!("{} {} corpus tests failed", failure_count, language_name); } + + if let Some(skipped) = skipped.as_mut() { + skipped.retain(|_, v| *v == 0); + + if skipped.len() > 0 { + println!("Non matchable skip definitions:"); + for k in skipped.keys() { + println!(" {k}"); + } + panic!("Non matchable skip definitions needs to be removed"); + } + } } #[test] From c63f1680adb6237cba4a372146497e2e4757c174 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Wed, 4 Oct 2023 11:20:18 +0300 Subject: [PATCH 341/347] chore(rust): improve perf for position funcs --- Cargo.lock | 5 +++-- cli/Cargo.toml | 1 + cli/src/parse.rs | 44 ++++++++++++++++++++++++-------------------- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 80a4e28d..be8829ac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -469,9 +469,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.5.0" +version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" [[package]] name = "minimal-lexical" @@ -941,6 +941,7 @@ dependencies = [ "indoc", "lazy_static", "log", + "memchr", "path-slash", "pretty_assertions", "rand", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 0edd3c3d..e62c443a 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -32,6 +32,7 @@ glob = "0.3.1" html-escape = "0.2.13" indexmap = 
"2.0.0" lazy_static = "1.4.0" +memchr = "2.6.3" path-slash = "0.2.1" regex = "1.9.1" regex-syntax = "0.7.4" diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 5b1a4b31..30ddd238 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -370,31 +370,35 @@ fn parse_edit_flag(source_code: &Vec, flag: &str) -> Result { }) } -fn offset_for_position(input: &Vec, position: Point) -> usize { - let mut current_position = Point { row: 0, column: 0 }; - for (i, c) in input.iter().enumerate() { - if *c as char == '\n' { - current_position.row += 1; - current_position.column = 0; - } else { - current_position.column += 1; - } - if current_position > position { - return i; +fn offset_for_position(input: &[u8], position: Point) -> usize { + let mut row = 0; + let mut offset = 0; + let mut iter = memchr::memchr_iter(b'\n', input); + loop { + if let Some(pos) = iter.next() { + if row < position.row { + row += 1; + offset = pos; + continue; + } } + offset += 1; + break; } - return input.len(); + offset + position.column } -fn position_for_offset(input: &Vec, offset: usize) -> Point { +fn position_for_offset(input: &[u8], offset: usize) -> Point { let mut result = Point { row: 0, column: 0 }; - for c in &input[0..offset] { - if *c as char == '\n' { - result.row += 1; - result.column = 0; - } else { - result.column += 1; - } + let mut last = 0; + for pos in memchr::memchr_iter(b'\n', &input[..offset]) { + result.row += 1; + last = pos; } + result.column = if result.row > 0 { + offset - last - 1 + } else { + offset + }; result } From a5a75648189612bc15138d92331e98c96f87a748 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Wed, 4 Oct 2023 11:21:48 +0300 Subject: [PATCH 342/347] chore(rust): add error reporting for position funcs --- cli/src/parse.rs | 35 ++++++++++++++++++++++++----------- cli/src/tests/corpus_test.rs | 4 ++-- cli/src/tests/node_test.rs | 2 +- cli/src/tests/parser_test.rs | 13 ++++++++----- cli/src/tests/tree_test.rs | 2 +- 5 files changed, 36 insertions(+), 20 
deletions(-) diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 30ddd238..68279361 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -115,7 +115,7 @@ pub fn parse_file_at_path(opts: ParseFileOptions) -> Result { for (i, edit) in opts.edits.iter().enumerate() { let edit = parse_edit_flag(&source_code, edit)?; - perform_edit(&mut tree, &mut source_code, &edit); + perform_edit(&mut tree, &mut source_code, &edit)?; tree = parser.parse(&source_code, Some(&tree)).unwrap(); if opts.debug_graph { @@ -309,14 +309,14 @@ pub fn parse_file_at_path(opts: ParseFileOptions) -> Result { Ok(false) } -pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) -> InputEdit { +pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) -> Result { let start_byte = edit.position; let old_end_byte = edit.position + edit.deleted_length; let new_end_byte = edit.position + edit.inserted_text.len(); - let start_position = position_for_offset(input, start_byte); - let old_end_position = position_for_offset(input, old_end_byte); + let start_position = position_for_offset(input, start_byte)?; + let old_end_position = position_for_offset(input, old_end_byte)?; input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned()); - let new_end_position = position_for_offset(input, new_end_byte); + let new_end_position = position_for_offset(input, new_end_byte)?; let edit = InputEdit { start_byte, old_end_byte, @@ -326,7 +326,7 @@ pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) -> InputE new_end_position, }; tree.edit(&edit); - edit + Ok(edit) } fn parse_edit_flag(source_code: &Vec, flag: &str) -> Result { @@ -355,7 +355,7 @@ fn parse_edit_flag(source_code: &Vec, flag: &str) -> Result { let row = usize::from_str_radix(row, 10).map_err(|_| error())?; let column = parts.next().ok_or_else(error)?; let column = usize::from_str_radix(column, 10).map_err(|_| error())?; - offset_for_position(source_code, Point { row, column }) + 
offset_for_position(source_code, Point { row, column })? } else { usize::from_str_radix(position, 10).map_err(|_| error())? }; @@ -370,7 +370,7 @@ fn parse_edit_flag(source_code: &Vec, flag: &str) -> Result { }) } -fn offset_for_position(input: &[u8], position: Point) -> usize { +pub fn offset_for_position(input: &[u8], position: Point) -> Result { let mut row = 0; let mut offset = 0; let mut iter = memchr::memchr_iter(b'\n', input); @@ -385,10 +385,23 @@ fn offset_for_position(input: &[u8], position: Point) -> usize { offset += 1; break; } - offset + position.column + if position.row - row > 0 { + return Err(anyhow!("Failed to address a row: {}", position.row)); + } + if let Some(pos) = iter.next() { + if (pos - offset < position.column) || (input[offset] == b'\n' && position.column > 0) { + return Err(anyhow!("Failed to address a column: {}", position.column)); + }; + } else if input.len() - offset < position.column { + return Err(anyhow!("Failed to address a column over the end")); + } + Ok(offset + position.column) } -fn position_for_offset(input: &[u8], offset: usize) -> Point { +pub fn position_for_offset(input: &[u8], offset: usize) -> Result { + if offset > input.len() { + return Err(anyhow!("Failed to address an offset: {offset}")); + } let mut result = Point { row: 0, column: 0 }; let mut last = 0; for pos in memchr::memchr_iter(b'\n', &input[..offset]) { @@ -400,5 +413,5 @@ fn position_for_offset(input: &[u8], offset: usize) -> Point { } else { offset }; - result + Ok(result) } diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 8a4c54cc..589b1839 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -187,7 +187,7 @@ fn test_language_corpus(language_name: &str, start_seed: usize, skipped: Option< for _ in 0..1 + rand.unsigned(*EDIT_COUNT) { let edit = get_random_edit(&mut rand, &input); undo_stack.push(invert_edit(&input, &edit)); - perform_edit(&mut tree, &mut input, &edit); + perform_edit(&mut tree, 
&mut input, &edit).unwrap(); } if log_seed { @@ -219,7 +219,7 @@ fn test_language_corpus(language_name: &str, start_seed: usize, skipped: Option< // Undo all of the edits and re-parse again. while let Some(edit) = undo_stack.pop() { - perform_edit(&mut tree2, &mut input, &edit); + perform_edit(&mut tree2, &mut input, &edit).unwrap(); } if *LOG_GRAPH_ENABLED { eprintln!("{}\n", String::from_utf8_lossy(&input)); diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs index 43b3d66b..c4548d3e 100644 --- a/cli/src/tests/node_test.rs +++ b/cli/src/tests/node_test.rs @@ -552,7 +552,7 @@ fn test_node_edit() { let edit = get_random_edit(&mut rand, &mut code); let mut tree2 = tree.clone(); - let edit = perform_edit(&mut tree2, &mut code, &edit); + let edit = perform_edit(&mut tree2, &mut code, &edit).unwrap(); for node in nodes_before.iter_mut() { node.edit(&edit); } diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 99616f56..434a81f9 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -342,7 +342,8 @@ fn test_parsing_after_editing_beginning_of_code() { deleted_length: 0, inserted_text: b" || 5".to_vec(), }, - ); + ) + .unwrap(); let mut recorder = ReadRecorder::new(&code); let tree = parser @@ -389,7 +390,8 @@ fn test_parsing_after_editing_end_of_code() { deleted_length: 0, inserted_text: b".d".to_vec(), }, - ); + ) + .unwrap(); let mut recorder = ReadRecorder::new(&code); let tree = parser @@ -464,7 +466,8 @@ h + i deleted_length: 0, inserted_text: b"1234".to_vec(), }, - ); + ) + .unwrap(); assert_eq!( code, @@ -528,12 +531,12 @@ fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() { let undo = invert_edit(&source, &edit); let mut tree2 = tree.clone(); - perform_edit(&mut tree2, &mut source, &edit); + perform_edit(&mut tree2, &mut source, &edit).unwrap(); tree2 = parser.parse(&source, Some(&tree2)).unwrap(); assert!(tree2.root_node().has_error()); let mut tree3 = tree2.clone(); - 
perform_edit(&mut tree3, &mut source, &undo); + perform_edit(&mut tree3, &mut source, &undo).unwrap(); tree3 = parser.parse(&source, Some(&tree3)).unwrap(); assert_eq!(tree3.root_node().to_sexp(), tree.root_node().to_sexp(),); } diff --git a/cli/src/tests/tree_test.rs b/cli/src/tests/tree_test.rs index 7d091c3f..c63b588b 100644 --- a/cli/src/tests/tree_test.rs +++ b/cli/src/tests/tree_test.rs @@ -663,7 +663,7 @@ fn get_changed_ranges( source_code: &mut Vec, edit: Edit, ) -> Vec { - perform_edit(tree, source_code, &edit); + perform_edit(tree, source_code, &edit).unwrap(); let new_tree = parser.parse(&source_code, Some(tree)).unwrap(); let result = tree.changed_ranges(&new_tree).collect(); *tree = new_tree; From a91a6cc61508a518fd5ff93843cce5bdb5e1c19b Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Mon, 16 Oct 2023 16:13:40 +0300 Subject: [PATCH 343/347] cicd: disable failed `linux-riscv64gc` target Related issue #2712 --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 05c08d2c..77dbb9df 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -48,7 +48,7 @@ jobs: - linux-powerpc # - linux-powerpc64 # - linux-powerpc64el # - - linux-riscv64gc # + # - linux-riscv64gc # #2712 - linux-s390x # - linux-sparc64 # - linux-thumbv7neon # @@ -77,7 +77,7 @@ jobs: - { platform: linux-powerpc , target: powerpc-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - { platform: linux-powerpc64 , target: powerpc64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - { platform: linux-powerpc64el , target: powerpc64le-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - - { platform: linux-riscv64gc , target: riscv64gc-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + # - { platform: linux-riscv64gc , target: riscv64gc-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } #2712 - { platform: linux-s390x , 
target: s390x-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - { platform: linux-sparc64 , target: sparc64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - { platform: linux-thumbv7neon , target: thumbv7neon-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true } @@ -102,7 +102,7 @@ jobs: - { platform: linux-powerpc , cc: powerpc-linux-gnu-gcc , ar: powerpc-linux-gnu-ar } - { platform: linux-powerpc64 , cc: powerpc64-linux-gnu-gcc , ar: powerpc64-linux-gnu-ar } - { platform: linux-powerpc64el , cc: powerpc64le-linux-gnu-gcc , ar: powerpc64le-linux-gnu-ar } - - { platform: linux-riscv64gc , cc: riscv64-linux-gnu-gcc , ar: riscv64-linux-gnu-ar } + # - { platform: linux-riscv64gc , cc: riscv64-linux-gnu-gcc , ar: riscv64-linux-gnu-ar } #2712 - { platform: linux-s390x , cc: s390x-linux-gnu-gcc , ar: s390x-linux-gnu-ar } - { platform: linux-sparc64 , cc: sparc64-linux-gnu-gcc , ar: sparc64-linux-gnu-ar } - { platform: linux-thumbv7neon , cc: arm-linux-gnueabihf-gcc , ar: arm-linux-gnueabihf-ar } From 5e62120050ae4db6adf07c3071641f36bcb62eb2 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Mon, 16 Oct 2023 21:45:40 +0300 Subject: [PATCH 344/347] chore: fix local fixture test for C language --- test/fixtures/error_corpus/c_errors.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/fixtures/error_corpus/c_errors.txt b/test/fixtures/error_corpus/c_errors.txt index 4b507590..d44400cf 100644 --- a/test/fixtures/error_corpus/c_errors.txt +++ b/test/fixtures/error_corpus/c_errors.txt @@ -108,7 +108,7 @@ int main() { (function_declarator (identifier) (parameter_list)) (compound_statement (declaration (primitive_type) (identifier)) - (ERROR (primitive_type) (UNEXPECTED '$'))))) + (ERROR (primitive_type) (ERROR) (identifier) (UNEXPECTED '@'))))) ========================================= Extra values in parenthesized expressions From e265929f90b6c961466f96679d222c01b1d53bbf Mon Sep 17 00:00:00 2001 From: Daumantas Kavolis 
Date: Wed, 25 Oct 2023 10:19:03 +0300 Subject: [PATCH 345/347] Fix `goto_previous_sibling` with padded first children --- cli/src/tests/tree_test.rs | 30 ++++++++++++++++++++++++++++++ lib/src/tree_cursor.c | 6 +++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/cli/src/tests/tree_test.rs b/cli/src/tests/tree_test.rs index c63b588b..161f65e9 100644 --- a/cli/src/tests/tree_test.rs +++ b/cli/src/tests/tree_test.rs @@ -376,6 +376,36 @@ fn test_tree_cursor() { assert_eq!(copy.node().kind(), "struct_item"); } +#[test] +fn test_tree_cursor_previous_sibling() { + let mut parser = Parser::new(); + parser.set_language(get_language("rust")).unwrap(); + + let text = " + // Hi there + // This is fun! + // Another one! +"; + let tree = parser.parse(text, None).unwrap(); + + let mut cursor = tree.walk(); + assert_eq!(cursor.node().kind(), "source_file"); + + assert!(cursor.goto_last_child()); + assert_eq!(cursor.node().kind(), "line_comment"); + assert_eq!(cursor.node().utf8_text(text.as_bytes()).unwrap(), "// Another one!"); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "line_comment"); + assert_eq!(cursor.node().utf8_text(text.as_bytes()).unwrap(), "// This is fun!"); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "line_comment"); + assert_eq!(cursor.node().utf8_text(text.as_bytes()).unwrap(), "// Hi there"); + + assert!(!cursor.goto_previous_sibling()); +} + #[test] fn test_tree_cursor_fields() { let mut parser = Parser::new(); diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index 25eca482..f08b9692 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -377,7 +377,11 @@ TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(TSTreeCursor *_self position = parent->position; uint32_t child_index = array_back(&self->stack)->child_index; const Subtree *children = ts_subtree_children((*(parent->subtree))); - for (uint32_t i = 0; i < child_index; ++i) { + + // skip first 
child padding since its position should match the position of the parent + if (child_index > 0) + position = length_add(position, ts_subtree_size(children[0])); + for (uint32_t i = 1; i < child_index; ++i) { position = length_add(position, ts_subtree_total_size(children[i])); } if (child_index > 0) From e26e23fd0e6baad6b5285ba1d990142b1f0ff1aa Mon Sep 17 00:00:00 2001 From: Daumantas Kavolis Date: Wed, 25 Oct 2023 10:24:35 +0300 Subject: [PATCH 346/347] Fix formatting --- cli/src/tests/tree_test.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/cli/src/tests/tree_test.rs b/cli/src/tests/tree_test.rs index 161f65e9..f3792138 100644 --- a/cli/src/tests/tree_test.rs +++ b/cli/src/tests/tree_test.rs @@ -393,15 +393,24 @@ fn test_tree_cursor_previous_sibling() { assert!(cursor.goto_last_child()); assert_eq!(cursor.node().kind(), "line_comment"); - assert_eq!(cursor.node().utf8_text(text.as_bytes()).unwrap(), "// Another one!"); + assert_eq!( + cursor.node().utf8_text(text.as_bytes()).unwrap(), + "// Another one!" + ); assert!(cursor.goto_previous_sibling()); assert_eq!(cursor.node().kind(), "line_comment"); - assert_eq!(cursor.node().utf8_text(text.as_bytes()).unwrap(), "// This is fun!"); + assert_eq!( + cursor.node().utf8_text(text.as_bytes()).unwrap(), + "// This is fun!" 
+ ); assert!(cursor.goto_previous_sibling()); assert_eq!(cursor.node().kind(), "line_comment"); - assert_eq!(cursor.node().utf8_text(text.as_bytes()).unwrap(), "// Hi there"); + assert_eq!( + cursor.node().utf8_text(text.as_bytes()).unwrap(), + "// Hi there" + ); assert!(!cursor.goto_previous_sibling()); } From 143ed959c9a4d591538413b59b935fdba1066a5c Mon Sep 17 00:00:00 2001 From: Daumantas Kavolis Date: Wed, 25 Oct 2023 10:47:55 +0300 Subject: [PATCH 347/347] Use single if block --- lib/src/tree_cursor.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index f08b9692..63d22c8b 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -378,14 +378,14 @@ TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(TSTreeCursor *_self uint32_t child_index = array_back(&self->stack)->child_index; const Subtree *children = ts_subtree_children((*(parent->subtree))); - // skip first child padding since its position should match the position of the parent - if (child_index > 0) + if (child_index > 0) { + // skip first child padding since its position should match the position of the parent position = length_add(position, ts_subtree_size(children[0])); - for (uint32_t i = 1; i < child_index; ++i) { - position = length_add(position, ts_subtree_total_size(children[i])); - } - if (child_index > 0) + for (uint32_t i = 1; i < child_index; ++i) { + position = length_add(position, ts_subtree_total_size(children[i])); + } position = length_add(position, ts_subtree_padding(children[child_index])); + } array_back(&self->stack)->position = position;