Compare commits

..

26 commits

Author SHA1 Message Date
Quentin Boyer
c2e50ccd11 Add the source to the closure 2026-01-22 01:23:08 +01:00
Quentin Boyer
1a54b1794d Refactor to be able to pass a query 2026-01-22 01:15:51 +01:00
Quentin Boyer
4c89725111 Expose a callback instead of doing the filtering in the iterator 2026-01-22 00:47:36 +01:00
Quentin Boyer
705130705a Ignore directives in predicates 2026-01-22 00:14:19 +01:00
Quentin Boyer
e5ee144b0a Allow to match arbitrary predicates 2026-01-21 23:50:06 +01:00
Quentin Boyer
90885404ce Depend on upstream tree-sitter 2026-01-21 23:01:14 +01:00
Will Lillis
152d2756fc fix(cli): warn user when nm can't be run to verify the symbols inside
the parser being built

(cherry picked from commit 0cdb6bef7b)
2026-01-18 23:26:47 -05:00
Christian Clason
f05efbb352 fix(wasm): regenerate stdlib with wasm-opt
Problem: Output of `cargo xtask build-wasm-stdlib` depends on whether
`wasm-opt` is installed (since `clang` will use it by default if it
finds it).

Solution: Install it and rerun the xtask.
(cherry picked from commit 5d290a2a75)
2026-01-15 16:52:47 +01:00
Will Lillis
1f221c8500 fix(build): define _BSD_SOURCE
System endian conversion macros are gated behind this feature flag for
older versions of GLIBC. `_BSD_SOURCE` and `_SVID_SOURCE` were
deprecated and replaced with `_DEFAULT_SOURCE` starting with GLIBC 2.19.

(cherry picked from commit aefae11c0d)
2026-01-12 23:43:46 -05:00
Kevin Wang
fdca0718bc fix(templates): fix python free-threading compatibility
(cherry picked from commit 630fa52717)
2026-01-10 04:01:08 -06:00
Christian Clason
fa7b1b2a66 fix(wasm): update wasm-stdlib.h
(cherry picked from commit cd6672701b)
2026-01-06 19:27:35 +01:00
tree-sitter-ci-bot[bot]
adcc4d1f7b
fix(wasm): add common definitions to stdlib (#5199) (#5208)
Also expose `strlen` through `string.h` instead of `stdio.h`.

(cherry picked from commit f4ca3d95ca)

Co-authored-by: Trim21 <trim21.me@gmail.com>
2026-01-06 12:27:26 +01:00
skewb1k
7d9c544c96 fix(cli): restore test summary output for tree-sitter test
Problem:
After commit f02d7e7e33
the `tree-sitter test` command no longer printed the final test summary,
leaving empty line. The `Stats` struct was embedded into `TestSummary`,
and the explicit call to print it was removed.

Solution:
Print `parse_stats` from `TestSummary.fmt()` implementation.

(cherry picked from commit 17e3c7a5c5)
2026-01-04 22:45:41 -08:00
WillLillis
c1e49d1571 feat(cli): fill in missing fields to tree-sitter.json when running
`tree-sitter init -u`

(cherry picked from commit dd60d5cff0)
2025-12-31 20:37:15 +01:00
WillLillis
eae6554735 fix(cli): increase verbosity of tree-sitter init -u updates
Also, use `info` logs rather than `warn`

(cherry picked from commit f1288ea5c9)
2025-12-31 20:37:15 +01:00
WillLillis
48ee942c4f fix(cli): canonicalize build --output path
This fixes a potential issue with the new lock file hashing mechanism,
in which two different path literals pointing to the same location would
hash to separate lock files, allowing a race condition.

(cherry picked from commit 93d793d249)
2025-12-30 17:49:45 +01:00
Firas al-Khalil
9ee2b87dd6 feat(cli): concurrent build of same grammar on different paths
(cherry picked from commit 5d9605a91e)
2025-12-29 12:37:04 +01:00
Firas al-Khalil
fb91deb8d9 fix(cli): report library load failure
Instead of panicking somewhere else.

This happens on concurrent builds of the same grammar.

(cherry picked from commit 5293dd683e)
2025-12-29 12:37:04 +01:00
Firas al-Khalil
789a966f96 fix(cli): report context on compile fail
(cherry picked from commit 62effdf128)
2025-12-29 12:37:04 +01:00
WillLillis
3c49fef0e3 fix(rust): address nightly clippy lint
(cherry picked from commit 8e4f21aba0)
2025-12-27 19:39:28 -05:00
WillLillis
8a297b86bc fix(cli): set language in cwd for all usages of highlight command
(cherry picked from commit 5208299bbb)
2025-12-27 19:39:28 -05:00
skewb1k
ac6644016c fix(cli): remove extra newline with --cst
Makes CST output consistent with other formats.

(cherry picked from commit f05e57e2fc)
2025-12-24 15:37:30 +01:00
skewb1k
a80765614b fix(cli): remove extra indentation with --cst --no-ranges
(cherry picked from commit 2f33a37dff)
2025-12-24 15:37:30 +01:00
kevin-hua-kraken
34602af22c fix(playground): update query API
(cherry picked from commit a7d8c0cbb2)
2025-12-23 14:18:14 +01:00
Will Lillis
c4f81931e6 fix(cli): correct discrepancy with cst for --no-ranges
(cherry picked from commit eacb95c85d)
2025-12-16 23:24:07 -05:00
skewb1k
25777e5a64 fix(cli): trailing whitespace after multiline text nodes in CST
Problem:
The CST printer emits trailing whitespace after multiline text nodes.
With 1704c604bf and `:cst` corpus tests
this causes trailing spaces to appear on `test --update`.
These spaces cannot be removed afterward, as the test runner
expects an exact character-for-character match for CST tests.

Solution:
Print whitespace only if node is not multiline.

(cherry picked from commit 4ac2d5d276)
2025-12-14 22:41:02 -05:00
67 changed files with 343 additions and 3299 deletions

View file

@ -1,25 +0,0 @@
module.exports = async ({ github, context, core }) => {
if (context.eventName !== 'pull_request') return;
const prNumber = context.payload.pull_request.number;
const owner = context.repo.owner;
const repo = context.repo.repo;
const { data: files } = await github.rest.pulls.listFiles({
owner,
repo,
pull_number: prNumber
});
const changedFiles = files.map(file => file.filename);
const wasmStdLibSrc = 'crates/language/wasm/';
const dirChanged = changedFiles.some(file => file.startsWith(wasmStdLibSrc));
if (!dirChanged) return;
const wasmStdLibHeader = 'lib/src/wasm/wasm-stdlib.h';
const requiredChanged = changedFiles.includes(wasmStdLibHeader);
if (!requiredChanged) core.setFailed(`Changes detected in ${wasmStdLibSrc} but ${wasmStdLibHeader} was not modified.`);
};

View file

@ -24,7 +24,7 @@ jobs:
private-key: ${{ secrets.BACKPORT_KEY }}
- name: Create backport PR
uses: korthout/backport-action@v4
uses: korthout/backport-action@v3
with:
pull_title: "${pull_title}"
label_pattern: "^ci:backport ([^ ]+)$"

View file

@ -278,7 +278,7 @@ jobs:
- name: Upload CLI artifact
if: "!matrix.no-run"
uses: actions/upload-artifact@v6
uses: actions/upload-artifact@v5
with:
name: tree-sitter.${{ matrix.platform }}
path: target/${{ matrix.target }}/release/tree-sitter${{ contains(matrix.target, 'windows') && '.exe' || '' }}
@ -287,7 +287,7 @@ jobs:
- name: Upload Wasm artifacts
if: matrix.platform == 'linux-x64'
uses: actions/upload-artifact@v6
uses: actions/upload-artifact@v5
with:
name: tree-sitter.wasm
path: |

View file

@ -44,6 +44,3 @@ jobs:
build:
uses: ./.github/workflows/build.yml
check-wasm-stdlib:
uses: ./.github/workflows/wasm_stdlib.yml

View file

@ -25,7 +25,7 @@ jobs:
uses: actions/checkout@v6
- name: Download build artifacts
uses: actions/download-artifact@v7
uses: actions/download-artifact@v6
with:
path: artifacts

View file

@ -1,19 +0,0 @@
name: Check Wasm Stdlib build
on:
workflow_call:
jobs:
check:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v6
- name: Check directory changes
uses: actions/github-script@v8
with:
script: |
const scriptPath = `${process.env.GITHUB_WORKSPACE}/.github/scripts/wasm_stdlib.js`;
const script = require(scriptPath);
return script({ github, context, core });

View file

@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.13)
project(tree-sitter
VERSION "0.27.0"
VERSION "0.26.3"
DESCRIPTION "An incremental parsing system for programming tools"
HOMEPAGE_URL "https://tree-sitter.github.io/tree-sitter/"
LANGUAGES C)

2749
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,26 +1,17 @@
[workspace]
default-members = ["crates/cli"]
members = [
"crates/cli",
"crates/config",
"crates/generate",
"crates/highlight",
"crates/loader",
"crates/tags",
"crates/xtask",
"crates/language",
"lib",
]
resolver = "2"
[workspace.package]
version = "0.27.0"
version = "0.26.3"
authors = [
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
"Amaan Qureshi <amaanq12@gmail.com>",
]
edition = "2021"
rust-version = "1.85"
rust-version = "1.84"
homepage = "https://tree-sitter.github.io/tree-sitter"
repository = "https://github.com/tree-sitter/tree-sitter"
license = "MIT"
@ -106,8 +97,8 @@ ansi_colours = "1.2.3"
anstyle = "1.0.13"
anyhow = "1.0.100"
bstr = "1.12.0"
cc = "1.2.53"
clap = { version = "4.5.54", features = [
cc = "1.2.48"
clap = { version = "4.5.53", features = [
"cargo",
"derive",
"env",
@ -115,7 +106,7 @@ clap = { version = "4.5.54", features = [
"string",
"unstable-styles",
] }
clap_complete = "4.5.65"
clap_complete = "4.5.61"
clap_complete_nushell = "4.5.10"
crc32fast = "1.5.0"
ctor = "0.2.9"
@ -140,7 +131,7 @@ rustc-hash = "2.1.1"
schemars = "1.0.5"
semver = { version = "1.0.27", features = ["serde"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.149", features = ["preserve_order"] }
serde_json = { version = "1.0.145", features = ["preserve_order"] }
similar = "2.7.0"
smallbitvec = "2.6.0"
streaming-iterator = "0.1.9"
@ -153,11 +144,11 @@ walkdir = "2.5.0"
wasmparser = "0.243.0"
webbrowser = "1.0.5"
tree-sitter = { version = "0.27.0", path = "./lib" }
tree-sitter-generate = { version = "0.27.0", path = "./crates/generate" }
tree-sitter-loader = { version = "0.27.0", path = "./crates/loader" }
tree-sitter-config = { version = "0.27.0", path = "./crates/config" }
tree-sitter-highlight = { version = "0.27.0", path = "./crates/highlight" }
tree-sitter-tags = { version = "0.27.0", path = "./crates/tags" }
tree-sitter = { version = "0.26.3", path = "./lib" }
tree-sitter-generate = { version = "0.26.3", path = "./crates/generate" }
tree-sitter-loader = { version = "0.26.3", path = "./crates/loader" }
tree-sitter-config = { version = "0.26.3", path = "./crates/config" }
tree-sitter-highlight = { version = "0.26.3", path = "./crates/highlight" }
tree-sitter-tags = { version = "0.26.3", path = "./crates/tags" }
tree-sitter-language = { version = "0.1", path = "./crates/language" }

View file

@ -1,4 +1,4 @@
VERSION := 0.27.0
VERSION := 0.26.3
DESCRIPTION := An incremental parsing system for programming tools
HOMEPAGE_URL := https://tree-sitter.github.io/tree-sitter/

View file

@ -1,7 +1,7 @@
.{
.name = .tree_sitter,
.fingerprint = 0x841224b447ac0d4f,
.version = "0.27.0",
.version = "0.26.3",
.minimum_zig_version = "0.14.1",
.paths = .{
"build.zig",

View file

@ -7,8 +7,7 @@
[npmjs.com]: https://www.npmjs.org/package/tree-sitter-cli
[npmjs.com badge]: https://img.shields.io/npm/v/tree-sitter-cli.svg?color=%23BF4A4A
The Tree-sitter CLI allows you to develop, test, and use Tree-sitter grammars from the command line. It works on `MacOS`,
`Linux`, and `Windows`.
The Tree-sitter CLI allows you to develop, test, and use Tree-sitter grammars from the command line. It works on `MacOS`, `Linux`, and `Windows`.
### Installation
@ -35,11 +34,9 @@ The `tree-sitter` binary itself has no dependencies, but specific commands have
### Commands
* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current
working directory. See [the documentation] for more information.
* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. See [the documentation] for more information.
* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory.
See [the documentation] for more information.
* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation] for more information.
* `parse` - The `tree-sitter parse` command will parse a file (or list of files) using Tree-sitter parsers.

View file

@ -1,12 +1,12 @@
{
"name": "tree-sitter-cli",
"version": "0.27.0",
"version": "0.26.3",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "tree-sitter-cli",
"version": "0.27.0",
"version": "0.26.3",
"hasInstallScript": true,
"license": "MIT",
"bin": {

View file

@ -1,6 +1,6 @@
{
"name": "tree-sitter-cli",
"version": "0.27.0",
"version": "0.26.3",
"author": {
"name": "Max Brunsfeld",
"email": "maxbrunsfeld@gmail.com"

View file

@ -953,7 +953,7 @@ fn render_node_range(
fn cst_render_node(
opts: &ParseFileOptions,
cursor: &TreeCursor,
cursor: &mut TreeCursor,
source_code: &[u8],
out: &mut impl Write,
total_width: usize,

View file

@ -19,8 +19,7 @@
--light-scrollbar-track: #f1f1f1;
--light-scrollbar-thumb: #c1c1c1;
--light-scrollbar-thumb-hover: #a8a8a8;
--light-tree-row-bg: #e3f2fd;
--dark-bg: #1d1f21;
--dark-border: #2d2d2d;
--dark-text: #c5c8c6;
@ -29,7 +28,6 @@
--dark-scrollbar-track: #25282c;
--dark-scrollbar-thumb: #4a4d51;
--dark-scrollbar-thumb-hover: #5a5d61;
--dark-tree-row-bg: #373737;
--primary-color: #0550ae;
--primary-color-alpha: rgba(5, 80, 174, 0.1);
@ -44,7 +42,6 @@
--text-color: var(--dark-text);
--panel-bg: var(--dark-panel-bg);
--code-bg: var(--dark-code-bg);
--tree-row-bg: var(--dark-tree-row-bg);
}
[data-theme="light"] {
@ -53,7 +50,6 @@
--text-color: var(--light-text);
--panel-bg: white;
--code-bg: white;
--tree-row-bg: var(--light-tree-row-bg);
}
/* Base Styles */
@ -279,7 +275,7 @@
}
#output-container a.highlighted {
background-color: #cae2ff;
background-color: #d9d9d9;
color: red;
border-radius: 3px;
text-decoration: underline;
@ -350,7 +346,7 @@
}
& #output-container a.highlighted {
background-color: #656669;
background-color: #373b41;
color: red;
}
@ -377,9 +373,6 @@
color: var(--dark-text);
}
}
.tree-row:has(.highlighted) {
background-color: var(--tree-row-bg);
}
</style>
</head>

View file

@ -1068,6 +1068,7 @@ fn run_tests(
return Ok(true);
}
let failure_count = test_summary.parse_failures.len();
let mut ran_test_in_group = false;
let matches_filter = |name: &str, file_name: &Option<String>, opts: &TestOptions| {
@ -1131,7 +1132,7 @@ fn run_tests(
test_summary.parse_results.pop_traversal();
if let Some(file_path) = file_path {
if opts.update {
if opts.update && test_summary.parse_failures.len() - failure_count > 0 {
write_tests(&file_path, corrected_entries)?;
}
corrected_entries.clear();

View file

@ -225,7 +225,7 @@ impl Pattern {
}
// Find every matching combination of child patterns and child nodes.
let mut finished_matches = Vec::<Match<'_, 'tree>>::new();
let mut finished_matches = Vec::<Match>::new();
if cursor.goto_first_child() {
let mut match_states = vec![(0, mat)];
loop {

View file

@ -33,7 +33,7 @@ log.workspace = true
pathdiff = { version = "0.2.3", optional = true }
regex.workspace = true
regex-syntax.workspace = true
rquickjs = { version = "0.11.0", optional = true, features = [
rquickjs = { version = "0.10.0", optional = true, features = [
"bindgen",
"loader",
"macro",

View file

@ -95,27 +95,9 @@ impl Console {
Type::Module => "module".to_string(),
Type::BigInt => v.get::<String>().unwrap_or_else(|_| "BigInt".to_string()),
Type::Unknown => "unknown".to_string(),
Type::Array => {
let js_vals = v
.as_array()
.unwrap()
.iter::<Value<'_>>()
.filter_map(|x| x.ok())
.map(|x| {
if x.is_string() {
format!("'{}'", Self::format_args(&[x]))
} else {
Self::format_args(&[x])
}
})
.collect::<Vec<_>>()
.join(", ");
format!("[ {js_vals} ]")
}
Type::Symbol
| Type::Object
| Type::Proxy
| Type::Array
| Type::Function
| Type::Constructor
| Type::Promise
@ -215,11 +197,11 @@ fn try_resolve_path(path: &Path) -> rquickjs::Result<PathBuf> {
}
#[allow(clippy::needless_pass_by_value)]
fn require_from_module<'js>(
ctx: Ctx<'js>,
fn require_from_module<'a>(
ctx: Ctx<'a>,
module_path: String,
from_module: &str,
) -> rquickjs::Result<Value<'js>> {
) -> rquickjs::Result<Value<'a>> {
let current_module = PathBuf::from(from_module);
let current_dir = if current_module.is_file() {
current_module.parent().unwrap_or(Path::new("."))
@ -234,13 +216,13 @@ fn require_from_module<'js>(
load_module_from_content(&ctx, &resolved_path, &contents)
}
fn load_module_from_content<'js>(
ctx: &Ctx<'js>,
fn load_module_from_content<'a>(
ctx: &Ctx<'a>,
path: &Path,
contents: &str,
) -> rquickjs::Result<Value<'js>> {
) -> rquickjs::Result<Value<'a>> {
if path.extension().is_some_and(|ext| ext == "json") {
return ctx.eval::<Value<'js>, _>(format!("JSON.parse({contents:?})"));
return ctx.eval::<Value, _>(format!("JSON.parse({contents:?})"));
}
let exports = Object::new(ctx.clone())?;
@ -256,7 +238,7 @@ fn load_module_from_content<'js>(
let module_path = filename.clone();
let require = Function::new(
ctx.clone(),
move |ctx_inner: Ctx<'js>, target_path: String| -> rquickjs::Result<Value<'js>> {
move |ctx_inner: Ctx<'a>, target_path: String| -> rquickjs::Result<Value<'a>> {
require_from_module(ctx_inner, target_path, &module_path)
},
)?;
@ -264,8 +246,8 @@ fn load_module_from_content<'js>(
let wrapper =
format!("(function(exports, require, module, __filename, __dirname) {{ {contents} }})");
let module_func = ctx.eval::<Function<'js>, _>(wrapper)?;
module_func.call::<_, Value<'js>>((exports, require, module_obj.clone(), filename, dirname))?;
let module_func = ctx.eval::<Function, _>(wrapper)?;
module_func.call::<_, Value>((exports, require, module_obj.clone(), filename, dirname))?;
module_obj.get("exports")
}

View file

@ -28,4 +28,4 @@ regex.workspace = true
thiserror.workspace = true
streaming-iterator.workspace = true
tree-sitter.workspace = true
tree-sitter = "0.26"

View file

@ -297,6 +297,7 @@ impl TSHighlighter {
})
})
},
&|_, _, _| true,
);
if let Ok(highlights) = highlights {

View file

@ -162,15 +162,17 @@ struct LocalScope<'a> {
local_defs: Vec<LocalDef<'a>>,
}
struct HighlightIter<'a, F>
struct HighlightIter<'a, F, G>
where
F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
G: Fn(&QueryMatch, &Query, &[u8]) -> bool + 'a,
{
source: &'a [u8],
language_name: &'a str,
byte_offset: usize,
highlighter: &'a mut Highlighter,
injection_callback: F,
capture_filter: &'a G,
cancellation_flag: Option<&'a AtomicUsize>,
layers: Vec<HighlightIterLayer<'a>>,
iter_count: usize,
@ -181,7 +183,7 @@ where
struct HighlightIterLayer<'a> {
_tree: Tree,
cursor: QueryCursor,
captures: iter::Peekable<_QueryCaptures<'a, 'a, &'a [u8], &'a [u8]>>,
captures: iter::Peekable<Box<dyn Iterator<Item = (QueryMatch<'a, 'a>, usize)> + 'a>>,
config: &'a HighlightConfiguration,
highlight_end_stack: Vec<usize>,
scope_stack: Vec<LocalScope<'a>>,
@ -189,7 +191,7 @@ struct HighlightIterLayer<'a> {
depth: usize,
}
pub struct _QueryCaptures<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> {
pub struct _QueryCaptures<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> {
ptr: *mut ffi::TSQueryCursor,
query: &'query Query,
text_provider: T,
@ -225,7 +227,7 @@ impl<'tree> _QueryMatch<'_, 'tree> {
}
}
impl<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> Iterator
impl<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> Iterator
for _QueryCaptures<'query, 'tree, T, I>
{
type Item = (QueryMatch<'query, 'tree>, usize);
@ -244,6 +246,7 @@ impl<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> Iterator
&m.assume_init(),
self.ptr,
));
if result.satisfies_text_predicates(
self.query,
&mut self.buffer1,
@ -252,6 +255,7 @@ impl<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> Iterator
) {
return Some((result, capture_index as usize));
}
result.remove();
} else {
return None;
@ -287,6 +291,7 @@ impl Highlighter {
source: &'a [u8],
cancellation_flag: Option<&'a AtomicUsize>,
mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
query_filter: &'a impl Fn(&QueryMatch, &Query, &[u8]) -> bool,
) -> Result<impl Iterator<Item = Result<HighlightEvent, Error>> + 'a, Error> {
let layers = HighlightIterLayer::new(
source,
@ -294,6 +299,7 @@ impl Highlighter {
self,
cancellation_flag,
&mut injection_callback,
query_filter,
config,
0,
vec![Range {
@ -309,6 +315,7 @@ impl Highlighter {
language_name: &config.language_name,
byte_offset: 0,
injection_callback,
capture_filter: query_filter,
cancellation_flag,
highlighter: self,
iter_count: 0,
@ -509,12 +516,16 @@ impl<'a> HighlightIterLayer<'a> {
/// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and
/// added to the returned vector.
#[allow(clippy::too_many_arguments)]
fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>(
fn new<
F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
G: Fn(&QueryMatch, &Query, &[u8]) -> bool,
>(
source: &'a [u8],
parent_name: Option<&str>,
highlighter: &mut Highlighter,
cancellation_flag: Option<&'a AtomicUsize>,
injection_callback: &mut F,
query_filter: &'a G,
mut config: &'a HighlightConfiguration,
mut depth: usize,
mut ranges: Vec<Range>,
@ -594,7 +605,6 @@ impl<'a> HighlightIterLayer<'a> {
}
}
// SAFETY:
// The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
// prevents them from being moved. But both of these values are really just
// pointers, so it's actually ok to move them.
@ -602,12 +612,22 @@ impl<'a> HighlightIterLayer<'a> {
let cursor_ref = unsafe {
mem::transmute::<&mut QueryCursor, &'static mut QueryCursor>(&mut cursor)
};
let captures = unsafe {
std::mem::transmute::<QueryCaptures<_, _>, _QueryCaptures<_, _>>(
cursor_ref.captures(&config.query, tree_ref.root_node(), source),
)
}
.peekable();
std::mem::transmute::<
QueryCaptures<_, _>,
_QueryCaptures<'a, 'a, &'a [u8], &'a [u8]>,
>(cursor_ref.captures(
&config.query,
tree_ref.root_node(),
source,
))
};
let captures: Box<dyn Iterator<Item = _>> =
Box::new(captures.filter(|(result, _): &(_, _)| {
query_filter(result, &config.query, source)
}));
result.push(HighlightIterLayer {
highlight_end_stack: Vec::new(),
@ -619,7 +639,7 @@ impl<'a> HighlightIterLayer<'a> {
cursor,
depth,
_tree: tree,
captures,
captures: captures.peekable(),
config,
ranges,
});
@ -757,9 +777,10 @@ impl<'a> HighlightIterLayer<'a> {
}
}
impl<'a, F> HighlightIter<'a, F>
impl<'a, F, G> HighlightIter<'a, F, G>
where
F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
G: Fn(&QueryMatch, &Query, &[u8]) -> bool,
{
fn emit_event(
&mut self,
@ -823,9 +844,10 @@ where
}
}
impl<'a, F> Iterator for HighlightIter<'a, F>
impl<'a, F, G> Iterator for HighlightIter<'a, F, G>
where
F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
G: Fn(&QueryMatch, &Query, &[u8]) -> bool,
{
type Item = Result<HighlightEvent, Error>;
@ -922,6 +944,7 @@ where
self.highlighter,
self.cancellation_flag,
&mut self.injection_callback,
self.capture_filter,
config,
self.layers[0].depth + 1,
ranges,

View file

@ -1,7 +1,7 @@
[package]
name = "tree-sitter-language"
description = "The tree-sitter Language type, used by the library and by language implementations"
version = "0.1.7"
version = "0.1.6"
authors.workspace = true
edition.workspace = true
rust-version = "1.77"

View file

@ -765,7 +765,7 @@ impl Loader {
}
#[must_use]
pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration<'static>, &Path)> {
pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> {
self.language_configurations
.iter()
.map(|c| (c, self.languages_by_id[c.language_id].0.as_ref()))
@ -775,7 +775,7 @@ impl Loader {
pub fn language_configuration_for_scope(
&self,
scope: &str,
) -> LoaderResult<Option<(Language, &LanguageConfiguration<'static>)>> {
) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
for configuration in &self.language_configurations {
if configuration.scope.as_ref().is_some_and(|s| s == scope) {
let language = self.language_for_id(configuration.language_id)?;
@ -788,7 +788,7 @@ impl Loader {
pub fn language_configuration_for_first_line_regex(
&self,
path: &Path,
) -> LoaderResult<Option<(Language, &LanguageConfiguration<'static>)>> {
) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
self.language_configuration_ids_by_first_line_regex
.iter()
.try_fold(None, |_, (regex, ids)| {
@ -817,7 +817,7 @@ impl Loader {
pub fn language_configuration_for_file_name(
&self,
path: &Path,
) -> LoaderResult<Option<(Language, &LanguageConfiguration<'static>)>> {
) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
// Find all the language configurations that match this file name
// or a suffix of the file name.
let configuration_ids = path
@ -889,7 +889,7 @@ impl Loader {
pub fn language_configuration_for_injection_string(
&self,
string: &str,
) -> LoaderResult<Option<(Language, &LanguageConfiguration<'static>)>> {
) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
let mut best_match_length = 0;
let mut best_match_position = None;
for (i, configuration) in self.language_configurations.iter().enumerate() {
@ -1539,9 +1539,7 @@ impl Loader {
}
#[must_use]
pub fn get_language_configuration_in_current_path(
&self,
) -> Option<&LanguageConfiguration<'static>> {
pub fn get_language_configuration_in_current_path(&self) -> Option<&LanguageConfiguration> {
self.language_configuration_in_current_path
.map(|i| &self.language_configurations[i])
}
@ -1550,7 +1548,7 @@ impl Loader {
&mut self,
parser_path: &Path,
set_current_path_config: bool,
) -> LoaderResult<&[LanguageConfiguration<'static>]> {
) -> LoaderResult<&[LanguageConfiguration]> {
let initial_language_configuration_count = self.language_configurations.len();
match TreeSitterJSON::from_file(parser_path) {

View file

@ -313,7 +313,6 @@ impl TagsContext {
)
.ok_or(Error::Cancelled)?;
// SAFETY:
// The `matches` iterator borrows the `Tree`, which prevents it from being
// moved. But the tree is really just a pointer, so it's actually ok to
// move it.

View file

@ -73,8 +73,9 @@ The behaviors of these three files are described in the next section.
## Queries
Tree-sitter's syntax highlighting system is based on *tree queries*, which are a general system for pattern-matching on
Tree-sitter's syntax trees. See [this section][pattern matching] of the documentation for more information about tree queries.
Tree-sitter's syntax highlighting system is based on *tree queries*, which are a general system for pattern-matching on Tree-sitter's
syntax trees. See [this section][pattern matching] of the documentation for more information
about tree queries.
Syntax highlighting is controlled by *three* different types of query files that are usually included in the `queries` folder.
The default names for the query files use the `.scm` file. We chose this extension because it is commonly used for files written

View file

@ -3,8 +3,7 @@
Tree-sitter can be used in conjunction with its [query language][query language] as a part of code navigation systems.
An example of such a system can be seen in the `tree-sitter tags` command, which emits a textual dump of the interesting
syntactic nodes in its file argument. A notable application of this is GitHub's support for [search-based code navigation][gh search].
This document exists to describe how to integrate with such systems, and how to extend this functionality to any language
with a Tree-sitter grammar.
This document exists to describe how to integrate with such systems, and how to extend this functionality to any language with a Tree-sitter grammar.
## Tagging and captures
@ -13,9 +12,9 @@ entities. Having found them, you use a syntax capture to label the entity and it
The essence of a given tag lies in two pieces of data: the _role_ of the entity that is matched
(i.e. whether it is a definition or a reference) and the _kind_ of that entity, which describes how the entity is used
(i.e. whether it's a class definition, function call, variable reference, and so on). Our convention is to use a syntax
capture following the `@role.kind` capture name format, and another inner capture, always called `@name`, that pulls out
the name of a given identifier.
(i.e. whether it's a class definition, function call, variable reference, and so on). Our convention is to use a syntax capture
following the `@role.kind` capture name format, and another inner capture, always called `@name`, that pulls out the name
of a given identifier.
You may optionally include a capture named `@doc` to bind a docstring. For convenience purposes, the tagging system provides
two built-in functions, `#select-adjacent!` and `#strip!` that are convenient for removing comment syntax from a docstring.

View file

@ -51,7 +51,7 @@ cargo install --path crates/cli
If you're going to be in a fast iteration cycle and would like the CLI to build faster, you can use the `release-dev` profile:
```sh
cargo build --profile release-dev
cargo build --release --profile release-dev
# or
cargo install --path crates/cli --profile release-dev
```
@ -93,8 +93,7 @@ cargo xtask build-wasm-stdlib
This command looks for the [Wasi SDK][wasi_sdk] indicated by the `TREE_SITTER_WASI_SDK_PATH`
environment variable. If you don't have the binary, it can be downloaded from wasi-sdk's [releases][wasi-sdk-releases]
page. Note that any changes to `crates/language/wasm/**` requires rebuilding the tree-sitter Wasm stdlib via
`cargo xtask build-wasm-stdlib`.
page.
### Debugging

View file

@ -19,8 +19,8 @@ will attempt to build the parser in the current working directory.
### `-w/--wasm`
Compile the parser as a Wasm module. This command looks for the [Wasi SDK][wasi_sdk] indicated by the `TREE_SITTER_WASI_SDK_PATH`
environment variable. If you don't have the binary, the CLI will attempt to download it for you to `<CACHE_DIR>/tree-sitter/wasi-sdk/`,
where `<CACHE_DIR>` is resolved according to the [XDG base directory][XDG] or Window's [Known_Folder_Locations][Known_Folder].
environment variable. If you don't have the binary, the CLI will attempt to download it for you to `<CACHE_DIR>/tree-sitter/wasi-sdk/`, where
`<CACHE_DIR>` is resolved according to the [XDG base directory][XDG] or Window's [Known_Folder_Locations][Known_Folder].
### `-o/--output`
@ -37,8 +37,7 @@ in the external scanner does so using their allocator.
### `-0/--debug`
Compile the parser with debug flags enabled. This is useful when debugging issues that require a debugger like `gdb` or
`lldb`.
Compile the parser with debug flags enabled. This is useful when debugging issues that require a debugger like `gdb` or `lldb`.
[Known_Folder]: https://learn.microsoft.com/en-us/windows/win32/shell/knownfolderid
[wasi_sdk]: https://github.com/WebAssembly/wasi-sdk

View file

@ -1,8 +1,6 @@
# `tree-sitter dump-languages`
The `dump-languages` command prints out a list of all the languages that the CLI knows about. This can be useful for debugging
purposes, or for scripting. The paths to search comes from the config file's [`parser-directories`][parser-directories]
object.
The `dump-languages` command prints out a list of all the languages that the CLI knows about. This can be useful for debugging purposes, or for scripting. The paths to search comes from the config file's [`parser-directories`][parser-directories] object.
```bash
tree-sitter dump-languages [OPTIONS] # Aliases: langs
@ -12,7 +10,6 @@ tree-sitter dump-languages [OPTIONS] # Aliases: langs
### `--config-path`
The path to the configuration file. Ordinarily, the CLI will use the default location as explained in the [init-config](./init-config.md)
command. This flag allows you to explicitly override that default, and use a config defined elsewhere.
The path to the configuration file. Ordinarily, the CLI will use the default location as explained in the [init-config](./init-config.md) command. This flag allows you to explicitly override that default, and use a config defined elsewhere.
[parser-directories]: ./init-config.md#parser-directories

View file

@ -1,39 +1,30 @@
# `tree-sitter generate`
The most important command for grammar development is `tree-sitter generate`, which reads the grammar in structured form
and outputs C files that can be compiled into a shared or static library (e.g., using the [`build`](./build.md) command).
The most important command you'll use is `tree-sitter generate`. This command reads the `grammar.js` file in your current
working directory and creates a file called `src/parser.c`, which implements the parser. After making changes to your grammar,
just run `tree-sitter generate` again.
```bash
tree-sitter generate [OPTIONS] [GRAMMAR_PATH] # Aliases: gen, g
```
The optional `GRAMMAR_PATH` argument should point to the structured grammar, in one of two forms:
- `grammar.js` a (ESM or CJS) JavaScript file; if the argument is omitted, it defaults to `./grammar.js`.
- `grammar.json` a structured representation of the grammar that is created as a byproduct of `generate`; this can be used
to regenerate a missing `parser.c` without requiring a JavaScript runtime (useful when distributing parsers to consumers).
The grammar path argument allows you to specify a path to a `grammar.js` JavaScript file, or `grammar.json` JSON file.
In case your `grammar.js` file is in a non-standard path, you can specify it yourself. But, if you are using a parser
where `grammar.json` was already generated, or it was hand-written, you can tell the CLI to generate the parser *based*
on this JSON file. This avoids relying on a JavaScript file and avoids the dependency on a JavaScript runtime.
If there is an ambiguity or *local ambiguity* in your grammar, Tree-sitter will detect it during parser generation, and
it will exit with an `Unresolved conflict` error message. To learn more about conflicts and how to handle them, see
it will exit with an `Unresolved conflict` error message. To learn more about conflicts and how to handle them, check out
the section on [`Structuring Rules Well`](../creating-parsers/3-writing-the-grammar.md#structuring-rules-well)
in the user guide.
## Generated files
- `src/parser.c` implements the parser logic specified in the grammar.
- `src/tree_sitter/parser.h` provides basic C definitions that are used in the generated `parser.c` file.
- `src/tree_sitter/alloc.h` provides memory allocation macros that can be used in an external scanner.
- `src/tree_sitter/array.h` provides array macros that can be used in an external scanner.
- `src/grammar.json` contains a structured representation of the grammar; can be used to regenerate the parser without having
to re-evaluate the `grammar.js`.
- `src/node-types.json` provides type information about individual syntax nodes; see the section on [`Static Node Types`](../using-parsers/6-static-node-types.md).
## Options
### `-l/--log`
Print the log of the parser generation process. This includes information such as what tokens are included in the error
recovery state, what keywords were extracted, what states were split and why, and the entry point state.
Print the log of the parser generation process. This is really only useful if you know what you're doing, or are investigating
a bug in the CLI itself. It logs info such as what tokens are included in the error recovery state,
what keywords were extracted, what states were split and why, and the entry point state.
### `--abi <VERSION>`
@ -63,8 +54,7 @@ The path to the JavaScript runtime executable to use when generating the parser.
Note that you can also set this with `TREE_SITTER_JS_RUNTIME`. Starting from version 0.26, you can
also pass in `native` to use the experimental native QuickJS runtime that comes bundled with the CLI.
This avoids the dependency on a JavaScript runtime entirely. The native QuickJS runtime is compatible
with ESM as well as with CommonJS in strict mode. If your grammar depends on `npm` to install dependencies such as base
grammars, the native runtime can be used *after* running `npm install`.
with ESM as well as with CommonJS in strict mode. If your grammar depends on `npm` to install dependencies such as base grammars, the native runtime can be used *after* running `npm install`.
### `--disable-optimization`

View file

@ -52,8 +52,7 @@ The path to the directory containing the grammar.
### `--config-path <CONFIG_PATH>`
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more
information.
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more information.
### `-n/--test-number <TEST_NUMBER>`

View file

@ -1,8 +1,4 @@
# CLI Overview
The `tree-sitter` command-line interface is used to create, manage, test, and build tree-sitter parsers. It is controlled
by
- a personal `tree-sitter/config.json` config file generated by [`tree-sitter init-config`](./init-config.md)
- a parser `tree-sitter.json` config file generated by [`tree-sitter init`](./init.md).
Let's go over all of the functionality of the `tree-sitter` command line interface.
Once you feel that you have enough of a grasp on the CLI, you can move on to the grammar authoring section to learn more about writing your own parser.

View file

@ -8,94 +8,30 @@ we recommend using git for version control of your grammar.
tree-sitter init [OPTIONS] # Aliases: i
```
## Generated files
## Options
### Required files
### `--update`
The following required files are always created if missing:
Update outdated generated files, if needed.
- `tree-sitter.json` - The main configuration file that determines how `tree-sitter` interacts with the grammar. If missing,
the `init` command will prompt the user for the required fields. See [below](./init.md#structure-of-tree-sitterjson) for
the full documentation of the structure of this file.
- `package.json` - The `npm` manifest for the parser. This file is required for some `tree-sitter` subcommands, and if the
grammar has dependencies (e.g., another published base grammar that this grammar extends).
- `grammar.js` - An empty template for the main grammar file; see [the section on creating parsers](../2-creating-parser).
### `-p/--grammar-path <PATH>`
### Language bindings
Language bindings are files that allow your parser to be directly used by projects written in the respective language.
The following bindings are created if enabled in `tree-sitter.json`:
#### C/C++
- `Makefile` — This file tells [`make`][make] how to compile your language.
- `CMakeLists.txt` — This file tells [`cmake`][cmake] how to compile your language.
- `bindings/c/tree_sitter/tree-sitter-language.h` — This file provides the C interface of your language.
- `bindings/c/tree-sitter-language.pc` — This file provides [pkg-config][pkg-config] metadata about your language's C library.
#### Go
- `go.mod` — This file is the manifest of the Go module.
- `bindings/go/binding.go` — This file wraps your language in a Go module.
- `bindings/go/binding_test.go` — This file contains a test for the Go package.
#### Node
- `binding.gyp` — This file tells Node.js how to compile your language.
- `bindings/node/binding.cc` — This file wraps your language in a JavaScript module for Node.js.
- `bindings/node/index.js` — This is the file that Node.js initially loads when using your language.
- `bindings/node/index.d.ts` — This file provides type hints for your parser when used in TypeScript.
- `bindings/node/binding_test.js` — This file contains a test for the Node.js package.
#### Java
- `pom.xml` - This file is the manifest of the Maven package.
- `bindings/java/main/namespace/language/TreeSitterLanguage.java` - This file wraps your language in a Java class.
- `bindings/java/test/TreeSitterLanguageTest.java` - This file contains a test for the Java package.
#### Python
- `pyproject.toml` — This file is the manifest of the Python package.
- `setup.py` — This file tells Python how to compile your language.
- `bindings/python/tree_sitter_language/binding.c` — This file wraps your language in a Python module.
- `bindings/python/tree_sitter_language/__init__.py` — This file tells Python how to load your language.
- `bindings/python/tree_sitter_language/__init__.pyi` — This file provides type hints for your parser when used in Python.
- `bindings/python/tree_sitter_language/py.typed` — This marker file tells type checkers that the Python package ships type hints.
- `bindings/python/tests/test_binding.py` — This file contains a test for the Python package.
#### Rust
- `Cargo.toml` — This file is the manifest of the Rust package.
- `bindings/rust/build.rs` — This file tells Rust how to compile your language.
- `bindings/rust/lib.rs` — This file wraps your language in a Rust crate when used in Rust.
#### Swift
- `Package.swift` — This file tells Swift how to compile your language.
- `bindings/swift/TreeSitterLanguage/language.h` — This file wraps your language in a Swift module when used in Swift.
- `bindings/swift/TreeSitterLanguageTests/TreeSitterLanguageTests.swift` — This file contains a test for the Swift package.
#### Zig
- `build.zig` - This file tells Zig how to compile your language.
- `build.zig.zon` - This file is the manifest of the Zig package.
- `bindings/zig/root.zig` - This file wraps your language in a Zig module.
- `bindings/zig/test.zig` - This file contains a test for the Zig package.
### Additional files
In addition, the following files are created that aim to improve the development experience:
- `.editorconfig` — This file tells your editor how to format your code. More information about this file can be found [here][editorconfig].
- `.gitattributes` — This file tells Git how to handle line endings and tells GitHub which files are generated.
- `.gitignore` — This file tells Git which files to ignore when committing changes.
The path to the directory containing the grammar.
## Structure of `tree-sitter.json`
The main file of interest for users to configure is `tree-sitter.json`, which tells the CLI information about your grammar,
such as the location of queries.
### The `grammars` field
This field is an array of objects, though you typically only need one object in this array unless your repo has
multiple grammars (for example, `Typescript` and `TSX`), e.g.,
multiple grammars (for example, `Typescript` and `TSX`).
### Example
Typically, the objects in the `"tree-sitter"` array only need to specify a few keys:
```json
{
"tree-sitter": [
@ -113,7 +49,7 @@ multiple grammars (for example, `Typescript` and `TSX`), e.g.,
}
```
#### Basic fields
#### Basic Fields
These keys specify basic information about the parser:
@ -129,12 +65,12 @@ parser to files that should be checked for modifications during recompilation.
This is useful during development to have changes to other files besides scanner.c
be picked up by the CLI.
#### Language detection
#### Language Detection
These keys help to decide whether the language applies to a given file:
- `file-types` — An array of filename suffix strings (not including the dot). The grammar will be used for files whose names
end with one of these suffixes. Note that the suffix may match an *entire* filename.
- `file-types` — An array of filename suffix strings. The grammar will be used for files whose names end with one of
these suffixes. Note that the suffix may match an *entire* filename.
- `first-line-regex` — A regex pattern that will be tested against the first line of a file
to determine whether this language applies to the file. If present, this regex will be used for any file whose
@ -149,14 +85,14 @@ no `content-regex` will be preferred over this one.
should be used for a potential *language injection* site.
Language injection is described in more detail in [the relevant section](../3-syntax-highlighting.md#language-injection).
#### Query paths
#### Query Paths
These keys specify relative paths from the directory containing `tree-sitter.json` to the files that control syntax highlighting:
- `highlights` — Path to a *highlight query*. Default: `queries/highlights.scm`
- `locals` — Path to a *local variable query*. Default: `queries/locals.scm`.
- `injections` — Path to an *injection query*. Default: `queries/injections.scm`.
- `tags` — Path to a *tag query*. Default: `queries/tags.scm`.
- `tags` — Path to a *tag query*. Default: `queries/tags.scm`.
### The `metadata` field
@ -185,19 +121,81 @@ Each key is a language name, and the value is a boolean.
- `swift` (default: `false`)
- `zig` (default: `false`)
## Options
## Binding Files
### `-u/--update`
When you run `tree-sitter init`, the CLI will also generate a number of files in your repository that allow for your parser
to be used from different languages. Here is a list of these bindings files that are generated, and what their purpose is:
Update outdated generated files, if possible.
### C/C++
**Note:** Existing files that may have been edited manually are _not_ updated in general. To force an update to such files,
remove them and call `tree-sitter init -u` again.
- `Makefile` — This file tells [`make`][make] how to compile your language.
- `CMakeLists.txt` — This file tells [`cmake`][cmake] how to compile your language.
- `bindings/c/tree_sitter/tree-sitter-language.h` — This file provides the C interface of your language.
- `bindings/c/tree-sitter-language.pc` — This file provides [pkg-config][pkg-config] metadata about your language's C library.
- `src/tree_sitter/parser.h` — This file provides some basic C definitions that are used in your generated `parser.c` file.
- `src/tree_sitter/alloc.h` — This file provides some memory allocation macros that are to be used in your external scanner,
if you have one.
- `src/tree_sitter/array.h` — This file provides some array macros that are to be used in your external scanner,
if you have one.
### `-p/--grammar-path <PATH>`
### Go
The path to the directory containing the grammar.
- `go.mod` — This file is the manifest of the Go module.
- `bindings/go/binding.go` — This file wraps your language in a Go module.
- `bindings/go/binding_test.go` — This file contains a test for the Go package.
### Node
- `binding.gyp` — This file tells Node.js how to compile your language.
- `package.json` — This file is the manifest of the Node.js package.
- `bindings/node/binding.cc` — This file wraps your language in a JavaScript module for Node.js.
- `bindings/node/index.js` — This is the file that Node.js initially loads when using your language.
- `bindings/node/index.d.ts` — This file provides type hints for your parser when used in TypeScript.
- `bindings/node/binding_test.js` — This file contains a test for the Node.js package.
### Java
- `pom.xml` - This file is the manifest of the Maven package.
- `bindings/java/main/namespace/language/TreeSitterLanguage.java` - This file wraps your language in a Java class.
- `bindings/java/test/TreeSitterLanguageTest.java` - This file contains a test for the Java package.
### Python
- `pyproject.toml` — This file is the manifest of the Python package.
- `setup.py` — This file tells Python how to compile your language.
- `bindings/python/tree_sitter_language/binding.c` — This file wraps your language in a Python module.
- `bindings/python/tree_sitter_language/__init__.py` — This file tells Python how to load your language.
- `bindings/python/tree_sitter_language/__init__.pyi` — This file provides type hints for your parser when used in Python.
- `bindings/python/tree_sitter_language/py.typed` — This marker file tells type checkers that the Python package ships type hints.
- `bindings/python/tests/test_binding.py` — This file contains a test for the Python package.
### Rust
- `Cargo.toml` — This file is the manifest of the Rust package.
- `bindings/rust/lib.rs` — This file wraps your language in a Rust crate when used in Rust.
- `bindings/rust/build.rs` — This file wraps the building process for the Rust crate.
### Swift
- `Package.swift` — This file tells Swift how to compile your language.
- `bindings/swift/TreeSitterLanguage/language.h` — This file wraps your language in a Swift module when used in Swift.
- `bindings/swift/TreeSitterLanguageTests/TreeSitterLanguageTests.swift` — This file contains a test for the Swift package.
### Zig
- `build.zig` - This file tells Zig how to compile your language.
- `build.zig.zon` - This file is the manifest of the Zig package.
- `bindings/zig/root.zig` - This file wraps your language in a Zig module.
- `bindings/zig/test.zig` - This file contains a test for the Zig package.
### Additional Files
Additionally, there are a few other files that are generated when you run `tree-sitter init`,
that aim to improve the development experience:
- `.editorconfig` — This file tells your editor how to format your code. More information about this file can be found [here][editorconfig].
- `.gitattributes` — This file tells Git how to handle line endings, and tells GitHub what files are generated.
- `.gitignore` — This file tells Git what files to ignore when committing changes.
[cmake]: https://cmake.org/cmake/help/latest
[editorconfig]: https://editorconfig.org

View file

@ -78,8 +78,7 @@ Suppress main output.
### `--edits <EDITS>...`
Apply edits after parsing the file. Edits are in the form of `row,col|position delcount insert_text` where row and col,
or position are 0-indexed.
Apply edits after parsing the file. Edits are in the form of `row,col|position delcount insert_text` where row and col, or position are 0-indexed.
### `--encoding <ENCODING>`
@ -96,8 +95,7 @@ Output parsing results in a JSON format.
### `--config-path <CONFIG_PATH>`
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more
information.
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more information.
### `-n/--test-number <TEST_NUMBER>`

View file

@ -7,8 +7,8 @@ tree-sitter playground [OPTIONS] # Aliases: play, pg, web-ui
```
```admonish note
For this to work, you must have already built the parser as a Wasm module. This can be done with the [`build`](./build.md)
subcommand (`tree-sitter build --wasm`).
For this to work, you must have already built the parser as a Wasm module. This can be done with the [`build`](./build.md) subcommand
(`tree-sitter build --wasm`).
```
## Options

View file

@ -47,8 +47,8 @@ The range of rows in which the query will be executed. The format is `start_row:
### `--containing-row-range <ROW_RANGE>`
The range of rows in which the query will be executed. Only the matches that are fully contained within the provided row
range will be returned.
The range of rows in which the query will be executed. Only the matches that are fully contained within the provided row range
will be returned.
### `--scope <SCOPE>`
@ -64,8 +64,7 @@ Whether to run query tests or not.
### `--config-path <CONFIG_PATH>`
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more
information.
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more information.
### `-n/--test-number <TEST_NUMBER>`

View file

@ -31,8 +31,7 @@ The path to the directory containing the grammar.
### `--config-path <CONFIG_PATH>`
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more
information.
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more information.
### `-n/--test-number <TEST_NUMBER>`

View file

@ -63,8 +63,7 @@ When using the `--debug-graph` option, open the log file in the default browser.
### `--config-path <CONFIG_PATH>`
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more
information.
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more information.
### `--show-fields`

View file

@ -25,9 +25,11 @@ tree-sitter version --bump minor # minor bump
tree-sitter version --bump major # major bump
```
As a grammar author, you should keep the version of your grammar in sync across different bindings. However, doing so manually
is error-prone and tedious, so this command takes care of the burden. If you are using a version control system, it is recommended
to commit the changes made by this command, and to tag the commit with the new version.
As a grammar author, you should keep the version of your grammar in sync across
different bindings. However, doing so manually is error-prone and tedious, so
this command takes care of the burden. If you are using a version control system,
it is recommended to commit the changes made by this command, and to tag the
commit with the new version.
To print the current version without bumping it, use:

View file

@ -17,8 +17,8 @@ DSL through the `RustRegex` class. Simply pass your regex pattern as a string:
```
Unlike JavaScript's builtin `RegExp` class, which takes a pattern and flags as separate arguments, `RustRegex` only
accepts a single pattern string. While it doesn't support separate flags, you can use inline flags within the pattern
itself. For more details about Rust's regex syntax and capabilities, check out the [Rust regex documentation][rust regex].
accepts a single pattern string. While it doesn't support separate flags, you can use inline flags within the pattern itself.
For more details about Rust's regex syntax and capabilities, check out the [Rust regex documentation][rust regex].
```admonish note
Only a subset of the Regex engine is actually supported. This is due to certain features like lookahead and lookaround
@ -50,10 +50,10 @@ The previous `repeat` rule is implemented in `repeat1` but is included because i
- **Options : `optional(rule)`** — This function creates a rule that matches *zero or one* occurrence of a given rule.
It is analogous to the `[x]` (square bracket) syntax in EBNF notation.
- **Precedence : `prec(number, rule)`** — This function marks the given rule with a numerical precedence, which will be
used to resolve [*LR(1) Conflicts*][lr-conflict] at parser-generation time. When two rules overlap in a way that represents
either a true ambiguity or a *local* ambiguity given one token of lookahead, Tree-sitter will try to resolve the conflict
by matching the rule with the higher precedence. The default precedence of all rules is zero. This works similarly to the
- **Precedence : `prec(number, rule)`** — This function marks the given rule with a numerical precedence, which will be used
to resolve [*LR(1) Conflicts*][lr-conflict] at parser-generation time. When two rules overlap in a way that represents either
a true ambiguity or a *local* ambiguity given one token of lookahead, Tree-sitter will try to resolve the conflict by matching
the rule with the higher precedence. The default precedence of all rules is zero. This works similarly to the
[precedence directives][yacc-prec] in Yacc grammars.
This function can also be used to assign lexical precedence to a given
@ -115,8 +115,8 @@ want to create syntax tree nodes at runtime.
- **`conflicts`** — an array of arrays of rule names. Each inner array represents a set of rules that's involved in an
*LR(1) conflict* that is *intended to exist* in the grammar. When these conflicts occur at runtime, Tree-sitter will use
the GLR algorithm to explore all the possible interpretations. If *multiple* parses end up succeeding, Tree-sitter will
pick the subtree whose corresponding rule has the highest total *dynamic precedence*.
the GLR algorithm to explore all the possible interpretations. If *multiple* parses end up succeeding, Tree-sitter will pick
the subtree whose corresponding rule has the highest total *dynamic precedence*.
- **`externals`** — an array of token names which can be returned by an
[*external scanner*][external-scanners]. External scanners allow you to write custom C code which runs during the lexing
@ -139,10 +139,10 @@ for more details.
array of reserved rules. The reserved rule in the array must be a terminal token meaning it must be a string, regex, token,
or terminal rule. The reserved rule must also exist and be used in the grammar; specifying arbitrary tokens will not work.
The *first* reserved word set in the object is the global word set, meaning it applies to every rule in every parse state.
However, certain keywords are contextual, depending on the rule. For example, in JavaScript, keywords are typically not
allowed as ordinary variables, however, they *can* be used as a property name. In this situation, the `reserved` function
would be used, and the word set to pass in would be the name of the word set that is declared in the `reserved` object that
corresponds to an empty array, signifying *no* keywords are reserved.
However, certain keywords are contextual, depending on the rule. For example, in JavaScript, keywords are typically not allowed
as ordinary variables, however, they *can* be used as a property name. In this situation, the `reserved` function would be used,
and the word set to pass in would be the name of the word set that is declared in the `reserved` object that corresponds to an
empty array, signifying *no* keywords are reserved.
[bison-dprec]: https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
[ebnf]: https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form

View file

@ -1,7 +1,7 @@
# Writing the Grammar
Writing a grammar requires creativity. There are an infinite number of CFGs (context-free grammars) that can be used to
describe any given language. To produce a good Tree-sitter parser, you need to create a grammar with two important properties:
Writing a grammar requires creativity. There are an infinite number of CFGs (context-free grammars) that can be used to describe
any given language. To produce a good Tree-sitter parser, you need to create a grammar with two important properties:
1. **An intuitive structure** — Tree-sitter's output is a [concrete syntax tree][cst]; each node in the tree corresponds
directly to a [terminal or non-terminal symbol][non-terminal] in the grammar. So to produce an easy-to-analyze tree, there
@ -139,8 +139,8 @@ instead. It's often useful to check your progress by trying to parse some real c
## Structuring Rules Well
Imagine that you were just starting work on the [Tree-sitter JavaScript parser][tree-sitter-javascript]. Naively, you might
try to directly mirror the structure of the [ECMAScript Language Spec][ecmascript-spec]. To illustrate the problem with
this approach, consider the following line of code:
try to directly mirror the structure of the [ECMAScript Language Spec][ecmascript-spec]. To illustrate the problem with this
approach, consider the following line of code:
```js
return x + y;
@ -181,17 +181,16 @@ which are unrelated to the actual code.
## Standard Rule Names
Tree-sitter places no restrictions on how to name the rules of your grammar. It can be helpful, however, to follow certain
conventions used by many other established grammars in the ecosystem. Some of these well-established patterns are listed
below:
Tree-sitter places no restrictions on how to name the rules of your grammar. It can be helpful, however, to follow certain conventions
used by many other established grammars in the ecosystem. Some of these well-established patterns are listed below:
- `source_file`: Represents an entire source file; this rule is commonly used as the root node for a grammar.
- `expression`/`statement`: Used to represent statements and expressions for a given language. Commonly defined as a choice
between several more specific sub-expression/sub-statement rules.
- `expression`/`statement`: Used to represent statements and expressions for a given language. Commonly defined as a choice between several
more specific sub-expression/sub-statement rules.
- `block`: Used as the parent node for block scopes, with its children representing the block's contents.
- `type`: Represents the types of a language such as `int`, `char`, and `void`.
- `identifier`: Used for constructs like variable names, function arguments, and object fields; this rule is commonly used
as the `word` token in grammars.
- `identifier`: Used for constructs like variable names, function arguments, and object fields; this rule is commonly used as the `word`
token in grammars.
- `string`: Used to represent `"string literals"`.
- `comment`: Used to represent comments, this rule is commonly used as an `extra`.
@ -309,9 +308,9 @@ This is where `prec.left` and `prec.right` come into use. We want to select the
## Using Conflicts
Sometimes, conflicts are actually desirable. In our JavaScript grammar, expressions and patterns can create intentional
ambiguity. A construct like `[x, y]` could be legitimately parsed as both an array literal (like in `let a = [x, y]`) or
as a destructuring pattern (like in `let [x, y] = arr`).
Sometimes, conflicts are actually desirable. In our JavaScript grammar, expressions and patterns can create intentional ambiguity.
A construct like `[x, y]` could be legitimately parsed as both an array literal (like in `let a = [x, y]`) or as a destructuring
pattern (like in `let [x, y] = arr`).
```js
export default grammar({
@ -565,8 +564,8 @@ as mentioned in the previous page, is `token(prec(N, ...))`.
## Keywords
Many languages have a set of _keyword_ tokens (e.g. `if`, `for`, `return`), as well as a more general token (e.g. `identifier`)
that matches any word, including many of the keyword strings. For example, JavaScript has a keyword `instanceof`, which
is used as a binary operator, like this:
that matches any word, including many of the keyword strings. For example, JavaScript has a keyword `instanceof`, which is
used as a binary operator, like this:
```js
if (a instanceof Something) b();

View file

@ -143,10 +143,10 @@ the second argument, the current character will be treated as whitespace; whites
associated with tokens emitted by the external scanner.
- **`void (*mark_end)(TSLexer *)`** — A function for marking the end of the recognized token. This allows matching tokens
that require multiple characters of lookahead. By default (if you don't call `mark_end`), any character that you moved
past using the `advance` function will be included in the size of the token. But once you call `mark_end`, then any later
calls to `advance` will _not_ increase the size of the returned token. You can call `mark_end` multiple times to increase
the size of the token.
that require multiple characters of lookahead. By default (if you don't call `mark_end`), any character that you moved past
using the `advance` function will be included in the size of the token. But once you call `mark_end`, then any later calls
to `advance` will _not_ increase the size of the returned token. You can call `mark_end` multiple times to increase the size
of the token.
- **`uint32_t (*get_column)(TSLexer *)`** — A function for querying the current column position of the lexer. It returns
the number of codepoints since the start of the current line. The codepoint position is recalculated on every call to this
@ -185,9 +185,9 @@ if (valid_symbols[INDENT] || valid_symbols[DEDENT]) {
### Allocator
Instead of using libc's `malloc`, `calloc`, `realloc`, and `free`, you should use the versions prefixed with `ts_` from
`tree_sitter/alloc.h`. These macros can allow a potential consumer to override the default allocator with their own implementation,
but by default will use the libc functions.
Instead of using libc's `malloc`, `calloc`, `realloc`, and `free`, you should use the versions prefixed with `ts_` from `tree_sitter/alloc.h`.
These macros can allow a potential consumer to override the default allocator with their own implementation, but by default
will use the libc functions.
As a consumer of the tree-sitter core library as well as any parser libraries that might use allocations, you can enable
overriding the default allocator and have it use the same one as the library allocator, which you can set with `ts_set_allocator`.
@ -195,8 +195,7 @@ To enable this overriding in scanners, you must compile them with the `TREE_SITT
the library must be linked into your final app dynamically, since it needs to resolve the internal functions at runtime.
If you are compiling an executable binary that uses the core library, but want to load parsers dynamically at runtime, then
you will have to use a special linker flag on Unix. For non-Darwin systems, that would be `--dynamic-list` and for Darwin
systems, that would be `-exported_symbols_list`. The CLI does exactly this, so you can use it as a reference (check out
`cli/build.rs`).
systems, that would be `-exported_symbols_list`. The CLI does exactly this, so you can use it as a reference (check out `cli/build.rs`).
For example, assuming you wanted to allocate 100 bytes for your scanner, you'd do so like the following example:
@ -294,10 +293,9 @@ bool tree_sitter_my_language_external_scanner_scan(
## Other External Scanner Details
External scanners have priority over Tree-sitter's normal lexing process. When a token listed in the externals array is
valid at a given position, the external scanner is called first. This makes external scanners a powerful way to override
Tree-sitter's default lexing behavior, especially for cases that can't be handled with regular lexical rules, parsing, or
dynamic precedence.
External scanners have priority over Tree-sitter's normal lexing process. When a token listed in the externals array is valid
at a given position, the external scanner is called first. This makes external scanners a powerful way to override Tree-sitter's
default lexing behavior, especially for cases that can't be handled with regular lexical rules, parsing, or dynamic precedence.
During error recovery, Tree-sitter's first step is to call the external scanner's scan function with all tokens marked as
valid. Your scanner should detect and handle this case appropriately. One simple approach is to add an unused "sentinel"

View file

@ -39,8 +39,8 @@ It only shows the *named* nodes, as described in [this section][named-vs-anonymo
```
The expected output section can also *optionally* show the [*field names*][node-field-names] associated with each child
node. To include field names in your tests, you write a node's field name followed by a colon, before the node itself
in the S-expression:
node. To include field names in your tests, you write a node's field name followed by a colon, before the node itself in
the S-expression:
```query
(source_file
@ -87,11 +87,6 @@ The recommendation is to be comprehensive in adding tests. If it's a visible nod
directory. It's typically a good idea to test all the permutations of each language construct. This increases test coverage,
but doubly acquaints readers with a way to examine expected outputs and understand the "edges" of a language.
```admonish tip
After modifying the grammar, you can run `tree-sitter test -u`
to update all syntax trees in corpus files with current parser output.
```
## Attributes
Tests can be annotated with a few `attributes`. Attributes must be put in the header, below the test name, and start with
@ -104,8 +99,8 @@ you can repeat the attribute on a new line.
The following attributes are available:
* `:cst` - This attribute specifies that the expected output should be in the form of a CST instead of the normal S-expression.
This CST matches the format given by `parse --cst`.
* `:cst` - This attribute specifies that the expected output should be in the form of a CST instead of the normal S-expression. This
CST matches the format given by `parse --cst`.
* `:error` — This attribute will assert that the parse tree contains an error. It's useful to just validate that a certain
input is invalid without displaying the whole parse tree, as such you should omit the parse tree below the `---` line.
* `:fail-fast` — This attribute will stop the testing of additional cases if the test marked with this attribute fails.

View file

@ -1,4 +1,4 @@
# Creating parsers
Developing Tree-sitter grammars can have a difficult learning curve, but once you get the hang of it, it can be fun and
even zen-like. This document will help you to get started and to develop a useful mental model.
Developing Tree-sitter grammars can have a difficult learning curve, but once you get the hang of it, it can be fun and even
zen-like. This document will help you to get started and to develop a useful mental model.

View file

@ -10,8 +10,7 @@ file and efficiently update the syntax tree as the source file is edited. Tree-s
- **General** enough to parse any programming language
- **Fast** enough to parse on every keystroke in a text editor
- **Robust** enough to provide useful results even in the presence of syntax errors
- **Dependency-free** so that the runtime library (which is written in pure [C11](https://github.com/tree-sitter/tree-sitter/tree/master/lib))
can be embedded in any application
- **Dependency-free** so that the runtime library (which is written in pure [C11](https://github.com/tree-sitter/tree-sitter/tree/master/lib)) can be embedded in any application
## Language Bindings

View file

@ -2,8 +2,7 @@
## Providing the Code
In the example on the previous page, we parsed source code stored in a simple string using the `ts_parser_parse_string`
function:
In the example on the previous page, we parsed source code stored in a simple string using the `ts_parser_parse_string` function:
```c
TSTree *ts_parser_parse_string(
@ -136,10 +135,10 @@ Consider a grammar rule like this:
if_statement: $ => seq("if", "(", $._expression, ")", $._statement);
```
A syntax node representing an `if_statement` in this language would have 5 children: the condition expression, the body
statement, as well as the `if`, `(`, and `)` tokens. The expression and the statement would be marked as _named_ nodes,
because they have been given explicit names in the grammar. But the `if`, `(`, and `)` nodes would _not_ be named nodes,
because they are represented in the grammar as simple strings.
A syntax node representing an `if_statement` in this language would have 5 children: the condition expression, the body statement,
as well as the `if`, `(`, and `)` tokens. The expression and the statement would be marked as _named_ nodes, because they
have been given explicit names in the grammar. But the `if`, `(`, and `)` nodes would _not_ be named nodes, because they
are represented in the grammar as simple strings.
You can check whether any given node is named:

View file

@ -19,8 +19,8 @@ typedef struct {
void ts_tree_edit(TSTree *, const TSInputEdit *);
```
Then, you can call `ts_parser_parse` again, passing in the old tree. This will create a new tree that internally shares
structure with the old tree.
Then, you can call `ts_parser_parse` again, passing in the old tree. This will create a new tree that internally shares structure
with the old tree.
When you edit a syntax tree, the positions of its nodes will change. If you have stored any `TSNode` instances outside of
the `TSTree`, you must update their positions separately, using the same `TSInputEdit` value, in order to update their

View file

@ -108,9 +108,9 @@ In Tree-sitter grammars, there are usually certain rules that represent abstract
"type", "declaration"). In the `grammar.js` file, these are often written as [hidden rules][hidden rules]
whose definition is a simple [`choice`][grammar dsl] where each member is just a single symbol.
Normally, hidden rules are not mentioned in the node types file, since they don't appear in the syntax tree. But if you
add a hidden rule to the grammar's [`supertypes` list][grammar dsl], then it _will_ show up in the node types file, with
the following special entry:
Normally, hidden rules are not mentioned in the node types file, since they don't appear in the syntax tree. But if you add
a hidden rule to the grammar's [`supertypes` list][grammar dsl], then it _will_ show up in the node
types file, with the following special entry:
- `"subtypes"` — An array of objects that specify the _types_ of nodes that this 'supertype' node can wrap.

View file

@ -15,11 +15,8 @@ A given version of the tree-sitter library is only able to load parsers generate
| >=0.20.3, <=0.24 | 13 | 14 |
| >=0.25 | 13 | 15 |
By default, the tree-sitter CLI will generate parsers using the latest available ABI for that version, but an older ABI
(supported by the CLI) can be selected by passing the [`--abi` option][abi_option] to the `generate` command.
By default, the tree-sitter CLI will generate parsers using the latest available ABI for that version, but an older ABI (supported by the CLI) can be selected by passing the [`--abi` option][abi_option] to the `generate` command.
Note that the ABI version range supported by the CLI can be smaller than for the library: When a new ABI version is released,
older versions will be phased out over a deprecation period, which starts with no longer being able to generate parsers
with the oldest ABI version.
Note that the ABI version range supported by the CLI can be smaller than for the library: When a new ABI version is released, older versions will be phased out over a deprecation period, which starts with no longer being able to generate parsers with the oldest ABI version.
[abi_option]: ../cli/generate.md#--abi-version

View file

@ -6,8 +6,8 @@ the core concepts remain the same.
Tree-sitter's parsing functionality is implemented through its C API, with all functions documented in the [tree_sitter/api.h][api.h]
header file, but if you're working in another language, you can use one of the following bindings found [here](../index.md#language-bindings),
each providing idiomatic access to Tree-sitter's functionality. Of these bindings, the official ones have their own API
doc hosted online at the following pages:
each providing idiomatic access to Tree-sitter's functionality. Of these bindings, the official ones have their own API docs
hosted online at the following pages:
- [Go][go]
- [Java]

View file

@ -1,9 +1,9 @@
# Query Syntax
A _query_ consists of one or more _patterns_, where each pattern is an [S-expression][s-exp] that matches a certain set
of nodes in a syntax tree. The expression to match a given node consists of a pair of parentheses containing two things:
the node's type, and optionally, a series of other S-expressions that match the node's children. For example, this pattern
would match any `binary_expression` node whose children are both `number_literal` nodes:
A _query_ consists of one or more _patterns_, where each pattern is an [S-expression][s-exp] that matches a certain set of
nodes in a syntax tree. The expression to match a given node consists of a pair of parentheses containing two things: the
node's type, and optionally, a series of other S-expressions that match the node's children. For example, this pattern would
match any `binary_expression` node whose children are both `number_literal` nodes:
```query
(binary_expression (number_literal) (number_literal))
@ -99,10 +99,10 @@ by `(ERROR)` queries. Specific missing node types can also be queried:
### Supertype Nodes
Some node types are marked as _supertypes_ in a grammar. A supertype is a node type that contains multiple
subtypes. For example, in the [JavaScript grammar example][grammar], `expression` is a supertype that can represent any
kind of expression, such as a `binary_expression`, `call_expression`, or `identifier`. You can use supertypes in queries
to match any of their subtypes, rather than having to list out each subtype individually. For example, this pattern would
match any kind of expression, even though it's not a visible node in the syntax tree:
subtypes. For example, in the [JavaScript grammar example][grammar], `expression` is a supertype that can represent any kind
of expression, such as a `binary_expression`, `call_expression`, or `identifier`. You can use supertypes in queries to match
any of their subtypes, rather than having to list out each subtype individually. For example, this pattern would match any
kind of expression, even though it's not a visible node in the syntax tree:
```query
(expression) @any-expression

View file

@ -128,15 +128,15 @@ This pattern would match any builtin variable that is not a local variable, beca
# Directives
Similar to predicates, directives are a way to associate arbitrary metadata with a pattern. The only difference between
predicates and directives is that directives end in a `!` character instead of a `?` character.
Similar to predicates, directives are a way to associate arbitrary metadata with a pattern. The only difference between predicates
and directives is that directives end in a `!` character instead of a `?` character.
Tree-sitter's CLI supports the following directives by default:
## The `set!` directive
This directive allows you to associate key-value pairs with a pattern. The key and value can be any arbitrary text that
you see fit.
This directive allows you to associate key-value pairs with a pattern. The key and value can be any arbitrary text that you
see fit.
```query
((comment) @injection.content
@ -156,8 +156,8 @@ another capture are preserved. It takes two arguments, both of which are capture
### The `#strip!` directive
The `#strip!` directive allows you to remove text from a capture. It takes two arguments: the first is the capture to strip
text from, and the second is a regular expression to match against the text. Any text matched by the regular expression
will be removed from the text associated with the capture.
text from, and the second is a regular expression to match against the text. Any text matched by the regular expression will
be removed from the text associated with the capture.
For an example of the `#select-adjacent!` and `#strip!` directives,
view the [code navigation](../../4-code-navigation.md#examples) documentation.

View file

@ -17,7 +17,7 @@
eachSystem = lib.genAttrs systems;
pkgsFor = inputs.nixpkgs.legacyPackages;
version = "0.27.0";
version = "0.26.3";
fs = lib.fileset;
src = fs.toSource {

View file

@ -317,7 +317,7 @@ pub trait Decode {
/// A stateful object for walking a syntax [`Tree`] efficiently.
#[doc(alias = "TSTreeCursor")]
pub struct TreeCursor<'tree>(ffi::TSTreeCursor, PhantomData<&'tree ()>);
pub struct TreeCursor<'cursor>(ffi::TSTreeCursor, PhantomData<&'cursor ()>);
/// A set of patterns that match nodes in a syntax tree.
#[doc(alias = "TSQuery")]
@ -392,7 +392,7 @@ pub struct QueryMatch<'cursor, 'tree> {
}
/// A sequence of [`QueryMatch`]es associated with a given [`QueryCursor`].
pub struct QueryMatches<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> {
pub struct QueryMatches<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> {
ptr: *mut ffi::TSQueryCursor,
query: &'query Query,
text_provider: T,
@ -407,7 +407,7 @@ pub struct QueryMatches<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> {
///
/// During iteration, each element contains a [`QueryMatch`] and index. The index can
/// be used to access the new capture inside of the [`QueryMatch::captures`]'s [`captures`].
pub struct QueryCaptures<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> {
pub struct QueryCaptures<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> {
ptr: *mut ffi::TSQueryCursor,
query: &'query Query,
text_provider: T,
@ -1581,7 +1581,7 @@ impl<'tree> Node<'tree> {
/// Get the [`Language`] that was used to parse this node's syntax tree.
#[doc(alias = "ts_node_language")]
#[must_use]
pub fn language(&self) -> LanguageRef<'tree> {
pub fn language(&self) -> LanguageRef {
LanguageRef(unsafe { ffi::ts_node_language(self.0) }, PhantomData)
}
@ -2082,11 +2082,11 @@ impl fmt::Display for Node<'_> {
}
}
impl<'tree> TreeCursor<'tree> {
impl<'cursor> TreeCursor<'cursor> {
/// Get the tree cursor's current [`Node`].
#[doc(alias = "ts_tree_cursor_current_node")]
#[must_use]
pub fn node(&self) -> Node<'tree> {
pub fn node(&self) -> Node<'cursor> {
Node(
unsafe { ffi::ts_tree_cursor_current_node(&self.0) },
PhantomData,
@ -2227,7 +2227,7 @@ impl<'tree> TreeCursor<'tree> {
/// Re-initialize this tree cursor to start at the original node that the
/// cursor was constructed with.
#[doc(alias = "ts_tree_cursor_reset")]
pub fn reset(&mut self, node: Node<'tree>) {
pub fn reset(&mut self, node: Node<'cursor>) {
unsafe { ffi::ts_tree_cursor_reset(&mut self.0, node.0) };
}
@ -3404,7 +3404,7 @@ impl QueryProperty {
/// Provide a `StreamingIterator` instead of the traditional `Iterator`, as the
/// underlying object in the C library gets updated on each iteration. Copies would
/// have their internal state overwritten, leading to Undefined Behavior
impl<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> StreamingIterator
impl<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> StreamingIterator
for QueryMatches<'query, 'tree, T, I>
{
type Item = QueryMatch<'query, 'tree>;
@ -3435,13 +3435,15 @@ impl<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> StreamingIterator
}
}
impl<T: TextProvider<I>, I: AsRef<[u8]>> StreamingIteratorMut for QueryMatches<'_, '_, T, I> {
impl<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> StreamingIteratorMut
for QueryMatches<'query, 'tree, T, I>
{
fn get_mut(&mut self) -> Option<&mut Self::Item> {
self.current_match.as_mut()
}
}
impl<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> StreamingIterator
impl<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> StreamingIterator
for QueryCaptures<'query, 'tree, T, I>
{
type Item = (QueryMatch<'query, 'tree>, usize);
@ -3478,7 +3480,9 @@ impl<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> StreamingIterator
}
}
impl<T: TextProvider<I>, I: AsRef<[u8]>> StreamingIteratorMut for QueryCaptures<'_, '_, T, I> {
impl<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> StreamingIteratorMut
for QueryCaptures<'query, 'tree, T, I>
{
fn get_mut(&mut self) -> Option<&mut Self::Item> {
self.current_match.as_mut()
}
@ -3618,8 +3622,8 @@ impl From<ffi::TSRange> for Range {
}
}
impl From<&InputEdit> for ffi::TSInputEdit {
fn from(val: &InputEdit) -> Self {
impl From<&'_ InputEdit> for ffi::TSInputEdit {
fn from(val: &'_ InputEdit) -> Self {
Self {
start_byte: val.start_byte as u32,
old_end_byte: val.old_end_byte as u32,

View file

@ -1,12 +1,12 @@
{
"name": "web-tree-sitter",
"version": "0.27.0",
"version": "0.26.3",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "web-tree-sitter",
"version": "0.27.0",
"version": "0.26.3",
"license": "MIT",
"devDependencies": {
"@eslint/js": "^9.39.1",

View file

@ -1,6 +1,6 @@
{
"name": "web-tree-sitter",
"version": "0.27.0",
"version": "0.26.3",
"description": "Tree-sitter bindings for the web",
"repository": {
"type": "git",

View file

@ -1,8 +0,0 @@
/**
 * Try to build a `FinalizationRegistry` that calls `handler` with the held
 * value of each registered object once that object has been collected.
 *
 * @param handler - Cleanup callback passed straight to the registry constructor.
 * @returns The new registry, or `undefined` when constructing it throws
 * (for example because the runtime has no usable `FinalizationRegistry`).
 * The failure is reported through `console.error` and never rethrown.
 */
export function newFinalizer<T>(handler: (value: T) => void): FinalizationRegistry<T> | undefined {
  let registry: FinalizationRegistry<T> | undefined;
  try {
    registry = new FinalizationRegistry(handler);
  } catch (error) {
    // Fall back to "no automatic cleanup"; callers use optional chaining on the result.
    console.error('Unsupported FinalizationRegistry:', error);
    registry = undefined;
  }
  return registry;
}

View file

@ -1,10 +1,5 @@
import { C, Internal, assertInternal } from './constants';
import { Language } from './language';
import { newFinalizer } from './finalization_registry';
const finalizer = newFinalizer((address: number) => {
C._ts_lookahead_iterator_delete(address);
});
export class LookaheadIterator implements Iterable<string> {
/** @internal */
@ -18,7 +13,6 @@ export class LookaheadIterator implements Iterable<string> {
assertInternal(internal);
this[0] = address;
this.language = language;
finalizer?.register(this, address, this);
}
/** Get the current symbol of the lookahead iterator. */
@ -33,7 +27,6 @@ export class LookaheadIterator implements Iterable<string> {
/** Delete the lookahead iterator, freeing its resources. */
delete(): void {
finalizer?.unregister(this);
C._ts_lookahead_iterator_delete(this[0]);
this[0] = 0;
}

View file

@ -3,7 +3,6 @@ import { Language } from './language';
import { marshalRange, unmarshalRange } from './marshal';
import { checkModule, initializeBinding } from './bindings';
import { Tree } from './tree';
import { newFinalizer } from './finalization_registry';
/**
* Options for parsing
@ -83,11 +82,6 @@ export let LANGUAGE_VERSION: number;
*/
export let MIN_COMPATIBLE_VERSION: number;
const finalizer = newFinalizer((addresses: number[]) => {
C._ts_parser_delete(addresses[0]);
C._free(addresses[1]);
});
/**
* A stateful object that is used to produce a {@link Tree} based on some
* source code.
@ -123,7 +117,6 @@ export class Parser {
*/
constructor() {
this.initialize();
finalizer?.register(this, [this[0], this[1]], this);
}
/** @internal */
@ -138,7 +131,6 @@ export class Parser {
/** Delete the parser, freeing its resources. */
delete() {
finalizer?.unregister(this);
C._ts_parser_delete(this[0]);
C._free(this[1]);
this[0] = 0;

View file

@ -3,7 +3,6 @@ import { Node } from './node';
import { marshalNode, unmarshalCaptures } from './marshal';
import { TRANSFER_BUFFER } from './parser';
import { Language } from './language';
import { newFinalizer } from './finalization_registry';
const PREDICATE_STEP_TYPE_CAPTURE = 1;
@ -507,10 +506,6 @@ function parsePattern(
}
}
const finalizer = newFinalizer((address: number) => {
C._ts_query_delete(address);
});
export class Query {
/** @internal */
private [0] = 0; // Internal handle for Wasm
@ -692,12 +687,10 @@ export class Query {
this.assertedProperties = assertedProperties;
this.refutedProperties = refutedProperties;
this.exceededMatchLimit = false;
finalizer?.register(this, address, this);
}
/** Delete the query, freeing its resources. */
delete(): void {
finalizer?.unregister(this);
C._ts_query_delete(this[0]);
this[0] = 0;
}

View file

@ -5,7 +5,6 @@ import { TreeCursor } from './tree_cursor';
import { marshalEdit, marshalPoint, unmarshalNode, unmarshalRange } from './marshal';
import { TRANSFER_BUFFER } from './parser';
import { Edit } from './edit';
import { newFinalizer } from './finalization_registry';
/** @internal */
export function getText(tree: Tree, startIndex: number, endIndex: number, startPosition: Point): string {
@ -29,10 +28,6 @@ export function getText(tree: Tree, startIndex: number, endIndex: number, startP
return result ?? '';
}
const finalizer = newFinalizer((address: number) => {
C._ts_tree_delete(address);
});
/** A tree that represents the syntactic structure of a source code file. */
export class Tree {
/** @internal */
@ -50,7 +45,6 @@ export class Tree {
this[0] = address;
this.language = language;
this.textCallback = textCallback;
finalizer?.register(this, address, this);
}
/** Create a shallow copy of the syntax tree. This is very fast. */
@ -61,7 +55,6 @@ export class Tree {
/** Delete the syntax tree, freeing its resources. */
delete(): void {
finalizer?.unregister(this);
C._ts_tree_delete(this[0]);
this[0] = 0;
}

View file

@ -3,11 +3,6 @@ import { marshalNode, marshalPoint, marshalTreeCursor, unmarshalNode, unmarshalP
import { Node } from './node';
import { TRANSFER_BUFFER } from './parser';
import { getText, Tree } from './tree';
import { newFinalizer } from './finalization_registry';
const finalizer = newFinalizer((address: number) => {
C._ts_tree_cursor_delete_wasm(address);
});
/** A stateful object for walking a syntax {@link Tree} efficiently. */
export class TreeCursor {
@ -35,7 +30,6 @@ export class TreeCursor {
assertInternal(internal);
this.tree = tree;
unmarshalTreeCursor(this);
finalizer?.register(this, this.tree[0], this);
}
/** Creates a deep copy of the tree cursor. This allocates new memory. */
@ -48,7 +42,6 @@ export class TreeCursor {
/** Delete the tree cursor, freeing its resources. */
delete(): void {
finalizer?.unregister(this);
marshalTreeCursor(this);
C._ts_tree_cursor_delete_wasm(this.tree[0]);
this[0] = this[1] = this[2] = 0;

View file

@ -1,74 +0,0 @@
import { describe, expect, it } from 'vitest';
import { gc, event, Finalizer } from './memory';
// hijack finalization registry before import web-tree-sitter
globalThis.FinalizationRegistry = Finalizer;
// Checks how native resources are released, using the `Finalizer` wrapper from
// './memory', which emits a 'gc' event on `event` each time a finalization
// callback has run. Both tests count those events.
describe('Memory Management', () => {
// Case 1: every object is released explicitly with .delete(), so no
// finalization callback is expected to run afterwards.
describe('call .delete()', () => {
it('test free memory manually', async () => {
// Request a garbage collection every 100ms for the duration of the test.
const timer = setInterval(() => {
gc();
}, 100);
// Number of 'gc' events (finalization callbacks) observed.
let done = 0;
event.on('gc', () => {
done++;
});
// The objects are created inside an async IIFE, so none of them is
// referenced by a local variable once it has completed.
await (async () => {
// Modules are imported dynamically, after the top of this file has
// replaced the global FinalizationRegistry.
const { JavaScript } = await (await import('./helper')).default;
const { Parser, Query } = await import('../src');
const parser = new Parser();
parser.setLanguage(JavaScript);
const tree = parser.parse('1+1')!;
const copyTree = tree.copy();
const cursor = tree.walk();
const copyCursor = cursor.copy();
const lookaheadIterator = JavaScript.lookaheadIterator(cursor.currentNode.nextParseState)!;
const query = new Query(JavaScript, '(identifier) @element');
// Explicitly release all seven objects created above.
parser.delete();
tree.delete();
copyTree.delete();
cursor.delete();
copyCursor.delete();
lookaheadIterator.delete();
query.delete();
})();
// Give the periodic gc() calls one second to run.
await new Promise((resolve) => setTimeout(resolve, 1000));
clearInterval(timer);
// No 'gc' event may have fired for the explicitly deleted objects.
expect(done).toBe(0);
});
});
// Case 2: nothing is deleted explicitly; the test finishes only once a
// finalization callback has been observed for each of the seven objects.
describe('do not call .delete()', () => {
it('test free memory automatically', async () => {
// Request a garbage collection every 100ms until all callbacks have fired.
const timer = setInterval(() => {
gc();
}, 100);
let done = 0;
// Resolves on the seventh 'gc' event; until then the test keeps waiting
// (no timeout is set in this file).
const promise = new Promise((resolve) => {
event.on('gc', () => {
if (++done === 7) {
resolve(undefined);
clearInterval(timer);
}
console.log('free memory times: ', done);
});
});
// Create the seven objects (numbered in the trailing comments) inside an
// async IIFE and never call .delete() on them.
await (async () => {
const { JavaScript } = await (await import('./helper')).default;
const { Parser, Query } = await import('../src');
const parser = new Parser(); // 1
parser.setLanguage(JavaScript);
const tree = parser.parse('1+1')!; // 2
tree.copy(); // 3
const cursor = tree.walk(); // 4
cursor.copy(); // 5
JavaScript.lookaheadIterator(cursor.currentNode.nextParseState)!; // 6
new Query(JavaScript, '(identifier) @element'); // 7
})();
await promise;
});
});
});

View file

@ -1,20 +0,0 @@
import { EventEmitter } from 'events';
import { Session } from 'inspector';
// Inspector session connected at module load; `gc()` posts its commands here.
const session = new Session();
session.connect();

/** Post a `HeapProfiler.collectGarbage` command on the inspector session. */
export function gc() {
  session.post('HeapProfiler.collectGarbage');
}

/** Emitter on which every `Finalizer` announces a completed cleanup as a 'gc' event. */
export const event = new EventEmitter();

/**
 * A `FinalizationRegistry` whose cleanup callback first runs the supplied
 * `handler` with the held value and then emits 'gc' on {@link event}.
 */
export class Finalizer<T> extends FinalizationRegistry<T> {
  constructor(handler: (value: T) => void) {
    const notifyingHandler = (heldValue: T): void => {
      handler(heldValue);
      event.emit('gc');
    };
    super(notifyingHandler);
  }
}

View file

@ -1,25 +0,0 @@
import { describe, it } from 'vitest';
// Smoke test: with the global FinalizationRegistry removed, creating and
// explicitly deleting every kind of object must still work. The test contains
// no assertions; it passes as long as nothing below throws.
describe('FinalizationRegistry is unsupported', () => {
it('test FinalizationRegistry is unsupported', async () => {
// Remove the global before the dynamic imports below load the library.
// @ts-expect-error: test FinalizationRegistry is not supported
globalThis.FinalizationRegistry = undefined;
const { JavaScript } = await (await import('./helper')).default;
const { Parser, Query } = await import('../src');
// Create one of each object kind exercised here (seven objects in total).
const parser = new Parser();
parser.setLanguage(JavaScript);
const tree = parser.parse('1+1')!;
const copyTree = tree.copy();
const cursor = tree.walk();
const copyCursor = cursor.copy();
const lookaheadIterator = JavaScript.lookaheadIterator(cursor.currentNode.nextParseState)!;
const query = new Query(JavaScript, '(identifier) @element');
// Release everything explicitly with .delete().
parser.delete();
tree.delete();
copyTree.delete();
cursor.delete();
copyCursor.delete();
lookaheadIterator.delete();
query.delete();
});
});