Compare commits
26 commits
master
...
ts-capture
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c2e50ccd11 | ||
|
|
1a54b1794d | ||
|
|
4c89725111 | ||
|
|
705130705a | ||
|
|
e5ee144b0a | ||
|
|
90885404ce | ||
|
|
152d2756fc | ||
|
|
f05efbb352 | ||
|
|
1f221c8500 | ||
|
|
fdca0718bc | ||
|
|
fa7b1b2a66 | ||
|
|
adcc4d1f7b | ||
|
|
7d9c544c96 | ||
|
|
c1e49d1571 | ||
|
|
eae6554735 | ||
|
|
48ee942c4f | ||
|
|
9ee2b87dd6 | ||
|
|
fb91deb8d9 | ||
|
|
789a966f96 | ||
|
|
3c49fef0e3 | ||
|
|
8a297b86bc | ||
|
|
ac6644016c | ||
|
|
a80765614b | ||
|
|
34602af22c | ||
|
|
c4f81931e6 | ||
|
|
25777e5a64 |
67 changed files with 343 additions and 3299 deletions
25
.github/scripts/wasm_stdlib.js
vendored
25
.github/scripts/wasm_stdlib.js
vendored
|
|
@ -1,25 +0,0 @@
|
|||
module.exports = async ({ github, context, core }) => {
|
||||
if (context.eventName !== 'pull_request') return;
|
||||
|
||||
const prNumber = context.payload.pull_request.number;
|
||||
const owner = context.repo.owner;
|
||||
const repo = context.repo.repo;
|
||||
|
||||
const { data: files } = await github.rest.pulls.listFiles({
|
||||
owner,
|
||||
repo,
|
||||
pull_number: prNumber
|
||||
});
|
||||
|
||||
const changedFiles = files.map(file => file.filename);
|
||||
|
||||
const wasmStdLibSrc = 'crates/language/wasm/';
|
||||
const dirChanged = changedFiles.some(file => file.startsWith(wasmStdLibSrc));
|
||||
|
||||
if (!dirChanged) return;
|
||||
|
||||
const wasmStdLibHeader = 'lib/src/wasm/wasm-stdlib.h';
|
||||
const requiredChanged = changedFiles.includes(wasmStdLibHeader);
|
||||
|
||||
if (!requiredChanged) core.setFailed(`Changes detected in ${wasmStdLibSrc} but ${wasmStdLibHeader} was not modified.`);
|
||||
};
|
||||
2
.github/workflows/backport.yml
vendored
2
.github/workflows/backport.yml
vendored
|
|
@ -24,7 +24,7 @@ jobs:
|
|||
private-key: ${{ secrets.BACKPORT_KEY }}
|
||||
|
||||
- name: Create backport PR
|
||||
uses: korthout/backport-action@v4
|
||||
uses: korthout/backport-action@v3
|
||||
with:
|
||||
pull_title: "${pull_title}"
|
||||
label_pattern: "^ci:backport ([^ ]+)$"
|
||||
|
|
|
|||
4
.github/workflows/build.yml
vendored
4
.github/workflows/build.yml
vendored
|
|
@ -278,7 +278,7 @@ jobs:
|
|||
|
||||
- name: Upload CLI artifact
|
||||
if: "!matrix.no-run"
|
||||
uses: actions/upload-artifact@v6
|
||||
uses: actions/upload-artifact@v5
|
||||
with:
|
||||
name: tree-sitter.${{ matrix.platform }}
|
||||
path: target/${{ matrix.target }}/release/tree-sitter${{ contains(matrix.target, 'windows') && '.exe' || '' }}
|
||||
|
|
@ -287,7 +287,7 @@ jobs:
|
|||
|
||||
- name: Upload Wasm artifacts
|
||||
if: matrix.platform == 'linux-x64'
|
||||
uses: actions/upload-artifact@v6
|
||||
uses: actions/upload-artifact@v5
|
||||
with:
|
||||
name: tree-sitter.wasm
|
||||
path: |
|
||||
|
|
|
|||
3
.github/workflows/ci.yml
vendored
3
.github/workflows/ci.yml
vendored
|
|
@ -44,6 +44,3 @@ jobs:
|
|||
|
||||
build:
|
||||
uses: ./.github/workflows/build.yml
|
||||
|
||||
check-wasm-stdlib:
|
||||
uses: ./.github/workflows/wasm_stdlib.yml
|
||||
|
|
|
|||
2
.github/workflows/release.yml
vendored
2
.github/workflows/release.yml
vendored
|
|
@ -25,7 +25,7 @@ jobs:
|
|||
uses: actions/checkout@v6
|
||||
|
||||
- name: Download build artifacts
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v6
|
||||
with:
|
||||
path: artifacts
|
||||
|
||||
|
|
|
|||
19
.github/workflows/wasm_stdlib.yml
vendored
19
.github/workflows/wasm_stdlib.yml
vendored
|
|
@ -1,19 +0,0 @@
|
|||
name: Check Wasm Stdlib build
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
|
||||
jobs:
|
||||
check:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Check directory changes
|
||||
uses: actions/github-script@v8
|
||||
with:
|
||||
script: |
|
||||
const scriptPath = `${process.env.GITHUB_WORKSPACE}/.github/scripts/wasm_stdlib.js`;
|
||||
const script = require(scriptPath);
|
||||
return script({ github, context, core });
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
cmake_minimum_required(VERSION 3.13)
|
||||
|
||||
project(tree-sitter
|
||||
VERSION "0.27.0"
|
||||
VERSION "0.26.3"
|
||||
DESCRIPTION "An incremental parsing system for programming tools"
|
||||
HOMEPAGE_URL "https://tree-sitter.github.io/tree-sitter/"
|
||||
LANGUAGES C)
|
||||
|
|
|
|||
2749
Cargo.lock
generated
2749
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
33
Cargo.toml
33
Cargo.toml
|
|
@ -1,26 +1,17 @@
|
|||
[workspace]
|
||||
default-members = ["crates/cli"]
|
||||
members = [
|
||||
"crates/cli",
|
||||
"crates/config",
|
||||
"crates/generate",
|
||||
"crates/highlight",
|
||||
"crates/loader",
|
||||
"crates/tags",
|
||||
"crates/xtask",
|
||||
"crates/language",
|
||||
"lib",
|
||||
]
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
version = "0.27.0"
|
||||
version = "0.26.3"
|
||||
authors = [
|
||||
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
|
||||
"Amaan Qureshi <amaanq12@gmail.com>",
|
||||
]
|
||||
edition = "2021"
|
||||
rust-version = "1.85"
|
||||
rust-version = "1.84"
|
||||
homepage = "https://tree-sitter.github.io/tree-sitter"
|
||||
repository = "https://github.com/tree-sitter/tree-sitter"
|
||||
license = "MIT"
|
||||
|
|
@ -106,8 +97,8 @@ ansi_colours = "1.2.3"
|
|||
anstyle = "1.0.13"
|
||||
anyhow = "1.0.100"
|
||||
bstr = "1.12.0"
|
||||
cc = "1.2.53"
|
||||
clap = { version = "4.5.54", features = [
|
||||
cc = "1.2.48"
|
||||
clap = { version = "4.5.53", features = [
|
||||
"cargo",
|
||||
"derive",
|
||||
"env",
|
||||
|
|
@ -115,7 +106,7 @@ clap = { version = "4.5.54", features = [
|
|||
"string",
|
||||
"unstable-styles",
|
||||
] }
|
||||
clap_complete = "4.5.65"
|
||||
clap_complete = "4.5.61"
|
||||
clap_complete_nushell = "4.5.10"
|
||||
crc32fast = "1.5.0"
|
||||
ctor = "0.2.9"
|
||||
|
|
@ -140,7 +131,7 @@ rustc-hash = "2.1.1"
|
|||
schemars = "1.0.5"
|
||||
semver = { version = "1.0.27", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.149", features = ["preserve_order"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
similar = "2.7.0"
|
||||
smallbitvec = "2.6.0"
|
||||
streaming-iterator = "0.1.9"
|
||||
|
|
@ -153,11 +144,11 @@ walkdir = "2.5.0"
|
|||
wasmparser = "0.243.0"
|
||||
webbrowser = "1.0.5"
|
||||
|
||||
tree-sitter = { version = "0.27.0", path = "./lib" }
|
||||
tree-sitter-generate = { version = "0.27.0", path = "./crates/generate" }
|
||||
tree-sitter-loader = { version = "0.27.0", path = "./crates/loader" }
|
||||
tree-sitter-config = { version = "0.27.0", path = "./crates/config" }
|
||||
tree-sitter-highlight = { version = "0.27.0", path = "./crates/highlight" }
|
||||
tree-sitter-tags = { version = "0.27.0", path = "./crates/tags" }
|
||||
tree-sitter = { version = "0.26.3", path = "./lib" }
|
||||
tree-sitter-generate = { version = "0.26.3", path = "./crates/generate" }
|
||||
tree-sitter-loader = { version = "0.26.3", path = "./crates/loader" }
|
||||
tree-sitter-config = { version = "0.26.3", path = "./crates/config" }
|
||||
tree-sitter-highlight = { version = "0.26.3", path = "./crates/highlight" }
|
||||
tree-sitter-tags = { version = "0.26.3", path = "./crates/tags" }
|
||||
|
||||
tree-sitter-language = { version = "0.1", path = "./crates/language" }
|
||||
|
|
|
|||
2
Makefile
2
Makefile
|
|
@ -1,4 +1,4 @@
|
|||
VERSION := 0.27.0
|
||||
VERSION := 0.26.3
|
||||
DESCRIPTION := An incremental parsing system for programming tools
|
||||
HOMEPAGE_URL := https://tree-sitter.github.io/tree-sitter/
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
.{
|
||||
.name = .tree_sitter,
|
||||
.fingerprint = 0x841224b447ac0d4f,
|
||||
.version = "0.27.0",
|
||||
.version = "0.26.3",
|
||||
.minimum_zig_version = "0.14.1",
|
||||
.paths = .{
|
||||
"build.zig",
|
||||
|
|
|
|||
|
|
@ -7,8 +7,7 @@
|
|||
[npmjs.com]: https://www.npmjs.org/package/tree-sitter-cli
|
||||
[npmjs.com badge]: https://img.shields.io/npm/v/tree-sitter-cli.svg?color=%23BF4A4A
|
||||
|
||||
The Tree-sitter CLI allows you to develop, test, and use Tree-sitter grammars from the command line. It works on `MacOS`,
|
||||
`Linux`, and `Windows`.
|
||||
The Tree-sitter CLI allows you to develop, test, and use Tree-sitter grammars from the command line. It works on `MacOS`, `Linux`, and `Windows`.
|
||||
|
||||
### Installation
|
||||
|
||||
|
|
@ -35,11 +34,9 @@ The `tree-sitter` binary itself has no dependencies, but specific commands have
|
|||
|
||||
### Commands
|
||||
|
||||
* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current
|
||||
working directory. See [the documentation] for more information.
|
||||
* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. See [the documentation] for more information.
|
||||
|
||||
* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory.
|
||||
See [the documentation] for more information.
|
||||
* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation] for more information.
|
||||
|
||||
* `parse` - The `tree-sitter parse` command will parse a file (or list of files) using Tree-sitter parsers.
|
||||
|
||||
|
|
|
|||
4
crates/cli/npm/package-lock.json
generated
4
crates/cli/npm/package-lock.json
generated
|
|
@ -1,12 +1,12 @@
|
|||
{
|
||||
"name": "tree-sitter-cli",
|
||||
"version": "0.27.0",
|
||||
"version": "0.26.3",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "tree-sitter-cli",
|
||||
"version": "0.27.0",
|
||||
"version": "0.26.3",
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "tree-sitter-cli",
|
||||
"version": "0.27.0",
|
||||
"version": "0.26.3",
|
||||
"author": {
|
||||
"name": "Max Brunsfeld",
|
||||
"email": "maxbrunsfeld@gmail.com"
|
||||
|
|
|
|||
|
|
@ -953,7 +953,7 @@ fn render_node_range(
|
|||
|
||||
fn cst_render_node(
|
||||
opts: &ParseFileOptions,
|
||||
cursor: &TreeCursor,
|
||||
cursor: &mut TreeCursor,
|
||||
source_code: &[u8],
|
||||
out: &mut impl Write,
|
||||
total_width: usize,
|
||||
|
|
|
|||
|
|
@ -19,8 +19,7 @@
|
|||
--light-scrollbar-track: #f1f1f1;
|
||||
--light-scrollbar-thumb: #c1c1c1;
|
||||
--light-scrollbar-thumb-hover: #a8a8a8;
|
||||
--light-tree-row-bg: #e3f2fd;
|
||||
|
||||
|
||||
--dark-bg: #1d1f21;
|
||||
--dark-border: #2d2d2d;
|
||||
--dark-text: #c5c8c6;
|
||||
|
|
@ -29,7 +28,6 @@
|
|||
--dark-scrollbar-track: #25282c;
|
||||
--dark-scrollbar-thumb: #4a4d51;
|
||||
--dark-scrollbar-thumb-hover: #5a5d61;
|
||||
--dark-tree-row-bg: #373737;
|
||||
|
||||
--primary-color: #0550ae;
|
||||
--primary-color-alpha: rgba(5, 80, 174, 0.1);
|
||||
|
|
@ -44,7 +42,6 @@
|
|||
--text-color: var(--dark-text);
|
||||
--panel-bg: var(--dark-panel-bg);
|
||||
--code-bg: var(--dark-code-bg);
|
||||
--tree-row-bg: var(--dark-tree-row-bg);
|
||||
}
|
||||
|
||||
[data-theme="light"] {
|
||||
|
|
@ -53,7 +50,6 @@
|
|||
--text-color: var(--light-text);
|
||||
--panel-bg: white;
|
||||
--code-bg: white;
|
||||
--tree-row-bg: var(--light-tree-row-bg);
|
||||
}
|
||||
|
||||
/* Base Styles */
|
||||
|
|
@ -279,7 +275,7 @@
|
|||
}
|
||||
|
||||
#output-container a.highlighted {
|
||||
background-color: #cae2ff;
|
||||
background-color: #d9d9d9;
|
||||
color: red;
|
||||
border-radius: 3px;
|
||||
text-decoration: underline;
|
||||
|
|
@ -350,7 +346,7 @@
|
|||
}
|
||||
|
||||
& #output-container a.highlighted {
|
||||
background-color: #656669;
|
||||
background-color: #373b41;
|
||||
color: red;
|
||||
}
|
||||
|
||||
|
|
@ -377,9 +373,6 @@
|
|||
color: var(--dark-text);
|
||||
}
|
||||
}
|
||||
.tree-row:has(.highlighted) {
|
||||
background-color: var(--tree-row-bg);
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
|
|
|
|||
|
|
@ -1068,6 +1068,7 @@ fn run_tests(
|
|||
return Ok(true);
|
||||
}
|
||||
|
||||
let failure_count = test_summary.parse_failures.len();
|
||||
let mut ran_test_in_group = false;
|
||||
|
||||
let matches_filter = |name: &str, file_name: &Option<String>, opts: &TestOptions| {
|
||||
|
|
@ -1131,7 +1132,7 @@ fn run_tests(
|
|||
test_summary.parse_results.pop_traversal();
|
||||
|
||||
if let Some(file_path) = file_path {
|
||||
if opts.update {
|
||||
if opts.update && test_summary.parse_failures.len() - failure_count > 0 {
|
||||
write_tests(&file_path, corrected_entries)?;
|
||||
}
|
||||
corrected_entries.clear();
|
||||
|
|
|
|||
|
|
@ -225,7 +225,7 @@ impl Pattern {
|
|||
}
|
||||
|
||||
// Find every matching combination of child patterns and child nodes.
|
||||
let mut finished_matches = Vec::<Match<'_, 'tree>>::new();
|
||||
let mut finished_matches = Vec::<Match>::new();
|
||||
if cursor.goto_first_child() {
|
||||
let mut match_states = vec![(0, mat)];
|
||||
loop {
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ log.workspace = true
|
|||
pathdiff = { version = "0.2.3", optional = true }
|
||||
regex.workspace = true
|
||||
regex-syntax.workspace = true
|
||||
rquickjs = { version = "0.11.0", optional = true, features = [
|
||||
rquickjs = { version = "0.10.0", optional = true, features = [
|
||||
"bindgen",
|
||||
"loader",
|
||||
"macro",
|
||||
|
|
|
|||
|
|
@ -95,27 +95,9 @@ impl Console {
|
|||
Type::Module => "module".to_string(),
|
||||
Type::BigInt => v.get::<String>().unwrap_or_else(|_| "BigInt".to_string()),
|
||||
Type::Unknown => "unknown".to_string(),
|
||||
Type::Array => {
|
||||
let js_vals = v
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter::<Value<'_>>()
|
||||
.filter_map(|x| x.ok())
|
||||
.map(|x| {
|
||||
if x.is_string() {
|
||||
format!("'{}'", Self::format_args(&[x]))
|
||||
} else {
|
||||
Self::format_args(&[x])
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
|
||||
format!("[ {js_vals} ]")
|
||||
}
|
||||
Type::Symbol
|
||||
| Type::Object
|
||||
| Type::Proxy
|
||||
| Type::Array
|
||||
| Type::Function
|
||||
| Type::Constructor
|
||||
| Type::Promise
|
||||
|
|
@ -215,11 +197,11 @@ fn try_resolve_path(path: &Path) -> rquickjs::Result<PathBuf> {
|
|||
}
|
||||
|
||||
#[allow(clippy::needless_pass_by_value)]
|
||||
fn require_from_module<'js>(
|
||||
ctx: Ctx<'js>,
|
||||
fn require_from_module<'a>(
|
||||
ctx: Ctx<'a>,
|
||||
module_path: String,
|
||||
from_module: &str,
|
||||
) -> rquickjs::Result<Value<'js>> {
|
||||
) -> rquickjs::Result<Value<'a>> {
|
||||
let current_module = PathBuf::from(from_module);
|
||||
let current_dir = if current_module.is_file() {
|
||||
current_module.parent().unwrap_or(Path::new("."))
|
||||
|
|
@ -234,13 +216,13 @@ fn require_from_module<'js>(
|
|||
load_module_from_content(&ctx, &resolved_path, &contents)
|
||||
}
|
||||
|
||||
fn load_module_from_content<'js>(
|
||||
ctx: &Ctx<'js>,
|
||||
fn load_module_from_content<'a>(
|
||||
ctx: &Ctx<'a>,
|
||||
path: &Path,
|
||||
contents: &str,
|
||||
) -> rquickjs::Result<Value<'js>> {
|
||||
) -> rquickjs::Result<Value<'a>> {
|
||||
if path.extension().is_some_and(|ext| ext == "json") {
|
||||
return ctx.eval::<Value<'js>, _>(format!("JSON.parse({contents:?})"));
|
||||
return ctx.eval::<Value, _>(format!("JSON.parse({contents:?})"));
|
||||
}
|
||||
|
||||
let exports = Object::new(ctx.clone())?;
|
||||
|
|
@ -256,7 +238,7 @@ fn load_module_from_content<'js>(
|
|||
let module_path = filename.clone();
|
||||
let require = Function::new(
|
||||
ctx.clone(),
|
||||
move |ctx_inner: Ctx<'js>, target_path: String| -> rquickjs::Result<Value<'js>> {
|
||||
move |ctx_inner: Ctx<'a>, target_path: String| -> rquickjs::Result<Value<'a>> {
|
||||
require_from_module(ctx_inner, target_path, &module_path)
|
||||
},
|
||||
)?;
|
||||
|
|
@ -264,8 +246,8 @@ fn load_module_from_content<'js>(
|
|||
let wrapper =
|
||||
format!("(function(exports, require, module, __filename, __dirname) {{ {contents} }})");
|
||||
|
||||
let module_func = ctx.eval::<Function<'js>, _>(wrapper)?;
|
||||
module_func.call::<_, Value<'js>>((exports, require, module_obj.clone(), filename, dirname))?;
|
||||
let module_func = ctx.eval::<Function, _>(wrapper)?;
|
||||
module_func.call::<_, Value>((exports, require, module_obj.clone(), filename, dirname))?;
|
||||
|
||||
module_obj.get("exports")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,4 +28,4 @@ regex.workspace = true
|
|||
thiserror.workspace = true
|
||||
streaming-iterator.workspace = true
|
||||
|
||||
tree-sitter.workspace = true
|
||||
tree-sitter = "0.26"
|
||||
|
|
|
|||
|
|
@ -297,6 +297,7 @@ impl TSHighlighter {
|
|||
})
|
||||
})
|
||||
},
|
||||
&|_, _, _| true,
|
||||
);
|
||||
|
||||
if let Ok(highlights) = highlights {
|
||||
|
|
|
|||
|
|
@ -162,15 +162,17 @@ struct LocalScope<'a> {
|
|||
local_defs: Vec<LocalDef<'a>>,
|
||||
}
|
||||
|
||||
struct HighlightIter<'a, F>
|
||||
struct HighlightIter<'a, F, G>
|
||||
where
|
||||
F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
|
||||
G: Fn(&QueryMatch, &Query, &[u8]) -> bool + 'a,
|
||||
{
|
||||
source: &'a [u8],
|
||||
language_name: &'a str,
|
||||
byte_offset: usize,
|
||||
highlighter: &'a mut Highlighter,
|
||||
injection_callback: F,
|
||||
capture_filter: &'a G,
|
||||
cancellation_flag: Option<&'a AtomicUsize>,
|
||||
layers: Vec<HighlightIterLayer<'a>>,
|
||||
iter_count: usize,
|
||||
|
|
@ -181,7 +183,7 @@ where
|
|||
struct HighlightIterLayer<'a> {
|
||||
_tree: Tree,
|
||||
cursor: QueryCursor,
|
||||
captures: iter::Peekable<_QueryCaptures<'a, 'a, &'a [u8], &'a [u8]>>,
|
||||
captures: iter::Peekable<Box<dyn Iterator<Item = (QueryMatch<'a, 'a>, usize)> + 'a>>,
|
||||
config: &'a HighlightConfiguration,
|
||||
highlight_end_stack: Vec<usize>,
|
||||
scope_stack: Vec<LocalScope<'a>>,
|
||||
|
|
@ -189,7 +191,7 @@ struct HighlightIterLayer<'a> {
|
|||
depth: usize,
|
||||
}
|
||||
|
||||
pub struct _QueryCaptures<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> {
|
||||
pub struct _QueryCaptures<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> {
|
||||
ptr: *mut ffi::TSQueryCursor,
|
||||
query: &'query Query,
|
||||
text_provider: T,
|
||||
|
|
@ -225,7 +227,7 @@ impl<'tree> _QueryMatch<'_, 'tree> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> Iterator
|
||||
impl<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> Iterator
|
||||
for _QueryCaptures<'query, 'tree, T, I>
|
||||
{
|
||||
type Item = (QueryMatch<'query, 'tree>, usize);
|
||||
|
|
@ -244,6 +246,7 @@ impl<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> Iterator
|
|||
&m.assume_init(),
|
||||
self.ptr,
|
||||
));
|
||||
|
||||
if result.satisfies_text_predicates(
|
||||
self.query,
|
||||
&mut self.buffer1,
|
||||
|
|
@ -252,6 +255,7 @@ impl<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> Iterator
|
|||
) {
|
||||
return Some((result, capture_index as usize));
|
||||
}
|
||||
|
||||
result.remove();
|
||||
} else {
|
||||
return None;
|
||||
|
|
@ -287,6 +291,7 @@ impl Highlighter {
|
|||
source: &'a [u8],
|
||||
cancellation_flag: Option<&'a AtomicUsize>,
|
||||
mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
|
||||
query_filter: &'a impl Fn(&QueryMatch, &Query, &[u8]) -> bool,
|
||||
) -> Result<impl Iterator<Item = Result<HighlightEvent, Error>> + 'a, Error> {
|
||||
let layers = HighlightIterLayer::new(
|
||||
source,
|
||||
|
|
@ -294,6 +299,7 @@ impl Highlighter {
|
|||
self,
|
||||
cancellation_flag,
|
||||
&mut injection_callback,
|
||||
query_filter,
|
||||
config,
|
||||
0,
|
||||
vec![Range {
|
||||
|
|
@ -309,6 +315,7 @@ impl Highlighter {
|
|||
language_name: &config.language_name,
|
||||
byte_offset: 0,
|
||||
injection_callback,
|
||||
capture_filter: query_filter,
|
||||
cancellation_flag,
|
||||
highlighter: self,
|
||||
iter_count: 0,
|
||||
|
|
@ -509,12 +516,16 @@ impl<'a> HighlightIterLayer<'a> {
|
|||
/// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and
|
||||
/// added to the returned vector.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>(
|
||||
fn new<
|
||||
F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
|
||||
G: Fn(&QueryMatch, &Query, &[u8]) -> bool,
|
||||
>(
|
||||
source: &'a [u8],
|
||||
parent_name: Option<&str>,
|
||||
highlighter: &mut Highlighter,
|
||||
cancellation_flag: Option<&'a AtomicUsize>,
|
||||
injection_callback: &mut F,
|
||||
query_filter: &'a G,
|
||||
mut config: &'a HighlightConfiguration,
|
||||
mut depth: usize,
|
||||
mut ranges: Vec<Range>,
|
||||
|
|
@ -594,7 +605,6 @@ impl<'a> HighlightIterLayer<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
// SAFETY:
|
||||
// The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
|
||||
// prevents them from being moved. But both of these values are really just
|
||||
// pointers, so it's actually ok to move them.
|
||||
|
|
@ -602,12 +612,22 @@ impl<'a> HighlightIterLayer<'a> {
|
|||
let cursor_ref = unsafe {
|
||||
mem::transmute::<&mut QueryCursor, &'static mut QueryCursor>(&mut cursor)
|
||||
};
|
||||
|
||||
let captures = unsafe {
|
||||
std::mem::transmute::<QueryCaptures<_, _>, _QueryCaptures<_, _>>(
|
||||
cursor_ref.captures(&config.query, tree_ref.root_node(), source),
|
||||
)
|
||||
}
|
||||
.peekable();
|
||||
std::mem::transmute::<
|
||||
QueryCaptures<_, _>,
|
||||
_QueryCaptures<'a, 'a, &'a [u8], &'a [u8]>,
|
||||
>(cursor_ref.captures(
|
||||
&config.query,
|
||||
tree_ref.root_node(),
|
||||
source,
|
||||
))
|
||||
};
|
||||
|
||||
let captures: Box<dyn Iterator<Item = _>> =
|
||||
Box::new(captures.filter(|(result, _): &(_, _)| {
|
||||
query_filter(result, &config.query, source)
|
||||
}));
|
||||
|
||||
result.push(HighlightIterLayer {
|
||||
highlight_end_stack: Vec::new(),
|
||||
|
|
@ -619,7 +639,7 @@ impl<'a> HighlightIterLayer<'a> {
|
|||
cursor,
|
||||
depth,
|
||||
_tree: tree,
|
||||
captures,
|
||||
captures: captures.peekable(),
|
||||
config,
|
||||
ranges,
|
||||
});
|
||||
|
|
@ -757,9 +777,10 @@ impl<'a> HighlightIterLayer<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a, F> HighlightIter<'a, F>
|
||||
impl<'a, F, G> HighlightIter<'a, F, G>
|
||||
where
|
||||
F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
|
||||
G: Fn(&QueryMatch, &Query, &[u8]) -> bool,
|
||||
{
|
||||
fn emit_event(
|
||||
&mut self,
|
||||
|
|
@ -823,9 +844,10 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a, F> Iterator for HighlightIter<'a, F>
|
||||
impl<'a, F, G> Iterator for HighlightIter<'a, F, G>
|
||||
where
|
||||
F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
|
||||
G: Fn(&QueryMatch, &Query, &[u8]) -> bool,
|
||||
{
|
||||
type Item = Result<HighlightEvent, Error>;
|
||||
|
||||
|
|
@ -922,6 +944,7 @@ where
|
|||
self.highlighter,
|
||||
self.cancellation_flag,
|
||||
&mut self.injection_callback,
|
||||
self.capture_filter,
|
||||
config,
|
||||
self.layers[0].depth + 1,
|
||||
ranges,
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
name = "tree-sitter-language"
|
||||
description = "The tree-sitter Language type, used by the library and by language implementations"
|
||||
version = "0.1.7"
|
||||
version = "0.1.6"
|
||||
authors.workspace = true
|
||||
edition.workspace = true
|
||||
rust-version = "1.77"
|
||||
|
|
|
|||
|
|
@ -765,7 +765,7 @@ impl Loader {
|
|||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration<'static>, &Path)> {
|
||||
pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> {
|
||||
self.language_configurations
|
||||
.iter()
|
||||
.map(|c| (c, self.languages_by_id[c.language_id].0.as_ref()))
|
||||
|
|
@ -775,7 +775,7 @@ impl Loader {
|
|||
pub fn language_configuration_for_scope(
|
||||
&self,
|
||||
scope: &str,
|
||||
) -> LoaderResult<Option<(Language, &LanguageConfiguration<'static>)>> {
|
||||
) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
|
||||
for configuration in &self.language_configurations {
|
||||
if configuration.scope.as_ref().is_some_and(|s| s == scope) {
|
||||
let language = self.language_for_id(configuration.language_id)?;
|
||||
|
|
@ -788,7 +788,7 @@ impl Loader {
|
|||
pub fn language_configuration_for_first_line_regex(
|
||||
&self,
|
||||
path: &Path,
|
||||
) -> LoaderResult<Option<(Language, &LanguageConfiguration<'static>)>> {
|
||||
) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
|
||||
self.language_configuration_ids_by_first_line_regex
|
||||
.iter()
|
||||
.try_fold(None, |_, (regex, ids)| {
|
||||
|
|
@ -817,7 +817,7 @@ impl Loader {
|
|||
pub fn language_configuration_for_file_name(
|
||||
&self,
|
||||
path: &Path,
|
||||
) -> LoaderResult<Option<(Language, &LanguageConfiguration<'static>)>> {
|
||||
) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
|
||||
// Find all the language configurations that match this file name
|
||||
// or a suffix of the file name.
|
||||
let configuration_ids = path
|
||||
|
|
@ -889,7 +889,7 @@ impl Loader {
|
|||
pub fn language_configuration_for_injection_string(
|
||||
&self,
|
||||
string: &str,
|
||||
) -> LoaderResult<Option<(Language, &LanguageConfiguration<'static>)>> {
|
||||
) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
|
||||
let mut best_match_length = 0;
|
||||
let mut best_match_position = None;
|
||||
for (i, configuration) in self.language_configurations.iter().enumerate() {
|
||||
|
|
@ -1539,9 +1539,7 @@ impl Loader {
|
|||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn get_language_configuration_in_current_path(
|
||||
&self,
|
||||
) -> Option<&LanguageConfiguration<'static>> {
|
||||
pub fn get_language_configuration_in_current_path(&self) -> Option<&LanguageConfiguration> {
|
||||
self.language_configuration_in_current_path
|
||||
.map(|i| &self.language_configurations[i])
|
||||
}
|
||||
|
|
@ -1550,7 +1548,7 @@ impl Loader {
|
|||
&mut self,
|
||||
parser_path: &Path,
|
||||
set_current_path_config: bool,
|
||||
) -> LoaderResult<&[LanguageConfiguration<'static>]> {
|
||||
) -> LoaderResult<&[LanguageConfiguration]> {
|
||||
let initial_language_configuration_count = self.language_configurations.len();
|
||||
|
||||
match TreeSitterJSON::from_file(parser_path) {
|
||||
|
|
|
|||
|
|
@ -313,7 +313,6 @@ impl TagsContext {
|
|||
)
|
||||
.ok_or(Error::Cancelled)?;
|
||||
|
||||
// SAFETY:
|
||||
// The `matches` iterator borrows the `Tree`, which prevents it from being
|
||||
// moved. But the tree is really just a pointer, so it's actually ok to
|
||||
// move it.
|
||||
|
|
|
|||
|
|
@ -73,8 +73,9 @@ The behaviors of these three files are described in the next section.
|
|||
|
||||
## Queries
|
||||
|
||||
Tree-sitter's syntax highlighting system is based on *tree queries*, which are a general system for pattern-matching on
|
||||
Tree-sitter's syntax trees. See [this section][pattern matching] of the documentation for more information about tree queries.
|
||||
Tree-sitter's syntax highlighting system is based on *tree queries*, which are a general system for pattern-matching on Tree-sitter's
|
||||
syntax trees. See [this section][pattern matching] of the documentation for more information
|
||||
about tree queries.
|
||||
|
||||
Syntax highlighting is controlled by *three* different types of query files that are usually included in the `queries` folder.
|
||||
The default names for the query files use the `.scm` file extension. We chose this extension because it is commonly used for files written
|
||||
|
|
|
|||
|
|
@ -3,8 +3,7 @@
|
|||
Tree-sitter can be used in conjunction with its [query language][query language] as a part of code navigation systems.
|
||||
An example of such a system can be seen in the `tree-sitter tags` command, which emits a textual dump of the interesting
|
||||
syntactic nodes in its file argument. A notable application of this is GitHub's support for [search-based code navigation][gh search].
|
||||
This document exists to describe how to integrate with such systems, and how to extend this functionality to any language
|
||||
with a Tree-sitter grammar.
|
||||
This document exists to describe how to integrate with such systems, and how to extend this functionality to any language with a Tree-sitter grammar.
|
||||
|
||||
## Tagging and captures
|
||||
|
||||
|
|
@ -13,9 +12,9 @@ entities. Having found them, you use a syntax capture to label the entity and it
|
|||
|
||||
The essence of a given tag lies in two pieces of data: the _role_ of the entity that is matched
|
||||
(i.e. whether it is a definition or a reference) and the _kind_ of that entity, which describes how the entity is used
|
||||
(i.e. whether it's a class definition, function call, variable reference, and so on). Our convention is to use a syntax
|
||||
capture following the `@role.kind` capture name format, and another inner capture, always called `@name`, that pulls out
|
||||
the name of a given identifier.
|
||||
(i.e. whether it's a class definition, function call, variable reference, and so on). Our convention is to use a syntax capture
|
||||
following the `@role.kind` capture name format, and another inner capture, always called `@name`, that pulls out the name
|
||||
of a given identifier.
|
||||
|
||||
You may optionally include a capture named `@doc` to bind a docstring. For convenience purposes, the tagging system provides
|
||||
two built-in functions, `#select-adjacent!` and `#strip!` that are convenient for removing comment syntax from a docstring.
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ cargo install --path crates/cli
|
|||
If you're going to be in a fast iteration cycle and would like the CLI to build faster, you can use the `release-dev` profile:
|
||||
|
||||
```sh
|
||||
cargo build --profile release-dev
|
||||
cargo build --release --profile release-dev
|
||||
# or
|
||||
cargo install --path crates/cli --profile release-dev
|
||||
```
|
||||
|
|
@ -93,8 +93,7 @@ cargo xtask build-wasm-stdlib
|
|||
|
||||
This command looks for the [Wasi SDK][wasi_sdk] indicated by the `TREE_SITTER_WASI_SDK_PATH`
|
||||
environment variable. If you don't have the binary, it can be downloaded from wasi-sdk's [releases][wasi-sdk-releases]
|
||||
page. Note that any change to `crates/language/wasm/**` requires rebuilding the tree-sitter Wasm stdlib via
|
||||
`cargo xtask build-wasm-stdlib`.
|
||||
page.
|
||||
|
||||
### Debugging
|
||||
|
||||
|
|
|
|||
|
|
@ -19,8 +19,8 @@ will attempt to build the parser in the current working directory.
|
|||
### `-w/--wasm`
|
||||
|
||||
Compile the parser as a Wasm module. This command looks for the [Wasi SDK][wasi_sdk] indicated by the `TREE_SITTER_WASI_SDK_PATH`
|
||||
environment variable. If you don't have the binary, the CLI will attempt to download it for you to `<CACHE_DIR>/tree-sitter/wasi-sdk/`,
|
||||
where `<CACHE_DIR>` is resolved according to the [XDG base directory][XDG] or Windows' [Known_Folder_Locations][Known_Folder].
|
||||
environment variable. If you don't have the binary, the CLI will attempt to download it for you to `<CACHE_DIR>/tree-sitter/wasi-sdk/`, where
|
||||
`<CACHE_DIR>` is resolved according to the [XDG base directory][XDG] or Windows' [Known_Folder_Locations][Known_Folder].
|
||||
|
||||
### `-o/--output`
|
||||
|
||||
|
|
@ -37,8 +37,7 @@ in the external scanner does so using their allocator.
|
|||
|
||||
### `-0/--debug`
|
||||
|
||||
Compile the parser with debug flags enabled. This is useful when debugging issues that require a debugger like `gdb` or
|
||||
`lldb`.
|
||||
Compile the parser with debug flags enabled. This is useful when debugging issues that require a debugger like `gdb` or `lldb`.
|
||||
|
||||
[Known_Folder]: https://learn.microsoft.com/en-us/windows/win32/shell/knownfolderid
|
||||
[wasi_sdk]: https://github.com/WebAssembly/wasi-sdk
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
# `tree-sitter dump-languages`
|
||||
|
||||
The `dump-languages` command prints out a list of all the languages that the CLI knows about. This can be useful for debugging
|
||||
purposes, or for scripting. The paths to search come from the config file's [`parser-directories`][parser-directories]
|
||||
object.
|
||||
The `dump-languages` command prints out a list of all the languages that the CLI knows about. This can be useful for debugging purposes, or for scripting. The paths to search come from the config file's [`parser-directories`][parser-directories] object.
|
||||
|
||||
```bash
|
||||
tree-sitter dump-languages [OPTIONS] # Aliases: langs
|
||||
|
|
@ -12,7 +10,6 @@ tree-sitter dump-languages [OPTIONS] # Aliases: langs
|
|||
|
||||
### `--config-path`
|
||||
|
||||
The path to the configuration file. Ordinarily, the CLI will use the default location as explained in the [init-config](./init-config.md)
|
||||
command. This flag allows you to explicitly override that default, and use a config defined elsewhere.
|
||||
The path to the configuration file. Ordinarily, the CLI will use the default location as explained in the [init-config](./init-config.md) command. This flag allows you to explicitly override that default, and use a config defined elsewhere.
|
||||
|
||||
[parser-directories]: ./init-config.md#parser-directories
|
||||
|
|
|
|||
|
|
@ -1,39 +1,30 @@
|
|||
# `tree-sitter generate`
|
||||
|
||||
The most important command for grammar development is `tree-sitter generate`, which reads the grammar in structured form
|
||||
and outputs C files that can be compiled into a shared or static library (e.g., using the [`build`](./build.md) command).
|
||||
The most important command you'll use is `tree-sitter generate`. This command reads the `grammar.js` file in your current
|
||||
working directory and creates a file called `src/parser.c`, which implements the parser. After making changes to your grammar,
|
||||
just run `tree-sitter generate` again.
|
||||
|
||||
```bash
|
||||
tree-sitter generate [OPTIONS] [GRAMMAR_PATH] # Aliases: gen, g
|
||||
```
|
||||
|
||||
The optional `GRAMMAR_PATH` argument should point to the structured grammar, in one of two forms:
|
||||
- `grammar.js` a (ESM or CJS) JavaScript file; if the argument is omitted, it defaults to `./grammar.js`.
|
||||
- `grammar.json` a structured representation of the grammar that is created as a byproduct of `generate`; this can be used
|
||||
to regenerate a missing `parser.c` without requiring a JavaScript runtime (useful when distributing parsers to consumers).
|
||||
The grammar path argument allows you to specify a path to a `grammar.js` JavaScript file, or `grammar.json` JSON file.
|
||||
In case your `grammar.js` file is in a non-standard path, you can specify it yourself. But, if you are using a parser
|
||||
where `grammar.json` was already generated, or it was hand-written, you can tell the CLI to generate the parser *based*
|
||||
on this JSON file. This avoids relying on a JavaScript file and avoids the dependency on a JavaScript runtime.
|
||||
|
||||
If there is an ambiguity or *local ambiguity* in your grammar, Tree-sitter will detect it during parser generation, and
|
||||
it will exit with an `Unresolved conflict` error message. To learn more about conflicts and how to handle them, see
|
||||
it will exit with an `Unresolved conflict` error message. To learn more about conflicts and how to handle them, check out
|
||||
the section on [`Structuring Rules Well`](../creating-parsers/3-writing-the-grammar.md#structuring-rules-well)
|
||||
in the user guide.
|
||||
|
||||
## Generated files
|
||||
|
||||
- `src/parser.c` implements the parser logic specified in the grammar.
|
||||
- `src/tree_sitter/parser.h` provides basic C definitions that are used in the generated `parser.c` file.
|
||||
- `src/tree_sitter/alloc.h` provides memory allocation macros that can be used in an external scanner.
|
||||
- `src/tree_sitter/array.h` provides array macros that can be used in an external scanner.
|
||||
- `src/grammar.json` contains a structured representation of the grammar; can be used to regenerate the parser without having
|
||||
to re-evaluate the `grammar.js`.
|
||||
- `src/node-types.json` provides type information about individual syntax nodes; see the section on [`Static Node Types`](../using-parsers/6-static-node-types.md).
|
||||
|
||||
|
||||
## Options
|
||||
|
||||
### `-l/--log`
|
||||
|
||||
Print the log of the parser generation process. This includes information such as what tokens are included in the error
|
||||
recovery state, what keywords were extracted, what states were split and why, and the entry point state.
|
||||
Print the log of the parser generation process. This is really only useful if you know what you're doing, or are investigating
|
||||
a bug in the CLI itself. It logs info such as what tokens are included in the error recovery state,
|
||||
what keywords were extracted, what states were split and why, and the entry point state.
|
||||
|
||||
### `--abi <VERSION>`
|
||||
|
||||
|
|
@ -63,8 +54,7 @@ The path to the JavaScript runtime executable to use when generating the parser.
|
|||
Note that you can also set this with `TREE_SITTER_JS_RUNTIME`. Starting from version 0.26, you can
|
||||
also pass in `native` to use the experimental native QuickJS runtime that comes bundled with the CLI.
|
||||
This avoids the dependency on a JavaScript runtime entirely. The native QuickJS runtime is compatible
|
||||
with ESM as well as with CommonJS in strict mode. If your grammar depends on `npm` to install dependencies such as base
|
||||
grammars, the native runtime can be used *after* running `npm install`.
|
||||
with ESM as well as with CommonJS in strict mode. If your grammar depends on `npm` to install dependencies such as base grammars, the native runtime can be used *after* running `npm install`.
|
||||
|
||||
### `--disable-optimization`
|
||||
|
||||
|
|
|
|||
|
|
@ -52,8 +52,7 @@ The path to the directory containing the grammar.
|
|||
|
||||
### `--config-path <CONFIG_PATH>`
|
||||
|
||||
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more
|
||||
information.
|
||||
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more information.
|
||||
|
||||
### `-n/--test-number <TEST_NUMBER>`
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,4 @@
|
|||
# CLI Overview
|
||||
|
||||
The `tree-sitter` command-line interface is used to create, manage, test, and build tree-sitter parsers. It is controlled
|
||||
by
|
||||
|
||||
- a personal `tree-sitter/config.json` config file generated by [`tree-sitter init-config`](./init-config.md)
|
||||
- a parser `tree-sitter.json` config file generated by [`tree-sitter init`](./init.md).
|
||||
|
||||
Let's go over all of the functionality of the `tree-sitter` command line interface.
|
||||
Once you feel that you have enough of a grasp on the CLI, you can move onto the grammar authoring section to learn more about writing your own parser.
|
||||
|
|
|
|||
|
|
@ -8,94 +8,30 @@ we recommend using git for version control of your grammar.
|
|||
tree-sitter init [OPTIONS] # Aliases: i
|
||||
```
|
||||
|
||||
## Generated files
|
||||
## Options
|
||||
|
||||
### Required files
|
||||
### `--update`
|
||||
|
||||
The following required files are always created if missing:
|
||||
Update outdated generated files, if needed.
|
||||
|
||||
- `tree-sitter.json` - The main configuration file that determines how `tree-sitter` interacts with the grammar. If missing,
|
||||
the `init` command will prompt the user for the required fields. See [below](./init.md#structure-of-tree-sitterjson) for
|
||||
the full documentation of the structure of this file.
|
||||
- `package.json` - The `npm` manifest for the parser. This file is required for some `tree-sitter` subcommands, and if the
|
||||
grammar has dependencies (e.g., another published base grammar that this grammar extends).
|
||||
- `grammar.js` - An empty template for the main grammar file; see [the section on creating parsers](../2-creating-parser).
|
||||
### `-p/--grammar-path <PATH>`
|
||||
|
||||
### Language bindings
|
||||
|
||||
Language bindings are files that allow your parser to be directly used by projects written in the respective language.
|
||||
The following bindings are created if enabled in `tree-sitter.json`:
|
||||
|
||||
#### C/C++
|
||||
|
||||
- `Makefile` — This file tells [`make`][make] how to compile your language.
|
||||
- `CMakeLists.txt` — This file tells [`cmake`][cmake] how to compile your language.
|
||||
- `bindings/c/tree_sitter/tree-sitter-language.h` — This file provides the C interface of your language.
|
||||
- `bindings/c/tree-sitter-language.pc` — This file provides [pkg-config][pkg-config] metadata about your language's C library.
|
||||
|
||||
#### Go
|
||||
|
||||
- `go.mod` — This file is the manifest of the Go module.
|
||||
- `bindings/go/binding.go` — This file wraps your language in a Go module.
|
||||
- `bindings/go/binding_test.go` — This file contains a test for the Go package.
|
||||
|
||||
#### Node
|
||||
|
||||
- `binding.gyp` — This file tells Node.js how to compile your language.
|
||||
- `bindings/node/binding.cc` — This file wraps your language in a JavaScript module for Node.js.
|
||||
- `bindings/node/index.js` — This is the file that Node.js initially loads when using your language.
|
||||
- `bindings/node/index.d.ts` — This file provides type hints for your parser when used in TypeScript.
|
||||
- `bindings/node/binding_test.js` — This file contains a test for the Node.js package.
|
||||
|
||||
#### Java
|
||||
|
||||
- `pom.xml` - This file is the manifest of the Maven package.
|
||||
- `bindings/java/main/namespace/language/TreeSitterLanguage.java` - This file wraps your language in a Java class.
|
||||
- `bindings/java/test/TreeSitterLanguageTest.java` - This file contains a test for the Java package.
|
||||
|
||||
#### Python
|
||||
|
||||
- `pyproject.toml` — This file is the manifest of the Python package.
|
||||
- `setup.py` — This file tells Python how to compile your language.
|
||||
- `bindings/python/tree_sitter_language/binding.c` — This file wraps your language in a Python module.
|
||||
- `bindings/python/tree_sitter_language/__init__.py` — This file tells Python how to load your language.
|
||||
- `bindings/python/tree_sitter_language/__init__.pyi` — This file provides type hints for your parser when used in Python.
|
||||
- `bindings/python/tree_sitter_language/py.typed` — This file provides type hints for your parser when used in Python.
|
||||
- `bindings/python/tests/test_binding.py` — This file contains a test for the Python package.
|
||||
|
||||
#### Rust
|
||||
|
||||
- `Cargo.toml` — This file is the manifest of the Rust package.
|
||||
- `bindings/rust/build.rs` — This file tells Rust how to compile your language.
|
||||
- `bindings/rust/lib.rs` — This file wraps your language in a Rust crate when used in Rust.
|
||||
|
||||
#### Swift
|
||||
|
||||
- `Package.swift` — This file tells Swift how to compile your language.
|
||||
- `bindings/swift/TreeSitterLanguage/language.h` — This file wraps your language in a Swift module when used in Swift.
|
||||
- `bindings/swift/TreeSitterLanguageTests/TreeSitterLanguageTests.swift` — This file contains a test for the Swift package.
|
||||
|
||||
#### Zig
|
||||
|
||||
- `build.zig` - This file tells Zig how to compile your language.
|
||||
- `build.zig.zon` - This file is the manifest of the Zig package.
|
||||
- `bindings/zig/root.zig` - This file wraps your language in a Zig module.
|
||||
- `bindings/zig/test.zig` - This file contains a test for the Zig package.
|
||||
|
||||
### Additional files
|
||||
|
||||
In addition, the following files are created that aim to improve the development experience:
|
||||
|
||||
- `.editorconfig` — This file tells your editor how to format your code. More information about this file can be found [here][editorconfig].
|
||||
- `.gitattributes` — This file tells Git how to handle line endings and tells GitHub which files are generated.
|
||||
- `.gitignore` — This file tells Git which files to ignore when committing changes.
|
||||
The path to the directory containing the grammar.
|
||||
|
||||
## Structure of `tree-sitter.json`
|
||||
|
||||
The main file of interest for users to configure is `tree-sitter.json`, which tells the CLI information about your grammar,
|
||||
such as the location of queries.
|
||||
|
||||
### The `grammars` field
|
||||
|
||||
This field is an array of objects, though you typically only need one object in this array unless your repo has
|
||||
multiple grammars (for example, `Typescript` and `TSX`), e.g.,
|
||||
multiple grammars (for example, `Typescript` and `TSX`).
|
||||
|
||||
### Example
|
||||
|
||||
Typically, the objects in the `"tree-sitter"` array only need to specify a few keys:
|
||||
|
||||
```json
|
||||
{
|
||||
"tree-sitter": [
|
||||
|
|
@ -113,7 +49,7 @@ multiple grammars (for example, `Typescript` and `TSX`), e.g.,
|
|||
}
|
||||
```
|
||||
|
||||
#### Basic fields
|
||||
#### Basic Fields
|
||||
|
||||
These keys specify basic information about the parser:
|
||||
|
||||
|
|
@ -129,12 +65,12 @@ parser to files that should be checked for modifications during recompilation.
|
|||
This is useful during development to have changes to other files besides scanner.c
|
||||
be picked up by the cli.
|
||||
|
||||
#### Language detection
|
||||
#### Language Detection
|
||||
|
||||
These keys help to decide whether the language applies to a given file:
|
||||
|
||||
- `file-types` — An array of filename suffix strings (not including the dot). The grammar will be used for files whose names
|
||||
end with one of these suffixes. Note that the suffix may match an *entire* filename.
|
||||
- `file-types` — An array of filename suffix strings. The grammar will be used for files whose names end with one of
|
||||
these suffixes. Note that the suffix may match an *entire* filename.
|
||||
|
||||
- `first-line-regex` — A regex pattern that will be tested against the first line of a file
|
||||
to determine whether this language applies to the file. If present, this regex will be used for any file whose
|
||||
|
|
@ -149,14 +85,14 @@ no `content-regex` will be preferred over this one.
|
|||
should be used for a potential *language injection* site.
|
||||
Language injection is described in more detail in [the relevant section](../3-syntax-highlighting.md#language-injection).
|
||||
|
||||
#### Query paths
|
||||
#### Query Paths
|
||||
|
||||
These keys specify relative paths from the directory containing `tree-sitter.json` to the files that control syntax highlighting:
|
||||
|
||||
- `highlights` — Path to a *highlight query*. Default: `queries/highlights.scm`
|
||||
- `locals` — Path to a *local variable query*. Default: `queries/locals.scm`.
|
||||
- `injections` — Path to an *injection query*. Default: `queries/injections.scm`.
|
||||
- `tags` — Path to a *tag query*. Default: `queries/tags.scm`.
|
||||
- `tags` — Path to a *tag query*. Default: `queries/tags.scm`.
|
||||
|
||||
### The `metadata` field
|
||||
|
||||
|
|
@ -185,19 +121,81 @@ Each key is a language name, and the value is a boolean.
|
|||
- `swift` (default: `false`)
|
||||
- `zig` (default: `false`)
|
||||
|
||||
## Options
|
||||
## Binding Files
|
||||
|
||||
### `-u/--update`
|
||||
When you run `tree-sitter init`, the CLI will also generate a number of files in your repository that allow for your parser
|
||||
to be used from different languages. Here is a list of the binding files that are generated, and the purpose of each:
|
||||
|
||||
Update outdated generated files, if possible.
|
||||
### C/C++
|
||||
|
||||
**Note:** Existing files that may have been edited manually are _not_ updated in general. To force an update to such files,
|
||||
remove them and call `tree-sitter init -u` again.
|
||||
- `Makefile` — This file tells [`make`][make] how to compile your language.
|
||||
- `CMakeLists.txt` — This file tells [`cmake`][cmake] how to compile your language.
|
||||
- `bindings/c/tree_sitter/tree-sitter-language.h` — This file provides the C interface of your language.
|
||||
- `bindings/c/tree-sitter-language.pc` — This file provides [pkg-config][pkg-config] metadata about your language's C library.
|
||||
- `src/tree_sitter/parser.h` — This file provides some basic C definitions that are used in your generated `parser.c` file.
|
||||
- `src/tree_sitter/alloc.h` — This file provides some memory allocation macros that are to be used in your external scanner,
|
||||
if you have one.
|
||||
- `src/tree_sitter/array.h` — This file provides some array macros that are to be used in your external scanner,
|
||||
if you have one.
|
||||
|
||||
### `-p/--grammar-path <PATH>`
|
||||
### Go
|
||||
|
||||
The path to the directory containing the grammar.
|
||||
- `go.mod` — This file is the manifest of the Go module.
|
||||
- `bindings/go/binding.go` — This file wraps your language in a Go module.
|
||||
- `bindings/go/binding_test.go` — This file contains a test for the Go package.
|
||||
|
||||
### Node
|
||||
|
||||
- `binding.gyp` — This file tells Node.js how to compile your language.
|
||||
- `package.json` — This file is the manifest of the Node.js package.
|
||||
- `bindings/node/binding.cc` — This file wraps your language in a JavaScript module for Node.js.
|
||||
- `bindings/node/index.js` — This is the file that Node.js initially loads when using your language.
|
||||
- `bindings/node/index.d.ts` — This file provides type hints for your parser when used in TypeScript.
|
||||
- `bindings/node/binding_test.js` — This file contains a test for the Node.js package.
|
||||
|
||||
### Java
|
||||
|
||||
- `pom.xml` - This file is the manifest of the Maven package.
|
||||
- `bindings/java/main/namespace/language/TreeSitterLanguage.java` - This file wraps your language in a Java class.
|
||||
- `bindings/java/test/TreeSitterLanguageTest.java` - This file contains a test for the Java package.
|
||||
|
||||
### Python
|
||||
|
||||
- `pyproject.toml` — This file is the manifest of the Python package.
|
||||
- `setup.py` — This file tells Python how to compile your language.
|
||||
- `bindings/python/tree_sitter_language/binding.c` — This file wraps your language in a Python module.
|
||||
- `bindings/python/tree_sitter_language/__init__.py` — This file tells Python how to load your language.
|
||||
- `bindings/python/tree_sitter_language/__init__.pyi` — This file provides type hints for your parser when used in Python.
|
||||
- `bindings/python/tree_sitter_language/py.typed` — This file provides type hints for your parser when used in Python.
|
||||
- `bindings/python/tests/test_binding.py` — This file contains a test for the Python package.
|
||||
|
||||
### Rust
|
||||
|
||||
- `Cargo.toml` — This file is the manifest of the Rust package.
|
||||
- `bindings/rust/lib.rs` — This file wraps your language in a Rust crate when used in Rust.
|
||||
- `bindings/rust/build.rs` — This file wraps the building process for the Rust crate.
|
||||
|
||||
### Swift
|
||||
|
||||
- `Package.swift` — This file tells Swift how to compile your language.
|
||||
- `bindings/swift/TreeSitterLanguage/language.h` — This file wraps your language in a Swift module when used in Swift.
|
||||
- `bindings/swift/TreeSitterLanguageTests/TreeSitterLanguageTests.swift` — This file contains a test for the Swift package.
|
||||
|
||||
### Zig
|
||||
|
||||
- `build.zig` - This file tells Zig how to compile your language.
|
||||
- `build.zig.zon` - This file is the manifest of the Zig package.
|
||||
- `bindings/zig/root.zig` - This file wraps your language in a Zig module.
|
||||
- `bindings/zig/test.zig` - This file contains a test for the Zig package.
|
||||
|
||||
### Additional Files
|
||||
|
||||
Additionally, there are a few other files that are generated when you run `tree-sitter init`,
|
||||
that aim to improve the development experience:
|
||||
|
||||
- `.editorconfig` — This file tells your editor how to format your code. More information about this file can be found [here][editorconfig].
|
||||
- `.gitattributes` — This file tells Git how to handle line endings, and tells GitHub what files are generated.
|
||||
- `.gitignore` — This file tells Git what files to ignore when committing changes.
|
||||
|
||||
[cmake]: https://cmake.org/cmake/help/latest
|
||||
[editorconfig]: https://editorconfig.org
|
||||
|
|
|
|||
|
|
@ -78,8 +78,7 @@ Suppress main output.
|
|||
|
||||
### `--edits <EDITS>...`
|
||||
|
||||
Apply edits after parsing the file. Edits are in the form of `row,col|position delcount insert_text` where row and col,
|
||||
or position are 0-indexed.
|
||||
Apply edits after parsing the file. Edits are in the form of `row,col|position delcount insert_text` where row and col, or position are 0-indexed.
|
||||
|
||||
### `--encoding <ENCODING>`
|
||||
|
||||
|
|
@ -96,8 +95,7 @@ Output parsing results in a JSON format.
|
|||
|
||||
### `--config-path <CONFIG_PATH>`
|
||||
|
||||
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more
|
||||
information.
|
||||
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more information.
|
||||
|
||||
### `-n/--test-number <TEST_NUMBER>`
|
||||
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ tree-sitter playground [OPTIONS] # Aliases: play, pg, web-ui
|
|||
```
|
||||
|
||||
```admonish note
|
||||
For this to work, you must have already built the parser as a Wasm module. This can be done with the [`build`](./build.md)
|
||||
subcommand (`tree-sitter build --wasm`).
|
||||
For this to work, you must have already built the parser as a Wasm module. This can be done with the [`build`](./build.md) subcommand
|
||||
(`tree-sitter build --wasm`).
|
||||
```
|
||||
|
||||
## Options
|
||||
|
|
|
|||
|
|
@ -47,8 +47,8 @@ The range of rows in which the query will be executed. The format is `start_row:
|
|||
|
||||
### `--containing-row-range <ROW_RANGE>`
|
||||
|
||||
The range of rows in which the query will be executed. Only the matches that are fully contained within the provided row
|
||||
range will be returned.
|
||||
The range of rows in which the query will be executed. Only the matches that are fully contained within the provided row range
|
||||
will be returned.
|
||||
|
||||
### `--scope <SCOPE>`
|
||||
|
||||
|
|
@ -64,8 +64,7 @@ Whether to run query tests or not.
|
|||
|
||||
### `--config-path <CONFIG_PATH>`
|
||||
|
||||
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more
|
||||
information.
|
||||
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more information.
|
||||
|
||||
### `-n/--test-number <TEST_NUMBER>`
|
||||
|
||||
|
|
|
|||
|
|
@ -31,8 +31,7 @@ The path to the directory containing the grammar.
|
|||
|
||||
### `--config-path <CONFIG_PATH>`
|
||||
|
||||
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more
|
||||
information.
|
||||
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more information.
|
||||
|
||||
### `-n/--test-number <TEST_NUMBER>`
|
||||
|
||||
|
|
|
|||
|
|
@ -63,8 +63,7 @@ When using the `--debug-graph` option, open the log file in the default browser.
|
|||
|
||||
### `--config-path <CONFIG_PATH>`
|
||||
|
||||
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more
|
||||
information.
|
||||
The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more information.
|
||||
|
||||
### `--show-fields`
|
||||
|
||||
|
|
|
|||
|
|
@ -25,9 +25,11 @@ tree-sitter version --bump minor # minor bump
|
|||
tree-sitter version --bump major # major bump
|
||||
```
|
||||
|
||||
As a grammar author, you should keep the version of your grammar in sync across different bindings. However, doing so manually
|
||||
is error-prone and tedious, so this command takes care of the burden. If you are using a version control system, it is recommended
|
||||
to commit the changes made by this command, and to tag the commit with the new version.
|
||||
As a grammar author, you should keep the version of your grammar in sync across
|
||||
different bindings. However, doing so manually is error-prone and tedious, so
|
||||
this command takes care of the burden. If you are using a version control system,
|
||||
it is recommended to commit the changes made by this command, and to tag the
|
||||
commit with the new version.
|
||||
|
||||
To print the current version without bumping it, use:
|
||||
|
||||
|
|
|
|||
|
|
@ -17,8 +17,8 @@ DSL through the `RustRegex` class. Simply pass your regex pattern as a string:
|
|||
```
|
||||
|
||||
Unlike JavaScript's builtin `RegExp` class, which takes a pattern and flags as separate arguments, `RustRegex` only
|
||||
accepts a single pattern string. While it doesn't support separate flags, you can use inline flags within the pattern
|
||||
itself. For more details about Rust's regex syntax and capabilities, check out the [Rust regex documentation][rust regex].
|
||||
accepts a single pattern string. While it doesn't support separate flags, you can use inline flags within the pattern itself.
|
||||
For more details about Rust's regex syntax and capabilities, check out the [Rust regex documentation][rust regex].
|
||||
|
||||
```admonish note
|
||||
Only a subset of the Regex engine is actually supported. This is due to certain features like lookahead and lookaround
|
||||
|
|
@ -50,10 +50,10 @@ The previous `repeat` rule is implemented in `repeat1` but is included because i
|
|||
- **Options : `optional(rule)`** — This function creates a rule that matches *zero or one* occurrence of a given rule.
|
||||
It is analogous to the `[x]` (square bracket) syntax in EBNF notation.
|
||||
|
||||
- **Precedence : `prec(number, rule)`** — This function marks the given rule with a numerical precedence, which will be
|
||||
used to resolve [*LR(1) Conflicts*][lr-conflict] at parser-generation time. When two rules overlap in a way that represents
|
||||
either a true ambiguity or a *local* ambiguity given one token of lookahead, Tree-sitter will try to resolve the conflict
|
||||
by matching the rule with the higher precedence. The default precedence of all rules is zero. This works similarly to the
|
||||
- **Precedence : `prec(number, rule)`** — This function marks the given rule with a numerical precedence, which will be used
|
||||
to resolve [*LR(1) Conflicts*][lr-conflict] at parser-generation time. When two rules overlap in a way that represents either
|
||||
a true ambiguity or a *local* ambiguity given one token of lookahead, Tree-sitter will try to resolve the conflict by matching
|
||||
the rule with the higher precedence. The default precedence of all rules is zero. This works similarly to the
|
||||
[precedence directives][yacc-prec] in Yacc grammars.
|
||||
|
||||
This function can also be used to assign lexical precedence to a given
|
||||
|
|
@ -115,8 +115,8 @@ want to create syntax tree nodes at runtime.
|
|||
|
||||
- **`conflicts`** — an array of arrays of rule names. Each inner array represents a set of rules that's involved in an
|
||||
*LR(1) conflict* that is *intended to exist* in the grammar. When these conflicts occur at runtime, Tree-sitter will use
|
||||
the GLR algorithm to explore all the possible interpretations. If *multiple* parses end up succeeding, Tree-sitter will
|
||||
pick the subtree whose corresponding rule has the highest total *dynamic precedence*.
|
||||
the GLR algorithm to explore all the possible interpretations. If *multiple* parses end up succeeding, Tree-sitter will pick
|
||||
the subtree whose corresponding rule has the highest total *dynamic precedence*.
|
||||
|
||||
- **`externals`** — an array of token names which can be returned by an
|
||||
[*external scanner*][external-scanners]. External scanners allow you to write custom C code which runs during the lexing
|
||||
|
|
@ -139,10 +139,10 @@ for more details.
|
|||
array of reserved rules. The reserved rule in the array must be a terminal token meaning it must be a string, regex, token,
|
||||
or terminal rule. The reserved rule must also exist and be used in the grammar, specifying arbitrary tokens will not work.
|
||||
The *first* reserved word set in the object is the global word set, meaning it applies to every rule in every parse state.
|
||||
However, certain keywords are contextual, depending on the rule. For example, in JavaScript, keywords are typically not
|
||||
allowed as ordinary variables, however, they *can* be used as a property name. In this situation, the `reserved` function
|
||||
would be used, and the word set to pass in would be the name of the word set that is declared in the `reserved` object that
|
||||
corresponds to an empty array, signifying *no* keywords are reserved.
|
||||
However, certain keywords are contextual, depending on the rule. For example, in JavaScript, keywords are typically not allowed
|
||||
as ordinary variables, however, they *can* be used as a property name. In this situation, the `reserved` function would be used,
|
||||
and the word set to pass in would be the name of the word set that is declared in the `reserved` object that corresponds to an
|
||||
empty array, signifying *no* keywords are reserved.
|
||||
|
||||
[bison-dprec]: https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
|
||||
[ebnf]: https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# Writing the Grammar
|
||||
|
||||
Writing a grammar requires creativity. There are an infinite number of CFGs (context-free grammars) that can be used to
|
||||
describe any given language. To produce a good Tree-sitter parser, you need to create a grammar with two important properties:
|
||||
Writing a grammar requires creativity. There are an infinite number of CFGs (context-free grammars) that can be used to describe
|
||||
any given language. To produce a good Tree-sitter parser, you need to create a grammar with two important properties:
|
||||
|
||||
1. **An intuitive structure** — Tree-sitter's output is a [concrete syntax tree][cst]; each node in the tree corresponds
|
||||
directly to a [terminal or non-terminal symbol][non-terminal] in the grammar. So to produce an easy-to-analyze tree, there
|
||||
|
|
@ -139,8 +139,8 @@ instead. It's often useful to check your progress by trying to parse some real c
|
|||
## Structuring Rules Well
|
||||
|
||||
Imagine that you were just starting work on the [Tree-sitter JavaScript parser][tree-sitter-javascript]. Naively, you might
|
||||
try to directly mirror the structure of the [ECMAScript Language Spec][ecmascript-spec]. To illustrate the problem with
|
||||
this approach, consider the following line of code:
|
||||
try to directly mirror the structure of the [ECMAScript Language Spec][ecmascript-spec]. To illustrate the problem with this
|
||||
approach, consider the following line of code:
|
||||
|
||||
```js
|
||||
return x + y;
|
||||
|
|
@ -181,17 +181,16 @@ which are unrelated to the actual code.
|
|||
|
||||
## Standard Rule Names
|
||||
|
||||
Tree-sitter places no restrictions on how to name the rules of your grammar. It can be helpful, however, to follow certain
|
||||
conventions used by many other established grammars in the ecosystem. Some of these well-established patterns are listed
|
||||
below:
|
||||
Tree-sitter places no restrictions on how to name the rules of your grammar. It can be helpful, however, to follow certain conventions
|
||||
used by many other established grammars in the ecosystem. Some of these well-established patterns are listed below:
|
||||
|
||||
- `source_file`: Represents an entire source file; this rule is commonly used as the root node for a grammar.
|
||||
- `expression`/`statement`: Used to represent statements and expressions for a given language. Commonly defined as a choice
|
||||
between several more specific sub-expression/sub-statement rules.
|
||||
- `expression`/`statement`: Used to represent statements and expressions for a given language. Commonly defined as a choice between several
|
||||
more specific sub-expression/sub-statement rules.
|
||||
- `block`: Used as the parent node for block scopes, with its children representing the block's contents.
|
||||
- `type`: Represents the types of a language such as `int`, `char`, and `void`.
|
||||
- `identifier`: Used for constructs like variable names, function arguments, and object fields; this rule is commonly used
|
||||
as the `word` token in grammars.
|
||||
- `identifier`: Used for constructs like variable names, function arguments, and object fields; this rule is commonly used as the `word`
|
||||
token in grammars.
|
||||
- `string`: Used to represent `"string literals"`.
|
||||
- `comment`: Used to represent comments; this rule is commonly used as an `extra`.
|
||||
|
||||
|
|
@ -309,9 +308,9 @@ This is where `prec.left` and `prec.right` come into use. We want to select the
|
|||
|
||||
## Using Conflicts
|
||||
|
||||
Sometimes, conflicts are actually desirable. In our JavaScript grammar, expressions and patterns can create intentional
|
||||
ambiguity. A construct like `[x, y]` could be legitimately parsed as both an array literal (like in `let a = [x, y]`) or
|
||||
as a destructuring pattern (like in `let [x, y] = arr`).
|
||||
Sometimes, conflicts are actually desirable. In our JavaScript grammar, expressions and patterns can create intentional ambiguity.
|
||||
A construct like `[x, y]` could be legitimately parsed as both an array literal (like in `let a = [x, y]`) or as a destructuring
|
||||
pattern (like in `let [x, y] = arr`).
|
||||
|
||||
```js
|
||||
export default grammar({
|
||||
|
|
@ -565,8 +564,8 @@ as mentioned in the previous page, is `token(prec(N, ...))`.
|
|||
## Keywords
|
||||
|
||||
Many languages have a set of _keyword_ tokens (e.g. `if`, `for`, `return`), as well as a more general token (e.g. `identifier`)
|
||||
that matches any word, including many of the keyword strings. For example, JavaScript has a keyword `instanceof`, which
|
||||
is used as a binary operator, like this:
|
||||
that matches any word, including many of the keyword strings. For example, JavaScript has a keyword `instanceof`, which is
|
||||
used as a binary operator, like this:
|
||||
|
||||
```js
|
||||
if (a instanceof Something) b();
|
||||
|
|
|
|||
|
|
@ -143,10 +143,10 @@ the second argument, the current character will be treated as whitespace; whites
|
|||
associated with tokens emitted by the external scanner.
|
||||
|
||||
- **`void (*mark_end)(TSLexer *)`** — A function for marking the end of the recognized token. This allows matching tokens
|
||||
that require multiple characters of lookahead. By default, (if you don't call `mark_end`), any character that you moved
|
||||
past using the `advance` function will be included in the size of the token. But once you call `mark_end`, then any later
|
||||
calls to `advance` will _not_ increase the size of the returned token. You can call `mark_end` multiple times to increase
|
||||
the size of the token.
|
||||
that require multiple characters of lookahead. By default, (if you don't call `mark_end`), any character that you moved past
|
||||
using the `advance` function will be included in the size of the token. But once you call `mark_end`, then any later calls
|
||||
to `advance` will _not_ increase the size of the returned token. You can call `mark_end` multiple times to increase the size
|
||||
of the token.
|
||||
|
||||
- **`uint32_t (*get_column)(TSLexer *)`** — A function for querying the current column position of the lexer. It returns
|
||||
the number of codepoints since the start of the current line. The codepoint position is recalculated on every call to this
|
||||
|
|
@ -185,9 +185,9 @@ if (valid_symbols[INDENT] || valid_symbols[DEDENT]) {
|
|||
|
||||
### Allocator
|
||||
|
||||
Instead of using libc's `malloc`, `calloc`, `realloc`, and `free`, you should use the versions prefixed with `ts_` from
|
||||
`tree_sitter/alloc.h`. These macros can allow a potential consumer to override the default allocator with their own implementation,
|
||||
but by default will use the libc functions.
|
||||
Instead of using libc's `malloc`, `calloc`, `realloc`, and `free`, you should use the versions prefixed with `ts_` from `tree_sitter/alloc.h`.
|
||||
These macros can allow a potential consumer to override the default allocator with their own implementation, but by default
|
||||
will use the libc functions.
|
||||
|
||||
As a consumer of the tree-sitter core library as well as any parser libraries that might use allocations, you can enable
|
||||
overriding the default allocator and have it use the same one as the library allocator, which you can set with `ts_set_allocator`.
|
||||
|
|
@ -195,8 +195,7 @@ To enable this overriding in scanners, you must compile them with the `TREE_SITT
|
|||
the library must be linked into your final app dynamically, since it needs to resolve the internal functions at runtime.
|
||||
If you are compiling an executable binary that uses the core library, but want to load parsers dynamically at runtime, then
|
||||
you will have to use a special linker flag on Unix. For non-Darwin systems, that would be `--dynamic-list` and for Darwin
|
||||
systems, that would be `-exported_symbols_list`. The CLI does exactly this, so you can use it as a reference (check out
|
||||
`cli/build.rs`).
|
||||
systems, that would be `-exported_symbols_list`. The CLI does exactly this, so you can use it as a reference (check out `cli/build.rs`).
|
||||
|
||||
For example, assuming you wanted to allocate 100 bytes for your scanner, you'd do so like the following example:
|
||||
|
||||
|
|
@ -294,10 +293,9 @@ bool tree_sitter_my_language_external_scanner_scan(
|
|||
|
||||
## Other External Scanner Details
|
||||
|
||||
External scanners have priority over Tree-sitter's normal lexing process. When a token listed in the externals array is
|
||||
valid at a given position, the external scanner is called first. This makes external scanners a powerful way to override
|
||||
Tree-sitter's default lexing behavior, especially for cases that can't be handled with regular lexical rules, parsing, or
|
||||
dynamic precedence.
|
||||
External scanners have priority over Tree-sitter's normal lexing process. When a token listed in the externals array is valid
|
||||
at a given position, the external scanner is called first. This makes external scanners a powerful way to override Tree-sitter's
|
||||
default lexing behavior, especially for cases that can't be handled with regular lexical rules, parsing, or dynamic precedence.
|
||||
|
||||
During error recovery, Tree-sitter's first step is to call the external scanner's scan function with all tokens marked as
|
||||
valid. Your scanner should detect and handle this case appropriately. One simple approach is to add an unused "sentinel"
|
||||
|
|
|
|||
|
|
@ -39,8 +39,8 @@ It only shows the *named* nodes, as described in [this section][named-vs-anonymo
|
|||
```
|
||||
|
||||
The expected output section can also *optionally* show the [*field names*][node-field-names] associated with each child
|
||||
node. To include field names in your tests, you write a node's field name followed by a colon, before the node itself
|
||||
in the S-expression:
|
||||
node. To include field names in your tests, you write a node's field name followed by a colon, before the node itself in
|
||||
the S-expression:
|
||||
|
||||
```query
|
||||
(source_file
|
||||
|
|
@ -87,11 +87,6 @@ The recommendation is to be comprehensive in adding tests. If it's a visible nod
|
|||
directory. It's typically a good idea to test all the permutations of each language construct. This increases test coverage,
|
||||
and also acquaints readers with a way to examine expected outputs and understand the "edges" of a language.
|
||||
|
||||
```admonish tip
|
||||
After modifying the grammar, you can run `tree-sitter test -u`
|
||||
to update all syntax trees in corpus files with current parser output.
|
||||
```
|
||||
|
||||
## Attributes
|
||||
|
||||
Tests can be annotated with a few `attributes`. Attributes must be put in the header, below the test name, and start with
|
||||
|
|
@ -104,8 +99,8 @@ you can repeat the attribute on a new line.
|
|||
|
||||
The following attributes are available:
|
||||
|
||||
* `:cst` - This attribute specifies that the expected output should be in the form of a CST instead of the normal S-expression.
|
||||
This CST matches the format given by `parse --cst`.
|
||||
* `:cst` - This attribute specifies that the expected output should be in the form of a CST instead of the normal S-expression. This
|
||||
CST matches the format given by `parse --cst`.
|
||||
* `:error` — This attribute will assert that the parse tree contains an error. It's useful to just validate that a certain
|
||||
input is invalid without displaying the whole parse tree, as such you should omit the parse tree below the `---` line.
|
||||
* `:fail-fast` — This attribute will stop the testing of additional cases if the test marked with this attribute fails.
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
# Creating parsers
|
||||
|
||||
Developing Tree-sitter grammars can have a difficult learning curve, but once you get the hang of it, it can be fun and
|
||||
even zen-like. This document will help you to get started and to develop a useful mental model.
|
||||
Developing Tree-sitter grammars can have a difficult learning curve, but once you get the hang of it, it can be fun and even
|
||||
zen-like. This document will help you to get started and to develop a useful mental model.
|
||||
|
|
|
|||
|
|
@ -10,8 +10,7 @@ file and efficiently update the syntax tree as the source file is edited. Tree-s
|
|||
- **General** enough to parse any programming language
|
||||
- **Fast** enough to parse on every keystroke in a text editor
|
||||
- **Robust** enough to provide useful results even in the presence of syntax errors
|
||||
- **Dependency-free** so that the runtime library (which is written in pure [C11](https://github.com/tree-sitter/tree-sitter/tree/master/lib))
|
||||
can be embedded in any application
|
||||
- **Dependency-free** so that the runtime library (which is written in pure [C11](https://github.com/tree-sitter/tree-sitter/tree/master/lib)) can be embedded in any application
|
||||
|
||||
## Language Bindings
|
||||
|
||||
|
|
|
|||
|
|
@ -2,8 +2,7 @@
|
|||
|
||||
## Providing the Code
|
||||
|
||||
In the example on the previous page, we parsed source code stored in a simple string using the `ts_parser_parse_string`
|
||||
function:
|
||||
In the example on the previous page, we parsed source code stored in a simple string using the `ts_parser_parse_string` function:
|
||||
|
||||
```c
|
||||
TSTree *ts_parser_parse_string(
|
||||
|
|
@ -136,10 +135,10 @@ Consider a grammar rule like this:
|
|||
if_statement: $ => seq("if", "(", $._expression, ")", $._statement);
|
||||
```
|
||||
|
||||
A syntax node representing an `if_statement` in this language would have 5 children: the condition expression, the body
|
||||
statement, as well as the `if`, `(`, and `)` tokens. The expression and the statement would be marked as _named_ nodes,
|
||||
because they have been given explicit names in the grammar. But the `if`, `(`, and `)` nodes would _not_ be named nodes,
|
||||
because they are represented in the grammar as simple strings.
|
||||
A syntax node representing an `if_statement` in this language would have 5 children: the condition expression, the body statement,
|
||||
as well as the `if`, `(`, and `)` tokens. The expression and the statement would be marked as _named_ nodes, because they
|
||||
have been given explicit names in the grammar. But the `if`, `(`, and `)` nodes would _not_ be named nodes, because they
|
||||
are represented in the grammar as simple strings.
|
||||
|
||||
You can check whether any given node is named:
|
||||
|
||||
|
|
|
|||
|
|
@ -19,8 +19,8 @@ typedef struct {
|
|||
void ts_tree_edit(TSTree *, const TSInputEdit *);
|
||||
```
|
||||
|
||||
Then, you can call `ts_parser_parse` again, passing in the old tree. This will create a new tree that internally shares
|
||||
structure with the old tree.
|
||||
Then, you can call `ts_parser_parse` again, passing in the old tree. This will create a new tree that internally shares structure
|
||||
with the old tree.
|
||||
|
||||
When you edit a syntax tree, the positions of its nodes will change. If you have stored any `TSNode` instances outside of
|
||||
the `TSTree`, you must update their positions separately, using the same `TSInputEdit` value, in order to update their
|
||||
|
|
|
|||
|
|
@ -108,9 +108,9 @@ In Tree-sitter grammars, there are usually certain rules that represent abstract
|
|||
"type", "declaration"). In the `grammar.js` file, these are often written as [hidden rules][hidden rules]
|
||||
whose definition is a simple [`choice`][grammar dsl] where each member is just a single symbol.
|
||||
|
||||
Normally, hidden rules are not mentioned in the node types file, since they don't appear in the syntax tree. But if you
|
||||
add a hidden rule to the grammar's [`supertypes` list][grammar dsl], then it _will_ show up in the node types file, with
|
||||
the following special entry:
|
||||
Normally, hidden rules are not mentioned in the node types file, since they don't appear in the syntax tree. But if you add
|
||||
a hidden rule to the grammar's [`supertypes` list][grammar dsl], then it _will_ show up in the node
|
||||
types file, with the following special entry:
|
||||
|
||||
- `"subtypes"` — An array of objects that specify the _types_ of nodes that this 'supertype' node can wrap.
|
||||
|
||||
|
|
|
|||
|
|
@ -15,11 +15,8 @@ A given version of the tree-sitter library is only able to load parsers generate
|
|||
| >=0.20.3, <=0.24 | 13 | 14 |
|
||||
| >=0.25 | 13 | 15 |
|
||||
|
||||
By default, the tree-sitter CLI will generate parsers using the latest available ABI for that version, but an older ABI
|
||||
(supported by the CLI) can be selected by passing the [`--abi` option][abi_option] to the `generate` command.
|
||||
By default, the tree-sitter CLI will generate parsers using the latest available ABI for that version, but an older ABI (supported by the CLI) can be selected by passing the [`--abi` option][abi_option] to the `generate` command.
|
||||
|
||||
Note that the ABI version range supported by the CLI can be smaller than for the library: When a new ABI version is released,
|
||||
older versions will be phased out over a deprecation period, which starts with no longer being able to generate parsers
|
||||
with the oldest ABI version.
|
||||
Note that the ABI version range supported by the CLI can be smaller than for the library: When a new ABI version is released, older versions will be phased out over a deprecation period, which starts with no longer being able to generate parsers with the oldest ABI version.
|
||||
|
||||
[abi_option]: ../cli/generate.md#--abi-version
|
||||
|
|
|
|||
|
|
@ -6,8 +6,8 @@ the core concepts remain the same.
|
|||
|
||||
Tree-sitter's parsing functionality is implemented through its C API, with all functions documented in the [tree_sitter/api.h][api.h]
|
||||
header file, but if you're working in another language, you can use one of the following bindings found [here](../index.md#language-bindings),
|
||||
each providing idiomatic access to Tree-sitter's functionality. Of these bindings, the official ones have their own API
|
||||
doc hosted online at the following pages:
|
||||
each providing idiomatic access to Tree-sitter's functionality. Of these bindings, the official ones have their own API docs
|
||||
hosted online at the following pages:
|
||||
|
||||
- [Go][go]
|
||||
- [Java]
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
# Query Syntax
|
||||
|
||||
A _query_ consists of one or more _patterns_, where each pattern is an [S-expression][s-exp] that matches a certain set
|
||||
of nodes in a syntax tree. The expression to match a given node consists of a pair of parentheses containing two things:
|
||||
the node's type, and optionally, a series of other S-expressions that match the node's children. For example, this pattern
|
||||
would match any `binary_expression` node whose children are both `number_literal` nodes:
|
||||
A _query_ consists of one or more _patterns_, where each pattern is an [S-expression][s-exp] that matches a certain set of
|
||||
nodes in a syntax tree. The expression to match a given node consists of a pair of parentheses containing two things: the
|
||||
node's type, and optionally, a series of other S-expressions that match the node's children. For example, this pattern would
|
||||
match any `binary_expression` node whose children are both `number_literal` nodes:
|
||||
|
||||
```query
|
||||
(binary_expression (number_literal) (number_literal))
|
||||
|
|
@ -99,10 +99,10 @@ by `(ERROR)` queries. Specific missing node types can also be queried:
|
|||
### Supertype Nodes
|
||||
|
||||
Some node types are marked as _supertypes_ in a grammar. A supertype is a node type that contains multiple
|
||||
subtypes. For example, in the [JavaScript grammar example][grammar], `expression` is a supertype that can represent any
|
||||
kind of expression, such as a `binary_expression`, `call_expression`, or `identifier`. You can use supertypes in queries
|
||||
to match any of their subtypes, rather than having to list out each subtype individually. For example, this pattern would
|
||||
match any kind of expression, even though it's not a visible node in the syntax tree:
|
||||
subtypes. For example, in the [JavaScript grammar example][grammar], `expression` is a supertype that can represent any kind
|
||||
of expression, such as a `binary_expression`, `call_expression`, or `identifier`. You can use supertypes in queries to match
|
||||
any of their subtypes, rather than having to list out each subtype individually. For example, this pattern would match any
|
||||
kind of expression, even though it's not a visible node in the syntax tree:
|
||||
|
||||
```query
|
||||
(expression) @any-expression
|
||||
|
|
|
|||
|
|
@ -128,15 +128,15 @@ This pattern would match any builtin variable that is not a local variable, beca
|
|||
|
||||
# Directives
|
||||
|
||||
Similar to predicates, directives are a way to associate arbitrary metadata with a pattern. The only difference between
|
||||
predicates and directives is that directives end in a `!` character instead of `?` character.
|
||||
Similar to predicates, directives are a way to associate arbitrary metadata with a pattern. The only difference between predicates
|
||||
and directives is that directives end in a `!` character instead of `?` character.
|
||||
|
||||
Tree-sitter's CLI supports the following directives by default:
|
||||
|
||||
## The `set!` directive
|
||||
|
||||
This directive allows you to associate key-value pairs with a pattern. The key and value can be any arbitrary text that
|
||||
you see fit.
|
||||
This directive allows you to associate key-value pairs with a pattern. The key and value can be any arbitrary text that you
|
||||
see fit.
|
||||
|
||||
```query
|
||||
((comment) @injection.content
|
||||
|
|
@ -156,8 +156,8 @@ another capture are preserved. It takes two arguments, both of which are capture
|
|||
### The `#strip!` directive
|
||||
|
||||
The `#strip!` directive allows you to remove text from a capture. It takes two arguments: the first is the capture to strip
|
||||
text from, and the second is a regular expression to match against the text. Any text matched by the regular expression
|
||||
will be removed from the text associated with the capture.
|
||||
text from, and the second is a regular expression to match against the text. Any text matched by the regular expression will
|
||||
be removed from the text associated with the capture.
|
||||
|
||||
For an example of the `#select-adjacent!` and `#strip!` directives,
|
||||
view the [code navigation](../../4-code-navigation.md#examples) documentation.
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@
|
|||
eachSystem = lib.genAttrs systems;
|
||||
pkgsFor = inputs.nixpkgs.legacyPackages;
|
||||
|
||||
version = "0.27.0";
|
||||
version = "0.26.3";
|
||||
|
||||
fs = lib.fileset;
|
||||
src = fs.toSource {
|
||||
|
|
|
|||
|
|
@ -317,7 +317,7 @@ pub trait Decode {
|
|||
|
||||
/// A stateful object for walking a syntax [`Tree`] efficiently.
|
||||
#[doc(alias = "TSTreeCursor")]
|
||||
pub struct TreeCursor<'tree>(ffi::TSTreeCursor, PhantomData<&'tree ()>);
|
||||
pub struct TreeCursor<'cursor>(ffi::TSTreeCursor, PhantomData<&'cursor ()>);
|
||||
|
||||
/// A set of patterns that match nodes in a syntax tree.
|
||||
#[doc(alias = "TSQuery")]
|
||||
|
|
@ -392,7 +392,7 @@ pub struct QueryMatch<'cursor, 'tree> {
|
|||
}
|
||||
|
||||
/// A sequence of [`QueryMatch`]es associated with a given [`QueryCursor`].
|
||||
pub struct QueryMatches<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> {
|
||||
pub struct QueryMatches<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> {
|
||||
ptr: *mut ffi::TSQueryCursor,
|
||||
query: &'query Query,
|
||||
text_provider: T,
|
||||
|
|
@ -407,7 +407,7 @@ pub struct QueryMatches<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> {
|
|||
///
|
||||
/// During iteration, each element contains a [`QueryMatch`] and index. The index can
|
||||
/// be used to access the new capture inside of the [`QueryMatch`]'s [`captures`](QueryMatch::captures).
|
||||
pub struct QueryCaptures<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> {
|
||||
pub struct QueryCaptures<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> {
|
||||
ptr: *mut ffi::TSQueryCursor,
|
||||
query: &'query Query,
|
||||
text_provider: T,
|
||||
|
|
@ -1581,7 +1581,7 @@ impl<'tree> Node<'tree> {
|
|||
/// Get the [`Language`] that was used to parse this node's syntax tree.
|
||||
#[doc(alias = "ts_node_language")]
|
||||
#[must_use]
|
||||
pub fn language(&self) -> LanguageRef<'tree> {
|
||||
pub fn language(&self) -> LanguageRef {
|
||||
LanguageRef(unsafe { ffi::ts_node_language(self.0) }, PhantomData)
|
||||
}
|
||||
|
||||
|
|
@ -2082,11 +2082,11 @@ impl fmt::Display for Node<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'tree> TreeCursor<'tree> {
|
||||
impl<'cursor> TreeCursor<'cursor> {
|
||||
/// Get the tree cursor's current [`Node`].
|
||||
#[doc(alias = "ts_tree_cursor_current_node")]
|
||||
#[must_use]
|
||||
pub fn node(&self) -> Node<'tree> {
|
||||
pub fn node(&self) -> Node<'cursor> {
|
||||
Node(
|
||||
unsafe { ffi::ts_tree_cursor_current_node(&self.0) },
|
||||
PhantomData,
|
||||
|
|
@ -2227,7 +2227,7 @@ impl<'tree> TreeCursor<'tree> {
|
|||
/// Re-initialize this tree cursor to start at the original node that the
|
||||
/// cursor was constructed with.
|
||||
#[doc(alias = "ts_tree_cursor_reset")]
|
||||
pub fn reset(&mut self, node: Node<'tree>) {
|
||||
pub fn reset(&mut self, node: Node<'cursor>) {
|
||||
unsafe { ffi::ts_tree_cursor_reset(&mut self.0, node.0) };
|
||||
}
|
||||
|
||||
|
|
@ -3404,7 +3404,7 @@ impl QueryProperty {
|
|||
/// Provide a `StreamingIterator` instead of the traditional `Iterator`, as the
|
||||
/// underlying object in the C library gets updated on each iteration. Copies would
|
||||
/// have their internal state overwritten, leading to Undefined Behavior
|
||||
impl<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> StreamingIterator
|
||||
impl<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> StreamingIterator
|
||||
for QueryMatches<'query, 'tree, T, I>
|
||||
{
|
||||
type Item = QueryMatch<'query, 'tree>;
|
||||
|
|
@ -3435,13 +3435,15 @@ impl<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> StreamingIterator
|
|||
}
|
||||
}
|
||||
|
||||
impl<T: TextProvider<I>, I: AsRef<[u8]>> StreamingIteratorMut for QueryMatches<'_, '_, T, I> {
|
||||
impl<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> StreamingIteratorMut
|
||||
for QueryMatches<'query, 'tree, T, I>
|
||||
{
|
||||
fn get_mut(&mut self) -> Option<&mut Self::Item> {
|
||||
self.current_match.as_mut()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> StreamingIterator
|
||||
impl<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> StreamingIterator
|
||||
for QueryCaptures<'query, 'tree, T, I>
|
||||
{
|
||||
type Item = (QueryMatch<'query, 'tree>, usize);
|
||||
|
|
@ -3478,7 +3480,9 @@ impl<'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>> StreamingIterator
|
|||
}
|
||||
}
|
||||
|
||||
impl<T: TextProvider<I>, I: AsRef<[u8]>> StreamingIteratorMut for QueryCaptures<'_, '_, T, I> {
|
||||
impl<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> StreamingIteratorMut
|
||||
for QueryCaptures<'query, 'tree, T, I>
|
||||
{
|
||||
fn get_mut(&mut self) -> Option<&mut Self::Item> {
|
||||
self.current_match.as_mut()
|
||||
}
|
||||
|
|
@ -3618,8 +3622,8 @@ impl From<ffi::TSRange> for Range {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<&InputEdit> for ffi::TSInputEdit {
|
||||
fn from(val: &InputEdit) -> Self {
|
||||
impl From<&'_ InputEdit> for ffi::TSInputEdit {
|
||||
fn from(val: &'_ InputEdit) -> Self {
|
||||
Self {
|
||||
start_byte: val.start_byte as u32,
|
||||
old_end_byte: val.old_end_byte as u32,
|
||||
|
|
|
|||
4
lib/binding_web/package-lock.json
generated
4
lib/binding_web/package-lock.json
generated
|
|
@ -1,12 +1,12 @@
|
|||
{
|
||||
"name": "web-tree-sitter",
|
||||
"version": "0.27.0",
|
||||
"version": "0.26.3",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "web-tree-sitter",
|
||||
"version": "0.27.0",
|
||||
"version": "0.26.3",
|
||||
"license": "MIT",
|
||||
"devDependencies": {
|
||||
"@eslint/js": "^9.39.1",
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "web-tree-sitter",
|
||||
"version": "0.27.0",
|
||||
"version": "0.26.3",
|
||||
"description": "Tree-sitter bindings for the web",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
|
|
|
|||
|
|
@ -1,8 +0,0 @@
|
|||
export function newFinalizer<T>(handler: (value: T) => void): FinalizationRegistry<T> | undefined {
|
||||
try {
|
||||
return new FinalizationRegistry(handler);
|
||||
} catch(e) {
|
||||
console.error('Unsupported FinalizationRegistry:', e);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,10 +1,5 @@
|
|||
import { C, Internal, assertInternal } from './constants';
|
||||
import { Language } from './language';
|
||||
import { newFinalizer } from './finalization_registry';
|
||||
|
||||
const finalizer = newFinalizer((address: number) => {
|
||||
C._ts_lookahead_iterator_delete(address);
|
||||
});
|
||||
|
||||
export class LookaheadIterator implements Iterable<string> {
|
||||
/** @internal */
|
||||
|
|
@ -18,7 +13,6 @@ export class LookaheadIterator implements Iterable<string> {
|
|||
assertInternal(internal);
|
||||
this[0] = address;
|
||||
this.language = language;
|
||||
finalizer?.register(this, address, this);
|
||||
}
|
||||
|
||||
/** Get the current symbol of the lookahead iterator. */
|
||||
|
|
@ -33,7 +27,6 @@ export class LookaheadIterator implements Iterable<string> {
|
|||
|
||||
/** Delete the lookahead iterator, freeing its resources. */
|
||||
delete(): void {
|
||||
finalizer?.unregister(this);
|
||||
C._ts_lookahead_iterator_delete(this[0]);
|
||||
this[0] = 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ import { Language } from './language';
|
|||
import { marshalRange, unmarshalRange } from './marshal';
|
||||
import { checkModule, initializeBinding } from './bindings';
|
||||
import { Tree } from './tree';
|
||||
import { newFinalizer } from './finalization_registry';
|
||||
|
||||
/**
|
||||
* Options for parsing
|
||||
|
|
@ -83,11 +82,6 @@ export let LANGUAGE_VERSION: number;
|
|||
*/
|
||||
export let MIN_COMPATIBLE_VERSION: number;
|
||||
|
||||
const finalizer = newFinalizer((addresses: number[]) => {
|
||||
C._ts_parser_delete(addresses[0]);
|
||||
C._free(addresses[1]);
|
||||
});
|
||||
|
||||
/**
|
||||
* A stateful object that is used to produce a {@link Tree} based on some
|
||||
* source code.
|
||||
|
|
@ -123,7 +117,6 @@ export class Parser {
|
|||
*/
|
||||
constructor() {
|
||||
this.initialize();
|
||||
finalizer?.register(this, [this[0], this[1]], this);
|
||||
}
|
||||
|
||||
/** @internal */
|
||||
|
|
@ -138,7 +131,6 @@ export class Parser {
|
|||
|
||||
/** Delete the parser, freeing its resources. */
|
||||
delete() {
|
||||
finalizer?.unregister(this);
|
||||
C._ts_parser_delete(this[0]);
|
||||
C._free(this[1]);
|
||||
this[0] = 0;
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ import { Node } from './node';
|
|||
import { marshalNode, unmarshalCaptures } from './marshal';
|
||||
import { TRANSFER_BUFFER } from './parser';
|
||||
import { Language } from './language';
|
||||
import { newFinalizer } from './finalization_registry';
|
||||
|
||||
const PREDICATE_STEP_TYPE_CAPTURE = 1;
|
||||
|
||||
|
|
@ -507,10 +506,6 @@ function parsePattern(
|
|||
}
|
||||
}
|
||||
|
||||
const finalizer = newFinalizer((address: number) => {
|
||||
C._ts_query_delete(address);
|
||||
});
|
||||
|
||||
export class Query {
|
||||
/** @internal */
|
||||
private [0] = 0; // Internal handle for Wasm
|
||||
|
|
@ -692,12 +687,10 @@ export class Query {
|
|||
this.assertedProperties = assertedProperties;
|
||||
this.refutedProperties = refutedProperties;
|
||||
this.exceededMatchLimit = false;
|
||||
finalizer?.register(this, address, this);
|
||||
}
|
||||
|
||||
/** Delete the query, freeing its resources. */
|
||||
delete(): void {
|
||||
finalizer?.unregister(this);
|
||||
C._ts_query_delete(this[0]);
|
||||
this[0] = 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ import { TreeCursor } from './tree_cursor';
|
|||
import { marshalEdit, marshalPoint, unmarshalNode, unmarshalRange } from './marshal';
|
||||
import { TRANSFER_BUFFER } from './parser';
|
||||
import { Edit } from './edit';
|
||||
import { newFinalizer } from './finalization_registry';
|
||||
|
||||
/** @internal */
|
||||
export function getText(tree: Tree, startIndex: number, endIndex: number, startPosition: Point): string {
|
||||
|
|
@ -29,10 +28,6 @@ export function getText(tree: Tree, startIndex: number, endIndex: number, startP
|
|||
return result ?? '';
|
||||
}
|
||||
|
||||
const finalizer = newFinalizer((address: number) => {
|
||||
C._ts_tree_delete(address);
|
||||
});
|
||||
|
||||
/** A tree that represents the syntactic structure of a source code file. */
|
||||
export class Tree {
|
||||
/** @internal */
|
||||
|
|
@ -50,7 +45,6 @@ export class Tree {
|
|||
this[0] = address;
|
||||
this.language = language;
|
||||
this.textCallback = textCallback;
|
||||
finalizer?.register(this, address, this);
|
||||
}
|
||||
|
||||
/** Create a shallow copy of the syntax tree. This is very fast. */
|
||||
|
|
@ -61,7 +55,6 @@ export class Tree {
|
|||
|
||||
/** Delete the syntax tree, freeing its resources. */
|
||||
delete(): void {
|
||||
finalizer?.unregister(this);
|
||||
C._ts_tree_delete(this[0]);
|
||||
this[0] = 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,11 +3,6 @@ import { marshalNode, marshalPoint, marshalTreeCursor, unmarshalNode, unmarshalP
|
|||
import { Node } from './node';
|
||||
import { TRANSFER_BUFFER } from './parser';
|
||||
import { getText, Tree } from './tree';
|
||||
import { newFinalizer } from './finalization_registry';
|
||||
|
||||
const finalizer = newFinalizer((address: number) => {
|
||||
C._ts_tree_cursor_delete_wasm(address);
|
||||
});
|
||||
|
||||
/** A stateful object for walking a syntax {@link Tree} efficiently. */
|
||||
export class TreeCursor {
|
||||
|
|
@ -35,7 +30,6 @@ export class TreeCursor {
|
|||
assertInternal(internal);
|
||||
this.tree = tree;
|
||||
unmarshalTreeCursor(this);
|
||||
finalizer?.register(this, this.tree[0], this);
|
||||
}
|
||||
|
||||
/** Creates a deep copy of the tree cursor. This allocates new memory. */
|
||||
|
|
@ -48,7 +42,6 @@ export class TreeCursor {
|
|||
|
||||
/** Delete the tree cursor, freeing its resources. */
|
||||
delete(): void {
|
||||
finalizer?.unregister(this);
|
||||
marshalTreeCursor(this);
|
||||
C._ts_tree_cursor_delete_wasm(this.tree[0]);
|
||||
this[0] = this[1] = this[2] = 0;
|
||||
|
|
|
|||
|
|
@ -1,74 +0,0 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { gc, event, Finalizer } from './memory';
|
||||
|
||||
// hijack finalization registry before import web-tree-sitter
|
||||
globalThis.FinalizationRegistry = Finalizer;
|
||||
|
||||
describe('Memory Management', () => {
|
||||
describe('call .delete()', () => {
|
||||
it('test free memory manually', async () => {
|
||||
const timer = setInterval(() => {
|
||||
gc();
|
||||
}, 100);
|
||||
let done = 0;
|
||||
event.on('gc', () => {
|
||||
done++;
|
||||
});
|
||||
await (async () => {
|
||||
const { JavaScript } = await (await import('./helper')).default;
|
||||
const { Parser, Query } = await import('../src');
|
||||
const parser = new Parser();
|
||||
parser.setLanguage(JavaScript);
|
||||
const tree = parser.parse('1+1')!;
|
||||
const copyTree = tree.copy();
|
||||
const cursor = tree.walk();
|
||||
const copyCursor = cursor.copy();
|
||||
const lookaheadIterator = JavaScript.lookaheadIterator(cursor.currentNode.nextParseState)!;
|
||||
const query = new Query(JavaScript, '(identifier) @element');
|
||||
parser.delete();
|
||||
tree.delete();
|
||||
copyTree.delete();
|
||||
cursor.delete();
|
||||
copyCursor.delete();
|
||||
lookaheadIterator.delete();
|
||||
query.delete();
|
||||
})();
|
||||
// wait for gc
|
||||
await new Promise((resolve) => setTimeout(resolve, 1000));
|
||||
clearInterval(timer);
|
||||
// expect no gc event fired
|
||||
expect(done).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('do not call .delete()', () => {
|
||||
it('test free memory automatically', async () => {
|
||||
const timer = setInterval(() => {
|
||||
gc();
|
||||
}, 100);
|
||||
let done = 0;
|
||||
const promise = new Promise((resolve) => {
|
||||
event.on('gc', () => {
|
||||
if (++done === 7) {
|
||||
resolve(undefined);
|
||||
clearInterval(timer);
|
||||
}
|
||||
console.log('free memory times: ', done);
|
||||
});
|
||||
});
|
||||
await (async () => {
|
||||
const { JavaScript } = await (await import('./helper')).default;
|
||||
const { Parser, Query } = await import('../src');
|
||||
const parser = new Parser(); // 1
|
||||
parser.setLanguage(JavaScript);
|
||||
const tree = parser.parse('1+1')!; // 2
|
||||
tree.copy(); // 3
|
||||
const cursor = tree.walk(); // 4
|
||||
cursor.copy(); // 5
|
||||
JavaScript.lookaheadIterator(cursor.currentNode.nextParseState)!; // 6
|
||||
new Query(JavaScript, '(identifier) @element'); // 7
|
||||
})();
|
||||
await promise;
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -1,20 +0,0 @@
|
|||
import { EventEmitter } from 'events';
|
||||
import { Session } from 'inspector';
|
||||
|
||||
const session = new Session();
|
||||
session.connect();
|
||||
|
||||
export function gc() {
|
||||
session.post('HeapProfiler.collectGarbage');
|
||||
}
|
||||
|
||||
export const event = new EventEmitter();
|
||||
|
||||
export class Finalizer<T> extends FinalizationRegistry<T> {
|
||||
constructor(handler: (value: T) => void) {
|
||||
super((value) => {
|
||||
handler(value);
|
||||
event.emit('gc');
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
import { describe, it } from 'vitest';
|
||||
|
||||
describe('FinalizationRegistry is unsupported', () => {
|
||||
it('test FinalizationRegistry is unsupported', async () => {
|
||||
// @ts-expect-error: test FinalizationRegistry is not supported
|
||||
globalThis.FinalizationRegistry = undefined;
|
||||
const { JavaScript } = await (await import('./helper')).default;
|
||||
const { Parser, Query } = await import('../src');
|
||||
const parser = new Parser();
|
||||
parser.setLanguage(JavaScript);
|
||||
const tree = parser.parse('1+1')!;
|
||||
const copyTree = tree.copy();
|
||||
const cursor = tree.walk();
|
||||
const copyCursor = cursor.copy();
|
||||
const lookaheadIterator = JavaScript.lookaheadIterator(cursor.currentNode.nextParseState)!;
|
||||
const query = new Query(JavaScript, '(identifier) @element');
|
||||
parser.delete();
|
||||
tree.delete();
|
||||
copyTree.delete();
|
||||
cursor.delete();
|
||||
copyCursor.delete();
|
||||
lookaheadIterator.delete();
|
||||
query.delete();
|
||||
});
|
||||
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue