feat: move tree-sitter configuration to dedicated file (#3700)

This commit is contained in:
Amaan Qureshi 2024-09-30 11:11:23 -04:00 committed by GitHub
parent 94a8262110
commit ea3846a2c5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 1828 additions and 536 deletions

107
Cargo.lock generated
View file

@ -280,6 +280,19 @@ dependencies = [
"memchr",
]
[[package]]
name = "console"
version = "0.15.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
dependencies = [
"encode_unicode",
"lazy_static",
"libc",
"unicode-width",
"windows-sys 0.52.0",
]
[[package]]
name = "core-foundation"
version = "0.10.0"
@ -447,6 +460,20 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "dialoguer"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "658bce805d770f407bc62102fca7c2c64ceef2fbcb2b8bd19d2765ce093980de"
dependencies = [
"console",
"fuzzy-matcher",
"shell-words",
"tempfile",
"thiserror",
"zeroize",
]
[[package]]
name = "diff"
version = "0.1.13"
@ -492,6 +519,12 @@ version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d"
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "equivalent"
version = "1.0.1"
@ -551,6 +584,15 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "fuzzy-matcher"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54614a3312934d066701a80f20f15fa3b56d67ac7722b39eea5b4c9dd1d66c94"
dependencies = [
"thread_local",
]
[[package]]
name = "getrandom"
version = "0.2.15"
@ -946,9 +988,12 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.19.0"
version = "1.20.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
checksum = "82881c4be219ab5faaf2ad5e5e5ecdff8c66bd7402ca3160975c93b24961afd1"
dependencies = [
"portable-atomic",
]
[[package]]
name = "openssl-probe"
@ -1004,6 +1049,12 @@ version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2"
[[package]]
name = "portable-atomic"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2"
[[package]]
name = "postcard"
version = "1.0.10"
@ -1137,9 +1188,9 @@ dependencies = [
[[package]]
name = "regex"
version = "1.10.6"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8"
dependencies = [
"aho-corasick",
"memchr",
@ -1149,9 +1200,9 @@ dependencies = [
[[package]]
name = "regex-automata"
version = "0.4.7"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3"
dependencies = [
"aho-corasick",
"memchr",
@ -1160,9 +1211,9 @@ dependencies = [
[[package]]
name = "regex-syntax"
version = "0.8.4"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "rustc-hash"
@ -1209,6 +1260,9 @@ name = "semver"
version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
dependencies = [
"serde",
]
[[package]]
name = "serde"
@ -1252,6 +1306,12 @@ dependencies = [
"serde",
]
[[package]]
name = "shell-words"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
[[package]]
name = "shlex"
version = "1.3.0"
@ -1328,9 +1388,9 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
[[package]]
name = "tempfile"
version = "3.12.0"
version = "3.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64"
checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b"
dependencies = [
"cfg-if",
"fastrand",
@ -1368,6 +1428,16 @@ dependencies = [
"syn",
]
[[package]]
name = "thread_local"
version = "1.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c"
dependencies = [
"cfg-if",
"once_cell",
]
[[package]]
name = "tiny_http"
version = "0.12.0"
@ -1484,6 +1554,7 @@ dependencies = [
"clap_complete",
"ctor",
"ctrlc",
"dialoguer",
"dirs",
"filetime",
"glob",
@ -1516,6 +1587,7 @@ dependencies = [
"tree-sitter-tags",
"tree-sitter-tests-proc-macro",
"unindent",
"url",
"walkdir",
"wasmparser",
"webbrowser",
@ -1580,12 +1652,14 @@ dependencies = [
"once_cell",
"path-slash",
"regex",
"semver",
"serde",
"serde_json",
"tempfile",
"tree-sitter",
"tree-sitter-highlight",
"tree-sitter-tags",
"url",
]
[[package]]
@ -1630,6 +1704,12 @@ dependencies = [
"tinyvec",
]
[[package]]
name = "unicode-width"
version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
[[package]]
name = "unicode-xid"
version = "0.2.6"
@ -1651,6 +1731,7 @@ dependencies = [
"form_urlencoded",
"idna",
"percent-encoding",
"serde",
]
[[package]]
@ -2295,3 +2376,9 @@ dependencies = [
"quote",
"syn",
]
[[package]]
name = "zeroize"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"

View file

@ -58,6 +58,7 @@ clap = { version = "4.5.18", features = [
clap_complete = "4.5.29"
ctor = "0.2.8"
ctrlc = { version = "3.4.5", features = ["termination"] }
dialoguer = { version = "0.11.0", features = ["fuzzy-select"] }
dirs = "5.0.1"
filetime = "0.2.25"
fs4 = "0.9.1"
@ -78,7 +79,7 @@ rand = "0.8.5"
regex = "1.10.6"
regex-syntax = "0.8.4"
rustc-hash = "2.0.0"
semver = "1.0.23"
semver = { version = "1.0.23", features = ["serde"] }
serde = { version = "1.0.210", features = ["derive"] }
serde_derive = "1.0.210"
serde_json = { version = "1.0.128", features = ["preserve_order"] }
@ -90,6 +91,7 @@ thiserror = "1.0.64"
tiny_http = "0.12.0"
toml = "0.8.19"
unindent = "0.2.3"
url = { version = "2.5.2", features = ["serde"] }
walkdir = "2.5.0"
wasmparser = "0.217.0"
webbrowser = "1.0.2"

View file

@ -32,6 +32,7 @@ clap.workspace = true
clap_complete.workspace = true
ctor.workspace = true
ctrlc.workspace = true
dialoguer.workspace = true
dirs.workspace = true
filetime.workspace = true
glob.workspace = true
@ -54,6 +55,7 @@ similar.workspace = true
smallbitvec.workspace = true
streaming-iterator.workspace = true
tiny_http.workspace = true
url.workspace = true
walkdir.workspace = true
wasmparser.workspace = true
webbrowser.workspace = true

View file

@ -26,8 +26,6 @@ semver.workspace = true
serde.workspace = true
serde_json.workspace = true
smallbitvec.workspace = true
url.workspace = true
tree-sitter.workspace = true
[target."cfg(windows)".dependencies]
url = "2.5.2"

View file

@ -28,10 +28,12 @@ libloading.workspace = true
once_cell.workspace = true
path-slash.workspace = true
regex.workspace = true
semver.workspace = true
serde.workspace = true
serde_json.workspace = true
tempfile.workspace = true
url.workspace = true
tree-sitter = {workspace = true}
tree-sitter-highlight = {workspace = true, optional = true}
tree-sitter-tags = {workspace = true, optional = true}
tree-sitter = { workspace = true }
tree-sitter-highlight = { workspace = true, optional = true }
tree-sitter-tags = { workspace = true, optional = true }

View file

@ -25,6 +25,7 @@ use libloading::{Library, Symbol};
use once_cell::unsync::OnceCell;
use path_slash::PathBufExt as _;
use regex::{Regex, RegexBuilder};
use semver::Version;
use serde::{Deserialize, Deserializer, Serialize};
use tree_sitter::Language;
#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
@ -35,6 +36,7 @@ use tree_sitter::QueryErrorKind;
use tree_sitter_highlight::HighlightConfiguration;
#[cfg(feature = "tree-sitter-tags")]
use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
use url::Url;
pub const EMSCRIPTEN_TAG: &str = concat!("docker.io/emscripten/emsdk:", env!("EMSCRIPTEN_VERSION"));
@ -48,6 +50,196 @@ pub struct Config {
pub parser_directories: Vec<PathBuf>,
}
/// A JSON value holding zero, one, or several path strings.
///
/// The enum is `#[serde(untagged)]`, so no variant name appears in the JSON:
/// a bare string maps to `Single` and an array of strings to `Multiple`.
#[derive(Serialize, Deserialize, Clone, Default)]
#[serde(untagged)]
pub enum PathsJSON {
/// No paths; this is the `Default` variant.
#[default]
Empty,
/// Exactly one path, written as a bare string.
Single(String),
/// Several paths, written as an array of strings.
Multiple(Vec<String>),
}
impl PathsJSON {
    /// Consumes the value and returns its paths as a vector.
    ///
    /// `Empty` yields `None`, `Single` yields a one-element vector, and
    /// `Multiple` yields its vector unchanged.
    fn into_vec(self) -> Option<Vec<String>> {
        let paths = match self {
            Self::Empty => return None,
            Self::Single(path) => vec![path],
            Self::Multiple(paths) => paths,
        };
        Some(paths)
    }

    /// Returns `true` only for the `Empty` variant.
    ///
    /// Used as the `skip_serializing_if` predicate for `PathsJSON` fields.
    fn is_empty(&self) -> bool {
        match self {
            Self::Empty => true,
            Self::Single(_) | Self::Multiple(_) => false,
        }
    }
}
/// An author or maintainer entry of a `package.json`.
///
/// Untagged: the JSON is either a plain string or an object with a `name`
/// and optional `email` and `url`.
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONAuthor {
/// The author given as a single string.
String(String),
/// The author given as an object.
Object {
name: String,
email: Option<String>,
url: Option<String>,
},
}
/// The `repository` entry of a `package.json`.
///
/// Untagged: the JSON is either a plain string or an object with a `url`.
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONRepository {
/// The repository given as a single string.
String(String),
/// The repository given as an object holding its URL.
Object { url: String },
}
/// The `package.json` fields that this crate reads and writes.
///
/// `name` and `version` are required; `version` must parse as a semver
/// `Version`. Every other field is optional.
#[derive(Serialize, Deserialize)]
pub struct PackageJSON {
pub name: String,
pub version: Version,
pub description: Option<String>,
pub author: Option<PackageJSONAuthor>,
pub maintainers: Option<Vec<PackageJSONAuthor>>,
pub license: Option<String>,
pub repository: Option<PackageJSONRepository>,
// Stored under the JSON key `tree-sitter`; `None` when the key is absent,
// and left out of the output when `None`.
#[serde(default)]
#[serde(rename = "tree-sitter", skip_serializing_if = "Option::is_none")]
pub tree_sitter: Option<Vec<LanguageConfigurationJSON>>,
}
/// Serde default for `path` fields: the relative path `.`.
fn default_path() -> PathBuf {
    let mut here = PathBuf::new();
    // Pushing onto an empty `PathBuf` adds no separator, so this is exactly ".".
    here.push(".");
    here
}
/// One element of the `tree-sitter` array in a `package.json`.
///
/// Field names are (de)serialized in kebab-case, e.g. `file_types` is the
/// JSON key `file-types`.
#[derive(Serialize, Deserialize, Clone)]
#[serde(rename_all = "kebab-case")]
pub struct LanguageConfigurationJSON {
// Falls back to `default_path()` (".") when the key is absent.
#[serde(default = "default_path")]
pub path: PathBuf,
pub scope: Option<String>,
pub file_types: Option<Vec<String>>,
pub content_regex: Option<String>,
pub first_line_regex: Option<String>,
pub injection_regex: Option<String>,
// Each `PathsJSON` field below defaults to `PathsJSON::Empty` when absent
// and is left out of the serialized output while it is empty.
#[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
pub highlights: PathsJSON,
#[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
pub injections: PathsJSON,
#[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
pub locals: PathsJSON,
#[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
pub tags: PathsJSON,
#[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
pub external_files: PathsJSON,
}
/// The contents of a `tree-sitter.json` file.
///
/// `grammars` and `metadata` are required keys; `bindings` is optional.
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct TreeSitterJSON {
pub grammars: Vec<Grammar>,
pub metadata: Metadata,
// Falls back to `Bindings::default()` when the key is absent.
#[serde(default)]
pub bindings: Bindings,
}
impl TreeSitterJSON {
pub fn from_file(path: &Path) -> Option<Self> {
if let Ok(file) = fs::File::open(path.join("tree-sitter.json")) {
Some(serde_json::from_reader(file).ok()?)
} else {
None
}
}
pub fn has_multiple_language_configs(&self) -> bool {
self.grammars.len() > 1
}
}
/// One element of the `grammars` array in `tree-sitter.json`.
///
/// Field names are (de)serialized in kebab-case. `name` and `scope` are
/// required; optional fields are left out of the serialized output when they
/// are `None` or empty.
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Grammar {
    pub name: String,
    // NOTE(review): with `rename_all = "kebab-case"` this field is the JSON
    // key `upper-camel-name`, while the tree-sitter.json schema names the
    // property `camelcase` — confirm which key is intended.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub upper_camel_name: Option<String>,
    pub scope: String,
    /// Directory containing the grammar. Falls back to `default_path()` (".")
    /// when the key is absent, matching the schema's declared default.
    #[serde(default = "default_path")]
    pub path: PathBuf,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
    // Skipped when `None` so the output never contains `"file-types": null`,
    // which the schema (an array of strings) would not accept.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub file_types: Option<Vec<String>>,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub injection_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub first_line_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_regex: Option<String>,
}
/// The `metadata` object of `tree-sitter.json`.
///
/// Only `version` is required, and it must parse as a semver `Version`.
/// Every other field is optional and is left out of the serialized output
/// when it is `None`.
#[derive(Serialize, Deserialize)]
pub struct Metadata {
pub version: Version,
#[serde(skip_serializing_if = "Option::is_none")]
pub license: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub authors: Option<Vec<Author>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub links: Option<Links>,
#[serde(skip_serializing_if = "Option::is_none")]
pub namespace: Option<String>,
}
/// One entry of `Metadata::authors`.
///
/// `name` is required; `email` and `url` are optional and are left out of
/// the serialized output when they are `None`.
#[derive(Serialize, Deserialize)]
pub struct Author {
pub name: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub email: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub url: Option<String>,
}
/// The value of `Metadata::links`.
///
/// `repository` is required and must parse as a `Url`. `homepage` is an
/// optional plain string and is left out of the output when it is `None`.
#[derive(Serialize, Deserialize)]
pub struct Links {
pub repository: Url,
#[serde(skip_serializing_if = "Option::is_none")]
pub homepage: Option<String>,
}
/// One boolean flag per binding language, stored in `TreeSitterJSON::bindings`.
///
/// The container-level `#[serde(default)]` means any flag missing from the
/// JSON takes its value from `Bindings::default()`.
// NOTE(review): presumably each flag selects whether that language's
// bindings are generated — confirm against the code that consumes it.
#[derive(Serialize, Deserialize)]
#[serde(default)]
pub struct Bindings {
pub c: bool,
pub go: bool,
pub java: bool,
pub kotlin: bool,
pub node: bool,
pub python: bool,
pub rust: bool,
pub swift: bool,
}
impl Default for Bindings {
    /// Default flags: every language is on except `java` and `kotlin`.
    fn default() -> Self {
        let (on, off) = (true, false);
        Self {
            c: on,
            go: on,
            java: off,
            kotlin: off,
            node: on,
            python: on,
            rust: on,
            swift: on,
        }
    }
}
// Replace `~` or `$HOME` with the home directory path.
// (While paths like "~/.tree-sitter/config.json" can be deserialized,
// they are not valid paths for I/O modules.)
@ -930,57 +1122,6 @@ impl Loader {
parser_path: &Path,
set_current_path_config: bool,
) -> Result<&[LanguageConfiguration]> {
#[derive(Deserialize, Clone, Default)]
#[serde(untagged)]
enum PathsJSON {
#[default]
Empty,
Single(String),
Multiple(Vec<String>),
}
impl PathsJSON {
fn into_vec(self) -> Option<Vec<String>> {
match self {
Self::Empty => None,
Self::Single(s) => Some(vec![s]),
Self::Multiple(s) => Some(s),
}
}
}
#[derive(Deserialize)]
struct LanguageConfigurationJSON {
#[serde(default)]
path: PathBuf,
scope: Option<String>,
#[serde(rename = "file-types")]
file_types: Option<Vec<String>>,
#[serde(rename = "content-regex")]
content_regex: Option<String>,
#[serde(rename = "first-line-regex")]
first_line_regex: Option<String>,
#[serde(rename = "injection-regex")]
injection_regex: Option<String>,
#[serde(default)]
highlights: PathsJSON,
#[serde(default)]
injections: PathsJSON,
#[serde(default)]
locals: PathsJSON,
#[serde(default)]
tags: PathsJSON,
#[serde(default, rename = "external-files")]
external_files: PathsJSON,
}
#[derive(Deserialize)]
struct PackageJSON {
#[serde(default)]
#[serde(rename = "tree-sitter")]
tree_sitter: Vec<LanguageConfigurationJSON>,
}
#[derive(Deserialize)]
struct GrammarJSON {
name: String,
@ -988,41 +1129,40 @@ impl Loader {
let initial_language_configuration_count = self.language_configurations.len();
if let Ok(package_json_contents) = fs::read_to_string(parser_path.join("package.json")) {
let package_json = serde_json::from_str::<PackageJSON>(&package_json_contents);
if let Ok(package_json) = package_json {
let language_count = self.languages_by_id.len();
for config_json in package_json.tree_sitter {
// Determine the path to the parser directory. This can be specified in
// the package.json, but defaults to the directory containing the package.json.
let language_path = parser_path.join(config_json.path);
if let Some(config) = TreeSitterJSON::from_file(parser_path) {
let language_count = self.languages_by_id.len();
for grammar in config.grammars {
// Determine the path to the parser directory. This is the grammar's
// `path` from tree-sitter.json, resolved relative to the directory
// containing tree-sitter.json.
let language_path = parser_path.join(grammar.path);
let grammar_path = language_path.join("src").join("grammar.json");
let mut grammar_file = fs::File::open(grammar_path)
.with_context(|| "Failed to read grammar.json")?;
let grammar_json: GrammarJSON =
serde_json::from_reader(BufReader::new(&mut grammar_file))
.with_context(|| "Failed to parse grammar.json")?;
let grammar_path = language_path.join("src").join("grammar.json");
let mut grammar_file =
fs::File::open(grammar_path).with_context(|| "Failed to read grammar.json")?;
let grammar_json: GrammarJSON =
serde_json::from_reader(BufReader::new(&mut grammar_file))
.with_context(|| "Failed to parse grammar.json")?;
// Determine if a previous language configuration in this package.json file
// already uses the same language.
let mut language_id = None;
for (id, (path, _, _)) in
self.languages_by_id.iter().enumerate().skip(language_count)
{
if language_path == *path {
language_id = Some(id);
}
// Determine if a previous language configuration in this tree-sitter.json file
// already uses the same language.
let mut language_id = None;
for (id, (path, _, _)) in
self.languages_by_id.iter().enumerate().skip(language_count)
{
if language_path == *path {
language_id = Some(id);
}
}
// If not, add a new language path to the list.
let language_id = if let Some(language_id) = language_id {
language_id
} else {
self.languages_by_id.push((
// If not, add a new language path to the list.
let language_id = if let Some(language_id) = language_id {
language_id
} else {
self.languages_by_id.push((
language_path,
OnceCell::new(),
config_json.external_files.clone().into_vec().map(|files| {
grammar.external_files.clone().into_vec().map(|files| {
files.into_iter()
.map(|path| {
let path = parser_path.join(path);
@ -1036,57 +1176,55 @@ impl Loader {
.collect::<Result<Vec<_>>>()
}).transpose()?,
));
self.languages_by_id.len() - 1
};
self.languages_by_id.len() - 1
};
let configuration = LanguageConfiguration {
root_path: parser_path.to_path_buf(),
language_name: grammar_json.name.clone(),
scope: config_json.scope,
language_id,
file_types: config_json.file_types.unwrap_or_default(),
content_regex: Self::regex(config_json.content_regex.as_deref()),
first_line_regex: Self::regex(config_json.first_line_regex.as_deref()),
injection_regex: Self::regex(config_json.injection_regex.as_deref()),
injections_filenames: config_json.injections.into_vec(),
locals_filenames: config_json.locals.into_vec(),
tags_filenames: config_json.tags.into_vec(),
highlights_filenames: config_json.highlights.into_vec(),
#[cfg(feature = "tree-sitter-highlight")]
highlight_config: OnceCell::new(),
#[cfg(feature = "tree-sitter-tags")]
tags_config: OnceCell::new(),
#[cfg(feature = "tree-sitter-highlight")]
highlight_names: &self.highlight_names,
#[cfg(feature = "tree-sitter-highlight")]
use_all_highlight_names: self.use_all_highlight_names,
};
let configuration = LanguageConfiguration {
root_path: parser_path.to_path_buf(),
language_name: grammar_json.name,
scope: Some(grammar.scope),
language_id,
file_types: grammar.file_types.unwrap_or_default(),
content_regex: Self::regex(grammar.content_regex.as_deref()),
first_line_regex: Self::regex(grammar.first_line_regex.as_deref()),
injection_regex: Self::regex(grammar.injection_regex.as_deref()),
injections_filenames: grammar.injections.into_vec(),
locals_filenames: grammar.locals.into_vec(),
tags_filenames: grammar.tags.into_vec(),
highlights_filenames: grammar.highlights.into_vec(),
#[cfg(feature = "tree-sitter-highlight")]
highlight_config: OnceCell::new(),
#[cfg(feature = "tree-sitter-tags")]
tags_config: OnceCell::new(),
#[cfg(feature = "tree-sitter-highlight")]
highlight_names: &self.highlight_names,
#[cfg(feature = "tree-sitter-highlight")]
use_all_highlight_names: self.use_all_highlight_names,
};
for file_type in &configuration.file_types {
self.language_configuration_ids_by_file_type
.entry(file_type.to_string())
.or_default()
.push(self.language_configurations.len());
}
if let Some(first_line_regex) = &configuration.first_line_regex {
self.language_configuration_ids_by_first_line_regex
.entry(first_line_regex.to_string())
.or_default()
.push(self.language_configurations.len());
}
for file_type in &configuration.file_types {
self.language_configuration_ids_by_file_type
.entry(file_type.to_string())
.or_default()
.push(self.language_configurations.len());
}
if let Some(first_line_regex) = &configuration.first_line_regex {
self.language_configuration_ids_by_first_line_regex
.entry(first_line_regex.to_string())
.or_default()
.push(self.language_configurations.len());
}
self.language_configurations.push(unsafe {
mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
configuration,
)
});
self.language_configurations.push(unsafe {
mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
configuration,
)
});
if set_current_path_config
&& self.language_configuration_in_current_path.is_none()
{
self.language_configuration_in_current_path =
Some(self.language_configurations.len() - 1);
}
if set_current_path_config && self.language_configuration_in_current_path.is_none()
{
self.language_configuration_in_current_path =
Some(self.language_configurations.len() - 1);
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -8,8 +8,11 @@ use anstyle::{AnsiColor, Color, Style};
use anyhow::{anyhow, Context, Result};
use clap::{crate_authors, Args, Command, FromArgMatches as _, Subcommand};
use clap_complete::{generate, Shell};
use dialoguer::{theme::ColorfulTheme, Confirm, FuzzySelect, Input};
use glob::glob;
use heck::ToUpperCamelCase;
use regex::Regex;
use semver::Version;
use tree_sitter::{ffi, Parser, Point};
use tree_sitter_cli::{
fuzz::{
@ -17,7 +20,9 @@ use tree_sitter_cli::{
LOG_GRAPH_ENABLED, START_SEED,
},
highlight,
init::{generate_grammar_files, lookup_package_json_for_path},
init::{
generate_grammar_files, lookup_package_json_for_path, migrate_package_json, JsonConfigOpts,
},
logger,
parse::{self, ParseFileOptions, ParseOutput},
playground, query, tags,
@ -26,8 +31,9 @@ use tree_sitter_cli::{
};
use tree_sitter_config::Config;
use tree_sitter_highlight::Highlighter;
use tree_sitter_loader as loader;
use tree_sitter_loader::{self as loader, TreeSitterJSON};
use tree_sitter_tags::TagsContext;
use url::Url;
const BUILD_VERSION: &str = env!("CARGO_PKG_VERSION");
const BUILD_SHA: Option<&'static str> = option_env!("BUILD_SHA");
@ -436,18 +442,216 @@ impl InitConfig {
}
impl Init {
fn run(self, current_dir: &Path) -> Result<()> {
if let Some(dir_name) = current_dir
.file_name()
.map(|x| x.to_string_lossy().to_ascii_lowercase())
{
if let Some(language_name) = dir_name
.strip_prefix("tree-sitter-")
.or_else(|| Some(dir_name.as_ref()))
{
generate_grammar_files(current_dir, language_name, self.update)?;
fn run(self, current_dir: &Path, migrated: bool) -> Result<()> {
let configure_json = if current_dir.join("tree-sitter.json").exists() {
Confirm::with_theme(&ColorfulTheme::default())
.with_prompt("It looks like you already have a `tree-sitter.json` file. Do you want to re-configure it?")
.interact()?
} else if current_dir.join("package.json").exists() {
!migrated
} else {
true
};
let (language_name, json_config_opts) = if configure_json {
let mut opts = JsonConfigOpts::default();
let name = || {
Input::<String>::with_theme(&ColorfulTheme::default())
.with_prompt("Parser name")
.validate_with(|input: &String| {
if input.chars().all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_') {
Ok(())
} else {
Err("The name must be lowercase and contain only letters, digits, and underscores")
}
})
.interact_text()
};
let upper_camel_name = |name: &str| {
Input::<String>::with_theme(&ColorfulTheme::default())
.with_prompt("UpperCamelCase name")
.default(name.to_upper_camel_case())
.interact_text()
};
let description = |name: &str| {
Input::<String>::with_theme(&ColorfulTheme::default())
.with_prompt("Description")
.default(format!(
"{} grammar for tree-sitter",
name.to_upper_camel_case()
))
.show_default(false)
.allow_empty(true)
.interact_text()
};
let repository = |name: &str| {
Input::<Url>::with_theme(&ColorfulTheme::default())
.with_prompt("Repository URL")
.allow_empty(true)
.default(
Url::parse(&format!(
"https://github.com/tree-sitter/tree-sitter-{name}"
))
.expect("Failed to parse default repository URL"),
)
.show_default(false)
.interact_text()
};
let scope = |name: &str| {
Input::<String>::with_theme(&ColorfulTheme::default())
.with_prompt("TextMate scope")
.default(format!("source.{name}"))
.interact_text()
};
let file_types = |name: &str| {
Input::<String>::with_theme(&ColorfulTheme::default())
.with_prompt("File types (space-separated)")
.default(format!(".{name}"))
.interact_text()
.map(|ft| {
let mut set = HashSet::new();
for ext in ft.split(' ') {
let ext = ext.trim();
if !ext.is_empty() {
set.insert(ext.to_string());
}
}
set.into_iter().collect::<Vec<_>>()
})
};
let initial_version = || {
Input::<Version>::with_theme(&ColorfulTheme::default())
.with_prompt("Version")
.default(Version::new(0, 1, 0))
.interact_text()
};
let license = || {
Input::<String>::with_theme(&ColorfulTheme::default())
.with_prompt("License")
.default("MIT".to_string())
.allow_empty(true)
.interact()
};
let author = || {
Input::<String>::with_theme(&ColorfulTheme::default())
.with_prompt("Author name")
.interact_text()
};
let email = || {
Input::with_theme(&ColorfulTheme::default())
.with_prompt("Author email")
.validate_with({
let mut force = None;
move |input: &String| -> Result<(), &str> {
if input.contains('@') || input.trim().is_empty() || force.as_ref().map_or(false, |old| old == input) {
Ok(())
} else {
force = Some(input.clone());
Err("This is not an email address; type the same value again to force use")
}
}
})
.allow_empty(true)
.interact_text().map(|e| (!e.trim().is_empty()).then_some(e))
};
let url = || {
Input::<String>::with_theme(&ColorfulTheme::default())
.with_prompt("Author URL")
.allow_empty(true)
.validate_with(|input: &String| -> Result<(), &str> {
if input.trim().is_empty() || Url::parse(input).is_ok() {
Ok(())
} else {
Err("This is not a valid URL")
}
})
.interact_text()
.map(|e| (!e.trim().is_empty()).then(|| Url::parse(&e).unwrap()))
};
let choices = [
"name",
"upper_camel_name",
"description",
"repository",
"scope",
"file_types",
"version",
"license",
"author",
"email",
"url",
"exit",
];
macro_rules! set_choice {
($choice:expr) => {
match $choice {
"name" => opts.name = name()?,
"upper_camel_name" => opts.upper_camel_name = upper_camel_name(&opts.name)?,
"description" => opts.description = description(&opts.name)?,
"repository" => opts.repository = Some(repository(&opts.name)?),
"scope" => opts.scope = scope(&opts.name)?,
"file_types" => opts.file_types = file_types(&opts.name)?,
"version" => opts.version = initial_version()?,
"license" => opts.license = license()?,
"author" => opts.author = author()?,
"email" => opts.email = email()?,
"url" => opts.url = url()?,
"exit" => break,
_ => unreachable!(),
}
};
}
}
// Initial configuration
for choice in choices.iter().take(choices.len() - 1) {
set_choice!(*choice);
}
// Loop for editing the configuration
loop {
println!(
"Your current configuration:\n{}",
serde_json::to_string_pretty(&opts)?
);
if Confirm::with_theme(&ColorfulTheme::default())
.with_prompt("Does the config above look correct?")
.interact()?
{
break;
}
let idx = FuzzySelect::with_theme(&ColorfulTheme::default())
.with_prompt("Which field would you like to change?")
.items(&choices)
.interact()?;
set_choice!(choices[idx]);
}
(opts.name.clone(), Some(opts))
} else {
let json = serde_json::from_reader::<_, TreeSitterJSON>(
fs::File::open(current_dir.join("tree-sitter.json"))
.with_context(|| "Failed to open tree-sitter.json")?,
)?;
(json.grammars[0].name.clone(), None)
};
generate_grammar_files(current_dir, &language_name, self.update, json_config_opts)?;
Ok(())
}
@ -1082,9 +1286,17 @@ fn run() -> Result<()> {
let current_dir = env::current_dir().unwrap();
let loader = loader::Loader::new()?;
let migrated = if !current_dir.join("tree-sitter.json").exists()
&& current_dir.join("package.json").exists()
{
migrate_package_json(&current_dir).with_context(|| "Failed to migrate package.json")?
} else {
false
};
match command {
Commands::InitConfig(_) => InitConfig::run()?,
Commands::Init(init_options) => init_options.run(&current_dir)?,
Commands::Init(init_options) => init_options.run(&current_dir, migrated)?,
Commands::Generate(generate_options) => generate_options.run(loader, &current_dir)?,
Commands::Build(build_options) => build_options.run(loader, &current_dir)?,
Commands::Parse(parse_options) => parse_options.run(loader, &current_dir)?,

View file

@ -3,7 +3,7 @@ libdir=@LIBDIR@
includedir=@INCLUDEDIR@
Name: tree-sitter-PARSER_NAME
Description: CAMEL_PARSER_NAME grammar for tree-sitter
Description: PARSER_DESCRIPTION
URL: @URL@
Version: @VERSION@
Requires: @REQUIRES@

View file

@ -1,4 +1,4 @@
"""CAMEL_PARSER_NAME grammar for tree-sitter"""
"""PARSER_DESCRIPTION"""
from importlib.resources import files as _files

View file

@ -1,12 +1,13 @@
[package]
name = "tree-sitter-PARSER_NAME"
description = "CAMEL_PARSER_NAME grammar for tree-sitter"
description = "PARSER_DESCRIPTION"
version = "0.0.1"
license = "MIT"
authors = ["PARSER_AUTHOR_NAME PARSER_AUTHOR_EMAIL"]
license = "PARSER_LICENSE"
readme = "README.md"
keywords = ["incremental", "parsing", "tree-sitter", "PARSER_NAME"]
categories = ["parsing", "text-editors"]
repository = "https://github.com/tree-sitter/tree-sitter-PARSER_NAME"
repository = "PARSER_URL"
edition = "2021"
autoexamples = false

View file

@ -4,7 +4,7 @@ import (
"testing"
tree_sitter "github.com/tree-sitter/go-tree-sitter"
tree_sitter_LOWER_PARSER_NAME "github.com/tree-sitter/tree-sitter-PARSER_NAME/bindings/go"
tree_sitter_LOWER_PARSER_NAME "PARSER_URL_STRIPPED/bindings/go"
)
func TestCanLoadGrammar(t *testing.T) {

View file

@ -1,4 +1,4 @@
module github.com/tree-sitter/tree-sitter-LOWER_PARSER_NAME
module PARSER_URL_STRIPPED
go 1.23

View file

@ -1,3 +1,9 @@
/**
* @file PARSER_DESCRIPTION
* @author PARSER_AUTHOR_NAME PARSER_AUTHOR_EMAIL
* @license PARSER_LICENSE
*/
/// <reference types="tree-sitter-cli/dsl" />
// @ts-check

View file

@ -1,9 +1,14 @@
{
"name": "tree-sitter-PARSER_NAME",
"version": "0.0.1",
"description": "CAMEL_PARSER_NAME grammar for tree-sitter",
"description": "PARSER_DESCRIPTION",
"repository": "github:tree-sitter/tree-sitter-PARSER_NAME",
"license": "MIT",
"license": "PARSER_LICENSE",
"author": {
"name": "PARSER_AUTHOR_NAME",
"email": "PARSER_AUTHOR_EMAIL",
"url": "PARSER_AUTHOR_URL"
},
"main": "bindings/node",
"types": "bindings/node",
"keywords": [

View file

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "tree-sitter-PARSER_NAME"
description = "CAMEL_PARSER_NAME grammar for tree-sitter"
description = "PARSER_DESCRIPTION"
version = "0.0.1"
keywords = ["incremental", "parsing", "tree-sitter", "PARSER_NAME"]
classifiers = [
@ -12,14 +12,15 @@ classifiers = [
"License :: OSI Approved :: MIT License",
"Topic :: Software Development :: Compilers",
"Topic :: Text Processing :: Linguistic",
"Typing :: Typed"
"Typing :: Typed",
]
authors = [{ name = "PARSER_AUTHOR_NAME", email = "PARSER_AUTHOR_EMAIL" }]
requires-python = ">=3.9"
license.text = "MIT"
license.text = "PARSER_LICENSE"
readme = "README.md"
[project.urls]
Homepage = "https://github.com/tree-sitter/tree-sitter-PARSER_NAME"
Homepage = "PARSER_URL"
[project.optional-dependencies]
core = ["tree-sitter~=0.22"]

View file

@ -8,17 +8,20 @@ use crate::tests::helpers::fixtures::scratch_dir;
fn detect_language_by_first_line_regex() {
let strace_dir = tree_sitter_dir(
r#"{
"name": "tree-sitter-strace",
"version": "0.0.1",
"tree-sitter": [
"grammars": [
{
"name": "strace",
"path": ".",
"scope": "source.strace",
"file-types": [
"strace"
],
"first-line-regex": "[0-9:.]* *execve"
}
]
],
"metadata": {
"version": "0.0.1"
}
}
"#,
"strace",
@ -56,16 +59,19 @@ fn detect_language_by_first_line_regex() {
let dummy_dir = tree_sitter_dir(
r#"{
"name": "tree-sitter-dummy",
"version": "0.0.1",
"tree-sitter": [
"grammars": [
{
"name": "dummy",
"scope": "source.dummy",
"path": ".",
"file-types": [
"dummy"
]
}
]
],
"metadata": {
"version": "0.0.1"
}
}
"#,
"dummy",
@ -83,9 +89,9 @@ fn detect_language_by_first_line_regex() {
);
}
fn tree_sitter_dir(package_json: &str, name: &str) -> tempfile::TempDir {
fn tree_sitter_dir(tree_sitter_json: &str, name: &str) -> tempfile::TempDir {
let temp_dir = tempfile::tempdir().unwrap();
fs::write(temp_dir.path().join("package.json"), package_json).unwrap();
fs::write(temp_dir.path().join("tree-sitter.json"), tree_sitter_json).unwrap();
fs::create_dir_all(temp_dir.path().join("src/tree_sitter")).unwrap();
fs::write(
temp_dir.path().join("src/grammar.json"),

View file

@ -0,0 +1,266 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"grammars": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The name of the grammar.",
"pattern": "^[a-z0-9_]+$"
},
"camelcase": {
"type": "string",
"description": "The name converted to CamelCase.",
"pattern": "^\\w+$",
"examples": [
"Rust",
"HTML"
],
"$comment": "This is used in the description and the class names."
},
"scope": {
"type": "string",
"description": "The TextMate scope that represents this language.",
"pattern": "^(source|text)(\\.\\w+)+$",
"examples": [
"source.rust",
"text.html"
]
},
"path": {
"type": "string",
"default": ".",
"description": "The relative path to the directory containing the grammar."
},
"external-files": {
"type": "array",
"description": "The relative paths to files that should be checked for modifications during recompilation.",
"items": {
"type": "string"
},
"minItems": 1
},
"file-types": {
"type": "array",
"description": "An array of filename suffix strings.",
"items": {
"type": "string"
},
"minItems": 1
},
"highlights": {
"anyOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
},
"minItems": 1
}
],
"default": "queries/highlights.scm",
"description": "The path(s) to the grammar's highlight queries."
},
"injections": {
"anyOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
},
"minItems": 1
}
],
"default": "queries/injections.scm",
"description": "The path(s) to the grammar's injection queries."
},
"locals": {
"anyOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
},
"minItems": 1
}
],
"default": "queries/locals.scm",
"description": "The path(s) to the grammar's local variable queries."
},
"tags": {
"anyOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
},
"minItems": 1
}
],
"default": "queries/tags.scm",
"description": "The path(s) to the grammar's code navigation queries."
},
"injection-regex": {
"type": "string",
"format": "regex",
"description": "A regex pattern that will be tested against a language name in order to determine whether this language should be used for a potential language injection site."
},
"first-line-regex": {
"type": "string",
"format": "regex",
"description": "A regex pattern that will be tested against the first line of a file in order to determine whether this language applies to the file."
},
"content-regex": {
"type": "string",
"format": "regex",
"description": "A regex pattern that will be tested against the contents of the file in order to break ties in cases where multiple grammars matched the file."
}
},
"required": [
"name",
"scope"
]
},
"minItems": 1
},
"metadata": {
"type": "object",
"properties": {
"version": {
"type": "string",
"description": "The current version of the project.",
"pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$",
"$comment": "The CLI will use this version to update package.json, Cargo.toml, pyproject.toml, Makefile."
},
"license": {
"type": "string",
"default": "MIT",
"description": "The project's license."
},
"description": {
"type": "string",
"description": "The project's description.",
"examples": [
"Rust grammar for tree-sitter"
]
},
"links": {
"type": "object",
"properties": {
"repository": {
"type": "string",
"format": "uri",
"description": "The project's repository."
},
"homepage": {
"type": "string",
"format": "uri",
"description": "The project's homepage."
}
},
"required": [
"repository"
]
},
"authors": {
"type": "array",
"items": {
"type": "object",
"description": "The project's author(s).",
"properties": {
"name": {
"type": "string"
},
"email": {
"type": "string",
"format": "email"
},
"url": {
"type": "string",
"format": "uri"
}
},
"required": [
"name"
]
},
"minItems": 1
},
"namespace": {
"type": "string",
"description": "The namespace for the Java & Kotlin packages.",
"default": "io.github.tree-sitter",
"$comment": "Used as is in the Maven/Gradle group name and transformed accordingly for the package names and directories (e.g. io.github.treesitter.jtreesitter.html - src/main/java/io/github/treesitter/jtreesitter/html)."
}
},
"required": [
"version",
"links"
]
},
"bindings": {
"type": "object",
"description": "The language bindings that will be generated.",
"properties": {
"c": {
"type": "boolean",
"default": true,
"const": true,
"$comment": "Always generated"
},
"go": {
"type": "boolean",
"default": true
},
"java": {
"type": "boolean",
"default": true
},
"kotlin": {
"type": "boolean",
"default": true
},
"node": {
"type": "boolean",
"default": true,
"const": true,
"$comment": "Always generated (for now)"
},
"python": {
"type": "boolean",
"default": true
},
"rust": {
"type": "boolean",
"default": true,
"const": true,
"$comment": "Always generated"
},
"swift": {
"type": "boolean",
"default": true
}
}
}
},
"required": [
"grammars",
"metadata"
]
}

View file

@ -33,27 +33,14 @@ mkdir tree-sitter-${YOUR_LANGUAGE_NAME}
cd tree-sitter-${YOUR_LANGUAGE_NAME}
```
You can use the `npm` command line tool to create a `package.json` file that describes your project, and allows your parser to be used from Node.js.
You can use the `tree-sitter` CLI tool to set up your project and allow your parser to be used from multiple languages.
```sh
# This will prompt you for input
npm init
# This installs a small module that lets your parser be used from Node
npm install --save nan
# This installs the Tree-sitter CLI itself
npm install --save-dev tree-sitter-cli
tree-sitter init
```
The last command will install the CLI into the `node_modules` folder in your working directory. An executable program called `tree-sitter` will be created inside of `node_modules/.bin/`. You may want to follow the Node.js convention of adding that folder to your `PATH` so that you can easily run this program when working in this directory.
```sh
# In your shell profile script
export PATH=$PATH:./node_modules/.bin
```
Once you have the CLI installed, create a file called `grammar.js` with the following contents:
Once you have installed the CLI and run through the `init` command's prompts, a file called `grammar.js` should exist with the following contents:
```js
/// <reference types="tree-sitter-cli/dsl" />
@ -69,7 +56,7 @@ module.exports = grammar({
});
```
Then run the following command:
Now, run the following command:
```sh
tree-sitter generate
@ -103,6 +90,80 @@ You now have a working parser.
Let's go over all of the functionality of the `tree-sitter` command line tool.
### Command: `init`
The first command you will likely run is the `init` command. This command sets up an empty repository with everything you need to get going with a grammar repository.
It only has one optional argument, `--update`, which will update outdated generated files, if needed.
The main file of interest for users to configure is `tree-sitter.json`, which tells the CLI information about your grammar, such as the queries.
#### Structure of `tree-sitter.json`
##### The `grammars` field
This field is an array of objects. You typically only need one object in this array, unless your repo has multiple grammars (e.g. `TypeScript` and `TSX`).
###### Basics
These keys specify basic information about the parser:
* `scope` (required) - A string like `"source.js"` that identifies the language. Currently, we strive to match the scope names used by popular [TextMate grammars](https://macromates.com/manual/en/language_grammars) and by the [Linguist](https://github.com/github/linguist) library.
* `path` - A relative path from the directory containing `tree-sitter.json` to another directory containing the `src/` folder, which contains the actual generated parser. The default value is `"."` (so that `src/` is in the same folder as `tree-sitter.json`), and this very rarely needs to be overridden.
* `external-files` - A list of relative paths from the root dir of a
parser to files that should be checked for modifications during recompilation.
This is useful during development so that changes to files other than `scanner.c`
are picked up by the CLI.
###### Language Detection
These keys help to decide whether the language applies to a given file:
* `file-types` - An array of filename suffix strings. The grammar will be used for files whose names end with one of these suffixes. Note that the suffix may match an *entire* filename.
* `first-line-regex` - A regex pattern that will be tested against the first line of a file in order to determine whether this language applies to the file. If present, this regex will be used for any file whose language does not match any grammar's `file-types`.
* `content-regex` - A regex pattern that will be tested against the contents of the file in order to break ties in cases where multiple grammars matched the file using the above two criteria. If the regex matches, this grammar will be preferred over another grammar with no `content-regex`. If the regex does not match, a grammar with no `content-regex` will be preferred over this one.
* `injection-regex` - A regex pattern that will be tested against a *language name* in order to determine whether this language should be used for a potential *language injection* site. Language injection is described in more detail in [a later section](#language-injection).
###### Query Paths
These keys specify relative paths from the directory containing `tree-sitter.json` to the files that control syntax highlighting:
* `highlights` - Path to a *highlight query*. Default: `queries/highlights.scm`
* `locals` - Path to a *local variable query*. Default: `queries/locals.scm`.
* `injections` - Path to an *injection query*. Default: `queries/injections.scm`.
The behaviors of these three files are described in the next section.
##### The `metadata` field
This field contains information that tree-sitter will use to populate relevant bindings' files, especially their versions. Future
`bump-version` and `publish` subcommands will leverage this version information as well. Typically, this will all be set up when you
run `tree-sitter init`, but you are welcome to update it as you see fit.
* `version` (required) - The current version of your grammar, which should follow [semver](https://semver.org)
* `license` - The license of your grammar, which should be a valid [SPDX license](https://spdx.org/licenses)
* `description` - The brief description of your grammar
* `authors` - An array of objects that contain a `name` field, and optionally an `email` and `url` field. Each field is a string
* `links` (required) - An object that contains a `repository` field, and optionally a `homepage` field. Each field is a string
* `namespace` - The namespace for the `Java` and `Kotlin` bindings, defaults to `io.github.tree-sitter` if not provided
##### The `bindings` field
This field controls what bindings are generated when the `init` command is run. Each key is a language name, and the value is a boolean.
* `c` (default: `true`)
* `go` (default: `true`)
* `java` (default: `false`)
* `kotlin` (default: `false`)
* `node` (default: `true`)
* `python` (default: `true`)
* `rust` (default: `true`)
* `swift` (default: `false`)
### Command: `generate`
The most important command you'll use is `tree-sitter generate`. This command reads the `grammar.js` file in your current working directory and creates a file called `src/parser.c`, which implements the parser. After making changes to your grammar, just run `tree-sitter generate` again.
@ -254,12 +315,12 @@ A couple of attributes also take in a parameter, which require the use of parent
The following attributes are available:
- `:skip` — This attribute will skip the test when running `tree-sitter test`.
* `:skip` — This attribute will skip the test when running `tree-sitter test`.
This is useful when you want to temporarily disable running a test without deleting it.
- `:error` — This attribute will assert that the parse tree contains an error. It's useful to just validate that a certain input is invalid without displaying the whole parse tree, as such you should omit the parse tree below the `---` line.
- `:fail-fast` — This attribute will stop the testing additional tests if the test marked with this attribute fails.
- `:language(LANG)` — This attribute will run the tests using the parser for the specified language. This is useful for multi-parser repos, such as XML and DTD, or Typescript and TSX. The default parser will be the first entry in the `tree-sitter` field in the root `package.json`, so having a way to pick a second or even third parser is useful.
- `:platform(PLATFORM)` — This attribute specifies the platform on which the test should run. It is useful to test platform-specific behavior (e.g. Windows newlines are different from Unix). This attribute must match up with Rust's [`std::env::consts::OS`](https://doc.rust-lang.org/std/env/consts/constant.OS.html).
* `:error` — This attribute will assert that the parse tree contains an error. It's useful to just validate that a certain input is invalid without displaying the whole parse tree; as such, you should omit the parse tree below the `---` line.
* `:fail-fast` — This attribute will stop testing additional tests if the test marked with this attribute fails.
* `:language(LANG)` — This attribute will run the tests using the parser for the specified language. This is useful for multi-parser repos, such as XML and DTD, or Typescript and TSX. The default parser used will always be the first entry in the `grammars` field in the `tree-sitter.json` config file, so having a way to pick a second or even third parser is useful.
* `:platform(PLATFORM)` — This attribute specifies the platform on which the test should run. It is useful to test platform-specific behavior (e.g. Windows newlines are different from Unix). This attribute must match up with Rust's [`std::env::consts::OS`](https://doc.rust-lang.org/std/env/consts/constant.OS.html).
Examples using attributes:
@ -855,7 +916,7 @@ This function is responsible for recognizing external tokens. It should return `
* **`uint32_t (*get_column)(TSLexer *)`** - A function for querying the current column position of the lexer. It returns the number of codepoints since the start of the current line. The codepoint position is recalculated on every call to this function by reading from the start of the line.
* **`bool (*is_at_included_range_start)(const TSLexer *)`** - A function for checking whether the parser has just skipped some characters in the document. When parsing an embedded document using the `ts_parser_set_included_ranges` function (described in the [multi-language document section][multi-language-section]), the scanner may want to apply some special behavior when moving to a disjoint part of the document. For example, in [EJS documents][ejs], the JavaScript parser uses this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`.
* **`bool (*eof)(const TSLexer *)`** - A function for determining whether the lexer is at the end of the file. The value of `lookahead` will be `0` at the end of a file, but this function should be used instead of checking for that value because the `0` or "NUL" value is also a valid character that could be present in the file being parsed.
- **`void (*log)(const TSLexer *, const char * format, ...)`** - A `printf`-like function for logging. The log is viewable through e.g. `tree-sitter parse --debug` or the browser's console after checking the `log` option in the [Playground](./playground).
* **`void (*log)(const TSLexer *, const char * format, ...)`** - A `printf`-like function for logging. The log is viewable through e.g. `tree-sitter parse --debug` or the browser's console after checking the `log` option in the [Playground](./playground).
The third argument to the `scan` function is an array of booleans that indicates which of the external tokens are currently expected by the parser. You should only look for a given token if it is valid according to this array. At the same time, you cannot backtrack, so you may need to combine certain pieces of logic.
@ -994,11 +1055,9 @@ Be very careful when emitting zero-width tokens from your external scanner, and
[antlr]: https://www.antlr.org
[bison-dprec]: https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
[bison]: https://en.wikipedia.org/wiki/GNU_bison
[c-linkage]: https://en.cppreference.com/w/cpp/language/language_linkage
[cargo]: https://doc.rust-lang.org/cargo/getting-started/installation.html
[crate]: https://crates.io/crates/tree-sitter-cli
[cst]: https://en.wikipedia.org/wiki/Parse_tree
[dfa]: https://en.wikipedia.org/wiki/Deterministic_finite_automaton
[ebnf]: https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form
[ecmascript-spec]: https://262.ecma-international.org/6.0/
[ejs]: https://ejs.co
@ -1014,7 +1073,6 @@ Be very careful when emitting zero-width tokens from your external scanner, and
[multi-language-section]: ./using-parsers#multi-language-documents
[named-vs-anonymous-nodes-section]: ./using-parsers#named-vs-anonymous-nodes
[field-names-section]: ./using-parsers#node-field-names
[nan]: https://github.com/nodejs/nan
[node-module]: https://www.npmjs.com/package/tree-sitter-cli
[node.js]: https://nodejs.org
[static-node-types]: ./using-parsers#static-node-types

View file

@ -14,10 +14,10 @@ This document explains how the Tree-sitter syntax highlighting system works, usi
All of the files needed to highlight a given language are normally included in the same git repository as the Tree-sitter grammar for that language (for example, [`tree-sitter-javascript`](https://github.com/tree-sitter/tree-sitter-javascript), [`tree-sitter-ruby`](https://github.com/tree-sitter/tree-sitter-ruby)). In order to run syntax highlighting from the command-line, three types of files are needed:
1. Per-user configuration in `~/.config/tree-sitter/config.json`
2. Language configuration in grammar repositories' `package.json` files.
2. Language configuration in grammar repositories' `tree-sitter.json` files.
3. Tree queries in the grammar repositories' `queries` folders.
For an example of the language-specific files, see the [`package.json` file](https://github.com/tree-sitter/tree-sitter-ruby/blob/master/package.json) and [`queries` directory](https://github.com/tree-sitter/tree-sitter-ruby/tree/master/queries) in the `tree-sitter-ruby` repository. The following sections describe the behavior of each file.
For an example of the language-specific files, see the [`tree-sitter.json` file](https://github.com/tree-sitter/tree-sitter-ruby/blob/master/tree-sitter.json) and [`queries` directory](https://github.com/tree-sitter/tree-sitter-ruby/tree/master/queries) in the `tree-sitter-ruby` repository. The following sections describe the behavior of each file.
## Per-user Configuration
@ -82,7 +82,7 @@ Styling values can be any of the following:
## Language Configuration
The `package.json` file is used by package managers like `npm`. Within this file, the Tree-sitter CLI looks for data nested under the top-level `"tree-sitter"` key. This key is expected to contain an array of objects with the following keys:
The `tree-sitter.json` file is used by the Tree-sitter CLI. Within this file, the CLI looks for data nested under the top-level `"grammars"` key. This key is expected to contain an array of objects with the following keys:
### Basics
@ -90,7 +90,7 @@ These keys specify basic information about the parser:
* `scope` (required) - A string like `"source.js"` that identifies the language. Currently, we strive to match the scope names used by popular [TextMate grammars](https://macromates.com/manual/en/language_grammars) and by the [Linguist](https://github.com/github/linguist) library.
* `path` (optional) - A relative path from the directory containing `package.json` to another directory containing the `src/` folder, which contains the actual generated parser. The default value is `"."` (so that `src/` is in the same folder as `package.json`), and this very rarely needs to be overridden.
* `path` (optional) - A relative path from the directory containing `tree-sitter.json` to another directory containing the `src/` folder, which contains the actual generated parser. The default value is `"."` (so that `src/` is in the same folder as `tree-sitter.json`), and this very rarely needs to be overridden.
* `external-files` (optional) - A list of relative paths from the root dir of a
parser to files that should be checked for modifications during recompilation.
@ -111,7 +111,7 @@ These keys help to decide whether the language applies to a given file:
### Query Paths
These keys specify relative paths from the directory containing `package.json` to the files that control syntax highlighting:
These keys specify relative paths from the directory containing `tree-sitter.json` to the files that control syntax highlighting:
* `highlights` - Path to a *highlight query*. Default: `queries/highlights.scm`
* `locals` - Path to a *local variable query*. Default: `queries/locals.scm`.