Merge branch 'master' into HEAD
This commit is contained in:
commit
026231e93d
173 changed files with 22878 additions and 6961 deletions
2
.gitattributes
vendored
Normal file
2
.gitattributes
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
/lib/src/unicode/*.h linguist-vendored
|
||||
/lib/src/unicode/LICENSE linguist-vendored
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -15,6 +15,7 @@ docs/assets/js/tree-sitter.js
|
|||
/target
|
||||
*.rs.bk
|
||||
*.a
|
||||
*.dylib
|
||||
*.o
|
||||
*.obj
|
||||
*.exp
|
||||
|
|
|
|||
3
.gitmodules
vendored
3
.gitmodules
vendored
|
|
@ -1,3 +0,0 @@
|
|||
[submodule "externals/utf8proc"]
|
||||
path = lib/utf8proc
|
||||
url = https://github.com/julialang/utf8proc
|
||||
15
.travis.yml
15
.travis.yml
|
|
@ -2,6 +2,9 @@ language: rust
|
|||
rust:
|
||||
- stable
|
||||
|
||||
env:
|
||||
CFLAGS="-Wall -Wextra -Werror -Wstrict-prototypes"
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- os: osx
|
||||
|
|
@ -11,8 +14,8 @@ matrix:
|
|||
|
||||
before_install:
|
||||
# Install node
|
||||
- nvm install 10
|
||||
- nvm use 10
|
||||
- nvm install 12
|
||||
- nvm use 12
|
||||
|
||||
# Download emscripten and create a shorthand for adding it to the PATH.
|
||||
# Don't add it to the path globally because it overrides the default
|
||||
|
|
@ -23,6 +26,9 @@ script:
|
|||
# Build the WASM binding
|
||||
- (eval "$WASM_ENV" && script/build-wasm)
|
||||
|
||||
# build the shared/static libraries
|
||||
- make
|
||||
|
||||
# Build the CLI
|
||||
- cargo build --release
|
||||
|
||||
|
|
@ -32,7 +38,6 @@ script:
|
|||
- (eval "$WASM_ENV" && script/generate-fixtures-wasm)
|
||||
|
||||
# Run the tests
|
||||
- export TREE_SITTER_STATIC_ANALYSIS=1
|
||||
- script/test
|
||||
- script/test-wasm
|
||||
- script/benchmark
|
||||
|
|
@ -53,8 +58,6 @@ deploy:
|
|||
file_glob: true
|
||||
file:
|
||||
- "tree-sitter-*.gz"
|
||||
- "target/release/tree-sitter.js"
|
||||
- "target/release/tree-sitter.wasm"
|
||||
draft: true
|
||||
overwrite: true
|
||||
skip_cleanup: true
|
||||
|
|
@ -65,5 +68,3 @@ cache:
|
|||
cargo: true
|
||||
directories:
|
||||
- target/emsdk
|
||||
- test/fixtures/grammars
|
||||
- /home/travis/.emscripten_cache
|
||||
|
|
|
|||
1
CONTRIBUTING.md
Symbolic link
1
CONTRIBUTING.md
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
docs/section-6-contributing.md
|
||||
706
Cargo.lock
generated
706
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
71
Makefile
Normal file
71
Makefile
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
VERSION := 0.6.3
|
||||
|
||||
# install directory layout
|
||||
PREFIX ?= /usr/local
|
||||
INCLUDEDIR ?= $(PREFIX)/include
|
||||
LIBDIR ?= $(PREFIX)/lib
|
||||
PCLIBDIR ?= $(LIBDIR)/pkgconfig
|
||||
|
||||
# collect sources
|
||||
ifneq ($(AMALGAMATED),1)
|
||||
SRC := $(wildcard lib/src/*.c)
|
||||
# do not double-include amalgamation
|
||||
SRC := $(filter-out lib/src/lib.c,$(SRC))
|
||||
else
|
||||
# use amalgamated build
|
||||
SRC := lib/src/lib.c
|
||||
endif
|
||||
OBJ := $(SRC:.c=.o)
|
||||
|
||||
# define default flags, and override to append mandatory flags
|
||||
CFLAGS ?= -O3 -Wall -Wextra -Werror
|
||||
override CFLAGS += -std=gnu99 -fPIC -Ilib/src -Ilib/include
|
||||
|
||||
# ABI versioning
|
||||
SONAME_MAJOR := 0
|
||||
SONAME_MINOR := 0
|
||||
|
||||
# OS-specific bits
|
||||
ifeq ($(shell uname),Darwin)
|
||||
SOEXT = dylib
|
||||
SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib
|
||||
SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib
|
||||
LINKSHARED += -dynamiclib -Wl,-install_name,$(LIBDIR)/libtree-sitter.$(SONAME_MAJOR).dylib
|
||||
else
|
||||
SOEXT = so
|
||||
SOEXTVER_MAJOR = so.$(SONAME_MAJOR)
|
||||
SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR)
|
||||
LINKSHARED += -shared -Wl,-soname,libtree-sitter.so.$(SONAME_MAJOR)
|
||||
endif
|
||||
ifneq (,$(filter $(shell uname),FreeBSD NetBSD DragonFly))
|
||||
PCLIBDIR := $(PREFIX)/libdata/pkgconfig
|
||||
endif
|
||||
|
||||
all: libtree-sitter.a libtree-sitter.$(SOEXTVER)
|
||||
|
||||
libtree-sitter.a: $(OBJ)
|
||||
$(AR) rcs $@ $^
|
||||
|
||||
libtree-sitter.$(SOEXTVER): $(OBJ)
|
||||
$(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@
|
||||
ln -sf $@ libtree-sitter.$(SOEXT)
|
||||
ln -sf $@ libtree-sitter.$(SOEXTVER_MAJOR)
|
||||
|
||||
install: all
|
||||
install -d '$(DESTDIR)$(LIBDIR)'
|
||||
install -m755 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a
|
||||
install -m755 libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER)
|
||||
ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR)
|
||||
ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT)
|
||||
install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter
|
||||
install -m644 lib/include/tree_sitter/*.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/
|
||||
install -d '$(DESTDIR)$(PCLIBDIR)'
|
||||
sed -e 's|@LIBDIR@|$(LIBDIR)|;s|@INCLUDEDIR@|$(INCLUDEDIR)|;s|@VERSION@|$(VERSION)|' \
|
||||
-e 's|=$(PREFIX)|=$${prefix}|' \
|
||||
-e 's|@PREFIX@|$(PREFIX)|' \
|
||||
tree-sitter.pc.in > '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc
|
||||
|
||||
clean:
|
||||
rm -f lib/src/*.o libtree-sitter.a libtree-sitter.$(SOEXT) libtree-sitter.$(SOEXTVER_MAJOR) libtree-sitter.$(SOEXTVER)
|
||||
|
||||
.PHONY: all install clean
|
||||
15
README.md
15
README.md
|
|
@ -5,9 +5,14 @@
|
|||
|
||||
Tree-sitter is a parser generator tool and an incremental parsing library. It can build a concrete syntax tree for a source file and efficiently update the syntax tree as the source file is edited. Tree-sitter aims to be:
|
||||
|
||||
* **General** enough to parse any programming language
|
||||
* **Fast** enough to parse on every keystroke in a text editor
|
||||
* **Robust** enough to provide useful results even in the presence of syntax errors
|
||||
* **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application
|
||||
- **General** enough to parse any programming language
|
||||
- **Fast** enough to parse on every keystroke in a text editor
|
||||
- **Robust** enough to provide useful results even in the presence of syntax errors
|
||||
- **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application
|
||||
|
||||
[Documentation](https://tree-sitter.github.io/tree-sitter/)
|
||||
## Links
|
||||
|
||||
- [Documentation](https://tree-sitter.github.io)
|
||||
- [Rust binding](lib/binding_rust/README.md)
|
||||
- [WASM binding](lib/binding_web/README.md)
|
||||
- [Command-line interface](cli/README.md)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
name = "tree-sitter-cli"
|
||||
description = "CLI tool for developing, testing, and using Tree-sitter parsers"
|
||||
version = "0.15.8"
|
||||
version = "0.17.3"
|
||||
authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
|
||||
edition = "2018"
|
||||
license = "MIT"
|
||||
|
|
@ -19,31 +19,36 @@ name = "benchmark"
|
|||
harness = false
|
||||
|
||||
[dependencies]
|
||||
cc = "1.0"
|
||||
ansi_term = "0.11"
|
||||
difference = "2.0"
|
||||
lazy_static = "1.2.0"
|
||||
smallbitvec = "2.3.0"
|
||||
cc = "1.0"
|
||||
atty = "0.2"
|
||||
clap = "2.32"
|
||||
difference = "2.0"
|
||||
dirs = "2.0.2"
|
||||
glob = "0.3.0"
|
||||
lazy_static = "1.2.0"
|
||||
libloading = "0.5"
|
||||
once_cell = "0.1.8"
|
||||
regex = "1"
|
||||
regex-syntax = "0.6.4"
|
||||
serde = "1.0"
|
||||
serde_derive = "1.0"
|
||||
regex-syntax = "0.6.4"
|
||||
regex = "1"
|
||||
rsass = "^0.11.0"
|
||||
smallbitvec = "2.3.0"
|
||||
tiny_http = "0.6"
|
||||
webbrowser = "0.5.1"
|
||||
|
||||
[dependencies.tree-sitter]
|
||||
version = ">= 0.3.7"
|
||||
version = ">= 0.17.0"
|
||||
path = "../lib"
|
||||
|
||||
[dependencies.tree-sitter-highlight]
|
||||
version = ">= 0.1.0"
|
||||
version = ">= 0.3.0"
|
||||
path = "../highlight"
|
||||
|
||||
[dependencies.tree-sitter-tags]
|
||||
version = ">= 0.1.0"
|
||||
path = "../tags"
|
||||
|
||||
[dependencies.serde_json]
|
||||
version = "1.0"
|
||||
features = ["preserve_order"]
|
||||
|
|
|
|||
|
|
@ -2,8 +2,8 @@ use lazy_static::lazy_static;
|
|||
use std::collections::BTreeMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::Instant;
|
||||
use std::{env, fs, usize};
|
||||
use tree_sitter::{Language, Parser};
|
||||
use std::{env, fs, str, usize};
|
||||
use tree_sitter::{Language, Parser, Query};
|
||||
use tree_sitter_cli::error::Error;
|
||||
use tree_sitter_cli::loader::Loader;
|
||||
|
||||
|
|
@ -17,28 +17,34 @@ lazy_static! {
|
|||
static ref REPETITION_COUNT: usize = env::var("TREE_SITTER_BENCHMARK_REPETITION_COUNT")
|
||||
.map(|s| usize::from_str_radix(&s, 10).unwrap())
|
||||
.unwrap_or(5);
|
||||
|
||||
static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone());
|
||||
static ref EXAMPLE_PATHS_BY_LANGUAGE_DIR: BTreeMap<PathBuf, Vec<PathBuf>> = {
|
||||
fn process_dir(result: &mut BTreeMap<PathBuf, Vec<PathBuf>>, dir: &Path) {
|
||||
static ref EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR: BTreeMap<PathBuf, (Vec<PathBuf>, Vec<PathBuf>)> = {
|
||||
fn process_dir(result: &mut BTreeMap<PathBuf, (Vec<PathBuf>, Vec<PathBuf>)>, dir: &Path) {
|
||||
if dir.join("grammar.js").exists() {
|
||||
let relative_path = dir.strip_prefix(GRAMMARS_DIR.as_path()).unwrap();
|
||||
let (example_paths, query_paths) =
|
||||
result.entry(relative_path.to_owned()).or_default();
|
||||
|
||||
if let Ok(example_files) = fs::read_dir(&dir.join("examples")) {
|
||||
result.insert(
|
||||
relative_path.to_owned(),
|
||||
example_files
|
||||
.filter_map(|p| {
|
||||
let p = p.unwrap().path();
|
||||
if p.is_file() {
|
||||
Some(p)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
);
|
||||
} else {
|
||||
result.insert(relative_path.to_owned(), Vec::new());
|
||||
example_paths.extend(example_files.filter_map(|p| {
|
||||
let p = p.unwrap().path();
|
||||
if p.is_file() {
|
||||
Some(p.to_owned())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
if let Ok(query_files) = fs::read_dir(&dir.join("queries")) {
|
||||
query_paths.extend(query_files.filter_map(|p| {
|
||||
let p = p.unwrap().path();
|
||||
if p.is_file() {
|
||||
Some(p.to_owned())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}));
|
||||
}
|
||||
} else {
|
||||
for entry in fs::read_dir(&dir).unwrap() {
|
||||
|
|
@ -57,20 +63,25 @@ lazy_static! {
|
|||
}
|
||||
|
||||
fn main() {
|
||||
let mut parser = Parser::new();
|
||||
let max_path_length = EXAMPLE_PATHS_BY_LANGUAGE_DIR
|
||||
.iter()
|
||||
.flat_map(|(_, paths)| paths.iter())
|
||||
.map(|p| p.file_name().unwrap().to_str().unwrap().chars().count())
|
||||
let max_path_length = EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR
|
||||
.values()
|
||||
.flat_map(|(e, q)| {
|
||||
e.iter()
|
||||
.chain(q.iter())
|
||||
.map(|s| s.file_name().unwrap().to_str().unwrap().len())
|
||||
})
|
||||
.max()
|
||||
.unwrap();
|
||||
|
||||
let mut all_normal_speeds = Vec::new();
|
||||
let mut all_error_speeds = Vec::new();
|
||||
.unwrap_or(0);
|
||||
|
||||
eprintln!("Benchmarking with {} repetitions", *REPETITION_COUNT);
|
||||
|
||||
for (language_path, example_paths) in EXAMPLE_PATHS_BY_LANGUAGE_DIR.iter() {
|
||||
let mut parser = Parser::new();
|
||||
let mut all_normal_speeds = Vec::new();
|
||||
let mut all_error_speeds = Vec::new();
|
||||
|
||||
for (language_path, (example_paths, query_paths)) in
|
||||
EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR.iter()
|
||||
{
|
||||
let language_name = language_path.file_name().unwrap().to_str().unwrap();
|
||||
|
||||
if let Some(filter) = LANGUAGE_FILTER.as_ref() {
|
||||
|
|
@ -80,9 +91,24 @@ fn main() {
|
|||
}
|
||||
|
||||
eprintln!("\nLanguage: {}", language_name);
|
||||
parser.set_language(get_language(language_path)).unwrap();
|
||||
let language = get_language(language_path);
|
||||
parser.set_language(language).unwrap();
|
||||
|
||||
eprintln!(" Normal examples:");
|
||||
eprintln!(" Constructing Queries");
|
||||
for path in query_paths {
|
||||
if let Some(filter) = EXAMPLE_FILTER.as_ref() {
|
||||
if !path.to_str().unwrap().contains(filter.as_str()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
parse(&path, max_path_length, |source| {
|
||||
Query::new(language, str::from_utf8(source).unwrap())
|
||||
.expect("Failed to parse query");
|
||||
});
|
||||
}
|
||||
|
||||
eprintln!(" Parsing Valid Code:");
|
||||
let mut normal_speeds = Vec::new();
|
||||
for example_path in example_paths {
|
||||
if let Some(filter) = EXAMPLE_FILTER.as_ref() {
|
||||
|
|
@ -91,12 +117,16 @@ fn main() {
|
|||
}
|
||||
}
|
||||
|
||||
normal_speeds.push(parse(&mut parser, example_path, max_path_length));
|
||||
normal_speeds.push(parse(example_path, max_path_length, |code| {
|
||||
parser.parse(code, None).expect("Failed to parse");
|
||||
}));
|
||||
}
|
||||
|
||||
eprintln!(" Error examples (mismatched languages):");
|
||||
eprintln!(" Parsing Invalid Code (mismatched languages):");
|
||||
let mut error_speeds = Vec::new();
|
||||
for (other_language_path, example_paths) in EXAMPLE_PATHS_BY_LANGUAGE_DIR.iter() {
|
||||
for (other_language_path, (example_paths, _)) in
|
||||
EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR.iter()
|
||||
{
|
||||
if other_language_path != language_path {
|
||||
for example_path in example_paths {
|
||||
if let Some(filter) = EXAMPLE_FILTER.as_ref() {
|
||||
|
|
@ -105,7 +135,9 @@ fn main() {
|
|||
}
|
||||
}
|
||||
|
||||
error_speeds.push(parse(&mut parser, example_path, max_path_length));
|
||||
error_speeds.push(parse(example_path, max_path_length, |code| {
|
||||
parser.parse(code, None).expect("Failed to parse");
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -124,7 +156,7 @@ fn main() {
|
|||
all_error_speeds.extend(error_speeds);
|
||||
}
|
||||
|
||||
eprintln!("\nOverall");
|
||||
eprintln!("\n Overall");
|
||||
if let Some((average_normal, worst_normal)) = aggregate(&all_normal_speeds) {
|
||||
eprintln!(" Average Speed (normal): {} bytes/ms", average_normal);
|
||||
eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal);
|
||||
|
|
@ -137,7 +169,7 @@ fn main() {
|
|||
eprintln!("");
|
||||
}
|
||||
|
||||
fn aggregate(speeds: &Vec<(usize)>) -> Option<(usize, usize)> {
|
||||
fn aggregate(speeds: &Vec<usize>) -> Option<(usize, usize)> {
|
||||
if speeds.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
|
@ -152,28 +184,25 @@ fn aggregate(speeds: &Vec<(usize)>) -> Option<(usize, usize)> {
|
|||
Some((total / speeds.len(), max))
|
||||
}
|
||||
|
||||
fn parse(parser: &mut Parser, example_path: &Path, max_path_length: usize) -> usize {
|
||||
fn parse(path: &Path, max_path_length: usize, mut action: impl FnMut(&[u8])) -> usize {
|
||||
eprint!(
|
||||
" {:width$}\t",
|
||||
example_path.file_name().unwrap().to_str().unwrap(),
|
||||
path.file_name().unwrap().to_str().unwrap(),
|
||||
width = max_path_length
|
||||
);
|
||||
|
||||
let source_code = fs::read(example_path)
|
||||
.map_err(Error::wrap(|| format!("Failed to read {:?}", example_path)))
|
||||
let source_code = fs::read(path)
|
||||
.map_err(Error::wrap(|| format!("Failed to read {:?}", path)))
|
||||
.unwrap();
|
||||
let time = Instant::now();
|
||||
for _ in 0..*REPETITION_COUNT {
|
||||
parser
|
||||
.parse(&source_code, None)
|
||||
.expect("Incompatible language version");
|
||||
action(&source_code);
|
||||
}
|
||||
let duration = time.elapsed() / (*REPETITION_COUNT as u32);
|
||||
let duration_ms =
|
||||
duration.as_secs() as f64 * 1000.0 + duration.subsec_nanos() as f64 / 1000000.0;
|
||||
let speed = (source_code.len() as f64 / duration_ms) as usize;
|
||||
let duration_ms = duration.as_millis();
|
||||
let speed = source_code.len() as u128 / (duration_ms + 1);
|
||||
eprintln!("time {} ms\tspeed {} bytes/ms", duration_ms as usize, speed);
|
||||
speed
|
||||
speed as usize
|
||||
}
|
||||
|
||||
fn get_language(path: &Path) -> Language {
|
||||
|
|
|
|||
16
cli/build.rs
16
cli/build.rs
|
|
@ -1,4 +1,4 @@
|
|||
use std::path::PathBuf;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::{env, fs};
|
||||
|
||||
fn main() {
|
||||
|
|
@ -6,12 +6,25 @@ fn main() {
|
|||
println!("cargo:rustc-env={}={}", "BUILD_SHA", git_sha);
|
||||
}
|
||||
|
||||
if wasm_files_present() {
|
||||
println!("cargo:rustc-cfg={}", "TREE_SITTER_EMBED_WASM_BINDING");
|
||||
}
|
||||
|
||||
println!(
|
||||
"cargo:rustc-env=BUILD_TARGET={}",
|
||||
std::env::var("TARGET").unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
fn wasm_files_present() -> bool {
|
||||
let paths = [
|
||||
"../lib/binding_web/tree-sitter.js",
|
||||
"../lib/binding_web/tree-sitter.wasm",
|
||||
];
|
||||
|
||||
paths.iter().all(|p| Path::new(p).exists())
|
||||
}
|
||||
|
||||
fn read_git_sha() -> Option<String> {
|
||||
let mut repo_path = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());
|
||||
|
||||
|
|
@ -51,7 +64,6 @@ fn read_git_sha() -> Option<String> {
|
|||
}
|
||||
return fs::read_to_string(&ref_filename).ok();
|
||||
}
|
||||
|
||||
// If we're on a detached commit, then the `HEAD` file itself contains the sha.
|
||||
else if head_content.len() == 40 {
|
||||
return Some(head_content);
|
||||
|
|
|
|||
356
cli/npm/dsl.d.ts
vendored
Normal file
356
cli/npm/dsl.d.ts
vendored
Normal file
|
|
@ -0,0 +1,356 @@
|
|||
type AliasRule = {type: 'ALIAS'; named: boolean; content: Rule; value: string};
|
||||
type BlankRule = {type: 'BLANK'};
|
||||
type ChoiceRule = {type: 'CHOICE'; members: Rule[]};
|
||||
type FieldRule = {type: 'FIELD'; name: string; content: Rule};
|
||||
type ImmediateTokenRule = {type: 'IMMEDIATE_TOKEN'; content: Rule};
|
||||
type PatternRule = {type: 'PATTERN'; value: string};
|
||||
type PrecDynamicRule = {type: 'PREC_DYNAMIC'; content: Rule; value: number};
|
||||
type PrecLeftRule = {type: 'PREC_LEFT'; content: Rule; value: number};
|
||||
type PrecRightRule = {type: 'PREC_RIGHT'; content: Rule; value: number};
|
||||
type PrecRule = {type: 'PREC'; content: Rule; value: number};
|
||||
type Repeat1Rule = {type: 'REPEAT1'; content: Rule};
|
||||
type RepeatRule = {type: 'REPEAT'; content: Rule};
|
||||
type SeqRule = {type: 'SEQ'; members: Rule[]};
|
||||
type StringRule = {type: 'STRING'; value: string};
|
||||
type SymbolRule<Name extends string> = {type: 'SYMBOL'; name: Name};
|
||||
type TokenRule = {type: 'TOKEN'; content: Rule};
|
||||
|
||||
type Rule =
|
||||
| AliasRule
|
||||
| BlankRule
|
||||
| ChoiceRule
|
||||
| FieldRule
|
||||
| ImmediateTokenRule
|
||||
| PatternRule
|
||||
| PrecDynamicRule
|
||||
| PrecLeftRule
|
||||
| PrecRightRule
|
||||
| PrecRule
|
||||
| Repeat1Rule
|
||||
| RepeatRule
|
||||
| SeqRule
|
||||
| StringRule
|
||||
| SymbolRule<string>
|
||||
| TokenRule;
|
||||
|
||||
type RuleOrLiteral = Rule | RegExp | string;
|
||||
|
||||
type GrammarSymbols<RuleName extends string> = {
|
||||
[name in RuleName]: SymbolRule<name>;
|
||||
} &
|
||||
Record<string, SymbolRule<string>>;
|
||||
|
||||
type RuleBuilder<RuleName extends string> = (
|
||||
$: GrammarSymbols<RuleName>,
|
||||
) => RuleOrLiteral;
|
||||
|
||||
type RuleBuilders<
|
||||
RuleName extends string,
|
||||
BaseGrammarRuleName extends string
|
||||
> = {
|
||||
[name in RuleName]: RuleBuilder<RuleName | BaseGrammarRuleName>;
|
||||
};
|
||||
|
||||
interface Grammar<
|
||||
RuleName extends string,
|
||||
BaseGrammarRuleName extends string = never,
|
||||
Rules extends RuleBuilders<RuleName, BaseGrammarRuleName> = RuleBuilders<
|
||||
RuleName,
|
||||
BaseGrammarRuleName
|
||||
>
|
||||
> {
|
||||
/**
|
||||
* Name of the grammar language.
|
||||
*/
|
||||
name: string;
|
||||
|
||||
/** Mapping of grammar rule names to rule builder functions. */
|
||||
rules: Rules;
|
||||
|
||||
/**
|
||||
* An array of arrays of rule names. Each inner array represents a set of
|
||||
* rules that's involved in an _LR(1) conflict_ that is _intended to exist_
|
||||
* in the grammar. When these conflicts occur at runtime, Tree-sitter will
|
||||
* use the GLR algorithm to explore all of the possible interpretations. If
|
||||
* _multiple_ parses end up succeeding, Tree-sitter will pick the subtree
|
||||
* whose corresponding rule has the highest total _dynamic precedence_.
|
||||
*
|
||||
* @param $ grammar rules
|
||||
*/
|
||||
conflicts?: (
|
||||
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
|
||||
) => RuleOrLiteral[][];
|
||||
|
||||
/**
|
||||
* An array of token names which can be returned by an _external scanner_.
|
||||
* External scanners allow you to write custom C code which runs during the
|
||||
* lexing process in order to handle lexical rules (e.g. Python's indentation
|
||||
* tokens) that cannot be described by regular expressions.
|
||||
*
|
||||
* @param $ grammar rules
|
||||
* @param previous array of externals from the base schema, if any
|
||||
*
|
||||
* @see https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners
|
||||
*/
|
||||
externals?: (
|
||||
$: Record<string, SymbolRule<string>>,
|
||||
previous: Rule[],
|
||||
) => SymbolRule<string>[];
|
||||
|
||||
/**
|
||||
* An array of tokens that may appear anywhere in the language. This
|
||||
* is often used for whitespace and comments. The default value of
|
||||
* extras is to accept whitespace. To control whitespace explicitly,
|
||||
* specify extras: `$ => []` in your grammar.
|
||||
*
|
||||
* @param $ grammar rules
|
||||
*/
|
||||
extras?: (
|
||||
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
|
||||
) => RuleOrLiteral[];
|
||||
|
||||
/**
|
||||
* An array of rules that should be automatically removed from the
|
||||
* grammar by replacing all of their usages with a copy of their definition.
|
||||
* This is useful for rules that are used in multiple places but for which
|
||||
* you don't want to create syntax tree nodes at runtime.
|
||||
*
|
||||
* @param $ grammar rules
|
||||
*/
|
||||
inline?: (
|
||||
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
|
||||
) => RuleOrLiteral[];
|
||||
|
||||
/**
|
||||
* A list of hidden rule names that should be considered supertypes in the
|
||||
* generated node types file.
|
||||
*
|
||||
* @param $ grammar rules
|
||||
*
|
||||
* @see http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
|
||||
*/
|
||||
supertypes?: (
|
||||
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
|
||||
) => RuleOrLiteral[];
|
||||
|
||||
/**
|
||||
* The name of a token that will match keywords for the purpose of the
|
||||
* keyword extraction optimization.
|
||||
*
|
||||
* @param $ grammar rules
|
||||
*
|
||||
* @see https://tree-sitter.github.io/tree-sitter/creating-parsers#keyword-extraction
|
||||
*/
|
||||
word?: ($: GrammarSymbols<RuleName | BaseGrammarRuleName>) => RuleOrLiteral;
|
||||
}
|
||||
|
||||
type GrammarSchema<RuleName extends string> = {
|
||||
[K in keyof Grammar<RuleName>]: K extends 'rules'
|
||||
? Record<RuleName, Rule>
|
||||
: Grammar<RuleName>[K];
|
||||
};
|
||||
|
||||
/**
|
||||
* Causes the given rule to appear with an alternative name in the syntax tree.
|
||||
* For instance with `alias($.foo, 'bar')`, the aliased rule will appear as an
|
||||
* anonymous node, as if the rule had been written as the simple string.
|
||||
*
|
||||
* @param rule rule that will be aliased
|
||||
* @param name target name for the alias
|
||||
*/
|
||||
declare function alias(rule: RuleOrLiteral, name: string): AliasRule;
|
||||
|
||||
/**
|
||||
* Causes the given rule to appear as an alternative named node, for instance
|
||||
* with `alias($.foo, $.bar)`, the aliased rule `foo` will appear as a named
|
||||
* node called `bar`.
|
||||
*
|
||||
* @param rule rule that will be aliased
|
||||
* @param symbol target symbol for the alias
|
||||
*/
|
||||
declare function alias(
|
||||
rule: RuleOrLiteral,
|
||||
symbol: SymbolRule<string>,
|
||||
): AliasRule;
|
||||
|
||||
/**
|
||||
* Creates a blank rule, matching nothing.
|
||||
*/
|
||||
declare function blank(): BlankRule;
|
||||
|
||||
/**
|
||||
* Assigns a field name to the child node(s) matched by the given rule.
|
||||
* In the resulting syntax tree, you can then use that field name to
|
||||
* access specific children.
|
||||
*
|
||||
* @param name name of the field
|
||||
* @param rule rule the field should match
|
||||
*/
|
||||
declare function field(name: string, rule: RuleOrLiteral): FieldRule;
|
||||
|
||||
/**
|
||||
* Creates a rule that matches one of a set of possible rules. The order
|
||||
* of the arguments does not matter. This is analogous to the `|` (pipe)
|
||||
* operator in EBNF notation.
|
||||
*
|
||||
* @param options possible rule choices
|
||||
*/
|
||||
declare function choice(...options: RuleOrLiteral[]): ChoiceRule;
|
||||
|
||||
/**
|
||||
* Creates a rule that matches zero or one occurrence of a given rule.
|
||||
* It is analogous to the `[x]` (square bracket) syntax in EBNF notation.
|
||||
*
|
||||
* @param value rule to be made optional
|
||||
*/
|
||||
declare function optional(rule: RuleOrLiteral): ChoiceRule;
|
||||
|
||||
/**
|
||||
* Marks the given rule with a numerical precedence which will be used to
|
||||
* resolve LR(1) conflicts at parser-generation time. When two rules overlap
|
||||
* in a way that represents either a true ambiguity or a _local_ ambiguity
|
||||
* given one token of lookahead, Tree-sitter will try to resolve the conflict by
|
||||
* matching the rule with the higher precedence. The default precedence of all
|
||||
* rules is zero. This works similarly to the precedence directives in Yacc grammars.
|
||||
*
|
||||
* @param number precedence weight
|
||||
* @param rule rule being weighted
|
||||
*
|
||||
* @see https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables
|
||||
* @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
|
||||
*/
|
||||
declare const prec: {
|
||||
(number: number, rule: RuleOrLiteral): PrecRule;
|
||||
|
||||
/**
|
||||
* Marks the given rule as left-associative (and optionally applies a
|
||||
* numerical precedence). When an LR(1) conflict arises in which all of the
|
||||
* rules have the same numerical precedence, Tree-sitter will consult the
|
||||
* rules' associativity. If there is a left-associative rule, Tree-sitter
|
||||
* will prefer matching a rule that ends _earlier_. This works similarly to
|
||||
* associativity directives in Yacc grammars.
|
||||
*
|
||||
* @param number (optional) precedence weight
|
||||
* @param rule rule to mark as left-associative
|
||||
*
|
||||
* @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
|
||||
*/
|
||||
left(rule: RuleOrLiteral): PrecLeftRule;
|
||||
left(number: number, rule: RuleOrLiteral): PrecLeftRule;
|
||||
|
||||
/**
|
||||
* Marks the given rule as right-associative (and optionally applies a
|
||||
* numerical precedence). When an LR(1) conflict arises in which all of the
|
||||
* rules have the same numerical precedence, Tree-sitter will consult the
|
||||
* rules' associativity. If there is a right-associative rule, Tree-sitter
|
||||
* will prefer matching a rule that ends _later_. This works similarly to
|
||||
* associativity directives in Yacc grammars.
|
||||
*
|
||||
* @param number (optional) precedence weight
|
||||
* @param rule rule to mark as right-associative
|
||||
*
|
||||
* @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
|
||||
*/
|
||||
right(rule: RuleOrLiteral): PrecRightRule;
|
||||
right(number: number, rule: RuleOrLiteral): PrecRightRule;
|
||||
|
||||
/**
|
||||
* Marks the given rule with a numerical precedence which will be used to
|
||||
* resolve LR(1) conflicts at _runtime_ instead of parser-generation time.
|
||||
* This is only necessary when handling a conflict dynamically using the
|
||||
* `conflicts` field in the grammar, and when there is a genuine _ambiguity_:
|
||||
* multiple rules correctly match a given piece of code. In that event,
|
||||
* Tree-sitter compares the total dynamic precedence associated with each
|
||||
* rule, and selects the one with the highest total. This is similar to
|
||||
* dynamic precedence directives in Bison grammars.
|
||||
*
|
||||
* @param number precedence weight
|
||||
* @param rule rule being weighted
|
||||
*
|
||||
* @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
|
||||
*/
|
||||
dynamic(number: number, rule: RuleOrLiteral): PrecDynamicRule;
|
||||
};
|
||||
|
||||
/**
|
||||
* Creates a rule that matches _zero-or-more_ occurrences of a given rule.
|
||||
* It is analogous to the `{x}` (curly brace) syntax in EBNF notation. This
|
||||
* rule is implemented in terms of `repeat1` but is included because it
|
||||
* is very commonly used.
|
||||
*
|
||||
* @param rule rule to repeat, zero or more times
|
||||
*/
|
||||
declare function repeat(rule: RuleOrLiteral): RepeatRule;
|
||||
|
||||
/**
|
||||
* Creates a rule that matches one-or-more occurrences of a given rule.
|
||||
*
|
||||
* @param rule rule to repeat, one or more times
|
||||
*/
|
||||
declare function repeat1(rule: RuleOrLiteral): Repeat1Rule;
|
||||
|
||||
/**
|
||||
* Creates a rule that matches any number of other rules, one after another.
|
||||
* It is analogous to simply writing multiple symbols next to each other
|
||||
* in EBNF notation.
|
||||
*
|
||||
* @param rules ordered rules that comprise the sequence
|
||||
*/
|
||||
declare function seq(...rules: RuleOrLiteral[]): SeqRule;
|
||||
|
||||
/**
|
||||
* Creates a symbol rule, representing another rule in the grammar by name.
|
||||
*
|
||||
* @param name name of the target rule
|
||||
*/
|
||||
declare function sym<Name extends string>(name: Name): SymbolRule<Name>;
|
||||
|
||||
/**
|
||||
* Marks the given rule as producing only a single token. Tree-sitter's
|
||||
* default is to treat each String or RegExp literal in the grammar as a
|
||||
* separate token. Each token is matched separately by the lexer and
|
||||
* returned as its own leaf node in the tree. The token function allows
|
||||
* you to express a complex rule using the DSL functions (rather
|
||||
* than as a single regular expression) but still have Tree-sitter treat
|
||||
* it as a single token.
|
||||
*
|
||||
* @param rule rule to represent as a single token
|
||||
*/
|
||||
declare const token: {
|
||||
(rule: RuleOrLiteral): TokenRule;
|
||||
|
||||
/**
|
||||
* Marks the given rule as producing an immediate token. This allows
|
||||
* the parser to produce a different token based on whether or not
|
||||
* there are `extras` preceding the token's main content. When there
|
||||
* are _no_ leading `extras`, an immediate token is preferred over a
|
||||
* normal token which would otherwise match.
|
||||
*
|
||||
* @param rule rule to represent as an immediate token
|
||||
*/
|
||||
immediate(rule: RuleOrLiteral): ImmediateTokenRule;
|
||||
};
|
||||
|
||||
/**
|
||||
* Creates a new language grammar with the provided schema.
|
||||
*
|
||||
* @param options grammar options
|
||||
*/
|
||||
declare function grammar<RuleName extends string>(
|
||||
options: Grammar<RuleName>,
|
||||
): GrammarSchema<RuleName>;
|
||||
|
||||
/**
|
||||
* Extends an existing language grammar with the provided options,
|
||||
* creating a new language.
|
||||
*
|
||||
* @param baseGrammar base grammar schema to extend from
|
||||
* @param options grammar options for the new extended language
|
||||
*/
|
||||
declare function grammar<
|
||||
BaseGrammarRuleName extends string,
|
||||
RuleName extends string
|
||||
>(
|
||||
baseGrammar: GrammarSchema<BaseGrammarRuleName>,
|
||||
options: Grammar<RuleName, BaseGrammarRuleName>,
|
||||
): GrammarSchema<RuleName | BaseGrammarRuleName>;
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "tree-sitter-cli",
|
||||
"version": "0.15.8",
|
||||
"version": "0.17.3",
|
||||
"author": "Max Brunsfeld",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
use super::test_highlight;
|
||||
use std::fmt::Write;
|
||||
use std::io;
|
||||
use tree_sitter_highlight::PropertySheetError;
|
||||
use tree_sitter::{QueryError, QueryErrorKind};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Error(pub Vec<String>);
|
||||
|
|
@ -50,6 +51,34 @@ impl Error {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a> From<(&str, QueryError)> for Error {
|
||||
fn from((path, error): (&str, QueryError)) -> Self {
|
||||
let mut msg = format!("Query error at {}:{}. ", path, error.row + 1);
|
||||
match error.kind {
|
||||
QueryErrorKind::Capture => write!(&mut msg, "Invalid capture name {}", error.message),
|
||||
QueryErrorKind::Field => write!(&mut msg, "Invalid field name {}", error.message),
|
||||
QueryErrorKind::NodeType => write!(&mut msg, "Invalid node type {}", error.message),
|
||||
QueryErrorKind::Syntax => write!(&mut msg, "Invalid syntax:\n{}", error.message),
|
||||
QueryErrorKind::Structure => write!(&mut msg, "Impossible pattern:\n{}", error.message),
|
||||
QueryErrorKind::Predicate => write!(&mut msg, "Invalid predicate: {}", error.message),
|
||||
}
|
||||
.unwrap();
|
||||
Self::new(msg)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<tree_sitter_highlight::Error> for Error {
|
||||
fn from(error: tree_sitter_highlight::Error) -> Self {
|
||||
Error::new(format!("{:?}", error))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<tree_sitter_tags::Error> for Error {
|
||||
fn from(error: tree_sitter_tags::Error) -> Self {
|
||||
Error::new(format!("{}", error))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<serde_json::Error> for Error {
|
||||
fn from(error: serde_json::Error) -> Self {
|
||||
Error::new(error.to_string())
|
||||
|
|
@ -62,8 +91,14 @@ impl From<io::Error> for Error {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<rsass::Error> for Error {
|
||||
fn from(error: rsass::Error) -> Self {
|
||||
impl From<glob::PatternError> for Error {
|
||||
fn from(error: glob::PatternError) -> Self {
|
||||
Error::new(error.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<glob::GlobError> for Error {
|
||||
fn from(error: glob::GlobError) -> Self {
|
||||
Error::new(error.to_string())
|
||||
}
|
||||
}
|
||||
|
|
@ -74,18 +109,14 @@ impl From<regex_syntax::ast::Error> for Error {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<test_highlight::Failure> for Error {
|
||||
fn from(error: test_highlight::Failure) -> Self {
|
||||
Error::new(error.message())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for Error {
|
||||
fn from(error: String) -> Self {
|
||||
Error::new(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<PropertySheetError> for Error {
|
||||
fn from(error: PropertySheetError) -> Self {
|
||||
match error {
|
||||
PropertySheetError::InvalidFormat(e) => Self::from(e),
|
||||
PropertySheetError::InvalidRegex(e) => Self::regex(&e.to_string()),
|
||||
PropertySheetError::InvalidJSON(e) => Self::from(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ use super::coincident_tokens::CoincidentTokenIndex;
|
|||
use super::token_conflicts::TokenConflictMap;
|
||||
use crate::generate::dedup::split_state_id_groups;
|
||||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::nfa::{CharacterSet, NfaCursor};
|
||||
use crate::generate::nfa::NfaCursor;
|
||||
use crate::generate::rules::{Symbol, TokenSet};
|
||||
use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable};
|
||||
use log::info;
|
||||
|
|
@ -189,13 +189,10 @@ impl<'a> LexTableBuilder<'a> {
|
|||
// character that leads to the empty set of NFA states.
|
||||
if eof_valid {
|
||||
let (next_state_id, _) = self.add_state(Vec::new(), false);
|
||||
self.table.states[state_id].advance_actions.push((
|
||||
CharacterSet::empty().add_char('\0'),
|
||||
AdvanceAction {
|
||||
state: next_state_id,
|
||||
in_main_token: true,
|
||||
},
|
||||
));
|
||||
self.table.states[state_id].eof_action = Some(AdvanceAction {
|
||||
state: next_state_id,
|
||||
in_main_token: true,
|
||||
});
|
||||
}
|
||||
|
||||
for transition in transitions {
|
||||
|
|
@ -273,6 +270,7 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
|
|||
let signature = (
|
||||
i == 0,
|
||||
state.accept_action,
|
||||
state.eof_action.is_some(),
|
||||
state
|
||||
.advance_actions
|
||||
.iter()
|
||||
|
|
@ -320,6 +318,9 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
|
|||
for (_, advance_action) in new_state.advance_actions.iter_mut() {
|
||||
advance_action.state = group_ids_by_state_id[advance_action.state];
|
||||
}
|
||||
if let Some(eof_action) = &mut new_state.eof_action {
|
||||
eof_action.state = group_ids_by_state_id[eof_action.state];
|
||||
}
|
||||
new_states.push(new_state);
|
||||
}
|
||||
|
||||
|
|
@ -364,6 +365,9 @@ fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) {
|
|||
for (_, advance_action) in state.advance_actions.iter_mut() {
|
||||
advance_action.state = new_ids_by_old_id[advance_action.state];
|
||||
}
|
||||
if let Some(eof_action) = &mut state.eof_action {
|
||||
eof_action.state = new_ids_by_old_id[eof_action.state];
|
||||
}
|
||||
state
|
||||
})
|
||||
.collect();
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ use crate::generate::grammars::{
|
|||
use crate::generate::node_types::VariableInfo;
|
||||
use crate::generate::rules::{Associativity, Symbol, SymbolType, TokenSet};
|
||||
use crate::generate::tables::{
|
||||
FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
ProductionInfo, ProductionInfoId,
|
||||
};
|
||||
use core::ops::Range;
|
||||
|
|
@ -16,17 +16,19 @@ use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
|
|||
use std::fmt::Write;
|
||||
use std::u32;
|
||||
|
||||
// For conflict reporting, each parse state is associated with an example
|
||||
// sequence of symbols that could lead to that parse state.
|
||||
type SymbolSequence = Vec<Symbol>;
|
||||
|
||||
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
|
||||
pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
|
||||
|
||||
#[derive(Clone)]
|
||||
struct AuxiliarySymbolInfo {
|
||||
auxiliary_symbol: Symbol,
|
||||
parent_symbols: Vec<Symbol>,
|
||||
}
|
||||
|
||||
type SymbolSequence = Vec<Symbol>;
|
||||
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
|
||||
|
||||
pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
|
||||
|
||||
struct ParseStateQueueEntry {
|
||||
state_id: ParseStateId,
|
||||
preceding_auxiliary_symbols: AuxiliarySymbolSequence,
|
||||
|
|
@ -41,6 +43,7 @@ struct ParseTableBuilder<'a> {
|
|||
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
|
||||
parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
|
||||
parse_state_queue: VecDeque<ParseStateQueueEntry>,
|
||||
non_terminal_extra_states: Vec<(Symbol, usize)>,
|
||||
parse_table: ParseTable,
|
||||
}
|
||||
|
||||
|
|
@ -52,7 +55,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.push(ProductionInfo::default());
|
||||
|
||||
// Add the error state at index 0.
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default(), false);
|
||||
|
||||
// Add the starting state at index 1.
|
||||
self.add_parse_state(
|
||||
|
|
@ -66,8 +69,40 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.iter()
|
||||
.cloned(),
|
||||
),
|
||||
false,
|
||||
);
|
||||
|
||||
// Compute the possible item sets for non-terminal extras.
|
||||
let mut non_terminal_extra_item_sets_by_first_terminal = BTreeMap::new();
|
||||
for extra_non_terminal in self
|
||||
.syntax_grammar
|
||||
.extra_symbols
|
||||
.iter()
|
||||
.filter(|s| s.is_non_terminal())
|
||||
{
|
||||
let variable = &self.syntax_grammar.variables[extra_non_terminal.index];
|
||||
for production in &variable.productions {
|
||||
non_terminal_extra_item_sets_by_first_terminal
|
||||
.entry(production.first_symbol().unwrap())
|
||||
.or_insert(ParseItemSet::default())
|
||||
.insert(
|
||||
ParseItem {
|
||||
variable_index: extra_non_terminal.index as u32,
|
||||
production,
|
||||
step_index: 1,
|
||||
},
|
||||
&[Symbol::end()].iter().cloned().collect(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Add a state for each starting terminal of a non-terminal extra rule.
|
||||
for (terminal, item_set) in non_terminal_extra_item_sets_by_first_terminal {
|
||||
self.non_terminal_extra_states
|
||||
.push((terminal, self.parse_table.states.len()));
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), item_set, true);
|
||||
}
|
||||
|
||||
while let Some(entry) = self.parse_state_queue.pop_front() {
|
||||
let item_set = self
|
||||
.item_set_builder
|
||||
|
|
@ -91,9 +126,15 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
preceding_symbols: &SymbolSequence,
|
||||
preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
|
||||
item_set: ParseItemSet<'a>,
|
||||
is_non_terminal_extra: bool,
|
||||
) -> ParseStateId {
|
||||
match self.state_ids_by_item_set.entry(item_set) {
|
||||
// If an equivalent item set has already been processed, then return
|
||||
// the existing parse state index.
|
||||
Entry::Occupied(o) => *o.get(),
|
||||
|
||||
// Otherwise, insert a new parse state and add it to the queue of
|
||||
// parse states to populate.
|
||||
Entry::Vacant(v) => {
|
||||
let core = v.key().core();
|
||||
let core_count = self.core_ids_by_core.len();
|
||||
|
|
@ -116,6 +157,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
terminal_entries: HashMap::new(),
|
||||
nonterminal_entries: HashMap::new(),
|
||||
core_id,
|
||||
is_non_terminal_extra,
|
||||
});
|
||||
self.parse_state_queue.push_back(ParseStateQueueEntry {
|
||||
state_id,
|
||||
|
|
@ -138,7 +180,12 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
let mut non_terminal_successors = BTreeMap::new();
|
||||
let mut lookaheads_with_conflicts = TokenSet::new();
|
||||
|
||||
// Each item in the item set contributes to either or a Shift action or a Reduce
|
||||
// action in this state.
|
||||
for (item, lookaheads) in &item_set.entries {
|
||||
// If the item is unfinished, then this state has a transition for the item's
|
||||
// next symbol. Advance the item to its next step and insert the resulting
|
||||
// item into the successor item set.
|
||||
if let Some(next_symbol) = item.symbol() {
|
||||
let successor = item.successor();
|
||||
if next_symbol.is_non_terminal() {
|
||||
|
|
@ -160,7 +207,10 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.or_insert_with(|| ParseItemSet::default())
|
||||
.insert(successor, lookaheads);
|
||||
}
|
||||
} else {
|
||||
}
|
||||
// If the item is finished, then add a Reduce action to this state based
|
||||
// on this item.
|
||||
else {
|
||||
let action = if item.is_augmented() {
|
||||
ParseAction::Accept
|
||||
} else {
|
||||
|
|
@ -179,6 +229,10 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.terminal_entries
|
||||
.entry(lookahead);
|
||||
let entry = entry.or_insert_with(|| ParseTableEntry::new());
|
||||
|
||||
// While inserting Reduce actions, eagerly resolve conflicts related
|
||||
// to precedence: avoid inserting lower-precedence reductions, and
|
||||
// clear the action list when inserting higher-precedence reductions.
|
||||
if entry.actions.is_empty() {
|
||||
entry.actions.push(action);
|
||||
} else if action.precedence() > entry.actions[0].precedence() {
|
||||
|
|
@ -193,12 +247,16 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
// Having computed the the successor item sets for each symbol, add a new
|
||||
// parse state for each of these item sets, and add a corresponding Shift
|
||||
// action to this state.
|
||||
for (symbol, next_item_set) in terminal_successors {
|
||||
preceding_symbols.push(symbol);
|
||||
let next_state_id = self.add_parse_state(
|
||||
&preceding_symbols,
|
||||
&preceding_auxiliary_symbols,
|
||||
next_item_set,
|
||||
self.parse_table.states[state_id].is_non_terminal_extra,
|
||||
);
|
||||
preceding_symbols.pop();
|
||||
|
||||
|
|
@ -226,13 +284,19 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
&preceding_symbols,
|
||||
&preceding_auxiliary_symbols,
|
||||
next_item_set,
|
||||
self.parse_table.states[state_id].is_non_terminal_extra,
|
||||
);
|
||||
preceding_symbols.pop();
|
||||
self.parse_table.states[state_id]
|
||||
.nonterminal_entries
|
||||
.insert(symbol, next_state_id);
|
||||
.insert(symbol, GotoAction::Goto(next_state_id));
|
||||
}
|
||||
|
||||
// For any symbol with multiple actions, perform conflict resolution.
|
||||
// This will either
|
||||
// * choose one action over the others using precedence or associativity
|
||||
// * keep multiple actions if this conflict has been whitelisted in the grammar
|
||||
// * fail, terminating the parser generation process
|
||||
for symbol in lookaheads_with_conflicts.iter() {
|
||||
self.handle_conflict(
|
||||
&item_set,
|
||||
|
|
@ -243,15 +307,50 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
)?;
|
||||
}
|
||||
|
||||
// Finally, add actions for the grammar's `extra` symbols.
|
||||
let state = &mut self.parse_table.states[state_id];
|
||||
for extra_token in &self.syntax_grammar.extra_tokens {
|
||||
state
|
||||
.terminal_entries
|
||||
.entry(*extra_token)
|
||||
.or_insert(ParseTableEntry {
|
||||
reusable: true,
|
||||
actions: vec![ParseAction::ShiftExtra],
|
||||
});
|
||||
let is_non_terminal_extra = state.is_non_terminal_extra;
|
||||
let is_end_of_non_terminal_extra =
|
||||
is_non_terminal_extra && state.terminal_entries.len() == 1;
|
||||
|
||||
// Add actions for the start tokens of each non-terminal extra rule.
|
||||
// These actions are added to every state except for the states that are
|
||||
// alread within non-terminal extras. Non-terminal extras are not allowed
|
||||
// to nest within each other.
|
||||
if !is_non_terminal_extra {
|
||||
for (terminal, state_id) in &self.non_terminal_extra_states {
|
||||
state
|
||||
.terminal_entries
|
||||
.entry(*terminal)
|
||||
.or_insert(ParseTableEntry {
|
||||
reusable: true,
|
||||
actions: vec![ParseAction::Shift {
|
||||
state: *state_id,
|
||||
is_repetition: false,
|
||||
}],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Add ShiftExtra actions for the terminal extra tokens. These actions
|
||||
// are added to every state except for those at the ends of non-terminal
|
||||
// extras.
|
||||
if !is_end_of_non_terminal_extra {
|
||||
for extra_token in &self.syntax_grammar.extra_symbols {
|
||||
if extra_token.is_non_terminal() {
|
||||
state
|
||||
.nonterminal_entries
|
||||
.insert(*extra_token, GotoAction::ShiftExtra);
|
||||
} else {
|
||||
state
|
||||
.terminal_entries
|
||||
.entry(*extra_token)
|
||||
.or_insert(ParseTableEntry {
|
||||
reusable: true,
|
||||
actions: vec![ParseAction::ShiftExtra],
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
|
@ -362,8 +461,8 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
// If all reduce actions are left associative, remove the SHIFT action.
|
||||
// If all reduce actions are right associative, remove the REDUCE actions.
|
||||
// If all Reduce actions are left associative, remove the SHIFT action.
|
||||
// If all Reduce actions are right associative, remove the REDUCE actions.
|
||||
match (has_left, has_non, has_right) {
|
||||
(true, false, false) => {
|
||||
entry.actions.pop();
|
||||
|
|
@ -744,7 +843,7 @@ fn populate_following_tokens(
|
|||
}
|
||||
}
|
||||
}
|
||||
for extra in &grammar.extra_tokens {
|
||||
for extra in &grammar.extra_symbols {
|
||||
if extra.is_terminal() {
|
||||
for entry in result.iter_mut() {
|
||||
entry.insert(*extra);
|
||||
|
|
@ -774,6 +873,7 @@ pub(crate) fn build_parse_table<'a>(
|
|||
lexical_grammar,
|
||||
item_set_builder,
|
||||
variable_info,
|
||||
non_terminal_extra_states: Vec::new(),
|
||||
state_ids_by_item_set: HashMap::new(),
|
||||
core_ids_by_core: HashMap::new(),
|
||||
parse_state_info_by_id: Vec::new(),
|
||||
|
|
|
|||
|
|
@ -2,7 +2,9 @@ use super::token_conflicts::TokenConflictMap;
|
|||
use crate::generate::dedup::split_state_id_groups;
|
||||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
|
||||
use crate::generate::rules::{AliasMap, Symbol, TokenSet};
|
||||
use crate::generate::tables::{ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry};
|
||||
use crate::generate::tables::{
|
||||
GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
};
|
||||
use log::info;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::mem;
|
||||
|
|
@ -66,6 +68,7 @@ impl<'a> Minimizer<'a> {
|
|||
..
|
||||
} => {
|
||||
if !self.simple_aliases.contains_key(&symbol)
|
||||
&& !self.syntax_grammar.supertype_symbols.contains(&symbol)
|
||||
&& !aliased_symbols.contains(&symbol)
|
||||
&& self.syntax_grammar.variables[symbol.index].kind
|
||||
!= VariableType::Named
|
||||
|
|
@ -101,7 +104,10 @@ impl<'a> Minimizer<'a> {
|
|||
state.update_referenced_states(|other_state_id, state| {
|
||||
if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) {
|
||||
done = false;
|
||||
state.nonterminal_entries[symbol]
|
||||
match state.nonterminal_entries.get(symbol) {
|
||||
Some(GotoAction::Goto(state_id)) => *state_id,
|
||||
_ => other_state_id,
|
||||
}
|
||||
} else {
|
||||
other_state_id
|
||||
}
|
||||
|
|
@ -194,6 +200,9 @@ impl<'a> Minimizer<'a> {
|
|||
right_state: &ParseState,
|
||||
group_ids_by_state_id: &Vec<ParseStateId>,
|
||||
) -> bool {
|
||||
if left_state.is_non_terminal_extra != right_state.is_non_terminal_extra {
|
||||
return true;
|
||||
}
|
||||
for (token, left_entry) in &left_state.terminal_entries {
|
||||
if let Some(right_entry) = right_state.terminal_entries.get(token) {
|
||||
if self.entries_conflict(
|
||||
|
|
@ -262,18 +271,24 @@ impl<'a> Minimizer<'a> {
|
|||
|
||||
for (symbol, s1) in &state1.nonterminal_entries {
|
||||
if let Some(s2) = state2.nonterminal_entries.get(symbol) {
|
||||
let group1 = group_ids_by_state_id[*s1];
|
||||
let group2 = group_ids_by_state_id[*s2];
|
||||
if group1 != group2 {
|
||||
info!(
|
||||
"split states {} {} - successors for {} are split: {} {}",
|
||||
state1.id,
|
||||
state2.id,
|
||||
self.symbol_name(symbol),
|
||||
s1,
|
||||
s2,
|
||||
);
|
||||
return true;
|
||||
match (s1, s2) {
|
||||
(GotoAction::ShiftExtra, GotoAction::ShiftExtra) => continue,
|
||||
(GotoAction::Goto(s1), GotoAction::Goto(s2)) => {
|
||||
let group1 = group_ids_by_state_id[*s1];
|
||||
let group2 = group_ids_by_state_id[*s2];
|
||||
if group1 != group2 {
|
||||
info!(
|
||||
"split states {} {} - successors for {} are split: {} {}",
|
||||
state1.id,
|
||||
state2.id,
|
||||
self.symbol_name(symbol),
|
||||
s1,
|
||||
s2,
|
||||
);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
_ => return true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -271,6 +271,7 @@ fn identify_keywords(
|
|||
cursor.reset(vec![variable.start_state]);
|
||||
if all_chars_are_alphabetical(&cursor)
|
||||
&& token_conflict_map.does_match_same_string(i, word_token.index)
|
||||
&& !token_conflict_map.does_match_different_string(i, word_token.index)
|
||||
{
|
||||
info!(
|
||||
"Keywords - add candidate {}",
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
use crate::generate::build_tables::item::{TokenSetDisplay};
|
||||
use crate::generate::build_tables::item::TokenSetDisplay;
|
||||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition};
|
||||
use crate::generate::rules::TokenSet;
|
||||
use std::collections::HashSet;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::HashSet;
|
||||
use std::fmt;
|
||||
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
|
|
@ -13,6 +13,7 @@ struct TokenConflictStatus {
|
|||
does_match_valid_continuation: bool,
|
||||
does_match_separators: bool,
|
||||
matches_same_string: bool,
|
||||
matches_different_string: bool,
|
||||
}
|
||||
|
||||
pub(crate) struct TokenConflictMap<'a> {
|
||||
|
|
@ -25,6 +26,12 @@ pub(crate) struct TokenConflictMap<'a> {
|
|||
}
|
||||
|
||||
impl<'a> TokenConflictMap<'a> {
|
||||
/// Create a token conflict map based on a lexical grammar, which describes the structure
|
||||
/// each token, and a `following_token` map, which indicates which tokens may be appear
|
||||
/// immediately after each other token.
|
||||
///
|
||||
/// This analyzes the possible kinds of overlap between each pair of tokens and stores
|
||||
/// them in a matrix.
|
||||
pub fn new(grammar: &'a LexicalGrammar, following_tokens: Vec<TokenSet>) -> Self {
|
||||
let mut cursor = NfaCursor::new(&grammar.nfa, Vec::new());
|
||||
let starting_chars = get_starting_chars(&mut cursor, grammar);
|
||||
|
|
@ -50,12 +57,21 @@ impl<'a> TokenConflictMap<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Does token `i` match any strings that token `j` also matches, such that token `i`
|
||||
/// is preferred over token `j`?
|
||||
pub fn has_same_conflict_status(&self, a: usize, b: usize, other: usize) -> bool {
|
||||
let left = &self.status_matrix[matrix_index(self.n, a, other)];
|
||||
let right = &self.status_matrix[matrix_index(self.n, b, other)];
|
||||
left == right
|
||||
}
|
||||
|
||||
/// Does token `i` match any strings that token `j` does *not* match?
|
||||
pub fn does_match_different_string(&self, i: usize, j: usize) -> bool {
|
||||
self.status_matrix[matrix_index(self.n, i, j)].matches_different_string
|
||||
}
|
||||
|
||||
/// Does token `i` match any strings that token `j` also matches, where
|
||||
/// token `i` is preferred over token `j`?
|
||||
pub fn does_match_same_string(&self, i: usize, j: usize) -> bool {
|
||||
self.status_matrix[matrix_index(self.n, i, j)].matches_same_string
|
||||
}
|
||||
|
|
@ -67,6 +83,7 @@ impl<'a> TokenConflictMap<'a> {
|
|||
|| entry.matches_same_string
|
||||
}
|
||||
|
||||
/// Does token `i` match any strings that are *prefixes* of strings matched by `j`?
|
||||
pub fn does_match_prefix(&self, i: usize, j: usize) -> bool {
|
||||
self.status_matrix[matrix_index(self.n, i, j)].matches_prefix
|
||||
}
|
||||
|
|
@ -239,19 +256,29 @@ fn compute_conflict_status(
|
|||
);
|
||||
|
||||
while let Some(state_set) = state_set_queue.pop() {
|
||||
// Don't pursue states where there's no potential for conflict.
|
||||
if grammar.variable_indices_for_nfa_states(&state_set).count() > 1 {
|
||||
cursor.reset(state_set);
|
||||
} else {
|
||||
let mut live_variable_indices = grammar.variable_indices_for_nfa_states(&state_set);
|
||||
|
||||
// If only one of the two tokens could possibly match from this state, then
|
||||
// there is no reason to analyze any of its successors. Just record the fact
|
||||
// that the token matches a string that the other token does not match.
|
||||
let first_live_variable_index = live_variable_indices.next().unwrap();
|
||||
if live_variable_indices.count() == 0 {
|
||||
if first_live_variable_index == i {
|
||||
result.0.matches_different_string = true;
|
||||
} else {
|
||||
result.1.matches_different_string = true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
let has_sep = cursor.transition_chars().any(|(_, sep)| sep);
|
||||
// Don't pursue states where there's no potential for conflict.
|
||||
cursor.reset(state_set);
|
||||
let within_separator = cursor.transition_chars().any(|(_, sep)| sep);
|
||||
|
||||
// Examine each possible completed token in this state.
|
||||
let mut completion = None;
|
||||
for (id, precedence) in cursor.completions() {
|
||||
if has_sep {
|
||||
if within_separator {
|
||||
if id == i {
|
||||
result.0.does_match_separators = true;
|
||||
} else {
|
||||
|
|
@ -316,7 +343,7 @@ fn compute_conflict_status(
|
|||
&transition,
|
||||
completed_id,
|
||||
completed_precedence,
|
||||
has_sep,
|
||||
within_separator,
|
||||
) {
|
||||
can_advance = true;
|
||||
if advanced_id == i {
|
||||
|
|
|
|||
|
|
@ -292,7 +292,12 @@ function grammar(baseGrammar, options) {
|
|||
|
||||
extras = options.extras
|
||||
.call(ruleBuilder, ruleBuilder, baseGrammar.extras)
|
||||
.map(normalize);
|
||||
|
||||
if (!Array.isArray(extras)) {
|
||||
throw new Error("Grammar's 'extras' function must return an array.")
|
||||
}
|
||||
|
||||
extras = extras.map(normalize);
|
||||
}
|
||||
|
||||
let word = baseGrammar.word;
|
||||
|
|
|
|||
|
|
@ -1,15 +1,15 @@
|
|||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "tree-sitter grammar specification",
|
||||
"type": "object",
|
||||
|
||||
"required": [
|
||||
"name",
|
||||
"rules"
|
||||
],
|
||||
"required": ["name", "rules"],
|
||||
|
||||
"additionalProperties": false,
|
||||
|
||||
"properties": {
|
||||
"name": {
|
||||
"description": "the name of the grammar",
|
||||
"type": "string",
|
||||
"pattern": "^[a-zA-Z_]\\w*"
|
||||
},
|
||||
|
|
@ -60,6 +60,15 @@
|
|||
"word": {
|
||||
"type": "string",
|
||||
"pattern": "^[a-zA-Z_]\\w*"
|
||||
},
|
||||
|
||||
"supertypes": {
|
||||
"description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"description": "the name of a rule in `rules` or `extras`",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
|
|
@ -96,20 +105,19 @@
|
|||
"type": "string",
|
||||
"pattern": "^PATTERN$"
|
||||
},
|
||||
"value": {"type": "string"}
|
||||
"value": { "type": "string" }
|
||||
},
|
||||
"required": ["type", "value"]
|
||||
},
|
||||
|
||||
"symbol-rule": {
|
||||
"required": ["name"],
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^SYMBOL$"
|
||||
},
|
||||
"name": {"type": "string"}
|
||||
"name": { "type": "string" }
|
||||
},
|
||||
"required": ["type", "name"]
|
||||
},
|
||||
|
|
@ -210,6 +218,20 @@
|
|||
"required": ["type", "content"]
|
||||
},
|
||||
|
||||
"field-rule": {
|
||||
"properties": {
|
||||
"name": { "type": "string" },
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^FIELD$"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/definitions/rule"
|
||||
}
|
||||
},
|
||||
"required": ["name", "type", "content"]
|
||||
},
|
||||
|
||||
"prec-rule": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -239,6 +261,7 @@
|
|||
{ "$ref": "#/definitions/repeat1-rule" },
|
||||
{ "$ref": "#/definitions/repeat-rule" },
|
||||
{ "$ref": "#/definitions/token-rule" },
|
||||
{ "$ref": "#/definitions/field-rule" },
|
||||
{ "$ref": "#/definitions/prec-rule" }
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ pub(crate) struct Variable {
|
|||
pub(crate) struct InputGrammar {
|
||||
pub name: String,
|
||||
pub variables: Vec<Variable>,
|
||||
pub extra_tokens: Vec<Rule>,
|
||||
pub extra_symbols: Vec<Rule>,
|
||||
pub expected_conflicts: Vec<Vec<String>>,
|
||||
pub external_tokens: Vec<Rule>,
|
||||
pub variables_to_inline: Vec<String>,
|
||||
|
|
@ -87,7 +87,7 @@ pub(crate) struct ExternalToken {
|
|||
#[derive(Debug, Default)]
|
||||
pub(crate) struct SyntaxGrammar {
|
||||
pub variables: Vec<SyntaxVariable>,
|
||||
pub extra_tokens: Vec<Symbol>,
|
||||
pub extra_symbols: Vec<Symbol>,
|
||||
pub expected_conflicts: Vec<Vec<Symbol>>,
|
||||
pub external_tokens: Vec<ExternalToken>,
|
||||
pub supertype_symbols: Vec<Symbol>,
|
||||
|
|
|
|||
|
|
@ -6,13 +6,12 @@ mod node_types;
|
|||
mod npm_files;
|
||||
pub mod parse_grammar;
|
||||
mod prepare_grammar;
|
||||
pub mod properties;
|
||||
mod render;
|
||||
mod rules;
|
||||
mod tables;
|
||||
|
||||
use self::build_tables::build_tables;
|
||||
use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType};
|
||||
use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
|
||||
use self::parse_grammar::parse_grammar;
|
||||
use self::prepare_grammar::prepare_grammar;
|
||||
use self::render::render_c_code;
|
||||
|
|
@ -20,9 +19,8 @@ use self::rules::AliasMap;
|
|||
use crate::error::{Error, Result};
|
||||
use lazy_static::lazy_static;
|
||||
use regex::{Regex, RegexBuilder};
|
||||
use std::collections::HashSet;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{BufWriter, Write};
|
||||
use std::fs;
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, Stdio};
|
||||
|
||||
|
|
@ -33,15 +31,9 @@ lazy_static! {
|
|||
.unwrap();
|
||||
}
|
||||
|
||||
const NEW_HEADER_PARTS: [&'static str; 2] = [
|
||||
"
|
||||
uint32_t large_state_count;
|
||||
const uint16_t *small_parse_table;
|
||||
const uint32_t *small_parse_table_map;",
|
||||
"
|
||||
#define SMALL_STATE(id) id - LARGE_STATE_COUNT
|
||||
",
|
||||
];
|
||||
const NEW_HEADER_PARTS: &[&'static str] = &["
|
||||
const uint16_t *alias_map;
|
||||
uint32_t state_count;"];
|
||||
|
||||
struct GeneratedParser {
|
||||
c_code: String,
|
||||
|
|
@ -51,13 +43,11 @@ struct GeneratedParser {
|
|||
pub fn generate_parser_in_directory(
|
||||
repo_path: &PathBuf,
|
||||
grammar_path: Option<&str>,
|
||||
properties_only: bool,
|
||||
next_abi: bool,
|
||||
report_symbol_name: Option<&str>,
|
||||
) -> Result<()> {
|
||||
let src_path = repo_path.join("src");
|
||||
let header_path = src_path.join("tree_sitter");
|
||||
let properties_dir_path = repo_path.join("properties");
|
||||
|
||||
// Ensure that the output directories exist.
|
||||
fs::create_dir_all(&src_path)?;
|
||||
|
|
@ -82,71 +72,48 @@ pub fn generate_parser_in_directory(
|
|||
prepare_grammar(&input_grammar)?;
|
||||
let language_name = input_grammar.name;
|
||||
|
||||
// If run with no arguments, read all of the property sheets and compile them to JSON.
|
||||
if grammar_path.is_none() {
|
||||
let token_names = get_token_names(&syntax_grammar, &lexical_grammar);
|
||||
if let Ok(entries) = fs::read_dir(properties_dir_path) {
|
||||
for entry in entries {
|
||||
let css_path = entry?.path();
|
||||
let css = fs::read_to_string(&css_path)?;
|
||||
let sheet = properties::generate_property_sheet(&css_path, &css, &token_names)?;
|
||||
let property_sheet_json_path = src_path
|
||||
.join(css_path.file_name().unwrap())
|
||||
.with_extension("json");
|
||||
let property_sheet_json_file =
|
||||
File::create(&property_sheet_json_path).map_err(Error::wrap(|| {
|
||||
format!("Failed to create {:?}", property_sheet_json_path)
|
||||
}))?;
|
||||
let mut writer = BufWriter::new(property_sheet_json_file);
|
||||
serde_json::to_writer_pretty(&mut writer, &sheet)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Generate the parser and related files.
|
||||
if !properties_only {
|
||||
let GeneratedParser {
|
||||
c_code,
|
||||
node_types_json,
|
||||
} = generate_parser_for_grammar_with_opts(
|
||||
&language_name,
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
inlines,
|
||||
simple_aliases,
|
||||
next_abi,
|
||||
report_symbol_name,
|
||||
)?;
|
||||
let GeneratedParser {
|
||||
c_code,
|
||||
node_types_json,
|
||||
} = generate_parser_for_grammar_with_opts(
|
||||
&language_name,
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
inlines,
|
||||
simple_aliases,
|
||||
next_abi,
|
||||
report_symbol_name,
|
||||
)?;
|
||||
|
||||
write_file(&src_path.join("parser.c"), c_code)?;
|
||||
write_file(&src_path.join("node-types.json"), node_types_json)?;
|
||||
write_file(&src_path.join("parser.c"), c_code)?;
|
||||
write_file(&src_path.join("node-types.json"), node_types_json)?;
|
||||
|
||||
if next_abi {
|
||||
write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;
|
||||
} else {
|
||||
let mut header = tree_sitter::PARSER_HEADER.to_string();
|
||||
if next_abi {
|
||||
write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;
|
||||
} else {
|
||||
let mut header = tree_sitter::PARSER_HEADER.to_string();
|
||||
|
||||
for part in &NEW_HEADER_PARTS {
|
||||
let pos = header
|
||||
.find(part)
|
||||
.expect("Missing expected part of parser.h header");
|
||||
header.replace_range(pos..(pos + part.len()), "");
|
||||
}
|
||||
|
||||
write_file(&header_path.join("parser.h"), header)?;
|
||||
for part in NEW_HEADER_PARTS.iter() {
|
||||
let pos = header
|
||||
.find(part)
|
||||
.expect("Missing expected part of parser.h header");
|
||||
header.replace_range(pos..(pos + part.len()), "");
|
||||
}
|
||||
|
||||
ensure_file(&repo_path.join("index.js"), || {
|
||||
npm_files::index_js(&language_name)
|
||||
})?;
|
||||
ensure_file(&src_path.join("binding.cc"), || {
|
||||
npm_files::binding_cc(&language_name)
|
||||
})?;
|
||||
ensure_file(&repo_path.join("binding.gyp"), || {
|
||||
npm_files::binding_gyp(&language_name)
|
||||
})?;
|
||||
write_file(&header_path.join("parser.h"), header)?;
|
||||
}
|
||||
|
||||
ensure_file(&repo_path.join("index.js"), || {
|
||||
npm_files::index_js(&language_name)
|
||||
})?;
|
||||
ensure_file(&src_path.join("binding.cc"), || {
|
||||
npm_files::binding_cc(&language_name)
|
||||
})?;
|
||||
ensure_file(&repo_path.join("binding.gyp"), || {
|
||||
npm_files::binding_gyp(&language_name)
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -176,7 +143,8 @@ fn generate_parser_for_grammar_with_opts(
|
|||
next_abi: bool,
|
||||
report_symbol_name: Option<&str>,
|
||||
) -> Result<GeneratedParser> {
|
||||
let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &inlines)?;
|
||||
let variable_info =
|
||||
node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?;
|
||||
let node_types_json = node_types::generate_node_types_json(
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
|
|
@ -208,35 +176,6 @@ fn generate_parser_for_grammar_with_opts(
|
|||
})
|
||||
}
|
||||
|
||||
fn get_token_names(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
) -> HashSet<String> {
|
||||
let mut result = HashSet::new();
|
||||
for variable in &lexical_grammar.variables {
|
||||
if variable.kind == VariableType::Named {
|
||||
result.insert(variable.name.clone());
|
||||
}
|
||||
}
|
||||
for token in &syntax_grammar.external_tokens {
|
||||
if token.kind == VariableType::Named {
|
||||
result.insert(token.name.clone());
|
||||
}
|
||||
}
|
||||
for variable in &syntax_grammar.variables {
|
||||
for production in &variable.productions {
|
||||
for step in &production.steps {
|
||||
if let Some(alias) = &step.alias {
|
||||
if !step.symbol.is_non_terminal() && alias.is_named {
|
||||
result.insert(alias.value.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn load_grammar_file(grammar_path: &Path) -> Result<String> {
|
||||
match grammar_path.extension().and_then(|e| e.to_str()) {
|
||||
Some("js") => Ok(load_js_grammar_file(grammar_path)?),
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
use std::char;
|
||||
use std::cmp::max;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::HashSet;
|
||||
use std::fmt;
|
||||
use std::mem::swap;
|
||||
use std::ops::Range;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum CharacterSet {
|
||||
|
|
@ -178,6 +180,40 @@ impl CharacterSet {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn ranges<'a>(
|
||||
chars: &'a Vec<char>,
|
||||
ruled_out_characters: &'a HashSet<u32>,
|
||||
) -> impl Iterator<Item = Range<char>> + 'a {
|
||||
let mut prev_range: Option<Range<char>> = None;
|
||||
chars
|
||||
.iter()
|
||||
.map(|c| (*c, false))
|
||||
.chain(Some(('\0', true)))
|
||||
.filter_map(move |(c, done)| {
|
||||
if done {
|
||||
return prev_range.clone();
|
||||
}
|
||||
if ruled_out_characters.contains(&(c as u32)) {
|
||||
return None;
|
||||
}
|
||||
if let Some(range) = prev_range.clone() {
|
||||
let mut prev_range_successor = range.end as u32 + 1;
|
||||
while prev_range_successor < c as u32 {
|
||||
if !ruled_out_characters.contains(&prev_range_successor) {
|
||||
prev_range = Some(c..c);
|
||||
return Some(range);
|
||||
}
|
||||
prev_range_successor += 1;
|
||||
}
|
||||
prev_range = Some(range.start..c);
|
||||
None
|
||||
} else {
|
||||
prev_range = Some(c..c);
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn contains(&self, c: char) -> bool {
|
||||
match self {
|
||||
|
|
@ -266,6 +302,13 @@ fn compare_chars(left: &Vec<char>, right: &Vec<char>) -> SetComparision {
|
|||
result.common = true;
|
||||
}
|
||||
}
|
||||
|
||||
match (i, j) {
|
||||
(Some(_), _) => result.left_only = true,
|
||||
(_, Some(_)) => result.right_only = true,
|
||||
_ => {}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
|
|
@ -718,7 +761,7 @@ mod tests {
|
|||
.add_range('d', 'e')
|
||||
);
|
||||
|
||||
// A whitelist and an intersecting blacklist.
|
||||
// An inclusion and an intersecting exclusion.
|
||||
// Both sets contain 'e', 'f', and 'm'
|
||||
let mut a = CharacterSet::empty()
|
||||
.add_range('c', 'h')
|
||||
|
|
@ -748,7 +791,7 @@ mod tests {
|
|||
assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l']));
|
||||
assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate());
|
||||
|
||||
// A blacklist and an overlapping blacklist.
|
||||
// An exclusion and an overlapping inclusion.
|
||||
// Both sets exclude 'c', 'd', and 'e'
|
||||
let mut a = CharacterSet::empty().add_range('a', 'e').negate();
|
||||
let mut b = CharacterSet::empty().add_range('c', 'h').negate();
|
||||
|
|
@ -759,7 +802,7 @@ mod tests {
|
|||
assert_eq!(a, CharacterSet::Include(vec!['f', 'g', 'h']));
|
||||
assert_eq!(b, CharacterSet::Include(vec!['a', 'b']));
|
||||
|
||||
// A blacklist and a larger blacklist.
|
||||
// An exclusion and a larger exclusion.
|
||||
let mut a = CharacterSet::empty().add_range('b', 'c').negate();
|
||||
let mut b = CharacterSet::empty().add_range('a', 'd').negate();
|
||||
assert_eq!(
|
||||
|
|
@ -810,5 +853,53 @@ mod tests {
|
|||
);
|
||||
assert!(a.does_intersect(&b));
|
||||
assert!(b.does_intersect(&a));
|
||||
|
||||
let (a, b) = (
|
||||
CharacterSet::Include(vec!['c']),
|
||||
CharacterSet::Exclude(vec!['a']),
|
||||
);
|
||||
assert!(a.does_intersect(&b));
|
||||
assert!(b.does_intersect(&a));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_character_set_get_ranges() {
|
||||
struct Row {
|
||||
chars: Vec<char>,
|
||||
ruled_out_chars: Vec<char>,
|
||||
expected_ranges: Vec<Range<char>>,
|
||||
}
|
||||
|
||||
let table = [
|
||||
Row {
|
||||
chars: vec!['a'],
|
||||
ruled_out_chars: vec![],
|
||||
expected_ranges: vec!['a'..'a'],
|
||||
},
|
||||
Row {
|
||||
chars: vec!['a', 'b', 'c', 'e', 'z'],
|
||||
ruled_out_chars: vec![],
|
||||
expected_ranges: vec!['a'..'c', 'e'..'e', 'z'..'z'],
|
||||
},
|
||||
Row {
|
||||
chars: vec!['a', 'b', 'c', 'e', 'h', 'z'],
|
||||
ruled_out_chars: vec!['d', 'f', 'g'],
|
||||
expected_ranges: vec!['a'..'h', 'z'..'z'],
|
||||
},
|
||||
];
|
||||
|
||||
for Row {
|
||||
chars,
|
||||
ruled_out_chars,
|
||||
expected_ranges,
|
||||
} in table.iter()
|
||||
{
|
||||
let ruled_out_chars = ruled_out_chars
|
||||
.into_iter()
|
||||
.map(|c: &char| *c as u32)
|
||||
.collect();
|
||||
let ranges = CharacterSet::ranges(chars, &ruled_out_chars).collect::<Vec<_>>();
|
||||
assert_eq!(ranges, *expected_ranges);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -87,7 +87,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
|
|||
})
|
||||
}
|
||||
|
||||
let extra_tokens = grammar_json
|
||||
let extra_symbols = grammar_json
|
||||
.extras
|
||||
.unwrap_or(Vec::new())
|
||||
.into_iter()
|
||||
|
|
@ -107,7 +107,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
|
|||
name: grammar_json.name,
|
||||
word_token: grammar_json.word,
|
||||
variables,
|
||||
extra_tokens,
|
||||
extra_symbols,
|
||||
expected_conflicts,
|
||||
external_tokens,
|
||||
supertype_symbols,
|
||||
|
|
|
|||
|
|
@ -283,7 +283,7 @@ mod tests {
|
|||
fn build_grammar(variables: Vec<Variable>) -> ExtractedSyntaxGrammar {
|
||||
ExtractedSyntaxGrammar {
|
||||
variables,
|
||||
extra_tokens: Vec::new(),
|
||||
extra_symbols: Vec::new(),
|
||||
external_tokens: Vec::new(),
|
||||
expected_conflicts: Vec::new(),
|
||||
variables_to_inline: Vec::new(),
|
||||
|
|
|
|||
293
cli/src/generate/prepare_grammar/extract_default_aliases.rs
Normal file
293
cli/src/generate/prepare_grammar/extract_default_aliases.rs
Normal file
|
|
@ -0,0 +1,293 @@
|
|||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
struct SymbolStatus {
|
||||
aliases: Vec<(Alias, usize)>,
|
||||
appears_unaliased: bool,
|
||||
}
|
||||
|
||||
// Update the grammar by finding symbols that always are aliased, and for each such symbol,
|
||||
// promoting one of its aliases to a "default alias", which is applied globally instead
|
||||
// of in a context-specific way.
|
||||
//
|
||||
// This has two benefits:
|
||||
// * It reduces the overhead of storing production-specific alias info in the parse table.
|
||||
// * Within an `ERROR` node, no context-specific aliases will be applied. This transformation
|
||||
// ensures that the children of an `ERROR` node have symbols that are consistent with the
|
||||
// way that they would appear in a valid syntax tree.
|
||||
pub(super) fn extract_default_aliases(
|
||||
syntax_grammar: &mut SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
) -> AliasMap {
|
||||
let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
|
||||
let mut non_terminal_status_list =
|
||||
vec![SymbolStatus::default(); syntax_grammar.variables.len()];
|
||||
let mut external_status_list =
|
||||
vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];
|
||||
|
||||
// For each grammar symbol, find all of the aliases under which the symbol appears,
|
||||
// and determine whether or not the symbol ever appears *unaliased*.
|
||||
for variable in syntax_grammar.variables.iter() {
|
||||
for production in variable.productions.iter() {
|
||||
for step in production.steps.iter() {
|
||||
let mut status = match step.symbol.kind {
|
||||
SymbolType::External => &mut external_status_list[step.symbol.index],
|
||||
SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
|
||||
SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
|
||||
SymbolType::End => panic!("Unexpected end token"),
|
||||
};
|
||||
|
||||
// Default aliases don't work for inlined variables.
|
||||
if syntax_grammar.variables_to_inline.contains(&step.symbol) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(alias) = &step.alias {
|
||||
if let Some(count_for_alias) = status
|
||||
.aliases
|
||||
.iter_mut()
|
||||
.find_map(|(a, count)| if a == alias { Some(count) } else { None })
|
||||
{
|
||||
*count_for_alias += 1;
|
||||
} else {
|
||||
status.aliases.push((alias.clone(), 1));
|
||||
}
|
||||
} else {
|
||||
status.appears_unaliased = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let symbols_with_statuses = (terminal_status_list
|
||||
.iter_mut()
|
||||
.enumerate()
|
||||
.map(|(i, status)| (Symbol::terminal(i), status)))
|
||||
.chain(
|
||||
non_terminal_status_list
|
||||
.iter_mut()
|
||||
.enumerate()
|
||||
.map(|(i, status)| (Symbol::non_terminal(i), status)),
|
||||
)
|
||||
.chain(
|
||||
external_status_list
|
||||
.iter_mut()
|
||||
.enumerate()
|
||||
.map(|(i, status)| (Symbol::external(i), status)),
|
||||
);
|
||||
|
||||
// For each symbol that always appears aliased, find the alias the occurs most often,
|
||||
// and designate that alias as the symbol's "default alias". Store all of these
|
||||
// default aliases in a map that will be returned.
|
||||
let mut result = AliasMap::new();
|
||||
for (symbol, status) in symbols_with_statuses {
|
||||
if status.appears_unaliased {
|
||||
status.aliases.clear();
|
||||
} else {
|
||||
if let Some(default_entry) = status
|
||||
.aliases
|
||||
.iter()
|
||||
.enumerate()
|
||||
.max_by_key(|(i, (_, count))| (count, -(*i as i64)))
|
||||
.map(|(_, entry)| entry.clone())
|
||||
{
|
||||
status.aliases.clear();
|
||||
status.aliases.push(default_entry.clone());
|
||||
result.insert(symbol, default_entry.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wherever a symbol is aliased as its default alias, remove the usage of the alias,
|
||||
// because it will now be redundant.
|
||||
let mut alias_positions_to_clear = Vec::new();
|
||||
for variable in syntax_grammar.variables.iter_mut() {
|
||||
alias_positions_to_clear.clear();
|
||||
|
||||
for (i, production) in variable.productions.iter().enumerate() {
|
||||
for (j, step) in production.steps.iter().enumerate() {
|
||||
let status = match step.symbol.kind {
|
||||
SymbolType::External => &mut external_status_list[step.symbol.index],
|
||||
SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
|
||||
SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
|
||||
SymbolType::End => panic!("Unexpected end token"),
|
||||
};
|
||||
|
||||
// If this step is aliased as the symbol's default alias, then remove that alias.
|
||||
if step.alias.is_some()
|
||||
&& step.alias.as_ref() == status.aliases.get(0).map(|t| &t.0)
|
||||
{
|
||||
let mut other_productions_must_use_this_alias_at_this_index = false;
|
||||
for (other_i, other_production) in variable.productions.iter().enumerate() {
|
||||
if other_i != i
|
||||
&& other_production.steps.len() > j
|
||||
&& other_production.steps[j].alias == step.alias
|
||||
&& result.get(&other_production.steps[j].symbol) != step.alias.as_ref()
|
||||
{
|
||||
other_productions_must_use_this_alias_at_this_index = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if !other_productions_must_use_this_alias_at_this_index {
|
||||
alias_positions_to_clear.push((i, j));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (production_index, step_index) in &alias_positions_to_clear {
|
||||
variable.productions[*production_index].steps[*step_index].alias = None;
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::generate::grammars::{
|
||||
LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
|
||||
};
|
||||
use crate::generate::nfa::Nfa;
|
||||
|
||||
#[test]
|
||||
fn test_extract_simple_aliases() {
|
||||
let mut syntax_grammar = SyntaxGrammar {
|
||||
variables: vec![
|
||||
SyntaxVariable {
|
||||
name: "v1".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
|
||||
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
|
||||
ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true),
|
||||
],
|
||||
}],
|
||||
},
|
||||
SyntaxVariable {
|
||||
name: "v2".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
// Token 0 is always aliased as "a1".
|
||||
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
|
||||
// Token 1 is aliased within rule `v1` above, but not here.
|
||||
ProductionStep::new(Symbol::terminal(1)),
|
||||
// Token 2 is aliased differently here than in `v1`. The alias from
|
||||
// `v1` should be promoted to the default alias, because `v1` appears
|
||||
// first in the grammar.
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true),
|
||||
// Token 3 is also aliased differently here than in `v1`. In this case,
|
||||
// this alias should be promoted to the default alias, because it is
|
||||
// used a greater number of times (twice).
|
||||
ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true),
|
||||
ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true),
|
||||
],
|
||||
}],
|
||||
},
|
||||
],
|
||||
extra_symbols: Vec::new(),
|
||||
expected_conflicts: Vec::new(),
|
||||
variables_to_inline: Vec::new(),
|
||||
supertype_symbols: Vec::new(),
|
||||
external_tokens: Vec::new(),
|
||||
word_token: None,
|
||||
};
|
||||
|
||||
let lexical_grammar = LexicalGrammar {
|
||||
nfa: Nfa::new(),
|
||||
variables: vec![
|
||||
LexicalVariable {
|
||||
name: "t0".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
LexicalVariable {
|
||||
name: "t1".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
LexicalVariable {
|
||||
name: "t2".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
LexicalVariable {
|
||||
name: "t3".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
|
||||
assert_eq!(default_aliases.len(), 3);
|
||||
|
||||
assert_eq!(
|
||||
default_aliases.get(&Symbol::terminal(0)),
|
||||
Some(&Alias {
|
||||
value: "a1".to_string(),
|
||||
is_named: true,
|
||||
})
|
||||
);
|
||||
assert_eq!(
|
||||
default_aliases.get(&Symbol::terminal(2)),
|
||||
Some(&Alias {
|
||||
value: "a3".to_string(),
|
||||
is_named: true,
|
||||
})
|
||||
);
|
||||
assert_eq!(
|
||||
default_aliases.get(&Symbol::terminal(3)),
|
||||
Some(&Alias {
|
||||
value: "a6".to_string(),
|
||||
is_named: true,
|
||||
})
|
||||
);
|
||||
assert_eq!(default_aliases.get(&Symbol::terminal(1)), None);
|
||||
|
||||
assert_eq!(
|
||||
syntax_grammar.variables,
|
||||
vec![
|
||||
SyntaxVariable {
|
||||
name: "v1".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
|
||||
ProductionStep::new(Symbol::terminal(2)),
|
||||
ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true),
|
||||
],
|
||||
},],
|
||||
},
|
||||
SyntaxVariable {
|
||||
name: "v2".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::terminal(1)),
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true),
|
||||
ProductionStep::new(Symbol::terminal(3)),
|
||||
ProductionStep::new(Symbol::terminal(3)),
|
||||
],
|
||||
},],
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,223 +0,0 @@
|
|||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
struct SymbolStatus {
|
||||
alias: Option<Alias>,
|
||||
conflicting: bool,
|
||||
}
|
||||
|
||||
pub(super) fn extract_simple_aliases(
|
||||
syntax_grammar: &mut SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
) -> AliasMap {
|
||||
// Determine which symbols in the grammars are *always* aliased to a single name.
|
||||
let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
|
||||
let mut non_terminal_status_list =
|
||||
vec![SymbolStatus::default(); syntax_grammar.variables.len()];
|
||||
let mut external_status_list =
|
||||
vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];
|
||||
for variable in syntax_grammar.variables.iter() {
|
||||
for production in variable.productions.iter() {
|
||||
for step in production.steps.iter() {
|
||||
let mut status = match step.symbol {
|
||||
Symbol {
|
||||
kind: SymbolType::External,
|
||||
index,
|
||||
} => &mut external_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::NonTerminal,
|
||||
index,
|
||||
} => &mut non_terminal_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::Terminal,
|
||||
index,
|
||||
} => &mut terminal_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::End,
|
||||
..
|
||||
} => panic!("Unexpected end token"),
|
||||
};
|
||||
|
||||
if step.alias.is_none() {
|
||||
status.alias = None;
|
||||
status.conflicting = true;
|
||||
}
|
||||
|
||||
if !status.conflicting {
|
||||
if status.alias.is_none() {
|
||||
status.alias = step.alias.clone();
|
||||
} else if status.alias != step.alias {
|
||||
status.alias = None;
|
||||
status.conflicting = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove the aliases for those symbols.
|
||||
for variable in syntax_grammar.variables.iter_mut() {
|
||||
for production in variable.productions.iter_mut() {
|
||||
for step in production.steps.iter_mut() {
|
||||
let status = match step.symbol {
|
||||
Symbol {
|
||||
kind: SymbolType::External,
|
||||
index,
|
||||
} => &external_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::NonTerminal,
|
||||
index,
|
||||
} => &non_terminal_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::Terminal,
|
||||
index,
|
||||
} => &terminal_status_list[index],
|
||||
Symbol {
|
||||
kind: SymbolType::End,
|
||||
..
|
||||
} => panic!("Unexpected end token"),
|
||||
};
|
||||
|
||||
if status.alias.is_some() {
|
||||
step.alias = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Populate a map of the symbols to their aliases.
|
||||
let mut result = AliasMap::new();
|
||||
for (i, status) in terminal_status_list.into_iter().enumerate() {
|
||||
if let Some(alias) = status.alias {
|
||||
result.insert(Symbol::terminal(i), alias);
|
||||
}
|
||||
}
|
||||
for (i, status) in non_terminal_status_list.into_iter().enumerate() {
|
||||
if let Some(alias) = status.alias {
|
||||
result.insert(Symbol::non_terminal(i), alias);
|
||||
}
|
||||
}
|
||||
for (i, status) in external_status_list.into_iter().enumerate() {
|
||||
if let Some(alias) = status.alias {
|
||||
result.insert(Symbol::external(i), alias);
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::generate::grammars::{
|
||||
LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
|
||||
};
|
||||
use crate::generate::nfa::Nfa;
|
||||
|
||||
#[test]
|
||||
fn test_extract_simple_aliases() {
|
||||
let mut syntax_grammar = SyntaxGrammar {
|
||||
variables: vec![
|
||||
SyntaxVariable {
|
||||
name: "v1".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
|
||||
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
|
||||
],
|
||||
}],
|
||||
},
|
||||
SyntaxVariable {
|
||||
name: "v2".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
// Token 0 is always aliased as "a1".
|
||||
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
|
||||
// Token 1 is aliased above, but not here.
|
||||
ProductionStep::new(Symbol::terminal(1)),
|
||||
// Token 2 is aliased differently than above.
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
|
||||
],
|
||||
}],
|
||||
},
|
||||
],
|
||||
extra_tokens: Vec::new(),
|
||||
expected_conflicts: Vec::new(),
|
||||
variables_to_inline: Vec::new(),
|
||||
supertype_symbols: Vec::new(),
|
||||
external_tokens: Vec::new(),
|
||||
word_token: None,
|
||||
};
|
||||
|
||||
let lexical_grammar = LexicalGrammar {
|
||||
nfa: Nfa::new(),
|
||||
variables: vec![
|
||||
LexicalVariable {
|
||||
name: "t1".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
LexicalVariable {
|
||||
name: "t2".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
LexicalVariable {
|
||||
name: "t3".to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
|
||||
assert_eq!(simple_aliases.len(), 1);
|
||||
assert_eq!(
|
||||
simple_aliases[&Symbol::terminal(0)],
|
||||
Alias {
|
||||
value: "a1".to_string(),
|
||||
is_named: true,
|
||||
}
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
syntax_grammar.variables,
|
||||
vec![
|
||||
SyntaxVariable {
|
||||
name: "v1".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
// 'Simple' alias removed
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
// Other aliases unchanged
|
||||
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
|
||||
],
|
||||
},],
|
||||
},
|
||||
SyntaxVariable {
|
||||
name: "v2".to_owned(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::terminal(1)),
|
||||
ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
|
||||
],
|
||||
},],
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -90,21 +90,13 @@ pub(super) fn extract_tokens(
|
|||
.collect();
|
||||
|
||||
let mut separators = Vec::new();
|
||||
let mut extra_tokens = Vec::new();
|
||||
for rule in grammar.extra_tokens {
|
||||
let mut extra_symbols = Vec::new();
|
||||
for rule in grammar.extra_symbols {
|
||||
if let Rule::Symbol(symbol) = rule {
|
||||
let new_symbol = symbol_replacer.replace_symbol(symbol);
|
||||
if new_symbol.is_non_terminal() {
|
||||
return Error::err(format!(
|
||||
"Non-token symbol '{}' cannot be used as an extra token",
|
||||
&variables[new_symbol.index].name
|
||||
));
|
||||
} else {
|
||||
extra_tokens.push(new_symbol);
|
||||
}
|
||||
extra_symbols.push(symbol_replacer.replace_symbol(symbol));
|
||||
} else {
|
||||
if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) {
|
||||
extra_tokens.push(Symbol::terminal(index));
|
||||
extra_symbols.push(Symbol::terminal(index));
|
||||
} else {
|
||||
separators.push(rule);
|
||||
}
|
||||
|
|
@ -158,7 +150,7 @@ pub(super) fn extract_tokens(
|
|||
ExtractedSyntaxGrammar {
|
||||
variables,
|
||||
expected_conflicts,
|
||||
extra_tokens,
|
||||
extra_symbols,
|
||||
variables_to_inline,
|
||||
supertype_symbols,
|
||||
external_tokens,
|
||||
|
|
@ -415,15 +407,15 @@ mod test {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn test_extracting_extra_tokens() {
|
||||
fn test_extracting_extra_symbols() {
|
||||
let mut grammar = build_grammar(vec![
|
||||
Variable::named("rule_0", Rule::string("x")),
|
||||
Variable::named("comment", Rule::pattern("//.*")),
|
||||
]);
|
||||
grammar.extra_tokens = vec![Rule::string(" "), Rule::non_terminal(1)];
|
||||
grammar.extra_symbols = vec![Rule::string(" "), Rule::non_terminal(1)];
|
||||
|
||||
let (syntax_grammar, lexical_grammar) = extract_tokens(grammar).unwrap();
|
||||
assert_eq!(syntax_grammar.extra_tokens, vec![Symbol::terminal(1),]);
|
||||
assert_eq!(syntax_grammar.extra_symbols, vec![Symbol::terminal(1),]);
|
||||
assert_eq!(lexical_grammar.separators, vec![Rule::string(" "),]);
|
||||
}
|
||||
|
||||
|
|
@ -472,28 +464,6 @@ mod test {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_error_on_non_terminal_symbol_extras() {
|
||||
let mut grammar = build_grammar(vec![
|
||||
Variable::named("rule_0", Rule::non_terminal(1)),
|
||||
Variable::named("rule_1", Rule::non_terminal(2)),
|
||||
Variable::named("rule_2", Rule::string("x")),
|
||||
]);
|
||||
grammar.extra_tokens = vec![Rule::non_terminal(1)];
|
||||
|
||||
match extract_tokens(grammar) {
|
||||
Err(e) => {
|
||||
assert_eq!(
|
||||
e.message(),
|
||||
"Non-token symbol 'rule_1' cannot be used as an extra token"
|
||||
);
|
||||
}
|
||||
_ => {
|
||||
panic!("Expected an error but got no error");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_error_on_external_with_same_name_as_non_terminal() {
|
||||
let mut grammar = build_grammar(vec![
|
||||
|
|
@ -522,7 +492,7 @@ mod test {
|
|||
fn build_grammar(variables: Vec<Variable>) -> InternedGrammar {
|
||||
InternedGrammar {
|
||||
variables,
|
||||
extra_tokens: Vec::new(),
|
||||
extra_symbols: Vec::new(),
|
||||
external_tokens: Vec::new(),
|
||||
expected_conflicts: Vec::new(),
|
||||
variables_to_inline: Vec::new(),
|
||||
|
|
|
|||
|
|
@ -199,7 +199,7 @@ unless they are used only as the grammar's start rule.
|
|||
}
|
||||
}
|
||||
Ok(SyntaxGrammar {
|
||||
extra_tokens: grammar.extra_tokens,
|
||||
extra_symbols: grammar.extra_symbols,
|
||||
expected_conflicts: grammar.expected_conflicts,
|
||||
variables_to_inline: grammar.variables_to_inline,
|
||||
external_tokens: grammar.external_tokens,
|
||||
|
|
|
|||
|
|
@ -30,9 +30,9 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
|
|||
external_tokens.push(Variable { name, kind, rule });
|
||||
}
|
||||
|
||||
let mut extra_tokens = Vec::with_capacity(grammar.extra_tokens.len());
|
||||
for extra_token in grammar.extra_tokens.iter() {
|
||||
extra_tokens.push(interner.intern_rule(extra_token)?);
|
||||
let mut extra_symbols = Vec::with_capacity(grammar.extra_symbols.len());
|
||||
for extra_token in grammar.extra_symbols.iter() {
|
||||
extra_symbols.push(interner.intern_rule(extra_token)?);
|
||||
}
|
||||
|
||||
let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len());
|
||||
|
|
@ -73,10 +73,16 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
|
|||
);
|
||||
}
|
||||
|
||||
for (i, variable) in variables.iter_mut().enumerate() {
|
||||
if supertype_symbols.contains(&Symbol::non_terminal(i)) {
|
||||
variable.kind = VariableType::Hidden;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(InternedGrammar {
|
||||
variables,
|
||||
external_tokens,
|
||||
extra_tokens,
|
||||
extra_symbols,
|
||||
expected_conflicts,
|
||||
variables_to_inline,
|
||||
supertype_symbols,
|
||||
|
|
@ -236,7 +242,7 @@ mod tests {
|
|||
InputGrammar {
|
||||
variables,
|
||||
name: "the_language".to_string(),
|
||||
extra_tokens: Vec::new(),
|
||||
extra_symbols: Vec::new(),
|
||||
external_tokens: Vec::new(),
|
||||
expected_conflicts: Vec::new(),
|
||||
variables_to_inline: Vec::new(),
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
mod expand_repeats;
|
||||
mod expand_tokens;
|
||||
mod extract_simple_aliases;
|
||||
mod extract_default_aliases;
|
||||
mod extract_tokens;
|
||||
mod flatten_grammar;
|
||||
mod intern_symbols;
|
||||
|
|
@ -8,7 +8,7 @@ mod process_inlines;
|
|||
|
||||
use self::expand_repeats::expand_repeats;
|
||||
pub(crate) use self::expand_tokens::expand_tokens;
|
||||
use self::extract_simple_aliases::extract_simple_aliases;
|
||||
use self::extract_default_aliases::extract_default_aliases;
|
||||
use self::extract_tokens::extract_tokens;
|
||||
use self::flatten_grammar::flatten_grammar;
|
||||
use self::intern_symbols::intern_symbols;
|
||||
|
|
@ -21,7 +21,7 @@ use crate::generate::rules::{AliasMap, Rule, Symbol};
|
|||
|
||||
pub(crate) struct IntermediateGrammar<T, U> {
|
||||
variables: Vec<Variable>,
|
||||
extra_tokens: Vec<T>,
|
||||
extra_symbols: Vec<T>,
|
||||
expected_conflicts: Vec<Vec<Symbol>>,
|
||||
external_tokens: Vec<U>,
|
||||
variables_to_inline: Vec<Symbol>,
|
||||
|
|
@ -52,7 +52,7 @@ pub(crate) fn prepare_grammar(
|
|||
let syntax_grammar = expand_repeats(syntax_grammar);
|
||||
let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
|
||||
let lexical_grammar = expand_tokens(lexical_grammar)?;
|
||||
let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
|
||||
let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
|
||||
let inlines = process_inlines(&syntax_grammar);
|
||||
Ok((syntax_grammar, lexical_grammar, inlines, simple_aliases))
|
||||
Ok((syntax_grammar, lexical_grammar, inlines, default_aliases))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -127,6 +127,9 @@ impl InlinedProductionMapBuilder {
|
|||
last_inserted_step.associativity = removed_step.associativity;
|
||||
}
|
||||
}
|
||||
if p.dynamic_precedence.abs() > production.dynamic_precedence.abs() {
|
||||
production.dynamic_precedence = p.dynamic_precedence;
|
||||
}
|
||||
production
|
||||
}),
|
||||
);
|
||||
|
|
@ -196,7 +199,7 @@ mod tests {
|
|||
fn test_basic_inlining() {
|
||||
let grammar = SyntaxGrammar {
|
||||
expected_conflicts: Vec::new(),
|
||||
extra_tokens: Vec::new(),
|
||||
extra_symbols: Vec::new(),
|
||||
external_tokens: Vec::new(),
|
||||
supertype_symbols: Vec::new(),
|
||||
word_token: None,
|
||||
|
|
@ -226,7 +229,7 @@ mod tests {
|
|||
],
|
||||
},
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
dynamic_precedence: -2,
|
||||
steps: vec![ProductionStep::new(Symbol::terminal(14))],
|
||||
},
|
||||
],
|
||||
|
|
@ -258,7 +261,7 @@ mod tests {
|
|||
],
|
||||
},
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
dynamic_precedence: -2,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(10)),
|
||||
ProductionStep::new(Symbol::terminal(14)),
|
||||
|
|
@ -327,7 +330,7 @@ mod tests {
|
|||
Symbol::non_terminal(3),
|
||||
],
|
||||
expected_conflicts: Vec::new(),
|
||||
extra_tokens: Vec::new(),
|
||||
extra_symbols: Vec::new(),
|
||||
external_tokens: Vec::new(),
|
||||
supertype_symbols: Vec::new(),
|
||||
word_token: None,
|
||||
|
|
@ -429,7 +432,7 @@ mod tests {
|
|||
},
|
||||
],
|
||||
expected_conflicts: Vec::new(),
|
||||
extra_tokens: Vec::new(),
|
||||
extra_symbols: Vec::new(),
|
||||
external_tokens: Vec::new(),
|
||||
supertype_symbols: Vec::new(),
|
||||
word_token: None,
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -1,3 +1,4 @@
|
|||
use super::grammars::VariableType;
|
||||
use smallbitvec::SmallBitVec;
|
||||
use std::collections::HashMap;
|
||||
use std::iter::FromIterator;
|
||||
|
|
@ -139,6 +140,16 @@ impl Rule {
|
|||
}
|
||||
}
|
||||
|
||||
impl Alias {
|
||||
pub fn kind(&self) -> VariableType {
|
||||
if self.is_named {
|
||||
VariableType::Named
|
||||
} else {
|
||||
VariableType::Anonymous
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl Rule {
|
||||
pub fn terminal(index: usize) -> Self {
|
||||
|
|
@ -366,7 +377,7 @@ impl FromIterator<Symbol> for TokenSet {
|
|||
|
||||
fn add_metadata<T: FnOnce(&mut MetadataParams)>(input: Rule, f: T) -> Rule {
|
||||
match input {
|
||||
Rule::Metadata { rule, mut params } => {
|
||||
Rule::Metadata { rule, mut params } if !params.is_token => {
|
||||
f(&mut params);
|
||||
Rule::Metadata { rule, params }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,12 @@ pub(crate) enum ParseAction {
|
|||
},
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum GotoAction {
|
||||
Goto(ParseStateId),
|
||||
ShiftExtra,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ParseTableEntry {
|
||||
pub actions: Vec<ParseAction>,
|
||||
|
|
@ -34,10 +40,11 @@ pub(crate) struct ParseTableEntry {
|
|||
pub(crate) struct ParseState {
|
||||
pub id: ParseStateId,
|
||||
pub terminal_entries: HashMap<Symbol, ParseTableEntry>,
|
||||
pub nonterminal_entries: HashMap<Symbol, ParseStateId>,
|
||||
pub nonterminal_entries: HashMap<Symbol, GotoAction>,
|
||||
pub lex_state_id: usize,
|
||||
pub external_lex_state_id: usize,
|
||||
pub core_id: usize,
|
||||
pub is_non_terminal_extra: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
|
||||
|
|
@ -70,6 +77,7 @@ pub(crate) struct AdvanceAction {
|
|||
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub(crate) struct LexState {
|
||||
pub accept_action: Option<Symbol>,
|
||||
pub eof_action: Option<AdvanceAction>,
|
||||
pub advance_actions: Vec<(CharacterSet, AdvanceAction)>,
|
||||
}
|
||||
|
||||
|
|
@ -103,7 +111,13 @@ impl ParseState {
|
|||
_ => None,
|
||||
})
|
||||
})
|
||||
.chain(self.nonterminal_entries.iter().map(|(_, state)| *state))
|
||||
.chain(self.nonterminal_entries.iter().filter_map(|(_, action)| {
|
||||
if let GotoAction::Goto(state) = action {
|
||||
Some(*state)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn update_referenced_states<F>(&mut self, mut f: F)
|
||||
|
|
@ -121,15 +135,18 @@ impl ParseState {
|
|||
}
|
||||
}
|
||||
}
|
||||
for (symbol, other_state) in &self.nonterminal_entries {
|
||||
let result = f(*other_state, self);
|
||||
if result != *other_state {
|
||||
updates.push((*symbol, 0, result));
|
||||
for (symbol, action) in &self.nonterminal_entries {
|
||||
if let GotoAction::Goto(other_state) = action {
|
||||
let result = f(*other_state, self);
|
||||
if result != *other_state {
|
||||
updates.push((*symbol, 0, result));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (symbol, action_index, new_state) in updates {
|
||||
if symbol.is_non_terminal() {
|
||||
self.nonterminal_entries.insert(symbol, new_state);
|
||||
self.nonterminal_entries
|
||||
.insert(symbol, GotoAction::Goto(new_state));
|
||||
} else {
|
||||
let entry = self.terminal_entries.get_mut(&symbol).unwrap();
|
||||
if let ParseAction::Shift { is_repetition, .. } = entry.actions[action_index] {
|
||||
|
|
|
|||
|
|
@ -1,26 +1,58 @@
|
|||
use super::util;
|
||||
use crate::error::Result;
|
||||
use crate::loader::Loader;
|
||||
use ansi_term::{Color, Style};
|
||||
use ansi_term::Color;
|
||||
use lazy_static::lazy_static;
|
||||
use serde::ser::SerializeMap;
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use serde_json::{json, Value};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::time::Instant;
|
||||
use std::{fmt, fs, io, path, thread};
|
||||
use tree_sitter::{Language, PropertySheet};
|
||||
use tree_sitter_highlight::{highlight, highlight_html, Highlight, HighlightEvent, Properties};
|
||||
use std::{fs, io, path, str, usize};
|
||||
use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer};
|
||||
|
||||
pub const HTML_HEADER: &'static str = "
|
||||
<!doctype HTML>
|
||||
<head>
|
||||
<title>Tree-sitter Highlighting</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: monospace
|
||||
}
|
||||
.line-number {
|
||||
user-select: none;
|
||||
text-align: right;
|
||||
color: rgba(27,31,35,.3);
|
||||
padding: 0 10px;
|
||||
}
|
||||
.line {
|
||||
white-space: pre;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
";
|
||||
|
||||
pub const HTML_FOOTER: &'static str = "
|
||||
</body>
|
||||
";
|
||||
|
||||
lazy_static! {
|
||||
static ref CSS_STYLES_BY_COLOR_ID: Vec<String> =
|
||||
serde_json::from_str(include_str!("../vendor/xterm-colors.json")).unwrap();
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Style {
|
||||
pub ansi: ansi_term::Style,
|
||||
pub css: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Theme {
|
||||
ansi_styles: Vec<Option<Style>>,
|
||||
css_styles: Vec<Option<String>>,
|
||||
pub styles: Vec<Style>,
|
||||
pub highlight_names: Vec<String>,
|
||||
}
|
||||
|
||||
impl Theme {
|
||||
|
|
@ -29,14 +61,8 @@ impl Theme {
|
|||
Ok(serde_json::from_str(&json).unwrap_or_default())
|
||||
}
|
||||
|
||||
fn ansi_style(&self, highlight: Highlight) -> Option<&Style> {
|
||||
self.ansi_styles[highlight as usize].as_ref()
|
||||
}
|
||||
|
||||
fn css_style(&self, highlight: Highlight) -> Option<&str> {
|
||||
self.css_styles[highlight as usize]
|
||||
.as_ref()
|
||||
.map(|s| s.as_str())
|
||||
pub fn default_style(&self) -> Style {
|
||||
Style::default()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -45,20 +71,21 @@ impl<'de> Deserialize<'de> for Theme {
|
|||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
let highlight_count = Highlight::Unknown as usize + 1;
|
||||
let mut ansi_styles = vec![None; highlight_count];
|
||||
let mut css_styles = vec![None; highlight_count];
|
||||
if let Ok(colors) = HashMap::<Highlight, Value>::deserialize(deserializer) {
|
||||
for (highlight, style_value) in colors {
|
||||
let mut styles = Vec::new();
|
||||
let mut highlight_names = Vec::new();
|
||||
if let Ok(colors) = HashMap::<String, Value>::deserialize(deserializer) {
|
||||
highlight_names.reserve(colors.len());
|
||||
styles.reserve(colors.len());
|
||||
for (name, style_value) in colors {
|
||||
let mut style = Style::default();
|
||||
parse_style(&mut style, style_value);
|
||||
ansi_styles[highlight as usize] = Some(style);
|
||||
css_styles[highlight as usize] = Some(style_to_css(style));
|
||||
highlight_names.push(name);
|
||||
styles.push(style);
|
||||
}
|
||||
}
|
||||
Ok(Self {
|
||||
ansi_styles,
|
||||
css_styles,
|
||||
styles,
|
||||
highlight_names,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -68,48 +95,40 @@ impl Serialize for Theme {
|
|||
where
|
||||
S: Serializer,
|
||||
{
|
||||
let entry_count = self.ansi_styles.iter().filter(|i| i.is_some()).count();
|
||||
let mut map = serializer.serialize_map(Some(entry_count))?;
|
||||
for (i, style) in self.ansi_styles.iter().enumerate() {
|
||||
let highlight = Highlight::from_usize(i).unwrap();
|
||||
if highlight == Highlight::Unknown {
|
||||
break;
|
||||
}
|
||||
if let Some(style) = style {
|
||||
let color = style.foreground.map(|color| match color {
|
||||
Color::Black => json!("black"),
|
||||
Color::Blue => json!("blue"),
|
||||
Color::Cyan => json!("cyan"),
|
||||
Color::Green => json!("green"),
|
||||
Color::Purple => json!("purple"),
|
||||
Color::Red => json!("red"),
|
||||
Color::White => json!("white"),
|
||||
Color::Yellow => json!("yellow"),
|
||||
Color::RGB(r, g, b) => json!(format!("#{:x?}{:x?}{:x?}", r, g, b)),
|
||||
Color::Fixed(n) => json!(n),
|
||||
});
|
||||
if style.is_bold || style.is_italic || style.is_underline {
|
||||
let mut entry = HashMap::new();
|
||||
if let Some(color) = color {
|
||||
entry.insert("color", color);
|
||||
}
|
||||
if style.is_bold {
|
||||
entry.insert("bold", Value::Bool(true));
|
||||
}
|
||||
if style.is_italic {
|
||||
entry.insert("italic", Value::Bool(true));
|
||||
}
|
||||
if style.is_underline {
|
||||
entry.insert("underline", Value::Bool(true));
|
||||
}
|
||||
map.serialize_entry(&highlight, &entry)?;
|
||||
} else if let Some(color) = color {
|
||||
map.serialize_entry(&highlight, &color)?;
|
||||
} else {
|
||||
map.serialize_entry(&highlight, &Value::Null)?;
|
||||
let mut map = serializer.serialize_map(Some(self.styles.len()))?;
|
||||
for (name, style) in self.highlight_names.iter().zip(&self.styles) {
|
||||
let style = &style.ansi;
|
||||
let color = style.foreground.map(|color| match color {
|
||||
Color::Black => json!("black"),
|
||||
Color::Blue => json!("blue"),
|
||||
Color::Cyan => json!("cyan"),
|
||||
Color::Green => json!("green"),
|
||||
Color::Purple => json!("purple"),
|
||||
Color::Red => json!("red"),
|
||||
Color::White => json!("white"),
|
||||
Color::Yellow => json!("yellow"),
|
||||
Color::RGB(r, g, b) => json!(format!("#{:x?}{:x?}{:x?}", r, g, b)),
|
||||
Color::Fixed(n) => json!(n),
|
||||
});
|
||||
if style.is_bold || style.is_italic || style.is_underline {
|
||||
let mut style_json = HashMap::new();
|
||||
if let Some(color) = color {
|
||||
style_json.insert("color", color);
|
||||
}
|
||||
if style.is_bold {
|
||||
style_json.insert("bold", Value::Bool(true));
|
||||
}
|
||||
if style.is_italic {
|
||||
style_json.insert("italic", Value::Bool(true));
|
||||
}
|
||||
if style.is_underline {
|
||||
style_json.insert("underline", Value::Bool(true));
|
||||
}
|
||||
map.serialize_entry(&name, &style_json)?;
|
||||
} else if let Some(color) = color {
|
||||
map.serialize_entry(&name, &color)?;
|
||||
} else {
|
||||
map.serialize_entry(&highlight, &Value::Null)?;
|
||||
map.serialize_entry(&name, &Value::Null)?;
|
||||
}
|
||||
}
|
||||
map.end()
|
||||
|
|
@ -149,42 +168,39 @@ impl Default for Theme {
|
|||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for Theme {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{{")?;
|
||||
let mut first = true;
|
||||
for (i, style) in self.ansi_styles.iter().enumerate() {
|
||||
if let Some(style) = style {
|
||||
let highlight = Highlight::from_usize(i).unwrap();
|
||||
if !first {
|
||||
write!(f, ", ")?;
|
||||
}
|
||||
write!(f, "{:?}: {:?}", highlight, style)?;
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
write!(f, "}}")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_style(style: &mut Style, json: Value) {
|
||||
if let Value::Object(entries) = json {
|
||||
for (property_name, value) in entries {
|
||||
match property_name.as_str() {
|
||||
"bold" => *style = style.bold(),
|
||||
"italic" => *style = style.italic(),
|
||||
"underline" => *style = style.underline(),
|
||||
"bold" => {
|
||||
if value == Value::Bool(true) {
|
||||
style.ansi = style.ansi.bold()
|
||||
}
|
||||
}
|
||||
"italic" => {
|
||||
if value == Value::Bool(true) {
|
||||
style.ansi = style.ansi.italic()
|
||||
}
|
||||
}
|
||||
"underline" => {
|
||||
if value == Value::Bool(true) {
|
||||
style.ansi = style.ansi.underline()
|
||||
}
|
||||
}
|
||||
"color" => {
|
||||
if let Some(color) = parse_color(value) {
|
||||
*style = style.fg(color);
|
||||
style.ansi = style.ansi.fg(color);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
style.css = Some(style_to_css(style.ansi));
|
||||
} else if let Some(color) = parse_color(json) {
|
||||
*style = style.fg(color);
|
||||
style.ansi = style.ansi.fg(color);
|
||||
style.css = Some(style_to_css(style.ansi));
|
||||
} else {
|
||||
style.css = None;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -223,9 +239,12 @@ fn parse_color(json: Value) -> Option<Color> {
|
|||
}
|
||||
}
|
||||
|
||||
fn style_to_css(style: Style) -> String {
|
||||
fn style_to_css(style: ansi_term::Style) -> String {
|
||||
use std::fmt::Write;
|
||||
let mut result = "style='".to_string();
|
||||
if style.is_underline {
|
||||
write!(&mut result, "text-decoration: underline;").unwrap();
|
||||
}
|
||||
if style.is_bold {
|
||||
write!(&mut result, "font-weight: bold;").unwrap();
|
||||
}
|
||||
|
|
@ -254,163 +273,95 @@ fn color_to_css(color: Color) -> &'static str {
|
|||
}
|
||||
}
|
||||
|
||||
fn cancel_on_stdin() -> Arc<AtomicUsize> {
|
||||
let result = Arc::new(AtomicUsize::new(0));
|
||||
thread::spawn({
|
||||
let flag = result.clone();
|
||||
move || {
|
||||
let mut line = String::new();
|
||||
io::stdin().read_line(&mut line).unwrap();
|
||||
flag.store(1, Ordering::Relaxed);
|
||||
}
|
||||
});
|
||||
result
|
||||
}
|
||||
|
||||
pub fn ansi(
|
||||
loader: &Loader,
|
||||
theme: &Theme,
|
||||
source: &[u8],
|
||||
language: Language,
|
||||
property_sheet: &PropertySheet<Properties>,
|
||||
config: &HighlightConfiguration,
|
||||
print_time: bool,
|
||||
cancellation_flag: Option<&AtomicUsize>,
|
||||
) -> Result<()> {
|
||||
use std::io::Write;
|
||||
let stdout = io::stdout();
|
||||
let mut stdout = stdout.lock();
|
||||
|
||||
let cancellation_flag = cancel_on_stdin();
|
||||
let time = Instant::now();
|
||||
let mut highlight_stack = Vec::new();
|
||||
for event in highlight(
|
||||
source,
|
||||
language,
|
||||
property_sheet,
|
||||
Some(cancellation_flag.as_ref()),
|
||||
|s| language_for_injection_string(loader, s),
|
||||
)
|
||||
.map_err(|e| e.to_string())?
|
||||
{
|
||||
let event = event.map_err(|e| e.to_string())?;
|
||||
match event {
|
||||
HighlightEvent::Source { start, end } => {
|
||||
if let Some(style) = highlight_stack.last().and_then(|s| theme.ansi_style(*s)) {
|
||||
style.paint(&source[start..end]).write_to(&mut stdout)?;
|
||||
} else {
|
||||
stdout.write_all(&source[start..end])?;
|
||||
}
|
||||
}
|
||||
HighlightEvent::HighlightStart(h) => {
|
||||
highlight_stack.push(h);
|
||||
let mut highlighter = Highlighter::new();
|
||||
|
||||
let events = highlighter.highlight(config, source, cancellation_flag, |string| {
|
||||
loader.highlight_config_for_injection_string(string)
|
||||
})?;
|
||||
|
||||
let mut style_stack = vec![theme.default_style().ansi];
|
||||
for event in events {
|
||||
match event? {
|
||||
HighlightEvent::HighlightStart(highlight) => {
|
||||
style_stack.push(theme.styles[highlight.0].ansi);
|
||||
}
|
||||
HighlightEvent::HighlightEnd => {
|
||||
highlight_stack.pop();
|
||||
style_stack.pop();
|
||||
}
|
||||
HighlightEvent::Source { start, end } => {
|
||||
style_stack
|
||||
.last()
|
||||
.unwrap()
|
||||
.paint(&source[start..end])
|
||||
.write_to(&mut stdout)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if print_time {
|
||||
let duration = time.elapsed();
|
||||
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
|
||||
eprintln!("{} ms", duration_ms);
|
||||
eprintln!("Time: {}ms", time.elapsed().as_millis());
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub const HTML_HEADER: &'static str = "
|
||||
<!doctype HTML>
|
||||
<head>
|
||||
<title>Tree-sitter Highlighting</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: monospace
|
||||
}
|
||||
.line-number {
|
||||
user-select: none;
|
||||
text-align: right;
|
||||
color: rgba(27,31,35,.3);
|
||||
padding: 0 10px;
|
||||
}
|
||||
.line {
|
||||
white-space: pre;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
";
|
||||
|
||||
pub const HTML_FOOTER: &'static str = "
|
||||
</body>
|
||||
";
|
||||
|
||||
pub fn html(
|
||||
loader: &Loader,
|
||||
theme: &Theme,
|
||||
source: &[u8],
|
||||
language: Language,
|
||||
property_sheet: &PropertySheet<Properties>,
|
||||
config: &HighlightConfiguration,
|
||||
quiet: bool,
|
||||
print_time: bool,
|
||||
) -> Result<()> {
|
||||
use std::io::Write;
|
||||
|
||||
let stdout = io::stdout();
|
||||
let mut stdout = stdout.lock();
|
||||
write!(&mut stdout, "<table>\n")?;
|
||||
let time = Instant::now();
|
||||
let cancellation_flag = util::cancel_on_stdin();
|
||||
let mut highlighter = Highlighter::new();
|
||||
|
||||
let cancellation_flag = cancel_on_stdin();
|
||||
let lines = highlight_html(
|
||||
source,
|
||||
language,
|
||||
property_sheet,
|
||||
Some(cancellation_flag.as_ref()),
|
||||
|s| language_for_injection_string(loader, s),
|
||||
|highlight| {
|
||||
if let Some(css_style) = theme.css_style(highlight) {
|
||||
css_style
|
||||
} else {
|
||||
""
|
||||
}
|
||||
},
|
||||
)
|
||||
.map_err(|e| e.to_string())?;
|
||||
for (i, line) in lines.into_iter().enumerate() {
|
||||
write!(
|
||||
&mut stdout,
|
||||
"<tr><td class=line-number>{}</td><td class=line>{}</td></tr>\n",
|
||||
i + 1,
|
||||
line
|
||||
)?;
|
||||
let events = highlighter.highlight(config, source, Some(&cancellation_flag), |string| {
|
||||
loader.highlight_config_for_injection_string(string)
|
||||
})?;
|
||||
|
||||
let mut renderer = HtmlRenderer::new();
|
||||
renderer.render(events, source, &move |highlight| {
|
||||
if let Some(css_style) = &theme.styles[highlight.0].css {
|
||||
css_style.as_bytes()
|
||||
} else {
|
||||
"".as_bytes()
|
||||
}
|
||||
})?;
|
||||
|
||||
if !quiet {
|
||||
write!(&mut stdout, "<table>\n")?;
|
||||
for (i, line) in renderer.lines().enumerate() {
|
||||
write!(
|
||||
&mut stdout,
|
||||
"<tr><td class=line-number>{}</td><td class=line>{}</td></tr>\n",
|
||||
i + 1,
|
||||
line
|
||||
)?;
|
||||
}
|
||||
|
||||
write!(&mut stdout, "</table>\n")?;
|
||||
}
|
||||
write!(&mut stdout, "</table>\n")?;
|
||||
|
||||
if print_time {
|
||||
eprintln!("Time: {}ms", time.elapsed().as_millis());
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn language_for_injection_string<'a>(
|
||||
loader: &'a Loader,
|
||||
string: &str,
|
||||
) -> Option<(Language, &'a PropertySheet<Properties>)> {
|
||||
match loader.language_configuration_for_injection_string(string) {
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"Failed to load language for injection string '{}': {}",
|
||||
string,
|
||||
e.message()
|
||||
);
|
||||
None
|
||||
}
|
||||
Ok(None) => None,
|
||||
Ok(Some((language, configuration))) => {
|
||||
match configuration.highlight_property_sheet(language) {
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"Failed to load property sheet for injection string '{}': {}",
|
||||
string,
|
||||
e.message()
|
||||
);
|
||||
None
|
||||
}
|
||||
Ok(None) => None,
|
||||
Ok(Some(sheet)) => Some((language, sheet)),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,11 @@ pub mod highlight;
|
|||
pub mod loader;
|
||||
pub mod logger;
|
||||
pub mod parse;
|
||||
pub mod query;
|
||||
pub mod query_testing;
|
||||
pub mod tags;
|
||||
pub mod test;
|
||||
pub mod test_highlight;
|
||||
pub mod util;
|
||||
pub mod wasm;
|
||||
pub mod web_ui;
|
||||
|
|
|
|||
|
|
@ -5,12 +5,15 @@ use regex::{Regex, RegexBuilder};
|
|||
use serde_derive::Deserialize;
|
||||
use std::collections::HashMap;
|
||||
use std::io::BufReader;
|
||||
use std::ops::Range;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
use std::sync::Mutex;
|
||||
use std::time::SystemTime;
|
||||
use std::{fs, mem};
|
||||
use tree_sitter::{Language, PropertySheet};
|
||||
use tree_sitter_highlight::{load_property_sheet, Properties};
|
||||
use tree_sitter::{Language, QueryError};
|
||||
use tree_sitter_highlight::HighlightConfiguration;
|
||||
use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
|
||||
|
||||
#[cfg(unix)]
|
||||
const DYLIB_EXTENSION: &'static str = "so";
|
||||
|
|
@ -20,23 +23,31 @@ const DYLIB_EXTENSION: &'static str = "dll";
|
|||
|
||||
const BUILD_TARGET: &'static str = env!("BUILD_TARGET");
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct LanguageConfiguration {
|
||||
pub struct LanguageConfiguration<'a> {
|
||||
pub scope: Option<String>,
|
||||
pub content_regex: Option<Regex>,
|
||||
pub _first_line_regex: Option<Regex>,
|
||||
pub injection_regex: Option<Regex>,
|
||||
pub file_types: Vec<String>,
|
||||
pub highlight_property_sheet_path: Option<PathBuf>,
|
||||
pub root_path: PathBuf,
|
||||
pub highlights_filenames: Option<Vec<String>>,
|
||||
pub injections_filenames: Option<Vec<String>>,
|
||||
pub locals_filenames: Option<Vec<String>>,
|
||||
pub tags_filenames: Option<Vec<String>>,
|
||||
language_id: usize,
|
||||
highlight_property_sheet: OnceCell<Option<PropertySheet<Properties>>>,
|
||||
highlight_config: OnceCell<Option<HighlightConfiguration>>,
|
||||
tags_config: OnceCell<Option<TagsConfiguration>>,
|
||||
highlight_names: &'a Mutex<Vec<String>>,
|
||||
use_all_highlight_names: bool,
|
||||
}
|
||||
|
||||
pub struct Loader {
|
||||
parser_lib_path: PathBuf,
|
||||
languages_by_id: Vec<(PathBuf, OnceCell<Language>)>,
|
||||
language_configurations: Vec<LanguageConfiguration>,
|
||||
language_configurations: Vec<LanguageConfiguration<'static>>,
|
||||
language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
|
||||
highlight_names: Box<Mutex<Vec<String>>>,
|
||||
use_all_highlight_names: bool,
|
||||
}
|
||||
|
||||
unsafe impl Send for Loader {}
|
||||
|
|
@ -49,9 +60,22 @@ impl Loader {
|
|||
languages_by_id: Vec::new(),
|
||||
language_configurations: Vec::new(),
|
||||
language_configuration_ids_by_file_type: HashMap::new(),
|
||||
highlight_names: Box::new(Mutex::new(Vec::new())),
|
||||
use_all_highlight_names: true,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn configure_highlights(&mut self, names: &Vec<String>) {
|
||||
self.use_all_highlight_names = false;
|
||||
let mut highlights = self.highlight_names.lock().unwrap();
|
||||
highlights.clear();
|
||||
highlights.extend(names.iter().cloned());
|
||||
}
|
||||
|
||||
pub fn highlight_names(&self) -> Vec<String> {
|
||||
self.highlight_names.lock().unwrap().clone()
|
||||
}
|
||||
|
||||
pub fn find_all_languages(&mut self, parser_src_paths: &Vec<PathBuf>) -> Result<()> {
|
||||
for parser_container_dir in parser_src_paths.iter() {
|
||||
if let Ok(entries) = fs::read_dir(parser_container_dir) {
|
||||
|
|
@ -134,11 +158,12 @@ impl Loader {
|
|||
if configuration_ids.len() == 1 {
|
||||
configuration = &self.language_configurations[configuration_ids[0]];
|
||||
}
|
||||
|
||||
// If multiple language configurations match, then determine which
|
||||
// one to use by applying the configurations' content regexes.
|
||||
else {
|
||||
let file_contents = fs::read_to_string(path)?;
|
||||
let file_contents = fs::read(path)
|
||||
.map_err(Error::wrap(|| format!("Failed to read path {:?}", path)))?;
|
||||
let file_contents = String::from_utf8_lossy(&file_contents);
|
||||
let mut best_score = -2isize;
|
||||
let mut best_configuration_id = None;
|
||||
for configuration_id in configuration_ids {
|
||||
|
|
@ -151,7 +176,6 @@ impl Loader {
|
|||
if let Some(mat) = content_regex.find(&file_contents) {
|
||||
score = (mat.end() - mat.start()) as isize;
|
||||
}
|
||||
|
||||
// If the content regex does not match, then *penalize* this
|
||||
// language configuration, so that language configurations
|
||||
// without content regexes are preferred over those with
|
||||
|
|
@ -338,10 +362,63 @@ impl Loader {
|
|||
Ok(language)
|
||||
}
|
||||
|
||||
fn find_language_configurations_at_path<'a>(
|
||||
pub fn highlight_config_for_injection_string<'a>(
|
||||
&'a self,
|
||||
string: &str,
|
||||
) -> Option<&'a HighlightConfiguration> {
|
||||
match self.language_configuration_for_injection_string(string) {
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"Failed to load language for injection string '{}': {}",
|
||||
string,
|
||||
e.message()
|
||||
);
|
||||
None
|
||||
}
|
||||
Ok(None) => None,
|
||||
Ok(Some((language, configuration))) => match configuration.highlight_config(language) {
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"Failed to load property sheet for injection string '{}': {}",
|
||||
string,
|
||||
e.message()
|
||||
);
|
||||
None
|
||||
}
|
||||
Ok(None) => None,
|
||||
Ok(Some(config)) => Some(config),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn find_language_configurations_at_path<'a>(
|
||||
&'a mut self,
|
||||
parser_path: &Path,
|
||||
) -> Result<&[LanguageConfiguration]> {
|
||||
#[derive(Deserialize)]
|
||||
#[serde(untagged)]
|
||||
enum PathsJSON {
|
||||
Empty,
|
||||
Single(String),
|
||||
Multiple(Vec<String>),
|
||||
}
|
||||
|
||||
impl Default for PathsJSON {
|
||||
fn default() -> Self {
|
||||
PathsJSON::Empty
|
||||
}
|
||||
}
|
||||
|
||||
impl PathsJSON {
|
||||
fn into_vec(self) -> Option<Vec<String>> {
|
||||
match self {
|
||||
PathsJSON::Empty => None,
|
||||
PathsJSON::Single(s) => Some(vec![s]),
|
||||
PathsJSON::Multiple(s) => Some(s),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct LanguageConfigurationJSON {
|
||||
#[serde(default)]
|
||||
|
|
@ -355,7 +432,14 @@ impl Loader {
|
|||
first_line_regex: Option<String>,
|
||||
#[serde(rename = "injection-regex")]
|
||||
injection_regex: Option<String>,
|
||||
highlights: Option<String>,
|
||||
#[serde(default)]
|
||||
highlights: PathsJSON,
|
||||
#[serde(default)]
|
||||
injections: PathsJSON,
|
||||
#[serde(default)]
|
||||
locals: PathsJSON,
|
||||
#[serde(default)]
|
||||
tags: PathsJSON,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
|
|
@ -394,22 +478,21 @@ impl Loader {
|
|||
});
|
||||
|
||||
let configuration = LanguageConfiguration {
|
||||
root_path: parser_path.to_path_buf(),
|
||||
scope: config_json.scope,
|
||||
language_id,
|
||||
file_types: config_json.file_types.unwrap_or(Vec::new()),
|
||||
content_regex: config_json
|
||||
.content_regex
|
||||
.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()),
|
||||
_first_line_regex: config_json
|
||||
.first_line_regex
|
||||
.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()),
|
||||
injection_regex: config_json
|
||||
.injection_regex
|
||||
.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()),
|
||||
highlight_property_sheet_path: config_json
|
||||
.highlights
|
||||
.map(|h| parser_path.join(h)),
|
||||
highlight_property_sheet: OnceCell::new(),
|
||||
content_regex: Self::regex(config_json.content_regex),
|
||||
_first_line_regex: Self::regex(config_json.first_line_regex),
|
||||
injection_regex: Self::regex(config_json.injection_regex),
|
||||
injections_filenames: config_json.injections.into_vec(),
|
||||
locals_filenames: config_json.locals.into_vec(),
|
||||
tags_filenames: config_json.tags.into_vec(),
|
||||
highlights_filenames: config_json.highlights.into_vec(),
|
||||
highlight_config: OnceCell::new(),
|
||||
tags_config: OnceCell::new(),
|
||||
highlight_names: &*self.highlight_names,
|
||||
use_all_highlight_names: self.use_all_highlight_names,
|
||||
};
|
||||
|
||||
for file_type in &configuration.file_types {
|
||||
|
|
@ -419,7 +502,8 @@ impl Loader {
|
|||
.push(self.language_configurations.len());
|
||||
}
|
||||
|
||||
self.language_configurations.push(configuration);
|
||||
self.language_configurations
|
||||
.push(unsafe { mem::transmute(configuration) });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -427,52 +511,184 @@ impl Loader {
|
|||
if self.language_configurations.len() == initial_language_configuration_count
|
||||
&& parser_path.join("src").join("grammar.json").exists()
|
||||
{
|
||||
self.language_configurations.push(LanguageConfiguration {
|
||||
let configuration = LanguageConfiguration {
|
||||
root_path: parser_path.to_owned(),
|
||||
language_id: self.languages_by_id.len(),
|
||||
file_types: Vec::new(),
|
||||
scope: None,
|
||||
content_regex: None,
|
||||
injection_regex: None,
|
||||
file_types: Vec::new(),
|
||||
_first_line_regex: None,
|
||||
highlight_property_sheet_path: None,
|
||||
highlight_property_sheet: OnceCell::new(),
|
||||
});
|
||||
injection_regex: None,
|
||||
injections_filenames: None,
|
||||
locals_filenames: None,
|
||||
highlights_filenames: None,
|
||||
tags_filenames: None,
|
||||
highlight_config: OnceCell::new(),
|
||||
tags_config: OnceCell::new(),
|
||||
highlight_names: &*self.highlight_names,
|
||||
use_all_highlight_names: self.use_all_highlight_names,
|
||||
};
|
||||
self.language_configurations
|
||||
.push(unsafe { mem::transmute(configuration) });
|
||||
self.languages_by_id
|
||||
.push((parser_path.to_owned(), OnceCell::new()));
|
||||
}
|
||||
|
||||
Ok(&self.language_configurations[initial_language_configuration_count..])
|
||||
}
|
||||
|
||||
fn regex(pattern: Option<String>) -> Option<Regex> {
|
||||
pattern.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok())
|
||||
}
|
||||
}
|
||||
|
||||
impl LanguageConfiguration {
|
||||
pub fn highlight_property_sheet(
|
||||
&self,
|
||||
language: Language,
|
||||
) -> Result<Option<&PropertySheet<Properties>>> {
|
||||
self.highlight_property_sheet
|
||||
impl<'a> LanguageConfiguration<'a> {
|
||||
pub fn highlight_config(&self, language: Language) -> Result<Option<&HighlightConfiguration>> {
|
||||
return self
|
||||
.highlight_config
|
||||
.get_or_try_init(|| {
|
||||
if let Some(path) = &self.highlight_property_sheet_path {
|
||||
let sheet_json = fs::read_to_string(path).map_err(Error::wrap(|| {
|
||||
format!(
|
||||
"Failed to read property sheet {:?}",
|
||||
path.file_name().unwrap()
|
||||
)
|
||||
}))?;
|
||||
let sheet =
|
||||
load_property_sheet(language, &sheet_json).map_err(Error::wrap(|| {
|
||||
format!(
|
||||
"Failed to parse property sheet {:?}",
|
||||
path.file_name().unwrap()
|
||||
)
|
||||
}))?;
|
||||
Ok(Some(sheet))
|
||||
} else {
|
||||
let (highlights_query, highlight_ranges) =
|
||||
self.read_queries(&self.highlights_filenames, "highlights.scm")?;
|
||||
let (injections_query, injection_ranges) =
|
||||
self.read_queries(&self.injections_filenames, "injections.scm")?;
|
||||
let (locals_query, locals_ranges) =
|
||||
self.read_queries(&self.locals_filenames, "locals.scm")?;
|
||||
|
||||
if highlights_query.is_empty() {
|
||||
Ok(None)
|
||||
} else {
|
||||
let mut result = HighlightConfiguration::new(
|
||||
language,
|
||||
&highlights_query,
|
||||
&injections_query,
|
||||
&locals_query,
|
||||
)
|
||||
.map_err(|error| {
|
||||
if error.offset < injections_query.len() {
|
||||
Self::include_path_in_query_error(
|
||||
error,
|
||||
&injection_ranges,
|
||||
&injections_query,
|
||||
0,
|
||||
)
|
||||
} else if error.offset < injections_query.len() + locals_query.len() {
|
||||
Self::include_path_in_query_error(
|
||||
error,
|
||||
&locals_ranges,
|
||||
&locals_query,
|
||||
injections_query.len(),
|
||||
)
|
||||
} else {
|
||||
Self::include_path_in_query_error(
|
||||
error,
|
||||
&highlight_ranges,
|
||||
&highlights_query,
|
||||
injections_query.len() + locals_query.len(),
|
||||
)
|
||||
}
|
||||
})?;
|
||||
let mut all_highlight_names = self.highlight_names.lock().unwrap();
|
||||
if self.use_all_highlight_names {
|
||||
for capture_name in result.query.capture_names() {
|
||||
if !all_highlight_names.contains(capture_name) {
|
||||
all_highlight_names.push(capture_name.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
result.configure(&all_highlight_names);
|
||||
Ok(Some(result))
|
||||
}
|
||||
})
|
||||
.map(Option::as_ref);
|
||||
}
|
||||
|
||||
pub fn tags_config(&self, language: Language) -> Result<Option<&TagsConfiguration>> {
|
||||
self.tags_config
|
||||
.get_or_try_init(|| {
|
||||
let (tags_query, tags_ranges) =
|
||||
self.read_queries(&self.tags_filenames, "tags.scm")?;
|
||||
let (locals_query, locals_ranges) =
|
||||
self.read_queries(&self.locals_filenames, "locals.scm")?;
|
||||
if tags_query.is_empty() {
|
||||
Ok(None)
|
||||
} else {
|
||||
TagsConfiguration::new(language, &tags_query, &locals_query)
|
||||
.map(Some)
|
||||
.map_err(|error| {
|
||||
if let TagsError::Query(error) = error {
|
||||
if error.offset < locals_query.len() {
|
||||
Self::include_path_in_query_error(
|
||||
error,
|
||||
&locals_ranges,
|
||||
&locals_query,
|
||||
0,
|
||||
)
|
||||
} else {
|
||||
Self::include_path_in_query_error(
|
||||
error,
|
||||
&tags_ranges,
|
||||
&tags_query,
|
||||
locals_query.len(),
|
||||
)
|
||||
}
|
||||
.into()
|
||||
} else {
|
||||
error.into()
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
.map(Option::as_ref)
|
||||
}
|
||||
|
||||
fn include_path_in_query_error<'b>(
|
||||
mut error: QueryError,
|
||||
ranges: &'b Vec<(String, Range<usize>)>,
|
||||
source: &str,
|
||||
start_offset: usize,
|
||||
) -> (&'b str, QueryError) {
|
||||
let offset_within_section = error.offset - start_offset;
|
||||
let (path, range) = ranges
|
||||
.iter()
|
||||
.find(|(_, range)| range.contains(&offset_within_section))
|
||||
.unwrap();
|
||||
error.offset = offset_within_section - range.start;
|
||||
error.row = source[range.start..offset_within_section]
|
||||
.chars()
|
||||
.filter(|c| *c == '\n')
|
||||
.count();
|
||||
(path.as_ref(), error)
|
||||
}
|
||||
|
||||
fn read_queries(
|
||||
&self,
|
||||
paths: &Option<Vec<String>>,
|
||||
default_path: &str,
|
||||
) -> Result<(String, Vec<(String, Range<usize>)>)> {
|
||||
let mut query = String::new();
|
||||
let mut path_ranges = Vec::new();
|
||||
if let Some(paths) = paths.as_ref() {
|
||||
for path in paths {
|
||||
let abs_path = self.root_path.join(path);
|
||||
let prev_query_len = query.len();
|
||||
query += &fs::read_to_string(&abs_path).map_err(Error::wrap(|| {
|
||||
format!("Failed to read query file {:?}", path)
|
||||
}))?;
|
||||
path_ranges.push((path.clone(), prev_query_len..query.len()));
|
||||
}
|
||||
} else {
|
||||
let queries_path = self.root_path.join("queries");
|
||||
let path = queries_path.join(default_path);
|
||||
if path.exists() {
|
||||
query = fs::read_to_string(&path).map_err(Error::wrap(|| {
|
||||
format!("Failed to read query file {:?}", path)
|
||||
}))?;
|
||||
path_ranges.push((default_path.to_string(), 0..query.len()));
|
||||
}
|
||||
}
|
||||
|
||||
Ok((query, path_ranges))
|
||||
}
|
||||
}
|
||||
|
||||
fn needs_recompile(
|
||||
|
|
|
|||
382
cli/src/main.rs
382
cli/src/main.rs
|
|
@ -1,10 +1,13 @@
|
|||
use clap::{App, AppSettings, Arg, SubCommand};
|
||||
use error::Error;
|
||||
use glob::glob;
|
||||
use std::path::Path;
|
||||
use std::process::exit;
|
||||
use std::{env, fs, u64};
|
||||
use tree_sitter::Language;
|
||||
use tree_sitter_cli::{
|
||||
config, error, generate, highlight, loader, logger, parse, test, wasm, web_ui,
|
||||
config, error, generate, highlight, loader, logger, parse, query, tags, test, test_highlight,
|
||||
util, wasm, web_ui,
|
||||
};
|
||||
|
||||
const BUILD_VERSION: &'static str = env!("CARGO_PKG_VERSION");
|
||||
|
|
@ -38,8 +41,7 @@ fn run() -> error::Result<()> {
|
|||
.about("Generate a parser")
|
||||
.arg(Arg::with_name("grammar-path").index(1))
|
||||
.arg(Arg::with_name("log").long("log"))
|
||||
.arg(Arg::with_name("next-abi").long("next-abi"))
|
||||
.arg(Arg::with_name("properties-only").long("properties"))
|
||||
.arg(Arg::with_name("prev-abi").long("prev-abi"))
|
||||
.arg(
|
||||
Arg::with_name("report-states-for-rule")
|
||||
.long("report-states-for-rule")
|
||||
|
|
@ -50,19 +52,20 @@ fn run() -> error::Result<()> {
|
|||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("parse")
|
||||
.about("Parse a file")
|
||||
.about("Parse files")
|
||||
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
|
||||
.arg(
|
||||
Arg::with_name("path")
|
||||
Arg::with_name("paths")
|
||||
.index(1)
|
||||
.multiple(true)
|
||||
.required(true),
|
||||
.required(false),
|
||||
)
|
||||
.arg(Arg::with_name("scope").long("scope").takes_value(true))
|
||||
.arg(Arg::with_name("debug").long("debug").short("d"))
|
||||
.arg(Arg::with_name("debug-graph").long("debug-graph").short("D"))
|
||||
.arg(Arg::with_name("quiet").long("quiet").short("q"))
|
||||
.arg(Arg::with_name("stat").long("stat").short("s"))
|
||||
.arg(Arg::with_name("time").long("time").short("t"))
|
||||
.arg(Arg::with_name("allow-cancellation").long("cancel"))
|
||||
.arg(Arg::with_name("timeout").long("timeout").takes_value(true))
|
||||
.arg(
|
||||
Arg::with_name("edits")
|
||||
|
|
@ -73,6 +76,40 @@ fn run() -> error::Result<()> {
|
|||
.number_of_values(1),
|
||||
),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("query")
|
||||
.about("Search files using a syntax tree query")
|
||||
.arg(Arg::with_name("query-path").index(1).required(true))
|
||||
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
|
||||
.arg(
|
||||
Arg::with_name("paths")
|
||||
.index(2)
|
||||
.multiple(true)
|
||||
.required(false),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("byte-range")
|
||||
.help("The range of byte offsets in which the query will be executed")
|
||||
.long("byte-range")
|
||||
.takes_value(true),
|
||||
)
|
||||
.arg(Arg::with_name("scope").long("scope").takes_value(true))
|
||||
.arg(Arg::with_name("captures").long("captures").short("c"))
|
||||
.arg(Arg::with_name("test").long("test")),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("tags")
|
||||
.arg(Arg::with_name("quiet").long("quiet").short("q"))
|
||||
.arg(Arg::with_name("time").long("time").short("t"))
|
||||
.arg(Arg::with_name("scope").long("scope").takes_value(true))
|
||||
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
|
||||
.arg(
|
||||
Arg::with_name("paths")
|
||||
.help("The source file to use")
|
||||
.index(1)
|
||||
.multiple(true),
|
||||
),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("test")
|
||||
.about("Run a parser's tests")
|
||||
|
|
@ -89,15 +126,17 @@ fn run() -> error::Result<()> {
|
|||
.subcommand(
|
||||
SubCommand::with_name("highlight")
|
||||
.about("Highlight a file")
|
||||
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
|
||||
.arg(
|
||||
Arg::with_name("path")
|
||||
Arg::with_name("paths")
|
||||
.index(1)
|
||||
.multiple(true)
|
||||
.required(true),
|
||||
.required(false),
|
||||
)
|
||||
.arg(Arg::with_name("scope").long("scope").takes_value(true))
|
||||
.arg(Arg::with_name("html").long("html").short("h"))
|
||||
.arg(Arg::with_name("time").long("time").short("t")),
|
||||
.arg(Arg::with_name("time").long("time").short("t"))
|
||||
.arg(Arg::with_name("quiet").long("quiet").short("q")),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("build-wasm")
|
||||
|
|
@ -110,7 +149,14 @@ fn run() -> error::Result<()> {
|
|||
.arg(Arg::with_name("path").index(1).multiple(true)),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("web-ui").about("Test a parser interactively in the browser"),
|
||||
SubCommand::with_name("web-ui")
|
||||
.about("Test a parser interactively in the browser")
|
||||
.arg(
|
||||
Arg::with_name("quiet")
|
||||
.long("quiet")
|
||||
.short("q")
|
||||
.help("open in default browser"),
|
||||
),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("dump-languages")
|
||||
|
|
@ -128,7 +174,6 @@ fn run() -> error::Result<()> {
|
|||
config.save(&home_dir)?;
|
||||
} else if let Some(matches) = matches.subcommand_matches("generate") {
|
||||
let grammar_path = matches.value_of("grammar-path");
|
||||
let properties_only = matches.is_present("properties-only");
|
||||
let report_symbol_name = matches.value_of("report-states-for-rule").or_else(|| {
|
||||
if matches.is_present("report-states") {
|
||||
Some("")
|
||||
|
|
@ -139,24 +184,40 @@ fn run() -> error::Result<()> {
|
|||
if matches.is_present("log") {
|
||||
logger::init();
|
||||
}
|
||||
let next_abi = matches.is_present("next-abi");
|
||||
let prev_abi = matches.is_present("prev-abi");
|
||||
generate::generate_parser_in_directory(
|
||||
¤t_dir,
|
||||
grammar_path,
|
||||
properties_only,
|
||||
next_abi,
|
||||
!prev_abi,
|
||||
report_symbol_name,
|
||||
)?;
|
||||
} else if let Some(matches) = matches.subcommand_matches("test") {
|
||||
let debug = matches.is_present("debug");
|
||||
let debug_graph = matches.is_present("debug-graph");
|
||||
let filter = matches.value_of("filter");
|
||||
let update = matches.is_present("update");
|
||||
let corpus_path = current_dir.join("corpus");
|
||||
if let Some(language) = loader.languages_at_path(¤t_dir)?.first() {
|
||||
test::run_tests_at_path(*language, &corpus_path, debug, debug_graph, filter, update)?;
|
||||
} else {
|
||||
eprintln!("No language found");
|
||||
let filter = matches.value_of("filter");
|
||||
let languages = loader.languages_at_path(¤t_dir)?;
|
||||
let language = languages
|
||||
.first()
|
||||
.ok_or_else(|| "No language found".to_string())?;
|
||||
let test_dir = current_dir.join("test");
|
||||
|
||||
// Run the corpus tests. Look for them at two paths: `test/corpus` and `corpus`.
|
||||
let mut test_corpus_dir = test_dir.join("corpus");
|
||||
if !test_corpus_dir.is_dir() {
|
||||
test_corpus_dir = current_dir.join("corpus");
|
||||
}
|
||||
if test_corpus_dir.is_dir() {
|
||||
test::run_tests_at_path(*language, &test_corpus_dir, debug, debug_graph, filter, update)?;
|
||||
}
|
||||
|
||||
// Check that all of the queries are valid.
|
||||
test::check_queries_at_path(*language, ¤t_dir.join("queries"))?;
|
||||
|
||||
// Run the syntax highlighting tests.
|
||||
let test_highlight_dir = test_dir.join("highlight");
|
||||
if test_highlight_dir.is_dir() {
|
||||
test_highlight::test_highlights(&loader, &test_highlight_dir)?;
|
||||
}
|
||||
} else if let Some(matches) = matches.subcommand_matches("parse") {
|
||||
let debug = matches.is_present("debug");
|
||||
|
|
@ -166,56 +227,27 @@ fn run() -> error::Result<()> {
|
|||
let edits = matches
|
||||
.values_of("edits")
|
||||
.map_or(Vec::new(), |e| e.collect());
|
||||
let allow_cancellation = matches.is_present("allow-cancellation");
|
||||
let cancellation_flag = util::cancel_on_stdin();
|
||||
|
||||
let timeout = matches
|
||||
.value_of("timeout")
|
||||
.map_or(0, |t| u64::from_str_radix(t, 10).unwrap());
|
||||
loader.find_all_languages(&config.parser_directories)?;
|
||||
let paths = matches
|
||||
.values_of("path")
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
|
||||
|
||||
let max_path_length = paths.iter().map(|p| p.chars().count()).max().unwrap();
|
||||
let mut has_error = false;
|
||||
loader.find_all_languages(&config.parser_directories)?;
|
||||
|
||||
let should_track_stats = matches.is_present("stat");
|
||||
let mut stats = parse::Stats::default();
|
||||
|
||||
for path in paths {
|
||||
let path = Path::new(path);
|
||||
let language = if let Some(scope) = matches.value_of("scope") {
|
||||
if let Some(config) =
|
||||
loader
|
||||
.language_configuration_for_scope(scope)
|
||||
.map_err(Error::wrap(|| {
|
||||
format!("Failed to load language for scope '{}'", scope)
|
||||
}))?
|
||||
{
|
||||
config.0
|
||||
} else {
|
||||
return Error::err(format!("Unknown scope '{}'", scope));
|
||||
}
|
||||
} else if let Some((lang, _)) = loader
|
||||
.language_configuration_for_file_name(path)
|
||||
.map_err(Error::wrap(|| {
|
||||
format!(
|
||||
"Failed to load language for file name {:?}",
|
||||
path.file_name().unwrap()
|
||||
)
|
||||
}))?
|
||||
{
|
||||
lang
|
||||
} else if let Some(lang) = loader
|
||||
.languages_at_path(¤t_dir)
|
||||
.map_err(Error::wrap(|| {
|
||||
"Failed to load language in current directory"
|
||||
}))?
|
||||
.first()
|
||||
.cloned()
|
||||
{
|
||||
lang
|
||||
} else {
|
||||
eprintln!("No language found");
|
||||
return Ok(());
|
||||
};
|
||||
has_error |= parse::parse_file_at_path(
|
||||
let path = Path::new(&path);
|
||||
let language =
|
||||
select_language(&mut loader, path, ¤t_dir, matches.value_of("scope"))?;
|
||||
|
||||
let this_file_errored = parse::parse_file_at_path(
|
||||
language,
|
||||
path,
|
||||
&edits,
|
||||
|
|
@ -225,36 +257,86 @@ fn run() -> error::Result<()> {
|
|||
timeout,
|
||||
debug,
|
||||
debug_graph,
|
||||
allow_cancellation,
|
||||
Some(&cancellation_flag),
|
||||
)?;
|
||||
|
||||
if should_track_stats {
|
||||
stats.total_parses += 1;
|
||||
if !this_file_errored {
|
||||
stats.successful_parses += 1;
|
||||
}
|
||||
}
|
||||
|
||||
has_error |= this_file_errored;
|
||||
}
|
||||
|
||||
if should_track_stats {
|
||||
println!("{}", stats)
|
||||
}
|
||||
|
||||
if has_error {
|
||||
return Error::err(String::new());
|
||||
}
|
||||
} else if let Some(matches) = matches.subcommand_matches("query") {
|
||||
let ordered_captures = matches.values_of("captures").is_some();
|
||||
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
|
||||
loader.find_all_languages(&config.parser_directories)?;
|
||||
let language = select_language(
|
||||
&mut loader,
|
||||
Path::new(&paths[0]),
|
||||
¤t_dir,
|
||||
matches.value_of("scope"),
|
||||
)?;
|
||||
let query_path = Path::new(matches.value_of("query-path").unwrap());
|
||||
let range = matches.value_of("byte-range").map(|br| {
|
||||
let r: Vec<&str> = br.split(":").collect();
|
||||
(r[0].parse().unwrap(), r[1].parse().unwrap())
|
||||
});
|
||||
let should_test = matches.is_present("test");
|
||||
query::query_files_at_paths(
|
||||
language,
|
||||
paths,
|
||||
query_path,
|
||||
ordered_captures,
|
||||
range,
|
||||
should_test,
|
||||
)?;
|
||||
} else if let Some(matches) = matches.subcommand_matches("tags") {
|
||||
loader.find_all_languages(&config.parser_directories)?;
|
||||
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
|
||||
tags::generate_tags(
|
||||
&loader,
|
||||
matches.value_of("scope"),
|
||||
&paths,
|
||||
matches.is_present("quiet"),
|
||||
matches.is_present("time"),
|
||||
)?;
|
||||
} else if let Some(matches) = matches.subcommand_matches("highlight") {
|
||||
let paths = matches.values_of("path").unwrap().into_iter();
|
||||
let html_mode = matches.is_present("html");
|
||||
let time = matches.is_present("time");
|
||||
loader.configure_highlights(&config.theme.highlight_names);
|
||||
loader.find_all_languages(&config.parser_directories)?;
|
||||
|
||||
if html_mode {
|
||||
let time = matches.is_present("time");
|
||||
let quiet = matches.is_present("quiet");
|
||||
let html_mode = quiet || matches.is_present("html");
|
||||
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
|
||||
|
||||
if html_mode && !quiet {
|
||||
println!("{}", highlight::HTML_HEADER);
|
||||
}
|
||||
|
||||
let language_config;
|
||||
let cancellation_flag = util::cancel_on_stdin();
|
||||
|
||||
let mut lang = None;
|
||||
if let Some(scope) = matches.value_of("scope") {
|
||||
language_config = loader.language_configuration_for_scope(scope)?;
|
||||
if language_config.is_none() {
|
||||
lang = loader.language_configuration_for_scope(scope)?;
|
||||
if lang.is_none() {
|
||||
return Error::err(format!("Unknown scope '{}'", scope));
|
||||
}
|
||||
} else {
|
||||
language_config = None;
|
||||
}
|
||||
|
||||
for path in paths {
|
||||
let path = Path::new(path);
|
||||
let (language, language_config) = match language_config {
|
||||
let path = Path::new(&path);
|
||||
let (language, language_config) = match lang {
|
||||
Some(v) => v,
|
||||
None => match loader.language_configuration_for_file_name(path)? {
|
||||
Some(v) => v,
|
||||
|
|
@ -265,30 +347,56 @@ fn run() -> error::Result<()> {
|
|||
},
|
||||
};
|
||||
|
||||
if let Some(sheet) = language_config.highlight_property_sheet(language)? {
|
||||
if let Some(highlight_config) = language_config.highlight_config(language)? {
|
||||
let source = fs::read(path)?;
|
||||
if html_mode {
|
||||
highlight::html(&loader, &config.theme, &source, language, sheet)?;
|
||||
highlight::html(
|
||||
&loader,
|
||||
&config.theme,
|
||||
&source,
|
||||
highlight_config,
|
||||
quiet,
|
||||
time,
|
||||
)?;
|
||||
} else {
|
||||
highlight::ansi(&loader, &config.theme, &source, language, sheet, time)?;
|
||||
highlight::ansi(
|
||||
&loader,
|
||||
&config.theme,
|
||||
&source,
|
||||
highlight_config,
|
||||
time,
|
||||
Some(&cancellation_flag),
|
||||
)?;
|
||||
}
|
||||
} else {
|
||||
return Error::err(format!("No syntax highlighting property sheet specified"));
|
||||
eprintln!("No syntax highlighting config found for path {:?}", path);
|
||||
}
|
||||
}
|
||||
|
||||
if html_mode && !quiet {
|
||||
println!("{}", highlight::HTML_FOOTER);
|
||||
}
|
||||
} else if let Some(matches) = matches.subcommand_matches("build-wasm") {
|
||||
let grammar_path = current_dir.join(matches.value_of("path").unwrap_or(""));
|
||||
wasm::compile_language_to_wasm(&grammar_path, matches.is_present("docker"))?;
|
||||
} else if matches.subcommand_matches("web-ui").is_some() {
|
||||
web_ui::serve(¤t_dir);
|
||||
} else if let Some(matches) = matches.subcommand_matches("web-ui") {
|
||||
let open_in_browser = !matches.is_present("quiet");
|
||||
web_ui::serve(¤t_dir, open_in_browser);
|
||||
} else if matches.subcommand_matches("dump-languages").is_some() {
|
||||
loader.find_all_languages(&config.parser_directories)?;
|
||||
for (configuration, language_path) in loader.get_all_language_configurations() {
|
||||
println!(
|
||||
"scope: {}\nparser: {:?}\nproperties: {:?}\nfile_types: {:?}\ncontent_regex: {:?}\ninjection_regex: {:?}\n",
|
||||
concat!(
|
||||
"scope: {}\n",
|
||||
"parser: {:?}\n",
|
||||
"highlights: {:?}\n",
|
||||
"file_types: {:?}\n",
|
||||
"content_regex: {:?}\n",
|
||||
"injection_regex: {:?}\n",
|
||||
),
|
||||
configuration.scope.as_ref().unwrap_or(&String::new()),
|
||||
language_path,
|
||||
configuration.highlight_property_sheet_path,
|
||||
configuration.highlights_filenames,
|
||||
configuration.file_types,
|
||||
configuration.content_regex,
|
||||
configuration.injection_regex,
|
||||
|
|
@ -298,3 +406,107 @@ fn run() -> error::Result<()> {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn collect_paths<'a>(
|
||||
paths_file: Option<&str>,
|
||||
paths: Option<impl Iterator<Item = &'a str>>,
|
||||
) -> error::Result<Vec<String>> {
|
||||
if let Some(paths_file) = paths_file {
|
||||
return Ok(fs::read_to_string(paths_file)
|
||||
.map_err(Error::wrap(|| {
|
||||
format!("Failed to read paths file {}", paths_file)
|
||||
}))?
|
||||
.trim()
|
||||
.split_ascii_whitespace()
|
||||
.map(String::from)
|
||||
.collect::<Vec<_>>());
|
||||
}
|
||||
|
||||
if let Some(paths) = paths {
|
||||
let mut result = Vec::new();
|
||||
|
||||
let mut incorporate_path = |path: &str, positive| {
|
||||
if positive {
|
||||
result.push(path.to_string());
|
||||
} else {
|
||||
if let Some(index) = result.iter().position(|p| p == path) {
|
||||
result.remove(index);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
for mut path in paths {
|
||||
let mut positive = true;
|
||||
if path.starts_with("!") {
|
||||
positive = false;
|
||||
path = path.trim_start_matches("!");
|
||||
}
|
||||
|
||||
if Path::new(path).exists() {
|
||||
incorporate_path(path, positive);
|
||||
} else {
|
||||
let paths = glob(path)
|
||||
.map_err(Error::wrap(|| format!("Invalid glob pattern {:?}", path)))?;
|
||||
for path in paths {
|
||||
if let Some(path) = path?.to_str() {
|
||||
incorporate_path(path, positive);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if result.is_empty() {
|
||||
Error::err(
|
||||
"No files were found at or matched by the provided pathname/glob".to_string(),
|
||||
)?;
|
||||
}
|
||||
|
||||
return Ok(result);
|
||||
}
|
||||
|
||||
Err(Error::new("Must provide one or more paths".to_string()))
|
||||
}
|
||||
|
||||
fn select_language(
|
||||
loader: &mut loader::Loader,
|
||||
path: &Path,
|
||||
current_dir: &Path,
|
||||
scope: Option<&str>,
|
||||
) -> Result<Language, Error> {
|
||||
if let Some(scope) = scope {
|
||||
if let Some(config) =
|
||||
loader
|
||||
.language_configuration_for_scope(scope)
|
||||
.map_err(Error::wrap(|| {
|
||||
format!("Failed to load language for scope '{}'", scope)
|
||||
}))?
|
||||
{
|
||||
Ok(config.0)
|
||||
} else {
|
||||
return Error::err(format!("Unknown scope '{}'", scope));
|
||||
}
|
||||
} else if let Some((lang, _)) =
|
||||
loader
|
||||
.language_configuration_for_file_name(path)
|
||||
.map_err(Error::wrap(|| {
|
||||
format!(
|
||||
"Failed to load language for file name {:?}",
|
||||
path.file_name().unwrap()
|
||||
)
|
||||
}))?
|
||||
{
|
||||
Ok(lang)
|
||||
} else if let Some(lang) = loader
|
||||
.languages_at_path(¤t_dir)
|
||||
.map_err(Error::wrap(|| {
|
||||
"Failed to load language in current directory"
|
||||
}))?
|
||||
.first()
|
||||
.cloned()
|
||||
{
|
||||
Ok(lang)
|
||||
} else {
|
||||
eprintln!("No language found");
|
||||
Error::err("No language found".to_string())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,9 +2,9 @@ use super::error::{Error, Result};
|
|||
use super::util;
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::time::Instant;
|
||||
use std::{fs, thread, usize};
|
||||
use std::{fmt, fs, usize};
|
||||
use tree_sitter::{InputEdit, Language, LogType, Parser, Point, Tree};
|
||||
|
||||
#[derive(Debug)]
|
||||
|
|
@ -14,6 +14,22 @@ pub struct Edit {
|
|||
pub inserted_text: Vec<u8>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Stats {
|
||||
pub successful_parses: usize,
|
||||
pub total_parses: usize,
|
||||
}
|
||||
|
||||
impl fmt::Display for Stats {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
return writeln!(f, "Total parses: {}; successful parses: {}; failed parses: {}; success percentage: {:.2}%",
|
||||
self.total_parses,
|
||||
self.successful_parses,
|
||||
self.total_parses - self.successful_parses,
|
||||
(self.successful_parses as f64) / (self.total_parses as f64) * 100.0);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_file_at_path(
|
||||
language: Language,
|
||||
path: &Path,
|
||||
|
|
@ -24,7 +40,7 @@ pub fn parse_file_at_path(
|
|||
timeout: u64,
|
||||
debug: bool,
|
||||
debug_graph: bool,
|
||||
allow_cancellation: bool,
|
||||
cancellation_flag: Option<&AtomicUsize>,
|
||||
) -> Result<bool> {
|
||||
let mut _log_session = None;
|
||||
let mut parser = Parser::new();
|
||||
|
|
@ -35,16 +51,7 @@ pub fn parse_file_at_path(
|
|||
|
||||
// If the `--cancel` flag was passed, then cancel the parse
|
||||
// when the user types a newline.
|
||||
if allow_cancellation {
|
||||
let flag = Box::new(AtomicUsize::new(0));
|
||||
unsafe { parser.set_cancellation_flag(Some(&flag)) };
|
||||
thread::spawn(move || {
|
||||
let mut line = String::new();
|
||||
io::stdin().read_line(&mut line).unwrap();
|
||||
eprintln!("Cancelling");
|
||||
flag.store(1, Ordering::Relaxed);
|
||||
});
|
||||
}
|
||||
unsafe { parser.set_cancellation_flag(cancellation_flag) };
|
||||
|
||||
// Set a timeout based on the `--time` flag.
|
||||
parser.set_timeout_micros(timeout);
|
||||
|
|
@ -70,10 +77,18 @@ pub fn parse_file_at_path(
|
|||
let mut stdout = stdout.lock();
|
||||
|
||||
if let Some(mut tree) = tree {
|
||||
for edit in edits {
|
||||
if debug_graph && !edits.is_empty() {
|
||||
println!("BEFORE:\n{}", String::from_utf8_lossy(&source_code));
|
||||
}
|
||||
|
||||
for (i, edit) in edits.iter().enumerate() {
|
||||
let edit = parse_edit_flag(&source_code, edit)?;
|
||||
perform_edit(&mut tree, &mut source_code, &edit);
|
||||
tree = parser.parse(&source_code, Some(&tree)).unwrap();
|
||||
|
||||
if debug_graph {
|
||||
println!("AFTER {}:\n{}", i, String::from_utf8_lossy(&source_code));
|
||||
}
|
||||
}
|
||||
|
||||
let duration = time.elapsed();
|
||||
|
|
|
|||
100
cli/src/query.rs
Normal file
100
cli/src/query.rs
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
use super::error::{Error, Result};
|
||||
use crate::query_testing;
|
||||
use std::fs;
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Language, Node, Parser, Query, QueryCursor};
|
||||
|
||||
pub fn query_files_at_paths(
|
||||
language: Language,
|
||||
paths: Vec<String>,
|
||||
query_path: &Path,
|
||||
ordered_captures: bool,
|
||||
range: Option<(usize, usize)>,
|
||||
should_test: bool,
|
||||
) -> Result<()> {
|
||||
let stdout = io::stdout();
|
||||
let mut stdout = stdout.lock();
|
||||
|
||||
let query_source = fs::read_to_string(query_path).map_err(Error::wrap(|| {
|
||||
format!("Error reading query file {:?}", query_path)
|
||||
}))?;
|
||||
let query = Query::new(language, &query_source)
|
||||
.map_err(|e| Error::new(format!("Query compilation failed: {:?}", e)))?;
|
||||
|
||||
let mut query_cursor = QueryCursor::new();
|
||||
if let Some((beg, end)) = range {
|
||||
query_cursor.set_byte_range(beg, end);
|
||||
}
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language).map_err(|e| e.to_string())?;
|
||||
|
||||
for path in paths {
|
||||
let mut results = Vec::new();
|
||||
|
||||
writeln!(&mut stdout, "{}", path)?;
|
||||
|
||||
let source_code = fs::read(&path).map_err(Error::wrap(|| {
|
||||
format!("Error reading source file {:?}", path)
|
||||
}))?;
|
||||
let text_callback = |n: Node| &source_code[n.byte_range()];
|
||||
let tree = parser.parse(&source_code, None).unwrap();
|
||||
|
||||
if ordered_captures {
|
||||
for (mat, capture_index) in
|
||||
query_cursor.captures(&query, tree.root_node(), text_callback)
|
||||
{
|
||||
let capture = mat.captures[capture_index];
|
||||
let capture_name = &query.capture_names()[capture.index as usize];
|
||||
writeln!(
|
||||
&mut stdout,
|
||||
" pattern: {}, capture: {}, row: {}, text: {:?}",
|
||||
mat.pattern_index,
|
||||
capture_name,
|
||||
capture.node.start_position().row,
|
||||
capture.node.utf8_text(&source_code).unwrap_or("")
|
||||
)?;
|
||||
results.push(query_testing::CaptureInfo {
|
||||
name: capture_name.to_string(),
|
||||
start: capture.node.start_position(),
|
||||
end: capture.node.end_position(),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
for m in query_cursor.matches(&query, tree.root_node(), text_callback) {
|
||||
writeln!(&mut stdout, " pattern: {}", m.pattern_index)?;
|
||||
for capture in m.captures {
|
||||
let start = capture.node.start_position();
|
||||
let end = capture.node.end_position();
|
||||
let capture_name = &query.capture_names()[capture.index as usize];
|
||||
if end.row == start.row {
|
||||
writeln!(
|
||||
&mut stdout,
|
||||
" capture: {}, start: {}, text: {:?}",
|
||||
capture_name,
|
||||
start,
|
||||
capture.node.utf8_text(&source_code).unwrap_or("")
|
||||
)?;
|
||||
} else {
|
||||
writeln!(
|
||||
&mut stdout,
|
||||
" capture: {}, start: {}, end: {}",
|
||||
capture_name, start, end,
|
||||
)?;
|
||||
}
|
||||
results.push(query_testing::CaptureInfo {
|
||||
name: capture_name.to_string(),
|
||||
start: capture.node.start_position(),
|
||||
end: capture.node.end_position(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
if should_test {
|
||||
query_testing::assert_expected_captures(results, path, &mut parser, language)?
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
150
cli/src/query_testing.rs
Normal file
150
cli/src/query_testing.rs
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
use crate::error;
|
||||
use crate::error::Result;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use std::fs;
|
||||
use tree_sitter::{Language, Parser, Point};
|
||||
|
||||
lazy_static! {
|
||||
static ref CAPTURE_NAME_REGEX: Regex = Regex::new("[\\w_\\-.]+").unwrap();
|
||||
}
|
||||
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
pub struct CaptureInfo {
|
||||
pub name: String,
|
||||
pub start: Point,
|
||||
pub end: Point,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct Assertion {
|
||||
pub position: Point,
|
||||
pub expected_capture_name: String,
|
||||
}
|
||||
|
||||
/// Parse the given source code, finding all of the comments that contain
|
||||
/// highlighting assertions. Return a vector of (position, expected highlight name)
|
||||
/// pairs.
|
||||
pub fn parse_position_comments(
|
||||
parser: &mut Parser,
|
||||
language: Language,
|
||||
source: &[u8],
|
||||
) -> Result<Vec<Assertion>> {
|
||||
let mut result = Vec::new();
|
||||
let mut assertion_ranges = Vec::new();
|
||||
|
||||
// Parse the code.
|
||||
parser.set_included_ranges(&[]).unwrap();
|
||||
parser.set_language(language).unwrap();
|
||||
let tree = parser.parse(source, None).unwrap();
|
||||
|
||||
// Walk the tree, finding comment nodes that contain assertions.
|
||||
let mut ascending = false;
|
||||
let mut cursor = tree.root_node().walk();
|
||||
loop {
|
||||
if ascending {
|
||||
let node = cursor.node();
|
||||
|
||||
// Find every comment node.
|
||||
if node.kind().contains("comment") {
|
||||
if let Ok(text) = node.utf8_text(source) {
|
||||
let mut position = node.start_position();
|
||||
if position.row == 0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Find the arrow character ("^" or '<-") in the comment. A left arrow
|
||||
// refers to the column where the comment node starts. An up arrow refers
|
||||
// to its own column.
|
||||
let mut has_left_caret = false;
|
||||
let mut has_arrow = false;
|
||||
let mut arrow_end = 0;
|
||||
for (i, c) in text.char_indices() {
|
||||
arrow_end = i + 1;
|
||||
if c == '-' && has_left_caret {
|
||||
has_arrow = true;
|
||||
break;
|
||||
}
|
||||
if c == '^' {
|
||||
has_arrow = true;
|
||||
position.column += i;
|
||||
break;
|
||||
}
|
||||
has_left_caret = c == '<';
|
||||
}
|
||||
|
||||
// If the comment node contains an arrow and a highlight name, record the
|
||||
// highlight name and the position.
|
||||
if let (true, Some(mat)) =
|
||||
(has_arrow, CAPTURE_NAME_REGEX.find(&text[arrow_end..]))
|
||||
{
|
||||
assertion_ranges.push((node.start_position(), node.end_position()));
|
||||
result.push(Assertion {
|
||||
position: position,
|
||||
expected_capture_name: mat.as_str().to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Continue walking the tree.
|
||||
if cursor.goto_next_sibling() {
|
||||
ascending = false;
|
||||
} else if !cursor.goto_parent() {
|
||||
break;
|
||||
}
|
||||
} else if !cursor.goto_first_child() {
|
||||
ascending = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Adjust the row number in each assertion's position to refer to the line of
|
||||
// code *above* the assertion. There can be multiple lines of assertion comments,
|
||||
// so the positions may have to be decremented by more than one row.
|
||||
let mut i = 0;
|
||||
for assertion in result.iter_mut() {
|
||||
loop {
|
||||
let on_assertion_line = assertion_ranges[i..]
|
||||
.iter()
|
||||
.any(|(start, _)| start.row == assertion.position.row);
|
||||
if on_assertion_line {
|
||||
assertion.position.row -= 1;
|
||||
} else {
|
||||
while i < assertion_ranges.len()
|
||||
&& assertion_ranges[i].0.row < assertion.position.row
|
||||
{
|
||||
i += 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The assertions can end up out of order due to the line adjustments.
|
||||
result.sort_unstable_by_key(|a| a.position);
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn assert_expected_captures(
|
||||
infos: Vec<CaptureInfo>,
|
||||
path: String,
|
||||
parser: &mut Parser,
|
||||
language: Language,
|
||||
) -> Result<()> {
|
||||
let contents = fs::read_to_string(path)?;
|
||||
let pairs = parse_position_comments(parser, language, contents.as_bytes())?;
|
||||
for info in &infos {
|
||||
if let Some(found) = pairs.iter().find(|p| {
|
||||
p.position.row == info.start.row && p.position >= info.start && p.position < info.end
|
||||
}) {
|
||||
if found.expected_capture_name != info.name && info.name != "name" {
|
||||
Err(error::Error::new(format!(
|
||||
"Assertion failed: at {}, found {}, expected {}",
|
||||
info.start, found.expected_capture_name, info.name
|
||||
)))?
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
98
cli/src/tags.rs
Normal file
98
cli/src/tags.rs
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
use super::loader::Loader;
|
||||
use super::util;
|
||||
use crate::error::{Error, Result};
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
use std::time::Instant;
|
||||
use std::{fs, str};
|
||||
use tree_sitter_tags::TagsContext;
|
||||
|
||||
/// Generate and print code-navigation tags for each of `paths`.
///
/// * `scope` — optional language scope name; when given, all files are tagged
///   with that language, and an unknown scope is an error.
/// * `quiet` — suppress the per-tag output (timing output is still printed).
/// * `time` — print elapsed milliseconds per file.
///
/// Files whose language or tags config cannot be determined are reported on
/// stderr and skipped rather than aborting the whole run.
pub fn generate_tags(
    loader: &Loader,
    scope: Option<&str>,
    paths: &[String],
    quiet: bool,
    time: bool,
) -> Result<()> {
    let mut lang = None;
    if let Some(scope) = scope {
        lang = loader.language_configuration_for_scope(scope)?;
        if lang.is_none() {
            return Error::err(format!("Unknown scope '{}'", scope));
        }
    }

    let mut context = TagsContext::new();
    // Allows the user to abort a long run by sending input on stdin.
    let cancellation_flag = util::cancel_on_stdin();
    let stdout = io::stdout();
    let mut stdout = stdout.lock();

    for path in paths {
        let path = Path::new(&path);
        // Use the scope-selected language if present, otherwise detect per file.
        let (language, language_config) = match lang {
            Some(v) => v,
            None => match loader.language_configuration_for_file_name(path)? {
                Some(v) => v,
                None => {
                    eprintln!("No language found for path {:?}", path);
                    continue;
                }
            },
        };

        if let Some(tags_config) = language_config.tags_config(language)? {
            // When tagging multiple files, print each filename and indent its tags.
            let indent;
            if paths.len() > 1 {
                if !quiet {
                    writeln!(&mut stdout, "{}", path.to_string_lossy())?;
                }
                indent = "\t"
            } else {
                indent = "";
            };

            let source = fs::read(path)?;
            let t0 = Instant::now();
            for tag in context
                .generate_tags(tags_config, &source, Some(&cancellation_flag))?
                .0
            {
                let tag = tag?;
                if !quiet {
                    write!(
                        &mut stdout,
                        "{}{:<10}\t | {:<8}\t{} {} - {} `{}`",
                        indent,
                        str::from_utf8(&source[tag.name_range]).unwrap_or(""),
                        &tags_config.syntax_type_name(tag.syntax_type_id),
                        if tag.is_definition { "def" } else { "ref" },
                        tag.span.start,
                        tag.span.end,
                        str::from_utf8(&source[tag.line_range]).unwrap_or(""),
                    )?;
                    if let Some(docs) = tag.docs {
                        if docs.len() > 120 {
                            // NOTE(review): `get(0..120)` returns None when byte 120 is
                            // not a char boundary, so long docs can print as "" — confirm
                            // whether truncation should snap to a boundary instead.
                            write!(&mut stdout, "\t{:?}...", docs.get(0..120).unwrap_or(""))?;
                        } else {
                            write!(&mut stdout, "\t{:?}", &docs)?;
                        }
                    }
                    writeln!(&mut stdout, "")?;
                }
            }

            if time {
                writeln!(
                    &mut stdout,
                    "{}time: {}ms",
                    indent,
                    t0.elapsed().as_millis(),
                )?;
            }
        } else {
            eprintln!("No tags config found for path {:?}", path);
        }
    }

    Ok(())
}
|
||||
|
|
@ -11,7 +11,7 @@ use std::fs;
|
|||
use std::io::{self, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str;
|
||||
use tree_sitter::{Language, LogType, Parser};
|
||||
use tree_sitter::{Language, LogType, Parser, Query};
|
||||
|
||||
lazy_static! {
|
||||
static ref HEADER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^===+\r?\n([^=]*)\r?\n===+\r?\n")
|
||||
|
|
@ -112,6 +112,24 @@ pub fn run_tests_at_path(
|
|||
}
|
||||
}
|
||||
|
||||
pub fn check_queries_at_path(language: Language, path: &Path) -> Result<()> {
|
||||
if path.exists() {
|
||||
for entry in fs::read_dir(path)? {
|
||||
let entry = entry?;
|
||||
let filepath = entry.file_name();
|
||||
let filepath = filepath.to_str().unwrap_or("");
|
||||
let hidden = filepath.starts_with(".");
|
||||
if !hidden {
|
||||
let content = fs::read_to_string(entry.path()).map_err(Error::wrap(|| {
|
||||
format!("Error reading query file {:?}", entry.file_name())
|
||||
}))?;
|
||||
Query::new(language, &content).map_err(|e| (filepath, e))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn print_diff_key() {
|
||||
println!(
|
||||
"\n{} / {}",
|
||||
|
|
|
|||
275
cli/src/test_highlight.rs
Normal file
275
cli/src/test_highlight.rs
Normal file
|
|
@ -0,0 +1,275 @@
|
|||
use super::error::Result;
|
||||
use crate::loader::Loader;
|
||||
use crate::query_testing::{parse_position_comments, Assertion};
|
||||
use ansi_term::Colour;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use tree_sitter::Point;
|
||||
use tree_sitter_highlight::{Highlight, HighlightConfiguration, HighlightEvent, Highlighter};
|
||||
|
||||
/// A single highlighting-assertion failure: where it happened, what highlight
/// was expected, and which highlights were actually present at that position.
pub struct Failure {
    row: usize,
    column: usize,
    expected_highlight: String,
    // All highlight names that actually spanned the asserted position.
    actual_highlights: Vec<String>,
}
|
||||
|
||||
impl Failure {
|
||||
pub fn message(&self) -> String {
|
||||
let mut result = format!(
|
||||
"Failure - row: {}, column: {}, expected highlight '{}', actual highlights: ",
|
||||
self.row, self.column, self.expected_highlight
|
||||
);
|
||||
if self.actual_highlights.is_empty() {
|
||||
result += "none.";
|
||||
} else {
|
||||
for (i, actual_highlight) in self.actual_highlights.iter().enumerate() {
|
||||
if i > 0 {
|
||||
result += ", ";
|
||||
}
|
||||
result += "'";
|
||||
result += actual_highlight;
|
||||
result += "'";
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
/// Run the highlighting assertions in every file of `directory`, printing a
/// per-file pass/fail line. Returns an error (with an empty message — the
/// details were already printed) if any file failed.
pub fn test_highlights(loader: &Loader, directory: &Path) -> Result<()> {
    let mut failed = false;
    let mut highlighter = Highlighter::new();

    println!("syntax highlighting:");
    for highlight_test_file in fs::read_dir(directory)? {
        let highlight_test_file = highlight_test_file?;
        let test_file_path = highlight_test_file.path();
        let test_file_name = highlight_test_file.file_name();
        // Language and highlight config are both required; a missing one is a
        // hard error, unlike in the tags command.
        let (language, language_config) = loader
            .language_configuration_for_file_name(&test_file_path)?
            .ok_or_else(|| format!("No language found for path {:?}", test_file_path))?;
        let highlight_config = language_config
            .highlight_config(language)?
            .ok_or_else(|| format!("No highlighting config found for {:?}", test_file_path))?;
        match test_highlight(
            &loader,
            &mut highlighter,
            highlight_config,
            fs::read(&test_file_path)?.as_slice(),
        ) {
            Ok(assertion_count) => {
                println!(
                    " ✓ {} ({} assertions)",
                    Colour::Green.paint(test_file_name.to_string_lossy().as_ref()),
                    assertion_count
                );
            }
            Err(e) => {
                println!(
                    " ✗ {}",
                    Colour::Red.paint(test_file_name.to_string_lossy().as_ref())
                );
                println!(" {}", e.message());
                failed = true;
            }
        }
    }

    // Failures were already reported per file, so the error carries no message.
    if failed {
        Err(String::new().into())
    } else {
        Ok(())
    }
}
|
||||
/// Check each assertion against the sorted list of actual highlight spans.
/// Both lists are ordered by position, so a forward-only index `i` is kept
/// across assertions. Returns the number of assertions on success, or the
/// first `Failure` encountered.
pub fn iterate_assertions(
    assertions: &Vec<Assertion>,
    highlights: &Vec<(Point, Point, Highlight)>,
    highlight_names: &Vec<String>,
) -> Result<usize> {
    // Iterate through all of the highlighting assertions, checking each one against the
    // actual highlights.
    let mut i = 0;
    let mut actual_highlights = Vec::<&String>::new();
    for Assertion {
        position,
        expected_capture_name: expected_highlight,
    } in assertions
    {
        let mut passed = false;
        actual_highlights.clear();

        'highlight_loop: loop {
            // The assertions are ordered by position, so skip past all of the highlights that
            // end at or before this assertion's position.
            if let Some(highlight) = highlights.get(i) {
                if highlight.1 <= *position {
                    i += 1;
                    continue;
                }

                // Iterate through all of the highlights that start at or before this assertion's,
                // position, looking for one that matches the assertion.
                let mut j = i;
                while let (false, Some(highlight)) = (passed, highlights.get(j)) {
                    if highlight.0 > *position {
                        break 'highlight_loop;
                    }

                    // If the highlight matches the assertion, this test passes. Otherwise,
                    // add this highlight to the list of actual highlights that span the
                    // assertion's position, in order to generate an error message in the event
                    // of a failure.
                    let highlight_name = &highlight_names[(highlight.2).0];
                    if *highlight_name == *expected_highlight {
                        passed = true;
                        break 'highlight_loop;
                    } else {
                        actual_highlights.push(highlight_name);
                    }

                    j += 1;
                }
                // NOTE(review): if the inner scan runs off the end of `highlights`
                // without passing and without seeing a span past `position`, the
                // outer loop re-runs the same scan — verify this cannot loop forever
                // (it appears reachable only when the last highlights all cover the
                // assertion and none match).
            } else {
                break;
            }
        }

        if !passed {
            return Err(Failure {
                row: position.row,
                column: position.column,
                expected_highlight: expected_highlight.clone(),
                actual_highlights: actual_highlights.into_iter().cloned().collect(),
            }
            .into());
        }
    }

    Ok(assertions.len())
}
|
||||
|
||||
pub fn test_highlight(
|
||||
loader: &Loader,
|
||||
highlighter: &mut Highlighter,
|
||||
highlight_config: &HighlightConfiguration,
|
||||
source: &[u8],
|
||||
) -> Result<usize> {
|
||||
// Highlight the file, and parse out all of the highlighting assertions.
|
||||
let highlight_names = loader.highlight_names();
|
||||
let highlights = get_highlight_positions(loader, highlighter, highlight_config, source)?;
|
||||
let assertions =
|
||||
parse_position_comments(highlighter.parser(), highlight_config.language, source)?;
|
||||
|
||||
iterate_assertions(&assertions, &highlights, &highlight_names)?;
|
||||
|
||||
// Iterate through all of the highlighting assertions, checking each one against the
|
||||
// actual highlights.
|
||||
let mut i = 0;
|
||||
let mut actual_highlights = Vec::<&String>::new();
|
||||
for Assertion {
|
||||
position,
|
||||
expected_capture_name: expected_highlight,
|
||||
} in &assertions
|
||||
{
|
||||
let mut passed = false;
|
||||
actual_highlights.clear();
|
||||
|
||||
'highlight_loop: loop {
|
||||
// The assertions are ordered by position, so skip past all of the highlights that
|
||||
// end at or before this assertion's position.
|
||||
if let Some(highlight) = highlights.get(i) {
|
||||
if highlight.1 <= *position {
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Iterate through all of the highlights that start at or before this assertion's,
|
||||
// position, looking for one that matches the assertion.
|
||||
let mut j = i;
|
||||
while let (false, Some(highlight)) = (passed, highlights.get(j)) {
|
||||
if highlight.0 > *position {
|
||||
break 'highlight_loop;
|
||||
}
|
||||
|
||||
// If the highlight matches the assertion, this test passes. Otherwise,
|
||||
// add this highlight to the list of actual highlights that span the
|
||||
// assertion's position, in order to generate an error message in the event
|
||||
// of a failure.
|
||||
let highlight_name = &highlight_names[(highlight.2).0];
|
||||
if *highlight_name == *expected_highlight {
|
||||
passed = true;
|
||||
break 'highlight_loop;
|
||||
} else {
|
||||
actual_highlights.push(highlight_name);
|
||||
}
|
||||
|
||||
j += 1;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if !passed {
|
||||
return Err(Failure {
|
||||
row: position.row,
|
||||
column: position.column,
|
||||
expected_highlight: expected_highlight.clone(),
|
||||
actual_highlights: actual_highlights.into_iter().cloned().collect(),
|
||||
}
|
||||
.into());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(assertions.len())
|
||||
}
|
||||
|
||||
/// Run the highlighter over `source` and convert its byte-offset events into
/// a list of (start point, end point, highlight) triples, keeping only spans
/// that carry at least one active highlight.
pub fn get_highlight_positions(
    loader: &Loader,
    highlighter: &mut Highlighter,
    highlight_config: &HighlightConfiguration,
    source: &[u8],
) -> Result<Vec<(Point, Point, Highlight)>> {
    // Running row/column/byte cursor, advanced lazily as Source events arrive.
    let mut row = 0;
    let mut column = 0;
    let mut byte_offset = 0;
    let mut was_newline = false;
    let mut result = Vec::new();
    // Stack of currently-open highlights; only the innermost (last) is recorded.
    let mut highlight_stack = Vec::new();
    let source = String::from_utf8_lossy(source);
    let mut char_indices = source.char_indices();
    for event in highlighter.highlight(highlight_config, source.as_bytes(), None, |string| {
        loader.highlight_config_for_injection_string(string)
    })? {
        match event? {
            HighlightEvent::HighlightStart(h) => highlight_stack.push(h),
            HighlightEvent::HighlightEnd => {
                highlight_stack.pop();
            }
            HighlightEvent::Source { start, end } => {
                // Advance the cursor through the chars of this span, remembering
                // the point where the span begins.
                let mut start_position = Point::new(row, column);
                while byte_offset < end {
                    if byte_offset <= start {
                        start_position = Point::new(row, column);
                    }
                    if let Some((i, c)) = char_indices.next() {
                        // A newline's row/column effect is applied on the
                        // *next* character, hence the `was_newline` latch.
                        if was_newline {
                            row += 1;
                            column = 0;
                        } else {
                            // NOTE(review): column is advanced in bytes
                            // (i - byte_offset), not chars — confirm this matches
                            // how assertion columns are computed elsewhere.
                            column += i - byte_offset;
                        }
                        was_newline = c == '\n';
                        byte_offset = i;
                    } else {
                        break;
                    }
                }
                if let Some(highlight) = highlight_stack.last() {
                    result.push((start_position, Point::new(row, column), *highlight))
                }
            }
        }
    }
    Ok(result)
}
|
||||
|
|
@ -21,7 +21,11 @@ const LANGUAGES: &'static [&'static str] = &[
|
|||
"go",
|
||||
"html",
|
||||
"javascript",
|
||||
"json",
|
||||
"php",
|
||||
"python",
|
||||
"ruby",
|
||||
"rust",
|
||||
];
|
||||
|
||||
lazy_static! {
|
||||
|
|
@ -57,7 +61,11 @@ fn test_real_language_corpus_files() {
|
|||
}
|
||||
|
||||
let language = get_language(language_name);
|
||||
let corpus_dir = grammars_dir.join(language_name).join("corpus");
|
||||
let mut corpus_dir = grammars_dir.join(language_name).join("corpus");
|
||||
if !corpus_dir.is_dir() {
|
||||
corpus_dir = grammars_dir.join(language_name).join("test").join("corpus");
|
||||
}
|
||||
|
||||
let error_corpus_file = error_corpus_dir.join(&format!("{}_errors.txt", language_name));
|
||||
let main_tests = parse_tests(&corpus_dir).unwrap();
|
||||
let error_tests = parse_tests(&error_corpus_file).unwrap_or(TestEntry::default());
|
||||
|
|
@ -300,7 +308,8 @@ fn check_consistent_sizes(tree: &Tree, input: &Vec<u8>) {
|
|||
let mut last_child_end_point = start_point;
|
||||
let mut some_child_has_changes = false;
|
||||
let mut actual_named_child_count = 0;
|
||||
for child in node.children() {
|
||||
for i in 0..node.child_count() {
|
||||
let child = node.child(i).unwrap();
|
||||
assert!(child.start_byte() >= last_child_end_byte);
|
||||
assert!(child.start_position() >= last_child_end_point);
|
||||
check(child, line_offsets);
|
||||
|
|
|
|||
|
|
@ -51,6 +51,12 @@ pub fn stop_recording() {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn record(f: impl FnOnce()) {
|
||||
start_recording();
|
||||
f();
|
||||
stop_recording();
|
||||
}
|
||||
|
||||
fn record_alloc(ptr: *mut c_void) {
|
||||
let mut recorder = RECORDER.lock();
|
||||
if recorder.enabled {
|
||||
|
|
|
|||
|
|
@ -2,8 +2,8 @@ use crate::loader::Loader;
|
|||
use lazy_static::lazy_static;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use tree_sitter::{Language, PropertySheet};
|
||||
use tree_sitter_highlight::{load_property_sheet, Properties};
|
||||
use tree_sitter::Language;
|
||||
use tree_sitter_highlight::HighlightConfiguration;
|
||||
|
||||
include!("./dirs.rs");
|
||||
|
||||
|
|
@ -11,6 +11,10 @@ lazy_static! {
|
|||
static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone());
|
||||
}
|
||||
|
||||
pub fn test_loader<'a>() -> &'a Loader {
|
||||
&*TEST_LOADER
|
||||
}
|
||||
|
||||
pub fn fixtures_dir<'a>() -> &'static Path {
|
||||
&FIXTURES_DIR
|
||||
}
|
||||
|
|
@ -21,18 +25,33 @@ pub fn get_language(name: &str) -> Language {
|
|||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn get_property_sheet_json(language_name: &str, sheet_name: &str) -> String {
|
||||
let path = GRAMMARS_DIR
|
||||
.join(language_name)
|
||||
.join("src")
|
||||
.join(sheet_name);
|
||||
fs::read_to_string(path).unwrap()
|
||||
pub fn get_language_queries_path(language_name: &str) -> PathBuf {
|
||||
GRAMMARS_DIR.join(language_name).join("queries")
|
||||
}
|
||||
|
||||
pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet<Properties> {
|
||||
let json = get_property_sheet_json(language_name, sheet_name);
|
||||
pub fn get_highlight_config(
|
||||
language_name: &str,
|
||||
injection_query_filename: Option<&str>,
|
||||
highlight_names: &[String],
|
||||
) -> HighlightConfiguration {
|
||||
let language = get_language(language_name);
|
||||
load_property_sheet(language, &json).unwrap()
|
||||
let queries_path = get_language_queries_path(language_name);
|
||||
let highlights_query = fs::read_to_string(queries_path.join("highlights.scm")).unwrap();
|
||||
let injections_query = if let Some(injection_query_filename) = injection_query_filename {
|
||||
fs::read_to_string(queries_path.join(injection_query_filename)).unwrap()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or(String::new());
|
||||
let mut result = HighlightConfiguration::new(
|
||||
language,
|
||||
&highlights_query,
|
||||
&injections_query,
|
||||
&locals_query,
|
||||
)
|
||||
.unwrap();
|
||||
result.configure(highlight_names);
|
||||
result
|
||||
}
|
||||
|
||||
pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
pub(super) mod allocations;
|
||||
pub(super) mod edits;
|
||||
pub(super) mod fixtures;
|
||||
pub(super) mod random;
|
||||
pub(super) mod scope_sequence;
|
||||
pub(super) mod edits;
|
||||
|
|
|
|||
|
|
@ -1,32 +1,92 @@
|
|||
use super::helpers::fixtures::{get_language, get_property_sheet, get_property_sheet_json};
|
||||
use super::helpers::fixtures::{get_highlight_config, get_language, get_language_queries_path};
|
||||
use lazy_static::lazy_static;
|
||||
use std::ffi::CString;
|
||||
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::{ptr, slice, str};
|
||||
use tree_sitter::{Language, PropertySheet};
|
||||
use std::{fs, ptr, slice, str};
|
||||
use tree_sitter_highlight::{
|
||||
c, highlight, highlight_html, Error, Highlight, HighlightEvent, Properties,
|
||||
c, Error, Highlight, HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer,
|
||||
};
|
||||
|
||||
lazy_static! {
|
||||
static ref JS_SHEET: PropertySheet<Properties> =
|
||||
get_property_sheet("javascript", "highlights.json");
|
||||
static ref HTML_SHEET: PropertySheet<Properties> =
|
||||
get_property_sheet("html", "highlights.json");
|
||||
static ref EJS_SHEET: PropertySheet<Properties> =
|
||||
get_property_sheet("embedded-template", "highlights-ejs.json");
|
||||
static ref RUST_SHEET: PropertySheet<Properties> =
|
||||
get_property_sheet("rust", "highlights.json");
|
||||
static ref SCOPE_CLASS_STRINGS: Vec<String> = {
|
||||
let mut result = Vec::new();
|
||||
let mut i = 0;
|
||||
while let Some(highlight) = Highlight::from_usize(i) {
|
||||
result.push(format!("class={:?}", highlight));
|
||||
i += 1;
|
||||
}
|
||||
result
|
||||
};
|
||||
static ref JS_HIGHLIGHT: HighlightConfiguration =
|
||||
get_highlight_config("javascript", Some("injections.scm"), &HIGHLIGHT_NAMES);
|
||||
static ref JSDOC_HIGHLIGHT: HighlightConfiguration =
|
||||
get_highlight_config("jsdoc", None, &HIGHLIGHT_NAMES);
|
||||
static ref HTML_HIGHLIGHT: HighlightConfiguration =
|
||||
get_highlight_config("html", Some("injections.scm"), &HIGHLIGHT_NAMES);
|
||||
static ref EJS_HIGHLIGHT: HighlightConfiguration = get_highlight_config(
|
||||
"embedded-template",
|
||||
Some("injections-ejs.scm"),
|
||||
&HIGHLIGHT_NAMES
|
||||
);
|
||||
static ref RUST_HIGHLIGHT: HighlightConfiguration =
|
||||
get_highlight_config("rust", Some("injections.scm"), &HIGHLIGHT_NAMES);
|
||||
static ref HIGHLIGHT_NAMES: Vec<String> = [
|
||||
"attribute",
|
||||
"carriage-return",
|
||||
"comment",
|
||||
"constant",
|
||||
"constructor",
|
||||
"function.builtin",
|
||||
"function",
|
||||
"embedded",
|
||||
"keyword",
|
||||
"operator",
|
||||
"property.builtin",
|
||||
"property",
|
||||
"punctuation",
|
||||
"punctuation.bracket",
|
||||
"punctuation.delimiter",
|
||||
"punctuation.special",
|
||||
"string",
|
||||
"tag",
|
||||
"type.builtin",
|
||||
"type",
|
||||
"variable.builtin",
|
||||
"variable.parameter",
|
||||
"variable",
|
||||
]
|
||||
.iter()
|
||||
.cloned()
|
||||
.map(String::from)
|
||||
.collect();
|
||||
static ref HTML_ATTRS: Vec<String> = HIGHLIGHT_NAMES
|
||||
.iter()
|
||||
.map(|s| format!("class={}", s))
|
||||
.collect();
|
||||
}
|
||||
|
||||
#[test]
// Plain JavaScript: checks keyword/function/operator/parameter/variable and
// punctuation classification token-by-token on a one-line function expression.
fn test_highlighting_javascript() {
    let source = "const a = function(b) { return b + c; }";
    assert_eq!(
        &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
        &[vec![
            ("const", vec!["keyword"]),
            (" ", vec![]),
            ("a", vec!["function"]),
            (" ", vec![]),
            ("=", vec!["operator"]),
            (" ", vec![]),
            ("function", vec!["keyword"]),
            ("(", vec!["punctuation.bracket"]),
            ("b", vec!["variable.parameter"]),
            (")", vec!["punctuation.bracket"]),
            (" ", vec![]),
            ("{", vec!["punctuation.bracket"]),
            (" ", vec![]),
            ("return", vec!["keyword"]),
            (" ", vec![]),
            ("b", vec!["variable.parameter"]),
            (" ", vec![]),
            ("+", vec!["operator"]),
            (" ", vec![]),
            ("c", vec!["variable"]),
            (";", vec!["punctuation.delimiter"]),
            (" ", vec![]),
            ("}", vec!["punctuation.bracket"]),
        ]]
    );
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -34,57 +94,65 @@ fn test_highlighting_injected_html_in_javascript() {
|
|||
let source = vec!["const s = html `<div>${a < b}</div>`;"].join("\n");
|
||||
|
||||
assert_eq!(
|
||||
&to_token_vector(&source, get_language("javascript"), &JS_SHEET).unwrap(),
|
||||
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
|
||||
&[vec![
|
||||
("const", vec![Highlight::Keyword]),
|
||||
("const", vec!["keyword"]),
|
||||
(" ", vec![]),
|
||||
("s", vec![Highlight::Variable]),
|
||||
("s", vec!["variable"]),
|
||||
(" ", vec![]),
|
||||
("=", vec![Highlight::Operator]),
|
||||
("=", vec!["operator"]),
|
||||
(" ", vec![]),
|
||||
("html", vec![Highlight::Function]),
|
||||
("html", vec!["function"]),
|
||||
(" ", vec![]),
|
||||
("`<", vec![Highlight::String]),
|
||||
("div", vec![Highlight::String, Highlight::Tag]),
|
||||
(">", vec![Highlight::String]),
|
||||
(
|
||||
"${",
|
||||
vec![
|
||||
Highlight::String,
|
||||
Highlight::Embedded,
|
||||
Highlight::PunctuationSpecial
|
||||
]
|
||||
),
|
||||
(
|
||||
"a",
|
||||
vec![Highlight::String, Highlight::Embedded, Highlight::Variable]
|
||||
),
|
||||
(" ", vec![Highlight::String, Highlight::Embedded]),
|
||||
(
|
||||
"<",
|
||||
vec![Highlight::String, Highlight::Embedded, Highlight::Operator]
|
||||
),
|
||||
(" ", vec![Highlight::String, Highlight::Embedded]),
|
||||
(
|
||||
"b",
|
||||
vec![Highlight::String, Highlight::Embedded, Highlight::Variable]
|
||||
),
|
||||
(
|
||||
"}",
|
||||
vec![
|
||||
Highlight::String,
|
||||
Highlight::Embedded,
|
||||
Highlight::PunctuationSpecial
|
||||
]
|
||||
),
|
||||
("</", vec![Highlight::String]),
|
||||
("div", vec![Highlight::String, Highlight::Tag]),
|
||||
(">`", vec![Highlight::String]),
|
||||
(";", vec![Highlight::PunctuationDelimiter]),
|
||||
("`", vec!["string"]),
|
||||
("<", vec!["string", "punctuation.bracket"]),
|
||||
("div", vec!["string", "tag"]),
|
||||
(">", vec!["string", "punctuation.bracket"]),
|
||||
("${", vec!["string", "embedded", "punctuation.special"]),
|
||||
("a", vec!["string", "embedded", "variable"]),
|
||||
(" ", vec!["string", "embedded"]),
|
||||
("<", vec!["string", "embedded", "operator"]),
|
||||
(" ", vec!["string", "embedded"]),
|
||||
("b", vec!["string", "embedded", "variable"]),
|
||||
("}", vec!["string", "embedded", "punctuation.special"]),
|
||||
("</", vec!["string", "punctuation.bracket"]),
|
||||
("div", vec!["string", "tag"]),
|
||||
(">", vec!["string", "punctuation.bracket"]),
|
||||
("`", vec!["string"]),
|
||||
(";", vec!["punctuation.delimiter"]),
|
||||
]]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
// HTML with an injected JavaScript region: the <script> body must be
// highlighted with the JavaScript grammar while the surrounding tags use HTML.
fn test_highlighting_injected_javascript_in_html_mini() {
    let source = "<script>const x = new Thing();</script>";

    assert_eq!(
        &to_token_vector(source, &HTML_HIGHLIGHT).unwrap(),
        &[vec![
            ("<", vec!["punctuation.bracket"]),
            ("script", vec!["tag"]),
            (">", vec!["punctuation.bracket"]),
            ("const", vec!["keyword"]),
            (" ", vec![]),
            ("x", vec!["variable"]),
            (" ", vec![]),
            ("=", vec!["operator"]),
            (" ", vec![]),
            ("new", vec!["keyword"]),
            (" ", vec![]),
            ("Thing", vec!["constructor"]),
            ("(", vec!["punctuation.bracket"]),
            (")", vec!["punctuation.bracket"]),
            (";", vec!["punctuation.delimiter"]),
            ("</", vec!["punctuation.bracket"]),
            ("script", vec!["tag"]),
            (">", vec!["punctuation.bracket"]),
        ],]
    );
}
|
||||
|
||||
#[test]
|
||||
fn test_highlighting_injected_javascript_in_html() {
|
||||
let source = vec![
|
||||
|
|
@ -97,38 +165,44 @@ fn test_highlighting_injected_javascript_in_html() {
|
|||
.join("\n");
|
||||
|
||||
assert_eq!(
|
||||
&to_token_vector(&source, get_language("html"), &HTML_SHEET).unwrap(),
|
||||
&to_token_vector(&source, &HTML_HIGHLIGHT).unwrap(),
|
||||
&[
|
||||
vec![("<", vec![]), ("body", vec![Highlight::Tag]), (">", vec![]),],
|
||||
vec![
|
||||
(" <", vec![]),
|
||||
("script", vec![Highlight::Tag]),
|
||||
(">", vec![]),
|
||||
("<", vec!["punctuation.bracket"]),
|
||||
("body", vec!["tag"]),
|
||||
(">", vec!["punctuation.bracket"]),
|
||||
],
|
||||
vec![
|
||||
(" ", vec![]),
|
||||
("<", vec!["punctuation.bracket"]),
|
||||
("script", vec!["tag"]),
|
||||
(">", vec!["punctuation.bracket"]),
|
||||
],
|
||||
vec![
|
||||
(" ", vec![]),
|
||||
("const", vec![Highlight::Keyword]),
|
||||
("const", vec!["keyword"]),
|
||||
(" ", vec![]),
|
||||
("x", vec![Highlight::Variable]),
|
||||
("x", vec!["variable"]),
|
||||
(" ", vec![]),
|
||||
("=", vec![Highlight::Operator]),
|
||||
("=", vec!["operator"]),
|
||||
(" ", vec![]),
|
||||
("new", vec![Highlight::Keyword]),
|
||||
("new", vec!["keyword"]),
|
||||
(" ", vec![]),
|
||||
("Thing", vec![Highlight::Constructor]),
|
||||
("(", vec![Highlight::PunctuationBracket]),
|
||||
(")", vec![Highlight::PunctuationBracket]),
|
||||
(";", vec![Highlight::PunctuationDelimiter]),
|
||||
("Thing", vec!["constructor"]),
|
||||
("(", vec!["punctuation.bracket"]),
|
||||
(")", vec!["punctuation.bracket"]),
|
||||
(";", vec!["punctuation.delimiter"]),
|
||||
],
|
||||
vec![
|
||||
(" </", vec![]),
|
||||
("script", vec![Highlight::Tag]),
|
||||
(">", vec![]),
|
||||
(" ", vec![]),
|
||||
("</", vec!["punctuation.bracket"]),
|
||||
("script", vec!["tag"]),
|
||||
(">", vec!["punctuation.bracket"]),
|
||||
],
|
||||
vec![
|
||||
("</", vec![]),
|
||||
("body", vec![Highlight::Tag]),
|
||||
(">", vec![]),
|
||||
("</", vec!["punctuation.bracket"]),
|
||||
("body", vec!["tag"]),
|
||||
(">", vec!["punctuation.bracket"]),
|
||||
],
|
||||
]
|
||||
);
|
||||
|
|
@ -147,13 +221,13 @@ fn test_highlighting_multiline_nodes_to_html() {
|
|||
.join("\n");
|
||||
|
||||
assert_eq!(
|
||||
&to_html(&source, get_language("javascript"), &JS_SHEET,).unwrap(),
|
||||
&to_html(&source, &JS_HIGHLIGHT).unwrap(),
|
||||
&[
|
||||
"<span class=Keyword>const</span> <span class=Constant>SOMETHING</span> <span class=Operator>=</span> <span class=String>`</span>\n".to_string(),
|
||||
"<span class=String> one <span class=Embedded><span class=PunctuationSpecial>${</span></span></span>\n".to_string(),
|
||||
"<span class=String><span class=Embedded> <span class=Function>two</span><span class=PunctuationBracket>(</span><span class=PunctuationBracket>)</span></span></span>\n".to_string(),
|
||||
"<span class=String><span class=Embedded> <span class=PunctuationSpecial>}</span></span> three</span>\n".to_string(),
|
||||
"<span class=String>`</span>\n".to_string(),
|
||||
"<span class=keyword>const</span> <span class=constant>SOMETHING</span> <span class=operator>=</span> <span class=string>`</span>\n".to_string(),
|
||||
"<span class=string> one <span class=embedded><span class=punctuation.special>${</span></span></span>\n".to_string(),
|
||||
"<span class=string><span class=embedded> <span class=function>two</span><span class=punctuation.bracket>(</span><span class=punctuation.bracket>)</span></span></span>\n".to_string(),
|
||||
"<span class=string><span class=embedded> <span class=punctuation.special>}</span></span> three</span>\n".to_string(),
|
||||
"<span class=string>`</span>\n".to_string(),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
|
@ -169,51 +243,51 @@ fn test_highlighting_with_local_variable_tracking() {
|
|||
.join("\n");
|
||||
|
||||
assert_eq!(
|
||||
&to_token_vector(&source, get_language("javascript"), &JS_SHEET).unwrap(),
|
||||
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
|
||||
&[
|
||||
vec![
|
||||
("module", vec![Highlight::VariableBuiltin]),
|
||||
(".", vec![Highlight::PunctuationDelimiter]),
|
||||
("exports", vec![Highlight::Property]),
|
||||
("module", vec!["variable.builtin"]),
|
||||
(".", vec!["punctuation.delimiter"]),
|
||||
("exports", vec!["function"]),
|
||||
(" ", vec![]),
|
||||
("=", vec![Highlight::Operator]),
|
||||
("=", vec!["operator"]),
|
||||
(" ", vec![]),
|
||||
("function", vec![Highlight::Keyword]),
|
||||
("function", vec!["keyword"]),
|
||||
(" ", vec![]),
|
||||
("a", vec![Highlight::Function]),
|
||||
("(", vec![Highlight::PunctuationBracket]),
|
||||
("b", vec![Highlight::VariableParameter]),
|
||||
(")", vec![Highlight::PunctuationBracket]),
|
||||
("a", vec!["function"]),
|
||||
("(", vec!["punctuation.bracket"]),
|
||||
("b", vec!["variable.parameter"]),
|
||||
(")", vec!["punctuation.bracket"]),
|
||||
(" ", vec![]),
|
||||
("{", vec![Highlight::PunctuationBracket])
|
||||
("{", vec!["punctuation.bracket"])
|
||||
],
|
||||
vec![
|
||||
(" ", vec![]),
|
||||
("const", vec![Highlight::Keyword]),
|
||||
("const", vec!["keyword"]),
|
||||
(" ", vec![]),
|
||||
("module", vec![Highlight::Variable]),
|
||||
("module", vec!["variable"]),
|
||||
(" ", vec![]),
|
||||
("=", vec![Highlight::Operator]),
|
||||
("=", vec!["operator"]),
|
||||
(" ", vec![]),
|
||||
("c", vec![Highlight::Variable]),
|
||||
(";", vec![Highlight::PunctuationDelimiter])
|
||||
("c", vec!["variable"]),
|
||||
(";", vec!["punctuation.delimiter"])
|
||||
],
|
||||
vec![
|
||||
(" ", vec![]),
|
||||
("console", vec![Highlight::VariableBuiltin]),
|
||||
(".", vec![Highlight::PunctuationDelimiter]),
|
||||
("log", vec![Highlight::Function]),
|
||||
("(", vec![Highlight::PunctuationBracket]),
|
||||
("console", vec!["variable.builtin"]),
|
||||
(".", vec!["punctuation.delimiter"]),
|
||||
("log", vec!["function"]),
|
||||
("(", vec!["punctuation.bracket"]),
|
||||
// Not a builtin, because `module` was defined as a variable above.
|
||||
("module", vec![Highlight::Variable]),
|
||||
(",", vec![Highlight::PunctuationDelimiter]),
|
||||
("module", vec!["variable"]),
|
||||
(",", vec!["punctuation.delimiter"]),
|
||||
(" ", vec![]),
|
||||
// A parameter, because `b` was defined as a parameter above.
|
||||
("b", vec![Highlight::VariableParameter]),
|
||||
(")", vec![Highlight::PunctuationBracket]),
|
||||
(";", vec![Highlight::PunctuationDelimiter]),
|
||||
("b", vec!["variable.parameter"]),
|
||||
(")", vec!["punctuation.bracket"]),
|
||||
(";", vec!["punctuation.delimiter"]),
|
||||
],
|
||||
vec![("}", vec![Highlight::PunctuationBracket])]
|
||||
vec![("}", vec!["punctuation.bracket"])]
|
||||
],
|
||||
);
|
||||
}
|
||||
|
|
@ -234,41 +308,95 @@ fn test_highlighting_empty_lines() {
|
|||
.join("\n");
|
||||
|
||||
assert_eq!(
|
||||
&to_html(&source, get_language("javascript"), &JS_SHEET,).unwrap(),
|
||||
&to_html(&source, &JS_HIGHLIGHT,).unwrap(),
|
||||
&[
|
||||
"<span class=Keyword>class</span> <span class=Constructor>A</span> <span class=PunctuationBracket>{</span>\n".to_string(),
|
||||
"<span class=keyword>class</span> <span class=constructor>A</span> <span class=punctuation.bracket>{</span>\n".to_string(),
|
||||
"\n".to_string(),
|
||||
" <span class=Function>b</span><span class=PunctuationBracket>(</span><span class=VariableParameter>c</span><span class=PunctuationBracket>)</span> <span class=PunctuationBracket>{</span>\n".to_string(),
|
||||
" <span class=function>b</span><span class=punctuation.bracket>(</span><span class=variable.parameter>c</span><span class=punctuation.bracket>)</span> <span class=punctuation.bracket>{</span>\n".to_string(),
|
||||
"\n".to_string(),
|
||||
" <span class=Function>d</span><span class=PunctuationBracket>(</span><span class=Variable>e</span><span class=PunctuationBracket>)</span>\n".to_string(),
|
||||
" <span class=function>d</span><span class=punctuation.bracket>(</span><span class=variable>e</span><span class=punctuation.bracket>)</span>\n".to_string(),
|
||||
"\n".to_string(),
|
||||
" <span class=PunctuationBracket>}</span>\n".to_string(),
|
||||
" <span class=punctuation.bracket>}</span>\n".to_string(),
|
||||
"\n".to_string(),
|
||||
"<span class=PunctuationBracket>}</span>\n".to_string(),
|
||||
"<span class=punctuation.bracket>}</span>\n".to_string(),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_highlighting_ejs() {
|
||||
let source = vec!["<div><% foo() %></div>"].join("\n");
|
||||
fn test_highlighting_carriage_returns() {
|
||||
let source = "a = \"a\rb\"\r\nb\r";
|
||||
|
||||
assert_eq!(
|
||||
&to_token_vector(&source, get_language("embedded-template"), &EJS_SHEET).unwrap(),
|
||||
&to_html(&source, &JS_HIGHLIGHT).unwrap(),
|
||||
&[
|
||||
"<span class=variable>a</span> <span class=operator>=</span> <span class=string>"a<span class=carriage-return></span>b"</span>\n",
|
||||
"<span class=variable>b</span>\n",
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_highlighting_ejs_with_html_and_javascript() {
|
||||
let source = vec!["<div><% foo() %></div><script> bar() </script>"].join("\n");
|
||||
|
||||
assert_eq!(
|
||||
&to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(),
|
||||
&[[
|
||||
("<", vec![]),
|
||||
("div", vec![Highlight::Tag]),
|
||||
(">", vec![]),
|
||||
("<%", vec![Highlight::Keyword]),
|
||||
("<", vec!["punctuation.bracket"]),
|
||||
("div", vec!["tag"]),
|
||||
(">", vec!["punctuation.bracket"]),
|
||||
("<%", vec!["keyword"]),
|
||||
(" ", vec![]),
|
||||
("foo", vec![Highlight::Function]),
|
||||
("(", vec![Highlight::PunctuationBracket]),
|
||||
(")", vec![Highlight::PunctuationBracket]),
|
||||
("foo", vec!["function"]),
|
||||
("(", vec!["punctuation.bracket"]),
|
||||
(")", vec!["punctuation.bracket"]),
|
||||
(" ", vec![]),
|
||||
("%>", vec![Highlight::Keyword]),
|
||||
("</", vec![]),
|
||||
("div", vec![Highlight::Tag]),
|
||||
(">", vec![])
|
||||
("%>", vec!["keyword"]),
|
||||
("</", vec!["punctuation.bracket"]),
|
||||
("div", vec!["tag"]),
|
||||
(">", vec!["punctuation.bracket"]),
|
||||
("<", vec!["punctuation.bracket"]),
|
||||
("script", vec!["tag"]),
|
||||
(">", vec!["punctuation.bracket"]),
|
||||
(" ", vec![]),
|
||||
("bar", vec!["function"]),
|
||||
("(", vec!["punctuation.bracket"]),
|
||||
(")", vec!["punctuation.bracket"]),
|
||||
(" ", vec![]),
|
||||
("</", vec!["punctuation.bracket"]),
|
||||
("script", vec!["tag"]),
|
||||
(">", vec!["punctuation.bracket"]),
|
||||
]],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_highlighting_javascript_with_jsdoc() {
|
||||
// Regression test: the middle comment has no highlights. This should not prevent
|
||||
// later injections from highlighting properly.
|
||||
let source = vec!["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n");
|
||||
|
||||
assert_eq!(
|
||||
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
|
||||
&[[
|
||||
("a", vec!["variable"]),
|
||||
(" ", vec![]),
|
||||
("/* ", vec!["comment"]),
|
||||
("@see", vec!["comment", "keyword"]),
|
||||
(" a */", vec!["comment"]),
|
||||
(" ", vec![]),
|
||||
("b", vec!["variable"]),
|
||||
(";", vec!["punctuation.delimiter"]),
|
||||
(" ", vec![]),
|
||||
("/* nothing */", vec!["comment"]),
|
||||
(" ", vec![]),
|
||||
("c", vec!["variable"]),
|
||||
(";", vec!["punctuation.delimiter"]),
|
||||
(" ", vec![]),
|
||||
("/* ", vec!["comment"]),
|
||||
("@see", vec!["comment", "keyword"]),
|
||||
(" b */", vec!["comment"])
|
||||
]],
|
||||
);
|
||||
}
|
||||
|
|
@ -278,33 +406,36 @@ fn test_highlighting_with_content_children_included() {
|
|||
let source = vec!["assert!(", " a.b.c() < D::e::<F>()", ");"].join("\n");
|
||||
|
||||
assert_eq!(
|
||||
&to_token_vector(&source, get_language("rust"), &RUST_SHEET).unwrap(),
|
||||
&to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(),
|
||||
&[
|
||||
vec![
|
||||
("assert", vec![Highlight::Function]),
|
||||
("!", vec![Highlight::Function]),
|
||||
("(", vec![Highlight::PunctuationBracket]),
|
||||
("assert", vec!["function"]),
|
||||
("!", vec!["function"]),
|
||||
("(", vec!["punctuation.bracket"]),
|
||||
],
|
||||
vec![
|
||||
(" a", vec![]),
|
||||
(".", vec![Highlight::PunctuationDelimiter]),
|
||||
("b", vec![Highlight::Property]),
|
||||
(".", vec![Highlight::PunctuationDelimiter]),
|
||||
("c", vec![Highlight::Function]),
|
||||
("(", vec![Highlight::PunctuationBracket]),
|
||||
(")", vec![Highlight::PunctuationBracket]),
|
||||
(".", vec!["punctuation.delimiter"]),
|
||||
("b", vec!["property"]),
|
||||
(".", vec!["punctuation.delimiter"]),
|
||||
("c", vec!["function"]),
|
||||
("(", vec!["punctuation.bracket"]),
|
||||
(")", vec!["punctuation.bracket"]),
|
||||
(" < ", vec![]),
|
||||
("D", vec![Highlight::Type]),
|
||||
("::", vec![Highlight::PunctuationDelimiter]),
|
||||
("e", vec![Highlight::Function]),
|
||||
("::", vec![Highlight::PunctuationDelimiter]),
|
||||
("<", vec![Highlight::PunctuationBracket]),
|
||||
("F", vec![Highlight::Type]),
|
||||
(">", vec![Highlight::PunctuationBracket]),
|
||||
("(", vec![Highlight::PunctuationBracket]),
|
||||
(")", vec![Highlight::PunctuationBracket]),
|
||||
("D", vec!["type"]),
|
||||
("::", vec!["punctuation.delimiter"]),
|
||||
("e", vec!["function"]),
|
||||
("::", vec!["punctuation.delimiter"]),
|
||||
("<", vec!["punctuation.bracket"]),
|
||||
("F", vec!["type"]),
|
||||
(">", vec!["punctuation.bracket"]),
|
||||
("(", vec!["punctuation.bracket"]),
|
||||
(")", vec!["punctuation.bracket"]),
|
||||
],
|
||||
vec![(")", vec![Highlight::PunctuationBracket]), (";", vec![]),]
|
||||
vec![
|
||||
(")", vec!["punctuation.bracket"]),
|
||||
(";", vec!["punctuation.delimiter"]),
|
||||
]
|
||||
],
|
||||
);
|
||||
}
|
||||
|
|
@ -325,73 +456,97 @@ fn test_highlighting_cancellation() {
|
|||
test_language_for_injection_string(name)
|
||||
};
|
||||
|
||||
// Constructing the highlighter, which eagerly parses the outer document,
|
||||
// should not fail.
|
||||
let highlighter = highlight(
|
||||
source.as_bytes(),
|
||||
get_language("html"),
|
||||
&HTML_SHEET,
|
||||
Some(&cancellation_flag),
|
||||
injection_callback,
|
||||
)
|
||||
.unwrap();
|
||||
// The initial `highlight` call, which eagerly parses the outer document, should not fail.
|
||||
let mut highlighter = Highlighter::new();
|
||||
let events = highlighter
|
||||
.highlight(
|
||||
&HTML_HIGHLIGHT,
|
||||
source.as_bytes(),
|
||||
Some(&cancellation_flag),
|
||||
injection_callback,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Iterating the scopes should not panic. It should return an error
|
||||
// once the cancellation is detected.
|
||||
for event in highlighter {
|
||||
// Iterating the scopes should not panic. It should return an error once the
|
||||
// cancellation is detected.
|
||||
for event in events {
|
||||
if let Err(e) = event {
|
||||
assert_eq!(e, Error::Cancelled);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
panic!("Expected an error while iterating highlighter");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_highlighting_via_c_api() {
|
||||
let js_lang = get_language("javascript");
|
||||
let html_lang = get_language("html");
|
||||
let js_sheet = get_property_sheet_json("javascript", "highlights.json");
|
||||
let js_sheet = c_string(&js_sheet);
|
||||
let html_sheet = get_property_sheet_json("html", "highlights.json");
|
||||
let html_sheet = c_string(&html_sheet);
|
||||
let highlights = vec![
|
||||
"class=tag\0",
|
||||
"class=function\0",
|
||||
"class=string\0",
|
||||
"class=keyword\0",
|
||||
];
|
||||
let highlight_names = highlights
|
||||
.iter()
|
||||
.map(|h| h["class=".len()..].as_ptr() as *const i8)
|
||||
.collect::<Vec<_>>();
|
||||
let highlight_attrs = highlights
|
||||
.iter()
|
||||
.map(|h| h.as_bytes().as_ptr() as *const i8)
|
||||
.collect::<Vec<_>>();
|
||||
let highlighter = c::ts_highlighter_new(
|
||||
&highlight_names[0] as *const *const i8,
|
||||
&highlight_attrs[0] as *const *const i8,
|
||||
highlights.len() as u32,
|
||||
);
|
||||
|
||||
let class_tag = c_string("class=tag");
|
||||
let class_function = c_string("class=function");
|
||||
let class_string = c_string("class=string");
|
||||
let class_keyword = c_string("class=keyword");
|
||||
|
||||
let js_scope_name = c_string("source.js");
|
||||
let html_scope_name = c_string("text.html.basic");
|
||||
let injection_regex = c_string("^(javascript|js)$");
|
||||
let source_code = c_string("<script>\nconst a = b('c');\nc.d();\n</script>");
|
||||
|
||||
let attribute_strings = &mut [ptr::null(); Highlight::Unknown as usize + 1];
|
||||
attribute_strings[Highlight::Tag as usize] = class_tag.as_ptr();
|
||||
attribute_strings[Highlight::String as usize] = class_string.as_ptr();
|
||||
attribute_strings[Highlight::Keyword as usize] = class_keyword.as_ptr();
|
||||
attribute_strings[Highlight::Function as usize] = class_function.as_ptr();
|
||||
let js_scope = c_string("source.js");
|
||||
let js_injection_regex = c_string("^javascript");
|
||||
let language = get_language("javascript");
|
||||
let queries = get_language_queries_path("javascript");
|
||||
let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
|
||||
let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
|
||||
let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap();
|
||||
c::ts_highlighter_add_language(
|
||||
highlighter,
|
||||
js_scope.as_ptr(),
|
||||
js_injection_regex.as_ptr(),
|
||||
language,
|
||||
highlights_query.as_ptr() as *const i8,
|
||||
injections_query.as_ptr() as *const i8,
|
||||
locals_query.as_ptr() as *const i8,
|
||||
highlights_query.len() as u32,
|
||||
injections_query.len() as u32,
|
||||
locals_query.len() as u32,
|
||||
);
|
||||
|
||||
let html_scope = c_string("text.html.basic");
|
||||
let html_injection_regex = c_string("^html");
|
||||
let language = get_language("html");
|
||||
let queries = get_language_queries_path("html");
|
||||
let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
|
||||
let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
|
||||
c::ts_highlighter_add_language(
|
||||
highlighter,
|
||||
html_scope.as_ptr(),
|
||||
html_injection_regex.as_ptr(),
|
||||
language,
|
||||
highlights_query.as_ptr() as *const i8,
|
||||
injections_query.as_ptr() as *const i8,
|
||||
ptr::null(),
|
||||
highlights_query.len() as u32,
|
||||
injections_query.len() as u32,
|
||||
0,
|
||||
);
|
||||
|
||||
let highlighter = c::ts_highlighter_new(attribute_strings.as_ptr());
|
||||
let buffer = c::ts_highlight_buffer_new();
|
||||
|
||||
c::ts_highlighter_add_language(
|
||||
highlighter,
|
||||
html_scope_name.as_ptr(),
|
||||
html_lang,
|
||||
html_sheet.as_ptr(),
|
||||
ptr::null_mut(),
|
||||
);
|
||||
c::ts_highlighter_add_language(
|
||||
highlighter,
|
||||
js_scope_name.as_ptr(),
|
||||
js_lang,
|
||||
js_sheet.as_ptr(),
|
||||
injection_regex.as_ptr(),
|
||||
);
|
||||
c::ts_highlighter_highlight(
|
||||
highlighter,
|
||||
html_scope_name.as_ptr(),
|
||||
html_scope.as_ptr(),
|
||||
source_code.as_ptr(),
|
||||
source_code.as_bytes().len() as u32,
|
||||
buffer,
|
||||
|
|
@ -421,8 +576,8 @@ fn test_highlighting_via_c_api() {
|
|||
lines,
|
||||
vec![
|
||||
"<<span class=tag>script</span>>\n",
|
||||
"<span class=keyword>const</span> <span>a</span> <span>=</span> <span class=function>b</span><span>(</span><span class=string>'c'</span><span>)</span><span>;</span>\n",
|
||||
"<span>c</span><span>.</span><span class=function>d</span><span>(</span><span>)</span><span>;</span>\n",
|
||||
"<span class=keyword>const</span> a = <span class=function>b</span>(<span class=string>'c'</span>);\n",
|
||||
"c.<span class=function>d</span>();\n",
|
||||
"</<span class=tag>script</span>>\n",
|
||||
]
|
||||
);
|
||||
|
|
@ -433,7 +588,7 @@ fn test_highlighting_via_c_api() {
|
|||
|
||||
#[test]
|
||||
fn test_decode_utf8_lossy() {
|
||||
use tree_sitter_highlight::util::LossyUtf8;
|
||||
use tree_sitter::LossyUtf8;
|
||||
|
||||
let parts = LossyUtf8::new(b"hi").collect::<Vec<_>>();
|
||||
assert_eq!(parts, vec!["hi"]);
|
||||
|
|
@ -452,50 +607,60 @@ fn c_string(s: &str) -> CString {
|
|||
CString::new(s.as_bytes().to_vec()).unwrap()
|
||||
}
|
||||
|
||||
fn test_language_for_injection_string<'a>(
|
||||
string: &str,
|
||||
) -> Option<(Language, &'a PropertySheet<Properties>)> {
|
||||
fn test_language_for_injection_string<'a>(string: &str) -> Option<&'a HighlightConfiguration> {
|
||||
match string {
|
||||
"javascript" => Some((get_language("javascript"), &JS_SHEET)),
|
||||
"html" => Some((get_language("html"), &HTML_SHEET)),
|
||||
"rust" => Some((get_language("rust"), &RUST_SHEET)),
|
||||
"javascript" => Some(&JS_HIGHLIGHT),
|
||||
"html" => Some(&HTML_HIGHLIGHT),
|
||||
"rust" => Some(&RUST_HIGHLIGHT),
|
||||
"jsdoc" => Some(&JSDOC_HIGHLIGHT),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn to_html<'a>(
|
||||
src: &'a str,
|
||||
language: Language,
|
||||
property_sheet: &'a PropertySheet<Properties>,
|
||||
language_config: &'a HighlightConfiguration,
|
||||
) -> Result<Vec<String>, Error> {
|
||||
highlight_html(
|
||||
src.as_bytes(),
|
||||
language,
|
||||
property_sheet,
|
||||
let src = src.as_bytes();
|
||||
let mut renderer = HtmlRenderer::new();
|
||||
let mut highlighter = Highlighter::new();
|
||||
let events = highlighter.highlight(
|
||||
language_config,
|
||||
src,
|
||||
None,
|
||||
&test_language_for_injection_string,
|
||||
&|highlight| SCOPE_CLASS_STRINGS[highlight as usize].as_str(),
|
||||
)
|
||||
)?;
|
||||
|
||||
renderer.set_carriage_return_highlight(
|
||||
HIGHLIGHT_NAMES
|
||||
.iter()
|
||||
.position(|s| s == "carriage-return")
|
||||
.map(Highlight),
|
||||
);
|
||||
renderer
|
||||
.render(events, src, &|highlight| HTML_ATTRS[highlight.0].as_bytes())
|
||||
.unwrap();
|
||||
Ok(renderer.lines().map(|s| s.to_string()).collect())
|
||||
}
|
||||
|
||||
fn to_token_vector<'a>(
|
||||
src: &'a str,
|
||||
language: Language,
|
||||
property_sheet: &'a PropertySheet<Properties>,
|
||||
) -> Result<Vec<Vec<(&'a str, Vec<Highlight>)>>, Error> {
|
||||
language_config: &'a HighlightConfiguration,
|
||||
) -> Result<Vec<Vec<(&'a str, Vec<&'static str>)>>, Error> {
|
||||
let src = src.as_bytes();
|
||||
let mut highlighter = Highlighter::new();
|
||||
let mut lines = Vec::new();
|
||||
let mut highlights = Vec::new();
|
||||
let mut line = Vec::new();
|
||||
for event in highlight(
|
||||
let events = highlighter.highlight(
|
||||
language_config,
|
||||
src,
|
||||
language,
|
||||
property_sheet,
|
||||
None,
|
||||
&test_language_for_injection_string,
|
||||
)? {
|
||||
)?;
|
||||
for event in events {
|
||||
match event? {
|
||||
HighlightEvent::HighlightStart(s) => highlights.push(s),
|
||||
HighlightEvent::HighlightStart(s) => highlights.push(HIGHLIGHT_NAMES[s.0].as_str()),
|
||||
HighlightEvent::HighlightEnd => {
|
||||
highlights.pop();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,5 +3,8 @@ mod helpers;
|
|||
mod highlight_test;
|
||||
mod node_test;
|
||||
mod parser_test;
|
||||
mod properties_test;
|
||||
mod pathological_test;
|
||||
mod query_test;
|
||||
mod tags_test;
|
||||
mod test_highlight_test;
|
||||
mod tree_test;
|
||||
|
|
|
|||
|
|
@ -1,62 +0,0 @@
|
|||
use super::helpers::fixtures::get_test_language;
|
||||
use crate::generate::generate_parser_for_grammar;
|
||||
use tree_sitter::Parser;
|
||||
|
||||
#[test]
|
||||
fn test_basic_node_refs() {
|
||||
let (parser_name, parser_code) = generate_parser_for_grammar(
|
||||
r#"
|
||||
{
|
||||
"name": "test_grammar_with_refs",
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s+"}
|
||||
],
|
||||
"rules": {
|
||||
"rule_a": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "REF",
|
||||
"value": "ref_1",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "child-1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "child-2"
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "REF",
|
||||
"value": "ref_2",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "child-3"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let mut parser = Parser::new();
|
||||
let language = get_test_language(&parser_name, &parser_code, None);
|
||||
parser.set_language(language).unwrap();
|
||||
|
||||
let tree = parser.parse("child-1 child-2 child-3", None).unwrap();
|
||||
let root_node = tree.root_node();
|
||||
assert_eq!(root_node.child_by_ref("ref_1"), root_node.child(0));
|
||||
assert_eq!(root_node.child_by_ref("ref_2"), root_node.child(2));
|
||||
}
|
||||
|
|
@ -167,6 +167,79 @@ fn test_node_child() {
|
|||
assert_eq!(tree.root_node().parent(), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_node_children() {
|
||||
let tree = parse_json_example();
|
||||
let mut cursor = tree.walk();
|
||||
let array_node = tree.root_node().child(0).unwrap();
|
||||
assert_eq!(
|
||||
array_node
|
||||
.children(&mut cursor)
|
||||
.map(|n| n.kind())
|
||||
.collect::<Vec<_>>(),
|
||||
&["[", "number", ",", "false", ",", "object", "]",]
|
||||
);
|
||||
assert_eq!(
|
||||
array_node
|
||||
.named_children(&mut cursor)
|
||||
.map(|n| n.kind())
|
||||
.collect::<Vec<_>>(),
|
||||
&["number", "false", "object"]
|
||||
);
|
||||
let object_node = array_node
|
||||
.named_children(&mut cursor)
|
||||
.find(|n| n.kind() == "object")
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
object_node
|
||||
.children(&mut cursor)
|
||||
.map(|n| n.kind())
|
||||
.collect::<Vec<_>>(),
|
||||
&["{", "pair", "}",]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_node_children_by_field_name() {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("python")).unwrap();
|
||||
let source = "
|
||||
if one:
|
||||
a()
|
||||
elif two:
|
||||
b()
|
||||
elif three:
|
||||
c()
|
||||
elif four:
|
||||
d()
|
||||
";
|
||||
|
||||
let tree = parser.parse(source, None).unwrap();
|
||||
let node = tree.root_node().child(0).unwrap();
|
||||
assert_eq!(node.kind(), "if_statement");
|
||||
let mut cursor = tree.walk();
|
||||
let alternatives = node.children_by_field_name("alternative", &mut cursor);
|
||||
let alternative_texts =
|
||||
alternatives.map(|n| &source[n.child_by_field_name("condition").unwrap().byte_range()]);
|
||||
assert_eq!(
|
||||
alternative_texts.collect::<Vec<_>>(),
|
||||
&["two", "three", "four",]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_node_parent_of_child_by_field_name() {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("javascript")).unwrap();
|
||||
let tree = parser.parse("foo(a().b[0].c.d.e())", None).unwrap();
|
||||
let call_node = tree.root_node().named_child(0).unwrap().named_child(0).unwrap();
|
||||
assert_eq!(call_node.kind(), "call_expression");
|
||||
|
||||
// Regression test - when a field points to a hidden node (in this case, `_expression`)
|
||||
// the hidden node should not be added to the node parent cache.
|
||||
assert_eq!(call_node.child_by_field_name("function").unwrap().parent(), Some(call_node));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_node_named_child() {
|
||||
let tree = parse_json_example();
|
||||
|
|
@ -627,6 +700,63 @@ fn test_node_is_named_but_aliased_as_anonymous() {
|
|||
assert_eq!(root_node.named_child(0).unwrap().kind(), "c");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_node_numeric_symbols_respect_simple_aliases() {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("python")).unwrap();
|
||||
|
||||
// Example 1:
|
||||
// Python argument lists can contain "splat" arguments, which are not allowed within
|
||||
// other expressions. This includes `parenthesized_list_splat` nodes like `(*b)`. These
|
||||
// `parenthesized_list_splat` nodes are aliased as `parenthesized_expression`. Their numeric
|
||||
// `symbol`, aka `kind_id` should match that of a normal `parenthesized_expression`.
|
||||
let tree = parser.parse("(a((*b)))", None).unwrap();
|
||||
let root = tree.root_node();
|
||||
assert_eq!(
|
||||
root.to_sexp(),
|
||||
"(module (expression_statement (parenthesized_expression (call function: (identifier) arguments: (argument_list (parenthesized_expression (list_splat (identifier))))))))",
|
||||
);
|
||||
|
||||
let outer_expr_node = root.child(0).unwrap().child(0).unwrap();
|
||||
assert_eq!(outer_expr_node.kind(), "parenthesized_expression");
|
||||
|
||||
let inner_expr_node = outer_expr_node
|
||||
.named_child(0)
|
||||
.unwrap()
|
||||
.child_by_field_name("arguments")
|
||||
.unwrap()
|
||||
.named_child(0)
|
||||
.unwrap();
|
||||
assert_eq!(inner_expr_node.kind(), "parenthesized_expression");
|
||||
assert_eq!(inner_expr_node.kind_id(), outer_expr_node.kind_id());
|
||||
|
||||
// Example 2:
|
||||
// Ruby handles the unary (negative) and binary (minus) `-` operators using two different
|
||||
// tokens. One or more of these is an external token that's aliased as `-`. Their numeric
|
||||
// kind ids should match.
|
||||
parser.set_language(get_language("ruby")).unwrap();
|
||||
let tree = parser.parse("-a - b", None).unwrap();
|
||||
let root = tree.root_node();
|
||||
assert_eq!(
|
||||
root.to_sexp(),
|
||||
"(program (binary left: (unary operand: (identifier)) right: (identifier)))",
|
||||
);
|
||||
|
||||
let binary_node = root.child(0).unwrap();
|
||||
assert_eq!(binary_node.kind(), "binary");
|
||||
|
||||
let unary_minus_node = binary_node
|
||||
.child_by_field_name("left")
|
||||
.unwrap()
|
||||
.child(0)
|
||||
.unwrap();
|
||||
assert_eq!(unary_minus_node.kind(), "-");
|
||||
|
||||
let binary_minus_node = binary_node.child_by_field_name("operator").unwrap();
|
||||
assert_eq!(binary_minus_node.kind(), "-");
|
||||
assert_eq!(unary_minus_node.kind_id(), binary_minus_node.kind_id());
|
||||
}
|
||||
|
||||
fn get_all_nodes(tree: &Tree) -> Vec<Node> {
|
||||
let mut result = Vec::new();
|
||||
let mut visited_children = false;
|
||||
|
|
|
|||
|
|
@ -1,13 +1,14 @@
|
|||
use super::helpers::allocations;
|
||||
use super::helpers::edits::ReadRecorder;
|
||||
use super::helpers::fixtures::{get_language, get_test_language};
|
||||
use crate::generate::generate_parser_for_grammar;
|
||||
use crate::parse::{perform_edit, Edit};
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::{thread, time};
|
||||
use tree_sitter::{InputEdit, LogType, Parser, Point, Range};
|
||||
use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range};
|
||||
|
||||
#[test]
|
||||
fn test_basic_parsing() {
|
||||
fn test_parsing_simple_string() {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("rust")).unwrap();
|
||||
|
||||
|
|
@ -26,7 +27,11 @@ fn test_basic_parsing() {
|
|||
|
||||
assert_eq!(
|
||||
root_node.to_sexp(),
|
||||
"(source_file (struct_item (type_identifier) (field_declaration_list)) (function_item (identifier) (parameters) (block)))"
|
||||
concat!(
|
||||
"(source_file ",
|
||||
"(struct_item name: (type_identifier) body: (field_declaration_list)) ",
|
||||
"(function_item name: (identifier) parameters: (parameters) body: (block)))"
|
||||
)
|
||||
);
|
||||
|
||||
let struct_node = root_node.child(0).unwrap();
|
||||
|
|
@ -118,7 +123,17 @@ fn test_parsing_with_custom_utf8_input() {
|
|||
.unwrap();
|
||||
|
||||
let root = tree.root_node();
|
||||
assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))");
|
||||
assert_eq!(
|
||||
root.to_sexp(),
|
||||
concat!(
|
||||
"(source_file ",
|
||||
"(function_item ",
|
||||
"(visibility_modifier) ",
|
||||
"name: (identifier) ",
|
||||
"parameters: (parameters) ",
|
||||
"body: (block (integer_literal))))"
|
||||
)
|
||||
);
|
||||
assert_eq!(root.kind(), "source_file");
|
||||
assert_eq!(root.has_error(), false);
|
||||
assert_eq!(root.child(0).unwrap().kind(), "function_item");
|
||||
|
|
@ -154,7 +169,10 @@ fn test_parsing_with_custom_utf16_input() {
|
|||
.unwrap();
|
||||
|
||||
let root = tree.root_node();
|
||||
assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))");
|
||||
assert_eq!(
|
||||
root.to_sexp(),
|
||||
"(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))"
|
||||
);
|
||||
assert_eq!(root.kind(), "source_file");
|
||||
assert_eq!(root.has_error(), false);
|
||||
assert_eq!(root.child(0).unwrap().kind(), "function_item");
|
||||
|
|
@ -175,7 +193,10 @@ fn test_parsing_with_callback_returning_owned_strings() {
|
|||
.unwrap();
|
||||
|
||||
let root = tree.root_node();
|
||||
assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))");
|
||||
assert_eq!(
|
||||
root.to_sexp(),
|
||||
"(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -192,7 +213,7 @@ fn test_parsing_text_with_byte_order_mark() {
|
|||
.unwrap();
|
||||
assert_eq!(
|
||||
tree.root_node().to_sexp(),
|
||||
"(source_file (function_item (identifier) (parameters) (block)))"
|
||||
"(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))"
|
||||
);
|
||||
assert_eq!(tree.root_node().start_byte(), 2);
|
||||
|
||||
|
|
@ -200,7 +221,7 @@ fn test_parsing_text_with_byte_order_mark() {
|
|||
let mut tree = parser.parse("\u{FEFF}fn a() {}", None).unwrap();
|
||||
assert_eq!(
|
||||
tree.root_node().to_sexp(),
|
||||
"(source_file (function_item (identifier) (parameters) (block)))"
|
||||
"(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))"
|
||||
);
|
||||
assert_eq!(tree.root_node().start_byte(), 3);
|
||||
|
||||
|
|
@ -216,7 +237,7 @@ fn test_parsing_text_with_byte_order_mark() {
|
|||
let mut tree = parser.parse(" \u{FEFF}fn a() {}", Some(&tree)).unwrap();
|
||||
assert_eq!(
|
||||
tree.root_node().to_sexp(),
|
||||
"(source_file (ERROR (UNEXPECTED 65279)) (function_item (identifier) (parameters) (block)))"
|
||||
"(source_file (ERROR (UNEXPECTED 65279)) (function_item name: (identifier) parameters: (parameters) body: (block)))"
|
||||
);
|
||||
assert_eq!(tree.root_node().start_byte(), 1);
|
||||
|
||||
|
|
@ -232,11 +253,52 @@ fn test_parsing_text_with_byte_order_mark() {
|
|||
let tree = parser.parse("\u{FEFF}fn a() {}", Some(&tree)).unwrap();
|
||||
assert_eq!(
|
||||
tree.root_node().to_sexp(),
|
||||
"(source_file (function_item (identifier) (parameters) (block)))"
|
||||
"(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))"
|
||||
);
|
||||
assert_eq!(tree.root_node().start_byte(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parsing_invalid_chars_at_eof() {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("json")).unwrap();
|
||||
let tree = parser.parse(b"\xdf", None).unwrap();
|
||||
assert_eq!(tree.root_node().to_sexp(), "(ERROR (UNEXPECTED INVALID))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parsing_unexpected_null_characters_within_source() {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("javascript")).unwrap();
|
||||
let tree = parser.parse(b"var \0 something;", None).unwrap();
|
||||
assert_eq!(
|
||||
tree.root_node().to_sexp(),
|
||||
"(program (variable_declaration (ERROR (UNEXPECTED '\\0')) (variable_declarator name: (identifier))))"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parsing_ends_when_input_callback_returns_empty() {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("javascript")).unwrap();
|
||||
let mut i = 0;
|
||||
let source = b"abcdefghijklmnoqrs";
|
||||
let tree = parser
|
||||
.parse_with(
|
||||
&mut |offset, _| {
|
||||
i += 1;
|
||||
if offset >= 6 {
|
||||
b""
|
||||
} else {
|
||||
&source[offset..usize::min(source.len(), offset + 3)]
|
||||
}
|
||||
},
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(tree.root_node().end_byte(), 6);
|
||||
}
|
||||
|
||||
// Incremental parsing
|
||||
|
||||
#[test]
|
||||
|
|
@ -333,6 +395,18 @@ fn test_parsing_after_editing_end_of_code() {
|
|||
assert_eq!(recorder.strings_read(), vec![" * ", "abc.d)",]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parsing_empty_file_with_reused_tree() {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("rust")).unwrap();
|
||||
|
||||
let tree = parser.parse("", None);
|
||||
parser.parse("", tree.as_ref());
|
||||
|
||||
let tree = parser.parse("\n ", None);
|
||||
parser.parse("\n ", tree.as_ref());
|
||||
}
|
||||
|
||||
// Thread safety
|
||||
|
||||
#[test]
|
||||
|
|
@ -388,7 +462,7 @@ fn test_parsing_on_multiple_threads() {
|
|||
|
||||
#[test]
|
||||
fn test_parsing_cancelled_by_another_thread() {
|
||||
let cancellation_flag = Box::new(AtomicUsize::new(0));
|
||||
let cancellation_flag = std::sync::Arc::new(AtomicUsize::new(0));
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("javascript")).unwrap();
|
||||
|
|
@ -409,9 +483,10 @@ fn test_parsing_cancelled_by_another_thread() {
|
|||
);
|
||||
assert!(tree.is_some());
|
||||
|
||||
let flag = cancellation_flag.clone();
|
||||
let cancel_thread = thread::spawn(move || {
|
||||
thread::sleep(time::Duration::from_millis(100));
|
||||
cancellation_flag.store(1, Ordering::SeqCst);
|
||||
flag.store(1, Ordering::SeqCst);
|
||||
});
|
||||
|
||||
// Infinite input
|
||||
|
|
@ -547,6 +622,56 @@ fn test_parsing_with_a_timeout_and_a_reset() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parsing_with_a_timeout_and_implicit_reset() {
|
||||
allocations::record(|| {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("javascript")).unwrap();
|
||||
|
||||
parser.set_timeout_micros(5);
|
||||
let tree = parser.parse(
|
||||
"[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]",
|
||||
None,
|
||||
);
|
||||
assert!(tree.is_none());
|
||||
|
||||
// Changing the parser's language implicitly resets, discarding
|
||||
// the previous partial parse.
|
||||
parser.set_language(get_language("json")).unwrap();
|
||||
parser.set_timeout_micros(0);
|
||||
let tree = parser.parse(
|
||||
"[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]",
|
||||
None,
|
||||
).unwrap();
|
||||
assert_eq!(
|
||||
tree.root_node()
|
||||
.named_child(0)
|
||||
.unwrap()
|
||||
.named_child(0)
|
||||
.unwrap()
|
||||
.kind(),
|
||||
"null"
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parsing_with_timeout_and_no_completion() {
|
||||
allocations::record(|| {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("javascript")).unwrap();
|
||||
|
||||
parser.set_timeout_micros(5);
|
||||
let tree = parser.parse(
|
||||
"[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]",
|
||||
None,
|
||||
);
|
||||
assert!(tree.is_none());
|
||||
|
||||
// drop the parser when it has an unfinished parse
|
||||
});
|
||||
}
|
||||
|
||||
// Included Ranges
|
||||
|
||||
#[test]
|
||||
|
|
@ -559,7 +684,9 @@ fn test_parsing_with_one_included_range() {
|
|||
let script_content_node = html_tree.root_node().child(1).unwrap().child(1).unwrap();
|
||||
assert_eq!(script_content_node.kind(), "raw_text");
|
||||
|
||||
parser.set_included_ranges(&[script_content_node.range()]);
|
||||
parser
|
||||
.set_included_ranges(&[script_content_node.range()])
|
||||
.unwrap();
|
||||
parser.set_language(get_language("javascript")).unwrap();
|
||||
let js_tree = parser.parse(source_code, None).unwrap();
|
||||
|
||||
|
|
@ -599,26 +726,28 @@ fn test_parsing_with_multiple_included_ranges() {
|
|||
let close_quote_node = template_string_node.child(3).unwrap();
|
||||
|
||||
parser.set_language(get_language("html")).unwrap();
|
||||
parser.set_included_ranges(&[
|
||||
Range {
|
||||
start_byte: open_quote_node.end_byte(),
|
||||
start_point: open_quote_node.end_position(),
|
||||
end_byte: interpolation_node1.start_byte(),
|
||||
end_point: interpolation_node1.start_position(),
|
||||
},
|
||||
Range {
|
||||
start_byte: interpolation_node1.end_byte(),
|
||||
start_point: interpolation_node1.end_position(),
|
||||
end_byte: interpolation_node2.start_byte(),
|
||||
end_point: interpolation_node2.start_position(),
|
||||
},
|
||||
Range {
|
||||
start_byte: interpolation_node2.end_byte(),
|
||||
start_point: interpolation_node2.end_position(),
|
||||
end_byte: close_quote_node.start_byte(),
|
||||
end_point: close_quote_node.start_position(),
|
||||
},
|
||||
]);
|
||||
parser
|
||||
.set_included_ranges(&[
|
||||
Range {
|
||||
start_byte: open_quote_node.end_byte(),
|
||||
start_point: open_quote_node.end_position(),
|
||||
end_byte: interpolation_node1.start_byte(),
|
||||
end_point: interpolation_node1.start_position(),
|
||||
},
|
||||
Range {
|
||||
start_byte: interpolation_node1.end_byte(),
|
||||
start_point: interpolation_node1.end_position(),
|
||||
end_byte: interpolation_node2.start_byte(),
|
||||
end_point: interpolation_node2.start_position(),
|
||||
},
|
||||
Range {
|
||||
start_byte: interpolation_node2.end_byte(),
|
||||
start_point: interpolation_node2.end_position(),
|
||||
end_byte: close_quote_node.start_byte(),
|
||||
end_point: close_quote_node.start_position(),
|
||||
},
|
||||
])
|
||||
.unwrap();
|
||||
let html_tree = parser.parse(source_code, None).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
|
|
@ -667,6 +796,47 @@ fn test_parsing_with_multiple_included_ranges() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parsing_error_in_invalid_included_ranges() {
|
||||
let mut parser = Parser::new();
|
||||
|
||||
// Ranges are not ordered
|
||||
let error = parser
|
||||
.set_included_ranges(&[
|
||||
Range {
|
||||
start_byte: 23,
|
||||
end_byte: 29,
|
||||
start_point: Point::new(0, 23),
|
||||
end_point: Point::new(0, 29),
|
||||
},
|
||||
Range {
|
||||
start_byte: 0,
|
||||
end_byte: 5,
|
||||
start_point: Point::new(0, 0),
|
||||
end_point: Point::new(0, 5),
|
||||
},
|
||||
Range {
|
||||
start_byte: 50,
|
||||
end_byte: 60,
|
||||
start_point: Point::new(0, 50),
|
||||
end_point: Point::new(0, 60),
|
||||
},
|
||||
])
|
||||
.unwrap_err();
|
||||
assert_eq!(error, IncludedRangesError(1));
|
||||
|
||||
// Range ends before it starts
|
||||
let error = parser
|
||||
.set_included_ranges(&[Range {
|
||||
start_byte: 10,
|
||||
end_byte: 5,
|
||||
start_point: Point::new(0, 10),
|
||||
end_point: Point::new(0, 5),
|
||||
}])
|
||||
.unwrap_err();
|
||||
assert_eq!(error, IncludedRangesError(0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parsing_utf16_code_with_errors_at_the_end_of_an_included_range() {
|
||||
let source_code = "<script>a.</script>";
|
||||
|
|
@ -677,12 +847,14 @@ fn test_parsing_utf16_code_with_errors_at_the_end_of_an_included_range() {
|
|||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("javascript")).unwrap();
|
||||
parser.set_included_ranges(&[Range {
|
||||
start_byte,
|
||||
end_byte,
|
||||
start_point: Point::new(0, start_byte),
|
||||
end_point: Point::new(0, end_byte),
|
||||
}]);
|
||||
parser
|
||||
.set_included_ranges(&[Range {
|
||||
start_byte,
|
||||
end_byte,
|
||||
start_point: Point::new(0, start_byte),
|
||||
end_point: Point::new(0, end_byte),
|
||||
}])
|
||||
.unwrap();
|
||||
let tree = parser.parse_utf16(&utf16_source_code, None).unwrap();
|
||||
assert_eq!(tree.root_node().to_sexp(), "(program (ERROR (identifier)))");
|
||||
}
|
||||
|
|
@ -697,20 +869,22 @@ fn test_parsing_with_external_scanner_that_uses_included_range_boundaries() {
|
|||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("javascript")).unwrap();
|
||||
parser.set_included_ranges(&[
|
||||
Range {
|
||||
start_byte: range1_start_byte,
|
||||
end_byte: range1_end_byte,
|
||||
start_point: Point::new(0, range1_start_byte),
|
||||
end_point: Point::new(0, range1_end_byte),
|
||||
},
|
||||
Range {
|
||||
start_byte: range2_start_byte,
|
||||
end_byte: range2_end_byte,
|
||||
start_point: Point::new(0, range2_start_byte),
|
||||
end_point: Point::new(0, range2_end_byte),
|
||||
},
|
||||
]);
|
||||
parser
|
||||
.set_included_ranges(&[
|
||||
Range {
|
||||
start_byte: range1_start_byte,
|
||||
end_byte: range1_end_byte,
|
||||
start_point: Point::new(0, range1_start_byte),
|
||||
end_point: Point::new(0, range1_end_byte),
|
||||
},
|
||||
Range {
|
||||
start_byte: range2_start_byte,
|
||||
end_byte: range2_end_byte,
|
||||
start_point: Point::new(0, range2_start_byte),
|
||||
end_point: Point::new(0, range2_end_byte),
|
||||
},
|
||||
])
|
||||
.unwrap();
|
||||
|
||||
let tree = parser.parse(source_code, None).unwrap();
|
||||
let root = tree.root_node();
|
||||
|
|
@ -758,20 +932,22 @@ fn test_parsing_with_a_newly_excluded_range() {
|
|||
let directive_start = source_code.find("<%=").unwrap();
|
||||
let directive_end = source_code.find("</span>").unwrap();
|
||||
let source_code_end = source_code.len();
|
||||
parser.set_included_ranges(&[
|
||||
Range {
|
||||
start_byte: 0,
|
||||
end_byte: directive_start,
|
||||
start_point: Point::new(0, 0),
|
||||
end_point: Point::new(0, directive_start),
|
||||
},
|
||||
Range {
|
||||
start_byte: directive_end,
|
||||
end_byte: source_code_end,
|
||||
start_point: Point::new(0, directive_end),
|
||||
end_point: Point::new(0, source_code_end),
|
||||
},
|
||||
]);
|
||||
parser
|
||||
.set_included_ranges(&[
|
||||
Range {
|
||||
start_byte: 0,
|
||||
end_byte: directive_start,
|
||||
start_point: Point::new(0, 0),
|
||||
end_point: Point::new(0, directive_start),
|
||||
},
|
||||
Range {
|
||||
start_byte: directive_end,
|
||||
end_byte: source_code_end,
|
||||
start_point: Point::new(0, directive_end),
|
||||
end_point: Point::new(0, source_code_end),
|
||||
},
|
||||
])
|
||||
.unwrap();
|
||||
let tree = parser.parse(&source_code, Some(&first_tree)).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
|
|
@ -809,59 +985,73 @@ fn test_parsing_with_a_newly_excluded_range() {
|
|||
|
||||
#[test]
|
||||
fn test_parsing_with_a_newly_included_range() {
|
||||
let source_code = "<div><%= foo() %></div><div><%= bar() %>";
|
||||
let first_code_start_index = source_code.find(" foo").unwrap();
|
||||
let first_code_end_index = first_code_start_index + 7;
|
||||
let second_code_start_index = source_code.find(" bar").unwrap();
|
||||
let second_code_end_index = second_code_start_index + 7;
|
||||
let ranges = [
|
||||
Range {
|
||||
start_byte: first_code_start_index,
|
||||
end_byte: first_code_end_index,
|
||||
start_point: Point::new(0, first_code_start_index),
|
||||
end_point: Point::new(0, first_code_end_index),
|
||||
},
|
||||
Range {
|
||||
start_byte: second_code_start_index,
|
||||
end_byte: second_code_end_index,
|
||||
start_point: Point::new(0, second_code_start_index),
|
||||
end_point: Point::new(0, second_code_end_index),
|
||||
},
|
||||
];
|
||||
let source_code = "<div><%= foo() %></div><span><%= bar() %></span><%= baz() %>";
|
||||
let range1_start = source_code.find(" foo").unwrap();
|
||||
let range2_start = source_code.find(" bar").unwrap();
|
||||
let range3_start = source_code.find(" baz").unwrap();
|
||||
let range1_end = range1_start + 7;
|
||||
let range2_end = range2_start + 7;
|
||||
let range3_end = range3_start + 7;
|
||||
|
||||
// Parse only the first code directive as JavaScript
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("javascript")).unwrap();
|
||||
parser.set_included_ranges(&ranges[0..1]);
|
||||
let first_tree = parser.parse(source_code, None).unwrap();
|
||||
parser
|
||||
.set_included_ranges(&[simple_range(range1_start, range1_end)])
|
||||
.unwrap();
|
||||
let tree = parser.parse(source_code, None).unwrap();
|
||||
assert_eq!(
|
||||
first_tree.root_node().to_sexp(),
|
||||
tree.root_node().to_sexp(),
|
||||
concat!(
|
||||
"(program",
|
||||
" (expression_statement (call_expression function: (identifier) arguments: (arguments))))",
|
||||
)
|
||||
);
|
||||
|
||||
// Parse both the code directives as JavaScript, using the old tree as a reference.
|
||||
parser.set_included_ranges(&ranges);
|
||||
let tree = parser.parse(&source_code, Some(&first_tree)).unwrap();
|
||||
// Parse both the first and third code directives as JavaScript, using the old tree as a
|
||||
// reference.
|
||||
parser
|
||||
.set_included_ranges(&[
|
||||
simple_range(range1_start, range1_end),
|
||||
simple_range(range3_start, range3_end),
|
||||
])
|
||||
.unwrap();
|
||||
let tree2 = parser.parse(&source_code, Some(&tree)).unwrap();
|
||||
assert_eq!(
|
||||
tree.root_node().to_sexp(),
|
||||
tree2.root_node().to_sexp(),
|
||||
concat!(
|
||||
"(program",
|
||||
" (expression_statement (call_expression function: (identifier) arguments: (arguments)))",
|
||||
" (expression_statement (call_expression function: (identifier) arguments: (arguments))))",
|
||||
)
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
tree.changed_ranges(&first_tree).collect::<Vec<_>>(),
|
||||
vec![Range {
|
||||
start_byte: first_code_end_index + 1,
|
||||
end_byte: second_code_end_index + 1,
|
||||
start_point: Point::new(0, first_code_end_index + 1),
|
||||
end_point: Point::new(0, second_code_end_index + 1),
|
||||
}]
|
||||
tree2.changed_ranges(&tree).collect::<Vec<_>>(),
|
||||
&[simple_range(range1_end, range3_end)]
|
||||
);
|
||||
|
||||
// Parse all three code directives as JavaScript, using the old tree as a
|
||||
// reference.
|
||||
parser
|
||||
.set_included_ranges(&[
|
||||
simple_range(range1_start, range1_end),
|
||||
simple_range(range2_start, range2_end),
|
||||
simple_range(range3_start, range3_end),
|
||||
])
|
||||
.unwrap();
|
||||
let tree3 = parser.parse(&source_code, Some(&tree)).unwrap();
|
||||
assert_eq!(
|
||||
tree3.root_node().to_sexp(),
|
||||
concat!(
|
||||
"(program",
|
||||
" (expression_statement (call_expression function: (identifier) arguments: (arguments)))",
|
||||
" (expression_statement (call_expression function: (identifier) arguments: (arguments)))",
|
||||
" (expression_statement (call_expression function: (identifier) arguments: (arguments))))",
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
tree3.changed_ranges(&tree2).collect::<Vec<_>>(),
|
||||
&[simple_range(range2_start + 1, range2_end - 1)]
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -899,20 +1089,22 @@ fn test_parsing_with_included_ranges_and_missing_tokens() {
|
|||
// There's a missing `a` token at the beginning of the code. It must be inserted
|
||||
// at the beginning of the first included range, not at {0, 0}.
|
||||
let source_code = "__bc__bc__";
|
||||
parser.set_included_ranges(&[
|
||||
Range {
|
||||
start_byte: 2,
|
||||
end_byte: 4,
|
||||
start_point: Point::new(0, 2),
|
||||
end_point: Point::new(0, 4),
|
||||
},
|
||||
Range {
|
||||
start_byte: 6,
|
||||
end_byte: 8,
|
||||
start_point: Point::new(0, 6),
|
||||
end_point: Point::new(0, 8),
|
||||
},
|
||||
]);
|
||||
parser
|
||||
.set_included_ranges(&[
|
||||
Range {
|
||||
start_byte: 2,
|
||||
end_byte: 4,
|
||||
start_point: Point::new(0, 2),
|
||||
end_point: Point::new(0, 4),
|
||||
},
|
||||
Range {
|
||||
start_byte: 6,
|
||||
end_byte: 8,
|
||||
start_point: Point::new(0, 6),
|
||||
end_point: Point::new(0, 8),
|
||||
},
|
||||
])
|
||||
.unwrap();
|
||||
|
||||
let tree = parser.parse(source_code, None).unwrap();
|
||||
let root = tree.root_node();
|
||||
|
|
@ -923,3 +1115,12 @@ fn test_parsing_with_included_ranges_and_missing_tokens() {
|
|||
assert_eq!(root.start_byte(), 2);
|
||||
assert_eq!(root.child(3).unwrap().start_byte(), 4);
|
||||
}
|
||||
|
||||
fn simple_range(start: usize, end: usize) -> Range {
|
||||
Range {
|
||||
start_byte: start,
|
||||
end_byte: end,
|
||||
start_point: Point::new(0, start),
|
||||
end_point: Point::new(0, end),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
15
cli/src/tests/pathological_test.rs
Normal file
15
cli/src/tests/pathological_test.rs
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
use super::helpers::allocations;
|
||||
use super::helpers::fixtures::get_language;
|
||||
use tree_sitter::Parser;
|
||||
|
||||
#[test]
|
||||
fn test_pathological_example_1() {
|
||||
let language = "cpp";
|
||||
let source = r#"*ss<s"ss<sqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<qssqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<sqss<sqss<s._<s<sq>(qqX<sqss<s.ss<sqsssq<(qss<sq&=ss<s<sqss<s._<s<sq<(qqX<sqss<s.ss<sqs"#;
|
||||
|
||||
allocations::record(|| {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language(language)).unwrap();
|
||||
parser.parse(source, None).unwrap();
|
||||
});
|
||||
}
|
||||
|
|
@ -1,265 +0,0 @@
|
|||
use super::helpers::fixtures::get_language;
|
||||
use crate::generate::properties;
|
||||
use serde_derive::Deserialize;
|
||||
use serde_json;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use tree_sitter::{Parser, PropertySheet};
|
||||
#[derive(Debug, Default, Deserialize, PartialEq, Eq)]
|
||||
struct Properties {
|
||||
a: Option<String>,
|
||||
b: Option<String>,
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_walk_with_properties_with_nth_child() {
|
||||
let language = get_language("javascript");
|
||||
let property_sheet = PropertySheet::<Properties>::new(
|
||||
language,
|
||||
&generate_property_sheet_string(
|
||||
"/some/path.css",
|
||||
"
|
||||
binary_expression > identifier:nth-child(2) {
|
||||
a: x;
|
||||
}
|
||||
|
||||
binary_expression > identifier {
|
||||
a: y;
|
||||
}
|
||||
|
||||
identifier {
|
||||
a: z;
|
||||
}
|
||||
",
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let source_code = "a = b || c;";
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language).unwrap();
|
||||
let tree = parser.parse(source_code, None).unwrap();
|
||||
|
||||
let mut cursor = tree.walk_with_properties(&property_sheet, source_code.as_bytes());
|
||||
assert_eq!(cursor.node().kind(), "program");
|
||||
assert!(cursor.goto_first_child());
|
||||
assert_eq!(cursor.node().kind(), "expression_statement");
|
||||
assert!(cursor.goto_first_child());
|
||||
assert_eq!(cursor.node().kind(), "assignment_expression");
|
||||
|
||||
assert!(cursor.goto_first_child());
|
||||
assert_eq!(cursor.node().kind(), "identifier");
|
||||
assert_eq!(
|
||||
*cursor.node_properties(),
|
||||
Properties {
|
||||
a: Some("z".to_string()),
|
||||
b: None
|
||||
}
|
||||
);
|
||||
|
||||
assert!(cursor.goto_next_sibling());
|
||||
assert_eq!(cursor.node().kind(), "=");
|
||||
assert!(cursor.goto_next_sibling());
|
||||
assert_eq!(cursor.node().kind(), "binary_expression");
|
||||
|
||||
assert!(cursor.goto_first_child());
|
||||
assert_eq!(cursor.node().kind(), "identifier");
|
||||
assert_eq!(
|
||||
*cursor.node_properties(),
|
||||
Properties {
|
||||
a: Some("y".to_string()),
|
||||
b: None
|
||||
}
|
||||
);
|
||||
|
||||
assert!(cursor.goto_next_sibling());
|
||||
assert_eq!(cursor.node().kind(), "||");
|
||||
assert!(cursor.goto_next_sibling());
|
||||
assert_eq!(cursor.node().kind(), "identifier");
|
||||
assert_eq!(
|
||||
*cursor.node_properties(),
|
||||
Properties {
|
||||
a: Some("x".to_string()),
|
||||
b: None
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_walk_with_properties_with_regexes() {
|
||||
let language = get_language("javascript");
|
||||
let property_sheet = PropertySheet::<Properties>::new(
|
||||
language,
|
||||
&generate_property_sheet_string(
|
||||
"/some/path.css",
|
||||
"
|
||||
identifier {
|
||||
&[text='^[A-Z]'] {
|
||||
a: y;
|
||||
}
|
||||
|
||||
&[text='^[A-Z_]+$'] {
|
||||
a: z;
|
||||
}
|
||||
|
||||
a: x;
|
||||
}
|
||||
",
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let source_code = "const ABC = Def(ghi);";
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language).unwrap();
|
||||
let tree = parser.parse(source_code, None).unwrap();
|
||||
|
||||
let mut cursor = tree.walk_with_properties(&property_sheet, source_code.as_bytes());
|
||||
assert_eq!(cursor.node().kind(), "program");
|
||||
assert!(cursor.goto_first_child());
|
||||
assert_eq!(cursor.node().kind(), "lexical_declaration");
|
||||
assert!(cursor.goto_first_child());
|
||||
assert_eq!(cursor.node().kind(), "const");
|
||||
assert!(cursor.goto_next_sibling());
|
||||
assert_eq!(cursor.node().kind(), "variable_declarator");
|
||||
|
||||
// The later selector with a text regex overrides the earlier one.
|
||||
assert!(cursor.goto_first_child());
|
||||
assert_eq!(cursor.node().kind(), "identifier");
|
||||
assert_eq!(
|
||||
*cursor.node_properties(),
|
||||
Properties {
|
||||
a: Some("z".to_string()),
|
||||
b: None
|
||||
}
|
||||
);
|
||||
|
||||
assert!(cursor.goto_next_sibling());
|
||||
assert_eq!(cursor.node().kind(), "=");
|
||||
assert!(cursor.goto_next_sibling());
|
||||
assert_eq!(cursor.node().kind(), "call_expression");
|
||||
|
||||
// The selectors with text regexes override the selector without one.
|
||||
assert!(cursor.goto_first_child());
|
||||
assert_eq!(cursor.node().kind(), "identifier");
|
||||
assert_eq!(
|
||||
*cursor.node_properties(),
|
||||
Properties {
|
||||
a: Some("y".to_string()),
|
||||
b: None
|
||||
}
|
||||
);
|
||||
|
||||
assert!(cursor.goto_next_sibling());
|
||||
assert_eq!(cursor.node().kind(), "arguments");
|
||||
assert!(cursor.goto_first_child());
|
||||
assert_eq!(cursor.node().kind(), "(");
|
||||
|
||||
// This node doesn't match either of the regexes.
|
||||
assert!(cursor.goto_next_sibling());
|
||||
assert_eq!(cursor.node().kind(), "identifier");
|
||||
assert_eq!(
|
||||
*cursor.node_properties(),
|
||||
Properties {
|
||||
a: Some("x".to_string()),
|
||||
b: None
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_walk_with_properties_based_on_fields() {
|
||||
let language = get_language("javascript");
|
||||
let property_sheet = PropertySheet::<Properties>::new(
|
||||
language,
|
||||
&generate_property_sheet_string(
|
||||
"/some/path.css",
|
||||
"
|
||||
arrow_function > .parameter {
|
||||
a: x;
|
||||
}
|
||||
|
||||
function_declaration {
|
||||
& > .parameters > identifier {
|
||||
a: y;
|
||||
}
|
||||
|
||||
& > .name {
|
||||
b: z;
|
||||
}
|
||||
}
|
||||
|
||||
identifier {
|
||||
a: w;
|
||||
}
|
||||
",
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let source_code = "function a(b) { return c => c + b; }";
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language).unwrap();
|
||||
let tree = parser.parse(source_code, None).unwrap();
|
||||
let mut cursor = tree.walk_with_properties(&property_sheet, source_code.as_bytes());
|
||||
|
||||
assert!(cursor.goto_first_child());
|
||||
assert_eq!(cursor.node().kind(), "function_declaration");
|
||||
assert!(cursor.goto_first_child());
|
||||
assert_eq!(cursor.node().kind(), "function");
|
||||
assert_eq!(*cursor.node_properties(), Properties::default());
|
||||
|
||||
assert!(cursor.goto_next_sibling());
|
||||
assert_eq!(cursor.node().kind(), "identifier");
|
||||
assert_eq!(
|
||||
*cursor.node_properties(),
|
||||
Properties {
|
||||
a: Some("w".to_string()),
|
||||
b: Some("z".to_string())
|
||||
}
|
||||
);
|
||||
|
||||
assert!(cursor.goto_next_sibling());
|
||||
assert_eq!(cursor.node().kind(), "formal_parameters");
|
||||
assert_eq!(*cursor.node_properties(), Properties::default());
|
||||
|
||||
assert!(cursor.goto_first_child());
|
||||
assert_eq!(cursor.node().kind(), "(");
|
||||
assert_eq!(*cursor.node_properties(), Properties::default());
|
||||
assert!(cursor.goto_next_sibling());
|
||||
assert_eq!(cursor.node().kind(), "identifier");
|
||||
assert_eq!(
|
||||
*cursor.node_properties(),
|
||||
Properties {
|
||||
a: Some("y".to_string()),
|
||||
b: None,
|
||||
}
|
||||
);
|
||||
|
||||
assert!(cursor.goto_parent());
|
||||
assert!(cursor.goto_next_sibling());
|
||||
assert_eq!(cursor.node().kind(), "statement_block");
|
||||
assert!(cursor.goto_first_child());
|
||||
assert!(cursor.goto_next_sibling());
|
||||
assert_eq!(cursor.node().kind(), "return_statement");
|
||||
assert!(cursor.goto_first_child());
|
||||
assert!(cursor.goto_next_sibling());
|
||||
assert_eq!(cursor.node().kind(), "arrow_function");
|
||||
assert!(cursor.goto_first_child());
|
||||
assert_eq!(cursor.node().kind(), "identifier");
|
||||
assert_eq!(
|
||||
*cursor.node_properties(),
|
||||
Properties {
|
||||
a: Some("x".to_string()),
|
||||
b: None,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
fn generate_property_sheet_string(path: &str, css: &str) -> String {
|
||||
serde_json::to_string(&properties::generate_property_sheet(path, css, &HashSet::new()).unwrap())
|
||||
.unwrap()
|
||||
}
|
||||
3027
cli/src/tests/query_test.rs
Normal file
3027
cli/src/tests/query_test.rs
Normal file
File diff suppressed because it is too large
Load diff
437
cli/src/tests/tags_test.rs
Normal file
437
cli/src/tests/tags_test.rs
Normal file
|
|
@ -0,0 +1,437 @@
|
|||
use super::helpers::allocations;
|
||||
use super::helpers::fixtures::{get_language, get_language_queries_path};
|
||||
use std::ffi::CStr;
|
||||
use std::ffi::CString;
|
||||
use std::{fs, ptr, slice, str};
|
||||
use tree_sitter::Point;
|
||||
use tree_sitter_tags::c_lib as c;
|
||||
use tree_sitter_tags::{Error, TagsConfiguration, TagsContext};
|
||||
|
||||
const PYTHON_TAG_QUERY: &'static str = r#"
|
||||
(
|
||||
(function_definition
|
||||
name: (identifier) @name
|
||||
body: (block . (expression_statement (string) @doc))) @definition.function
|
||||
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
|
||||
)
|
||||
|
||||
(function_definition
|
||||
name: (identifier) @name) @definition.function
|
||||
|
||||
(
|
||||
(class_definition
|
||||
name: (identifier) @name
|
||||
body: (block
|
||||
. (expression_statement (string) @doc))) @definition.class
|
||||
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
|
||||
)
|
||||
|
||||
(class_definition
|
||||
name: (identifier) @name) @definition.class
|
||||
|
||||
(call
|
||||
function: (identifier) @name) @reference.call
|
||||
|
||||
(call
|
||||
function: (attribute
|
||||
attribute: (identifier) @name)) @reference.call
|
||||
"#;
|
||||
|
||||
const JS_TAG_QUERY: &'static str = r#"
|
||||
(
|
||||
(comment)* @doc .
|
||||
(class_declaration
|
||||
name: (identifier) @name) @definition.class
|
||||
(#select-adjacent! @doc @definition.class)
|
||||
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
|
||||
)
|
||||
|
||||
(
|
||||
(comment)* @doc .
|
||||
(method_definition
|
||||
name: (property_identifier) @name) @definition.method
|
||||
(#select-adjacent! @doc @definition.method)
|
||||
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
|
||||
)
|
||||
|
||||
(
|
||||
(comment)* @doc .
|
||||
(function_declaration
|
||||
name: (identifier) @name) @definition.function
|
||||
(#select-adjacent! @doc @definition.function)
|
||||
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
|
||||
)
|
||||
|
||||
(call_expression
|
||||
function: (identifier) @name) @reference.call
|
||||
"#;
|
||||
|
||||
const RUBY_TAG_QUERY: &'static str = r#"
|
||||
(method
|
||||
name: (_) @name) @definition.method
|
||||
|
||||
(method_call
|
||||
method: (identifier) @name) @reference.call
|
||||
|
||||
(setter (identifier) @ignore)
|
||||
|
||||
((identifier) @name @reference.call
|
||||
(#is-not? local))
|
||||
"#;
|
||||
|
||||
#[test]
|
||||
fn test_tags_python() {
|
||||
let language = get_language("python");
|
||||
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
|
||||
let mut tag_context = TagsContext::new();
|
||||
|
||||
let source = br#"
|
||||
class Customer:
|
||||
"""
|
||||
Data about a customer
|
||||
"""
|
||||
|
||||
def age(self):
|
||||
'''
|
||||
Get the customer's age
|
||||
'''
|
||||
compute_age(self.id)
|
||||
}
|
||||
"#;
|
||||
|
||||
let tags = tag_context
|
||||
.generate_tags(&tags_config, source, None)
|
||||
.unwrap()
|
||||
.0
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
tags.iter()
|
||||
.map(|t| (
|
||||
substr(source, &t.name_range),
|
||||
tags_config.syntax_type_name(t.syntax_type_id)
|
||||
))
|
||||
.collect::<Vec<_>>(),
|
||||
&[
|
||||
("Customer", "class"),
|
||||
("age", "function"),
|
||||
("compute_age", "call"),
|
||||
]
|
||||
);
|
||||
|
||||
assert_eq!(substr(source, &tags[0].line_range), "class Customer:");
|
||||
assert_eq!(substr(source, &tags[1].line_range), "def age(self):");
|
||||
assert_eq!(tags[0].docs.as_ref().unwrap(), "Data about a customer");
|
||||
assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tags_javascript() {
|
||||
let language = get_language("javascript");
|
||||
let tags_config = TagsConfiguration::new(language, JS_TAG_QUERY, "").unwrap();
|
||||
let source = br#"
|
||||
// hi
|
||||
|
||||
// Data about a customer.
|
||||
// bla bla bla
|
||||
class Customer {
|
||||
/*
|
||||
* Get the customer's age
|
||||
*/
|
||||
getAge() {
|
||||
}
|
||||
}
|
||||
|
||||
// ok
|
||||
|
||||
class Agent {
|
||||
|
||||
}
|
||||
"#;
|
||||
|
||||
let mut tag_context = TagsContext::new();
|
||||
let tags = tag_context
|
||||
.generate_tags(&tags_config, source, None)
|
||||
.unwrap()
|
||||
.0
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
tags.iter()
|
||||
.map(|t| (
|
||||
substr(source, &t.name_range),
|
||||
t.span.clone(),
|
||||
tags_config.syntax_type_name(t.syntax_type_id)
|
||||
))
|
||||
.collect::<Vec<_>>(),
|
||||
&[
|
||||
("Customer", Point::new(5, 10)..Point::new(5, 18), "class",),
|
||||
("getAge", Point::new(9, 8)..Point::new(9, 14), "method",),
|
||||
("Agent", Point::new(15, 10)..Point::new(15, 15), "class",)
|
||||
]
|
||||
);
|
||||
assert_eq!(
|
||||
tags[0].docs.as_ref().unwrap(),
|
||||
"Data about a customer.\nbla bla bla"
|
||||
);
|
||||
assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age");
|
||||
assert_eq!(tags[2].docs, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tags_columns_measured_in_utf16_code_units() {
|
||||
let language = get_language("python");
|
||||
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
|
||||
let mut tag_context = TagsContext::new();
|
||||
|
||||
let source = r#""❤️❤️❤️".hello_α_ω()"#.as_bytes();
|
||||
|
||||
let tag = tag_context
|
||||
.generate_tags(&tags_config, source, None)
|
||||
.unwrap()
|
||||
.0
|
||||
.next()
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(substr(source, &tag.name_range), "hello_α_ω");
|
||||
assert_eq!(tag.span, Point::new(0, 21)..Point::new(0, 32));
|
||||
assert_eq!(tag.utf16_column_range, 9..18);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tags_ruby() {
|
||||
let language = get_language("ruby");
|
||||
let locals_query =
|
||||
fs::read_to_string(get_language_queries_path("ruby").join("locals.scm")).unwrap();
|
||||
let tags_config = TagsConfiguration::new(language, RUBY_TAG_QUERY, &locals_query).unwrap();
|
||||
let source = strip_whitespace(
|
||||
8,
|
||||
"
|
||||
b = 1
|
||||
|
||||
def foo=()
|
||||
c = 1
|
||||
|
||||
# a is a method because it is not in scope
|
||||
# b is a method because `b` doesn't capture variables from its containing scope
|
||||
bar a, b, c
|
||||
|
||||
[1, 2, 3].each do |a|
|
||||
# a is a parameter
|
||||
# b is a method
|
||||
# c is a variable, because the block captures variables from its containing scope.
|
||||
baz a, b, c
|
||||
end
|
||||
end",
|
||||
);
|
||||
|
||||
let mut tag_context = TagsContext::new();
|
||||
let tags = tag_context
|
||||
.generate_tags(&tags_config, source.as_bytes(), None)
|
||||
.unwrap()
|
||||
.0
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
tags.iter()
|
||||
.map(|t| (
|
||||
substr(source.as_bytes(), &t.name_range),
|
||||
tags_config.syntax_type_name(t.syntax_type_id),
|
||||
(t.span.start.row, t.span.start.column),
|
||||
))
|
||||
.collect::<Vec<_>>(),
|
||||
&[
|
||||
("foo=", "method", (2, 4)),
|
||||
("bar", "call", (7, 4)),
|
||||
("a", "call", (7, 8)),
|
||||
("b", "call", (7, 11)),
|
||||
("each", "call", (9, 14)),
|
||||
("baz", "call", (13, 8)),
|
||||
("b", "call", (13, 15),),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tags_cancellation() {
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
allocations::record(|| {
|
||||
// Large javascript document
|
||||
let source = (0..500)
|
||||
.map(|_| "/* hi */ class A { /* ok */ b() {} }\n")
|
||||
.collect::<String>();
|
||||
|
||||
let cancellation_flag = AtomicUsize::new(0);
|
||||
let language = get_language("javascript");
|
||||
let tags_config = TagsConfiguration::new(language, JS_TAG_QUERY, "").unwrap();
|
||||
|
||||
let mut tag_context = TagsContext::new();
|
||||
let tags = tag_context
|
||||
.generate_tags(&tags_config, source.as_bytes(), Some(&cancellation_flag))
|
||||
.unwrap();
|
||||
|
||||
for (i, tag) in tags.0.enumerate() {
|
||||
if i == 150 {
|
||||
cancellation_flag.store(1, Ordering::SeqCst);
|
||||
}
|
||||
if let Err(e) = tag {
|
||||
assert_eq!(e, Error::Cancelled);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
panic!("Expected to halt tagging with an error");
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_invalid_capture() {
|
||||
let language = get_language("python");
|
||||
let e = TagsConfiguration::new(language, "(identifier) @method", "")
|
||||
.expect_err("expected InvalidCapture error");
|
||||
assert_eq!(e, Error::InvalidCapture("method".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tags_with_parse_error() {
|
||||
let language = get_language("python");
|
||||
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
|
||||
let mut tag_context = TagsContext::new();
|
||||
|
||||
let source = br#"
|
||||
class Fine: pass
|
||||
class Bad
|
||||
"#;
|
||||
|
||||
let (tags, failed) = tag_context
|
||||
.generate_tags(&tags_config, source, None)
|
||||
.unwrap();
|
||||
|
||||
let newtags = tags.collect::<Result<Vec<_>, _>>().unwrap();
|
||||
|
||||
assert!(failed, "syntax error should have been detected");
|
||||
|
||||
assert_eq!(
|
||||
newtags.iter()
|
||||
.map(|t| (
|
||||
substr(source, &t.name_range),
|
||||
tags_config.syntax_type_name(t.syntax_type_id)
|
||||
))
|
||||
.collect::<Vec<_>>(),
|
||||
&[
|
||||
("Fine", "class"),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_tags_via_c_api() {
|
||||
allocations::record(|| {
|
||||
let tagger = c::ts_tagger_new();
|
||||
let buffer = c::ts_tags_buffer_new();
|
||||
let scope_name = "source.js";
|
||||
let language = get_language("javascript");
|
||||
|
||||
let source_code = strip_whitespace(
|
||||
12,
|
||||
"
|
||||
var a = 1;
|
||||
|
||||
// one
|
||||
// two
|
||||
// three
|
||||
function b() {
|
||||
}
|
||||
|
||||
// four
|
||||
// five
|
||||
class C extends D {
|
||||
|
||||
}
|
||||
|
||||
b(a);",
|
||||
);
|
||||
|
||||
let c_scope_name = CString::new(scope_name).unwrap();
|
||||
let result = c::ts_tagger_add_language(
|
||||
tagger,
|
||||
c_scope_name.as_ptr(),
|
||||
language,
|
||||
JS_TAG_QUERY.as_ptr(),
|
||||
ptr::null(),
|
||||
JS_TAG_QUERY.len() as u32,
|
||||
0,
|
||||
);
|
||||
assert_eq!(result, c::TSTagsError::Ok);
|
||||
|
||||
let result = c::ts_tagger_tag(
|
||||
tagger,
|
||||
c_scope_name.as_ptr(),
|
||||
source_code.as_ptr(),
|
||||
source_code.len() as u32,
|
||||
buffer,
|
||||
ptr::null(),
|
||||
);
|
||||
assert_eq!(result, c::TSTagsError::Ok);
|
||||
let tags = unsafe {
|
||||
slice::from_raw_parts(
|
||||
c::ts_tags_buffer_tags(buffer),
|
||||
c::ts_tags_buffer_tags_len(buffer) as usize,
|
||||
)
|
||||
};
|
||||
let docs = str::from_utf8(unsafe {
|
||||
slice::from_raw_parts(
|
||||
c::ts_tags_buffer_docs(buffer) as *const u8,
|
||||
c::ts_tags_buffer_docs_len(buffer) as usize,
|
||||
)
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let syntax_types: Vec<&str> = unsafe {
|
||||
let mut len: u32 = 0;
|
||||
let ptr =
|
||||
c::ts_tagger_syntax_kinds_for_scope_name(tagger, c_scope_name.as_ptr(), &mut len);
|
||||
slice::from_raw_parts(ptr, len as usize)
|
||||
.iter()
|
||||
.map(|i| CStr::from_ptr(*i).to_str().unwrap())
|
||||
.collect()
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
tags.iter()
|
||||
.map(|tag| (
|
||||
syntax_types[tag.syntax_type_id as usize],
|
||||
&source_code[tag.name_start_byte as usize..tag.name_end_byte as usize],
|
||||
&source_code[tag.line_start_byte as usize..tag.line_end_byte as usize],
|
||||
&docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize],
|
||||
))
|
||||
.collect::<Vec<_>>(),
|
||||
&[
|
||||
("function", "b", "function b() {", "one\ntwo\nthree"),
|
||||
("class", "C", "class C extends D {", "four\nfive"),
|
||||
("call", "b", "b(a);", "")
|
||||
]
|
||||
);
|
||||
|
||||
c::ts_tags_buffer_delete(buffer);
|
||||
c::ts_tagger_delete(tagger);
|
||||
});
|
||||
}
|
||||
|
||||
fn substr<'a>(source: &'a [u8], range: &std::ops::Range<usize>) -> &'a str {
|
||||
std::str::from_utf8(&source[range.clone()]).unwrap()
|
||||
}
|
||||
|
||||
fn strip_whitespace(indent: usize, s: &str) -> String {
|
||||
s.lines()
|
||||
.skip(1)
|
||||
.map(|line| &line[line.len().min(indent)..])
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
}
|
||||
64
cli/src/tests/test_highlight_test.rs
Normal file
64
cli/src/tests/test_highlight_test.rs
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
use super::helpers::fixtures::{get_highlight_config, get_language, test_loader};
|
||||
use crate::query_testing::{parse_position_comments, Assertion};
|
||||
use crate::test_highlight::get_highlight_positions;
|
||||
use tree_sitter::{Parser, Point};
|
||||
use tree_sitter_highlight::{Highlight, Highlighter};
|
||||
|
||||
#[test]
|
||||
fn test_highlight_test_with_basic_test() {
|
||||
let language = get_language("javascript");
|
||||
let config = get_highlight_config(
|
||||
"javascript",
|
||||
Some("injections.scm"),
|
||||
&[
|
||||
"function".to_string(),
|
||||
"variable.parameter".to_string(),
|
||||
"keyword".to_string(),
|
||||
],
|
||||
);
|
||||
let source = [
|
||||
"var abc = function(d) {",
|
||||
" // ^ function",
|
||||
" // ^ keyword",
|
||||
" return d + e;",
|
||||
" // ^ variable.parameter",
|
||||
"};",
|
||||
]
|
||||
.join("\n");
|
||||
|
||||
let assertions =
|
||||
parse_position_comments(&mut Parser::new(), language, source.as_bytes()).unwrap();
|
||||
assert_eq!(
|
||||
assertions,
|
||||
&[
|
||||
Assertion {
|
||||
position: Point::new(0, 5),
|
||||
expected_capture_name: "function".to_string()
|
||||
},
|
||||
Assertion {
|
||||
position: Point::new(0, 11),
|
||||
expected_capture_name: "keyword".to_string()
|
||||
},
|
||||
Assertion {
|
||||
position: Point::new(3, 9),
|
||||
expected_capture_name: "variable.parameter".to_string()
|
||||
},
|
||||
]
|
||||
);
|
||||
|
||||
let mut highlighter = Highlighter::new();
|
||||
let highlight_positions =
|
||||
get_highlight_positions(test_loader(), &mut highlighter, &config, source.as_bytes())
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
highlight_positions,
|
||||
&[
|
||||
(Point::new(0, 0), Point::new(0, 3), Highlight(2)), // "var"
|
||||
(Point::new(0, 4), Point::new(0, 7), Highlight(0)), // "abc"
|
||||
(Point::new(0, 10), Point::new(0, 18), Highlight(2)), // "function"
|
||||
(Point::new(0, 19), Point::new(0, 20), Highlight(1)), // "d"
|
||||
(Point::new(3, 2), Point::new(3, 8), Highlight(2)), // "return"
|
||||
(Point::new(3, 9), Point::new(3, 10), Highlight(1)), // "d"
|
||||
]
|
||||
);
|
||||
}
|
||||
|
|
@ -1,12 +1,32 @@
|
|||
use super::error::{Error, Result};
|
||||
use std::io;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
use tree_sitter::Parser;
|
||||
|
||||
#[cfg(unix)]
|
||||
use std::path::PathBuf;
|
||||
#[cfg(unix)]
|
||||
use std::process::{Child, ChildStdin, Command, Stdio};
|
||||
use tree_sitter::Parser;
|
||||
|
||||
#[cfg(unix)]
|
||||
const HTML_HEADER: &[u8] = b"<!DOCTYPE html>\n<style>svg { width: 100%; }</style>\n\n";
|
||||
|
||||
pub fn cancel_on_stdin() -> Arc<AtomicUsize> {
|
||||
let result = Arc::new(AtomicUsize::new(0));
|
||||
if atty::is(atty::Stream::Stdin) {
|
||||
thread::spawn({
|
||||
let flag = result.clone();
|
||||
move || {
|
||||
let mut line = String::new();
|
||||
io::stdin().read_line(&mut line).unwrap();
|
||||
flag.store(1, Ordering::Relaxed);
|
||||
}
|
||||
});
|
||||
}
|
||||
result
|
||||
}
|
||||
#[cfg(windows)]
|
||||
pub struct LogSession();
|
||||
|
||||
|
|
@ -14,12 +34,12 @@ pub struct LogSession();
|
|||
pub struct LogSession(PathBuf, Option<Child>, Option<ChildStdin>);
|
||||
|
||||
#[cfg(windows)]
|
||||
pub fn log_graphs(_parser: &mut Parser, _path: &str) -> std::io::Result<LogSession> {
|
||||
pub fn log_graphs(_parser: &mut Parser, _path: &str) -> Result<LogSession> {
|
||||
Ok(LogSession())
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result<LogSession> {
|
||||
pub fn log_graphs(parser: &mut Parser, path: &str) -> Result<LogSession> {
|
||||
use std::io::Write;
|
||||
|
||||
let mut dot_file = std::fs::File::create(path)?;
|
||||
|
|
@ -29,11 +49,13 @@ pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result<LogSession
|
|||
.stdin(Stdio::piped())
|
||||
.stdout(dot_file)
|
||||
.spawn()
|
||||
.expect("Failed to run Dot");
|
||||
.map_err(Error::wrap(|| {
|
||||
"Failed to run the `dot` command. Check that graphviz is installed."
|
||||
}))?;
|
||||
let dot_stdin = dot_process
|
||||
.stdin
|
||||
.take()
|
||||
.expect("Failed to open stdin for Dot");
|
||||
.ok_or_else(|| Error::new("Failed to open stdin for `dot` process.".to_string()))?;
|
||||
parser.print_dot_graphs(&dot_stdin);
|
||||
Ok(LogSession(
|
||||
PathBuf::from(path),
|
||||
|
|
|
|||
|
|
@ -57,9 +57,11 @@ pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Resu
|
|||
}
|
||||
|
||||
// Run `emcc` in a container using the `emscripten-slim` image
|
||||
command.args(&["trzeci/emscripten-slim", "emcc"]);
|
||||
command.args(&["emscripten/emsdk", "emcc"]);
|
||||
} else {
|
||||
return Error::err("You must have either emcc or docker on your PATH to run this command".to_string());
|
||||
return Error::err(
|
||||
"You must have either emcc or docker on your PATH to run this command".to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
command.args(&[
|
||||
|
|
@ -81,31 +83,22 @@ pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Resu
|
|||
"src",
|
||||
]);
|
||||
|
||||
// Find source files to pass to emscripten
|
||||
let src_entries = fs::read_dir(&src_dir).map_err(Error::wrap(|| {
|
||||
format!("Failed to read source directory {:?}", src_dir)
|
||||
}))?;
|
||||
let src = Path::new("src");
|
||||
let parser_c_path = src.join("parser.c");
|
||||
let scanner_c_path = src.join("scanner.c");
|
||||
let scanner_cc_path = src.join("scanner.cc");
|
||||
let scanner_cpp_path = src.join("scanner.cpp");
|
||||
|
||||
for entry in src_entries {
|
||||
let entry = entry?;
|
||||
let file_name = entry.file_name();
|
||||
|
||||
// Do not compile the node.js binding file.
|
||||
if file_name
|
||||
.to_str()
|
||||
.map_or(false, |s| s.starts_with("binding"))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Compile any .c, .cc, or .cpp files
|
||||
if let Some(extension) = Path::new(&file_name).extension().and_then(|s| s.to_str()) {
|
||||
if extension == "c" || extension == "cc" || extension == "cpp" {
|
||||
command.arg(Path::new("src").join(entry.file_name()));
|
||||
}
|
||||
}
|
||||
if language_dir.join(&scanner_cc_path).exists() {
|
||||
command.arg("-xc++").arg(&scanner_cc_path);
|
||||
} else if language_dir.join(&scanner_cpp_path).exists() {
|
||||
command.arg("-xc++").arg(&scanner_cpp_path);
|
||||
} else if language_dir.join(&scanner_c_path).exists() {
|
||||
command.arg(&scanner_c_path);
|
||||
}
|
||||
|
||||
command.arg(&parser_c_path);
|
||||
|
||||
let output = command
|
||||
.output()
|
||||
.map_err(Error::wrap(|| "Failed to run emcc command"))?;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>tree-sitter THE_LANGUAGE_NAME</title>
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/codemirror/5.45.0/codemirror.min.css">
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/clusterize.js/0.18.0/clusterize.min.css">
|
||||
|
|
@ -7,7 +8,7 @@
|
|||
</head>
|
||||
|
||||
<body>
|
||||
<div id="playground-container">
|
||||
<div id="playground-container" style="visibility: hidden;">
|
||||
<header>
|
||||
<div class=header-item>
|
||||
<bold>THE_LANGUAGE_NAME</bold>
|
||||
|
|
@ -18,18 +19,31 @@
|
|||
<input id="logging-checkbox" type="checkbox"></input>
|
||||
</div>
|
||||
|
||||
<div class=header-item>
|
||||
<label for="query-checkbox">query</label>
|
||||
<input id="query-checkbox" type="checkbox"></input>
|
||||
</div>
|
||||
|
||||
<div class=header-item>
|
||||
<label for="update-time">parse time: </label>
|
||||
<span id="update-time"></span>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<main>
|
||||
<select id="language-select" style="display: none;">
|
||||
<option value="parser">Parser</option>
|
||||
</select>
|
||||
</header>
|
||||
|
||||
<textarea id="code-input"></textarea>
|
||||
<main>
|
||||
<div id="input-pane">
|
||||
<div id="code-container">
|
||||
<textarea id="code-input"></textarea>
|
||||
</div>
|
||||
|
||||
<div id="query-container" style="visibility: hidden; position: absolute;">
|
||||
<textarea id="query-input"></textarea>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="output-container-scroll">
|
||||
<pre id="output-container" class="highlight"></pre>
|
||||
|
|
@ -51,15 +65,13 @@
|
|||
|
||||
<style>
|
||||
body {
|
||||
font: Sans Serif;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
#playground-container {
|
||||
position: absolute;
|
||||
top: 0;
|
||||
bottom: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
}
|
||||
|
|
@ -73,24 +85,51 @@
|
|||
}
|
||||
|
||||
main {
|
||||
flex: 1;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
#input-pane {
|
||||
position: absolute;
|
||||
top: 0;
|
||||
left: 0;
|
||||
bottom: 0;
|
||||
right: 50%;
|
||||
display: flex;
|
||||
height: 100%;
|
||||
flex-direction: row;
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
#code-container, #query-container {
|
||||
flex: 1;
|
||||
position: relative;
|
||||
overflow: hidden;
|
||||
border-right: 1px solid #aaa;
|
||||
border-bottom: 1px solid #aaa;
|
||||
}
|
||||
|
||||
#output-container-scroll {
|
||||
position: absolute;
|
||||
top: 0;
|
||||
left: 50%;
|
||||
bottom: 0;
|
||||
right: 0;
|
||||
}
|
||||
|
||||
.header-item {
|
||||
margin-right: 30px;
|
||||
}
|
||||
|
||||
.CodeMirror {
|
||||
width: 50%;
|
||||
#playground-container .CodeMirror {
|
||||
position: absolute;
|
||||
top: 0;
|
||||
bottom: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
height: 100%;
|
||||
border-right: 1px solid #aaa;
|
||||
}
|
||||
|
||||
#output-container-scroll {
|
||||
width: 50%;
|
||||
height: 100%;
|
||||
flex: 1;
|
||||
padding: 0;
|
||||
overflow: auto;
|
||||
}
|
||||
|
|
@ -124,5 +163,9 @@
|
|||
border-radius: 3px;
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.query-error {
|
||||
text-decoration: underline red dashed;
|
||||
}
|
||||
</style>
|
||||
</body>
|
||||
|
|
|
|||
|
|
@ -1,26 +1,63 @@
|
|||
use super::error::Error;
|
||||
use super::wasm;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::net::TcpListener;
|
||||
use std::path::Path;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str::FromStr;
|
||||
use tiny_http::{Header, Response, Server};
|
||||
use webbrowser;
|
||||
|
||||
const HTML: &'static str = include_str!("./web_ui.html");
|
||||
const PLAYGROUND_JS: &'static [u8] = include_bytes!("../../docs/assets/js/playground.js");
|
||||
macro_rules! resource {
|
||||
($name: tt, $path: tt) => {
|
||||
#[cfg(TREE_SITTER_EMBED_WASM_BINDING)]
|
||||
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Vec<u8> {
|
||||
if let Some(tree_sitter_dir) = tree_sitter_dir {
|
||||
fs::read(tree_sitter_dir.join($path)).unwrap()
|
||||
} else {
|
||||
include_bytes!(concat!("../../", $path)).to_vec()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
const LIB_JS: &'static [u8] = include_bytes!("../../lib/binding_web/tree-sitter.js");
|
||||
#[cfg(unix)]
|
||||
const LIB_WASM: &'static [u8] = include_bytes!("../../lib/binding_web/tree-sitter.wasm");
|
||||
#[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))]
|
||||
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Vec<u8> {
|
||||
if let Some(tree_sitter_dir) = tree_sitter_dir {
|
||||
fs::read(tree_sitter_dir.join($path)).unwrap()
|
||||
} else {
|
||||
include_bytes!(concat!("../../", $path)).to_vec()
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
const LIB_JS: &'static [u8] = &[];
|
||||
#[cfg(windows)]
|
||||
const LIB_WASM: &'static [u8] = &[];
|
||||
macro_rules! optional_resource {
|
||||
($name: tt, $path: tt) => {
|
||||
#[cfg(TREE_SITTER_EMBED_WASM_BINDING)]
|
||||
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Vec<u8> {
|
||||
if let Some(tree_sitter_dir) = tree_sitter_dir {
|
||||
fs::read(tree_sitter_dir.join($path)).unwrap()
|
||||
} else {
|
||||
include_bytes!(concat!("../../", $path)).to_vec()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn serve(grammar_path: &Path) {
|
||||
#[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))]
|
||||
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Vec<u8> {
|
||||
if let Some(tree_sitter_dir) = tree_sitter_dir {
|
||||
fs::read(tree_sitter_dir.join($path)).unwrap()
|
||||
} else {
|
||||
Vec::new()
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
resource!(get_main_html, "cli/src/web_ui.html");
|
||||
resource!(get_playground_js, "docs/assets/js/playground.js");
|
||||
optional_resource!(get_lib_js, "lib/binding_web/tree-sitter.js");
|
||||
optional_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm");
|
||||
|
||||
pub fn serve(grammar_path: &Path, open_in_browser: bool) {
|
||||
let port = get_available_port().expect("Couldn't find an available port");
|
||||
let url = format!("127.0.0.1:{}", port);
|
||||
let server = Server::http(&url).expect("Failed to start web server");
|
||||
|
|
@ -36,37 +73,42 @@ pub fn serve(grammar_path: &Path) {
|
|||
)
|
||||
}))
|
||||
.unwrap();
|
||||
if open_in_browser {
|
||||
if let Err(_) = webbrowser::open(&format!("http://127.0.0.1:{}", port)) {
|
||||
eprintln!("Failed to open '{}' in a web browser", url);
|
||||
}
|
||||
}
|
||||
|
||||
webbrowser::open(&format!("http://127.0.0.1:{}", port))
|
||||
.map_err(Error::wrap(|| {
|
||||
format!("Failed to open '{}' in a web browser", url)
|
||||
}))
|
||||
.unwrap();
|
||||
|
||||
let html = HTML
|
||||
let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok();
|
||||
let main_html = String::from_utf8(get_main_html(&tree_sitter_dir))
|
||||
.unwrap()
|
||||
.replace("THE_LANGUAGE_NAME", &grammar_name)
|
||||
.into_bytes();
|
||||
let playground_js = get_playground_js(&tree_sitter_dir);
|
||||
let lib_js = get_lib_js(&tree_sitter_dir);
|
||||
let lib_wasm = get_lib_wasm(&tree_sitter_dir);
|
||||
|
||||
let html_header = Header::from_str("Content-Type: text/html").unwrap();
|
||||
let js_header = Header::from_str("Content-Type: application/javascript").unwrap();
|
||||
let wasm_header = Header::from_str("Content-Type: application/wasm").unwrap();
|
||||
|
||||
for request in server.incoming_requests() {
|
||||
let res = match request.url() {
|
||||
"/" => response(&html, &html_header),
|
||||
"/playground.js" => response(PLAYGROUND_JS, &js_header),
|
||||
"/" => response(&main_html, &html_header),
|
||||
"/playground.js" => response(&playground_js, &js_header),
|
||||
"/tree-sitter-parser.wasm" => response(&language_wasm, &wasm_header),
|
||||
"/tree-sitter.js" => {
|
||||
if cfg!(windows) {
|
||||
redirect("https://tree-sitter.github.io/tree-sitter.js")
|
||||
} else {
|
||||
response(LIB_JS, &js_header)
|
||||
response(&lib_js, &js_header)
|
||||
}
|
||||
}
|
||||
"/tree-sitter.wasm" => {
|
||||
if cfg!(windows) {
|
||||
redirect("https://tree-sitter.github.io/tree-sitter.wasm")
|
||||
} else {
|
||||
response(LIB_WASM, &wasm_header)
|
||||
response(&lib_wasm, &wasm_header)
|
||||
}
|
||||
}
|
||||
_ => response(b"Not found", &html_header).with_status_code(404),
|
||||
|
|
|
|||
|
|
@ -191,14 +191,14 @@ GEM
|
|||
rb-fsevent (>= 0.9.3)
|
||||
rb-inotify (>= 0.9.7)
|
||||
mercenary (0.3.6)
|
||||
mini_portile2 (2.3.0)
|
||||
mini_portile2 (2.4.0)
|
||||
minima (2.1.1)
|
||||
jekyll (~> 3.3)
|
||||
minitest (5.11.3)
|
||||
multipart-post (2.0.0)
|
||||
net-dns (0.8.0)
|
||||
nokogiri (1.8.2)
|
||||
mini_portile2 (~> 2.3.0)
|
||||
nokogiri (1.10.8)
|
||||
mini_portile2 (~> 2.4.0)
|
||||
octokit (4.8.0)
|
||||
sawyer (~> 0.8.0, >= 0.5.3)
|
||||
pathutil (0.16.1)
|
||||
|
|
@ -210,7 +210,7 @@ GEM
|
|||
rouge (2.2.1)
|
||||
ruby-enum (0.7.2)
|
||||
i18n
|
||||
rubyzip (1.2.1)
|
||||
rubyzip (2.0.0)
|
||||
safe_yaml (1.0.4)
|
||||
sass (3.5.5)
|
||||
sass-listen (~> 4.0.0)
|
||||
|
|
|
|||
|
|
@ -21,7 +21,8 @@
|
|||
<div id="sidebar">
|
||||
<nav id="table-of-contents">
|
||||
<a class="logo table-of-contents-section" href="https://github.com/tree-sitter/tree-sitter">
|
||||
<img src="{{ '/assets/images/tree-sitter-small.png' | relative_url }}" width=200 height=200 />
|
||||
<img title="Tree-sitter logo, link to github repo" src="{{ '/assets/images/tree-sitter-small.png' | relative_url }}" width=200 height=200 />
|
||||
<span class="github-repo">GitHub repository</span>
|
||||
</a>
|
||||
|
||||
{% for other_page in site.html_pages %}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,17 @@ body {
|
|||
overflow: scroll;
|
||||
}
|
||||
|
||||
a[href^="http"]:after {
|
||||
content: "";
|
||||
display: inline-block;
|
||||
transform: translate(0px, 2px);
|
||||
width: .9em;
|
||||
height: .9em;
|
||||
margin-left: 3px;
|
||||
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='%23777'%3E%3Cpath d='M20 3h-5a1 1 0 1 0 0 2h3L8 14a1 1 0 1 0 2 2l9-10v3a1 1 0 1 0 2 0V4a1 1 0 0 0-1-1zM5 3L3 5v14l2 2h14l2-2v-6a1 1 0 1 0-2 0v6H5V5h6a1 1 0 1 0 0-2H5z'/%3E%3C/svg%3E");
|
||||
background-size: cover;
|
||||
}
|
||||
|
||||
#container {
|
||||
position: relative;
|
||||
max-width: $container-width;
|
||||
|
|
@ -31,6 +42,12 @@ body {
|
|||
overflow-y: auto;
|
||||
border-right: 1px solid #ccc;
|
||||
z-index: 1;
|
||||
|
||||
.github-repo {
|
||||
display: inline-block;
|
||||
padding-left: 3.75em;
|
||||
font-size: .85em;
|
||||
}
|
||||
}
|
||||
|
||||
#sidebar-toggle-link {
|
||||
|
|
@ -57,6 +74,10 @@ body {
|
|||
text-decoration: underline;
|
||||
}
|
||||
|
||||
a > span {
|
||||
text-decoration: inherit;
|
||||
}
|
||||
|
||||
.table-of-contents-section {
|
||||
border-bottom: 1px solid #ccc;
|
||||
}
|
||||
|
|
@ -118,7 +139,7 @@ body {
|
|||
}
|
||||
|
||||
#playground-container {
|
||||
> .CodeMirror {
|
||||
.CodeMirror {
|
||||
height: auto;
|
||||
max-height: 350px;
|
||||
border: 1px solid #aaa;
|
||||
|
|
@ -129,7 +150,7 @@ body {
|
|||
max-height: 350px;
|
||||
}
|
||||
|
||||
h4, select, .field {
|
||||
h4, select, .field, label {
|
||||
display: inline-block;
|
||||
margin-right: 20px;
|
||||
}
|
||||
|
|
@ -161,3 +182,7 @@ a.highlighted {
|
|||
background-color: #ddd;
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.query-error {
|
||||
text-decoration: underline red dashed;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,16 +1,39 @@
|
|||
let tree;
|
||||
|
||||
(async () => {
|
||||
const CAPTURE_REGEX = /@\s*([\w\._-]+)/g;
|
||||
const COLORS_BY_INDEX = [
|
||||
'blue',
|
||||
'chocolate',
|
||||
'darkblue',
|
||||
'darkcyan',
|
||||
'darkgreen',
|
||||
'darkred',
|
||||
'darkslategray',
|
||||
'dimgray',
|
||||
'green',
|
||||
'indigo',
|
||||
'navy',
|
||||
'red',
|
||||
'sienna',
|
||||
];
|
||||
|
||||
const scriptURL = document.currentScript.getAttribute('src');
|
||||
|
||||
const codeInput = document.getElementById('code-input');
|
||||
const languageSelect = document.getElementById('language-select');
|
||||
const loggingCheckbox = document.getElementById('logging-checkbox');
|
||||
const outputContainer = document.getElementById('output-container');
|
||||
const outputContainerScroll = document.getElementById('output-container-scroll');
|
||||
const playgroundContainer = document.getElementById('playground-container');
|
||||
const queryCheckbox = document.getElementById('query-checkbox');
|
||||
const queryContainer = document.getElementById('query-container');
|
||||
const queryInput = document.getElementById('query-input');
|
||||
const updateTimeSpan = document.getElementById('update-time');
|
||||
const demoContainer = document.getElementById('playground-container');
|
||||
const languagesByName = {};
|
||||
|
||||
loadState();
|
||||
|
||||
await TreeSitter.init();
|
||||
|
||||
const parser = new TreeSitter();
|
||||
|
|
@ -18,6 +41,12 @@ let tree;
|
|||
lineNumbers: true,
|
||||
showCursorWhenSelecting: true
|
||||
});
|
||||
|
||||
const queryEditor = CodeMirror.fromTextArea(queryInput, {
|
||||
lineNumbers: true,
|
||||
showCursorWhenSelecting: true
|
||||
});
|
||||
|
||||
const cluster = new Clusterize({
|
||||
rows: [],
|
||||
noDataText: null,
|
||||
|
|
@ -25,22 +54,30 @@ let tree;
|
|||
scrollElem: outputContainerScroll
|
||||
});
|
||||
const renderTreeOnCodeChange = debounce(renderTree, 50);
|
||||
const saveStateOnChange = debounce(saveState, 2000);
|
||||
const runTreeQueryOnChange = debounce(runTreeQuery, 50);
|
||||
|
||||
let languageName = languageSelect.value;
|
||||
let treeRows = null;
|
||||
let treeRowHighlightedIndex = -1;
|
||||
let parseCount = 0;
|
||||
let isRendering = 0;
|
||||
let query;
|
||||
|
||||
codeEditor.on('changes', handleCodeChange);
|
||||
codeEditor.on('viewportChange', runTreeQueryOnChange);
|
||||
codeEditor.on('cursorActivity', debounce(handleCursorMovement, 150));
|
||||
queryEditor.on('changes', debounce(handleQueryChange, 150));
|
||||
|
||||
loggingCheckbox.addEventListener('change', handleLoggingChange);
|
||||
queryCheckbox.addEventListener('change', handleQueryEnableChange);
|
||||
languageSelect.addEventListener('change', handleLanguageChange);
|
||||
outputContainer.addEventListener('click', handleTreeClick);
|
||||
|
||||
handleQueryEnableChange();
|
||||
await handleLanguageChange()
|
||||
|
||||
demoContainer.style.visibility = 'visible';
|
||||
playgroundContainer.style.visibility = 'visible';
|
||||
|
||||
async function handleLanguageChange() {
|
||||
const newLanguageName = languageSelect.value;
|
||||
|
|
@ -62,15 +99,17 @@ let tree;
|
|||
languageName = newLanguageName;
|
||||
parser.setLanguage(languagesByName[newLanguageName]);
|
||||
handleCodeChange();
|
||||
handleQueryChange();
|
||||
}
|
||||
|
||||
async function handleCodeChange(editor, changes) {
|
||||
const newText = codeEditor.getValue() + '\n';
|
||||
const edits = tree && changes && changes.map(treeEditForEditorChange);
|
||||
|
||||
const start = performance.now();
|
||||
if (tree && changes) {
|
||||
for (const change of changes) {
|
||||
tree.edit(treeEditForEditorChange(change));
|
||||
if (edits) {
|
||||
for (const edit of edits) {
|
||||
tree.edit(edit);
|
||||
}
|
||||
}
|
||||
const newTree = parser.parse(newText, tree);
|
||||
|
|
@ -81,6 +120,8 @@ let tree;
|
|||
tree = newTree;
|
||||
parseCount++;
|
||||
renderTreeOnCodeChange();
|
||||
runTreeQueryOnChange();
|
||||
saveStateOnChange();
|
||||
}
|
||||
|
||||
async function renderTree() {
|
||||
|
|
@ -164,6 +205,107 @@ let tree;
|
|||
handleCursorMovement();
|
||||
}
|
||||
|
||||
function runTreeQuery(_, startRow, endRow) {
|
||||
if (endRow == null) {
|
||||
const viewport = codeEditor.getViewport();
|
||||
startRow = viewport.from;
|
||||
endRow = viewport.to;
|
||||
}
|
||||
|
||||
codeEditor.operation(() => {
|
||||
const marks = codeEditor.getAllMarks();
|
||||
marks.forEach(m => m.clear());
|
||||
|
||||
if (tree && query) {
|
||||
const captures = query.captures(
|
||||
tree.rootNode,
|
||||
{row: startRow, column: 0},
|
||||
{row: endRow, column: 0},
|
||||
);
|
||||
let lastNodeId;
|
||||
for (const {name, node} of captures) {
|
||||
if (node.id === lastNodeId) continue;
|
||||
lastNodeId = node.id;
|
||||
const {startPosition, endPosition} = node;
|
||||
codeEditor.markText(
|
||||
{line: startPosition.row, ch: startPosition.column},
|
||||
{line: endPosition.row, ch: endPosition.column},
|
||||
{
|
||||
inclusiveLeft: true,
|
||||
inclusiveRight: true,
|
||||
css: `color: ${colorForCaptureName(name)}`
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
function handleQueryChange() {
|
||||
if (query) {
|
||||
query.delete();
|
||||
query.deleted = true;
|
||||
query = null;
|
||||
}
|
||||
|
||||
queryEditor.operation(() => {
|
||||
queryEditor.getAllMarks().forEach(m => m.clear());
|
||||
if (!queryCheckbox.checked) return;
|
||||
|
||||
const queryText = queryEditor.getValue();
|
||||
|
||||
try {
|
||||
query = parser.getLanguage().query(queryText);
|
||||
let match;
|
||||
|
||||
let row = 0;
|
||||
queryEditor.eachLine((line) => {
|
||||
while (match = CAPTURE_REGEX.exec(line.text)) {
|
||||
queryEditor.markText(
|
||||
{line: row, ch: match.index},
|
||||
{line: row, ch: match.index + match[0].length},
|
||||
{
|
||||
inclusiveLeft: true,
|
||||
inclusiveRight: true,
|
||||
css: `color: ${colorForCaptureName(match[1])}`
|
||||
}
|
||||
);
|
||||
}
|
||||
row++;
|
||||
});
|
||||
} catch (error) {
|
||||
const startPosition = queryEditor.posFromIndex(error.index);
|
||||
const endPosition = {
|
||||
line: startPosition.line,
|
||||
ch: startPosition.ch + (error.length || Infinity)
|
||||
};
|
||||
|
||||
if (error.index === queryText.length) {
|
||||
if (startPosition.ch > 0) {
|
||||
startPosition.ch--;
|
||||
} else if (startPosition.row > 0) {
|
||||
startPosition.row--;
|
||||
startPosition.column = Infinity;
|
||||
}
|
||||
}
|
||||
|
||||
queryEditor.markText(
|
||||
startPosition,
|
||||
endPosition,
|
||||
{
|
||||
className: 'query-error',
|
||||
inclusiveLeft: true,
|
||||
inclusiveRight: true,
|
||||
attributes: {title: error.message}
|
||||
}
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
runTreeQuery();
|
||||
saveQueryState();
|
||||
}
|
||||
|
||||
function handleCursorMovement() {
|
||||
if (isRendering) return;
|
||||
|
||||
|
|
@ -236,6 +378,17 @@ let tree;
|
|||
}
|
||||
}
|
||||
|
||||
function handleQueryEnableChange() {
|
||||
if (queryCheckbox.checked) {
|
||||
queryContainer.style.visibility = '';
|
||||
queryContainer.style.position = '';
|
||||
} else {
|
||||
queryContainer.style.visibility = 'hidden';
|
||||
queryContainer.style.position = 'absolute';
|
||||
}
|
||||
handleQueryChange();
|
||||
}
|
||||
|
||||
function treeEditForEditorChange(change) {
|
||||
const oldLineCount = change.removed.length;
|
||||
const newLineCount = change.text.length;
|
||||
|
|
@ -262,6 +415,35 @@ let tree;
|
|||
};
|
||||
}
|
||||
|
||||
function colorForCaptureName(capture) {
|
||||
const id = query.captureNames.indexOf(capture);
|
||||
return COLORS_BY_INDEX[id % COLORS_BY_INDEX.length];
|
||||
}
|
||||
|
||||
// Restore the editors and settings from localStorage. Only applies the saved
// values when language, source code, and query were all saved previously.
function loadState() {
  const savedLanguage = localStorage.getItem("language");
  const savedSource = localStorage.getItem("sourceCode");
  const savedQuery = localStorage.getItem("query");
  const savedQueryEnabled = localStorage.getItem("queryEnabled");

  const haveFullState =
    savedLanguage != null && savedSource != null && savedQuery != null;
  if (!haveFullState) return;

  queryInput.value = savedQuery;
  codeInput.value = savedSource;
  languageSelect.value = savedLanguage;
  // localStorage stores strings, so the boolean round-trips as 'true'/'false'.
  queryCheckbox.checked = savedQueryEnabled === 'true';
}
|
||||
|
||||
// Persist the current language selection and source text, then delegate the
// query-related state to saveQueryState().
function saveState() {
  const entries = [
    ["language", languageSelect.value],
    ["sourceCode", codeEditor.getValue()],
  ];
  for (const [key, value] of entries) {
    localStorage.setItem(key, value);
  }
  saveQueryState();
}
|
||||
|
||||
// Persist whether the query pane is enabled and the query editor's contents.
function saveQueryState() {
  const queryEnabled = queryCheckbox.checked;
  const queryText = queryEditor.getValue();
  localStorage.setItem("queryEnabled", queryEnabled);
  localStorage.setItem("query", queryText);
}
|
||||
|
||||
function debounce(func, wait, immediate) {
|
||||
var timeout;
|
||||
return function() {
|
||||
|
|
|
|||
|
|
@ -28,16 +28,19 @@ Parsers for these languages are fairly complete:
|
|||
|
||||
* [Bash](https://github.com/tree-sitter/tree-sitter-bash)
|
||||
* [C](https://github.com/tree-sitter/tree-sitter-c)
|
||||
* [C#](https://github.com/tree-sitter/tree-sitter-c-sharp)
|
||||
* [C++](https://github.com/tree-sitter/tree-sitter-cpp)
|
||||
* [CSS](https://github.com/tree-sitter/tree-sitter-css)
|
||||
* [Elm](https://github.com/razzeee/tree-sitter-elm)
|
||||
* [Eno](https://github.com/eno-lang/tree-sitter-eno)
|
||||
* [ERB / EJS](https://github.com/tree-sitter/tree-sitter-embedded-template)
|
||||
* [Fennel](https://github.com/travonted/tree-sitter-fennel)
|
||||
* [Go](https://github.com/tree-sitter/tree-sitter-go)
|
||||
* [HTML](https://github.com/tree-sitter/tree-sitter-html)
|
||||
* [Java](https://github.com/tree-sitter/tree-sitter-java)
|
||||
* [JavaScript](https://github.com/tree-sitter/tree-sitter-javascript)
|
||||
* [Lua](https://github.com/Azganoth/tree-sitter-lua)
|
||||
* [Markdown](https://github.com/ikatyang/tree-sitter-markdown)
|
||||
* [OCaml](https://github.com/tree-sitter/tree-sitter-ocaml)
|
||||
* [PHP](https://github.com/tree-sitter/tree-sitter-php)
|
||||
* [Python](https://github.com/tree-sitter/tree-sitter-python)
|
||||
|
|
@ -47,16 +50,20 @@ Parsers for these languages are fairly complete:
|
|||
* [TOML](https://github.com/ikatyang/tree-sitter-toml)
|
||||
* [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript)
|
||||
* [Verilog](https://github.com/tree-sitter/tree-sitter-verilog)
|
||||
* [VHDL](https://github.com/alemuller/tree-sitter-vhdl)
|
||||
* [Vue](https://github.com/ikatyang/tree-sitter-vue)
|
||||
* [YAML](https://github.com/ikatyang/tree-sitter-yaml)
|
||||
* [WASM](https://github.com/wasm-lsp/tree-sitter-wasm)
|
||||
|
||||
Parsers for these languages are in development:
|
||||
|
||||
* [Agda](https://github.com/tree-sitter/tree-sitter-agda)
|
||||
* [C-sharp](https://github.com/tree-sitter/tree-sitter-c-sharp)
|
||||
* [Haskell](https://github.com/tree-sitter/tree-sitter-haskell)
|
||||
* [Julia](https://github.com/tree-sitter/tree-sitter-julia)
|
||||
* [Nix](https://github.com/cstrahan/tree-sitter-nix)
|
||||
* [Scala](https://github.com/tree-sitter/tree-sitter-scala)
|
||||
* [SPARQL](https://github.com/BonaBeavis/tree-sitter-sparql)
|
||||
* [Swift](https://github.com/tree-sitter/tree-sitter-swift)
|
||||
|
||||
### Talks on Tree-sitter
|
||||
|
||||
|
|
|
|||
|
|
@ -5,45 +5,39 @@ permalink: using-parsers
|
|||
|
||||
# Using Parsers
|
||||
|
||||
All of Tree-sitter's parsing functionality is exposed through C APIs. Applications written in higher-level languages can use Tree-sitter via binding libraries like [node-tree-sitter](https://github.com/tree-sitter/node-tree-sitter) or [rust-tree-sitter](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust), which have their own documentation.
|
||||
All of Tree-sitter's parsing functionality is exposed through C APIs. Applications written in higher-level languages can use Tree-sitter via binding libraries like [node-tree-sitter](https://github.com/tree-sitter/node-tree-sitter) or the [tree-sitter rust crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust), which have their own documentation.
|
||||
|
||||
This document will describes the general concepts of how to use Tree-sitter, which should be relevant regardless of what language you're using. It also goes into some C-specific details that are useful if you're using the C API directly or are building a new binding to a different language.
|
||||
This document will describe the general concepts of how to use Tree-sitter, which should be relevant regardless of what language you're using. It also goes into some C-specific details that are useful if you're using the C API directly or are building a new binding to a different language.
|
||||
|
||||
All of the API functions shown here are declared and documented in the `tree_sitter/api.h` header file.
|
||||
All of the API functions shown here are declared and documented in the [`tree_sitter/api.h`](https://github.com/tree-sitter/tree-sitter/blob/master/lib/include/tree_sitter/api.h) header file. You may also want to browse the [online Rust API docs](https://docs.rs/tree-sitter), which correspond to the C APIs closely.
|
||||
|
||||
## Building the Library
|
||||
## Getting Started
|
||||
|
||||
Building the library requires one git submodule: [`utf8proc`](https://github.com/JuliaStrings/utf8proc). Make sure that `utf8proc` is downloaded by running this command from the Tree-sitter directory:
|
||||
### Building the Library
|
||||
|
||||
```sh
|
||||
git submodule update --init
|
||||
```
|
||||
To build the library on a POSIX system, just run `make` in the Tree-sitter directory. This will create a static library called `libtree-sitter.a` as well as dynamic libraries.
|
||||
|
||||
To build the library on a POSIX system, run this script, which will create a static library called `libtree-sitter.a` in the Tree-sitter folder:
|
||||
|
||||
```sh
|
||||
script/build-lib
|
||||
```
|
||||
|
||||
Alternatively, you can use the library in a larger project by adding one source file to the project. This source file needs three directories to be in the include path when compiled:
|
||||
Alternatively, you can incorporate the library in a larger project's build system by adding one source file to the build. This source file needs two directories to be in the include path when compiled:
|
||||
|
||||
**source file:**
|
||||
* `tree-sitter/lib/src/lib.c`
|
||||
|
||||
- `tree-sitter/lib/src/lib.c`
|
||||
|
||||
**include directories:**
|
||||
* `tree-sitter/lib/src`
|
||||
* `tree-sitter/lib/include`
|
||||
* `tree-sitter/lib/utf8proc`
|
||||
|
||||
## The Objects
|
||||
- `tree-sitter/lib/src`
|
||||
- `tree-sitter/lib/include`
|
||||
|
||||
### The Basic Objects
|
||||
|
||||
There are four main types of objects involved when using Tree-sitter: languages, parsers, syntax trees, and syntax nodes. In C, these are called `TSLanguage`, `TSParser`, `TSTree`, and `TSNode`.
|
||||
* A `TSLanguage` is an opaque object that defines how to parse a particular programming language. The code for each `TSLanguage` is generated by Tree-sitter. Many languages are already available in separate git repositories within the [Tree-sitter GitHub organization](https://github.com/tree-sitter). See [the next page](./creating-parsers) for how to create new languages.
|
||||
* A `TSParser` is a stateful object that can be assigned a `TSLanguage` and used to produce a `TSTree` based on some source code.
|
||||
* A `TSTree` represents the syntax tree of an entire source code file. It contains `TSNode` instances that indicate the structure of the source code. It can also be edited and used to produce a new `TSTree` in the event that the source code changes.
|
||||
* A `TSNode` represents a single node in the syntax tree. It tracks its start and end positions in the source code, as well as its relation to other nodes like its parent, siblings and children.
|
||||
|
||||
## An Example Program
|
||||
- A `TSLanguage` is an opaque object that defines how to parse a particular programming language. The code for each `TSLanguage` is generated by Tree-sitter. Many languages are already available in separate git repositories within the [Tree-sitter GitHub organization](https://github.com/tree-sitter). See [the next page](./creating-parsers) for how to create new languages.
|
||||
- A `TSParser` is a stateful object that can be assigned a `TSLanguage` and used to produce a `TSTree` based on some source code.
|
||||
- A `TSTree` represents the syntax tree of an entire source code file. It contains `TSNode` instances that indicate the structure of the source code. It can also be edited and used to produce a new `TSTree` in the event that the source code changes.
|
||||
- A `TSNode` represents a single node in the syntax tree. It tracks its start and end positions in the source code, as well as its relation to other nodes like its parent, siblings and children.
|
||||
|
||||
### An Example Program
|
||||
|
||||
Here's an example of a simple C program that uses the Tree-sitter [JSON parser](https://github.com/tree-sitter/tree-sitter-json).
|
||||
|
||||
|
|
@ -83,7 +77,7 @@ int main() {
|
|||
TSNode number_node = ts_node_named_child(array_node, 0);
|
||||
|
||||
// Check that the nodes have the expected types.
|
||||
assert(strcmp(ts_node_type(root_node), "value") == 0);
|
||||
assert(strcmp(ts_node_type(root_node), "document") == 0);
|
||||
assert(strcmp(ts_node_type(array_node), "array") == 0);
|
||||
assert(strcmp(ts_node_type(number_node), "number") == 0);
|
||||
|
||||
|
|
@ -118,7 +112,9 @@ clang \
|
|||
./test-json-parser
|
||||
```
|
||||
|
||||
## Providing the Source Code
|
||||
## Basic Parsing
|
||||
|
||||
### Providing the Code
|
||||
|
||||
In the example above, we parsed source code stored in a simple string using the `ts_parser_parse_string` function:
|
||||
|
||||
|
|
@ -131,7 +127,7 @@ TSTree *ts_parser_parse_string(
|
|||
);
|
||||
```
|
||||
|
||||
You may want to parse source code that's stored in a custom data structure, like a [piece table](https://en.wikipedia.org/wiki/Piece_table) or a [rope](https://en.wikipedia.org/wiki/Rope_(data_structure)). In this case, you can use the more general `ts_parser_parse` function:
|
||||
You may want to parse source code that's stored in a custom data structure, like a [piece table](https://en.wikipedia.org/wiki/Piece_table) or a [rope](<https://en.wikipedia.org/wiki/Rope_(data_structure)>). In this case, you can use the more general `ts_parser_parse` function:
|
||||
|
||||
```c
|
||||
TSTree *ts_parser_parse(
|
||||
|
|
@ -156,9 +152,9 @@ typedef struct {
|
|||
} TSInput;
|
||||
```
|
||||
|
||||
## Syntax Nodes
|
||||
### Syntax Nodes
|
||||
|
||||
Tree-sitter provides a [DOM](https://en.wikipedia.org/wiki/Document_Object_Model)-style interface for inspecting syntax trees. A syntax node's *type* is a string that indicates which grammar rule the node represents.
|
||||
Tree-sitter provides a [DOM](https://en.wikipedia.org/wiki/Document_Object_Model)-style interface for inspecting syntax trees. A syntax node's _type_ is a string that indicates which grammar rule the node represents.
|
||||
|
||||
```c
|
||||
const char *ts_node_type(TSNode);
|
||||
|
|
@ -179,9 +175,9 @@ TSPoint ts_node_start_point(TSNode);
|
|||
TSPoint ts_node_end_point(TSNode);
|
||||
```
|
||||
|
||||
## Retrieving Nodes
|
||||
### Retrieving Nodes
|
||||
|
||||
Every tree has a *root node*:
|
||||
Every tree has a _root node_:
|
||||
|
||||
```c
|
||||
TSNode ts_tree_root_node(const TSTree *);
|
||||
|
|
@ -202,29 +198,23 @@ TSNode ts_node_prev_sibling(TSNode);
|
|||
TSNode ts_node_parent(TSNode);
|
||||
```
|
||||
|
||||
These methods may all return a *null node* to indicate, for example, that a node does not *have* a next sibling. You can check if a node is null:
|
||||
These methods may all return a _null node_ to indicate, for example, that a node does not _have_ a next sibling. You can check if a node is null:
|
||||
|
||||
```c
|
||||
bool ts_node_is_null(TSNode);
|
||||
```
|
||||
|
||||
## Named vs Anonymous Nodes
|
||||
### Named vs Anonymous Nodes
|
||||
|
||||
Tree-sitter produces [*concrete* syntax trees](https://en.wikipedia.org/wiki/Parse_tree) - trees that contain nodes for every individual token in the source code, including things like commas and parentheses. This is important for use-cases that deal with individual tokens, like [syntax highlighting](https://en.wikipedia.org/wiki/Syntax_highlighting). But some types of code analysis are easier to perform using an [*abstract* syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) - a tree in which the less important details have been removed. Tree-sitter's trees support these use cases by making a distinction between *named* and *anonymous* nodes.
|
||||
Tree-sitter produces [_concrete_ syntax trees](https://en.wikipedia.org/wiki/Parse_tree) - trees that contain nodes for every individual token in the source code, including things like commas and parentheses. This is important for use-cases that deal with individual tokens, like [syntax highlighting](https://en.wikipedia.org/wiki/Syntax_highlighting). But some types of code analysis are easier to perform using an [_abstract_ syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) - a tree in which the less important details have been removed. Tree-sitter's trees support these use cases by making a distinction between _named_ and _anonymous_ nodes.
|
||||
|
||||
Consider a grammar rule like this:
|
||||
|
||||
```js
|
||||
if_statement: $ => seq(
|
||||
'if',
|
||||
'(',
|
||||
$._expression,
|
||||
')',
|
||||
$._statement,
|
||||
)
|
||||
if_statement: ($) => seq("if", "(", $._expression, ")", $._statement);
|
||||
```
|
||||
|
||||
A syntax node representing an `if_statement` in this language would have 5 children: the condition expression, the body statement, as well as the `if`, `(`, and `)` tokens. The expression and the statement would be marked as *named* nodes, because they have been given explicit names in the grammar. But the `if`, `(`, and `)` nodes would *not* be named nodes, because they are represented in the grammar as simple strings.
|
||||
A syntax node representing an `if_statement` in this language would have 5 children: the condition expression, the body statement, as well as the `if`, `(`, and `)` tokens. The expression and the statement would be marked as _named_ nodes, because they have been given explicit names in the grammar. But the `if`, `(`, and `)` nodes would _not_ be named nodes, because they are represented in the grammar as simple strings.
|
||||
|
||||
You can check whether any given node is named:
|
||||
|
||||
|
|
@ -243,9 +233,9 @@ TSNode ts_node_prev_named_sibling(TSNode);
|
|||
|
||||
If you use this group of methods, the syntax tree functions much like an abstract syntax tree.
|
||||
|
||||
## Node Field Names
|
||||
### Node Field Names
|
||||
|
||||
To make syntax nodes easier to analyze, many grammars assign unique *field names* to particular child nodes. The next page [explains](./creating-parsers#using-fields) how to do this on your own grammars. If a syntax node has fields, you can access its children using their field name:
|
||||
To make syntax nodes easier to analyze, many grammars assign unique _field names_ to particular child nodes. The next page [explains](./creating-parsers#using-fields) how to do this on your own grammars. If a syntax node has fields, you can access its children using their field name:
|
||||
|
||||
```c
|
||||
TSNode ts_node_child_by_field_name(
|
||||
|
|
@ -269,9 +259,11 @@ The field ids can be used in place of the name:
|
|||
TSNode ts_node_child_by_field_id(TSNode, TSFieldId);
|
||||
```
|
||||
|
||||
## Editing
|
||||
## Advanced Parsing
|
||||
|
||||
In applications like text editors, you often need to re-parse a file after its source code has changed. Tree-sitter is designed to support this use case efficiently. There are two steps required. First, you must *edit* the syntax tree, which adjusts the ranges of its nodes so that they stay in sync with the code.
|
||||
### Editing
|
||||
|
||||
In applications like text editors, you often need to re-parse a file after its source code has changed. Tree-sitter is designed to support this use case efficiently. There are two steps required. First, you must _edit_ the syntax tree, which adjusts the ranges of its nodes so that they stay in sync with the code.
|
||||
|
||||
```c
|
||||
typedef struct {
|
||||
|
|
@ -294,13 +286,13 @@ When you edit a syntax tree, the positions of its nodes will change. If you have
|
|||
void ts_node_edit(TSNode *, const TSInputEdit *);
|
||||
```
|
||||
|
||||
This `ts_node_edit` function is *only* needed in the case where you have retrieved `TSNode` instances *before* editing the tree, and then *after* editing the tree, you want to continue to use those specific node instances. Often, you'll just want to re-fetch nodes from the edited tree, in which case `ts_node_edit` is not needed.
|
||||
This `ts_node_edit` function is _only_ needed in the case where you have retrieved `TSNode` instances _before_ editing the tree, and then _after_ editing the tree, you want to continue to use those specific node instances. Often, you'll just want to re-fetch nodes from the edited tree, in which case `ts_node_edit` is not needed.
|
||||
|
||||
## Multi-language Documents
|
||||
### Multi-language Documents
|
||||
|
||||
Sometimes, different parts of a file may be written in different languages. For example, templating languages like [EJS](http://ejs.co) and [ERB](https://ruby-doc.org/stdlib-2.5.1/libdoc/erb/rdoc/ERB.html) allow you to generate HTML by writing a mixture of HTML and another language like JavaScript or Ruby.
|
||||
|
||||
Tree-sitter handles these types of documents by allowing you to create a syntax tree based on the text in certain *ranges* of a file.
|
||||
Tree-sitter handles these types of documents by allowing you to create a syntax tree based on the text in certain _ranges_ of a file.
|
||||
|
||||
```c
|
||||
typedef struct {
|
||||
|
|
@ -402,7 +394,7 @@ int main(int argc, const char **argv) {
|
|||
|
||||
This API allows for great flexibility in how languages can be composed. Tree-sitter is not responsible for mediating the interactions between languages. Instead, you are free to do that using arbitrary application-specific logic.
|
||||
|
||||
## Concurrency
|
||||
### Concurrency
|
||||
|
||||
Tree-sitter supports multi-threaded use cases by making syntax trees very cheap to copy.
|
||||
|
||||
|
|
@ -410,4 +402,443 @@ Tree-sitter supports multi-threaded use cases by making syntax trees very cheap
|
|||
TSTree *ts_tree_copy(const TSTree *);
|
||||
```
|
||||
|
||||
Internally, copying a syntax tree just entails incrementing an atomic reference count. Conceptually, it provides you a new tree which you can freely query, edit, reparse, or delete on a new thread while continuing to use the original tree on a different thread. Note that individual `TSTree` instances are *not* thread safe; you must copy a tree if you want to use it on multiple threads simultaneously.
|
||||
Internally, copying a syntax tree just entails incrementing an atomic reference count. Conceptually, it provides you a new tree which you can freely query, edit, reparse, or delete on a new thread while continuing to use the original tree on a different thread. Note that individual `TSTree` instances are _not_ thread safe; you must copy a tree if you want to use it on multiple threads simultaneously.
|
||||
|
||||
## Other Tree Operations
|
||||
|
||||
### Walking Trees with Tree Cursors
|
||||
|
||||
You can access every node in a syntax tree using the `TSNode` APIs [described above](#retrieving-nodes), but if you need to access a large number of nodes, the fastest way to do so is with a _tree cursor_. A cursor is a stateful object that allows you to walk a syntax tree with maximum efficiency.
|
||||
|
||||
You can initialize a cursor from any node:
|
||||
|
||||
```c
|
||||
TSTreeCursor ts_tree_cursor_new(TSNode);
|
||||
```
|
||||
|
||||
You can move the cursor around the tree:
|
||||
|
||||
```c
|
||||
bool ts_tree_cursor_goto_first_child(TSTreeCursor *);
|
||||
bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *);
|
||||
bool ts_tree_cursor_goto_parent(TSTreeCursor *);
|
||||
```
|
||||
|
||||
These methods return `true` if the cursor successfully moved and `false` if there was no node to move to.
|
||||
|
||||
You can always retrieve the cursor's current node, as well as the [field name](#node-field-names) that is associated with the current node.
|
||||
|
||||
```c
|
||||
TSNode ts_tree_cursor_current_node(const TSTreeCursor *);
|
||||
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *);
|
||||
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *);
|
||||
```
|
||||
|
||||
## Pattern Matching with Queries
|
||||
|
||||
Many code analysis tasks involve searching for patterns in syntax trees. Tree-sitter provides a small declarative language for expressing these patterns and searching for matches. The language is similar to the format of Tree-sitter's [unit test system](./creating-parsers#command-test).
|
||||
|
||||
### Query Syntax
|
||||
|
||||
A _query_ consists of one or more _patterns_, where each pattern is an [S-expression](https://en.wikipedia.org/wiki/S-expression) that matches a certain set of nodes in a syntax tree. The expression to match a given node consists of a pair of parentheses containing two things: the node's type, and optionally, a series of other S-expressions that match the node's children. For example, this pattern would match any `binary_expression` node whose children are both `number_literal` nodes:
|
||||
|
||||
```
|
||||
(binary_expression (number_literal) (number_literal))
|
||||
```
|
||||
|
||||
Children can also be omitted. For example, this would match any `binary_expression` where at least _one_ of its children is a `string_literal` node:
|
||||
|
||||
```
|
||||
(binary_expression (string_literal))
|
||||
```
|
||||
|
||||
#### Fields
|
||||
|
||||
In general, it's a good idea to make patterns more specific by specifying [field names](#node-field-names) associated with child nodes. You do this by prefixing a child pattern with a field name followed by a colon. For example, this pattern would match an `assignment_expression` node where the `left` child is a `member_expression` whose `object` is a `call_expression`.
|
||||
|
||||
```
|
||||
(assignment_expression
|
||||
left: (member_expression
|
||||
object: (call_expression)))
|
||||
```
|
||||
|
||||
#### Anonymous Nodes
|
||||
|
||||
The parenthesized syntax for writing nodes only applies to [named nodes](#named-vs-anonymous-nodes). To match specific anonymous nodes, you write their name between double quotes. For example, this pattern would match any `binary_expression` where the operator is `!=` and the right side is `null`:
|
||||
|
||||
```
|
||||
(binary_expression
|
||||
operator: "!="
|
||||
right: (null))
|
||||
```
|
||||
|
||||
#### Capturing Nodes
|
||||
|
||||
When matching patterns, you may want to process specific nodes within the pattern. Captures allow you to associate names with specific nodes in a pattern, so that you can later refer to those nodes by those names. Capture names are written _after_ the nodes that they refer to, and start with an `@` character.
|
||||
|
||||
For example, this pattern would match any assignment of a `function` to an `identifier`, and it would associate the name `the-function-name` with the identifier:
|
||||
|
||||
```
|
||||
(assignment_expression
|
||||
left: (identifier) @the-function-name
|
||||
right: (function))
|
||||
```
|
||||
|
||||
And this pattern would match all method definitions, associating the name `the-method-name` with the method name, `the-class-name` with the containing class name:
|
||||
|
||||
```
|
||||
(class_declaration
|
||||
name: (identifier) @the-class-name
|
||||
body: (class_body
|
||||
(method_definition
|
||||
name: (property_identifier) @the-method-name)))
|
||||
```
|
||||
|
||||
#### Quantification Operators
|
||||
|
||||
You can match a repeating sequence of sibling nodes using the postfix `+` and `*` _repetition_ operators, which work analogously to the `+` and `*` operators [in regular expressions](https://en.wikipedia.org/wiki/Regular_expression#Basic_concepts). The `+` operator matches _one or more_ repetitions of a pattern, and the `*` operator matches _zero or more_.
|
||||
|
||||
For example, this pattern would match a sequence of one or more comments:
|
||||
|
||||
```
|
||||
(comment)+
|
||||
```
|
||||
|
||||
This pattern would match a class declaration, capturing all of the decorators if any were present:
|
||||
|
||||
```
|
||||
(class_declaration
|
||||
(decorator)* @the-decorator
|
||||
name: (identifier) @the-name)
|
||||
```
|
||||
|
||||
You can also mark a node as optional using the `?` operator. For example, this pattern would match all function calls, capturing a string argument if one was present:
|
||||
|
||||
```
|
||||
(call_expression
|
||||
function: (identifier) @the-function
|
||||
arguments: (arguments (string)? @the-string-arg))
|
||||
```
|
||||
|
||||
#### Grouping Sibling Nodes
|
||||
|
||||
You can also use parentheses for grouping a sequence of _sibling_ nodes. For example, this pattern would match a comment followed by a function declaration:
|
||||
|
||||
```
|
||||
(
|
||||
(comment)
|
||||
(function_declaration)
|
||||
)
|
||||
```
|
||||
|
||||
Any of the quantification operators mentioned above (`+`, `*`, and `?`) can also be applied to groups. For example, this pattern would match a comma-separated series of numbers:
|
||||
|
||||
```
|
||||
(
|
||||
(number)
|
||||
("," (number))*
|
||||
)
|
||||
```
|
||||
|
||||
#### Alternations
|
||||
|
||||
An alternation is written as a pair of square brackets (`[]`) containing a list of alternative patterns.
|
||||
This is similar to _character classes_ from regular expressions (`[abc]` matches either a, b, or c).
|
||||
|
||||
For example, this pattern would match a call to either a variable or an object property.
|
||||
In the case of a variable, capture it as `@function`, and in the case of a property, capture it as `@method`:
|
||||
|
||||
```
|
||||
(call_expression
|
||||
function: [
|
||||
(identifier) @function
|
||||
(member_expression
|
||||
property: (property_identifier) @method)
|
||||
])
|
||||
```
|
||||
|
||||
This pattern would match a set of possible keyword tokens, capturing them as `@keyword`:
|
||||
|
||||
```
|
||||
[
|
||||
"break"
|
||||
"catch"
|
||||
"delete"
|
||||
"else"
|
||||
"for"
|
||||
"function"
|
||||
"if"
|
||||
"return"
|
||||
"try"
|
||||
"while"
|
||||
] @keyword
|
||||
```
|
||||
|
||||
#### Wildcard Node
|
||||
|
||||
A wildcard node is represented with an underscore (`(_)`); it matches any node.
|
||||
This is similar to `.` in regular expressions.
|
||||
|
||||
For example, this pattern would match any node inside a call:
|
||||
|
||||
```
|
||||
(call (_) @call.inner)
|
||||
```
|
||||
|
||||
|
||||
#### Anchors
|
||||
|
||||
The anchor operator, `.`, is used to constrain the ways in which child patterns are matched. It has different behaviors depending on where it's placed inside a query.
|
||||
|
||||
When `.` is placed before the _first_ child within a parent pattern, the child will only match when it is the first named node in the parent. For example, the below pattern matches a given `array` node at most once, assigning the `@the-element` capture to the first `identifier` node in the parent `array`:
|
||||
|
||||
```
|
||||
(array . (identifier) @the-element)
|
||||
```
|
||||
|
||||
Without this anchor, the pattern would match once for every identifier in the array, with `@the-element` bound to each matched identifier.
|
||||
|
||||
Similarly, an anchor placed after a pattern's _last_ child will cause that child pattern to only match nodes that are the last named child of their parent. The below pattern matches only nodes that are the last named child within a `block`.
|
||||
|
||||
```
|
||||
(block (_) @last-expression .)
|
||||
```
|
||||
|
||||
Finally, an anchor _between_ two child patterns will cause the patterns to only match nodes that are immediate siblings. The pattern below, given a long dotted name like `a.b.c.d`, will only match pairs of consecutive identifiers: `a, b`, `b, c`, and `c, d`.
|
||||
|
||||
```
|
||||
(dotted_name
|
||||
(identifier) @prev-id
|
||||
.
|
||||
(identifier) @next-id)
|
||||
```
|
||||
|
||||
Without the anchor, non-consecutive pairs like `a, c` and `b, d` would also be matched.
|
||||
|
||||
The restrictions placed on a pattern by an anchor operator ignore anonymous nodes.
|
||||
|
||||
#### Predicates
|
||||
|
||||
You can also specify arbitrary metadata and conditions associated with a pattern by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions start with a _predicate name_ beginning with a `#` character. After that, they can contain an arbitrary number of `@`-prefixed capture names or strings.
|
||||
|
||||
For example, this pattern would match identifiers whose names are written in `SCREAMING_SNAKE_CASE`:
|
||||
|
||||
```
|
||||
(
|
||||
(identifier) @constant
|
||||
(#match? @constant "^[A-Z][A-Z_]+")
|
||||
)
|
||||
```
|
||||
|
||||
And this pattern would match key-value pairs where the `value` is an identifier with the same name as the key:
|
||||
|
||||
```
|
||||
(
|
||||
(pair
|
||||
key: (property_identifier) @key-name
|
||||
value: (identifier) @value-name)
|
||||
(#eq? @key-name @value-name)
|
||||
)
|
||||
```
|
||||
|
||||
_Note_ - Predicates are not handled directly by the Tree-sitter C library. They are just exposed in a structured form so that higher-level code can perform the filtering. However, higher-level bindings to Tree-sitter like [the Rust crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) implement a few common predicates like `#eq?` and `#match?`.
|
||||
|
||||
### The Query API
|
||||
|
||||
Create a query by specifying a string containing one or more patterns:
|
||||
|
||||
```c
|
||||
TSQuery *ts_query_new(
|
||||
const TSLanguage *language,
|
||||
const char *source,
|
||||
uint32_t source_len,
|
||||
uint32_t *error_offset,
|
||||
TSQueryError *error_type
|
||||
);
|
||||
```
|
||||
|
||||
If there is an error in the query, then the `error_offset` argument will be set to the byte offset of the error, and the `error_type` argument will be set to a value that indicates the type of error:
|
||||
|
||||
```c
|
||||
typedef enum {
|
||||
TSQueryErrorNone = 0,
|
||||
TSQueryErrorSyntax,
|
||||
TSQueryErrorNodeType,
|
||||
TSQueryErrorField,
|
||||
TSQueryErrorCapture,
|
||||
} TSQueryError;
|
||||
```
|
||||
|
||||
The `TSQuery` value is immutable and can be safely shared between threads. To execute the query, create a `TSQueryCursor`, which carries the state needed for processing the queries. The query cursor should not be shared between threads, but can be reused for many query executions.
|
||||
|
||||
```c
|
||||
TSQueryCursor *ts_query_cursor_new(void);
|
||||
```
|
||||
|
||||
You can then execute the query on a given syntax node:
|
||||
|
||||
```c
|
||||
void ts_query_cursor_exec(TSQueryCursor *, const TSQuery *, TSNode);
|
||||
```
|
||||
|
||||
You can then iterate over the matches:
|
||||
|
||||
```c
|
||||
typedef struct {
|
||||
TSNode node;
|
||||
uint32_t index;
|
||||
} TSQueryCapture;
|
||||
|
||||
typedef struct {
|
||||
uint32_t id;
|
||||
uint16_t pattern_index;
|
||||
uint16_t capture_count;
|
||||
const TSQueryCapture *captures;
|
||||
} TSQueryMatch;
|
||||
|
||||
bool ts_query_cursor_next_match(TSQueryCursor *, TSQueryMatch *match);
|
||||
```
|
||||
|
||||
This function will return `false` when there are no more matches. Otherwise, it will populate the `match` with data about which pattern matched and which nodes were captured.
|
||||
|
||||
## Static Node Types
|
||||
|
||||
In languages with static typing, it can be helpful for syntax trees to provide specific type information about individual syntax nodes. Tree-sitter makes this information available via a generated file called `node-types.json`. This _node types_ file provides structured data about every possible syntax node in a grammar.
|
||||
|
||||
You can use this data to generate type declarations in statically-typed programming languages. For example, GitHub's [Semantic](https://github.com/github/semantic) uses these node types files to [generate Haskell data types](https://github.com/github/semantic/tree/master/semantic-ast) for every possible syntax node, which allows for code analysis algorithms to be structurally verified by the Haskell type system.
|
||||
|
||||
The node types file contains an array of objects, each of which describes a particular type of syntax node using the following entries:
|
||||
|
||||
#### Basic Info
|
||||
|
||||
Every object in this array has these two entries:
|
||||
|
||||
- `"type"` - A string that indicates which grammar rule the node represents. This corresponds to the `ts_node_type` function described [above](#syntax-nodes).
|
||||
- `"named"` - A boolean that indicates whether this kind of node corresponds to a rule name in the grammar or just a string literal. See [above](#named-vs-anonymous-nodes) for more info.
|
||||
|
||||
Examples:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "string_literal",
|
||||
"named": true
|
||||
}
|
||||
{
|
||||
"type": "+",
|
||||
"named": false
|
||||
}
|
||||
```
|
||||
|
||||
Together, these two fields constitute a unique identifier for a node type; no two top-level objects in the `node-types.json` should have the same values for both `"type"` and `"named"`.
|
||||
|
||||
#### Internal Nodes
|
||||
|
||||
Many syntax nodes can have _children_. The node type object describes the possible children that a node can have using the following entries:
|
||||
|
||||
- `"fields"` - An object that describes the possible [fields](#node-field-names) that the node can have. The keys of this object are field names, and the values are _child type_ objects, described below.
|
||||
- `"children"` - Another _child type_ object that describes all of the node's possible _named_ children _without_ fields.
|
||||
|
||||
A _child type_ object describes a set of child nodes using the following entries:
|
||||
|
||||
- `"required"` - A boolean indicating whether there is always _at least one_ node in this set.
|
||||
- `"multiple"` - A boolean indicating whether there can be _multiple_ nodes in this set.
|
||||
- `"types"`- An array of objects that represent the possible types of nodes in this set. Each object has two keys: `"type"` and `"named"`, whose meanings are described above.
|
||||
|
||||
Example with fields:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "method_definition",
|
||||
"named": true,
|
||||
"fields": {
|
||||
"body": {
|
||||
"multiple": false,
|
||||
"required": true,
|
||||
"types": [{ "type": "statement_block", "named": true }]
|
||||
},
|
||||
"decorator": {
|
||||
"multiple": true,
|
||||
"required": false,
|
||||
"types": [{ "type": "decorator", "named": true }]
|
||||
},
|
||||
"name": {
|
||||
"multiple": false,
|
||||
"required": true,
|
||||
"types": [
|
||||
{ "type": "computed_property_name", "named": true },
|
||||
{ "type": "property_identifier", "named": true }
|
||||
]
|
||||
},
|
||||
"parameters": {
|
||||
"multiple": false,
|
||||
"required": true,
|
||||
"types": [{ "type": "formal_parameters", "named": true }]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Example with children:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "array",
|
||||
"named": true,
|
||||
"fields": {},
|
||||
"children": {
|
||||
"multiple": true,
|
||||
"required": false,
|
||||
"types": [
|
||||
{ "type": "_expression", "named": true },
|
||||
{ "type": "spread_element", "named": true }
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Supertype Nodes
|
||||
|
||||
In Tree-sitter grammars, there are usually certain rules that represent abstract _categories_ of syntax nodes (e.g. "expression", "type", "declaration"). In the `grammar.js` file, these are often written as [hidden rules](./creating-parsers#hiding-rules) whose definition is a simple [`choice`](./creating-parsers#the-grammar-dsl) where each member is just a single symbol.
|
||||
|
||||
Normally, hidden rules are not mentioned in the node types file, since they don't appear in the syntax tree. But if you add a hidden rule to the grammar's [`supertypes` list](./creating-parsers#the-grammar-dsl), then it _will_ show up in the node types file, with the following special entry:
|
||||
|
||||
- `"subtypes"` - An array of objects that specify the _types_ of nodes that this 'supertype' node can wrap.
|
||||
|
||||
Example:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "_declaration",
|
||||
"named": true,
|
||||
"subtypes": [
|
||||
{ "type": "class_declaration", "named": true },
|
||||
{ "type": "function_declaration", "named": true },
|
||||
{ "type": "generator_function_declaration", "named": true },
|
||||
{ "type": "lexical_declaration", "named": true },
|
||||
{ "type": "variable_declaration", "named": true }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Supertype nodes will also appear elsewhere in the node types file, as children of other node types, in a way that corresponds with how the supertype rule was used in the grammar. This can make the node types much shorter and easier to read, because a single supertype will take the place of multiple subtypes.
|
||||
|
||||
Example:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "export_statement",
|
||||
"named": true,
|
||||
"fields": {
|
||||
"declaration": {
|
||||
"multiple": false,
|
||||
"required": false,
|
||||
"types": [{ "type": "_declaration", "named": true }]
|
||||
},
|
||||
"source": {
|
||||
"multiple": false,
|
||||
"required": false,
|
||||
"types": [{ "type": "string", "named": true }]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
|
|
|||
|
|
@ -13,16 +13,16 @@ Developing Tree-sitter grammars can have a difficult learning curve, but once yo
|
|||
|
||||
In order to develop a Tree-sitter parser, there are two dependencies that you need to install:
|
||||
|
||||
* **Node.js** - Tree-sitter grammars are written in JavaScript, and Tree-sitter uses [Node.js][node.js] to interpret JavaScript files. It requires the `node` command to be in one of the directories in your [`PATH`][path-env]. It shouldn't matter what version of Node you have.
|
||||
* **Node.js** - Tree-sitter grammars are written in JavaScript, and Tree-sitter uses [Node.js][node.js] to interpret JavaScript files. It requires the `node` command to be in one of the directories in your [`PATH`][path-env]. You'll need Node.js version 6.0 or greater.
|
||||
* **A C Compiler** - Tree-sitter creates parsers that are written in C. In order to run and test these parsers with the `tree-sitter parse` or `tree-sitter test` commands, you must have a C/C++ compiler installed. Tree-sitter will try to look for these compilers in the standard places for each platform.
|
||||
|
||||
### Installation
|
||||
|
||||
To create a Tree-sitter parser, you need to use the [the `tree-sitter` CLI][tree-sitter-cli]. You can install the CLI in a few different ways:
|
||||
To create a Tree-sitter parser, you need to use [the `tree-sitter` CLI][tree-sitter-cli]. You can install the CLI in a few different ways:
|
||||
|
||||
* Install the `tree-sitter-cli` [Node.js module][node-module] using [`npm`][npm], the Node package manager. This is the recommended approach, and it is discussed further in the next section.
|
||||
* Download a binary for your platform from [the latest GitHub release][releases], and put it into a directory on your `PATH`.
|
||||
* Build the `tree-sitter-cli` [Rust crate][crate] from source using [`cargo`][cargo], the Rust package manager.
|
||||
* Build the `tree-sitter-cli` [Rust crate][crate] from source using [`cargo`][cargo], the Rust package manager. See [the contributing docs](/docs/section-5-contributing.md#developing-tree-sitter) for more information.
|
||||
|
||||
### Project Setup
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ module.exports = grammar({
|
|||
});
|
||||
```
|
||||
|
||||
Then run the the following command:
|
||||
Then run the following command:
|
||||
|
||||
```sh
|
||||
tree-sitter generate
|
||||
|
|
@ -110,9 +110,9 @@ If there is an ambiguity or *local ambiguity* in your grammar, Tree-sitter will
|
|||
|
||||
The `tree-sitter test` command allows you to easily test that your parser is working correctly.
|
||||
|
||||
For each rule that you add to the grammar, you should first create a *test* that describes how the syntax trees should look when parsing that rule. These tests are written using specially-formatted text files in a `corpus` directory in your parser's root folder.
|
||||
For each rule that you add to the grammar, you should first create a *test* that describes how the syntax trees should look when parsing that rule. These tests are written using specially-formatted text files in the `corpus/` or `test/corpus/` directories within your parser's root folder.
|
||||
|
||||
For example, you might have a file called `corpus/statements.txt` that contains a series of entries like this:
|
||||
For example, you might have a file called `test/corpus/statements.txt` that contains a series of entries like this:
|
||||
|
||||
```
|
||||
==================
|
||||
|
|
@ -152,7 +152,7 @@ func x() int {
|
|||
|
||||
These tests are important. They serve as the parser's API documentation, and they can be run every time you change the grammar to verify that everything still parses correctly.
|
||||
|
||||
By default, the `tree-sitter test` command runs all of the tests in your `corpus` folder. To run a particular test, you can use the the `-f` flag:
|
||||
By default, the `tree-sitter test` command runs all of the tests in your `corpus` or `test/corpus/` folder. To run a particular test, you can use the `-f` flag:
|
||||
|
||||
```sh
|
||||
tree-sitter test -f 'Return statements'
|
||||
|
|
@ -164,6 +164,10 @@ The recommendation is to be comprehensive in adding tests. If it's a visible nod
|
|||
|
||||
You might notice that the first time you run `tree-sitter test` after regenerating your parser, it takes some extra time. This is because Tree-sitter automatically compiles your C code into a dynamically-loadable library. It recompiles your parser as-needed whenever you update it by re-running `tree-sitter generate`.
|
||||
|
||||
#### Syntax Highlighting Tests
|
||||
|
||||
The `tree-sitter test` command will *also* run any syntax highlighting tests in the `test/highlight` folder, if it exists. For more information about syntax highlighting tests, see [the syntax highlighting page][syntax-highlighting-tests].
|
||||
|
||||
### Command: `parse`
|
||||
|
||||
You can run your parser on an arbitrary file using `tree-sitter parse`. This will print the resulting syntax tree, including nodes' ranges and field names, like this:
|
||||
|
|
@ -180,12 +184,16 @@ You can run your parser on an arbitrary file using `tree-sitter parse`. This wil
|
|||
(int_literal [1, 9] - [1, 10]))))))
|
||||
```
|
||||
|
||||
You can pass as many files to `tree-sitter parse` as your OS will allow. The command will exit with a non-zero status code if any parse errors occurred. You can also prevent the syntax trees from being printed using the `--quiet` flag. This makes `tree-sitter parse` usable as a secondary testing strategy: you can check that a large number of files parse without error:
|
||||
You can pass any number of file paths and glob patterns to `tree-sitter parse`, and it will parse all of the given files. The command will exit with a non-zero status code if any parse errors occurred. You can also prevent the syntax trees from being printed using the `--quiet` flag. Additionally, the `--stat` flag prints out aggregated parse success/failure information for all processed files. This makes `tree-sitter parse` usable as a secondary testing strategy: you can check that a large number of files parse without error:
|
||||
|
||||
```sh
|
||||
find ./examples -name '*.go' | xargs -n 1000 tree-sitter parse --quiet
|
||||
tree-sitter parse 'examples/**/*.go' --quiet --stat
|
||||
```
|
||||
|
||||
### Command: `highlight`
|
||||
|
||||
You can run syntax highlighting on an arbitrary file using `tree-sitter highlight`. This can either output colors directly to your terminal using ansi escape codes, or produce HTML (if the `--html` flag is passed). For more information, see [the syntax highlighting page][syntax-highlighting].
|
||||
|
||||
### The Grammar DSL
|
||||
|
||||
The following is a complete list of built-in functions you can use in your `grammar.js` to define rules. Use-cases for some of these functions will be explained in more detail in later sections.
|
||||
|
|
@ -196,12 +204,13 @@ The following is a complete list of built-in functions you can use in your `gram
|
|||
* **Alternatives : `choice(rule1, rule2, ...)`** - This function creates a rule that matches *one* of a set of possible rules. The order of the arguments does not matter. This is analogous to the `|` (pipe) operator in EBNF notation.
|
||||
* **Repetitions : `repeat(rule)`** - This function creates a rule that matches *zero-or-more* occurrences of a given rule. It is analogous to the `{x}` (curly brace) syntax in EBNF notation.
|
||||
* **Repetitions : `repeat1(rule)`** - This function creates a rule that matches *one-or-more* occurrences of a given rule. The previous `repeat` rule is implemented in terms of `repeat1` but is included because it is very commonly used.
|
||||
* **Options : `optional(rule)`** - This function creates a rule that matches *zero or one* occurrence of a given rule it is analogous to the `[x]` (square bracket) syntax in EBNF notation.
|
||||
* **Options : `optional(rule)`** - This function creates a rule that matches *zero or one* occurrence of a given rule. It is analogous to the `[x]` (square bracket) syntax in EBNF notation.
|
||||
* **Precedence : `prec(number, rule)`** - This function marks the given rule with a numerical precedence which will be used to resolve [*LR(1) Conflicts*][lr-conflict] at parser-generation time. When two rules overlap in a way that represents either a true ambiguity or a *local* ambiguity given one token of lookahead, Tree-sitter will try to resolve the conflict by matching the rule with the higher precedence. The default precedence of all rules is zero. This works similarly to the [precedence directives][yacc-prec] in Yacc grammars.
|
||||
* **Left Associativity : `prec.left([number], rule)`** - This function marks the given rule as left-associative (and optionally applies a numerical precedence). When an LR(1) conflict arises in which all of the rules have the same numerical precedence, Tree-sitter will consult the rules' associativity. If there is a left-associative rule, Tree-sitter will prefer matching a rule that ends *earlier*. This works similarly to [associativity directives][yacc-prec] in Yacc grammars.
|
||||
* **Right Associativity : `prec.right([number], rule)`** - This function is like `prec.left`, but it instructs Tree-sitter to prefer matching a rule that ends *later*.
|
||||
* **Dynamic Precedence : `prec.dynamic(number, rule)`** - This function is similar to `prec`, but the given numerical precedence is applied at *runtime* instead of at parser generation time. This is only necessary when handling a conflict dynamically using the the `conflicts` field in the grammar, and when there is a genuine *ambiguity*: multiple rules correctly match a given piece of code. In that event, Tree-sitter compares the total dynamic precedence associated with each rule, and selects the one with the highest total. This is similar to [dynamic precedence directives][bison-dprec] in Bison grammars.
|
||||
* **Dynamic Precedence : `prec.dynamic(number, rule)`** - This function is similar to `prec`, but the given numerical precedence is applied at *runtime* instead of at parser generation time. This is only necessary when handling a conflict dynamically using the `conflicts` field in the grammar, and when there is a genuine *ambiguity*: multiple rules correctly match a given piece of code. In that event, Tree-sitter compares the total dynamic precedence associated with each rule, and selects the one with the highest total. This is similar to [dynamic precedence directives][bison-dprec] in Bison grammars.
|
||||
* **Tokens : `token(rule)`** - This function marks the given rule as producing only a single token. Tree-sitter's default is to treat each String or RegExp literal in the grammar as a separate token. Each token is matched separately by the lexer and returned as its own leaf node in the tree. The `token` function allows you to express a complex rule using the functions described above (rather than as a single regular expression) but still have Tree-sitter treat it as a single token.
|
||||
* **Immediate Tokens : `token.immediate(rule)`** - Usually, whitespace (and any other extras, such as comments) is optional before each token. This function means that the token will only match if there is no whitespace.
|
||||
* **Aliases : `alias(rule, name)`** - This function causes the given rule to *appear* with an alternative name in the syntax tree. If `name` is a *symbol*, as in `alias($.foo, $.bar)`, then the aliased rule will *appear* as a [named node][named-vs-anonymous-nodes-section] called `bar`. And if `name` is a *string literal*, as in `alias($.foo, 'bar')`, then the aliased rule will appear as an [anonymous node][named-vs-anonymous-nodes-section], as if the rule had been written as the simple string.
|
||||
* **Field Names : `field(name, rule)`** - This function assigns a *field name* to the child node(s) matched by the given rule. In the resulting syntax tree, you can then use that field name to access specific children.
|
||||
|
||||
|
|
@ -212,6 +221,7 @@ In addition to the `name` and `rules` fields, grammars have a few other optional
|
|||
* **`conflicts`** - an array of arrays of rule names. Each inner array represents a set of rules that's involved in an *LR(1) conflict* that is *intended to exist* in the grammar. When these conflicts occur at runtime, Tree-sitter will use the GLR algorithm to explore all of the possible interpretations. If *multiple* parses end up succeeding, Tree-sitter will pick the subtree whose corresponding rule has the highest total *dynamic precedence*.
|
||||
* **`externals`** - an array of token names which can be returned by an [*external scanner*](#external-scanners). External scanners allow you to write custom C code which runs during the lexing process in order to handle lexical rules (e.g. Python's indentation tokens) that cannot be described by regular expressions.
|
||||
* **`word`** - the name of a token that will match keywords for the purpose of the [keyword extraction](#keyword-extraction) optimization.
|
||||
* **`supertypes`** - an array of hidden rule names which should be considered to be 'supertypes' in the generated [*node types* file][static-node-types].
|
||||
|
||||
|
||||
## Writing the Grammar
|
||||
|
|
@ -335,7 +345,7 @@ Imagine that you were just starting work on the [Tree-sitter JavaScript parser][
|
|||
return x + y;
|
||||
```
|
||||
|
||||
According to the specification, this line is a `ReturnStatement`, the fragment `x + y` is an `AdditiveExpression`, and `x` and `y` are both `IdentifierReferences`. The relationship between these constructs is captured by a complex series of production rules:
|
||||
According to the specification, this line is a `ReturnStatement`, the fragment `x + y` is an `AdditiveExpression`, and `x` and `y` are both `IdentifierReferences`. The relationship between these constructs is captured by a complex series of production rules:
|
||||
|
||||
```
|
||||
ReturnStatement -> 'return' Expression
|
||||
|
|
@ -496,6 +506,8 @@ Grammars often contain multiple tokens that can match the same characters. For e
|
|||
|
||||
4. **Match Specificity** - If there are two valid tokens with the same precedence and which both match the same number of characters, Tree-sitter will prefer a token that is specified in the grammar as a `String` over a token specified as a `RegExp`.
|
||||
|
||||
5. **Rule Order** - If none of the above criteria can be used to select one token over another, Tree-sitter will prefer the token that appears earlier in the grammar.
|
||||
|
||||
### Keywords
|
||||
|
||||
Many languages have a set of *keyword* tokens (e.g. `if`, `for`, `return`), as well as a more general token (e.g. `identifier`) that matches any word, including many of the keyword strings. For example, JavaScript has a keyword `instanceof`, which is used as a binary operator, like this:
|
||||
|
|
@ -656,7 +668,7 @@ bool tree_sitter_my_language_external_scanner_scan(
|
|||
|
||||
This function is responsible for recognizing external tokens. It should return `true` if a token was recognized, and `false` otherwise. It is called with a "lexer" struct with the following fields:
|
||||
|
||||
* **`uint32_t lookahead`** - The current next character in the input stream, represented as a 32-bit unicode code point.
|
||||
* **`int32_t lookahead`** - The current next character in the input stream, represented as a 32-bit unicode code point.
|
||||
* **`TSSymbol result_symbol`** - The symbol that was recognized. Your scan function should *assign* to this field one of the values from the `TokenType` enum, described above.
|
||||
* **`void (*advance)(TSLexer *, bool skip)`** - A function for advancing to the next character. If you pass `true` for the second argument, the current character will be treated as whitespace.
|
||||
* **`void (*mark_end)(TSLexer *)`** - A function for marking the end of the recognized token. This allows matching tokens that require multiple characters of lookahead. By default (if you don't call `mark_end`), any character that you moved past using the `advance` function will be included in the size of the token. But once you call `mark_end`, then any later calls to `advance` will *not* increase the size of the returned token. You can call `mark_end` multiple times to increase the size of the token.
|
||||
|
|
@ -707,6 +719,7 @@ if (valid_symbols[INDENT] || valid_symbol[DEDENT]) {
|
|||
[nan]: https://github.com/nodejs/nan
|
||||
[node-module]: https://www.npmjs.com/package/tree-sitter-cli
|
||||
[node.js]: https://nodejs.org
|
||||
[static-node-types]: ./using-parsers#static-node-types
|
||||
[non-terminal]: https://en.wikipedia.org/wiki/Terminal_and_nonterminal_symbols
|
||||
[npm]: https://docs.npmjs.com
|
||||
[path-env]: https://en.wikipedia.org/wiki/PATH_(variable)
|
||||
|
|
@ -714,6 +727,8 @@ if (valid_symbols[INDENT] || valid_symbol[DEDENT]) {
|
|||
[percent-string]: https://docs.ruby-lang.org/en/2.5.0/syntax/literals_rdoc.html#label-Percent+Strings
|
||||
[releases]: https://github.com/tree-sitter/tree-sitter/releases/latest
|
||||
[s-exp]: https://en.wikipedia.org/wiki/S-expression
|
||||
[syntax-highlighting]: ./syntax-highlighting
|
||||
[syntax-highlighting-tests]: ./syntax-highlighting#unit-testing
|
||||
[tree-sitter-cli]: https://github.com/tree-sitter/tree-sitter/tree/master/cli
|
||||
[tree-sitter-javascript]: https://github.com/tree-sitter/tree-sitter-javascript
|
||||
[yacc-prec]: https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
|
||||
|
|
|
|||
429
docs/section-4-syntax-highlighting.md
Normal file
429
docs/section-4-syntax-highlighting.md
Normal file
|
|
@ -0,0 +1,429 @@
|
|||
---
|
||||
title: Syntax Highlighting
|
||||
permalink: syntax-highlighting
|
||||
---
|
||||
|
||||
# Syntax Highlighting
|
||||
|
||||
Syntax highlighting is a very common feature in applications that deal with code. Tree-sitter has built-in support for syntax highlighting, via the [`tree-sitter-highlight`](https://github.com/tree-sitter/tree-sitter/tree/master/highlight) library, which is currently used on GitHub.com for highlighting code written in several languages. You can also perform syntax highlighting at the command line using the `tree-sitter highlight` command.
|
||||
|
||||
This document explains how the Tree-sitter syntax highlighting system works, using the command line interface. If you are using the `tree-sitter-highlight` library (either from C or from Rust), all of these concepts are still applicable, but the configuration data is provided using in-memory objects, rather than files.
|
||||
|
||||
**Note - If you are working on syntax highlighting in the [Atom](https://atom.io/) text editor, you should consult [the grammar-creation page](https://flight-manual.atom.io/hacking-atom/sections/creating-a-grammar/) of the Atom Flight Manual, *not* this document. Atom currently uses a different syntax highlighting system that is also based on Tree-sitter, but is older than the one described here.**
|
||||
|
||||
## Overview
|
||||
|
||||
All of the files needed to highlight a given language are normally included in the same git repository as the Tree-sitter grammar for that language (for example, [`tree-sitter-javascript`](https://github.com/tree-sitter/tree-sitter-javascript), [`tree-sitter-ruby`](https://github.com/tree-sitter/tree-sitter-ruby)). In order to run syntax highlighting from the command-line, three types of files are needed:
|
||||
|
||||
1. Global configuration in `~/.tree-sitter/config.json`
|
||||
2. Language configuration in grammar repositories' `package.json` files.
|
||||
3. Tree queries in the grammar repositories' `queries` folders.
|
||||
|
||||
For an example of the language-specific files, see the [`package.json` file](https://github.com/tree-sitter/tree-sitter-ruby/blob/master/package.json) and [`queries` directory](https://github.com/tree-sitter/tree-sitter-ruby/tree/master/queries) in the `tree-sitter-ruby` repository. The following sections describe the behavior of each file.
|
||||
|
||||
## Global Configuration
|
||||
|
||||
The Tree-sitter CLI automatically creates a directory in your home folder called `~/.tree-sitter`. This is used to store compiled language binaries, and it can also contain a JSON configuration file. To automatically create a default config file, run this command:
|
||||
|
||||
```sh
|
||||
tree-sitter init-config
|
||||
```
|
||||
|
||||
### Paths
|
||||
|
||||
The `tree-sitter highlight` command takes one or more file paths, and tries to automatically determine which language should be used to highlight those files. In order to do this, it needs to know *where* to look for Tree-sitter grammars on your filesystem. You can control this using the `"parser-directories"` key in your configuration file:
|
||||
|
||||
```json
|
||||
{
|
||||
"parser-directories": [
|
||||
"/Users/my-name/code",
|
||||
"/Users/my-name/other-code"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Currently, any folder within one of these *parser directories* whose name begins with "tree-sitter-" will be treated as a Tree-sitter grammar repository.
|
||||
|
||||
### Theme
|
||||
|
||||
The Tree-sitter highlighting system works by annotating ranges of source code with logical "highlight names" like `function.method`, `type.builtin`, `keyword`, etc. In order to decide what *color* should be used for rendering each highlight, a *theme* is needed.
|
||||
|
||||
In `~/.tree-sitter/config.json`, the `"theme"` value is an object whose keys are dot-separated highlight names like `function.builtin` or `keyword`, and whose values are JSON expressions that represent text styling parameters.
|
||||
|
||||
#### Highlight Names
|
||||
|
||||
A theme can contain multiple keys that share a common subsequence. Examples:
|
||||
* `variable` and `variable.parameter`
|
||||
* `function`, `function.builtin`, and `function.method`
|
||||
|
||||
For a given highlight produced, styling will be determined based on the **longest matching theme key**. For example, the highlight `function.builtin.static` would match the key `function.builtin` rather than `function`.
|
||||
|
||||
#### Styling Values
|
||||
|
||||
Styling values can be any of the following:
|
||||
|
||||
* Integers from 0 to 255, representing ANSI terminal color ids.
|
||||
* Strings like `"#e45649"` representing hexadecimal RGB colors.
|
||||
* Strings naming basic ANSI colors like `"red"`, `"black"`, `"purple"`, or `"cyan"`.
|
||||
* Objects with the following keys:
|
||||
* `color` - An integer or string as described above.
|
||||
* `underline` - A boolean indicating whether the text should be underlined.
|
||||
* `italic` - A boolean indicating whether the text should be italicized.
|
||||
* `bold` - A boolean indicating whether the text should be bold-face.
|
||||
|
||||
## Language Configuration
|
||||
|
||||
The `package.json` file is used by package managers like `npm`. Within this file, the Tree-sitter CLI looks for data nested under the top-level `"tree-sitter"` key. This key is expected to contain an array of objects with the following keys:
|
||||
|
||||
### Basics
|
||||
|
||||
These keys specify basic information about the parser:
|
||||
|
||||
* `scope` (required) - A string like `"source.js"` that identifies the language. Currently, we strive to match the scope names used by popular [TextMate grammars](https://macromates.com/manual/en/language_grammars) and by the [Linguist](https://github.com/github/linguist) library.
|
||||
|
||||
* `path` (optional) - A relative path from the directory containing `package.json` to another directory containing the `src/` folder, which contains the actual generated parser. The default value is `"."` (so that `src/` is in the same folder as `package.json`), and this very rarely needs to be overridden.
|
||||
|
||||
### Language Detection
|
||||
|
||||
These keys help to decide whether the language applies to a given file:
|
||||
|
||||
* `file-types` - An array of filename suffix strings. The grammar will be used for files whose names end with one of these suffixes. Note that the suffix may match an *entire* filename.
|
||||
|
||||
* `first-line-regex` - A regex pattern that will be tested against the first line of a file in order to determine whether this language applies to the file. If present, this regex will be used for any file whose language does not match any grammar's `file-types`.
|
||||
|
||||
* `content-regex` - A regex pattern that will be tested against the contents of the file in order to break ties in cases where multiple grammars matched the file using the above two criteria. If the regex matches, this grammar will be preferred over another grammar with no `content-regex`. If the regex does not match, a grammar with no `content-regex` will be preferred over this one.
|
||||
|
||||
* `injection-regex` - A regex pattern that will be tested against a *language name* in order to determine whether this language should be used for a potential *language injection* site. Language injection is described in more detail in [a later section](#language-injection-query).
|
||||
|
||||
### Query Paths
|
||||
|
||||
These keys specify relative paths from the directory containing `package.json` to the files that control syntax highlighting:
|
||||
|
||||
* `highlights` - Path to a *highlight query*. Default: `queries/highlights.scm`
|
||||
* `locals` - Path to a *local variable query*. Default: `queries/locals.scm`.
|
||||
* `injections` - Path to an *injection query*. Default: `queries/injections.scm`.
|
||||
|
||||
The behaviors of these three files are described in the next section.
|
||||
|
||||
### Example
|
||||
|
||||
Typically, the `"tree-sitter"` array only needs to contain one object, which only needs to specify a few keys:
|
||||
|
||||
```json
|
||||
{
|
||||
"tree-sitter": [
|
||||
{
|
||||
"scope": "source.ruby",
|
||||
"file-types": [
|
||||
"rb",
|
||||
"gemspec",
|
||||
"Gemfile",
|
||||
"Rakefile"
|
||||
],
|
||||
"first-line-regex": "#!.*\\bruby$"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Queries
|
||||
|
||||
Tree-sitter's syntax highlighting system is based on *tree queries*, which are a general system for pattern-matching on Tree-sitter's syntax trees. See [this section](./using-parsers#pattern-matching-with-queries) of the documentation for more information about tree queries.
|
||||
|
||||
Syntax highlighting is controlled by *three* different types of query files that are usually included in the `queries` folder. The default names for the query files use the `.scm` file extension. We chose this extension because it is commonly used for files written in [Scheme](https://en.wikipedia.org/wiki/Scheme_%28programming_language%29), a popular dialect of Lisp, and these query files use a Lisp-like syntax.
|
||||
|
||||
Alternatively, you can think of `.scm` as an acronym for "Source Code Matching".
|
||||
|
||||
### Highlights
|
||||
|
||||
The most important query is called the highlights query. The highlights query uses *captures* to assign arbitrary *highlight names* to different nodes in the tree. Each highlight name can then be mapped to a color (as described [above](#theme)). Commonly used highlight names include `keyword`, `function`, `type`, `property`, and `string`. Names can also be dot-separated like `function.builtin`.
|
||||
|
||||
#### Example Input
|
||||
|
||||
For example, consider the following Go code:
|
||||
|
||||
```go
|
||||
func increment(a int) int {
|
||||
return a + 1
|
||||
}
|
||||
```
|
||||
|
||||
With this syntax tree:
|
||||
|
||||
```
|
||||
(source_file
|
||||
(function_declaration
|
||||
name: (identifier)
|
||||
parameters: (parameter_list
|
||||
(parameter_declaration
|
||||
name: (identifier)
|
||||
type: (type_identifier)))
|
||||
result: (type_identifier)
|
||||
body: (block
|
||||
(return_statement
|
||||
(expression_list
|
||||
(binary_expression
|
||||
left: (identifier)
|
||||
right: (int_literal)))))))
|
||||
```
|
||||
|
||||
#### Example Query
|
||||
|
||||
Suppose we wanted to render this code with the following colors:
|
||||
* keywords `func` and `return` in purple
|
||||
* function `increment` in blue
|
||||
* type `int` in green
|
||||
* number `1` in brown
|
||||
|
||||
We can assign each of these categories a *highlight name* using a query like this:
|
||||
|
||||
```
|
||||
; highlights.scm
|
||||
|
||||
"func" @keyword
|
||||
"return" @keyword
|
||||
(type_identifier) @type
|
||||
(int_literal) @number
|
||||
(function_declaration name: (identifier) @function)
|
||||
```
|
||||
|
||||
Then, in our `~/.tree-sitter/config.json` file, we could map each of these highlight names to a color:
|
||||
|
||||
```json
|
||||
{
|
||||
"theme": {
|
||||
"keyword": "purple",
|
||||
"function": "blue",
|
||||
"type": "green",
|
||||
"number": "brown"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Result
|
||||
|
||||
Running `tree-sitter highlight` on this Go file would produce output like this:
|
||||
|
||||
<pre class='highlight' style='border: 1px solid #aaa;'>
|
||||
<span style='color: purple;'>func</span> <span style='color: #005fd7;'>increment</span>(<span>a</span> <span style='color: green;'>int</span>) <span style='color: green;'>int</span> {
|
||||
<span style='color: purple;'>return</span> <span>a</span> <span style='font-weight: bold;color: #4e4e4e;'>+</span> <span style='font-weight: bold;color: #875f00;'>1</span>
|
||||
}
|
||||
</pre>
|
||||
|
||||
### Local Variables
|
||||
|
||||
Good syntax highlighting helps the reader to quickly distinguish between the different types of *entities* in their code. Ideally, if a given entity appears in *multiple* places, it should be colored the same in each place. The Tree-sitter syntax highlighting system can help you to achieve this by keeping track of local scopes and variables.
|
||||
|
||||
The *local variables* query is different from the highlights query in that, while the highlights query uses *arbitrary* capture names which can then be mapped to colors, the local variables query uses a fixed set of capture names, each of which has a special meaning.
|
||||
|
||||
The capture names are as follows:
|
||||
|
||||
* `@local.scope` - indicates that a syntax node introduces a new local scope.
|
||||
* `@local.definition` - indicates that a syntax node contains the *name* of a definition within the current local scope.
|
||||
* `@local.reference` - indicates that a syntax node contains the *name* which *may* refer to an earlier definition within some enclosing scope.
|
||||
|
||||
When highlighting a file, Tree-sitter will keep track of the set of scopes that contains any given position, and the set of definitions within each scope. When processing a syntax node that is captured as a `local.reference`, Tree-sitter will try to find a definition for a name that matches the node's text. If it finds a match, Tree-sitter will ensure that the *reference* and the *definition* are colored the same.
|
||||
|
||||
The information produced by this query can also be *used* by the highlights query. You can *disable* a pattern for nodes which have been identified as local variables by adding the predicate `(#is-not? local)` to the pattern. This is used in the example below:
|
||||
|
||||
#### Example Input
|
||||
|
||||
Consider this Ruby code:
|
||||
|
||||
```ruby
|
||||
def process_list(list)
|
||||
context = current_context
|
||||
list.map do |item|
|
||||
process_item(item, context)
|
||||
end
|
||||
end
|
||||
|
||||
item = 5
|
||||
list = [item]
|
||||
```
|
||||
|
||||
With this syntax tree:
|
||||
|
||||
```
|
||||
(program
|
||||
(method
|
||||
name: (identifier)
|
||||
parameters: (method_parameters
|
||||
(identifier))
|
||||
(assignment
|
||||
left: (identifier)
|
||||
right: (identifier))
|
||||
(method_call
|
||||
method: (call
|
||||
receiver: (identifier)
|
||||
method: (identifier))
|
||||
block: (do_block
|
||||
(block_parameters
|
||||
(identifier))
|
||||
(method_call
|
||||
method: (identifier)
|
||||
arguments: (argument_list
|
||||
(identifier)
|
||||
(identifier))))))
|
||||
(assignment
|
||||
left: (identifier)
|
||||
right: (integer))
|
||||
(assignment
|
||||
left: (identifier)
|
||||
right: (array
|
||||
(identifier))))
|
||||
```
|
||||
|
||||
There are several different types of names within this method:
|
||||
|
||||
* `process_list` is a method.
|
||||
* Within this method, `list` is a formal parameter
|
||||
* `context` is a local variable.
|
||||
* `current_context` is *not* a local variable, so it must be a method.
|
||||
* Within the `do` block, `item` is a formal parameter
|
||||
* Later on, `item` and `list` are both local variables (not formal parameters).
|
||||
|
||||
#### Example Queries
|
||||
|
||||
Let's write some queries that let us clearly distinguish between these types of names. First, set up the highlighting query, as described in the previous section. We'll assign distinct colors to method calls, method definitions, and formal parameters:
|
||||
|
||||
```
|
||||
; highlights.scm
|
||||
|
||||
(call method: (identifier) @function.method)
|
||||
(method_call method: (identifier) @function.method)
|
||||
|
||||
(method name: (identifier) @function.method)
|
||||
|
||||
(method_parameters (identifier) @variable.parameter)
|
||||
(block_parameters (identifier) @variable.parameter)
|
||||
|
||||
((identifier) @function.method
|
||||
(#is-not? local))
|
||||
```
|
||||
|
||||
Then, we'll set up a local variable query to keep track of the variables and scopes. Here, we're indicating that methods and blocks create local *scopes*, parameters and assignments create *definitions*, and other identifiers should be considered *references*:
|
||||
|
||||
```
|
||||
; locals.scm
|
||||
|
||||
(method) @local.scope
|
||||
(do_block) @local.scope
|
||||
|
||||
(method_parameters (identifier) @local.definition)
|
||||
(block_parameters (identifier) @local.definition)
|
||||
|
||||
(assignment left:(identifier) @local.definition)
|
||||
|
||||
(identifier) @local.reference
|
||||
```
|
||||
|
||||
#### Result
|
||||
|
||||
Running `tree-sitter highlight` on this ruby file would produce output like this:
|
||||
|
||||
<pre class='highlight' style='border: 1px solid #aaa;'>
|
||||
<span style='color: purple;'>def</span> <span style='color: #005fd7;'>process_list</span><span style='color: #4e4e4e;'>(</span><span style='text-decoration: underline;'>list</span><span style='color: #4e4e4e;'>)</span>
|
||||
<span>context</span> <span style='font-weight: bold;color: #4e4e4e;'>=</span> <span style='color: #005fd7;'>current_context</span>
|
||||
<span style='text-decoration: underline;'>list</span><span style='color: #4e4e4e;'>.</span><span style='color: #005fd7;'>map</span> <span style='color: purple;'>do</span> |<span style='text-decoration: underline;'>item</span>|
|
||||
<span style='color: #005fd7;'>process_item</span>(<span style='text-decoration: underline;'>item</span><span style='color: #4e4e4e;'>,</span> <span>context</span><span style='color: #4e4e4e;'>)</span>
|
||||
<span style='color: purple;'>end</span>
|
||||
<span style='color: purple;'>end</span>
|
||||
|
||||
<span>item</span> <span style='font-weight: bold;color: #4e4e4e;'>=</span> <span style='font-weight: bold;color: #875f00;'>5</span>
|
||||
<span>list</span> <span style='font-weight: bold;color: #4e4e4e;'>=</span> [<span>item</span><span style='color: #4e4e4e;'>]</span>
|
||||
</pre>
|
||||
|
||||
### Language Injection
|
||||
|
||||
Some source files contain code written in multiple different languages. Examples include:
|
||||
* HTML files, which can contain JavaScript inside of `<script>` tags and CSS inside of `<style>` tags
|
||||
* [ERB](https://en.wikipedia.org/wiki/ERuby) files, which contain Ruby inside of `<% %>` tags, and HTML outside of those tags
|
||||
* PHP files, which can contain HTML between the `<?php` tags
|
||||
* JavaScript files, which contain regular expression syntax within regex literals
|
||||
* Ruby, which can contain snippets of code inside of heredoc literals, where the heredoc delimiter often indicates the language
|
||||
|
||||
All of these examples can be modeled in terms of a *parent* syntax tree and one or more *injected* syntax trees, which reside *inside* of certain nodes in the parent tree. The language injection query allows you to specify these "injections" using the following captures:
|
||||
|
||||
* `@injection.content` - indicates that the captured node should have its contents re-parsed using another language.
|
||||
* `@injection.language` - indicates that the captured node's text may contain the *name* of a language that should be used to re-parse the `@injection.content`.
|
||||
|
||||
The language injection behavior can also be configured by some properties associated with patterns:
|
||||
|
||||
* `injection.language` - can be used to hard-code the name of a specific language.
|
||||
* `injection.combined` - indicates that *all* of the matching nodes in the tree should have their content parsed as *one* nested document.
|
||||
* `injection.include-children` - indicates that the `@injection.content` node's *entire* text should be re-parsed, including the text of its child nodes. By default, child nodes' text will be *excluded* from the injected document.
|
||||
|
||||
#### Examples
|
||||
|
||||
Consider this ruby code:
|
||||
|
||||
```ruby
|
||||
system <<-BASH.strip!
|
||||
abc --def | ghi > jkl
|
||||
BASH
|
||||
```
|
||||
|
||||
With this syntax tree:
|
||||
|
||||
```
|
||||
(program
|
||||
(method_call
|
||||
method: (identifier)
|
||||
arguments: (argument_list
|
||||
(call
|
||||
receiver: (heredoc_beginning)
|
||||
method: (identifier))))
|
||||
(heredoc_body
|
||||
(heredoc_end)))
|
||||
```
|
||||
|
||||
The following query would specify that the contents of the heredoc should be parsed using a language named "BASH" (because that is the text of the `heredoc_end` node):
|
||||
|
||||
```
|
||||
(heredoc_body
|
||||
(heredoc_end) @injection.language) @injection.content
|
||||
```
|
||||
|
||||
You can also force the language using the `#set!` predicate.
|
||||
For example, this will force the language to be always `ruby`.
|
||||
|
||||
```
|
||||
((heredoc_body) @injection.content
|
||||
(#set! injection.language "ruby"))
|
||||
```
|
||||
|
||||
## Unit Testing
|
||||
|
||||
Tree-sitter has a built-in way to verify the results of syntax highlighting. The interface is based on [Sublime Text's system](https://www.sublimetext.com/docs/3/syntax.html#testing) for testing highlighting.
|
||||
|
||||
Tests are written as normal source code files that contain specially-formatted *comments* that make assertions about the surrounding syntax highlighting. These files are stored in the `test/highlight` directory in a grammar repository.
|
||||
|
||||
Here is an example of a syntax highlighting test for JavaScript:
|
||||
|
||||
```js
|
||||
var abc = function(d) {
|
||||
// <- keyword
|
||||
// ^ keyword
|
||||
// ^ variable.parameter
|
||||
// ^ function
|
||||
|
||||
if (a) {
|
||||
// <- keyword
|
||||
// ^ punctuation.bracket
|
||||
|
||||
foo(`foo ${bar}`);
|
||||
// <- function
|
||||
// ^ string
|
||||
// ^ variable
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
From the Sublime text docs:
|
||||
|
||||
> The two types of tests are:
|
||||
>
|
||||
> **Caret**: ^ this will test the following selector against the scope on the most recent non-test line. It will test it at the same column the ^ is in. Consecutive ^s will test each column against the selector.
|
||||
>
|
||||
> **Arrow**: <- this will test the following selector against the scope on the most recent non-test line. It will test it at the same column as the comment character is in.
|
||||
|
|
@ -18,7 +18,7 @@ To make changes to Tree-sitter, you should have:
|
|||
1. A C compiler, for compiling the core library and the generated parsers.
|
||||
2. A [Rust toolchain](https://rustup.rs/), for compiling the Rust bindings, the highlighting library, and the CLI.
|
||||
3. Node.js and NPM, for generating parsers from `grammar.js` files.
|
||||
4. Either [Docker](https://www.docker.com/) or [Emscripten](https://emscripten.org/), for compiling the library to WASM.
|
||||
4. Either [Emscripten](https://emscripten.org/) or [Docker](https://www.docker.com/), for compiling the library to WASM.
|
||||
|
||||
### Building
|
||||
|
||||
|
|
@ -29,13 +29,7 @@ git clone https://github.com/tree-sitter/tree-sitter
|
|||
cd tree-sitter
|
||||
```
|
||||
|
||||
Clone the [`utf8proc`](https://juliastrings.github.io/utf8proc/) submodule:
|
||||
|
||||
```
|
||||
git submodule update --init
|
||||
```
|
||||
|
||||
Build the WASM library. We do this first because it gets embedded in the CLI to enable the `web-ui` command. If you have emscripten installed, this will use your `emcc` compiler. Otherwise, it will use Docker:
|
||||
Optionally, build the WASM library. If you skip this step, then the `tree-sitter web-ui` command will require an internet connection. If you have emscripten installed, this will use your `emcc` compiler. Otherwise, it will use Docker:
|
||||
|
||||
```sh
|
||||
./script/build-wasm
|
||||
|
|
@ -92,7 +86,7 @@ You can run the tests under the debugger (either `lldb` or `gdb`) using the `-g`
|
|||
script/test test_does_something -g
|
||||
```
|
||||
|
||||
Part of the Tree-sitter test suite involves parsing the *corpus* tests for several different languages and performing randomized edits to each example in the corpus. If you just want to run the tests for a particular *language*, you can pass the `-l` flag. And if you want to run a particular *example* from the corpus, you can pass the `-e` flag:
|
||||
Part of the Tree-sitter test suite involves parsing the _corpus_ tests for several different languages and performing randomized edits to each example in the corpus. If you just want to run the tests for a particular _language_, you can pass the `-l` flag. And if you want to run a particular _example_ from the corpus, you can pass the `-e` flag:
|
||||
|
||||
```sh
|
||||
script/test -l javascript -e Arrays
|
||||
|
|
@ -102,18 +96,18 @@ script/test -l javascript -e Arrays
|
|||
|
||||
The main [`tree-sitter/tree-sitter`](https://github.com/tree-sitter/tree-sitter) repository contains the source code for several packages that are published to package registries for different languages:
|
||||
|
||||
* Rust crates on [crates.io](https://crates.io):
|
||||
* [`tree-sitter`](https://crates.io/crates/tree-sitter) - A Rust binding to the core library
|
||||
* [`tree-sitter-highlight`](https://crates.io/crates/tree-sitter-highlight) - The syntax-highlighting library
|
||||
* [`tree-sitter-cli`](https://crates.io/crates/tree-sitter-cli) - The command-line tool
|
||||
* JavaScript modules on [npmjs.com](https://npmjs.com):
|
||||
* [`web-tree-sitter`](https://www.npmjs.com/package/web-tree-sitter) - A WASM-based JavaScript binding to the core library
|
||||
* [`tree-sitter-cli`](https://www.npmjs.com/package/tree-sitter-cli) - The command-line tool
|
||||
- Rust crates on [crates.io](https://crates.io):
|
||||
- [`tree-sitter`](https://crates.io/crates/tree-sitter) - A Rust binding to the core library
|
||||
- [`tree-sitter-highlight`](https://crates.io/crates/tree-sitter-highlight) - The syntax-highlighting library
|
||||
- [`tree-sitter-cli`](https://crates.io/crates/tree-sitter-cli) - The command-line tool
|
||||
- JavaScript modules on [npmjs.com](https://npmjs.com):
|
||||
- [`web-tree-sitter`](https://www.npmjs.com/package/web-tree-sitter) - A WASM-based JavaScript binding to the core library
|
||||
- [`tree-sitter-cli`](https://www.npmjs.com/package/tree-sitter-cli) - The command-line tool
|
||||
|
||||
There are also several other dependent repositories that contain other published packages:
|
||||
|
||||
* [`tree-sitter/node-tree-sitter`](https://github.com/tree-sitter/node-tree-sitter) - Node.js bindings to the core library, published as [`tree-sitter`](https://www.npmjs.com/package/tree-sitter) on npmjs.com
|
||||
* [`tree-sitter/py-tree-sitter`](https://github.com/tree-sitter/py-tree-sitter) - Python bindings to the core library, published as [`tree-sitter`](https://pypi.org/project/tree-sitter) on [PyPI.org](https://pypi.org).
|
||||
- [`tree-sitter/node-tree-sitter`](https://github.com/tree-sitter/node-tree-sitter) - Node.js bindings to the core library, published as [`tree-sitter`](https://www.npmjs.com/package/tree-sitter) on npmjs.com
|
||||
- [`tree-sitter/py-tree-sitter`](https://github.com/tree-sitter/py-tree-sitter) - Python bindings to the core library, published as [`tree-sitter`](https://pypi.org/project/tree-sitter) on [PyPI.org](https://pypi.org).
|
||||
|
||||
## Publishing New Releases
|
||||
|
||||
|
|
@ -121,31 +115,31 @@ Publishing a new release of the CLI requires these steps:
|
|||
|
||||
1. Commit and push all outstanding changes and verify that CI passes:
|
||||
|
||||
```sh
|
||||
git commit -m "Fix things"
|
||||
git push
|
||||
```
|
||||
```sh
|
||||
git commit -m "Fix things"
|
||||
git push
|
||||
```
|
||||
|
||||
2. Create a new tag:
|
||||
|
||||
```sh
|
||||
script/version patch
|
||||
```
|
||||
```sh
|
||||
script/version patch
|
||||
```
|
||||
|
||||
This will determine the current version, increment the *patch* version number, and update the `Cargo.toml` and `package.json` files for the Rust and Node CLI packages. It will then create a commit and a tag for the new version. For more information about the arguments that are allowed, see the documentation for the [`npm version`](https://docs.npmjs.com/cli/version) command.
|
||||
This will determine the current version, increment the _patch_ version number, and update the `Cargo.toml` and `package.json` files for the Rust and Node CLI packages. It will then create a commit and a tag for the new version. For more information about the arguments that are allowed, see the documentation for the [`npm version`](https://docs.npmjs.com/cli/version) command.
|
||||
|
||||
3. Push the commit and the tag:
|
||||
|
||||
```sh
|
||||
git push
|
||||
git push --tags
|
||||
```
|
||||
```sh
|
||||
git push
|
||||
git push --tags
|
||||
```
|
||||
|
||||
4. Wait for CI to pass. Because of the git tag, the CI jobs will publish artifacts to [a GitHub release](https://github.com/tree-sitter/tree-sitter/releases). The npm module of `tree-sitter-cli` works by downloading the appropriate binary from the corresponding GitHub release during installation. So it's best not to publish the npm package until the binaries are uploaded.
|
||||
|
||||
5. Publish the npm package:
|
||||
|
||||
```sh
|
||||
cd cli/npm
|
||||
npm publish
|
||||
```
|
||||
```sh
|
||||
cd cli/npm
|
||||
npm publish
|
||||
```
|
||||
|
|
@ -16,24 +16,35 @@ permalink: playground
|
|||
<option value="bash">Bash</option>
|
||||
<option value="c">C</option>
|
||||
<option value="cpp">C++</option>
|
||||
<option value="c_sharp">C#</option>
|
||||
<option value="go">Go</option>
|
||||
<option value="html">HTML</option>
|
||||
<option value="java">Java</option>
|
||||
<option value="javascript" selected="selected">JavaScript</option>
|
||||
<option value="php">PHP</option>
|
||||
<option value="python">Python</option>
|
||||
<option value="ruby">Ruby</option>
|
||||
<option value="rust">Rust</option>
|
||||
<option value="toml">TOML</option>
|
||||
<option value="typescript">TypeScript</option>
|
||||
<option value="yaml">YAML</option>
|
||||
<option value="ql">QL</option>
|
||||
</select>
|
||||
|
||||
<input id="logging-checkbox" type="checkbox"></input>
|
||||
<label for="logging-checkbox">Log</label>
|
||||
|
||||
<input id="query-checkbox" type="checkbox"></input>
|
||||
<label for="query-checkbox">Query</label>
|
||||
|
||||
<textarea id="code-input">
|
||||
</textarea>
|
||||
|
||||
<div id="query-container" style="visibility: hidden; position: absolute;">
|
||||
<h4>Query</h4>
|
||||
<textarea id="query-input"></textarea>
|
||||
</div>
|
||||
|
||||
<h4>Tree</h4>
|
||||
<span id="update-time"></span>
|
||||
<div id="output-container-scroll">
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
name = "tree-sitter-highlight"
|
||||
description = "Library for performing syntax highlighting with Tree-sitter"
|
||||
version = "0.1.6"
|
||||
version = "0.3.0"
|
||||
authors = [
|
||||
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
|
||||
"Tim Clem <timothy.clem@gmail.com>"
|
||||
|
|
@ -18,9 +18,6 @@ crate-type = ["lib", "staticlib"]
|
|||
|
||||
[dependencies]
|
||||
regex = "1"
|
||||
serde = "1.0"
|
||||
serde_json = "1.0"
|
||||
serde_derive = "1.0"
|
||||
|
||||
[dependencies.tree-sitter]
|
||||
version = ">= 0.3.7"
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
Tree-sitter Highlighting
|
||||
Tree-sitter Highlight
|
||||
=========================
|
||||
|
||||
[](https://travis-ci.org/tree-sitter/tree-sitter)
|
||||
|
|
@ -14,42 +14,94 @@ extern "C" tree_sitter_html();
|
|||
extern "C" tree_sitter_javascript();
|
||||
```
|
||||
|
||||
Load some *property sheets*:
|
||||
Define the list of highlight names that you will recognize:
|
||||
|
||||
```rust
|
||||
use tree_sitter_highlight::load_property_sheet;
|
||||
let highlight_names : Vec<String> = [
|
||||
"attribute",
|
||||
"constant",
|
||||
"function.builtin",
|
||||
"function",
|
||||
"keyword",
|
||||
"operator",
|
||||
"property",
|
||||
"punctuation",
|
||||
"punctuation.bracket",
|
||||
"punctuation.delimiter",
|
||||
"string",
|
||||
"string.special",
|
||||
"tag",
|
||||
"type",
|
||||
"type.builtin",
|
||||
"variable",
|
||||
"variable.builtin",
|
||||
"variable.parameter",
|
||||
]
|
||||
.iter()
|
||||
.cloned()
|
||||
.map(String::from)
|
||||
.collect();
|
||||
```
|
||||
|
||||
let javascript_property_sheet = load_property_sheet(
|
||||
fs::read_to_string("./tree-sitter-javascript/src/highlights.json").unwrap()
|
||||
Create a highlighter. You need one of these for each thread that you're using for syntax highlighting:
|
||||
|
||||
```rust
|
||||
use tree_sitter_highlight::Highlighter;
|
||||
|
||||
let highlighter = Highlighter::new();
|
||||
```
|
||||
|
||||
Load some highlighting queries from the `queries` directory of some language repositories:
|
||||
|
||||
```rust
|
||||
use tree_sitter_highlight::HighlightConfiguration;
|
||||
|
||||
let html_language = unsafe { tree_sitter_html() };
|
||||
let javascript_language = unsafe { tree_sitter_javascript() };
|
||||
|
||||
let html_config = HighlightConfiguration::new(
|
||||
html_language,
|
||||
&fs::read_to_string("./tree-sitter-html/queries/highlights.scm").unwrap(),
|
||||
&fs::read_to_string("./tree-sitter-html/queries/injections.scm").unwrap(),
|
||||
"",
|
||||
).unwrap();
|
||||
|
||||
let html_property_sheet = load_property_sheet(
|
||||
fs::read_to_string("./tree-sitter-html/src/highlights.json").unwrap()
|
||||
let javascript_config = HighlightConfiguration::new(
|
||||
javascript_language,
|
||||
&fs::read_to_string("./tree-sitter-javascript/queries/highlights.scm").unwrap(),
|
||||
&fs::read_to_string("./tree-sitter-javascript/queries/injections.scm").unwrap(),
|
||||
&fs::read_to_string("./tree-sitter-javascript/queries/locals.scm").unwrap(),
|
||||
).unwrap();
|
||||
```
|
||||
|
||||
Configure the recognized names:
|
||||
|
||||
```rust
|
||||
javascript_config.configure(&highlight_names);
|
||||
```
|
||||
|
||||
Highlight some code:
|
||||
|
||||
```rust
|
||||
use tree_sitter_highlight::{highlight, HighlightEvent};
|
||||
use tree_sitter_highlight::HighlightEvent;
|
||||
|
||||
let highlights = highlight(
|
||||
let highlights = highlighter.highlight(
|
||||
&javascript_config,
|
||||
b"const x = new Y();",
|
||||
unsafe { tree_sitter_javascript() },
|
||||
&javascript_property_sheet,
|
||||
&|_| None
|
||||
None,
|
||||
|_| None
|
||||
).unwrap();
|
||||
|
||||
for event in highlights {
|
||||
match event {
|
||||
HighlightEvent::Source(s) {
|
||||
eprintln!("source: {:?}", s);
|
||||
match event.unwrap() {
|
||||
HighlightEvent::Source {start, end} => {
|
||||
eprintln!("source: {}-{}", start, end);
|
||||
},
|
||||
HighlightEvent::ScopeStart(s) {
|
||||
eprintln!("scope started: {:?}", s);
|
||||
HighlightEvent::HighlightStart(s) => {
|
||||
eprintln!("highlight style started: {:?}", s);
|
||||
},
|
||||
HighlightEvent::ScopeEnd(s) {
|
||||
eprintln!("scope ended: {:?}", s);
|
||||
HighlightEvent::HighlightEnd => {
|
||||
eprintln!("highlight style ended");
|
||||
},
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,49 +12,20 @@ typedef enum {
|
|||
TSHighlightUnknownScope,
|
||||
TSHighlightTimeout,
|
||||
TSHighlightInvalidLanguage,
|
||||
TSHighlightInvalidUtf8,
|
||||
TSHighlightInvalidRegex,
|
||||
TSHighlightInvalidQuery,
|
||||
} TSHighlightError;
|
||||
|
||||
// The list of scopes which can be styled for syntax highlighting.
|
||||
// When constructing a `TSHighlighter`, you need to construct an
|
||||
// `attribute_strings` array whose elements correspond to these values.
|
||||
enum TSHighlightValue {
|
||||
TSHighlightValueAttribute,
|
||||
TSHighlightValueComment,
|
||||
TSHighlightValueConstant,
|
||||
TSHighlightValueConstantBuiltin,
|
||||
TSHighlightValueConstructor,
|
||||
TSHighlightValueConstructorBuiltin,
|
||||
TSHighlightValueEmbedded,
|
||||
TSHighlightValueEscape,
|
||||
TSHighlightValueFunction,
|
||||
TSHighlightValueFunctionBuiltin,
|
||||
TSHighlightValueKeyword,
|
||||
TSHighlightValueNumber,
|
||||
TSHighlightValueOperator,
|
||||
TSHighlightValueProperty,
|
||||
TSHighlightValuePropertyBuiltin,
|
||||
TSHighlightValuePunctuation,
|
||||
TSHighlightValuePunctuationBracket,
|
||||
TSHighlightValuePunctuationDelimiter,
|
||||
TSHighlightValuePunctuationSpecial,
|
||||
TSHighlightValueString,
|
||||
TSHighlightValueStringSpecial,
|
||||
TSHighlightValueTag,
|
||||
TSHighlightValueType,
|
||||
TSHighlightValueTypeBuiltin,
|
||||
TSHighlightValueVariable,
|
||||
TSHighlightValueVariableBuiltin,
|
||||
TSHighlightValueVariableParameter,
|
||||
TSHighlightValueUnknown,
|
||||
};
|
||||
|
||||
typedef struct TSHighlighter TSHighlighter;
|
||||
typedef struct TSHighlightBuffer TSHighlightBuffer;
|
||||
|
||||
// Construct a `TSHighlighter` by providing a list of strings containing
|
||||
// the HTML attributes that should be applied for each highlight value.
|
||||
TSHighlighter *ts_highlighter_new(
|
||||
const char **attribute_strings
|
||||
const char **highlight_names,
|
||||
const char **attribute_strings,
|
||||
uint32_t highlight_count
|
||||
);
|
||||
|
||||
// Delete a syntax highlighter.
|
||||
|
|
@ -67,17 +38,22 @@ void ts_highlighter_delete(TSHighlighter *);
|
|||
// with that language. You can also optionally provide an 'injection regex',
|
||||
// which is used to detect when this language has been embedded in a document
|
||||
// written in a different language.
|
||||
int ts_highlighter_add_language(
|
||||
TSHighlightError ts_highlighter_add_language(
|
||||
TSHighlighter *self,
|
||||
const char *scope_name,
|
||||
const char *injection_regex,
|
||||
const TSLanguage *language,
|
||||
const char *property_sheet_json,
|
||||
const char *injection_regex
|
||||
const char *highlight_query,
|
||||
const char *injection_query,
|
||||
const char *locals_query,
|
||||
uint32_t highlight_query_len,
|
||||
uint32_t injection_query_len,
|
||||
uint32_t locals_query_len
|
||||
);
|
||||
|
||||
// Compute syntax highlighting for a given document. You must first
|
||||
// create a `TSHighlightBuffer` to hold the output.
|
||||
int ts_highlighter_highlight(
|
||||
TSHighlightError ts_highlighter_highlight(
|
||||
const TSHighlighter *self,
|
||||
const char *scope_name,
|
||||
const char *source_code,
|
||||
|
|
|
|||
|
|
@ -1,25 +1,24 @@
|
|||
use super::{load_property_sheet, Error, Highlight, Highlighter, HtmlRenderer, Properties};
|
||||
use super::{Error, Highlight, HighlightConfiguration, Highlighter, HtmlRenderer};
|
||||
use regex::Regex;
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::CStr;
|
||||
use std::os::raw::c_char;
|
||||
use std::process::abort;
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::{fmt, slice};
|
||||
use tree_sitter::{Language, PropertySheet};
|
||||
|
||||
struct LanguageConfiguration {
|
||||
language: Language,
|
||||
property_sheet: PropertySheet<Properties>,
|
||||
injection_regex: Option<Regex>,
|
||||
}
|
||||
use std::{fmt, slice, str};
|
||||
use tree_sitter::Language;
|
||||
|
||||
pub struct TSHighlighter {
|
||||
languages: HashMap<String, LanguageConfiguration>,
|
||||
languages: HashMap<String, (Option<Regex>, HighlightConfiguration)>,
|
||||
attribute_strings: Vec<&'static [u8]>,
|
||||
highlight_names: Vec<String>,
|
||||
carriage_return_index: Option<usize>,
|
||||
}
|
||||
|
||||
pub struct TSHighlightBuffer(HtmlRenderer);
|
||||
pub struct TSHighlightBuffer {
|
||||
highlighter: Highlighter,
|
||||
renderer: HtmlRenderer,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub enum ErrorCode {
|
||||
|
|
@ -27,33 +26,110 @@ pub enum ErrorCode {
|
|||
UnknownScope,
|
||||
Timeout,
|
||||
InvalidLanguage,
|
||||
InvalidUtf8,
|
||||
InvalidRegex,
|
||||
InvalidQuery,
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlighter_new(
|
||||
highlight_names: *const *const c_char,
|
||||
attribute_strings: *const *const c_char,
|
||||
highlight_count: u32,
|
||||
) -> *mut TSHighlighter {
|
||||
let highlight_names =
|
||||
unsafe { slice::from_raw_parts(highlight_names, highlight_count as usize) };
|
||||
let attribute_strings =
|
||||
unsafe { slice::from_raw_parts(attribute_strings, Highlight::Unknown as usize + 1) };
|
||||
unsafe { slice::from_raw_parts(attribute_strings, highlight_count as usize) };
|
||||
let highlight_names = highlight_names
|
||||
.into_iter()
|
||||
.map(|s| unsafe { CStr::from_ptr(*s).to_string_lossy().to_string() })
|
||||
.collect::<Vec<_>>();
|
||||
let attribute_strings = attribute_strings
|
||||
.into_iter()
|
||||
.map(|s| {
|
||||
if s.is_null() {
|
||||
&[]
|
||||
} else {
|
||||
unsafe { CStr::from_ptr(*s).to_bytes() }
|
||||
}
|
||||
})
|
||||
.map(|s| unsafe { CStr::from_ptr(*s).to_bytes() })
|
||||
.collect();
|
||||
let carriage_return_index = highlight_names.iter().position(|s| s == "carriage-return");
|
||||
Box::into_raw(Box::new(TSHighlighter {
|
||||
languages: HashMap::new(),
|
||||
attribute_strings,
|
||||
highlight_names,
|
||||
carriage_return_index,
|
||||
}))
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlighter_add_language(
|
||||
this: *mut TSHighlighter,
|
||||
scope_name: *const c_char,
|
||||
injection_regex: *const c_char,
|
||||
language: Language,
|
||||
highlight_query: *const c_char,
|
||||
injection_query: *const c_char,
|
||||
locals_query: *const c_char,
|
||||
highlight_query_len: u32,
|
||||
injection_query_len: u32,
|
||||
locals_query_len: u32,
|
||||
) -> ErrorCode {
|
||||
let f = move || {
|
||||
let this = unwrap_mut_ptr(this);
|
||||
let scope_name = unsafe { CStr::from_ptr(scope_name) };
|
||||
let scope_name = scope_name
|
||||
.to_str()
|
||||
.or(Err(ErrorCode::InvalidUtf8))?
|
||||
.to_string();
|
||||
let injection_regex = if injection_regex.is_null() {
|
||||
None
|
||||
} else {
|
||||
let pattern = unsafe { CStr::from_ptr(injection_regex) };
|
||||
let pattern = pattern.to_str().or(Err(ErrorCode::InvalidUtf8))?;
|
||||
Some(Regex::new(pattern).or(Err(ErrorCode::InvalidRegex))?)
|
||||
};
|
||||
|
||||
let highlight_query = unsafe {
|
||||
slice::from_raw_parts(highlight_query as *const u8, highlight_query_len as usize)
|
||||
};
|
||||
let highlight_query = str::from_utf8(highlight_query).or(Err(ErrorCode::InvalidUtf8))?;
|
||||
|
||||
let injection_query = if injection_query_len > 0 {
|
||||
let query = unsafe {
|
||||
slice::from_raw_parts(injection_query as *const u8, injection_query_len as usize)
|
||||
};
|
||||
str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))?
|
||||
} else {
|
||||
""
|
||||
};
|
||||
|
||||
let locals_query = if locals_query_len > 0 {
|
||||
let query = unsafe {
|
||||
slice::from_raw_parts(locals_query as *const u8, locals_query_len as usize)
|
||||
};
|
||||
str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))?
|
||||
} else {
|
||||
""
|
||||
};
|
||||
|
||||
let mut config =
|
||||
HighlightConfiguration::new(language, highlight_query, injection_query, locals_query)
|
||||
.or(Err(ErrorCode::InvalidQuery))?;
|
||||
config.configure(&this.highlight_names);
|
||||
this.languages.insert(scope_name, (injection_regex, config));
|
||||
|
||||
Ok(())
|
||||
};
|
||||
|
||||
match f() {
|
||||
Ok(()) => ErrorCode::Ok,
|
||||
Err(e) => e,
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer {
|
||||
Box::into_raw(Box::new(TSHighlightBuffer(HtmlRenderer::new())))
|
||||
Box::into_raw(Box::new(TSHighlightBuffer {
|
||||
highlighter: Highlighter::new(),
|
||||
renderer: HtmlRenderer::new(),
|
||||
}))
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
|
@ -69,59 +145,25 @@ pub extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) {
|
|||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 {
|
||||
let this = unwrap_ptr(this);
|
||||
this.0.html.as_slice().as_ptr()
|
||||
this.renderer.html.as_slice().as_ptr()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_line_offsets(this: *const TSHighlightBuffer) -> *const u32 {
|
||||
let this = unwrap_ptr(this);
|
||||
this.0.line_offsets.as_slice().as_ptr()
|
||||
this.renderer.line_offsets.as_slice().as_ptr()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 {
|
||||
let this = unwrap_ptr(this);
|
||||
this.0.html.len() as u32
|
||||
this.renderer.html.len() as u32
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 {
|
||||
let this = unwrap_ptr(this);
|
||||
this.0.line_offsets.len() as u32
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_highlighter_add_language(
|
||||
this: *mut TSHighlighter,
|
||||
scope_name: *const c_char,
|
||||
language: Language,
|
||||
property_sheet_json: *const c_char,
|
||||
injection_regex: *const c_char,
|
||||
) -> ErrorCode {
|
||||
let this = unwrap_mut_ptr(this);
|
||||
let scope_name = unsafe { CStr::from_ptr(scope_name) };
|
||||
let scope_name = unwrap(scope_name.to_str()).to_string();
|
||||
let property_sheet_json = unsafe { CStr::from_ptr(property_sheet_json) };
|
||||
let property_sheet_json = unwrap(property_sheet_json.to_str());
|
||||
|
||||
let property_sheet = unwrap(load_property_sheet(language, property_sheet_json));
|
||||
let injection_regex = if injection_regex.is_null() {
|
||||
None
|
||||
} else {
|
||||
let pattern = unsafe { CStr::from_ptr(injection_regex) };
|
||||
Some(unwrap(Regex::new(unwrap(pattern.to_str()))))
|
||||
};
|
||||
|
||||
this.languages.insert(
|
||||
scope_name,
|
||||
LanguageConfiguration {
|
||||
language,
|
||||
property_sheet,
|
||||
injection_regex,
|
||||
},
|
||||
);
|
||||
|
||||
ErrorCode::Ok
|
||||
this.renderer.line_offsets.len() as u32
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
|
@ -150,36 +192,38 @@ impl TSHighlighter {
|
|||
output: &mut TSHighlightBuffer,
|
||||
cancellation_flag: Option<&AtomicUsize>,
|
||||
) -> ErrorCode {
|
||||
let configuration = self.languages.get(scope_name);
|
||||
if configuration.is_none() {
|
||||
let entry = self.languages.get(scope_name);
|
||||
if entry.is_none() {
|
||||
return ErrorCode::UnknownScope;
|
||||
}
|
||||
let configuration = configuration.unwrap();
|
||||
let (_, configuration) = entry.unwrap();
|
||||
let languages = &self.languages;
|
||||
|
||||
let highlighter = Highlighter::new(
|
||||
let highlights = output.highlighter.highlight(
|
||||
configuration,
|
||||
source_code,
|
||||
configuration.language,
|
||||
&configuration.property_sheet,
|
||||
|injection_string| {
|
||||
languages.values().find_map(|conf| {
|
||||
conf.injection_regex.as_ref().and_then(|regex| {
|
||||
cancellation_flag,
|
||||
move |injection_string| {
|
||||
languages.values().find_map(|(injection_regex, config)| {
|
||||
injection_regex.as_ref().and_then(|regex| {
|
||||
if regex.is_match(injection_string) {
|
||||
Some((conf.language, &conf.property_sheet))
|
||||
Some(config)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
})
|
||||
},
|
||||
cancellation_flag,
|
||||
);
|
||||
|
||||
if let Ok(highlighter) = highlighter {
|
||||
output.0.reset();
|
||||
let result = output.0.render(highlighter, source_code, &|s| {
|
||||
self.attribute_strings[s as usize]
|
||||
});
|
||||
if let Ok(highlights) = highlights {
|
||||
output.renderer.reset();
|
||||
output
|
||||
.renderer
|
||||
.set_carriage_return_highlight(self.carriage_return_index.map(Highlight));
|
||||
let result = output
|
||||
.renderer
|
||||
.render(highlights, source_code, &|s| self.attribute_strings[s.0]);
|
||||
match result {
|
||||
Err(Error::Cancelled) => {
|
||||
return ErrorCode::Timeout;
|
||||
|
|
|
|||
1765
highlight/src/lib.rs
1765
highlight/src/lib.rs
File diff suppressed because it is too large
Load diff
|
|
@ -1,56 +1,3 @@
|
|||
use std::str;
|
||||
|
||||
pub struct LossyUtf8<'a> {
|
||||
bytes: &'a [u8],
|
||||
in_replacement: bool,
|
||||
}
|
||||
|
||||
impl<'a> LossyUtf8<'a> {
|
||||
pub fn new(bytes: &'a [u8]) -> Self {
|
||||
LossyUtf8 {
|
||||
bytes,
|
||||
in_replacement: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for LossyUtf8<'a> {
|
||||
type Item = &'a str;
|
||||
|
||||
fn next(&mut self) -> Option<&'a str> {
|
||||
if self.bytes.is_empty() {
|
||||
return None;
|
||||
}
|
||||
if self.in_replacement {
|
||||
self.in_replacement = false;
|
||||
return Some("\u{fffd}");
|
||||
}
|
||||
match str::from_utf8(self.bytes) {
|
||||
Ok(valid) => {
|
||||
self.bytes = &[];
|
||||
Some(valid)
|
||||
}
|
||||
Err(error) => {
|
||||
if let Some(error_len) = error.error_len() {
|
||||
let error_start = error.valid_up_to();
|
||||
if error_start > 0 {
|
||||
let result =
|
||||
unsafe { str::from_utf8_unchecked(&self.bytes[..error_start]) };
|
||||
self.bytes = &self.bytes[(error_start + error_len)..];
|
||||
self.in_replacement = true;
|
||||
Some(result)
|
||||
} else {
|
||||
self.bytes = &self.bytes[error_len..];
|
||||
Some("\u{fffd}")
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn html_escape(c: u8) -> Option<&'static [u8]> {
|
||||
match c as char {
|
||||
'>' => Some(b">"),
|
||||
|
|
|
|||
4
lib/.ccls
Normal file
4
lib/.ccls
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
-std=c99
|
||||
-Isrc
|
||||
-Iinclude
|
||||
-Iutf8proc
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
name = "tree-sitter"
|
||||
description = "Rust bindings to the Tree-sitter parsing library"
|
||||
version = "0.3.10"
|
||||
version = "0.17.1"
|
||||
authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
|
||||
license = "MIT"
|
||||
readme = "binding_rust/README.md"
|
||||
|
|
@ -15,15 +15,13 @@ include = [
|
|||
"/binding_rust/*",
|
||||
"/Cargo.toml",
|
||||
"/include/*",
|
||||
"/src/*",
|
||||
"/utf8proc/*",
|
||||
"/src/*.h",
|
||||
"/src/*.c",
|
||||
"/src/unicode/*",
|
||||
]
|
||||
|
||||
[dependencies]
|
||||
regex = "1"
|
||||
serde = "1.0"
|
||||
serde_json = "1.0"
|
||||
serde_derive = "1.0"
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1.0"
|
||||
|
|
|
|||
|
|
@ -3,6 +3,5 @@ Subdirectories
|
|||
|
||||
* [`src`](./src) - C source code for the Tree-sitter library
|
||||
* [`include`](./include) - C headers for the Tree-sitter library
|
||||
* [`utf8proc`](./utf8proc) - A submodule for [`utf8proc`](https://juliastrings.github.io/utf8proc/), Tree-sitter's one library dependency.
|
||||
* [`binding_rust`](./binding_rust) - Rust bindings to the Tree-sitter library
|
||||
* [`binding_web`](./binding_web) - JavaScript bindings to the Tree-sitter library, using WebAssembly
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
Rust Tree-sitter
|
||||
================
|
||||
# Rust Tree-sitter
|
||||
|
||||
[](https://travis-ci.org/tree-sitter/tree-sitter)
|
||||
[](https://ci.appveyor.com/project/maxbrunsfeld/tree-sitter/branch/master)
|
||||
|
|
@ -14,15 +13,12 @@ First, create a parser:
|
|||
```rust
|
||||
use tree_sitter::{Parser, Language};
|
||||
|
||||
// ...
|
||||
|
||||
let mut parser = Parser::new();
|
||||
```
|
||||
|
||||
Tree-sitter languages consist of generated C code. To make sure they're properly compiled and linked, you can create a [build script](https://doc.rust-lang.org/cargo/reference/build-scripts.html) like the following (assuming `tree-sitter-javascript` is in your root directory):
|
||||
```rust
|
||||
extern crate cc;
|
||||
|
||||
```rust
|
||||
use std::path::PathBuf;
|
||||
|
||||
fn main() {
|
||||
|
|
@ -36,7 +32,14 @@ fn main() {
|
|||
}
|
||||
```
|
||||
|
||||
To then use languages from rust, you must declare them as `extern "C"` functions and invoke them with `unsafe`. Then you can assign them to the parser.
|
||||
Add the `cc` crate to your `Cargo.toml` under `[build-dependencies]`:
|
||||
|
||||
```toml
|
||||
[build-dependencies]
|
||||
cc="*"
|
||||
```
|
||||
|
||||
To then use languages from rust, you must declare them as `extern "C"` functions and invoke them with `unsafe`. Then you can assign them to the parser.
|
||||
|
||||
```rust
|
||||
extern "C" { fn tree_sitter_c() -> Language; }
|
||||
|
|
|
|||
|
|
@ -19,6 +19,16 @@ pub struct TSParser {
|
|||
pub struct TSTree {
|
||||
_unused: [u8; 0],
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct TSQuery {
|
||||
_unused: [u8; 0],
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct TSQueryCursor {
|
||||
_unused: [u8; 0],
|
||||
}
|
||||
pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0;
|
||||
pub const TSInputEncoding_TSInputEncodingUTF16: TSInputEncoding = 1;
|
||||
pub type TSInputEncoding = u32;
|
||||
|
|
@ -93,6 +103,37 @@ pub struct TSTreeCursor {
|
|||
pub id: *const ::std::os::raw::c_void,
|
||||
pub context: [u32; 2usize],
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct TSQueryCapture {
|
||||
pub node: TSNode,
|
||||
pub index: u32,
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct TSQueryMatch {
|
||||
pub id: u32,
|
||||
pub pattern_index: u16,
|
||||
pub capture_count: u16,
|
||||
pub captures: *const TSQueryCapture,
|
||||
}
|
||||
pub const TSQueryPredicateStepType_TSQueryPredicateStepTypeDone: TSQueryPredicateStepType = 0;
|
||||
pub const TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture: TSQueryPredicateStepType = 1;
|
||||
pub const TSQueryPredicateStepType_TSQueryPredicateStepTypeString: TSQueryPredicateStepType = 2;
|
||||
pub type TSQueryPredicateStepType = u32;
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct TSQueryPredicateStep {
|
||||
pub type_: TSQueryPredicateStepType,
|
||||
pub value_id: u32,
|
||||
}
|
||||
pub const TSQueryError_TSQueryErrorNone: TSQueryError = 0;
|
||||
pub const TSQueryError_TSQueryErrorSyntax: TSQueryError = 1;
|
||||
pub const TSQueryError_TSQueryErrorNodeType: TSQueryError = 2;
|
||||
pub const TSQueryError_TSQueryErrorField: TSQueryError = 3;
|
||||
pub const TSQueryError_TSQueryErrorCapture: TSQueryError = 4;
|
||||
pub const TSQueryError_TSQueryErrorStructure: TSQueryError = 5;
|
||||
pub type TSQueryError = u32;
|
||||
extern "C" {
|
||||
#[doc = " Create a new parser."]
|
||||
pub fn ts_parser_new() -> *mut TSParser;
|
||||
|
|
@ -117,7 +158,7 @@ extern "C" {
|
|||
pub fn ts_parser_language(self_: *const TSParser) -> *const TSLanguage;
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Set the spans of text that the parser should include when parsing."]
|
||||
#[doc = " Set the ranges of text that the parser should include when parsing."]
|
||||
#[doc = ""]
|
||||
#[doc = " By default, the parser will always include entire documents. This function"]
|
||||
#[doc = " allows you to parse only a *portion* of a document but still return a syntax"]
|
||||
|
|
@ -127,7 +168,22 @@ extern "C" {
|
|||
#[doc = " The second and third parameters specify the location and length of an array"]
|
||||
#[doc = " of ranges. The parser does *not* take ownership of these ranges; it copies"]
|
||||
#[doc = " the data, so it doesn\'t matter how these ranges are allocated."]
|
||||
pub fn ts_parser_set_included_ranges(self_: *mut TSParser, ranges: *const TSRange, length: u32);
|
||||
#[doc = ""]
|
||||
#[doc = " If `length` is zero, then the entire document will be parsed. Otherwise,"]
|
||||
#[doc = " the given ranges must be ordered from earliest to latest in the document,"]
|
||||
#[doc = " and they must not overlap. That is, the following must hold for all"]
|
||||
#[doc = " `i` < `length - 1`:"]
|
||||
#[doc = ""]
|
||||
#[doc = " ranges[i].end_byte <= ranges[i + 1].start_byte"]
|
||||
#[doc = ""]
|
||||
#[doc = " If this requirement is not satisfied, the operation will fail, the ranges"]
|
||||
#[doc = " will not be assigned, and this function will return `false`. On success,"]
|
||||
#[doc = " this function returns `true`"]
|
||||
pub fn ts_parser_set_included_ranges(
|
||||
self_: *mut TSParser,
|
||||
ranges: *const TSRange,
|
||||
length: u32,
|
||||
) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Get the ranges of text that the parser will include when parsing."]
|
||||
|
|
@ -212,13 +268,15 @@ extern "C" {
|
|||
#[doc = " by default, it will resume where it left off on the next call to"]
|
||||
#[doc = " `ts_parser_parse` or other parsing functions. If you don\'t want to resume,"]
|
||||
#[doc = " and instead intend to use this parser to parse some other document, you must"]
|
||||
#[doc = " call this `ts_parser_reset` first."]
|
||||
#[doc = " call `ts_parser_reset` first."]
|
||||
pub fn ts_parser_reset(self_: *mut TSParser);
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Set the maximum duration in microseconds that parsing should be allowed to"]
|
||||
#[doc = " take before halting. If parsing takes longer than this, it will halt early,"]
|
||||
#[doc = " returning NULL. See `ts_parser_parse` for more information."]
|
||||
#[doc = " take before halting."]
|
||||
#[doc = ""]
|
||||
#[doc = " If parsing takes longer than this, it will halt early, returning NULL."]
|
||||
#[doc = " See `ts_parser_parse` for more information."]
|
||||
pub fn ts_parser_set_timeout_micros(self_: *mut TSParser, timeout: u64);
|
||||
}
|
||||
extern "C" {
|
||||
|
|
@ -226,10 +284,11 @@ extern "C" {
|
|||
pub fn ts_parser_timeout_micros(self_: *const TSParser) -> u64;
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Set the parser\'s current cancellation flag pointer. If a non-null pointer is"]
|
||||
#[doc = " assigned, then the parser will periodically read from this pointer during"]
|
||||
#[doc = " parsing. If it reads a non-zero value, it will halt early, returning NULL."]
|
||||
#[doc = " See `ts_parser_parse` for more information."]
|
||||
#[doc = " Set the parser\'s current cancellation flag pointer."]
|
||||
#[doc = ""]
|
||||
#[doc = " If a non-null pointer is assigned, then the parser will periodically read"]
|
||||
#[doc = " from this pointer during parsing. If it reads a non-zero value, it will"]
|
||||
#[doc = " halt early, returning NULL. See `ts_parser_parse` for more information."]
|
||||
pub fn ts_parser_set_cancellation_flag(self_: *mut TSParser, flag: *const usize);
|
||||
}
|
||||
extern "C" {
|
||||
|
|
@ -255,13 +314,6 @@ extern "C" {
|
|||
#[doc = " SVG output. You can turn off this logging by passing a negative number."]
|
||||
pub fn ts_parser_print_dot_graphs(self_: *mut TSParser, file: ::std::os::raw::c_int);
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Set whether or not the parser should halt immediately upon detecting an"]
|
||||
#[doc = " error. This will generally result in a syntax tree with an error at the"]
|
||||
#[doc = " root, and one or more partial syntax trees within the error. This behavior"]
|
||||
#[doc = " may not be supported long-term."]
|
||||
pub fn ts_parser_halt_on_error(self_: *mut TSParser, halt: bool);
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Create a shallow copy of the syntax tree. This is very fast."]
|
||||
#[doc = ""]
|
||||
|
|
@ -290,21 +342,21 @@ extern "C" {
|
|||
pub fn ts_tree_edit(self_: *mut TSTree, edit: *const TSInputEdit);
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Compare a new syntax tree to a previous syntax tree representing the same"]
|
||||
#[doc = " Compare an old edited syntax tree to a new syntax tree representing the same"]
|
||||
#[doc = " document, returning an array of ranges whose syntactic structure has changed."]
|
||||
#[doc = ""]
|
||||
#[doc = " For this to work correctly, the old syntax tree must have been edited such"]
|
||||
#[doc = " that its ranges match up to the new tree. Generally, you\'ll want to call"]
|
||||
#[doc = " this function right after calling one of the `ts_parser_parse` functions,"]
|
||||
#[doc = " passing in the new tree that was returned from `ts_parser_parse` and the old"]
|
||||
#[doc = " tree that was passed as a parameter."]
|
||||
#[doc = " this function right after calling one of the `ts_parser_parse` functions."]
|
||||
#[doc = " You need to pass the old tree that was passed to parse, as well as the new"]
|
||||
#[doc = " tree that was returned from that function."]
|
||||
#[doc = ""]
|
||||
#[doc = " The returned array is allocated using `malloc` and the caller is responsible"]
|
||||
#[doc = " for freeing it using `free`. The length of the array will be written to the"]
|
||||
#[doc = " given `length` pointer."]
|
||||
pub fn ts_tree_get_changed_ranges(
|
||||
self_: *const TSTree,
|
||||
old_tree: *const TSTree,
|
||||
new_tree: *const TSTree,
|
||||
length: *mut u32,
|
||||
) -> *mut TSRange;
|
||||
}
|
||||
|
|
@ -361,8 +413,8 @@ extern "C" {
|
|||
pub fn ts_node_is_missing(arg1: TSNode) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Check if the node is *missing*. Missing nodes are inserted by the parser in"]
|
||||
#[doc = " order to recover from certain kinds of syntax errors."]
|
||||
#[doc = " Check if the node is *extra*. Extra nodes represent things like comments,"]
|
||||
#[doc = " which are not required the grammar, but can appear anywhere."]
|
||||
pub fn ts_node_is_extra(arg1: TSNode) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
|
|
@ -483,7 +535,7 @@ extern "C" {
|
|||
pub fn ts_tree_cursor_delete(arg1: *mut TSTreeCursor);
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Re-initialize a tree cursor to start at a different ndoe."]
|
||||
#[doc = " Re-initialize a tree cursor to start at a different node."]
|
||||
pub fn ts_tree_cursor_reset(arg1: *mut TSTreeCursor, arg2: TSNode);
|
||||
}
|
||||
extern "C" {
|
||||
|
|
@ -521,14 +573,14 @@ extern "C" {
|
|||
pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Move the cursor to the first schild of its current node."]
|
||||
#[doc = " Move the cursor to the first child of its current node."]
|
||||
#[doc = ""]
|
||||
#[doc = " This returns `true` if the cursor successfully moved, and returns `false`"]
|
||||
#[doc = " if there were no children."]
|
||||
pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Move the cursor to the first schild of its current node that extends beyond"]
|
||||
#[doc = " Move the cursor to the first child of its current node that extends beyond"]
|
||||
#[doc = " the given byte offset."]
|
||||
#[doc = ""]
|
||||
#[doc = " This returns the index of the child node if one was found, and returns -1"]
|
||||
|
|
@ -538,6 +590,165 @@ extern "C" {
|
|||
extern "C" {
|
||||
pub fn ts_tree_cursor_copy(arg1: *const TSTreeCursor) -> TSTreeCursor;
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Create a new query from a string containing one or more S-expression"]
|
||||
#[doc = " patterns. The query is associated with a particular language, and can"]
|
||||
#[doc = " only be run on syntax nodes parsed with that language."]
|
||||
#[doc = ""]
|
||||
#[doc = " If all of the given patterns are valid, this returns a `TSQuery`."]
|
||||
#[doc = " If a pattern is invalid, this returns `NULL`, and provides two pieces"]
|
||||
#[doc = " of information about the problem:"]
|
||||
#[doc = " 1. The byte offset of the error is written to the `error_offset` parameter."]
|
||||
#[doc = " 2. The type of error is written to the `error_type` parameter."]
|
||||
pub fn ts_query_new(
|
||||
language: *const TSLanguage,
|
||||
source: *const ::std::os::raw::c_char,
|
||||
source_len: u32,
|
||||
error_offset: *mut u32,
|
||||
error_type: *mut TSQueryError,
|
||||
) -> *mut TSQuery;
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Delete a query, freeing all of the memory that it used."]
|
||||
pub fn ts_query_delete(arg1: *mut TSQuery);
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Get the number of patterns, captures, or string literals in the query."]
|
||||
pub fn ts_query_pattern_count(arg1: *const TSQuery) -> u32;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_query_capture_count(arg1: *const TSQuery) -> u32;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_query_string_count(arg1: *const TSQuery) -> u32;
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Get the byte offset where the given pattern starts in the query\'s source."]
|
||||
#[doc = ""]
|
||||
#[doc = " This can be useful when combining queries by concatenating their source"]
|
||||
#[doc = " code strings."]
|
||||
pub fn ts_query_start_byte_for_pattern(arg1: *const TSQuery, arg2: u32) -> u32;
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Get all of the predicates for the given pattern in the query."]
|
||||
#[doc = ""]
|
||||
#[doc = " The predicates are represented as a single array of steps. There are three"]
|
||||
#[doc = " types of steps in this array, which correspond to the three legal values for"]
|
||||
#[doc = " the `type` field:"]
|
||||
#[doc = " - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names"]
|
||||
#[doc = " of captures. Their `value_id` can be used with the"]
|
||||
#[doc = " `ts_query_capture_name_for_id` function to obtain the name of the capture."]
|
||||
#[doc = " - `TSQueryPredicateStepTypeString` - Steps with this type represent literal"]
|
||||
#[doc = " strings. Their `value_id` can be used with the"]
|
||||
#[doc = " `ts_query_string_value_for_id` function to obtain their string value."]
|
||||
#[doc = " - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels*"]
|
||||
#[doc = " that represent the end of an individual predicate. If a pattern has two"]
|
||||
#[doc = " predicates, then there will be two steps with this `type` in the array."]
|
||||
pub fn ts_query_predicates_for_pattern(
|
||||
self_: *const TSQuery,
|
||||
pattern_index: u32,
|
||||
length: *mut u32,
|
||||
) -> *const TSQueryPredicateStep;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_query_step_is_definite(self_: *const TSQuery, byte_offset: u32) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Get the name and length of one of the query\'s captures, or one of the"]
|
||||
#[doc = " query\'s string literals. Each capture and string is associated with a"]
|
||||
#[doc = " numeric id based on the order that it appeared in the query\'s source."]
|
||||
pub fn ts_query_capture_name_for_id(
|
||||
arg1: *const TSQuery,
|
||||
id: u32,
|
||||
length: *mut u32,
|
||||
) -> *const ::std::os::raw::c_char;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_query_string_value_for_id(
|
||||
arg1: *const TSQuery,
|
||||
id: u32,
|
||||
length: *mut u32,
|
||||
) -> *const ::std::os::raw::c_char;
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Disable a certain capture within a query."]
|
||||
#[doc = ""]
|
||||
#[doc = " This prevents the capture from being returned in matches, and also avoids"]
|
||||
#[doc = " any resource usage associated with recording the capture. Currently, there"]
|
||||
#[doc = " is no way to undo this."]
|
||||
pub fn ts_query_disable_capture(
|
||||
arg1: *mut TSQuery,
|
||||
arg2: *const ::std::os::raw::c_char,
|
||||
arg3: u32,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Disable a certain pattern within a query."]
|
||||
#[doc = ""]
|
||||
#[doc = " This prevents the pattern from matching and removes most of the overhead"]
|
||||
#[doc = " associated with the pattern. Currently, there is no way to undo this."]
|
||||
pub fn ts_query_disable_pattern(arg1: *mut TSQuery, arg2: u32);
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Create a new cursor for executing a given query."]
|
||||
#[doc = ""]
|
||||
#[doc = " The cursor stores the state that is needed to iteratively search"]
|
||||
#[doc = " for matches. To use the query cursor, first call `ts_query_cursor_exec`"]
|
||||
#[doc = " to start running a given query on a given syntax node. Then, there are"]
|
||||
#[doc = " two options for consuming the results of the query:"]
|
||||
#[doc = " 1. Repeatedly call `ts_query_cursor_next_match` to iterate over all of the"]
|
||||
#[doc = " the *matches* in the order that they were found. Each match contains the"]
|
||||
#[doc = " index of the pattern that matched, and an array of captures. Because"]
|
||||
#[doc = " multiple patterns can match the same set of nodes, one match may contain"]
|
||||
#[doc = " captures that appear *before* some of the captures from a previous match."]
|
||||
#[doc = " 2. Repeatedly call `ts_query_cursor_next_capture` to iterate over all of the"]
|
||||
#[doc = " individual *captures* in the order that they appear. This is useful if"]
|
||||
#[doc = " don\'t care about which pattern matched, and just want a single ordered"]
|
||||
#[doc = " sequence of captures."]
|
||||
#[doc = ""]
|
||||
#[doc = " If you don\'t care about consuming all of the results, you can stop calling"]
|
||||
#[doc = " `ts_query_cursor_next_match` or `ts_query_cursor_next_capture` at any point."]
|
||||
#[doc = " You can then start executing another query on another node by calling"]
|
||||
#[doc = " `ts_query_cursor_exec` again."]
|
||||
pub fn ts_query_cursor_new() -> *mut TSQueryCursor;
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Delete a query cursor, freeing all of the memory that it used."]
|
||||
pub fn ts_query_cursor_delete(arg1: *mut TSQueryCursor);
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Start running a given query on a given node."]
|
||||
pub fn ts_query_cursor_exec(arg1: *mut TSQueryCursor, arg2: *const TSQuery, arg3: TSNode);
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Set the range of bytes or (row, column) positions in which the query"]
|
||||
#[doc = " will be executed."]
|
||||
pub fn ts_query_cursor_set_byte_range(arg1: *mut TSQueryCursor, arg2: u32, arg3: u32);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_query_cursor_set_point_range(arg1: *mut TSQueryCursor, arg2: TSPoint, arg3: TSPoint);
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Advance to the next match of the currently running query."]
|
||||
#[doc = ""]
|
||||
#[doc = " If there is a match, write it to `*match` and return `true`."]
|
||||
#[doc = " Otherwise, return `false`."]
|
||||
pub fn ts_query_cursor_next_match(arg1: *mut TSQueryCursor, match_: *mut TSQueryMatch) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_query_cursor_remove_match(arg1: *mut TSQueryCursor, id: u32);
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Advance to the next capture of the currently running query."]
|
||||
#[doc = ""]
|
||||
#[doc = " If there is a capture, write its match to `*match` and its index within"]
|
||||
#[doc = " the matche\'s capture list to `*capture_index`. Otherwise, return `false`."]
|
||||
pub fn ts_query_cursor_next_capture(
|
||||
arg1: *mut TSQueryCursor,
|
||||
match_: *mut TSQueryMatch,
|
||||
capture_index: *mut u32,
|
||||
) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Get the number of distinct node types in the language."]
|
||||
pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32;
|
||||
|
|
@ -552,8 +763,10 @@ extern "C" {
|
|||
extern "C" {
|
||||
#[doc = " Get the numerical id for the given node type string."]
|
||||
pub fn ts_language_symbol_for_name(
|
||||
arg1: *const TSLanguage,
|
||||
arg2: *const ::std::os::raw::c_char,
|
||||
self_: *const TSLanguage,
|
||||
string: *const ::std::os::raw::c_char,
|
||||
length: u32,
|
||||
is_named: bool,
|
||||
) -> TSSymbol;
|
||||
}
|
||||
extern "C" {
|
||||
|
|
@ -591,5 +804,5 @@ extern "C" {
|
|||
pub fn ts_language_version(arg1: *const TSLanguage) -> u32;
|
||||
}
|
||||
|
||||
pub const TREE_SITTER_LANGUAGE_VERSION: usize = 11;
|
||||
pub const TREE_SITTER_LANGUAGE_VERSION: usize = 12;
|
||||
pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: usize = 9;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
extern crate cc;
|
||||
|
||||
use std::{env, fs};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::{env, fs};
|
||||
|
||||
fn main() {
|
||||
println!("cargo:rerun-if-env-changed=TREE_SITTER_STATIC_ANALYSIS");
|
||||
|
|
@ -21,13 +21,12 @@ fn main() {
|
|||
|
||||
let mut config = cc::Build::new();
|
||||
|
||||
println!("cargo:rerun-if-env-changed=TREE_SITTER_TEST");
|
||||
if env::var("TREE_SITTER_TEST").is_ok() {
|
||||
println!("cargo:rerun-if-env-changed=PROFILE");
|
||||
if env::var("PROFILE").map_or(false, |s| s == "debug") {
|
||||
config.define("TREE_SITTER_TEST", "");
|
||||
}
|
||||
|
||||
let src_path = Path::new("src");
|
||||
|
||||
for entry in fs::read_dir(&src_path).unwrap() {
|
||||
let entry = entry.unwrap();
|
||||
let path = src_path.join(entry.file_name());
|
||||
|
|
@ -37,10 +36,9 @@ fn main() {
|
|||
config
|
||||
.flag_if_supported("-std=c99")
|
||||
.flag_if_supported("-Wno-unused-parameter")
|
||||
.include(src_path)
|
||||
.include("include")
|
||||
.include("utf8proc")
|
||||
.file(src_path.join("lib.c"))
|
||||
.file(Path::new("binding_rust").join("helper.c"))
|
||||
.compile("tree-sitter");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,17 +0,0 @@
|
|||
#if defined(TREE_SITTER_TEST)
|
||||
|
||||
void ts_record_free(void *);
|
||||
|
||||
void rust_tree_sitter_free(void *p) {
|
||||
ts_record_free(p);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void free(void *);
|
||||
|
||||
void rust_tree_sitter_free(void *p) {
|
||||
free(p);
|
||||
}
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,8 +1,69 @@
|
|||
use std::os::raw::c_void;
|
||||
|
||||
extern "C" {
|
||||
#[link_name = "rust_tree_sitter_free"]
|
||||
/// In *Release* builds, the C library links directly against `malloc` and `free`.
|
||||
///
|
||||
/// When freeing memory that was allocated by C code, use `free` directly.
|
||||
#[cfg(not(debug_assertions))]
|
||||
#[link_name = "free"]
|
||||
pub fn free_ptr(ptr: *mut c_void);
|
||||
|
||||
/// In *Test* builds, the C library is compiled with the `TREE_SITTER_TEST` macro,
|
||||
/// so all calls to `malloc`, `free`, etc are linked against wrapper functions
|
||||
/// called `ts_record_malloc`, `ts_record_free`, etc. These symbols are defined
|
||||
/// in the `tree_sitter_cli::tests::helpers::allocations` module.
|
||||
///
|
||||
/// When freeing memory that was allocated by C code, use the `free` function
|
||||
/// from that module.
|
||||
#[cfg(debug_assertions)]
|
||||
#[link_name = "ts_record_free"]
|
||||
pub fn free_ptr(ptr: *mut c_void);
|
||||
|
||||
/// In *Debug* builds, the C library is compiled the same as in test builds: using
|
||||
/// the wrapper functions. This prevents the C library from having to be recompiled
|
||||
/// constantly when switching between running tests and compiling with RLS.
|
||||
///
|
||||
/// But we don't want to actually record allocations when running the library in
|
||||
/// debug mode, so we define symbols like `ts_record_malloc` to just delegate to
|
||||
/// the normal `malloc` functions.
|
||||
#[cfg(all(debug_assertions, not(test)))]
|
||||
fn malloc(size: usize) -> *mut c_void;
|
||||
#[cfg(all(debug_assertions, not(test)))]
|
||||
fn calloc(count: usize, size: usize) -> *mut c_void;
|
||||
#[cfg(all(debug_assertions, not(test)))]
|
||||
fn realloc(ptr: *mut c_void, size: usize) -> *mut c_void;
|
||||
#[cfg(all(debug_assertions, not(test)))]
|
||||
fn free(ptr: *mut c_void);
|
||||
}
|
||||
|
||||
#[cfg(all(debug_assertions, not(test)))]
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn ts_record_malloc(size: usize) -> *const c_void {
|
||||
malloc(size)
|
||||
}
|
||||
|
||||
#[cfg(all(debug_assertions, not(test)))]
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *const c_void {
|
||||
calloc(count, size)
|
||||
}
|
||||
|
||||
#[cfg(all(debug_assertions, not(test)))]
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *const c_void {
|
||||
realloc(ptr, size)
|
||||
}
|
||||
|
||||
#[cfg(all(debug_assertions, not(test)))]
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn ts_record_free(ptr: *mut c_void) {
|
||||
free(ptr)
|
||||
}
|
||||
|
||||
#[cfg(all(debug_assertions, not(test)))]
|
||||
#[no_mangle]
|
||||
extern "C" fn ts_toggle_allocation_recording(_: bool) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
pub struct CBufferIter<T> {
|
||||
|
|
@ -40,6 +101,8 @@ impl<T: Copy> ExactSizeIterator for CBufferIter<T> {}
|
|||
|
||||
impl<T> Drop for CBufferIter<T> {
|
||||
fn drop(&mut self) {
|
||||
unsafe { free_ptr(self.ptr as *mut c_void); }
|
||||
unsafe {
|
||||
free_ptr(self.ptr as *mut c_void);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
1
lib/binding_web/.gitignore
vendored
1
lib/binding_web/.gitignore
vendored
|
|
@ -1,4 +1,5 @@
|
|||
/tree-sitter.js
|
||||
/tree-sitter.wasm
|
||||
package-lock.json
|
||||
node_modules
|
||||
*.tgz
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#include <tree_sitter/api.h>
|
||||
#include <stdio.h>
|
||||
#include "array.h"
|
||||
#include "point.h"
|
||||
|
||||
/*****************************/
|
||||
/* Section - Data marshaling */
|
||||
|
|
@ -31,18 +32,18 @@ static uint32_t byte_to_code_unit(uint32_t byte) {
|
|||
|
||||
static inline void marshal_node(const void **buffer, TSNode node) {
|
||||
buffer[0] = (const void *)node.id;
|
||||
buffer[1] = (const void *)node.context[0];
|
||||
buffer[1] = (const void *)byte_to_code_unit(node.context[0]);
|
||||
buffer[2] = (const void *)node.context[1];
|
||||
buffer[3] = (const void *)node.context[2];
|
||||
buffer[3] = (const void *)byte_to_code_unit(node.context[2]);
|
||||
buffer[4] = (const void *)node.context[3];
|
||||
}
|
||||
|
||||
static inline TSNode unmarshal_node(const TSTree *tree) {
|
||||
TSNode node;
|
||||
node.id = TRANSFER_BUFFER[0];
|
||||
node.context[0] = (uint32_t)TRANSFER_BUFFER[1];
|
||||
node.context[0] = code_unit_to_byte((uint32_t)TRANSFER_BUFFER[1]);
|
||||
node.context[1] = (uint32_t)TRANSFER_BUFFER[2];
|
||||
node.context[2] = (uint32_t)TRANSFER_BUFFER[3];
|
||||
node.context[2] = code_unit_to_byte((uint32_t)TRANSFER_BUFFER[3]);
|
||||
node.context[3] = (uint32_t)TRANSFER_BUFFER[4];
|
||||
node.tree = tree;
|
||||
return node;
|
||||
|
|
@ -114,18 +115,10 @@ extern void tree_sitter_parse_callback(
|
|||
);
|
||||
|
||||
extern void tree_sitter_log_callback(
|
||||
void *payload,
|
||||
TSLogType log_type,
|
||||
bool is_lex_message,
|
||||
const char *message
|
||||
);
|
||||
|
||||
void ts_parser_new_wasm() {
|
||||
TSParser *parser = ts_parser_new();
|
||||
char *input_buffer = calloc(INPUT_BUFFER_SIZE, sizeof(char));
|
||||
TRANSFER_BUFFER[0] = parser;
|
||||
TRANSFER_BUFFER[1] = input_buffer;
|
||||
}
|
||||
|
||||
static const char *call_parse_callback(
|
||||
void *payload,
|
||||
uint32_t byte,
|
||||
|
|
@ -147,8 +140,23 @@ static const char *call_parse_callback(
|
|||
return buffer;
|
||||
}
|
||||
|
||||
static void call_log_callback(
|
||||
void *payload,
|
||||
TSLogType log_type,
|
||||
const char *message
|
||||
) {
|
||||
tree_sitter_log_callback(log_type == TSLogTypeLex, message);
|
||||
}
|
||||
|
||||
void ts_parser_new_wasm() {
|
||||
TSParser *parser = ts_parser_new();
|
||||
char *input_buffer = calloc(INPUT_BUFFER_SIZE, sizeof(char));
|
||||
TRANSFER_BUFFER[0] = parser;
|
||||
TRANSFER_BUFFER[1] = input_buffer;
|
||||
}
|
||||
|
||||
void ts_parser_enable_logger_wasm(TSParser *self, bool should_log) {
|
||||
TSLogger logger = {self, should_log ? tree_sitter_log_callback : NULL};
|
||||
TSLogger logger = {self, should_log ? call_log_callback : NULL};
|
||||
ts_parser_set_logger(self, logger);
|
||||
}
|
||||
|
||||
|
|
@ -305,6 +313,7 @@ void ts_tree_cursor_current_node_wasm(const TSTree *tree) {
|
|||
/******************/
|
||||
|
||||
static TSTreeCursor scratch_cursor = {0};
|
||||
static TSQueryCursor *scratch_query_cursor = NULL;
|
||||
|
||||
uint16_t ts_node_symbol_wasm(const TSTree *tree) {
|
||||
TSNode node = unmarshal_node(tree);
|
||||
|
|
@ -464,12 +473,6 @@ void ts_node_named_children_wasm(const TSTree *tree) {
|
|||
TRANSFER_BUFFER[1] = result;
|
||||
}
|
||||
|
||||
bool point_lte(TSPoint a, TSPoint b) {
|
||||
if (a.row < b.row) return true;
|
||||
if (a.row > b.row) return false;
|
||||
return a.column <= b.column;
|
||||
}
|
||||
|
||||
bool symbols_contain(const uint32_t *set, uint32_t length, uint32_t value) {
|
||||
for (unsigned i = 0; i < length; i++) {
|
||||
if (set[i] == value) return true;
|
||||
|
|
@ -566,3 +569,90 @@ int ts_node_is_missing_wasm(const TSTree *tree) {
|
|||
TSNode node = unmarshal_node(tree);
|
||||
return ts_node_is_missing(node);
|
||||
}
|
||||
|
||||
/******************/
|
||||
/* Section - Query */
|
||||
/******************/
|
||||
|
||||
void ts_query_matches_wasm(
|
||||
const TSQuery *self,
|
||||
const TSTree *tree,
|
||||
uint32_t start_row,
|
||||
uint32_t start_column,
|
||||
uint32_t end_row,
|
||||
uint32_t end_column
|
||||
) {
|
||||
if (!scratch_query_cursor) scratch_query_cursor = ts_query_cursor_new();
|
||||
|
||||
TSNode node = unmarshal_node(tree);
|
||||
TSPoint start_point = {start_row, code_unit_to_byte(start_column)};
|
||||
TSPoint end_point = {end_row, code_unit_to_byte(end_column)};
|
||||
ts_query_cursor_set_point_range(scratch_query_cursor, start_point, end_point);
|
||||
ts_query_cursor_exec(scratch_query_cursor, self, node);
|
||||
|
||||
uint32_t index = 0;
|
||||
uint32_t match_count = 0;
|
||||
Array(const void *) result = array_new();
|
||||
|
||||
TSQueryMatch match;
|
||||
while (ts_query_cursor_next_match(scratch_query_cursor, &match)) {
|
||||
match_count++;
|
||||
array_grow_by(&result, 2 + 6 * match.capture_count);
|
||||
result.contents[index++] = (const void *)(uint32_t)match.pattern_index;
|
||||
result.contents[index++] = (const void *)(uint32_t)match.capture_count;
|
||||
for (unsigned i = 0; i < match.capture_count; i++) {
|
||||
const TSQueryCapture *capture = &match.captures[i];
|
||||
result.contents[index++] = (const void *)capture->index;
|
||||
marshal_node(result.contents + index, capture->node);
|
||||
index += 5;
|
||||
}
|
||||
}
|
||||
|
||||
TRANSFER_BUFFER[0] = (const void *)(match_count);
|
||||
TRANSFER_BUFFER[1] = result.contents;
|
||||
}
|
||||
|
||||
void ts_query_captures_wasm(
|
||||
const TSQuery *self,
|
||||
const TSTree *tree,
|
||||
uint32_t start_row,
|
||||
uint32_t start_column,
|
||||
uint32_t end_row,
|
||||
uint32_t end_column
|
||||
) {
|
||||
if (!scratch_query_cursor) scratch_query_cursor = ts_query_cursor_new();
|
||||
|
||||
TSNode node = unmarshal_node(tree);
|
||||
TSPoint start_point = {start_row, code_unit_to_byte(start_column)};
|
||||
TSPoint end_point = {end_row, code_unit_to_byte(end_column)};
|
||||
ts_query_cursor_set_point_range(scratch_query_cursor, start_point, end_point);
|
||||
ts_query_cursor_exec(scratch_query_cursor, self, node);
|
||||
|
||||
unsigned index = 0;
|
||||
unsigned capture_count = 0;
|
||||
Array(const void *) result = array_new();
|
||||
|
||||
TSQueryMatch match;
|
||||
uint32_t capture_index;
|
||||
while (ts_query_cursor_next_capture(
|
||||
scratch_query_cursor,
|
||||
&match,
|
||||
&capture_index
|
||||
)) {
|
||||
capture_count++;
|
||||
|
||||
array_grow_by(&result, 3 + 6 * match.capture_count);
|
||||
result.contents[index++] = (const void *)(uint32_t)match.pattern_index;
|
||||
result.contents[index++] = (const void *)(uint32_t)match.capture_count;
|
||||
result.contents[index++] = (const void *)(uint32_t)capture_index;
|
||||
for (unsigned i = 0; i < match.capture_count; i++) {
|
||||
const TSQueryCapture *capture = &match.captures[i];
|
||||
result.contents[index++] = (const void *)capture->index;
|
||||
marshal_node(result.contents + index, capture->node);
|
||||
index += 5;
|
||||
}
|
||||
}
|
||||
|
||||
TRANSFER_BUFFER[0] = (const void *)(capture_count);
|
||||
TRANSFER_BUFFER[1] = result.contents;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,12 @@ const SIZE_OF_NODE = 5 * SIZE_OF_INT;
|
|||
const SIZE_OF_POINT = 2 * SIZE_OF_INT;
|
||||
const SIZE_OF_RANGE = 2 * SIZE_OF_INT + 2 * SIZE_OF_POINT;
|
||||
const ZERO_POINT = {row: 0, column: 0};
|
||||
const QUERY_WORD_REGEX = /[\w-.]*/g;
|
||||
|
||||
const PREDICATE_STEP_TYPE_CAPTURE = 1;
|
||||
const PREDICATE_STEP_TYPE_STRING = 2;
|
||||
|
||||
const LANGUAGE_FUNCTION_REGEX = /^_?tree_sitter_\w+/;
|
||||
|
||||
var VERSION;
|
||||
var MIN_COMPATIBLE_VERSION;
|
||||
|
|
@ -37,6 +43,8 @@ class Parser {
|
|||
delete() {
|
||||
C._ts_parser_delete(this[0]);
|
||||
C._free(this[1]);
|
||||
this[0] = 0;
|
||||
this[1] = 0;
|
||||
}
|
||||
|
||||
setLanguage(language) {
|
||||
|
|
@ -122,8 +130,8 @@ class Parser {
|
|||
C._ts_parser_set_timeout_micros(this[0], timeout);
|
||||
}
|
||||
|
||||
getTimeoutMicros(timeout) {
|
||||
C._ts_parser_timeout_micros(this[0]);
|
||||
getTimeoutMicros() {
|
||||
return C._ts_parser_timeout_micros(this[0]);
|
||||
}
|
||||
|
||||
setLogger(callback) {
|
||||
|
|
@ -143,9 +151,7 @@ class Parser {
|
|||
|
||||
class Tree {
|
||||
constructor(internal, address, language, textCallback) {
|
||||
if (internal !== INTERNAL) {
|
||||
throw new Error('Illegal constructor')
|
||||
}
|
||||
assertInternal(internal);
|
||||
this[0] = address;
|
||||
this.language = language;
|
||||
this.textCallback = textCallback;
|
||||
|
|
@ -158,6 +164,7 @@ class Tree {
|
|||
|
||||
delete() {
|
||||
C._ts_tree_delete(this[0]);
|
||||
this[0] = 0;
|
||||
}
|
||||
|
||||
edit(edit) {
|
||||
|
|
@ -201,65 +208,32 @@ class Tree {
|
|||
|
||||
class Node {
|
||||
constructor(internal, tree) {
|
||||
if (internal !== INTERNAL) {
|
||||
throw new Error('Illegal constructor')
|
||||
}
|
||||
assertInternal(internal);
|
||||
this.tree = tree;
|
||||
}
|
||||
|
||||
get id() {
|
||||
return this[0];
|
||||
}
|
||||
|
||||
get typeId() {
|
||||
marshalNode(this);
|
||||
return C._ts_node_symbol_wasm(this.tree);
|
||||
return C._ts_node_symbol_wasm(this.tree[0]);
|
||||
}
|
||||
|
||||
get type() {
|
||||
return this.tree.language.types[this.typeId] || 'ERROR';
|
||||
}
|
||||
|
||||
get startPosition() {
|
||||
marshalNode(this);
|
||||
C._ts_node_start_point_wasm(this.tree[0]);
|
||||
return unmarshalPoint(TRANSFER_BUFFER);
|
||||
}
|
||||
|
||||
get endPosition() {
|
||||
marshalNode(this);
|
||||
C._ts_node_end_point_wasm(this.tree[0]);
|
||||
return unmarshalPoint(TRANSFER_BUFFER);
|
||||
}
|
||||
|
||||
get startIndex() {
|
||||
marshalNode(this);
|
||||
return C._ts_node_start_index_wasm(this.tree[0]);
|
||||
}
|
||||
|
||||
get endIndex() {
|
||||
marshalNode(this);
|
||||
return C._ts_node_end_index_wasm(this.tree[0]);
|
||||
}
|
||||
|
||||
get text() {
|
||||
let {startIndex, endIndex} = this;
|
||||
const length = endIndex - startIndex;
|
||||
let result = this.tree.textCallback(startIndex, null, endIndex);
|
||||
startIndex += result.length;
|
||||
while (startIndex < endIndex) {
|
||||
const string = this.tree.textCallback(startIndex, null, endIndex);
|
||||
if (string && string.length > 0) {
|
||||
startIndex += string.length;
|
||||
result += string;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (startIndex > endIndex) {
|
||||
result = result.slice(0, length);
|
||||
}
|
||||
return result;
|
||||
return getText(this.tree, this.startIndex, this.endIndex);
|
||||
}
|
||||
|
||||
isNamed() {
|
||||
|
|
@ -283,11 +257,7 @@ class Node {
|
|||
}
|
||||
|
||||
equals(other) {
|
||||
if (this === other) return true;
|
||||
for (let i = 0; i < 5; i++) {
|
||||
if (this[i] !== other[i]) return false;
|
||||
}
|
||||
return true;
|
||||
return this.id === other.id;
|
||||
}
|
||||
|
||||
child(index) {
|
||||
|
|
@ -526,9 +496,7 @@ class Node {
|
|||
|
||||
class TreeCursor {
|
||||
constructor(internal, tree) {
|
||||
if (internal !== INTERNAL) {
|
||||
throw new Error('Illegal constructor')
|
||||
}
|
||||
assertInternal(internal);
|
||||
this.tree = tree;
|
||||
unmarshalTreeCursor(this);
|
||||
}
|
||||
|
|
@ -536,6 +504,7 @@ class TreeCursor {
|
|||
delete() {
|
||||
marshalTreeCursor(this);
|
||||
C._ts_tree_cursor_delete_wasm(this.tree[0]);
|
||||
this[0] = this[1] = this[2] = 0;
|
||||
}
|
||||
|
||||
reset(node) {
|
||||
|
|
@ -569,6 +538,13 @@ class TreeCursor {
|
|||
return C._ts_tree_cursor_current_node_is_missing_wasm(this.tree[0]) === 1;
|
||||
}
|
||||
|
||||
get nodeText() {
|
||||
marshalTreeCursor(this);
|
||||
const startIndex = C._ts_tree_cursor_start_index_wasm(this.tree[0]);
|
||||
const endIndex = C._ts_tree_cursor_end_index_wasm(this.tree[0]);
|
||||
return getText(this.tree, startIndex, endIndex);
|
||||
}
|
||||
|
||||
get startPosition() {
|
||||
marshalTreeCursor(this);
|
||||
C._ts_tree_cursor_start_position_wasm(this.tree[0]);
|
||||
|
|
@ -630,9 +606,7 @@ class TreeCursor {
|
|||
|
||||
class Language {
|
||||
constructor(internal, address) {
|
||||
if (internal !== INTERNAL) {
|
||||
throw new Error('Illegal constructor')
|
||||
}
|
||||
assertInternal(internal);
|
||||
this[0] = address;
|
||||
this.types = new Array(C._ts_language_symbol_count(this[0]));
|
||||
for (let i = 0, n = this.types.length; i < n; i++) {
|
||||
|
|
@ -669,7 +643,217 @@ class Language {
|
|||
}
|
||||
|
||||
fieldNameForId(fieldId) {
|
||||
return this.fields[fieldName] || null;
|
||||
return this.fields[fieldId] || null;
|
||||
}
|
||||
|
||||
query(source) {
|
||||
const sourceLength = lengthBytesUTF8(source);
|
||||
const sourceAddress = C._malloc(sourceLength + 1);
|
||||
stringToUTF8(source, sourceAddress, sourceLength + 1);
|
||||
const address = C._ts_query_new(
|
||||
this[0],
|
||||
sourceAddress,
|
||||
sourceLength,
|
||||
TRANSFER_BUFFER,
|
||||
TRANSFER_BUFFER + SIZE_OF_INT
|
||||
);
|
||||
|
||||
if (!address) {
|
||||
const errorId = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
|
||||
const errorByte = getValue(TRANSFER_BUFFER, 'i32');
|
||||
const errorIndex = UTF8ToString(sourceAddress, errorByte).length;
|
||||
const suffix = source.substr(errorIndex, 100).split('\n')[0];
|
||||
let word = suffix.match(QUERY_WORD_REGEX)[0];
|
||||
let error;
|
||||
switch (errorId) {
|
||||
case 2:
|
||||
error = new RangeError(`Bad node name '${word}'`);
|
||||
break;
|
||||
case 3:
|
||||
error = new RangeError(`Bad field name '${word}'`);
|
||||
break;
|
||||
case 4:
|
||||
error = new RangeError(`Bad capture name @${word}`);
|
||||
break;
|
||||
case 5:
|
||||
error = new TypeError(`Bad pattern structure at offset ${errorIndex}: '${suffix}'...`);
|
||||
word = "";
|
||||
break;
|
||||
default:
|
||||
error = new SyntaxError(`Bad syntax at offset ${errorIndex}: '${suffix}'...`);
|
||||
word = "";
|
||||
break;
|
||||
}
|
||||
error.index = errorIndex;
|
||||
error.length = word.length;
|
||||
C._free(sourceAddress);
|
||||
throw error;
|
||||
}
|
||||
|
||||
const stringCount = C._ts_query_string_count(address);
|
||||
const captureCount = C._ts_query_capture_count(address);
|
||||
const patternCount = C._ts_query_pattern_count(address);
|
||||
const captureNames = new Array(captureCount);
|
||||
const stringValues = new Array(stringCount);
|
||||
|
||||
for (let i = 0; i < captureCount; i++) {
|
||||
const nameAddress = C._ts_query_capture_name_for_id(
|
||||
address,
|
||||
i,
|
||||
TRANSFER_BUFFER
|
||||
);
|
||||
const nameLength = getValue(TRANSFER_BUFFER, 'i32');
|
||||
captureNames[i] = UTF8ToString(nameAddress, nameLength);
|
||||
}
|
||||
|
||||
for (let i = 0; i < stringCount; i++) {
|
||||
const valueAddress = C._ts_query_string_value_for_id(
|
||||
address,
|
||||
i,
|
||||
TRANSFER_BUFFER
|
||||
);
|
||||
const nameLength = getValue(TRANSFER_BUFFER, 'i32');
|
||||
stringValues[i] = UTF8ToString(valueAddress, nameLength);
|
||||
}
|
||||
|
||||
const setProperties = new Array(patternCount);
|
||||
const assertedProperties = new Array(patternCount);
|
||||
const refutedProperties = new Array(patternCount);
|
||||
const predicates = new Array(patternCount);
|
||||
const textPredicates = new Array(patternCount);
|
||||
for (let i = 0; i < patternCount; i++) {
|
||||
const predicatesAddress = C._ts_query_predicates_for_pattern(
|
||||
address,
|
||||
i,
|
||||
TRANSFER_BUFFER
|
||||
);
|
||||
const stepCount = getValue(TRANSFER_BUFFER, 'i32');
|
||||
|
||||
predicates[i] = [];
|
||||
textPredicates[i] = [];
|
||||
|
||||
const steps = [];
|
||||
let stepAddress = predicatesAddress;
|
||||
for (let j = 0; j < stepCount; j++) {
|
||||
const stepType = getValue(stepAddress, 'i32');
|
||||
stepAddress += SIZE_OF_INT;
|
||||
const stepValueId = getValue(stepAddress, 'i32');
|
||||
stepAddress += SIZE_OF_INT;
|
||||
if (stepType === PREDICATE_STEP_TYPE_CAPTURE) {
|
||||
steps.push({type: 'capture', name: captureNames[stepValueId]});
|
||||
} else if (stepType === PREDICATE_STEP_TYPE_STRING) {
|
||||
steps.push({type: 'string', value: stringValues[stepValueId]});
|
||||
} else if (steps.length > 0) {
|
||||
if (steps[0].type !== 'string') {
|
||||
throw new Error('Predicates must begin with a literal value');
|
||||
}
|
||||
const operator = steps[0].value;
|
||||
let isPositive = true;
|
||||
switch (operator) {
|
||||
case 'not-eq?':
|
||||
isPositive = false;
|
||||
case 'eq?':
|
||||
if (steps.length !== 3) throw new Error(
|
||||
`Wrong number of arguments to \`#eq?\` predicate. Expected 2, got ${steps.length - 1}`
|
||||
);
|
||||
if (steps[1].type !== 'capture') throw new Error(
|
||||
`First argument of \`#eq?\` predicate must be a capture. Got "${steps[1].value}"`
|
||||
);
|
||||
if (steps[2].type === 'capture') {
|
||||
const captureName1 = steps[1].name;
|
||||
const captureName2 = steps[2].name;
|
||||
textPredicates[i].push(function(captures) {
|
||||
let node1, node2
|
||||
for (const c of captures) {
|
||||
if (c.name === captureName1) node1 = c.node;
|
||||
if (c.name === captureName2) node2 = c.node;
|
||||
}
|
||||
return (node1.text === node2.text) === isPositive;
|
||||
});
|
||||
} else {
|
||||
const captureName = steps[1].name;
|
||||
const stringValue = steps[2].value;
|
||||
textPredicates[i].push(function(captures) {
|
||||
for (const c of captures) {
|
||||
if (c.name === captureName) {
|
||||
return (c.node.text === stringValue) === isPositive;
|
||||
};
|
||||
}
|
||||
return false;
|
||||
});
|
||||
}
|
||||
break;
|
||||
|
||||
case 'not-match?':
|
||||
isPositive = false;
|
||||
case 'match?':
|
||||
if (steps.length !== 3) throw new Error(
|
||||
`Wrong number of arguments to \`#match?\` predicate. Expected 2, got ${steps.length - 1}.`
|
||||
);
|
||||
if (steps[1].type !== 'capture') throw new Error(
|
||||
`First argument of \`#match?\` predicate must be a capture. Got "${steps[1].value}".`
|
||||
);
|
||||
if (steps[2].type !== 'string') throw new Error(
|
||||
`Second argument of \`#match?\` predicate must be a string. Got @${steps[2].value}.`
|
||||
);
|
||||
const captureName = steps[1].name;
|
||||
const regex = new RegExp(steps[2].value);
|
||||
textPredicates[i].push(function(captures) {
|
||||
for (const c of captures) {
|
||||
if (c.name === captureName) return regex.test(c.node.text) === isPositive;
|
||||
}
|
||||
return false;
|
||||
});
|
||||
break;
|
||||
|
||||
case 'set!':
|
||||
if (steps.length < 2 || steps.length > 3) throw new Error(
|
||||
`Wrong number of arguments to \`#set!\` predicate. Expected 1 or 2. Got ${steps.length - 1}.`
|
||||
);
|
||||
if (steps.some(s => s.type !== 'string')) throw new Error(
|
||||
`Arguments to \`#set!\` predicate must be a strings.".`
|
||||
);
|
||||
if (!setProperties[i]) setProperties[i] = {};
|
||||
setProperties[i][steps[1].value] = steps[2] ? steps[2].value : null;
|
||||
break;
|
||||
|
||||
case 'is?':
|
||||
case 'is-not?':
|
||||
if (steps.length < 2 || steps.length > 3) throw new Error(
|
||||
`Wrong number of arguments to \`#${operator}\` predicate. Expected 1 or 2. Got ${steps.length - 1}.`
|
||||
);
|
||||
if (steps.some(s => s.type !== 'string')) throw new Error(
|
||||
`Arguments to \`#${operator}\` predicate must be a strings.".`
|
||||
);
|
||||
const properties = operator === 'is?' ? assertedProperties : refutedProperties;
|
||||
if (!properties[i]) properties[i] = {};
|
||||
properties[i][steps[1].value] = steps[2] ? steps[2].value : null;
|
||||
break;
|
||||
|
||||
default:
|
||||
predicates[i].push({operator, operands: steps.slice(1)});
|
||||
}
|
||||
|
||||
steps.length = 0;
|
||||
}
|
||||
}
|
||||
|
||||
Object.freeze(setProperties[i]);
|
||||
Object.freeze(assertedProperties[i]);
|
||||
Object.freeze(refutedProperties[i]);
|
||||
}
|
||||
|
||||
C._free(sourceAddress);
|
||||
return new Query(
|
||||
INTERNAL,
|
||||
address,
|
||||
captureNames,
|
||||
textPredicates,
|
||||
predicates,
|
||||
Object.freeze(setProperties),
|
||||
Object.freeze(assertedProperties),
|
||||
Object.freeze(refutedProperties)
|
||||
);
|
||||
}
|
||||
|
||||
static load(url) {
|
||||
|
|
@ -695,15 +879,172 @@ class Language {
|
|||
}
|
||||
|
||||
return bytes
|
||||
.then(bytes => loadWebAssemblyModule(bytes, {loadAsync: true}))
|
||||
.then(bytes => loadSideModule(bytes, {loadAsync: true}))
|
||||
.then(mod => {
|
||||
const functionName = Object.keys(mod).find(key => key.includes("tree_sitter_"));
|
||||
const symbolNames = Object.keys(mod)
|
||||
const functionName = symbolNames.find(key =>
|
||||
LANGUAGE_FUNCTION_REGEX.test(key) &&
|
||||
!key.includes("external_scanner_")
|
||||
);
|
||||
if (!functionName) {
|
||||
console.log(`Couldn't find language function in WASM file. Symbols:\n${JSON.stringify(symbolNames, null, 2)}`)
|
||||
}
|
||||
const languageAddress = mod[functionName]();
|
||||
return new Language(INTERNAL, languageAddress);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
class Query {
|
||||
constructor(
|
||||
internal, address, captureNames, textPredicates, predicates,
|
||||
setProperties, assertedProperties, refutedProperties
|
||||
) {
|
||||
assertInternal(internal);
|
||||
this[0] = address;
|
||||
this.captureNames = captureNames;
|
||||
this.textPredicates = textPredicates;
|
||||
this.predicates = predicates;
|
||||
this.setProperties = setProperties;
|
||||
this.assertedProperties = assertedProperties;
|
||||
this.refutedProperties = refutedProperties;
|
||||
}
|
||||
|
||||
delete() {
|
||||
C._ts_query_delete(this[0]);
|
||||
this[0] = 0;
|
||||
}
|
||||
|
||||
matches(node, startPosition, endPosition) {
|
||||
if (!startPosition) startPosition = ZERO_POINT;
|
||||
if (!endPosition) endPosition = ZERO_POINT;
|
||||
|
||||
marshalNode(node);
|
||||
|
||||
C._ts_query_matches_wasm(
|
||||
this[0],
|
||||
node.tree[0],
|
||||
startPosition.row,
|
||||
startPosition.column,
|
||||
endPosition.row,
|
||||
endPosition.column
|
||||
);
|
||||
|
||||
const count = getValue(TRANSFER_BUFFER, 'i32');
|
||||
const startAddress = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
|
||||
const result = new Array(count);
|
||||
|
||||
let address = startAddress;
|
||||
for (let i = 0; i < count; i++) {
|
||||
const pattern = getValue(address, 'i32');
|
||||
address += SIZE_OF_INT;
|
||||
const captureCount = getValue(address, 'i32');
|
||||
address += SIZE_OF_INT;
|
||||
|
||||
const captures = new Array(captureCount);
|
||||
address = unmarshalCaptures(this, node.tree, address, captures);
|
||||
if (this.textPredicates[pattern].every(p => p(captures))) {
|
||||
result[i] = {pattern, captures};
|
||||
const setProperties = this.setProperties[pattern];
|
||||
if (setProperties) result[i].setProperties = setProperties;
|
||||
const assertedProperties = this.assertedProperties[pattern];
|
||||
if (assertedProperties) result[i].assertedProperties = assertedProperties;
|
||||
const refutedProperties = this.refutedProperties[pattern];
|
||||
if (refutedProperties) result[i].refutedProperties = refutedProperties;
|
||||
}
|
||||
}
|
||||
|
||||
C._free(startAddress);
|
||||
return result;
|
||||
}
|
||||
|
||||
captures(node, startPosition, endPosition) {
|
||||
if (!startPosition) startPosition = ZERO_POINT;
|
||||
if (!endPosition) endPosition = ZERO_POINT;
|
||||
|
||||
marshalNode(node);
|
||||
|
||||
C._ts_query_captures_wasm(
|
||||
this[0],
|
||||
node.tree[0],
|
||||
startPosition.row,
|
||||
startPosition.column,
|
||||
endPosition.row,
|
||||
endPosition.column
|
||||
);
|
||||
|
||||
const count = getValue(TRANSFER_BUFFER, 'i32');
|
||||
const startAddress = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
|
||||
const result = [];
|
||||
|
||||
const captures = [];
|
||||
let address = startAddress;
|
||||
for (let i = 0; i < count; i++) {
|
||||
const pattern = getValue(address, 'i32');
|
||||
address += SIZE_OF_INT;
|
||||
const captureCount = getValue(address, 'i32');
|
||||
address += SIZE_OF_INT;
|
||||
const captureIndex = getValue(address, 'i32');
|
||||
address += SIZE_OF_INT;
|
||||
|
||||
captures.length = captureCount
|
||||
address = unmarshalCaptures(this, node.tree, address, captures);
|
||||
|
||||
if (this.textPredicates[pattern].every(p => p(captures))) {
|
||||
const capture = captures[captureIndex];
|
||||
const setProperties = this.setProperties[pattern];
|
||||
if (setProperties) capture.setProperties = setProperties;
|
||||
const assertedProperties = this.assertedProperties[pattern];
|
||||
if (assertedProperties) capture.assertedProperties = assertedProperties;
|
||||
const refutedProperties = this.refutedProperties[pattern];
|
||||
if (refutedProperties) capture.refutedProperties = refutedProperties;
|
||||
result.push(capture);
|
||||
}
|
||||
}
|
||||
|
||||
C._free(startAddress);
|
||||
return result;
|
||||
}
|
||||
|
||||
predicatesForPattern(patternIndex) {
|
||||
return this.predicates[patternIndex]
|
||||
}
|
||||
}
|
||||
|
||||
function getText(tree, startIndex, endIndex) {
|
||||
const length = endIndex - startIndex;
|
||||
let result = tree.textCallback(startIndex, null, endIndex);
|
||||
startIndex += result.length;
|
||||
while (startIndex < endIndex) {
|
||||
const string = tree.textCallback(startIndex, null, endIndex);
|
||||
if (string && string.length > 0) {
|
||||
startIndex += string.length;
|
||||
result += string;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (startIndex > endIndex) {
|
||||
result = result.slice(0, length);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
function unmarshalCaptures(query, tree, address, result) {
|
||||
for (let i = 0, n = result.length; i < n; i++) {
|
||||
const captureIndex = getValue(address, 'i32');
|
||||
address += SIZE_OF_INT;
|
||||
const node = unmarshalNode(tree, address);
|
||||
address += SIZE_OF_NODE;
|
||||
result[i] = {name: query.captureNames[captureIndex], node};
|
||||
}
|
||||
return address;
|
||||
}
|
||||
|
||||
function assertInternal(x) {
|
||||
if (x !== INTERNAL) throw new Error('Illegal constructor')
|
||||
}
|
||||
|
||||
function isPoint(point) {
|
||||
return (
|
||||
point &&
|
||||
|
|
@ -714,22 +1055,36 @@ function isPoint(point) {
|
|||
|
||||
function marshalNode(node) {
|
||||
let address = TRANSFER_BUFFER;
|
||||
for (let i = 0; i < 5; i++) {
|
||||
setValue(address, node[i], 'i32');
|
||||
address += SIZE_OF_INT;
|
||||
}
|
||||
setValue(address, node.id, 'i32');
|
||||
address += SIZE_OF_INT;
|
||||
setValue(address, node.startIndex, 'i32');
|
||||
address += SIZE_OF_INT;
|
||||
setValue(address, node.startPosition.row, 'i32');
|
||||
address += SIZE_OF_INT;
|
||||
setValue(address, node.startPosition.column, 'i32');
|
||||
address += SIZE_OF_INT;
|
||||
setValue(address, node[0], 'i32');
|
||||
}
|
||||
|
||||
function unmarshalNode(tree, address = TRANSFER_BUFFER) {
|
||||
const id = getValue(address, 'i32');
|
||||
if (id === 0) return null;
|
||||
const result = new Node(INTERNAL, tree);
|
||||
result[0] = id;
|
||||
address += SIZE_OF_INT;
|
||||
for (let i = 1; i < 5; i++) {
|
||||
result[i] = getValue(address, 'i32');
|
||||
address += SIZE_OF_INT;
|
||||
}
|
||||
if (id === 0) return null;
|
||||
|
||||
const index = getValue(address, 'i32');
|
||||
address += SIZE_OF_INT;
|
||||
const row = getValue(address, 'i32');
|
||||
address += SIZE_OF_INT;
|
||||
const column = getValue(address, 'i32');
|
||||
address += SIZE_OF_INT;
|
||||
const other = getValue(address, 'i32');
|
||||
|
||||
const result = new Node(INTERNAL, tree);
|
||||
result.id = id;
|
||||
result.startIndex = index;
|
||||
result.startPosition = {row, column};
|
||||
result[0] = other;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -784,7 +1139,3 @@ function marshalEdit(edit) {
|
|||
}
|
||||
|
||||
Parser.Language = Language;
|
||||
|
||||
return Parser;
|
||||
|
||||
}));
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@
|
|||
"__ZNSt3__212basic_stringIwNS_11char_traitsIwEENS_9allocatorIwEEED2Ev",
|
||||
"__ZdlPv",
|
||||
"__Znwm",
|
||||
"___assert_fail",
|
||||
"_abort",
|
||||
"_iswalnum",
|
||||
"_iswalpha",
|
||||
|
|
@ -68,6 +67,18 @@
|
|||
"_ts_parser_new_wasm",
|
||||
"_ts_parser_parse_wasm",
|
||||
"_ts_parser_set_language",
|
||||
"_ts_parser_set_timeout_micros",
|
||||
"_ts_parser_timeout_micros",
|
||||
"_ts_query_capture_count",
|
||||
"_ts_query_capture_name_for_id",
|
||||
"_ts_query_captures_wasm",
|
||||
"_ts_query_delete",
|
||||
"_ts_query_matches_wasm",
|
||||
"_ts_query_new",
|
||||
"_ts_query_pattern_count",
|
||||
"_ts_query_predicates_for_pattern",
|
||||
"_ts_query_string_count",
|
||||
"_ts_query_string_value_for_id",
|
||||
"_ts_tree_cursor_current_field_id_wasm",
|
||||
"_ts_tree_cursor_current_node_id_wasm",
|
||||
"_ts_tree_cursor_current_node_is_missing_wasm",
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ mergeInto(LibraryManager.library, {
|
|||
}
|
||||
},
|
||||
|
||||
tree_sitter_log_callback: function(_payload, isLexMessage, messageAddress) {
|
||||
tree_sitter_log_callback: function(isLexMessage, messageAddress) {
|
||||
if (currentLogCallback) {
|
||||
const message = UTF8ToString(messageAddress);
|
||||
currentLogCallback(message, isLexMessage !== 0);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "web-tree-sitter",
|
||||
"version": "0.15.9",
|
||||
"version": "0.17.1",
|
||||
"description": "Tree-sitter bindings for the web",
|
||||
"main": "tree-sitter.js",
|
||||
"types": "tree-sitter-web.d.ts",
|
||||
|
|
|
|||
2
lib/binding_web/suffix.js
Normal file
2
lib/binding_web/suffix.js
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
return Parser;
|
||||
}));
|
||||
|
|
@ -388,4 +388,24 @@ describe("Node", () => {
|
|||
assert.throws(() => number.closest({a: 1}), /Argument must be a string or array of strings/)
|
||||
});
|
||||
});
|
||||
|
||||
describe('.equals(other)', () => {
|
||||
it('returns true if the nodes are the same', () => {
|
||||
tree = parser.parse('1 + 2');
|
||||
|
||||
const sumNode = tree.rootNode.firstChild.firstChild;
|
||||
const node1 = sumNode.firstChild;
|
||||
const node2 = sumNode.firstChild;
|
||||
assert(node1.equals(node2));
|
||||
});
|
||||
|
||||
it('returns false if the nodes are not the same', () => {
|
||||
tree = parser.parse('1 + 2');
|
||||
|
||||
const sumNode = tree.rootNode.firstChild.firstChild;
|
||||
const node1 = sumNode.firstChild;
|
||||
const node2 = node1.nextSibling;
|
||||
assert(!node1.equals(node2));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue