Merge branch 'master' into m-novikov-add-parsers

commit e7dcd2b7c4
57 changed files with 822 additions and 353 deletions
.github/workflows/ci.yml (2 changes, vendored)

@@ -36,7 +36,7 @@ jobs:
       - name: Read Emscripten version
         run: |
-          printf 'EMSCRIPTEN_VERSION=%s\n' "$(cat emscripten-version)" >> $GITHUB_ENV
+          printf 'EMSCRIPTEN_VERSION=%s\n' "$(cat cli/emscripten-version)" >> $GITHUB_ENV

       - name: Cache artifacts
         id: cache
.gitignore (1 change, vendored)

@@ -2,6 +2,7 @@ log*.html
 .idea
 *.xcodeproj
 .vscode
+fuzz-results
Cargo.lock (12 changes, generated)

@@ -495,6 +495,12 @@ dependencies = [
  "crossbeam-utils",
 ]
 
+[[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
 [[package]]
 name = "ryu"
 version = "1.0.5"

@@ -541,9 +547,9 @@ dependencies = [
 
 [[package]]
 name = "smallbitvec"
-version = "2.5.0"
+version = "2.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "797a4eaffb90d896f29698d45676f9f940a71936d7574996a7df54593ba209fa"
+checksum = "75ce4f9dc4a41b4c3476cc925f1efb11b66df373a8fde5d4b8915fa91b5d995e"
 
 [[package]]
 name = "spin"

@@ -689,11 +695,13 @@ dependencies = [
  "dirs",
  "glob",
  "html-escape",
+ "indexmap",
  "lazy_static",
  "log",
  "rand",
  "regex",
  "regex-syntax",
+ "rustc-hash",
  "serde",
  "serde_derive",
  "serde_json",
LICENSE (2 changes)

@@ -1,6 +1,6 @@
 The MIT License (MIT)
 
-Copyright (c) 2018 Max Brunsfeld
+Copyright (c) 2018-2021 Max Brunsfeld
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
cli/Cargo.toml

@@ -27,40 +27,42 @@ difference = "2.0"
 dirs = "3.0"
 glob = "0.3.0"
 html-escape = "0.2.6"
+indexmap = "1"
 lazy_static = "1.2.0"
 regex = "1"
 regex-syntax = "0.6.4"
+rustc-hash = "1"
 serde = "1.0"
 serde_derive = "1.0"
-smallbitvec = "2.3.0"
+smallbitvec = "2.5.1"
 tiny_http = "0.8"
 walkdir = "2.3"
 webbrowser = "0.5.1"
 which = "4.1.0"
 
 [dependencies.tree-sitter]
-version = ">= 0.17.0"
+version = "0.20"
 path = "../lib"
 
 [dev-dependencies.tree-sitter]
-version = ">= 0.17.0"
+version = "0.20"
 path = "../lib"
 features = ["allocation-tracking"]
 
 [dependencies.tree-sitter-config]
-version = ">= 0.19.0"
+version = "0.19.0"
 path = "config"
 
 [dependencies.tree-sitter-highlight]
-version = ">= 0.3.0"
+version = "0.20"
 path = "../highlight"
 
 [dependencies.tree-sitter-loader]
-version = ">= 0.19.0"
+version = "0.19.0"
 path = "loader"
 
 [dependencies.tree-sitter-tags]
-version = ">= 0.1.0"
+version = "0.20"
 path = "../tags"
 
 [dependencies.serde_json]
cli/README.md

@@ -36,4 +36,4 @@ The `tree-sitter` binary itself has no dependencies, but specific commands have
 * `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation](http://tree-sitter.github.io/tree-sitter/creating-parsers) for more information.
 
-* `parse` - The `tree-sitter parse` command will parse a file (or list of file) using Tree-sitter parsers.
+* `parse` - The `tree-sitter parse` command will parse a file (or list of files) using Tree-sitter parsers.
cli/build.rs (11 changes)

@@ -6,7 +6,7 @@ fn main() {
         println!("cargo:rustc-env={}={}", "BUILD_SHA", git_sha);
     }
 
-    if wasm_files_present() {
+    if web_playground_files_present() {
         println!("cargo:rustc-cfg={}", "TREE_SITTER_EMBED_WASM_BINDING");
     }

@@ -16,15 +16,16 @@ fn main() {
         "RUST_BINDING_VERSION", rust_binding_version,
     );
 
-    let emscripten_version = fs::read_to_string("../emscripten-version").unwrap();
+    let emscripten_version = fs::read_to_string("emscripten-version").unwrap();
     println!(
         "cargo:rustc-env={}={}",
         "EMSCRIPTEN_VERSION", emscripten_version,
     );
 }
 
-fn wasm_files_present() -> bool {
+fn web_playground_files_present() -> bool {
     let paths = [
         "../docs/assets/js/playground.js",
         "../lib/binding_web/tree-sitter.js",
         "../lib/binding_web/tree-sitter.wasm",
     ];

@@ -81,10 +82,10 @@ fn read_git_sha() -> Option<String> {
 }
 
 fn read_rust_binding_version() -> String {
-    let path = "../lib/Cargo.toml";
+    let path = "Cargo.toml";
     let text = fs::read_to_string(path).unwrap();
     let cargo_toml = toml::from_str::<toml::Value>(text.as_ref()).unwrap();
-    cargo_toml["package"]["version"]
+    cargo_toml["dependencies"]["tree-sitter"]["version"]
         .as_str()
         .unwrap()
         .trim_matches('"')
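As context for the build.rs hunks above: a build script talks to Cargo through `cargo:` directives printed to stdout. A minimal, self-contained sketch of the two directives used here (the version string is a placeholder, not taken from this commit):

```rust
// Minimal build-script sketch. `rustc-env` makes the value available via
// env!("EMSCRIPTEN_VERSION") at compile time; `rustc-cfg` turns on the
// #[cfg(TREE_SITTER_EMBED_WASM_BINDING)] flag used by the playground code.
fn main() {
    println!("cargo:rustc-env={}={}", "EMSCRIPTEN_VERSION", "2.0.24"); // placeholder value
    println!("cargo:rustc-cfg={}", "TREE_SITTER_EMBED_WASM_BINDING");
}
```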
cli/config/src/lib.rs

@@ -1,6 +1,6 @@
 //! Manages tree-sitter's configuration file.
 
-use anyhow::{anyhow, Result};
+use anyhow::{anyhow, Context, Result};
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 use std::path::PathBuf;

@@ -14,6 +14,7 @@ use std::{env, fs};
 /// This type holds the generic JSON content of the configuration file. Individual tree-sitter
 /// components will use the [`get`][] method to parse that JSON to extract configuration fields
 /// that are specific to that component.
+#[derive(Debug)]
 pub struct Config {
     pub location: PathBuf,
     pub config: Value,

@@ -64,8 +65,10 @@ impl Config {
             Some(location) => location,
             None => return Config::initial(),
         };
-        let content = fs::read_to_string(&location)?;
-        let config = serde_json::from_str(&content)?;
+        let content = fs::read_to_string(&location)
+            .with_context(|| format!("Failed to read {}", &location.to_string_lossy()))?;
+        let config = serde_json::from_str(&content)
+            .with_context(|| format!("Bad JSON config {}", &location.to_string_lossy()))?;
         Ok(Config { location, config })
     }
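The `with_context` calls above follow anyhow's standard pattern for attaching a human-readable message to an error as it propagates. A minimal sketch of the same pattern, using a hypothetical `load_settings` helper:

```rust
use anyhow::{Context, Result};
use std::fs;

// Hypothetical helper illustrating the error-wrapping pattern above: each
// fallible step is annotated with the file it was working on, so the final
// error reads e.g. "Failed to read settings.json: No such file or directory".
fn load_settings(path: &str) -> Result<serde_json::Value> {
    let content = fs::read_to_string(path)
        .with_context(|| format!("Failed to read {}", path))?;
    let value = serde_json::from_str(&content)
        .with_context(|| format!("Bad JSON config {}", path))?;
    Ok(value)
}
```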
cli/loader/Cargo.toml

@@ -25,13 +25,13 @@ version = "1.0"
 features = ["preserve_order"]
 
 [dependencies.tree-sitter]
-version = ">= 0.19"
+version = "0.20"
 path = "../../lib"
 
 [dependencies.tree-sitter-highlight]
-version = ">= 0.19"
+version = "0.20"
 path = "../../highlight"
 
 [dependencies.tree-sitter-tags]
-version = ">= 0.19"
+version = "0.20"
 path = "../../tags"
cli/loader/src/lib.rs

@@ -12,7 +12,7 @@ use std::process::Command;
 use std::sync::Mutex;
 use std::time::SystemTime;
 use std::{fs, mem};
-use tree_sitter::{Language, QueryError};
+use tree_sitter::{Language, QueryError, QueryErrorKind};
 use tree_sitter_highlight::HighlightConfiguration;
 use tree_sitter_tags::{Error as TagsError, TagsConfiguration};

@@ -101,6 +101,7 @@ pub struct Loader {
     language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
     highlight_names: Box<Mutex<Vec<String>>>,
     use_all_highlight_names: bool,
+    debug_build: bool,
 }
 
 unsafe impl Send for Loader {}

@@ -122,6 +123,7 @@ impl Loader {
             language_configuration_ids_by_file_type: HashMap::new(),
             highlight_names: Box::new(Mutex::new(Vec::new())),
             use_all_highlight_names: true,
+            debug_build: false,
         }
     }

@@ -347,7 +349,11 @@
         parser_path: &Path,
         scanner_path: &Option<PathBuf>,
     ) -> Result<Language> {
-        let mut library_path = self.parser_lib_path.join(name);
+        let mut lib_name = name.to_string();
+        if self.debug_build {
+            lib_name.push_str(".debug._");
+        }
+        let mut library_path = self.parser_lib_path.join(lib_name);
         library_path.set_extension(DYLIB_EXTENSION);
 
         let recompile = needs_recompile(&library_path, &parser_path, &scanner_path)

@@ -369,11 +375,13 @@
         }
 
         if cfg!(windows) {
-            command
-                .args(&["/nologo", "/LD", "/I"])
-                .arg(header_path)
-                .arg("/Od")
-                .arg(parser_path);
+            command.args(&["/nologo", "/LD", "/I"]).arg(header_path);
+            if self.debug_build {
+                command.arg("/Od");
+            } else {
+                command.arg("/O2");
+            }
+            command.arg(parser_path);
             if let Some(scanner_path) = scanner_path.as_ref() {
                 command.arg(scanner_path);
             }

@@ -389,8 +397,18 @@
                 .arg("-I")
                 .arg(header_path)
                 .arg("-o")
-                .arg(&library_path)
-                .arg("-O2");
+                .arg(&library_path);
+
+            if self.debug_build {
+                command.arg("-O0");
+            } else {
+                command.arg("-O2");
+            }
+
+            // For conditional compilation of external scanner code when
+            // used internally by `tree-sitter parse` and other sub commands.
+            command.arg("-DTREE_SITTER_INTERNAL_BUILD");
 
             if let Some(scanner_path) = scanner_path.as_ref() {
                 if scanner_path.extension() == Some("c".as_ref()) {
                     command.arg("-xc").arg("-std=c99").arg(scanner_path);

@@ -639,6 +657,10 @@
             Err(anyhow!("No language found"))
         }
     }
+
+    pub fn use_debug_build(&mut self, flag: bool) {
+        self.debug_build = flag;
+    }
 }
 
 impl<'a> LanguageConfiguration<'a> {

@@ -662,28 +684,31 @@
                 &injections_query,
                 &locals_query,
             )
-            .map_err(|error| {
-                if error.offset < injections_query.len() {
-                    Self::include_path_in_query_error(
-                        error,
-                        &injection_ranges,
-                        &injections_query,
-                        0,
-                    )
-                } else if error.offset < injections_query.len() + locals_query.len() {
-                    Self::include_path_in_query_error(
-                        error,
-                        &locals_ranges,
-                        &locals_query,
-                        injections_query.len(),
-                    )
-                } else {
-                    Self::include_path_in_query_error(
-                        error,
-                        &highlight_ranges,
-                        &highlights_query,
-                        injections_query.len() + locals_query.len(),
-                    )
-                }
-            })?;
+            .map_err(|error| match error.kind {
+                QueryErrorKind::Language => Error::from(error),
+                _ => {
+                    if error.offset < injections_query.len() {
+                        Self::include_path_in_query_error(
+                            error,
+                            &injection_ranges,
+                            &injections_query,
+                            0,
+                        )
+                    } else if error.offset < injections_query.len() + locals_query.len() {
+                        Self::include_path_in_query_error(
+                            error,
+                            &locals_ranges,
+                            &locals_query,
+                            injections_query.len(),
+                        )
+                    } else {
+                        Self::include_path_in_query_error(
+                            error,
+                            &highlight_ranges,
+                            &highlights_query,
+                            injections_query.len() + locals_query.len(),
+                        )
+                    }
+                }
+            })?;
             let mut all_highlight_names = self.highlight_names.lock().unwrap();
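The loader change above threads a `debug_build` flag through to the C compiler invocation. Stripped of the surrounding plumbing, the flag selection reduces to the following sketch (hypothetical function, assuming cl.exe/cc semantics as in the diff):

```rust
use std::process::Command;

// Hypothetical illustration of the flag selection added above: MSVC uses
// /Od (optimization disabled), GCC/Clang use -O0, and release builds keep
// /O2 or -O2 respectively.
fn compiler_command(debug_build: bool) -> Command {
    let mut command;
    if cfg!(windows) {
        command = Command::new("cl.exe");
        command.arg(if debug_build { "/Od" } else { "/O2" });
    } else {
        command = Command::new("cc");
        command.arg(if debug_build { "-O0" } else { "-O2" });
        command.arg("-DTREE_SITTER_INTERNAL_BUILD");
    }
    command
}

fn main() {
    println!("{:?}", compiler_command(true));
}
```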
cli/npm/.gitignore (1 change, vendored)

@@ -2,3 +2,4 @@ tree-sitter
 tree-sitter.exe
 *.gz
 *.tgz
+LICENSE
cli/npm/package.json

@@ -14,7 +14,8 @@
   ],
   "main": "lib/api/index.js",
   "scripts": {
-    "install": "node install.js"
+    "install": "node install.js",
+    "prepack": "cp ../../LICENSE ."
   },
   "bin": {
     "tree-sitter": "cli.js"
cli/src/generate/build_tables/build_lex_table.rs

@@ -347,7 +347,7 @@ fn lex_states_differ(
 fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) {
     // Get a mapping of old state index -> new_state_index
     let mut old_ids_by_new_id = (0..table.states.len()).collect::<Vec<_>>();
-    &old_ids_by_new_id[1..].sort_by_key(|id| &table.states[*id]);
+    old_ids_by_new_id[1..].sort_by_key(|id| &table.states[*id]);
 
     // Get the inverse mapping
     let mut new_ids_by_old_id = vec![0; old_ids_by_new_id.len()];
cli/src/generate/build_tables/build_parse_table.rs

@@ -11,10 +11,14 @@ use crate::generate::tables::{
     ProductionInfo, ProductionInfoId,
 };
 use anyhow::{anyhow, Result};
+use std::cmp::Ordering;
 use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
 use std::fmt::Write;
+use std::hash::BuildHasherDefault;
 use std::u32;
-use std::{cmp::Ordering, collections::hash_map::Entry};
+
+use indexmap::{map::Entry, IndexMap};
+use rustc_hash::FxHasher;
 
 // For conflict reporting, each parse state is associated with an example
 // sequence of symbols that could lead to that parse state.

@@ -49,7 +53,7 @@ struct ParseTableBuilder<'a> {
     lexical_grammar: &'a LexicalGrammar,
     variable_info: &'a Vec<VariableInfo>,
     core_ids_by_core: HashMap<ParseItemSetCore<'a>, usize>,
-    state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
+    state_ids_by_item_set: IndexMap<ParseItemSet<'a>, ParseStateId, BuildHasherDefault<FxHasher>>,
     parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
     parse_state_queue: VecDeque<ParseStateQueueEntry>,
     non_terminal_extra_states: Vec<(Symbol, usize)>,

@@ -147,13 +151,7 @@ impl<'a> ParseTableBuilder<'a> {
             Entry::Vacant(v) => {
                 let core = v.key().core();
                 let core_count = self.core_ids_by_core.len();
-                let core_id = match self.core_ids_by_core.entry(core) {
-                    Entry::Occupied(e) => *e.get(),
-                    Entry::Vacant(e) => {
-                        e.insert(core_count);
-                        core_count
-                    }
-                };
+                let core_id = *self.core_ids_by_core.entry(core).or_insert(core_count);
 
                 let state_id = self.parse_table.states.len();
                 self.parse_state_info_by_id

@@ -163,8 +161,8 @@ impl<'a> ParseTableBuilder<'a> {
                     id: state_id,
                     lex_state_id: 0,
                     external_lex_state_id: 0,
-                    terminal_entries: HashMap::new(),
-                    nonterminal_entries: HashMap::new(),
+                    terminal_entries: IndexMap::default(),
+                    nonterminal_entries: IndexMap::default(),
                     core_id,
                 });
                 self.parse_state_queue.push_back(ParseStateQueueEntry {

@@ -981,7 +979,7 @@ pub(crate) fn build_parse_table<'a>(
         item_set_builder,
         variable_info,
         non_terminal_extra_states: Vec::new(),
-        state_ids_by_item_set: HashMap::new(),
+        state_ids_by_item_set: IndexMap::default(),
        core_ids_by_core: HashMap::new(),
        parse_state_info_by_id: Vec::new(),
        parse_state_queue: VecDeque::new(),
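Swapping `HashMap` for `IndexMap` with a fixed hasher makes iteration follow insertion order and removes the randomized hashing, so the generated parser tables come out identical on every run. A minimal sketch of the pattern, assuming the indexmap and rustc-hash crates as added in Cargo.toml above:

```rust
use std::hash::BuildHasherDefault;

use indexmap::IndexMap;
use rustc_hash::FxHasher;

// Alias used for illustration: an insertion-ordered map with a
// deterministic (non-randomized) hash function.
type FxIndexMap<K, V> = IndexMap<K, V, BuildHasherDefault<FxHasher>>;

fn main() {
    let mut states: FxIndexMap<&str, usize> = FxIndexMap::default();
    states.insert("error", 0);
    states.insert("start", 1);
    states.insert("expr", 2);
    // Unlike std::collections::HashMap, iteration follows insertion
    // order on every run, so emitted code is reproducible.
    for (name, id) in &states {
        println!("{} -> {}", name, id);
    }
}
```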
cli/src/generate/build_tables/minimize_parse_table.rs

@@ -479,7 +479,7 @@ impl<'a> Minimizer<'a> {
     fn reorder_states_by_descending_size(&mut self) {
         // Get a mapping of old state index -> new_state_index
         let mut old_ids_by_new_id = (0..self.parse_table.states.len()).collect::<Vec<_>>();
-        &old_ids_by_new_id.sort_unstable_by_key(|i| {
+        old_ids_by_new_id.sort_unstable_by_key(|i| {
             // Don't change states 0 (the error state) or 1 (the start state).
             if *i <= 1 {
                 return *i as i64 - 1_000_000;
cli/src/generate/mod.rs

@@ -169,6 +169,7 @@ fn load_grammar_file(grammar_path: &Path) -> Result<String> {
 }
 
 fn load_js_grammar_file(grammar_path: &Path) -> Result<String> {
+    let grammar_path = fs::canonicalize(grammar_path)?;
     let mut node_process = Command::new("node")
         .env("TREE_SITTER_GRAMMAR_PATH", grammar_path)
         .stdin(Stdio::piped())
cli/src/generate/prepare_grammar/expand_tokens.rs

@@ -19,10 +19,16 @@ lazy_static! {
         serde_json::from_str(UNICODE_CATEGORIES_JSON).unwrap();
     static ref UNICODE_PROPERTIES: HashMap<&'static str, Vec<u32>> =
         serde_json::from_str(UNICODE_PROPERTIES_JSON).unwrap();
+    static ref UNICODE_CATEGORY_ALIASES: HashMap<&'static str, String> =
+        serde_json::from_str(UNICODE_CATEGORY_ALIASES_JSON).unwrap();
+    static ref UNICODE_PROPERTY_ALIASES: HashMap<&'static str, String> =
+        serde_json::from_str(UNICODE_PROPERTY_ALIASES_JSON).unwrap();
 }
 
 const UNICODE_CATEGORIES_JSON: &'static str = include_str!("./unicode-categories.json");
 const UNICODE_PROPERTIES_JSON: &'static str = include_str!("./unicode-properties.json");
+const UNICODE_CATEGORY_ALIASES_JSON: &'static str = include_str!("./unicode-category-aliases.json");
+const UNICODE_PROPERTY_ALIASES_JSON: &'static str = include_str!("./unicode-property-aliases.json");
 const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/'];
 
 struct NfaBuilder {

@@ -394,12 +400,16 @@ impl NfaBuilder {
                 category_letter = le.to_string();
             }
             ClassUnicodeKind::Named(class_name) => {
-                if class_name.len() == 1 {
-                    category_letter = class_name.clone();
+                let actual_class_name = UNICODE_CATEGORY_ALIASES
+                    .get(class_name.as_str())
+                    .or_else(|| UNICODE_PROPERTY_ALIASES.get(class_name.as_str()))
+                    .unwrap_or(class_name);
+                if actual_class_name.len() == 1 {
+                    category_letter = actual_class_name.clone();
                 } else {
                     let code_points = UNICODE_CATEGORIES
-                        .get(class_name.as_str())
-                        .or_else(|| UNICODE_PROPERTIES.get(class_name.as_str()))
+                        .get(actual_class_name.as_str())
+                        .or_else(|| UNICODE_PROPERTIES.get(actual_class_name.as_str()))
                         .ok_or_else(|| {
                             anyhow!(
                                 "Regex error: Unsupported unicode character class {}",
cli/src/generate/prepare_grammar/unicode-category-aliases.json (new file)

@@ -0,0 +1 @@
+{"Other":"C","Control":"Cc","cntrl":"Cc","Format":"Cf","Unassigned":"Cn","Private_Use":"Co","Surrogate":"Cs","Letter":"L","Cased_Letter":"LC","Lowercase_Letter":"Ll","Modifier_Letter":"Lm","Other_Letter":"Lo","Titlecase_Letter":"Lt","Uppercase_Letter":"Lu","Mark":"M","Combining_Mark":"M","Spacing_Mark":"Mc","Enclosing_Mark":"Me","Nonspacing_Mark":"Mn","Number":"N","Decimal_Number":"Nd","digit":"Nd","Letter_Number":"Nl","Other_Number":"No","Punctuation":"P","punct":"P","Connector_Punctuation":"Pc","Dash_Punctuation":"Pd","Close_Punctuation":"Pe","Final_Punctuation":"Pf","Initial_Punctuation":"Pi","Other_Punctuation":"Po","Open_Punctuation":"Ps","Symbol":"S","Currency_Symbol":"Sc","Modifier_Symbol":"Sk","Math_Symbol":"Sm","Other_Symbol":"So","Separator":"Z","Line_Separator":"Zl","Paragraph_Separator":"Zp","Space_Separator":"Zs"}
cli/src/generate/prepare_grammar/unicode-property-aliases.json (new file)

@@ -0,0 +1 @@
+{"cjkAccountingNumeric":"kAccountingNumeric","cjkOtherNumeric":"kOtherNumeric","cjkPrimaryNumeric":"kPrimaryNumeric","nv":"Numeric_Value","cf":"Case_Folding","cjkCompatibilityVariant":"kCompatibilityVariant","dm":"Decomposition_Mapping","FC_NFKC":"FC_NFKC_Closure","lc":"Lowercase_Mapping","NFKC_CF":"NFKC_Casefold","scf":"Simple_Case_Folding","sfc":"Simple_Case_Folding","slc":"Simple_Lowercase_Mapping","stc":"Simple_Titlecase_Mapping","suc":"Simple_Uppercase_Mapping","tc":"Titlecase_Mapping","uc":"Uppercase_Mapping","bmg":"Bidi_Mirroring_Glyph","bpb":"Bidi_Paired_Bracket","cjkIICore":"kIICore","cjkIRG_GSource":"kIRG_GSource","cjkIRG_HSource":"kIRG_HSource","cjkIRG_JSource":"kIRG_JSource","cjkIRG_KPSource":"kIRG_KPSource","cjkIRG_KSource":"kIRG_KSource","cjkIRG_MSource":"kIRG_MSource","cjkIRG_SSource":"kIRG_SSource","cjkIRG_TSource":"kIRG_TSource","cjkIRG_UKSource":"kIRG_UKSource","cjkIRG_USource":"kIRG_USource","cjkIRG_VSource":"kIRG_VSource","cjkRSUnicode":"kRSUnicode","Unicode_Radical_Stroke":"kRSUnicode","URS":"kRSUnicode","EqUIdeo":"Equivalent_Unified_Ideograph","isc":"ISO_Comment","JSN":"Jamo_Short_Name","na":"Name","na1":"Unicode_1_Name","Name_Alias":"Name_Alias","scx":"Script_Extensions","age":"Age","blk":"Block","sc":"Script","bc":"Bidi_Class","bpt":"Bidi_Paired_Bracket_Type","ccc":"Canonical_Combining_Class","dt":"Decomposition_Type","ea":"East_Asian_Width","gc":"General_Category","GCB":"Grapheme_Cluster_Break","hst":"Hangul_Syllable_Type","InPC":"Indic_Positional_Category","InSC":"Indic_Syllabic_Category","jg":"Joining_Group","jt":"Joining_Type","lb":"Line_Break","NFC_QC":"NFC_Quick_Check","NFD_QC":"NFD_Quick_Check","NFKC_QC":"NFKC_Quick_Check","NFKD_QC":"NFKD_Quick_Check","nt":"Numeric_Type","SB":"Sentence_Break","vo":"Vertical_Orientation","WB":"Word_Break","AHex":"ASCII_Hex_Digit","Alpha":"Alphabetic","Bidi_C":"Bidi_Control","Bidi_M":"Bidi_Mirrored","Cased":"Cased","CE":"Composition_Exclusion","CI":"Case_Ignorable","Comp_Ex":"Full_Composition_Exclusion","CWCF":"Changes_When_Casefolded","CWCM":"Changes_When_Casemapped","CWKCF":"Changes_When_NFKC_Casefolded","CWL":"Changes_When_Lowercased","CWT":"Changes_When_Titlecased","CWU":"Changes_When_Uppercased","Dash":"Dash","Dep":"Deprecated","DI":"Default_Ignorable_Code_Point","Dia":"Diacritic","EBase":"Emoji_Modifier_Base","EComp":"Emoji_Component","EMod":"Emoji_Modifier","Emoji":"Emoji","EPres":"Emoji_Presentation","Ext":"Extender","ExtPict":"Extended_Pictographic","Gr_Base":"Grapheme_Base","Gr_Ext":"Grapheme_Extend","Gr_Link":"Grapheme_Link","Hex":"Hex_Digit","Hyphen":"Hyphen","IDC":"ID_Continue","Ideo":"Ideographic","IDS":"ID_Start","IDSB":"IDS_Binary_Operator","IDST":"IDS_Trinary_Operator","Join_C":"Join_Control","LOE":"Logical_Order_Exception","Lower":"Lowercase","Math":"Math","NChar":"Noncharacter_Code_Point","OAlpha":"Other_Alphabetic","ODI":"Other_Default_Ignorable_Code_Point","OGr_Ext":"Other_Grapheme_Extend","OIDC":"Other_ID_Continue","OIDS":"Other_ID_Start","OLower":"Other_Lowercase","OMath":"Other_Math","OUpper":"Other_Uppercase","Pat_Syn":"Pattern_Syntax","Pat_WS":"Pattern_White_Space","PCM":"Prepended_Concatenation_Mark","QMark":"Quotation_Mark","Radical":"Radical","RI":"Regional_Indicator","SD":"Soft_Dotted","STerm":"Sentence_Terminal","Term":"Terminal_Punctuation","UIdeo":"Unified_Ideograph","Upper":"Uppercase","VS":"Variation_Selector","WSpace":"White_Space","space":"White_Space","XIDC":"XID_Continue","XIDS":"XID_Start","XO_NFC":"Expands_On_NFC","XO_NFD":"Expands_On_NFD","XO_NFKC":"Expands_On_NFKC","XO_NFKD":"Expands_On_NFKD"}
cli/src/generate/render.rs

@@ -1057,7 +1057,7 @@ impl Generator {
     }
 
     fn add_parse_table(&mut self) {
-        let mut parse_table_entries = Vec::new();
+        let mut parse_table_entries = HashMap::new();
         let mut next_parse_action_list_index = 0;
 
         self.get_parse_action_list_id(

@@ -1224,6 +1224,11 @@
             add_line!(self, "");
         }
 
+        let mut parse_table_entries: Vec<_> = parse_table_entries
+            .into_iter()
+            .map(|(entry, i)| (i, entry))
+            .collect();
+        parse_table_entries.sort_by_key(|(index, _)| *index);
         self.add_parse_action_list(parse_table_entries);
     }

@@ -1404,17 +1409,17 @@
     fn get_parse_action_list_id(
         &self,
         entry: &ParseTableEntry,
-        parse_table_entries: &mut Vec<(usize, ParseTableEntry)>,
+        parse_table_entries: &mut HashMap<ParseTableEntry, usize>,
         next_parse_action_list_index: &mut usize,
     ) -> usize {
-        if let Some((index, _)) = parse_table_entries.iter().find(|(_, e)| *e == *entry) {
-            return *index;
+        if let Some(&index) = parse_table_entries.get(entry) {
+            index
+        } else {
+            let result = *next_parse_action_list_index;
+            parse_table_entries.insert(entry.clone(), result);
+            *next_parse_action_list_index += 1 + entry.actions.len();
+            result
         }
-
-        let result = *next_parse_action_list_index;
-        parse_table_entries.push((result, entry.clone()));
-        *next_parse_action_list_index += 1 + entry.actions.len();
-        result
     }
 
     fn get_field_map_id(
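The render.rs change above replaces a linear scan over a Vec with a HashMap-based interning table (which is why `ParseTableEntry` gains `Hash` in tables.rs below): each distinct entry gets a stable index the first time it is seen, and repeat lookups are O(1) on average. A self-contained sketch of that pattern, with hypothetical names:

```rust
use std::collections::HashMap;

// Hypothetical interner illustrating the lookup-or-insert pattern above.
fn intern(table: &mut HashMap<String, usize>, next_index: &mut usize, value: &str) -> usize {
    if let Some(&index) = table.get(value) {
        index
    } else {
        let index = *next_index;
        table.insert(value.to_string(), index);
        *next_index += 1;
        index
    }
}

fn main() {
    let mut table = HashMap::new();
    let mut next = 0;
    assert_eq!(intern(&mut table, &mut next, "shift"), 0);
    assert_eq!(intern(&mut table, &mut next, "reduce"), 1);
    assert_eq!(intern(&mut table, &mut next, "shift"), 0); // reused index
}
```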
cli/src/generate/tables.rs

@@ -1,11 +1,16 @@
 use super::nfa::CharacterSet;
 use super::rules::{Alias, Symbol, TokenSet};
-use std::collections::{BTreeMap, HashMap};
+use std::collections::BTreeMap;
 
 pub(crate) type ProductionInfoId = usize;
 pub(crate) type ParseStateId = usize;
 pub(crate) type LexStateId = usize;
 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
+use std::hash::BuildHasherDefault;
+
+use indexmap::IndexMap;
+use rustc_hash::FxHasher;
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub(crate) enum ParseAction {
     Accept,
     Shift {

@@ -28,7 +33,7 @@ pub(crate) enum GotoAction {
     ShiftExtra,
 }
 
-#[derive(Clone, Debug, PartialEq, Eq)]
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
 pub(crate) struct ParseTableEntry {
     pub actions: Vec<ParseAction>,
     pub reusable: bool,

@@ -37,8 +42,8 @@ pub(crate) struct ParseTableEntry {
 #[derive(Clone, Debug, Default, PartialEq, Eq)]
 pub(crate) struct ParseState {
     pub id: ParseStateId,
-    pub terminal_entries: HashMap<Symbol, ParseTableEntry>,
-    pub nonterminal_entries: HashMap<Symbol, GotoAction>,
+    pub terminal_entries: IndexMap<Symbol, ParseTableEntry, BuildHasherDefault<FxHasher>>,
+    pub nonterminal_entries: IndexMap<Symbol, GotoAction, BuildHasherDefault<FxHasher>>,
     pub lex_state_id: usize,
     pub external_lex_state_id: usize,
     pub core_id: usize,
cli/src/lib.rs

@@ -2,6 +2,7 @@ pub mod generate;
 pub mod highlight;
 pub mod logger;
 pub mod parse;
+pub mod playground;
 pub mod query;
 pub mod query_testing;
 pub mod tags;

@@ -9,7 +10,6 @@ pub mod test;
 pub mod test_highlight;
 pub mod util;
 pub mod wasm;
-pub mod web_ui;
 
 #[cfg(test)]
 mod tests;
cli/src/logger.rs

@@ -1,5 +1,6 @@
 use log::{LevelFilter, Log, Metadata, Record};
 
+#[allow(dead_code)]
 struct Logger {
     pub filter: Option<String>,
 }
cli/src/main.rs (145 changes)

@@ -4,7 +4,7 @@ use glob::glob;
 use std::path::Path;
 use std::{env, fs, u64};
 use tree_sitter_cli::{
-    generate, highlight, logger, parse, query, tags, test, test_highlight, util, wasm, web_ui,
+    generate, highlight, logger, parse, playground, query, tags, test, test_highlight, util, wasm,
 };
 use tree_sitter_config::Config;
 use tree_sitter_loader as loader;

@@ -35,6 +35,45 @@ fn run() -> Result<()> {
         BUILD_VERSION.to_string()
     };
 
+    let debug_arg = Arg::with_name("debug")
+        .help("Show parsing debug log")
+        .long("debug")
+        .short("d");
+
+    let debug_graph_arg = Arg::with_name("debug-graph")
+        .help("Produce the log.html file with debug graphs")
+        .long("debug-graph")
+        .short("D");
+
+    let debug_build_arg = Arg::with_name("debug-build")
+        .help("Compile a parser in debug mode")
+        .long("debug-build")
+        .short("0");
+
+    let paths_file_arg = Arg::with_name("paths-file")
+        .help("The path to a file with paths to source file(s)")
+        .long("paths")
+        .takes_value(true);
+
+    let paths_arg = Arg::with_name("paths")
+        .help("The source file(s) to use")
+        .multiple(true);
+
+    let scope_arg = Arg::with_name("scope")
+        .help("Select a language by the scope instead of a file extension")
+        .long("scope")
+        .takes_value(true);
+
+    let time_arg = Arg::with_name("time")
+        .help("Measure execution time")
+        .long("time")
+        .short("t");
+
+    let quiet_arg = Arg::with_name("quiet")
+        .help("Suppress main output")
+        .long("quiet")
+        .short("q");
+
     let matches = App::new("tree-sitter")
         .author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
         .about("Generates and tests parsers")

@@ -65,23 +104,30 @@
             SubCommand::with_name("parse")
                 .alias("p")
                 .about("Parse files")
-                .arg(Arg::with_name("paths-file").long("paths").takes_value(true))
-                .arg(
-                    Arg::with_name("paths")
-                        .index(1)
-                        .multiple(true)
-                        .required(false),
-                )
-                .arg(Arg::with_name("scope").long("scope").takes_value(true))
-                .arg(Arg::with_name("debug").long("debug").short("d"))
-                .arg(Arg::with_name("debug-graph").long("debug-graph").short("D"))
+                .arg(&paths_file_arg)
+                .arg(&paths_arg)
+                .arg(&scope_arg)
+                .arg(&debug_arg)
+                .arg(&debug_build_arg)
+                .arg(&debug_graph_arg)
                 .arg(Arg::with_name("debug-xml").long("xml").short("x"))
-                .arg(Arg::with_name("quiet").long("quiet").short("q"))
-                .arg(Arg::with_name("stat").long("stat").short("s"))
-                .arg(Arg::with_name("time").long("time").short("t"))
-                .arg(Arg::with_name("timeout").long("timeout").takes_value(true))
+                .arg(
+                    Arg::with_name("stat")
+                        .help("Show parsing statistic")
+                        .long("stat")
+                        .short("s"),
+                )
+                .arg(
+                    Arg::with_name("timeout")
+                        .help("Interrupt the parsing process by timeout (µs)")
+                        .long("timeout")
+                        .takes_value(true),
+                )
+                .arg(&time_arg)
+                .arg(&quiet_arg)
                 .arg(
                     Arg::with_name("edits")
                         .help("Apply edits in the format: \"row,col del_count insert_text\"")
                         .long("edit")
                         .short("edit")
                         .takes_value(true)

@@ -93,36 +139,32 @@
             SubCommand::with_name("query")
                 .alias("q")
                 .about("Search files using a syntax tree query")
-                .arg(Arg::with_name("query-path").index(1).required(true))
-                .arg(Arg::with_name("paths-file").long("paths").takes_value(true))
                 .arg(
-                    Arg::with_name("paths")
-                        .index(2)
-                        .multiple(true)
-                        .required(false),
+                    Arg::with_name("query-path")
+                        .help("Path to a file with queries")
+                        .index(1)
+                        .required(true),
                 )
+                .arg(&paths_file_arg)
+                .arg(&paths_arg.clone().index(2))
                 .arg(
                     Arg::with_name("byte-range")
                         .help("The range of byte offsets in which the query will be executed")
                         .long("byte-range")
                         .takes_value(true),
                 )
-                .arg(Arg::with_name("scope").long("scope").takes_value(true))
+                .arg(&scope_arg)
                 .arg(Arg::with_name("captures").long("captures").short("c"))
                 .arg(Arg::with_name("test").long("test")),
         )
         .subcommand(
             SubCommand::with_name("tags")
-                .arg(Arg::with_name("quiet").long("quiet").short("q"))
-                .arg(Arg::with_name("time").long("time").short("t"))
-                .arg(Arg::with_name("scope").long("scope").takes_value(true))
-                .arg(Arg::with_name("paths-file").long("paths").takes_value(true))
-                .arg(
-                    Arg::with_name("paths")
-                        .help("The source file to use")
-                        .index(1)
-                        .multiple(true),
-                ),
+                .about("Generate a list of tags")
+                .arg(&scope_arg)
+                .arg(&time_arg)
+                .arg(&quiet_arg)
+                .arg(&paths_file_arg)
+                .arg(&paths_arg),
         )
         .subcommand(
             SubCommand::with_name("test")

@@ -141,23 +183,24 @@
                         .short("u")
                         .help("Update all syntax trees in corpus files with current parser output"),
                 )
-                .arg(Arg::with_name("debug").long("debug").short("d"))
-                .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")),
+                .arg(&debug_arg)
+                .arg(&debug_build_arg)
+                .arg(&debug_graph_arg),
         )
         .subcommand(
             SubCommand::with_name("highlight")
                 .about("Highlight a file")
-                .arg(Arg::with_name("paths-file").long("paths").takes_value(true))
                 .arg(
-                    Arg::with_name("paths")
-                        .index(1)
-                        .multiple(true)
-                        .required(false),
+                    Arg::with_name("html")
+                        .help("Generate highlighting as an HTML document")
+                        .long("html")
+                        .short("H"),
                 )
-                .arg(Arg::with_name("scope").long("scope").takes_value(true))
-                .arg(Arg::with_name("html").long("html").short("H"))
-                .arg(Arg::with_name("time").long("time").short("t"))
-                .arg(Arg::with_name("quiet").long("quiet").short("q")),
+                .arg(&scope_arg)
+                .arg(&time_arg)
+                .arg(&quiet_arg)
+                .arg(&paths_file_arg)
+                .arg(&paths_arg),
         )
         .subcommand(
             SubCommand::with_name("build-wasm")

@@ -180,7 +223,7 @@
                     Arg::with_name("quiet")
                         .long("quiet")
                         .short("q")
-                        .help("open in default browser"),
+                        .help("Don't open in default browser"),
                 ),
         )
         .subcommand(

@@ -237,8 +280,12 @@
         ("test", Some(matches)) => {
             let debug = matches.is_present("debug");
             let debug_graph = matches.is_present("debug-graph");
+            let debug_build = matches.is_present("debug-build");
             let update = matches.is_present("update");
             let filter = matches.value_of("filter");
 
+            loader.use_debug_build(debug_build);
+
             let languages = loader.languages_at_path(&current_dir)?;
             let language = languages
                 .first()

@@ -274,6 +321,7 @@
         ("parse", Some(matches)) => {
             let debug = matches.is_present("debug");
             let debug_graph = matches.is_present("debug-graph");
+            let debug_build = matches.is_present("debug-build");
             let debug_xml = matches.is_present("debug-xml");
             let quiet = matches.is_present("quiet");
             let time = matches.is_present("time");

@@ -287,6 +335,8 @@
                 env::set_var("TREE_SITTER_DEBUG", "1");
             }
 
+            loader.use_debug_build(debug_build);
+
             let timeout = matches
                 .value_of("timeout")
                 .map_or(0, |t| u64::from_str_radix(t, 10).unwrap());

@@ -418,11 +468,10 @@
 
             if let Some(highlight_config) = language_config.highlight_config(language)? {
                 let source = fs::read(path)?;
-                let theme_config = config.get()?;
                 if html_mode {
                     highlight::html(
                         &loader,
-                        &theme_config,
+                        &theme_config.theme,
                         &source,
                         highlight_config,
                         quiet,

@@ -431,7 +480,7 @@
                 } else {
                     highlight::ansi(
                         &loader,
-                        &theme_config,
+                        &theme_config.theme,
                         &source,
                         highlight_config,
                         time,

@@ -455,7 +504,7 @@
 
         ("playground", Some(matches)) => {
             let open_in_browser = !matches.is_present("quiet");
-            web_ui::serve(&current_dir, open_in_browser);
+            playground::serve(&current_dir, open_in_browser);
         }
 
         ("dump-languages", Some(_)) => {
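The main.rs refactor defines each Arg once and passes it by reference to every subcommand that needs it, which is the usual way to deduplicate flag definitions in clap 2.x. A minimal sketch of the pattern, assuming clap 2.x (the SubCommand API used throughout this file); the "demo" app and its subcommands are hypothetical:

```rust
use clap::{App, Arg, SubCommand};

fn main() {
    // Define the shared flag once, then reuse it by reference: clap 2.x
    // clones the Arg for each subcommand it is attached to.
    let quiet_arg = Arg::with_name("quiet")
        .help("Suppress main output")
        .long("quiet")
        .short("q");

    let matches = App::new("demo")
        .subcommand(SubCommand::with_name("parse").arg(&quiet_arg))
        .subcommand(SubCommand::with_name("highlight").arg(&quiet_arg))
        .get_matches();

    if let ("parse", Some(m)) = matches.subcommand() {
        let _quiet = m.is_present("quiet");
    }
}
```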
cli/src/playground.rs (renamed from cli/src/web_ui.rs)

@@ -9,28 +9,6 @@ use tiny_http::{Header, Response, Server};
 use webbrowser;
 
-macro_rules! resource {
-    ($name: tt, $path: tt) => {
-        #[cfg(TREE_SITTER_EMBED_WASM_BINDING)]
-        fn $name(tree_sitter_dir: &Option<PathBuf>) -> Vec<u8> {
-            if let Some(tree_sitter_dir) = tree_sitter_dir {
-                fs::read(tree_sitter_dir.join($path)).unwrap()
-            } else {
-                include_bytes!(concat!("../../", $path)).to_vec()
-            }
-        }
-
-        #[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))]
-        fn $name(tree_sitter_dir: &Option<PathBuf>) -> Vec<u8> {
-            if let Some(tree_sitter_dir) = tree_sitter_dir {
-                fs::read(tree_sitter_dir.join($path)).unwrap()
-            } else {
-                include_bytes!(concat!("../../", $path)).to_vec()
-            }
-        }
-    };
-}
-
 macro_rules! optional_resource {
     ($name: tt, $path: tt) => {
         #[cfg(TREE_SITTER_EMBED_WASM_BINDING)]
         fn $name(tree_sitter_dir: &Option<PathBuf>) -> Vec<u8> {

@@ -52,15 +30,15 @@
     };
 }
 
-resource!(get_main_html, "cli/src/web_ui.html");
+resource!(get_main_html, "cli/src/playground.html");
 resource!(get_playground_js, "docs/assets/js/playground.js");
-optional_resource!(get_lib_js, "lib/binding_web/tree-sitter.js");
-optional_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm");
+resource!(get_lib_js, "lib/binding_web/tree-sitter.js");
+resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm");
 
 pub fn serve(grammar_path: &Path, open_in_browser: bool) {
     let port = get_available_port().expect("Couldn't find an available port");
-    let url = format!("127.0.0.1:{}", port);
-    let server = Server::http(&url).expect("Failed to start web server");
+    let addr = format!("127.0.0.1:{}", port);
+    let server = Server::http(&addr).expect("Failed to start web server");
     let grammar_name = wasm::get_grammar_name(&grammar_path.join("src"))
         .with_context(|| "Failed to get wasm filename")
         .unwrap();

@@ -73,8 +51,10 @@
         )
     })
     .unwrap();
+    let url = format!("http://{}", addr);
+    println!("Started playground on: {}", url);
     if open_in_browser {
-        if let Err(_) = webbrowser::open(&format!("http://127.0.0.1:{}", port)) {
+        if let Err(_) = webbrowser::open(&url) {
             eprintln!("Failed to open '{}' in a web browser", url);
         }
     }

@@ -95,17 +75,23 @@
     for request in server.incoming_requests() {
         let res = match request.url() {
             "/" => response(&main_html, &html_header),
-            "/playground.js" => response(&playground_js, &js_header),
             "/tree-sitter-parser.wasm" => response(&language_wasm, &wasm_header),
+            "/playground.js" => {
+                if playground_js.is_empty() {
+                    redirect("https://tree-sitter.github.io/tree-sitter/assets/js/playground.js")
+                } else {
+                    response(&playground_js, &js_header)
+                }
+            }
             "/tree-sitter.js" => {
-                if cfg!(windows) {
+                if lib_js.is_empty() {
                     redirect("https://tree-sitter.github.io/tree-sitter.js")
                 } else {
                     response(&lib_js, &js_header)
                 }
             }
             "/tree-sitter.wasm" => {
-                if cfg!(windows) {
+                if lib_wasm.is_empty() {
                     redirect("https://tree-sitter.github.io/tree-sitter.wasm")
                 } else {
                     response(&lib_wasm, &wasm_header)
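The playground server now falls back to a redirect whenever an embedded asset is empty (i.e. the local wasm/js files were not present at build time), instead of keying the fallback on Windows. A minimal sketch, assuming tiny_http 0.8 as pinned in cli/Cargo.toml; the server address here is hypothetical:

```rust
use tiny_http::{Header, Response, Server};

// Sketch of the redirect fallback used above: answer with an HTTP 302
// whose Location header points at the copy hosted on tree-sitter.github.io.
fn redirect(url: &str) -> Response<std::io::Empty> {
    Response::empty(302)
        .with_header(Header::from_bytes(&b"Location"[..], url.as_bytes()).unwrap())
}

fn main() {
    let server = Server::http("127.0.0.1:8000").expect("Failed to start web server");
    for request in server.incoming_requests() {
        let _ = request.respond(redirect("https://tree-sitter.github.io/tree-sitter.js"));
    }
}
```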
cli/src/query.rs

@@ -48,10 +48,12 @@ pub fn query_files_at_paths(
                 let capture_name = &query.capture_names()[capture.index as usize];
                 writeln!(
                     &mut stdout,
-                    "    pattern: {}, capture: {}, row: {}, text: {:?}",
+                    "    pattern: {:>2}, capture: {} - {}, start: {}, end: {}, text: `{}`",
                     mat.pattern_index,
+                    capture.index,
                     capture_name,
-                    capture.node.start_position().row,
+                    capture.node.start_position(),
+                    capture.node.end_position(),
                     capture.node.utf8_text(&source_code).unwrap_or("")
                 )?;
                 results.push(query_testing::CaptureInfo {

@@ -70,9 +72,11 @@
             if end.row == start.row {
                 writeln!(
                     &mut stdout,
-                    "    capture: {}, start: {}, text: {:?}",
+                    "    capture: {} - {}, start: {}, end: {}, text: `{}`",
                     capture.index,
+                    capture_name,
                     start,
+                    end,
                     capture.node.utf8_text(&source_code).unwrap_or("")
                 )?;
             } else {
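For reference, the `{:>2}` specifier in the new format string right-aligns the pattern index in a two-character column, which keeps single- and double-digit indices lined up in the output:

```rust
fn main() {
    // `{:>2}` pads to width 2, right-aligned, as in the new query output.
    assert_eq!(format!("pattern: {:>2}", 7), "pattern:  7");
    assert_eq!(format!("pattern: {:>2}", 42), "pattern: 42");
}
```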
cli/src/query_testing.rs

@@ -48,40 +48,38 @@ pub fn parse_position_comments(
         if node.kind().contains("comment") {
             if let Ok(text) = node.utf8_text(source) {
                 let mut position = node.start_position();
-                if position.row == 0 {
-                    continue;
-                }
-
-                // Find the arrow character ("^" or "<-") in the comment. A left arrow
-                // refers to the column where the comment node starts. An up arrow refers
-                // to its own column.
-                let mut has_left_caret = false;
-                let mut has_arrow = false;
-                let mut arrow_end = 0;
-                for (i, c) in text.char_indices() {
-                    arrow_end = i + 1;
-                    if c == '-' && has_left_caret {
-                        has_arrow = true;
-                        break;
+                if position.row > 0 {
+                    // Find the arrow character ("^" or "<-") in the comment. A left arrow
+                    // refers to the column where the comment node starts. An up arrow refers
+                    // to its own column.
+                    let mut has_left_caret = false;
+                    let mut has_arrow = false;
+                    let mut arrow_end = 0;
+                    for (i, c) in text.char_indices() {
+                        arrow_end = i + 1;
+                        if c == '-' && has_left_caret {
+                            has_arrow = true;
+                            break;
+                        }
+                        if c == '^' {
+                            has_arrow = true;
+                            position.column += i;
+                            break;
+                        }
+                        has_left_caret = c == '<';
                     }
-                    if c == '^' {
-                        has_arrow = true;
-                        position.column += i;
-                        break;
-                    }
-                    has_left_caret = c == '<';
-                }
 
-                // If the comment node contains an arrow and a highlight name, record the
-                // highlight name and the position.
-                if let (true, Some(mat)) =
-                    (has_arrow, CAPTURE_NAME_REGEX.find(&text[arrow_end..]))
-                {
-                    assertion_ranges.push((node.start_position(), node.end_position()));
-                    result.push(Assertion {
-                        position: position,
-                        expected_capture_name: mat.as_str().to_string(),
-                    });
+                    // If the comment node contains an arrow and a highlight name, record the
+                    // highlight name and the position.
+                    if let (true, Some(mat)) =
+                        (has_arrow, CAPTURE_NAME_REGEX.find(&text[arrow_end..]))
+                    {
+                        assertion_ranges.push((node.start_position(), node.end_position()));
+                        result.push(Assertion {
+                            position: position,
+                            expected_capture_name: mat.as_str().to_string(),
+                        });
+                    }
                 }
             }
         }
cli/src/test.rs (184 changes)

@@ -5,7 +5,6 @@ use difference::{Changeset, Difference};
 use lazy_static::lazy_static;
 use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder};
 use regex::Regex;
-use std::char;
 use std::ffi::OsStr;
 use std::fmt::Write as FmtWrite;
 use std::fs;

@@ -16,11 +15,12 @@ use tree_sitter::{Language, LogType, Parser, Query};
 use walkdir::WalkDir;
 
 lazy_static! {
-    static ref HEADER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^===+\r?\n([^=]*)\r?\n===+\r?\n")
-        .multi_line(true)
-        .build()
-        .unwrap();
-    static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^---+\r?\n")
+    static ref HEADER_REGEX: ByteRegex =
+        ByteRegexBuilder::new(r"^===+(?P<suffix1>[^=\r\n][^\r\n]*)?\r?\n(?P<test_name>[^=\r\n][^\r\n]*)\r?\n===+(?P<suffix2>[^=\r\n][^\r\n]*)?\r?\n")
+            .multi_line(true)
+            .build()
+            .unwrap();
+    static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^---+(?P<suffix>[^-\r\n][^\r\n]*)?\r?\n")
         .multi_line(true)
         .build()
         .unwrap();

@@ -114,7 +114,9 @@ pub fn run_tests_at_path(
         print_diff_key();
         for (i, (name, actual, expected)) in failures.iter().enumerate() {
             println!("\n  {}. {}:", i + 1, name);
-            print_diff(actual, expected);
+            let actual = format_sexp_indented(&actual, 2);
+            let expected = format_sexp_indented(&expected, 2);
+            print_diff(&actual, &expected);
         }
         Err(anyhow!(""))
     }

@@ -153,8 +155,7 @@ pub fn print_diff_key() {
 }
 
 pub fn print_diff(actual: &String, expected: &String) {
-    let changeset = Changeset::new(actual, expected, " ");
-    print!("  ");
+    let changeset = Changeset::new(actual, expected, "\n");
     for diff in &changeset.diffs {
         match diff {
             Difference::Same(part) => {

@@ -263,9 +264,13 @@ fn run_tests(
 }
 
 fn format_sexp(sexp: &String) -> String {
+    format_sexp_indented(sexp, 0)
+}
+
+fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String {
     let mut formatted = String::new();
 
-    let mut indent_level = 0;
+    let mut indent_level = initial_indent_level;
     let mut has_field = false;
     let mut s_iter = sexp.split(|c| c == ' ' || c == ')');
     while let Some(s) = s_iter.next() {

@@ -375,22 +380,58 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
     let mut prev_name = String::new();
     let mut prev_header_end = 0;
 
-    // Identify all of the test descriptions using the `======` headers.
-    for (header_start, header_end) in HEADER_REGEX
-        .find_iter(&bytes)
-        .map(|m| (m.start(), m.end()))
-        .chain(Some((bytes.len(), bytes.len())))
-    {
-        // Find the longest line of dashes following each test description.
-        // That is the divider between input and expected output.
+    // Find the first test header in the file, and determine if it has a
+    // custom suffix. If so, then this suffix will be used to identify
+    // all subsequent headers and divider lines in the file.
+    let first_suffix = HEADER_REGEX
+        .captures(bytes)
+        .and_then(|c| c.name("suffix1"))
+        .map(|m| String::from_utf8_lossy(m.as_bytes()));
+
+    // Find all of the `===` test headers, which contain the test names.
+    // Ignore any matches whose suffix does not match the first header
+    // suffix in the file.
+    let header_matches = HEADER_REGEX.captures_iter(&bytes).filter_map(|c| {
+        let suffix1 = c
+            .name("suffix1")
+            .map(|m| String::from_utf8_lossy(m.as_bytes()));
+        let suffix2 = c
+            .name("suffix2")
+            .map(|m| String::from_utf8_lossy(m.as_bytes()));
+        if suffix1 == first_suffix && suffix2 == first_suffix {
+            let header_range = c.get(0).unwrap().range();
+            let test_name = c
+                .name("test_name")
+                .map(|c| String::from_utf8_lossy(c.as_bytes()).to_string());
+            Some((header_range, test_name))
+        } else {
+            None
+        }
+    });
+
+    for (header_range, test_name) in header_matches.chain(Some((bytes.len()..bytes.len(), None))) {
+        // Find the longest line of dashes following each test description. That line
+        // separates the input from the expected output. Ignore any matches whose suffix
+        // does not match the first suffix in the file.
         if prev_header_end > 0 {
-            let divider_match = DIVIDER_REGEX
-                .find_iter(&bytes[prev_header_end..header_start])
-                .map(|m| (prev_header_end + m.start(), prev_header_end + m.end()))
-                .max_by_key(|(start, end)| end - start);
-            if let Some((divider_start, divider_end)) = divider_match {
-                if let Ok(output) = str::from_utf8(&bytes[divider_end..header_start]) {
-                    let mut input = bytes[prev_header_end..divider_start].to_vec();
+            let divider_range = DIVIDER_REGEX
+                .captures_iter(&bytes[prev_header_end..header_range.start])
+                .filter_map(|m| {
+                    let suffix = m
+                        .name("suffix")
+                        .map(|m| String::from_utf8_lossy(m.as_bytes()));
+                    if suffix == first_suffix {
+                        let range = m.get(0).unwrap().range();
+                        Some((prev_header_end + range.start)..(prev_header_end + range.end))
+                    } else {
+                        None
+                    }
+                })
+                .max_by_key(|range| range.len());
+
+            if let Some(divider_range) = divider_range {
+                if let Ok(output) = str::from_utf8(&bytes[divider_range.end..header_range.start]) {
+                    let mut input = bytes[prev_header_end..divider_range.start].to_vec();
 
                     // Remove trailing newline from the input.
                     input.pop();

@@ -400,6 +441,7 @@
 
                     // Remove all comments
                     let output = COMMENT_REGEX.replace_all(output, "").to_string();
+
                     // Normalize the whitespace in the expected output.
                     let output = WHITESPACE_REGEX.replace_all(output.trim(), " ");
                     let output = output.replace(" )", ")");

@@ -417,10 +459,8 @@
                 }
             }
         }
-        prev_name = String::from_utf8_lossy(&bytes[header_start..header_end])
-            .trim_matches(|c| char::is_whitespace(c) || c == '=')
-            .to_string();
-        prev_header_end = header_end;
+        prev_name = test_name.unwrap_or(String::new());
+        prev_header_end = header_range.end;
     }
     TestEntry::Group {
         name,

@@ -434,7 +474,7 @@ mod tests {
     use super::*;
 
     #[test]
-    fn test_parse_test_content() {
+    fn test_parse_test_content_simple() {
         let entry = parse_test_content(
             "the-filename".to_string(),
             r#"

@@ -664,4 +704,88 @@ code
         }
     );
 }
+
+#[test]
+fn test_parse_test_content_with_suffixes() {
+    let entry = parse_test_content(
+        "the-filename".to_string(),
+        r#"
+==================asdf\()[]|{}*+?^$.-
+First test
+==================asdf\()[]|{}*+?^$.-
+
+=========================
+NOT A TEST HEADER
+=========================
+-------------------------
+
+---asdf\()[]|{}*+?^$.-
+
+(a)
+
+==================asdf\()[]|{}*+?^$.-
+Second test
+==================asdf\()[]|{}*+?^$.-
+
+=========================
+NOT A TEST HEADER
+=========================
+-------------------------
+
+---asdf\()[]|{}*+?^$.-
+
+(a)
+
+=========================asdf\()[]|{}*+?^$.-
+Test name with = symbol
+=========================asdf\()[]|{}*+?^$.-
+
+=========================
+NOT A TEST HEADER
+=========================
+-------------------------
+
+---asdf\()[]|{}*+?^$.-
+
+(a)
+"#
+        .trim()
+        .to_string(),
+        None,
+    );
+
+    let expected_input = "\n=========================\n\
+        NOT A TEST HEADER\n\
+        =========================\n\
+        -------------------------\n"
+        .as_bytes()
+        .to_vec();
+    assert_eq!(
+        entry,
+        TestEntry::Group {
+            name: "the-filename".to_string(),
+            children: vec![
+                TestEntry::Example {
+                    name: "First test".to_string(),
+                    input: expected_input.clone(),
+                    output: "(a)".to_string(),
+                    has_fields: false,
+                },
+                TestEntry::Example {
+                    name: "Second test".to_string(),
+                    input: expected_input.clone(),
+                    output: "(a)".to_string(),
+                    has_fields: false,
+                },
+                TestEntry::Example {
+                    name: "Test name with = symbol".to_string(),
+                    input: expected_input.clone(),
+                    output: "(a)".to_string(),
+                    has_fields: false,
+                }
+            ],
+            file_path: None,
+        }
+    );
+}
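The reworked HEADER_REGEX and DIVIDER_REGEX lean on named capture groups to pick apart the `===` delimiter, its optional suffix, and the test name. A small standalone sketch of that regex feature, assuming the regex crate and a simplified two-line header:

```rust
use regex::Regex;

fn main() {
    // Named groups, as used by the reworked HEADER_REGEX: the optional
    // suffix after the `===` run is captured separately from the test
    // name on the following line. (Simplified: the real pattern also
    // matches the closing `===` line.)
    let header =
        Regex::new(r"(?m)^===+(?P<suffix>[^=\r\n][^\r\n]*)?\r?\n(?P<test_name>[^=\r\n][^\r\n]*)\r?\n")
            .unwrap();
    let text = "======||\nFirst test\n";
    let caps = header.captures(text).unwrap();
    assert_eq!(caps.name("suffix").map(|m| m.as_str()), Some("||"));
    assert_eq!(&caps["test_name"], "First test");
}
```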
cli/src/tests/parser_test.rs

@@ -63,9 +63,14 @@ fn test_parsing_with_logging() {
     )));
     assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string())));
 
+    let mut row_starts_from_0 = false;
     for (_, m) in &messages {
-        assert!(!m.contains("row:0"));
+        if m.contains("row:0") {
+            row_starts_from_0 = true;
+            break;
+        }
     }
+    assert!(row_starts_from_0);
 }
 
 #[test]

@@ -849,7 +854,10 @@ fn test_parsing_with_multiple_included_ranges() {
         hello_text_node.start_byte(),
         source_code.find("Hello").unwrap()
     );
-    assert_eq!(hello_text_node.end_byte(), source_code.find("<b>").unwrap());
+    assert_eq!(
+        hello_text_node.end_byte(),
+        source_code.find(" <b>").unwrap()
+    );
 
     assert_eq!(b_start_tag_node.kind(), "start_tag");
     assert_eq!(
cli/src/tests/test_highlight_test.rs

@@ -17,6 +17,7 @@ fn test_highlight_test_with_basic_test() {
         ],
     );
     let source = [
+        "// hi",
         "var abc = function(d) {",
         "  // ^ function",
         "  //       ^ keyword",

@@ -32,15 +33,15 @@
         assertions,
         &[
             Assertion {
-                position: Point::new(0, 5),
+                position: Point::new(1, 5),
                 expected_capture_name: "function".to_string()
             },
             Assertion {
-                position: Point::new(0, 11),
+                position: Point::new(1, 11),
                 expected_capture_name: "keyword".to_string()
             },
             Assertion {
-                position: Point::new(3, 9),
+                position: Point::new(4, 9),
                 expected_capture_name: "variable.parameter".to_string()
             },
         ]

@@ -53,12 +54,12 @@
     assert_eq!(
         highlight_positions,
         &[
-            (Point::new(0, 0), Point::new(0, 3), Highlight(2)),   // "var"
-            (Point::new(0, 4), Point::new(0, 7), Highlight(0)),   // "abc"
-            (Point::new(0, 10), Point::new(0, 18), Highlight(2)), // "function"
-            (Point::new(0, 19), Point::new(0, 20), Highlight(1)), // "d"
-            (Point::new(3, 2), Point::new(3, 8), Highlight(2)),   // "return"
-            (Point::new(3, 9), Point::new(3, 10), Highlight(1)),  // "d"
+            (Point::new(1, 0), Point::new(1, 3), Highlight(2)),   // "var"
+            (Point::new(1, 4), Point::new(1, 7), Highlight(0)),   // "abc"
+            (Point::new(1, 10), Point::new(1, 18), Highlight(2)), // "function"
+            (Point::new(1, 19), Point::new(1, 20), Highlight(1)), // "d"
+            (Point::new(4, 2), Point::new(4, 8), Highlight(2)),   // "return"
+            (Point::new(4, 9), Point::new(4, 10), Highlight(1)),  // "d"
         ]
     );
 }
docs/Gemfile.lock

@@ -6,8 +6,8 @@ GEM
       minitest (~> 5.1)
       thread_safe (~> 0.3, >= 0.3.4)
       tzinfo (~> 1.1)
-    addressable (2.5.2)
-      public_suffix (>= 2.0.2, < 4.0)
+    addressable (2.8.0)
+      public_suffix (>= 2.0.2, < 5.0)
     coffee-script (2.4.1)
       coffee-script-source
       execjs

@@ -16,12 +16,27 @@ GEM
     commonmarker (0.17.8)
       ruby-enum (~> 0.5)
     concurrent-ruby (1.0.5)
-    ethon (0.11.0)
-      ffi (>= 1.3.0)
+    ethon (0.14.0)
+      ffi (>= 1.15.0)
     execjs (2.7.0)
-    faraday (0.14.0)
+    faraday (1.5.1)
+      faraday-em_http (~> 1.0)
+      faraday-em_synchrony (~> 1.0)
+      faraday-excon (~> 1.1)
+      faraday-httpclient (~> 1.0.1)
+      faraday-net_http (~> 1.0)
+      faraday-net_http_persistent (~> 1.1)
+      faraday-patron (~> 1.0)
       multipart-post (>= 1.2, < 3)
-    ffi (1.9.23)
+      ruby2_keywords (>= 0.0.4)
+    faraday-em_http (1.0.0)
+    faraday-em_synchrony (1.0.0)
+    faraday-excon (1.1.0)
+    faraday-httpclient (1.0.1)
+    faraday-net_http (1.0.1)
+    faraday-net_http_persistent (1.2.0)
+    faraday-patron (1.0.0)
+    ffi (1.15.3)
     forwardable-extended (2.6.0)
     gemoji (3.0.0)
     github-pages (177)

@@ -195,33 +210,35 @@ GEM
     minima (2.1.1)
       jekyll (~> 3.3)
     minitest (5.11.3)
-    multipart-post (2.0.0)
-    net-dns (0.8.0)
+    multipart-post (2.1.1)
+    net-dns (0.9.0)
     nokogiri (1.11.4)
       mini_portile2 (~> 2.5.0)
       racc (~> 1.4)
-    octokit (4.8.0)
+    octokit (4.21.0)
       faraday (>= 0.9)
       sawyer (~> 0.8.0, >= 0.5.3)
-    pathutil (0.16.1)
+    pathutil (0.16.2)
       forwardable-extended (~> 2.6)
     public_suffix (2.0.5)
     racc (1.5.2)
-    rb-fsevent (0.10.2)
-    rb-inotify (0.9.10)
-      ffi (>= 0.5.0, < 2)
+    rb-fsevent (0.11.0)
+    rb-inotify (0.10.1)
+      ffi (~> 1.0)
     rouge (2.2.1)
     ruby-enum (0.7.2)
       i18n
+    ruby2_keywords (0.0.4)
     rubyzip (2.0.0)
-    safe_yaml (1.0.4)
-    sass (3.5.5)
+    safe_yaml (1.0.5)
+    sass (3.7.4)
       sass-listen (~> 4.0.0)
     sass-listen (4.0.0)
       rb-fsevent (~> 0.9, >= 0.9.4)
       rb-inotify (~> 0.9, >= 0.9.7)
-    sawyer (0.8.1)
-      addressable (>= 2.3.5, < 2.6)
-      faraday (~> 0.8, < 1.0)
+    sawyer (0.8.2)
+      addressable (>= 2.3.5)
+      faraday (> 0.8, < 2.0)
     terminal-table (1.8.0)
       unicode-display_width (~> 1.1, >= 1.1.1)
     thread_safe (0.3.6)
|
|
@@ -15,12 +15,13 @@ Tree-sitter is a parser generator tool and an incremental parsing library. It ca

There are currently bindings that allow Tree-sitter to be used from the following languages:

* [Rust](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust)
* [JavaScript (Wasm)](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web)
* [Haskell](https://github.com/tree-sitter/haskell-tree-sitter)
* [JavaScript (Node.js)](https://github.com/tree-sitter/node-tree-sitter)
* [JavaScript (Wasm)](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web)
* [OCaml](https://github.com/returntocorp/ocaml-tree-sitter-core)
* [Python](https://github.com/tree-sitter/py-tree-sitter)
* [Ruby](https://github.com/tree-sitter/ruby-tree-sitter)
* [Haskell](https://github.com/tree-sitter/haskell-tree-sitter)
* [Rust](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust)

### Available Parsers

@@ -31,11 +32,13 @@ Parsers for these languages are fairly complete:
* [C#](https://github.com/tree-sitter/tree-sitter-c-sharp)
* [C++](https://github.com/tree-sitter/tree-sitter-cpp)
* [CSS](https://github.com/tree-sitter/tree-sitter-css)
* [DOT](https://github.com/rydesun/tree-sitter-dot)
* [Elm](https://github.com/elm-tooling/tree-sitter-elm)
* [Eno](https://github.com/eno-lang/tree-sitter-eno)
* [ERB / EJS](https://github.com/tree-sitter/tree-sitter-embedded-template)
* [Fennel](https://github.com/travonted/tree-sitter-fennel)
* [Go](https://github.com/tree-sitter/tree-sitter-go)
* [HCL](https://github.com/MichaHoffmann/tree-sitter-hcl)
* [HTML](https://github.com/tree-sitter/tree-sitter-html)
* [Java](https://github.com/tree-sitter/tree-sitter-java)
* [JavaScript](https://github.com/tree-sitter/tree-sitter-javascript)

@@ -60,6 +63,7 @@ Parsers for these languages are fairly complete:
* [Vue](https://github.com/ikatyang/tree-sitter-vue)
* [YAML](https://github.com/ikatyang/tree-sitter-yaml)
* [WASM](https://github.com/wasm-lsp/tree-sitter-wasm)
* [WGSL WebGPU Shading Language](https://github.com/mehmetoguzderin/tree-sitter-wgsl)

Parsers for these languages are in development:

@@ -67,10 +71,12 @@ Parsers for these languages are in development:
* [Erlang](https://github.com/AbstractMachinesLab/tree-sitter-erlang/)
* [Dockerfile](https://github.com/camdencheek/tree-sitter-dockerfile)
* [Go mod](https://github.com/camdencheek/tree-sitter-go-mod)
* [Hack](https://github.com/slackhq/tree-sitter-hack)
* [Haskell](https://github.com/tree-sitter/tree-sitter-haskell)
* [Julia](https://github.com/tree-sitter/tree-sitter-julia)
* [Kotlin](https://github.com/fwcd/tree-sitter-kotlin)
* [Nix](https://github.com/cstrahan/tree-sitter-nix)
* [Objective-C](https://github.com/jiyee/tree-sitter-objc)
* [Perl](https://github.com/ganezdragon/tree-sitter-perl)
* [Scala](https://github.com/tree-sitter/tree-sitter-scala)
* [Sourcepawn](https://github.com/nilshelmig/tree-sitter-sourcepawn)

@@ -89,8 +95,8 @@ Parsers for these languages are in development:
The design of Tree-sitter was greatly influenced by the following research papers:

- [Practical Algorithms for Incremental Software Development Environments](https://www2.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-946.pdf)
- [Context Aware Scanning for Parsing Extensible Languages](http://www.umsec.umn.edu/publications/Context-Aware-Scanning-Parsing-Extensible)
- [Efficient and Flexible Incremental Parsing](http://ftp.cs.berkeley.edu/sggs/toplas-parsing.ps)
- [Incremental Analysis of Real Programming Languages](https://pdfs.semanticscholar.org/ca69/018c29cc415820ed207d7e1d391e2da1656f.pdf)
- [Context Aware Scanning for Parsing Extensible Languages](https://www-users.cse.umn.edu/~evw/pubs/vanwyk07gpce/vanwyk07gpce.pdf)
- [Efficient and Flexible Incremental Parsing](http://harmonia.cs.berkeley.edu/papers/twagner-parsing.pdf)
- [Incremental Analysis of Real Programming Languages](http://harmonia.cs.berkeley.edu/papers/twagner-glr.pdf)
- [Error Detection and Recovery in LR Parsers](http://what-when-how.com/compiler-writing/bottom-up-parsing-compiler-writing-part-13)
- [Error Recovery for LR Parsers](http://www.dtic.mil/dtic/tr/fulltext/u2/a043470.pdf)
- [Error Recovery for LR Parsers](https://apps.dtic.mil/sti/pdfs/ADA043470.pdf)
@@ -464,7 +464,7 @@ In general, it's a good idea to make patterns more specific by specifying [field

#### Negated Fields

You can also constrain a pattern so that it only mathces nodes that *lack* a certain field. To do this, add a field name prefixed by a `!` within the parent pattern. For example, this pattern would match a class declaration with no type parameters:
You can also constrain a pattern so that it only matches nodes that *lack* a certain field. To do this, add a field name prefixed by a `!` within the parent pattern. For example, this pattern would match a class declaration with no type parameters:

```
(class_declaration
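The hunk above cuts off mid-example. For context, a minimal Rust sketch of running a negated-field query through the `tree-sitter` crate; the `tree_sitter_java::language()` handle is a hypothetical assumption, and the pattern body is illustrative rather than quoted from the docs:

```rust
use tree_sitter::{Parser, Query, QueryCursor};

fn classes_without_type_parameters(source: &str) {
    // Hypothetical language handle; any grammar whose `class_declaration`
    // nodes carry a `type_parameters` field would work the same way.
    let language = tree_sitter_java::language();
    let mut parser = Parser::new();
    parser.set_language(language).unwrap();
    let tree = parser.parse(source, None).unwrap();

    // `!type_parameters` restricts the match to nodes that LACK that field.
    let query = Query::new(
        language,
        "(class_declaration name: (identifier) @name !type_parameters)",
    )
    .unwrap();

    let mut cursor = QueryCursor::new();
    for m in cursor.matches(&query, tree.root_node(), source.as_bytes()) {
        for capture in m.captures {
            println!("{}", capture.node.utf8_text(source.as_bytes()).unwrap());
        }
    }
}
```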
@@ -586,8 +586,10 @@ This pattern would match a set of possible keyword tokens, capturing them as `@k

#### Wildcard Node

A wildcard node is represented with an underscore (`(_)`), it matches any node.
A wildcard node is represented with an underscore (`_`), it matches any node.
This is similar to `.` in regular expressions.
There are two types, `(_)` will match any named node,
and `_` will match any named or anonymous node.

For example, this pattern would match any node inside a call:
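A companion Rust sketch for the wildcard syntax, under the same caveats (the grammar is an assumption and must actually define a `call` node, or `Query::new` returns a node-type error):

```rust
use tree_sitter::{Language, Query, QueryCursor, Tree};

// `(call (_) @inner)` captures any *named* node inside a `call` node; a bare
// `_` in the same position would also match anonymous tokens.
fn inner_nodes_of_calls(language: Language, tree: &Tree, source: &str) {
    let query = Query::new(language, "(call (_) @inner)").unwrap();
    let mut cursor = QueryCursor::new();
    for m in cursor.matches(&query, tree.root_node(), source.as_bytes()) {
        for capture in m.captures {
            println!("{}", capture.node.kind());
        }
    }
}
```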
@@ -84,7 +84,7 @@ tree-sitter parse example-file
This should print the following:

```
(source_file [1, 0] - [1, 5])
(source_file [0, 0] - [1, 0])
```

You now have a working parser.
@@ -95,7 +95,7 @@ Let's go over all of the functionality of the `tree-sitter` command line tool.

### Command: `generate`

The most important command you'll use is `tree-sitter generate`. This command reads the `grammar.js` file in your current working directory and creates a file called `src/parser.c`, which implements the parser. After making changes to your grammar, just run `tree-sitter` generate again.
The most important command you'll use is `tree-sitter generate`. This command reads the `grammar.js` file in your current working directory and creates a file called `src/parser.c`, which implements the parser. After making changes to your grammar, just run `tree-sitter generate` again.

The first time you run `tree-sitter generate`, it will also generate a few other files:
@@ -674,7 +674,7 @@ This function is responsible for recognizing external tokens. It should return `
* **`TSSymbol result_symbol`** - The symbol that was recognized. Your scan function should *assign* to this field one of the values from the `TokenType` enum, described above.
* **`void (*advance)(TSLexer *, bool skip)`** - A function for advancing to the next character. If you pass `true` for the second argument, the current character will be treated as whitespace.
* **`void (*mark_end)(TSLexer *)`** - A function for marking the end of the recognized token. This allows matching tokens that require multiple characters of lookahead. By default (if you don't call `mark_end`), any character that you moved past using the `advance` function will be included in the size of the token. But once you call `mark_end`, then any later calls to `advance` will *not* increase the size of the returned token. You can call `mark_end` multiple times to increase the size of the token.
* **`uint32_t (*get_column)(TSLexer *)`** - **(Experimental)** A function for querying the current column position of the lexer. It returns the number of unicode code points (not bytes) since the start of the current line.
* **`uint32_t (*get_column)(TSLexer *)`** - A function for querying the current column position of the lexer. It returns the number of bytes (not characters) since the start of the current line.
* **`bool (*is_at_included_range_start)(TSLexer *)`** - A function for checking if the parser has just skipped some characters in the document. When parsing an embedded document using the `ts_parser_set_included_ranges` function (described in the [multi-language document section][multi-language-section]), your scanner may want to apply some special behavior when moving to a disjoint part of the document. For example, in [EJS documents][ejs], the JavaScript parser uses this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`.

The third argument to the `scan` function is an array of booleans that indicates which of your external tokens are currently expected by the parser. You should only look for a given token if it is valid according to this array. At the same time, you cannot backtrack, so you may need to combine certain pieces of logic.
@@ -29,7 +29,7 @@ git clone https://github.com/tree-sitter/tree-sitter
cd tree-sitter
```

Optionally, build the WASM library. If you skip this step, then the `tree-sitter web-ui` command will require an internet connection. If you have emscripten installed, this will use your `emcc` compiler. Otherwise, it will use Docker:
Optionally, build the WASM library. If you skip this step, then the `tree-sitter playground` command will require an internet connection. If you have emscripten installed, this will use your `emcc` compiler. Otherwise, it will use Docker:

```sh
./script/build-wasm
@@ -4,7 +4,7 @@ description = "Library for performing syntax highlighting with Tree-sitter"
version = "0.20.0"
authors = [
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
"Tim Clem <timothy.clem@gmail.com>"
"Tim Clem <timothy.clem@gmail.com>",
]
license = "MIT"
readme = "README.md"

@@ -21,5 +21,5 @@ regex = "1"
thiserror = "1.0"

[dependencies.tree-sitter]
version = ">= 0.3.7"
version = "0.20"
path = "../lib"
@@ -586,7 +586,7 @@ where
break;
}
if i > 0 {
&self.layers[0..(i + 1)].rotate_left(1);
self.layers[0..(i + 1)].rotate_left(1);
}
break;
} else {
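For context on the one-character fix above: `rotate_left` rotates the slice in place and returns `()`, so the removed leading `&` only borrowed a unit value and triggered a compiler warning. A standalone illustration:

```rust
fn main() {
    // `rotate_left` mutates the slice in place; its `()` result is not
    // something worth borrowing.
    let mut layers = vec!["b", "c", "a"];
    layers[0..3].rotate_left(1);
    assert_eq!(layers, ["c", "a", "b"]);
}
```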
@@ -133,6 +133,7 @@ pub const TSQueryError_TSQueryErrorNodeType: TSQueryError = 2;
pub const TSQueryError_TSQueryErrorField: TSQueryError = 3;
pub const TSQueryError_TSQueryErrorCapture: TSQueryError = 4;
pub const TSQueryError_TSQueryErrorStructure: TSQueryError = 5;
pub const TSQueryError_TSQueryErrorLanguage: TSQueryError = 6;
pub type TSQueryError = u32;
extern "C" {
#[doc = " Create a new parser."]
@@ -202,6 +202,7 @@ pub enum QueryErrorKind {
Capture,
Predicate,
Structure,
Language,
}

#[derive(Debug)]
@@ -629,7 +630,7 @@ impl Parser {
/// If a pointer is assigned, then the parser will periodically read from
/// this pointer during parsing. If it reads a non-zero value, it will halt early,
/// returning `None`. See [parse](Parser::parse) for more information.
pub unsafe fn set_cancellation_flag(&self, flag: Option<&AtomicUsize>) {
pub unsafe fn set_cancellation_flag(&mut self, flag: Option<&AtomicUsize>) {
if let Some(flag) = flag {
ffi::ts_parser_set_cancellation_flag(
self.0.as_ptr(),
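A sketch of how the (now `&mut self`) cancellation API is used; the `language` handle is assumed to come from elsewhere, and the flag must outlive the parser's use of it, which is why the setter is `unsafe`:

```rust
use std::sync::atomic::AtomicUsize;
use tree_sitter::{Language, Parser, Tree};

fn parse_with_cancellation(language: Language, source: &str) -> Option<Tree> {
    let flag = AtomicUsize::new(0);
    let mut parser = Parser::new();
    parser.set_language(language).ok()?;
    // SAFETY: `flag` outlives every use of `parser` in this function.
    unsafe { parser.set_cancellation_flag(Some(&flag)) };
    // If another thread stores a non-zero value into `flag` while parsing is
    // underway, `parse` halts early and returns `None`.
    let tree = parser.parse(source, None);
    unsafe { parser.set_cancellation_flag(None) };
    tree
}
```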
@@ -1231,6 +1232,19 @@ impl Query {

// On failure, build an error based on the error code and offset.
if ptr.is_null() {
if error_type == ffi::TSQueryError_TSQueryErrorLanguage {
return Err(QueryError {
row: 0,
column: 0,
offset: 0,
message: LanguageError {
version: language.version(),
}
.to_string(),
kind: QueryErrorKind::Language,
});
}

let offset = error_offset as usize;
let mut line_start = 0;
let mut row = 0;
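On the Rust side this surfaces as `QueryErrorKind::Language`. A hedged sketch of handling it from calling code, using only fields that appear in this diff:

```rust
use tree_sitter::{Language, Query, QueryErrorKind};

fn load_query(language: Language, source: &str) -> Option<Query> {
    match Query::new(language, source) {
        Ok(query) => Some(query),
        Err(err) if err.kind == QueryErrorKind::Language => {
            // The language was built with an incompatible ABI version, so
            // row/column carry no useful position information.
            eprintln!("incompatible language: {}", err.message);
            None
        }
        Err(err) => {
            eprintln!(
                "query error at {}:{}: {}",
                err.row + 1,
                err.column + 1,
                err.message
            );
            None
        }
    }
}
```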
@@ -1739,6 +1753,10 @@ impl QueryCursor {
}

impl<'a, 'tree> QueryMatch<'a, 'tree> {
pub fn id(&self) -> u32 {
self.id
}

pub fn remove(self) {
unsafe { ffi::ts_query_cursor_remove_match(self.cursor, self.id) }
}
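A short usage sketch for the new accessors, assuming a query and tree prepared as in the other examples:

```rust
use tree_sitter::{Query, QueryCursor, Tree};

fn report_matches(query: &Query, tree: &Tree, source: &str) {
    let mut cursor = QueryCursor::new();
    for m in cursor.matches(query, tree.root_node(), source.as_bytes()) {
        // Each match now exposes a numeric id; `m.remove()` could be called
        // instead to discard the match's state in the cursor early.
        println!("match {} for pattern {}", m.id(), m.pattern_index);
    }
}
```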
@@ -1803,21 +1821,36 @@ impl<'a, 'tree> QueryMatch<'a, 'tree> {
.iter()
.all(|predicate| match predicate {
TextPredicate::CaptureEqCapture(i, j, is_positive) => {
let node1 = self.nodes_for_capture_index(*i).next().unwrap();
let node2 = self.nodes_for_capture_index(*j).next().unwrap();
let text1 = get_text(buffer1, text_provider.text(node1));
let text2 = get_text(buffer2, text_provider.text(node2));
(text1 == text2) == *is_positive
let node1 = self.nodes_for_capture_index(*i).next();
let node2 = self.nodes_for_capture_index(*j).next();
match (node1, node2) {
(Some(node1), Some(node2)) => {
let text1 = get_text(buffer1, text_provider.text(node1));
let text2 = get_text(buffer2, text_provider.text(node2));
(text1 == text2) == *is_positive
}
_ => true,
}
}
TextPredicate::CaptureEqString(i, s, is_positive) => {
let node = self.nodes_for_capture_index(*i).next().unwrap();
let text = get_text(buffer1, text_provider.text(node));
(text == s.as_bytes()) == *is_positive
let node = self.nodes_for_capture_index(*i).next();
match node {
Some(node) => {
let text = get_text(buffer1, text_provider.text(node));
(text == s.as_bytes()) == *is_positive
}
None => true,
}
}
TextPredicate::CaptureMatchString(i, r, is_positive) => {
let node = self.nodes_for_capture_index(*i).next().unwrap();
let text = get_text(buffer1, text_provider.text(node));
r.is_match(text) == *is_positive
let node = self.nodes_for_capture_index(*i).next();
match node {
Some(node) => {
let text = get_text(buffer1, text_provider.text(node));
r.is_match(text) == *is_positive
}
None => true,
}
}
})
}
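The effect of this change, sketched from the caller's side: a text predicate that refers to a capture with no matching node (possible with `?` and other quantifiers) is now treated as satisfied instead of panicking on `unwrap`. The `tree_sitter_javascript::language()` handle below is an assumption for illustration:

```rust
use tree_sitter::{Parser, Query, QueryCursor};

fn main() {
    let language = tree_sitter_javascript::language(); // hypothetical handle
    let mut parser = Parser::new();
    parser.set_language(language).unwrap();

    let source = r#"foo(); foo("ok");"#;
    let tree = parser.parse(source, None).unwrap();

    // `@arg` is optional: for the bare `foo()` call it captures no node, and
    // with this change a predicate mentioning such a capture no longer panics.
    let query = Query::new(
        language,
        r#"(call_expression
              function: (identifier) @fn
              arguments: (arguments (string)? @arg)
              (#eq? @fn "foo"))"#,
    )
    .unwrap();

    let mut cursor = QueryCursor::new();
    for m in cursor.matches(&query, tree.root_node(), source.as_bytes()) {
        println!("pattern {} with {} captures", m.pattern_index, m.captures.len());
    }
}
```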
@@ -2105,21 +2138,27 @@ impl fmt::Display for LanguageError {

impl fmt::Display for QueryError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"Query error at {}:{}. {}{}",
self.row + 1,
self.column + 1,
match self.kind {
QueryErrorKind::Field => "Invalid field name ",
QueryErrorKind::NodeType => "Invalid node type ",
QueryErrorKind::Capture => "Invalid capture name ",
QueryErrorKind::Predicate => "Invalid predicate: ",
QueryErrorKind::Structure => "Impossible pattern:\n",
QueryErrorKind::Syntax => "Invalid syntax:\n",
},
self.message
)
let msg = match self.kind {
QueryErrorKind::Field => "Invalid field name ",
QueryErrorKind::NodeType => "Invalid node type ",
QueryErrorKind::Capture => "Invalid capture name ",
QueryErrorKind::Predicate => "Invalid predicate: ",
QueryErrorKind::Structure => "Impossible pattern:\n",
QueryErrorKind::Syntax => "Invalid syntax:\n",
QueryErrorKind::Language => "",
};
if msg.len() > 0 {
write!(
f,
"Query error at {}:{}. {}{}",
self.row + 1,
self.column + 1,
msg,
self.message
)
} else {
write!(f, "{}", self.message)
}
}
}
lib/binding_web/.gitignore (vendored)

@@ -3,3 +3,4 @@
package-lock.json
node_modules
*.tgz
LICENSE
@@ -17,24 +17,15 @@ var MIN_COMPATIBLE_VERSION;
var TRANSFER_BUFFER;
var currentParseCallback;
var currentLogCallback;
var initPromise = new Promise(resolve => {
Module.onRuntimeInitialized = resolve
}).then(() => {
TRANSFER_BUFFER = C._ts_init();
VERSION = getValue(TRANSFER_BUFFER, 'i32');
MIN_COMPATIBLE_VERSION = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
});

class Parser {
class ParserImpl {
static init() {
return initPromise;
TRANSFER_BUFFER = C._ts_init();
VERSION = getValue(TRANSFER_BUFFER, 'i32');
MIN_COMPATIBLE_VERSION = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
}

constructor() {
if (TRANSFER_BUFFER == null) {
throw new Error('You must first call Parser.init() and wait for it to resolve.');
}

initialize() {
C._ts_parser_new_wasm();
this[0] = getValue(TRANSFER_BUFFER, 'i32');
this[1] = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
@@ -794,6 +785,7 @@ class Language {
if (c.name === captureName1) node1 = c.node;
if (c.name === captureName2) node2 = c.node;
}
if(node1 === undefined || node2 === undefined) return true;
return (node1.text === node2.text) === isPositive;
});
} else {

@@ -805,7 +797,7 @@ class Language {
return (c.node.text === stringValue) === isPositive;
};
}
return false;
return true;
});
}
break;

@@ -828,7 +820,7 @@ class Language {
for (const c of captures) {
if (c.name === captureName) return regex.test(c.node.text) === isPositive;
}
return false;
return true;
});
break;

@@ -1203,6 +1195,3 @@ function marshalEdit(edit) {
setValue(address, edit.oldEndIndex, 'i32'); address += SIZE_OF_INT;
setValue(address, edit.newEndIndex, 'i32'); address += SIZE_OF_INT;
}

Parser.Language = Language;
Parser.Parser = Parser;
@@ -23,6 +23,7 @@
"_memchr",
"_memcmp",
"_memcpy",
"_memmove",
"_strlen",
"_towupper",
@@ -9,6 +9,7 @@
},
"scripts": {
"test": "mocha",
"prepack": "cp ../../LICENSE .",
"prepublishOnly": "node check-artifacts-fresh.js"
},
"repository": {
@@ -1,9 +1,15 @@
(function (root, factory) {
if (typeof define === 'function' && define.amd) {
define([], factory);
} else if (typeof exports === 'object') {
module.exports = factory();
} else {
window.TreeSitter = factory();
}
}(this, function () {
var TreeSitter = function() {
var initPromise;
class Parser {
constructor() {
this.initialize();
}

initialize() {
throw new Error("cannot construct a Parser before calling `init()`");
}

static init(moduleOptions) {
if (initPromise) return initPromise;
Module = Object.assign({ }, Module, moduleOptions);
return initPromise = new Promise((resolveInitPromise) => {
@@ -1,2 +1,23 @@
return Parser;
}));
for (const name of Object.getOwnPropertyNames(ParserImpl.prototype)) {
Object.defineProperty(Parser.prototype, name, {
value: ParserImpl.prototype[name],
enumerable: false,
writable: false,
})
}

Parser.Language = Language;
Module.onRuntimeInitialized = () => {
ParserImpl.init();
resolveInitPromise();
};
});
}
}

return Parser;
}();

if (typeof exports === 'object') {
module.exports = TreeSitter;
}
lib/binding_web/tree-sitter-web.d.ts (vendored)

@@ -1,12 +1,19 @@
declare module 'web-tree-sitter' {
class Parser {
static init(): Promise<void>;
/**
*
* @param moduleOptions Optional emscripten module-object, see https://emscripten.org/docs/api_reference/module.html
*/
static init(moduleOptions?: object): Promise<void>;
delete(): void;
parse(input: string | Parser.Input, previousTree?: Parser.Tree, options?: Parser.Options): Parser.Tree;
getLanguage(): any;
setLanguage(language: any): void;
reset(): void;
getLanguage(): Parser.Language;
setLanguage(language?: Parser.Language | undefined | null): void;
getLogger(): Parser.Logger;
setLogger(logFunc: Parser.Logger): void;
setLogger(logFunc?: Parser.Logger | undefined | null): void;
setTimeoutMicros(value: number): void;
getTimeoutMicros(): number;
}

namespace Parser {

@@ -96,8 +103,11 @@ declare module 'web-tree-sitter' {

export interface TreeCursor {
nodeType: string;
nodeTypeId: number;
nodeText: string;
nodeId: number;
nodeIsNamed: boolean;
nodeIsMissing: boolean;
startPosition: Point;
endPosition: Point;
startIndex: number;

@@ -123,7 +133,7 @@ declare module 'web-tree-sitter' {
walk(): TreeCursor;
getChangedRanges(other: Tree): Range[];
getEditedRange(other: Tree): Range;
getLanguage(): any;
getLanguage(): Language;
}

class Language {
@@ -131,6 +131,7 @@ typedef enum {
TSQueryErrorField,
TSQueryErrorCapture,
TSQueryErrorStructure,
TSQueryErrorLanguage,
} TSQueryError;

/********************/
@@ -618,7 +619,7 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *);
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *);

/**
 * Get the field name of the tree cursor's current node.
 * Get the field id of the tree cursor's current node.
 *
 * This returns zero if the current node doesn't have a field.
 * See also `ts_node_child_by_field_id`, `ts_language_field_id_for_name`.
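The Rust binding mirrors this with `TreeCursor::field_id`, which returns `None` where the C API returns zero. A small sketch, assuming an already-parsed `tree`:

```rust
fn walk_fields(tree: &tree_sitter::Tree) {
    let mut cursor = tree.walk();
    if cursor.goto_first_child() {
        loop {
            // `field_id` is `None` when the current node has no field,
            // matching the zero return value documented above.
            if let Some(field_id) = cursor.field_id() {
                println!("{} has field id {}", cursor.node().kind(), field_id);
            }
            if !cursor.goto_next_sibling() {
                break;
            }
        }
    }
}
```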
@@ -417,7 +417,7 @@ static Subtree ts_parser__lex(
LOG(
"lex_external state:%d, row:%u, column:%u",
lex_mode.external_lex_state,
current_position.extent.row + 1,
current_position.extent.row,
current_position.extent.column
);
ts_lexer_start(&self->lexer);

@@ -456,7 +456,7 @@ static Subtree ts_parser__lex(
LOG(
"lex_internal state:%d, row:%u, column:%u",
lex_mode.lex_state,
current_position.extent.row + 1,
current_position.extent.row,
current_position.extent.column
);
ts_lexer_start(&self->lexer);

@@ -1884,7 +1884,7 @@ TSTree *ts_parser_parse(
LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u",
version, ts_stack_version_count(self->stack),
ts_stack_state(self->stack, version),
ts_stack_position(self->stack, version).extent.row + 1,
ts_stack_position(self->stack, version).extent.row,
ts_stack_position(self->stack, version).extent.column);

if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL;
@@ -2069,6 +2069,15 @@ TSQuery *ts_query_new(
uint32_t *error_offset,
TSQueryError *error_type
) {
if (
!language ||
language->version > TREE_SITTER_LANGUAGE_VERSION ||
language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
) {
*error_type = TSQueryErrorLanguage;
return NULL;
}

TSQuery *self = ts_malloc(sizeof(TSQuery));
*self = (TSQuery) {
.steps = array_new(),
@@ -2552,6 +2561,7 @@ static void ts_query_cursor__add_state(
pattern->step_index
);
array_insert(&self->states, index, ((QueryState) {
.id = UINT32_MAX,
.capture_list_id = NONE,
.step_index = pattern->step_index,
.pattern_index = pattern->pattern_index,

@@ -2716,7 +2726,6 @@ static inline bool ts_query_cursor__advance(
if (step->depth == PATTERN_DONE_MARKER) {
if (state->start_depth > self->depth || self->halted) {
LOG(" finish pattern %u\n", state->pattern_index);
state->id = self->next_state_id++;
array_push(&self->finished_states, *state);
did_match = true;
deleted_count++;

@@ -3105,7 +3114,6 @@ static inline bool ts_query_cursor__advance(
LOG(" defer finishing pattern %u\n", state->pattern_index);
} else {
LOG(" finish pattern %u\n", state->pattern_index);
state->id = self->next_state_id++;
array_push(&self->finished_states, *state);
array_erase(&self->states, state - self->states.contents);
did_match = true;
@@ -3160,6 +3168,7 @@ bool ts_query_cursor_next_match(
}

QueryState *state = &self->finished_states.contents[0];
if (state->id == UINT32_MAX) state->id = self->next_state_id++;
match->id = state->id;
match->pattern_index = state->pattern_index;
const CaptureList *captures = capture_list_pool_get(

@@ -3269,6 +3278,7 @@ bool ts_query_cursor_next_capture(
}

if (state) {
if (state->id == UINT32_MAX) state->id = self->next_state_id++;
match->id = state->id;
match->pattern_index = state->pattern_index;
const CaptureList *captures = capture_list_pool_get(
@@ -33,7 +33,7 @@ web_dir=lib/binding_web
emscripten_flags="-O3"
minify_js=1
force_docker=0
emscripen_version=$(cat "$(dirname "$0")"/../emscripten-version)
emscripen_version=$(cat "$(dirname "$0")"/../cli/emscripten-version)

while [[ $# > 0 ]]; do
case "$1" in
@@ -2,7 +2,7 @@

set -e

EMSCRIPTEN_VERSION=$(cat "$(dirname "$0")/../emscripten-version")
EMSCRIPTEN_VERSION=$(cat "$(dirname "$0")/../cli/emscripten-version")

mkdir -p target
EMSDK_DIR="./target/emsdk"
@@ -4,10 +4,14 @@

const CATEGORY_OUTPUT_PATH = './cli/src/generate/prepare_grammar/unicode-categories.json'
const PROPERTY_OUTPUT_PATH = './cli/src/generate/prepare_grammar/unicode-properties.json'
const CATEGORY_ALIAS_OUTPUT_PATH = './cli/src/generate/prepare_grammar/unicode-category-aliases.json'
const PROPERTY_ALIAS_OUTPUT_PATH = './cli/src/generate/prepare_grammar/unicode-property-aliases.json'

const CATEGORY_URL = 'https://unicode.org/Public/13.0.0/ucd/UnicodeData.txt'
const PROPERTY_URL = 'https://unicode.org/Public/13.0.0/ucd/PropList.txt'
const DERIVED_PROPERTY_URL = 'https://unicode.org/Public/13.0.0/ucd/DerivedCoreProperties.txt'
const CATEGORY_ALIAS_URL = 'https://unicode.org/Public/13.0.0/ucd/PropertyValueAliases.txt'
const PROPERTY_ALIAS_URL = 'https://unicode.org/Public/13.0.0/ucd/PropertyAliases.txt'

const fs = require('fs');
const path = require('path');

@@ -16,7 +20,9 @@ const {spawnSync} = require('child_process');
// Download the unicode data files, caching them inside the 'target' directory.
const categoryData = cachedDownload(CATEGORY_URL);
const propertyData = cachedDownload(PROPERTY_URL);
const derivedPopertyData = cachedDownload(DERIVED_PROPERTY_URL);
const derivedPropertyData = cachedDownload(DERIVED_PROPERTY_URL);
const categoryAliasData = cachedDownload(CATEGORY_ALIAS_URL);
const propertyAliasData = cachedDownload(PROPERTY_ALIAS_URL);
function cachedDownload(url) {
let downloadPath = path.join('.', 'target', path.basename(url))
if (fs.existsSync(downloadPath)) {
@@ -30,10 +36,12 @@ function cachedDownload(url) {

const categories = {};
const properties = {};
const categoryAliases = {};
const propertyAliases = {}
let data, row, lineStart, lineEnd;

// Parse the properties
data = propertyData + derivedPopertyData;
data = propertyData + derivedPropertyData;
row = 0;
lineStart = 0;
lineEnd = -1;

@@ -106,7 +114,7 @@ while (lineStart < data.length) {
if (
nameStart === 0 ||
categoryStart == 0 ||
categoryEnd === 0
categoryEnd === -1
) {
throw new Error(`Unexpected format on line ${row}`);
}
@@ -124,5 +132,110 @@ while (lineStart < data.length) {
categories[category].push(codePoint);
}

// Parse the category aliases
data = categoryAliasData;
row = 0;
lineStart = 0;
lineEnd = -1;
const IGNORE = /[#\s]/
while (lineStart < data.length) {
row++;
lineStart = lineEnd + 1;
lineEnd = data.indexOf('\n', lineStart);
if (lineEnd === -1) break;

// Skip over blank and comment lines
if (IGNORE.test(data[lineStart])) continue;

// Parse the first three semicolon-separated fields:
// * property value type
// * short name
// * long name
// Other aliases may be listed in additional fields
const propertyValueTypeEnd = data.indexOf(';', lineStart);
const shortNameStart = propertyValueTypeEnd + 1;
const shortNameEnd = data.indexOf(';', shortNameStart);
const longNameStart = shortNameEnd + 1;
if (
shortNameStart === 0 ||
longNameStart === 0
) {
throw new Error(`Unexpected format on line ${row}`);
}

const propertyValueType = data.slice(lineStart, propertyValueTypeEnd).trim();
const shortName = data.slice(shortNameStart, shortNameEnd).trim();

// Filter for General_Category lines
if (propertyValueType !== 'gc') continue;

let aliasStart = longNameStart;
let lineDone = false;
do {
let aliasEnd = data.indexOf(';', aliasStart);
if (aliasEnd === -1 || aliasEnd > lineEnd) {
aliasEnd = data.indexOf('#', aliasStart);
if (aliasEnd === -1 || aliasEnd > lineEnd) {
aliasEnd = lineEnd;
}
lineDone = true;
}
const alias = data.slice(aliasStart, aliasEnd).trim();
console.log(alias, shortName);
categoryAliases[alias] = shortName;
aliasStart = aliasEnd + 1;
} while (!lineDone);
}

// Parse the property aliases
data = propertyAliasData;
row = 0;
lineStart = 0;
lineEnd = -1;
while (lineStart < data.length) {
row++;
lineStart = lineEnd + 1;
lineEnd = data.indexOf('\n', lineStart);
if (lineEnd === -1) break;

// Skip over blank and comment lines
if (IGNORE.test(data[lineStart])) continue;

// Parse the first two semicolon fields:
// * short name
// * long name
const shortNameEnd = data.indexOf(';', lineStart);
const longNameStart = shortNameEnd + 1;

if (longNameStart == 0) {
throw new Error(`Unexpected format on line ${row}`);
}

let alias = data.slice(lineStart, shortNameEnd).trim();
let longName = null;
let nameStart = longNameStart;
let lineDone = false;
do {
let nameEnd = data.indexOf(';', nameStart);
if (nameEnd === -1 || nameEnd > lineEnd) {
nameEnd = data.indexOf('#', nameStart);
if (nameEnd === -1 || nameEnd > lineEnd) {
nameEnd = lineEnd;
}
lineDone = true;
}
if (longName == null) {
longName = data.slice(nameStart, nameEnd).trim();
} else {
alias = data.slice(nameStart, nameEnd).trim();
}
console.log(alias, longName);
propertyAliases[alias] = longName;
nameStart = nameEnd + 1;
} while (!lineDone);
}

fs.writeFileSync(CATEGORY_OUTPUT_PATH, JSON.stringify(categories), 'utf8');
fs.writeFileSync(PROPERTY_OUTPUT_PATH, JSON.stringify(properties), 'utf8');
fs.writeFileSync(CATEGORY_ALIAS_OUTPUT_PATH, JSON.stringify(categoryAliases), 'utf8');
fs.writeFileSync(PROPERTY_ALIAS_OUTPUT_PATH, JSON.stringify(propertyAliases), 'utf8');
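For comparison, a minimal sketch of the same `gc` alias extraction in Rust; the function is hypothetical and not part of this repository, and it assumes the PropertyValueAliases.txt layout described in the script's comments (semicolon-separated fields, `#` starting a comment):

```rust
/// Parse one PropertyValueAliases.txt line, returning (long_name, short_name)
/// for General_Category ("gc") entries and None for everything else.
fn parse_gc_alias(line: &str) -> Option<(String, String)> {
    // Strip the trailing comment, then skip blank lines.
    let line = line.split('#').next().unwrap_or("").trim();
    if line.is_empty() {
        return None;
    }
    let mut fields = line.split(';').map(str::trim);
    let property = fields.next()?;   // e.g. "gc"
    let short_name = fields.next()?; // e.g. "Nl"
    let long_name = fields.next()?;  // e.g. "Letter_Number"
    if property != "gc" {
        return None;
    }
    Some((long_name.to_string(), short_name.to_string()))
}
```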
@@ -4,7 +4,7 @@ description = "Library for extracting tag information"
version = "0.20.0"
authors = [
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
"Patrick Thomson <patrickt@github.com>"
"Patrick Thomson <patrickt@github.com>",
]
license = "MIT"
readme = "README.md"

@@ -22,5 +22,5 @@ memchr = "2.3"
thiserror = "1.0"

[dependencies.tree-sitter]
version = ">= 0.17.0"
version = "0.20"
path = "../lib"
@@ -30,3 +30,14 @@ Math symbols

(program
(math_sym) (math_sym) (math_sym) (math_sym) (math_sym))

================================
Letterlike numeric characters
================================

ᛯ Ⅵ 〩

---

(program
(letter_number) (letter_number) (letter_number))
@@ -13,7 +13,8 @@
"members": [
{"type": "SYMBOL", "name": "lower"},
{"type": "SYMBOL", "name": "upper"},
{"type": "SYMBOL", "name": "math_sym"}
{"type": "SYMBOL", "name": "math_sym"},
{"type": "SYMBOL", "name": "letter_number"}
]
}
},

@@ -31,6 +32,11 @@
"math_sym": {
"type": "PATTERN",
"value": "\\p{Sm}+"
},

"letter_number": {
"type": "PATTERN",
"value": "\\p{Letter_Number}"
}
}
}
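These fixtures exercise the new category-alias support: the generator can now resolve a long General_Category name like `Letter_Number` to its short form `Nl`. The `regex` crate accepts the same long name, which makes for a quick standalone check (a sketch, not part of the test suite):

```rust
use regex::Regex;

fn main() {
    let letter_number = Regex::new(r"\p{Letter_Number}").unwrap();
    assert!(letter_number.is_match("Ⅵ"));  // U+2165 ROMAN NUMERAL SIX
    assert!(letter_number.is_match("〩")); // U+3029 HANGZHOU NUMERAL NINE
    assert!(!letter_number.is_match("6")); // ASCII digits are Nd, not Nl
}
```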