Merge branch 'master' into m-novikov-add-parsers

commit e7dcd2b7c4
Max Brunsfeld, 2021-09-24 09:04:30 -07:00, committed by GitHub
57 changed files with 822 additions and 353 deletions


@ -36,7 +36,7 @@ jobs:
- name: Read Emscripten version
run: |
printf 'EMSCRIPTEN_VERSION=%s\n' "$(cat emscripten-version)" >> $GITHUB_ENV
printf 'EMSCRIPTEN_VERSION=%s\n' "$(cat cli/emscripten-version)" >> $GITHUB_ENV
- name: Cache artifacts
id: cache

.gitignore vendored

@ -2,6 +2,7 @@ log*.html
.idea
*.xcodeproj
.vscode
fuzz-results

Cargo.lock generated

@ -495,6 +495,12 @@ dependencies = [
"crossbeam-utils",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "ryu"
version = "1.0.5"
@ -541,9 +547,9 @@ dependencies = [
[[package]]
name = "smallbitvec"
version = "2.5.0"
version = "2.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "797a4eaffb90d896f29698d45676f9f940a71936d7574996a7df54593ba209fa"
checksum = "75ce4f9dc4a41b4c3476cc925f1efb11b66df373a8fde5d4b8915fa91b5d995e"
[[package]]
name = "spin"
@ -689,11 +695,13 @@ dependencies = [
"dirs",
"glob",
"html-escape",
"indexmap",
"lazy_static",
"log",
"rand",
"regex",
"regex-syntax",
"rustc-hash",
"serde",
"serde_derive",
"serde_json",


@ -1,6 +1,6 @@
The MIT License (MIT)
Copyright (c) 2018 Max Brunsfeld
Copyright (c) 2018-2021 Max Brunsfeld
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal


@ -27,40 +27,42 @@ difference = "2.0"
dirs = "3.0"
glob = "0.3.0"
html-escape = "0.2.6"
indexmap = "1"
lazy_static = "1.2.0"
regex = "1"
regex-syntax = "0.6.4"
rustc-hash = "1"
serde = "1.0"
serde_derive = "1.0"
smallbitvec = "2.3.0"
smallbitvec = "2.5.1"
tiny_http = "0.8"
walkdir = "2.3"
webbrowser = "0.5.1"
which = "4.1.0"
[dependencies.tree-sitter]
version = ">= 0.17.0"
version = "0.20"
path = "../lib"
[dev-dependencies.tree-sitter]
version = ">= 0.17.0"
version = "0.20"
path = "../lib"
features = ["allocation-tracking"]
[dependencies.tree-sitter-config]
version = ">= 0.19.0"
version = "0.19.0"
path = "config"
[dependencies.tree-sitter-highlight]
version = ">= 0.3.0"
version = "0.20"
path = "../highlight"
[dependencies.tree-sitter-loader]
version = ">= 0.19.0"
version = "0.19.0"
path = "loader"
[dependencies.tree-sitter-tags]
version = ">= 0.1.0"
version = "0.20"
path = "../tags"
[dependencies.serde_json]


@ -36,4 +36,4 @@ The `tree-sitter` binary itself has no dependencies, but specific commands have
* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation](http://tree-sitter.github.io/tree-sitter/creating-parsers) for more information.
* `parse` - The `tree-sitter parse` command will parse a file (or list of file) using Tree-sitter parsers.
* `parse` - The `tree-sitter parse` command will parse a file (or list of files) using Tree-sitter parsers.


@ -6,7 +6,7 @@ fn main() {
println!("cargo:rustc-env={}={}", "BUILD_SHA", git_sha);
}
if wasm_files_present() {
if web_playground_files_present() {
println!("cargo:rustc-cfg={}", "TREE_SITTER_EMBED_WASM_BINDING");
}
@ -16,15 +16,16 @@ fn main() {
"RUST_BINDING_VERSION", rust_binding_version,
);
let emscripten_version = fs::read_to_string("../emscripten-version").unwrap();
let emscripten_version = fs::read_to_string("emscripten-version").unwrap();
println!(
"cargo:rustc-env={}={}",
"EMSCRIPTEN_VERSION", emscripten_version,
);
}
fn wasm_files_present() -> bool {
fn web_playground_files_present() -> bool {
let paths = [
"../docs/assets/js/playground.js",
"../lib/binding_web/tree-sitter.js",
"../lib/binding_web/tree-sitter.wasm",
];
@ -81,10 +82,10 @@ fn read_git_sha() -> Option<String> {
}
fn read_rust_binding_version() -> String {
let path = "../lib/Cargo.toml";
let path = "Cargo.toml";
let text = fs::read_to_string(path).unwrap();
let cargo_toml = toml::from_str::<toml::Value>(text.as_ref()).unwrap();
cargo_toml["package"]["version"]
cargo_toml["dependencies"]["tree-sitter"]["version"]
.as_str()
.unwrap()
.trim_matches('"')
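The build script now reads the embedded Emscripten version from `cli/emscripten-version` and derives the Rust binding version from the CLI's own `Cargo.toml` (its `tree-sitter` dependency entry) rather than from `../lib/Cargo.toml`. A condensed sketch of that lookup, assuming the `toml` crate the build script already uses; error handling is simplified:

```rust
use std::fs;

fn main() {
    // Parse Cargo.toml as a generic TOML value and read the
    // `tree-sitter` dependency's `version` field, as the diff does.
    let text = fs::read_to_string("Cargo.toml").unwrap();
    let cargo_toml = toml::from_str::<toml::Value>(&text).unwrap();
    let version = cargo_toml["dependencies"]["tree-sitter"]["version"]
        .as_str()
        .unwrap()
        .trim_matches('"');
    println!("cargo:rustc-env=RUST_BINDING_VERSION={}", version);
}
```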


@ -1,6 +1,6 @@
//! Manages tree-sitter's configuration file.
use anyhow::{anyhow, Result};
use anyhow::{anyhow, Context, Result};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::path::PathBuf;
@ -14,6 +14,7 @@ use std::{env, fs};
/// This type holds the generic JSON content of the configuration file. Individual tree-sitter
/// components will use the [`get`][] method to parse that JSON to extract configuration fields
/// that are specific to that component.
#[derive(Debug)]
pub struct Config {
pub location: PathBuf,
pub config: Value,
@ -64,8 +65,10 @@ impl Config {
Some(location) => location,
None => return Config::initial(),
};
let content = fs::read_to_string(&location)?;
let config = serde_json::from_str(&content)?;
let content = fs::read_to_string(&location)
.with_context(|| format!("Failed to read {}", &location.to_string_lossy()))?;
let config = serde_json::from_str(&content)
.with_context(|| format!("Bad JSON config {}", &location.to_string_lossy()))?;
Ok(Config { location, config })
}
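The `with_context` calls wrap the underlying I/O and JSON errors so that failures name the offending file. A minimal self-contained sketch of the same `anyhow` pattern (the function name and path handling here are illustrative, not part of the diff):

```rust
use anyhow::{Context, Result};
use std::{fs, path::Path};

fn load_json(path: &Path) -> Result<serde_json::Value> {
    // Each `?` now carries a message naming the file that failed.
    let content = fs::read_to_string(path)
        .with_context(|| format!("Failed to read {}", path.display()))?;
    let config = serde_json::from_str(&content)
        .with_context(|| format!("Bad JSON config {}", path.display()))?;
    Ok(config)
}
```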


@ -25,13 +25,13 @@ version = "1.0"
features = ["preserve_order"]
[dependencies.tree-sitter]
version = ">= 0.19"
version = "0.20"
path = "../../lib"
[dependencies.tree-sitter-highlight]
version = ">= 0.19"
version = "0.20"
path = "../../highlight"
[dependencies.tree-sitter-tags]
version = ">= 0.19"
version = "0.20"
path = "../../tags"


@ -12,7 +12,7 @@ use std::process::Command;
use std::sync::Mutex;
use std::time::SystemTime;
use std::{fs, mem};
use tree_sitter::{Language, QueryError};
use tree_sitter::{Language, QueryError, QueryErrorKind};
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
@ -101,6 +101,7 @@ pub struct Loader {
language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
highlight_names: Box<Mutex<Vec<String>>>,
use_all_highlight_names: bool,
debug_build: bool,
}
unsafe impl Send for Loader {}
@ -122,6 +123,7 @@ impl Loader {
language_configuration_ids_by_file_type: HashMap::new(),
highlight_names: Box::new(Mutex::new(Vec::new())),
use_all_highlight_names: true,
debug_build: false,
}
}
@ -347,7 +349,11 @@ impl Loader {
parser_path: &Path,
scanner_path: &Option<PathBuf>,
) -> Result<Language> {
let mut library_path = self.parser_lib_path.join(name);
let mut lib_name = name.to_string();
if self.debug_build {
lib_name.push_str(".debug._");
}
let mut library_path = self.parser_lib_path.join(lib_name);
library_path.set_extension(DYLIB_EXTENSION);
let recompile = needs_recompile(&library_path, &parser_path, &scanner_path)
@ -369,11 +375,13 @@ impl Loader {
}
if cfg!(windows) {
command
.args(&["/nologo", "/LD", "/I"])
.arg(header_path)
.arg("/Od")
.arg(parser_path);
command.args(&["/nologo", "/LD", "/I"]).arg(header_path);
if self.debug_build {
command.arg("/Od");
} else {
command.arg("/O2");
}
command.arg(parser_path);
if let Some(scanner_path) = scanner_path.as_ref() {
command.arg(scanner_path);
}
@ -389,8 +397,18 @@ impl Loader {
.arg("-I")
.arg(header_path)
.arg("-o")
.arg(&library_path)
.arg("-O2");
.arg(&library_path);
if self.debug_build {
command.arg("-O0");
} else {
command.arg("-O2");
}
// For conditional compilation of external scanner code when
// used internally by `tree-sitter parse` and other subcommands.
command.arg("-DTREE_SITTER_INTERNAL_BUILD");
if let Some(scanner_path) = scanner_path.as_ref() {
if scanner_path.extension() == Some("c".as_ref()) {
command.arg("-xc").arg("-std=c99").arg(scanner_path);
@ -639,6 +657,10 @@ impl Loader {
Err(anyhow!("No language found"))
}
}
pub fn use_debug_build(&mut self, flag: bool) {
self.debug_build = flag;
}
}
impl<'a> LanguageConfiguration<'a> {
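The new `debug_build` flag changes two things: the compiled parser library gets a distinct cache file name, and the C compiler runs with `-O0` (or `/Od` under MSVC) instead of `-O2`. The naming trick relies on `set_extension` replacing the trailing `_` placeholder; a small sketch of just that part (the paths and extension here are illustrative):

```rust
use std::path::{Path, PathBuf};

// Mirrors the naming scheme above: debug artifacts get a distinct
// file name so they don't collide with optimized ones in the cache.
fn library_path(cache_dir: &Path, name: &str, debug_build: bool) -> PathBuf {
    let mut lib_name = name.to_string();
    if debug_build {
        lib_name.push_str(".debug._");
    }
    let mut path = cache_dir.join(lib_name);
    // `set_extension` swaps the "_" placeholder after the final dot.
    path.set_extension("so"); // DYLIB_EXTENSION in the real loader
    path
}

fn main() {
    let dir = Path::new("/tmp/tree-sitter/lib");
    assert_eq!(
        library_path(dir, "javascript", false),
        PathBuf::from("/tmp/tree-sitter/lib/javascript.so")
    );
    assert_eq!(
        library_path(dir, "javascript", true),
        PathBuf::from("/tmp/tree-sitter/lib/javascript.debug.so")
    );
}
```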
@ -662,28 +684,31 @@ impl<'a> LanguageConfiguration<'a> {
&injections_query,
&locals_query,
)
.map_err(|error| {
if error.offset < injections_query.len() {
Self::include_path_in_query_error(
error,
&injection_ranges,
&injections_query,
0,
)
} else if error.offset < injections_query.len() + locals_query.len() {
Self::include_path_in_query_error(
error,
&locals_ranges,
&locals_query,
injections_query.len(),
)
} else {
Self::include_path_in_query_error(
error,
&highlight_ranges,
&highlights_query,
injections_query.len() + locals_query.len(),
)
.map_err(|error| match error.kind {
QueryErrorKind::Language => Error::from(error),
_ => {
if error.offset < injections_query.len() {
Self::include_path_in_query_error(
error,
&injection_ranges,
&injections_query,
0,
)
} else if error.offset < injections_query.len() + locals_query.len() {
Self::include_path_in_query_error(
error,
&locals_ranges,
&locals_query,
injections_query.len(),
)
} else {
Self::include_path_in_query_error(
error,
&highlight_ranges,
&highlights_query,
injections_query.len() + locals_query.len(),
)
}
}
})?;
let mut all_highlight_names = self.highlight_names.lock().unwrap();

1
cli/npm/.gitignore vendored

@ -2,3 +2,4 @@ tree-sitter
tree-sitter.exe
*.gz
*.tgz
LICENSE


@ -14,7 +14,8 @@
],
"main": "lib/api/index.js",
"scripts": {
"install": "node install.js"
"install": "node install.js",
"prepack": "cp ../../LICENSE ."
},
"bin": {
"tree-sitter": "cli.js"


@ -347,7 +347,7 @@ fn lex_states_differ(
fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) {
// Get a mapping of old state index -> new_state_index
let mut old_ids_by_new_id = (0..table.states.len()).collect::<Vec<_>>();
&old_ids_by_new_id[1..].sort_by_key(|id| &table.states[*id]);
old_ids_by_new_id[1..].sort_by_key(|id| &table.states[*id]);
// Get the inverse mapping
let mut new_ids_by_old_id = vec![0; old_ids_by_new_id.len()];
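The leading `&` removed here borrowed the `()` that `sort_by_key` returns: method calls bind tighter than `&`, so the sort itself always ran and the borrow was dead weight that newer compilers warn about. A tiny demonstration:

```rust
fn main() {
    let mut v = vec![3usize, 1, 2, 0];
    // The old form `&v[1..].sort_unstable();` parses as
    // `&(v[1..].sort_unstable())`: the slice is still sorted, but the
    // statement uselessly borrows the returned `()`.
    v[1..].sort_unstable();
    assert_eq!(v, [3, 0, 1, 2]);
}
```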


@ -11,10 +11,14 @@ use crate::generate::tables::{
ProductionInfo, ProductionInfoId,
};
use anyhow::{anyhow, Result};
use std::cmp::Ordering;
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
use std::fmt::Write;
use std::hash::BuildHasherDefault;
use std::u32;
use std::{cmp::Ordering, collections::hash_map::Entry};
use indexmap::{map::Entry, IndexMap};
use rustc_hash::FxHasher;
// For conflict reporting, each parse state is associated with an example
// sequence of symbols that could lead to that parse state.
@ -49,7 +53,7 @@ struct ParseTableBuilder<'a> {
lexical_grammar: &'a LexicalGrammar,
variable_info: &'a Vec<VariableInfo>,
core_ids_by_core: HashMap<ParseItemSetCore<'a>, usize>,
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
state_ids_by_item_set: IndexMap<ParseItemSet<'a>, ParseStateId, BuildHasherDefault<FxHasher>>,
parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
parse_state_queue: VecDeque<ParseStateQueueEntry>,
non_terminal_extra_states: Vec<(Symbol, usize)>,
@ -147,13 +151,7 @@ impl<'a> ParseTableBuilder<'a> {
Entry::Vacant(v) => {
let core = v.key().core();
let core_count = self.core_ids_by_core.len();
let core_id = match self.core_ids_by_core.entry(core) {
Entry::Occupied(e) => *e.get(),
Entry::Vacant(e) => {
e.insert(core_count);
core_count
}
};
let core_id = *self.core_ids_by_core.entry(core).or_insert(core_count);
let state_id = self.parse_table.states.len();
self.parse_state_info_by_id
@ -163,8 +161,8 @@ impl<'a> ParseTableBuilder<'a> {
id: state_id,
lex_state_id: 0,
external_lex_state_id: 0,
terminal_entries: HashMap::new(),
nonterminal_entries: HashMap::new(),
terminal_entries: IndexMap::default(),
nonterminal_entries: IndexMap::default(),
core_id,
});
self.parse_state_queue.push_back(ParseStateQueueEntry {
@ -981,7 +979,7 @@ pub(crate) fn build_parse_table<'a>(
item_set_builder,
variable_info,
non_terminal_extra_states: Vec::new(),
state_ids_by_item_set: HashMap::new(),
state_ids_by_item_set: IndexMap::default(),
core_ids_by_core: HashMap::new(),
parse_state_info_by_id: Vec::new(),
parse_state_queue: VecDeque::new(),
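`std::collections::HashMap` iterates in a randomized order, which made the generated tables differ from run to run; `IndexMap` iterates in insertion order, and `FxHasher` keeps lookups cheap. A minimal sketch of the property being relied on (the keys are placeholders):

```rust
use std::hash::BuildHasherDefault;

use indexmap::IndexMap;
use rustc_hash::FxHasher;

fn main() {
    // Unlike std's HashMap, IndexMap preserves insertion order, so code
    // generation that iterates over it is deterministic across runs.
    let mut m: IndexMap<&str, usize, BuildHasherDefault<FxHasher>> =
        IndexMap::default();
    m.insert("shift", 0);
    m.insert("reduce", 1);
    m.insert("accept", 2);
    let keys: Vec<_> = m.keys().copied().collect();
    assert_eq!(keys, ["shift", "reduce", "accept"]);
}
```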


@ -479,7 +479,7 @@ impl<'a> Minimizer<'a> {
fn reorder_states_by_descending_size(&mut self) {
// Get a mapping of old state index -> new_state_index
let mut old_ids_by_new_id = (0..self.parse_table.states.len()).collect::<Vec<_>>();
&old_ids_by_new_id.sort_unstable_by_key(|i| {
old_ids_by_new_id.sort_unstable_by_key(|i| {
// Don't change states 0 (the error state) or 1 (the start state).
if *i <= 1 {
return *i as i64 - 1_000_000;


@ -169,6 +169,7 @@ fn load_grammar_file(grammar_path: &Path) -> Result<String> {
}
fn load_js_grammar_file(grammar_path: &Path) -> Result<String> {
let grammar_path = fs::canonicalize(grammar_path)?;
let mut node_process = Command::new("node")
.env("TREE_SITTER_GRAMMAR_PATH", grammar_path)
.stdin(Stdio::piped())


@ -19,10 +19,16 @@ lazy_static! {
serde_json::from_str(UNICODE_CATEGORIES_JSON).unwrap();
static ref UNICODE_PROPERTIES: HashMap<&'static str, Vec<u32>> =
serde_json::from_str(UNICODE_PROPERTIES_JSON).unwrap();
static ref UNICODE_CATEGORY_ALIASES: HashMap<&'static str, String> =
serde_json::from_str(UNICODE_CATEGORY_ALIASES_JSON).unwrap();
static ref UNICODE_PROPERTY_ALIASES: HashMap<&'static str, String> =
serde_json::from_str(UNICODE_PROPERTY_ALIASES_JSON).unwrap();
}
const UNICODE_CATEGORIES_JSON: &'static str = include_str!("./unicode-categories.json");
const UNICODE_PROPERTIES_JSON: &'static str = include_str!("./unicode-properties.json");
const UNICODE_CATEGORY_ALIASES_JSON: &'static str = include_str!("./unicode-category-aliases.json");
const UNICODE_PROPERTY_ALIASES_JSON: &'static str = include_str!("./unicode-property-aliases.json");
const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/'];
struct NfaBuilder {
@ -394,12 +400,16 @@ impl NfaBuilder {
category_letter = le.to_string();
}
ClassUnicodeKind::Named(class_name) => {
if class_name.len() == 1 {
category_letter = class_name.clone();
let actual_class_name = UNICODE_CATEGORY_ALIASES
.get(class_name.as_str())
.or_else(|| UNICODE_PROPERTY_ALIASES.get(class_name.as_str()))
.unwrap_or(class_name);
if actual_class_name.len() == 1 {
category_letter = actual_class_name.clone();
} else {
let code_points = UNICODE_CATEGORIES
.get(class_name.as_str())
.or_else(|| UNICODE_PROPERTIES.get(class_name.as_str()))
.get(actual_class_name.as_str())
.or_else(|| UNICODE_PROPERTIES.get(actual_class_name.as_str()))
.ok_or_else(|| {
anyhow!(
"Regex error: Unsupported unicode character class {}",

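With the alias tables (added as JSON files below), a class written as `\p{Letter}` first resolves to its canonical one-letter form `L` before the category lookup. A small sketch of the two-step lookup using a hypothetical slice of that data:

```rust
use std::collections::HashMap;

fn main() {
    // Hypothetical slice of the alias tables embedded from the JSON files.
    let category_aliases: HashMap<&str, &str> =
        [("Letter", "L"), ("Punctuation", "P")].into_iter().collect();

    let class_name = "Letter";
    // Resolve an alias like "Letter" to its canonical name "L" first,
    // falling back to the name as written.
    let actual = category_aliases
        .get(class_name)
        .copied()
        .unwrap_or(class_name);
    assert_eq!(actual, "L");
}
```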

@ -0,0 +1 @@
{"Other":"C","Control":"Cc","cntrl":"Cc","Format":"Cf","Unassigned":"Cn","Private_Use":"Co","Surrogate":"Cs","Letter":"L","Cased_Letter":"LC","Lowercase_Letter":"Ll","Modifier_Letter":"Lm","Other_Letter":"Lo","Titlecase_Letter":"Lt","Uppercase_Letter":"Lu","Mark":"M","Combining_Mark":"M","Spacing_Mark":"Mc","Enclosing_Mark":"Me","Nonspacing_Mark":"Mn","Number":"N","Decimal_Number":"Nd","digit":"Nd","Letter_Number":"Nl","Other_Number":"No","Punctuation":"P","punct":"P","Connector_Punctuation":"Pc","Dash_Punctuation":"Pd","Close_Punctuation":"Pe","Final_Punctuation":"Pf","Initial_Punctuation":"Pi","Other_Punctuation":"Po","Open_Punctuation":"Ps","Symbol":"S","Currency_Symbol":"Sc","Modifier_Symbol":"Sk","Math_Symbol":"Sm","Other_Symbol":"So","Separator":"Z","Line_Separator":"Zl","Paragraph_Separator":"Zp","Space_Separator":"Zs"}


@ -0,0 +1 @@
{"cjkAccountingNumeric":"kAccountingNumeric","cjkOtherNumeric":"kOtherNumeric","cjkPrimaryNumeric":"kPrimaryNumeric","nv":"Numeric_Value","cf":"Case_Folding","cjkCompatibilityVariant":"kCompatibilityVariant","dm":"Decomposition_Mapping","FC_NFKC":"FC_NFKC_Closure","lc":"Lowercase_Mapping","NFKC_CF":"NFKC_Casefold","scf":"Simple_Case_Folding","sfc":"Simple_Case_Folding","slc":"Simple_Lowercase_Mapping","stc":"Simple_Titlecase_Mapping","suc":"Simple_Uppercase_Mapping","tc":"Titlecase_Mapping","uc":"Uppercase_Mapping","bmg":"Bidi_Mirroring_Glyph","bpb":"Bidi_Paired_Bracket","cjkIICore":"kIICore","cjkIRG_GSource":"kIRG_GSource","cjkIRG_HSource":"kIRG_HSource","cjkIRG_JSource":"kIRG_JSource","cjkIRG_KPSource":"kIRG_KPSource","cjkIRG_KSource":"kIRG_KSource","cjkIRG_MSource":"kIRG_MSource","cjkIRG_SSource":"kIRG_SSource","cjkIRG_TSource":"kIRG_TSource","cjkIRG_UKSource":"kIRG_UKSource","cjkIRG_USource":"kIRG_USource","cjkIRG_VSource":"kIRG_VSource","cjkRSUnicode":"kRSUnicode","Unicode_Radical_Stroke":"kRSUnicode","URS":"kRSUnicode","EqUIdeo":"Equivalent_Unified_Ideograph","isc":"ISO_Comment","JSN":"Jamo_Short_Name","na":"Name","na1":"Unicode_1_Name","Name_Alias":"Name_Alias","scx":"Script_Extensions","age":"Age","blk":"Block","sc":"Script","bc":"Bidi_Class","bpt":"Bidi_Paired_Bracket_Type","ccc":"Canonical_Combining_Class","dt":"Decomposition_Type","ea":"East_Asian_Width","gc":"General_Category","GCB":"Grapheme_Cluster_Break","hst":"Hangul_Syllable_Type","InPC":"Indic_Positional_Category","InSC":"Indic_Syllabic_Category","jg":"Joining_Group","jt":"Joining_Type","lb":"Line_Break","NFC_QC":"NFC_Quick_Check","NFD_QC":"NFD_Quick_Check","NFKC_QC":"NFKC_Quick_Check","NFKD_QC":"NFKD_Quick_Check","nt":"Numeric_Type","SB":"Sentence_Break","vo":"Vertical_Orientation","WB":"Word_Break","AHex":"ASCII_Hex_Digit","Alpha":"Alphabetic","Bidi_C":"Bidi_Control","Bidi_M":"Bidi_Mirrored","Cased":"Cased","CE":"Composition_Exclusion","CI":"Case_Ignorable","Comp_Ex":"Full_Composition_Exclusion","CWCF":"Changes_When_Casefolded","CWCM":"Changes_When_Casemapped","CWKCF":"Changes_When_NFKC_Casefolded","CWL":"Changes_When_Lowercased","CWT":"Changes_When_Titlecased","CWU":"Changes_When_Uppercased","Dash":"Dash","Dep":"Deprecated","DI":"Default_Ignorable_Code_Point","Dia":"Diacritic","EBase":"Emoji_Modifier_Base","EComp":"Emoji_Component","EMod":"Emoji_Modifier","Emoji":"Emoji","EPres":"Emoji_Presentation","Ext":"Extender","ExtPict":"Extended_Pictographic","Gr_Base":"Grapheme_Base","Gr_Ext":"Grapheme_Extend","Gr_Link":"Grapheme_Link","Hex":"Hex_Digit","Hyphen":"Hyphen","IDC":"ID_Continue","Ideo":"Ideographic","IDS":"ID_Start","IDSB":"IDS_Binary_Operator","IDST":"IDS_Trinary_Operator","Join_C":"Join_Control","LOE":"Logical_Order_Exception","Lower":"Lowercase","Math":"Math","NChar":"Noncharacter_Code_Point","OAlpha":"Other_Alphabetic","ODI":"Other_Default_Ignorable_Code_Point","OGr_Ext":"Other_Grapheme_Extend","OIDC":"Other_ID_Continue","OIDS":"Other_ID_Start","OLower":"Other_Lowercase","OMath":"Other_Math","OUpper":"Other_Uppercase","Pat_Syn":"Pattern_Syntax","Pat_WS":"Pattern_White_Space","PCM":"Prepended_Concatenation_Mark","QMark":"Quotation_Mark","Radical":"Radical","RI":"Regional_Indicator","SD":"Soft_Dotted","STerm":"Sentence_Terminal","Term":"Terminal_Punctuation","UIdeo":"Unified_Ideograph","Upper":"Uppercase","VS":"Variation_Selector","WSpace":"White_Space","space":"White_Space","XIDC":"XID_Continue","XIDS":"XID_Start","XO_NFC":"Expands_On_NFC","XO_NFD":"Expands_On_NFD","XO_NFKC":"Expands_On_NFKC","XO_NFKD":"Expands_On_NFKD"}


@ -1057,7 +1057,7 @@ impl Generator {
}
fn add_parse_table(&mut self) {
let mut parse_table_entries = Vec::new();
let mut parse_table_entries = HashMap::new();
let mut next_parse_action_list_index = 0;
self.get_parse_action_list_id(
@ -1224,6 +1224,11 @@ impl Generator {
add_line!(self, "");
}
let mut parse_table_entries: Vec<_> = parse_table_entries
.into_iter()
.map(|(entry, i)| (i, entry))
.collect();
parse_table_entries.sort_by_key(|(index, _)| *index);
self.add_parse_action_list(parse_table_entries);
}
@ -1404,17 +1409,17 @@ impl Generator {
fn get_parse_action_list_id(
&self,
entry: &ParseTableEntry,
parse_table_entries: &mut Vec<(usize, ParseTableEntry)>,
parse_table_entries: &mut HashMap<ParseTableEntry, usize>,
next_parse_action_list_index: &mut usize,
) -> usize {
if let Some((index, _)) = parse_table_entries.iter().find(|(_, e)| *e == *entry) {
return *index;
if let Some(&index) = parse_table_entries.get(entry) {
index
} else {
let result = *next_parse_action_list_index;
parse_table_entries.insert(entry.clone(), result);
*next_parse_action_list_index += 1 + entry.actions.len();
result
}
let result = *next_parse_action_list_index;
parse_table_entries.push((result, entry.clone()));
*next_parse_action_list_index += 1 + entry.actions.len();
result
}
fn get_field_map_id(

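Replacing the `Vec` with a `HashMap<ParseTableEntry, usize>` turns the duplicate-entry check from a linear scan into a constant-time lookup; the map is then inverted and sorted by index before emitting, so output order is unchanged. The same intern-style pattern in isolation:

```rust
use std::collections::HashMap;

// Intern a value, returning a stable index; duplicates share one index.
fn intern<T: std::hash::Hash + Eq + Clone>(
    table: &mut HashMap<T, usize>,
    next_index: &mut usize,
    value: &T,
) -> usize {
    if let Some(&index) = table.get(value) {
        index
    } else {
        let result = *next_index;
        table.insert(value.clone(), result);
        *next_index += 1;
        result
    }
}

fn main() {
    let mut table = HashMap::new();
    let mut next = 0;
    assert_eq!(intern(&mut table, &mut next, &"shift"), 0);
    assert_eq!(intern(&mut table, &mut next, &"reduce"), 1);
    assert_eq!(intern(&mut table, &mut next, &"shift"), 0); // deduplicated

    // As in the diff, recover a deterministic listing by sorting on index.
    let mut entries: Vec<_> = table.into_iter().map(|(e, i)| (i, e)).collect();
    entries.sort_by_key(|(i, _)| *i);
    assert_eq!(entries, vec![(0, "shift"), (1, "reduce")]);
}
```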

@ -1,11 +1,16 @@
use super::nfa::CharacterSet;
use super::rules::{Alias, Symbol, TokenSet};
use std::collections::{BTreeMap, HashMap};
use std::collections::BTreeMap;
pub(crate) type ProductionInfoId = usize;
pub(crate) type ParseStateId = usize;
pub(crate) type LexStateId = usize;
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
use std::hash::BuildHasherDefault;
use indexmap::IndexMap;
use rustc_hash::FxHasher;
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum ParseAction {
Accept,
Shift {
@ -28,7 +33,7 @@ pub(crate) enum GotoAction {
ShiftExtra,
}
#[derive(Clone, Debug, PartialEq, Eq)]
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) struct ParseTableEntry {
pub actions: Vec<ParseAction>,
pub reusable: bool,
@ -37,8 +42,8 @@ pub(crate) struct ParseTableEntry {
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct ParseState {
pub id: ParseStateId,
pub terminal_entries: HashMap<Symbol, ParseTableEntry>,
pub nonterminal_entries: HashMap<Symbol, GotoAction>,
pub terminal_entries: IndexMap<Symbol, ParseTableEntry, BuildHasherDefault<FxHasher>>,
pub nonterminal_entries: IndexMap<Symbol, GotoAction, BuildHasherDefault<FxHasher>>,
pub lex_state_id: usize,
pub external_lex_state_id: usize,
pub core_id: usize,


@ -2,6 +2,7 @@ pub mod generate;
pub mod highlight;
pub mod logger;
pub mod parse;
pub mod playground;
pub mod query;
pub mod query_testing;
pub mod tags;
@ -9,7 +10,6 @@ pub mod test;
pub mod test_highlight;
pub mod util;
pub mod wasm;
pub mod web_ui;
#[cfg(test)]
mod tests;


@ -1,5 +1,6 @@
use log::{LevelFilter, Log, Metadata, Record};
#[allow(dead_code)]
struct Logger {
pub filter: Option<String>,
}


@ -4,7 +4,7 @@ use glob::glob;
use std::path::Path;
use std::{env, fs, u64};
use tree_sitter_cli::{
generate, highlight, logger, parse, query, tags, test, test_highlight, util, wasm, web_ui,
generate, highlight, logger, parse, playground, query, tags, test, test_highlight, util, wasm,
};
use tree_sitter_config::Config;
use tree_sitter_loader as loader;
@ -35,6 +35,45 @@ fn run() -> Result<()> {
BUILD_VERSION.to_string()
};
let debug_arg = Arg::with_name("debug")
.help("Show parsing debug log")
.long("debug")
.short("d");
let debug_graph_arg = Arg::with_name("debug-graph")
.help("Produce the log.html file with debug graphs")
.long("debug-graph")
.short("D");
let debug_build_arg = Arg::with_name("debug-build")
.help("Compile a parser in debug mode")
.long("debug-build")
.short("0");
let paths_file_arg = Arg::with_name("paths-file")
.help("The path to a file with paths to source file(s)")
.long("paths")
.takes_value(true);
let paths_arg = Arg::with_name("paths")
.help("The source file(s) to use")
.multiple(true);
let scope_arg = Arg::with_name("scope")
.help("Select a language by the scope instead of a file extension")
.long("scope")
.takes_value(true);
let time_arg = Arg::with_name("time")
.help("Measure execution time")
.long("time")
.short("t");
let quiet_arg = Arg::with_name("quiet")
.help("Suppress main output")
.long("quiet")
.short("q");
let matches = App::new("tree-sitter")
.author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
.about("Generates and tests parsers")
@ -65,23 +104,30 @@ fn run() -> Result<()> {
SubCommand::with_name("parse")
.alias("p")
.about("Parse files")
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("paths")
.index(1)
.multiple(true)
.required(false),
)
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("debug").long("debug").short("d"))
.arg(Arg::with_name("debug-graph").long("debug-graph").short("D"))
.arg(&paths_file_arg)
.arg(&paths_arg)
.arg(&scope_arg)
.arg(&debug_arg)
.arg(&debug_build_arg)
.arg(&debug_graph_arg)
.arg(Arg::with_name("debug-xml").long("xml").short("x"))
.arg(Arg::with_name("quiet").long("quiet").short("q"))
.arg(Arg::with_name("stat").long("stat").short("s"))
.arg(Arg::with_name("time").long("time").short("t"))
.arg(Arg::with_name("timeout").long("timeout").takes_value(true))
.arg(
Arg::with_name("stat")
.help("Show parsing statistics")
.long("stat")
.short("s"),
)
.arg(
Arg::with_name("timeout")
.help("Interrupt the parsing process by timeout (µs)")
.long("timeout")
.takes_value(true),
)
.arg(&time_arg)
.arg(&quiet_arg)
.arg(
Arg::with_name("edits")
.help("Apply edits in the format: \"row,col del_count insert_text\"")
.long("edit")
.short("edit")
.takes_value(true)
@ -93,36 +139,32 @@ fn run() -> Result<()> {
SubCommand::with_name("query")
.alias("q")
.about("Search files using a syntax tree query")
.arg(Arg::with_name("query-path").index(1).required(true))
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("paths")
.index(2)
.multiple(true)
.required(false),
Arg::with_name("query-path")
.help("Path to a file with queries")
.index(1)
.required(true),
)
.arg(&paths_file_arg)
.arg(&paths_arg.clone().index(2))
.arg(
Arg::with_name("byte-range")
.help("The range of byte offsets in which the query will be executed")
.long("byte-range")
.takes_value(true),
)
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(&scope_arg)
.arg(Arg::with_name("captures").long("captures").short("c"))
.arg(Arg::with_name("test").long("test")),
)
.subcommand(
SubCommand::with_name("tags")
.arg(Arg::with_name("quiet").long("quiet").short("q"))
.arg(Arg::with_name("time").long("time").short("t"))
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("paths")
.help("The source file to use")
.index(1)
.multiple(true),
),
.about("Generate a list of tags")
.arg(&scope_arg)
.arg(&time_arg)
.arg(&quiet_arg)
.arg(&paths_file_arg)
.arg(&paths_arg),
)
.subcommand(
SubCommand::with_name("test")
@ -141,23 +183,24 @@ fn run() -> Result<()> {
.short("u")
.help("Update all syntax trees in corpus files with current parser output"),
)
.arg(Arg::with_name("debug").long("debug").short("d"))
.arg(Arg::with_name("debug-graph").long("debug-graph").short("D")),
.arg(&debug_arg)
.arg(&debug_build_arg)
.arg(&debug_graph_arg),
)
.subcommand(
SubCommand::with_name("highlight")
.about("Highlight a file")
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("paths")
.index(1)
.multiple(true)
.required(false),
Arg::with_name("html")
.help("Generate highlighting as an HTML document")
.long("html")
.short("H"),
)
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("html").long("html").short("H"))
.arg(Arg::with_name("time").long("time").short("t"))
.arg(Arg::with_name("quiet").long("quiet").short("q")),
.arg(&scope_arg)
.arg(&time_arg)
.arg(&quiet_arg)
.arg(&paths_file_arg)
.arg(&paths_arg),
)
.subcommand(
SubCommand::with_name("build-wasm")
@ -180,7 +223,7 @@ fn run() -> Result<()> {
Arg::with_name("quiet")
.long("quiet")
.short("q")
.help("open in default browser"),
.help("Don't open in default browser"),
),
)
.subcommand(
@ -237,8 +280,12 @@ fn run() -> Result<()> {
("test", Some(matches)) => {
let debug = matches.is_present("debug");
let debug_graph = matches.is_present("debug-graph");
let debug_build = matches.is_present("debug-build");
let update = matches.is_present("update");
let filter = matches.value_of("filter");
loader.use_debug_build(debug_build);
let languages = loader.languages_at_path(&current_dir)?;
let language = languages
.first()
@ -274,6 +321,7 @@ fn run() -> Result<()> {
("parse", Some(matches)) => {
let debug = matches.is_present("debug");
let debug_graph = matches.is_present("debug-graph");
let debug_build = matches.is_present("debug-build");
let debug_xml = matches.is_present("debug-xml");
let quiet = matches.is_present("quiet");
let time = matches.is_present("time");
@ -287,6 +335,8 @@ fn run() -> Result<()> {
env::set_var("TREE_SITTER_DEBUG", "1");
}
loader.use_debug_build(debug_build);
let timeout = matches
.value_of("timeout")
.map_or(0, |t| u64::from_str_radix(t, 10).unwrap());
@ -418,11 +468,10 @@ fn run() -> Result<()> {
if let Some(highlight_config) = language_config.highlight_config(language)? {
let source = fs::read(path)?;
let theme_config = config.get()?;
if html_mode {
highlight::html(
&loader,
&theme_config,
&theme_config.theme,
&source,
highlight_config,
quiet,
@ -431,7 +480,7 @@ fn run() -> Result<()> {
} else {
highlight::ansi(
&loader,
&theme_config,
&theme_config.theme,
&source,
highlight_config,
time,
@ -455,7 +504,7 @@ fn run() -> Result<()> {
("playground", Some(matches)) => {
let open_in_browser = !matches.is_present("quiet");
web_ui::serve(&current_dir, open_in_browser);
playground::serve(&current_dir, open_in_browser);
}
("dump-languages", Some(_)) => {

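The repeated inline `Arg` definitions are hoisted into shared values that several subcommands reference; clap 2.x accepts `&Arg` wherever it takes an argument, cloning it into each subcommand. A reduced sketch of the pattern:

```rust
use clap::{App, Arg, SubCommand};

fn main() {
    // Define a flag once, reuse it across subcommands (clap 2.x clones
    // a borrowed Arg into each App it is added to).
    let debug_arg = Arg::with_name("debug")
        .help("Show parsing debug log")
        .long("debug")
        .short("d");

    let matches = App::new("tree-sitter")
        .subcommand(SubCommand::with_name("parse").arg(&debug_arg))
        .subcommand(SubCommand::with_name("test").arg(&debug_arg))
        .get_matches();

    if let ("parse", Some(m)) = matches.subcommand() {
        println!("debug = {}", m.is_present("debug"));
    }
}
```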

@ -9,28 +9,6 @@ use tiny_http::{Header, Response, Server};
use webbrowser;
macro_rules! resource {
($name: tt, $path: tt) => {
#[cfg(TREE_SITTER_EMBED_WASM_BINDING)]
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Vec<u8> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
fs::read(tree_sitter_dir.join($path)).unwrap()
} else {
include_bytes!(concat!("../../", $path)).to_vec()
}
}
#[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))]
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Vec<u8> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
fs::read(tree_sitter_dir.join($path)).unwrap()
} else {
include_bytes!(concat!("../../", $path)).to_vec()
}
}
};
}
macro_rules! optional_resource {
($name: tt, $path: tt) => {
#[cfg(TREE_SITTER_EMBED_WASM_BINDING)]
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Vec<u8> {
@ -52,15 +30,15 @@ macro_rules! optional_resource {
};
}
resource!(get_main_html, "cli/src/web_ui.html");
resource!(get_main_html, "cli/src/playground.html");
resource!(get_playground_js, "docs/assets/js/playground.js");
optional_resource!(get_lib_js, "lib/binding_web/tree-sitter.js");
optional_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm");
resource!(get_lib_js, "lib/binding_web/tree-sitter.js");
resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm");
pub fn serve(grammar_path: &Path, open_in_browser: bool) {
let port = get_available_port().expect("Couldn't find an available port");
let url = format!("127.0.0.1:{}", port);
let server = Server::http(&url).expect("Failed to start web server");
let addr = format!("127.0.0.1:{}", port);
let server = Server::http(&addr).expect("Failed to start web server");
let grammar_name = wasm::get_grammar_name(&grammar_path.join("src"))
.with_context(|| "Failed to get wasm filename")
.unwrap();
@ -73,8 +51,10 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) {
)
})
.unwrap();
let url = format!("http://{}", addr);
println!("Started playground on: {}", url);
if open_in_browser {
if let Err(_) = webbrowser::open(&format!("http://127.0.0.1:{}", port)) {
if let Err(_) = webbrowser::open(&url) {
eprintln!("Failed to open '{}' in a web browser", url);
}
}
@ -95,17 +75,23 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) {
for request in server.incoming_requests() {
let res = match request.url() {
"/" => response(&main_html, &html_header),
"/playground.js" => response(&playground_js, &js_header),
"/tree-sitter-parser.wasm" => response(&language_wasm, &wasm_header),
"/playground.js" => {
if playground_js.is_empty() {
redirect("https://tree-sitter.github.io/tree-sitter/assets/js/playground.js")
} else {
response(&playground_js, &js_header)
}
}
"/tree-sitter.js" => {
if cfg!(windows) {
if lib_js.is_empty() {
redirect("https://tree-sitter.github.io/tree-sitter.js")
} else {
response(&lib_js, &js_header)
}
}
"/tree-sitter.wasm" => {
if cfg!(windows) {
if lib_wasm.is_empty() {
redirect("https://tree-sitter.github.io/tree-sitter.wasm")
} else {
response(&lib_wasm, &wasm_header)
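Instead of keying the fallback on `cfg!(windows)`, the playground now serves the embedded `tree-sitter.js`/`tree-sitter.wasm` payloads when they were compiled in and redirects to the hosted copies when they are empty. A stripped-down sketch of that serve-or-redirect loop with `tiny_http` (the route, port, and payload here are placeholders):

```rust
use tiny_http::{Header, Response, ResponseBox, Server};

fn redirect(url: &str) -> ResponseBox {
    Response::empty(302)
        .with_header(Header::from_bytes(&b"Location"[..], url.as_bytes()).unwrap())
        .boxed()
}

fn main() {
    let server = Server::http("127.0.0.1:8000").expect("Failed to start web server");
    let lib_js: Vec<u8> = Vec::new(); // empty when the wasm binding wasn't embedded

    for request in server.incoming_requests() {
        let res: ResponseBox = match request.url() {
            // Fall back to the hosted copy rather than serving nothing.
            "/tree-sitter.js" if lib_js.is_empty() => {
                redirect("https://tree-sitter.github.io/tree-sitter.js")
            }
            _ => Response::from_string("playground").boxed(),
        };
        let _ = request.respond(res);
    }
}
```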


@ -48,10 +48,12 @@ pub fn query_files_at_paths(
let capture_name = &query.capture_names()[capture.index as usize];
writeln!(
&mut stdout,
" pattern: {}, capture: {}, row: {}, text: {:?}",
" pattern: {:>2}, capture: {} - {}, start: {}, end: {}, text: `{}`",
mat.pattern_index,
capture.index,
capture_name,
capture.node.start_position().row,
capture.node.start_position(),
capture.node.end_position(),
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
results.push(query_testing::CaptureInfo {
@ -70,9 +72,11 @@ pub fn query_files_at_paths(
if end.row == start.row {
writeln!(
&mut stdout,
" capture: {}, start: {}, text: {:?}",
" capture: {} - {}, start: {}, end: {}, text: `{}`",
capture.index,
capture_name,
start,
end,
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
} else {


@ -48,40 +48,38 @@ pub fn parse_position_comments(
if node.kind().contains("comment") {
if let Ok(text) = node.utf8_text(source) {
let mut position = node.start_position();
if position.row == 0 {
continue;
}
// Find the arrow character ("^" or "<-") in the comment. A left arrow
// refers to the column where the comment node starts. An up arrow refers
// to its own column.
let mut has_left_caret = false;
let mut has_arrow = false;
let mut arrow_end = 0;
for (i, c) in text.char_indices() {
arrow_end = i + 1;
if c == '-' && has_left_caret {
has_arrow = true;
break;
if position.row > 0 {
// Find the arrow character ("^" or "<-") in the comment. A left arrow
// refers to the column where the comment node starts. An up arrow refers
// to its own column.
let mut has_left_caret = false;
let mut has_arrow = false;
let mut arrow_end = 0;
for (i, c) in text.char_indices() {
arrow_end = i + 1;
if c == '-' && has_left_caret {
has_arrow = true;
break;
}
if c == '^' {
has_arrow = true;
position.column += i;
break;
}
has_left_caret = c == '<';
}
if c == '^' {
has_arrow = true;
position.column += i;
break;
}
has_left_caret = c == '<';
}
// If the comment node contains an arrow and a highlight name, record the
// highlight name and the position.
if let (true, Some(mat)) =
(has_arrow, CAPTURE_NAME_REGEX.find(&text[arrow_end..]))
{
assertion_ranges.push((node.start_position(), node.end_position()));
result.push(Assertion {
position: position,
expected_capture_name: mat.as_str().to_string(),
});
// If the comment node contains an arrow and a highlight name, record the
// highlight name and the position.
if let (true, Some(mat)) =
(has_arrow, CAPTURE_NAME_REGEX.find(&text[arrow_end..]))
{
assertion_ranges.push((node.start_position(), node.end_position()));
result.push(Assertion {
position: position,
expected_capture_name: mat.as_str().to_string(),
});
}
}
}
}


@ -5,7 +5,6 @@ use difference::{Changeset, Difference};
use lazy_static::lazy_static;
use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder};
use regex::Regex;
use std::char;
use std::ffi::OsStr;
use std::fmt::Write as FmtWrite;
use std::fs;
@ -16,11 +15,12 @@ use tree_sitter::{Language, LogType, Parser, Query};
use walkdir::WalkDir;
lazy_static! {
static ref HEADER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^===+\r?\n([^=]*)\r?\n===+\r?\n")
.multi_line(true)
.build()
.unwrap();
static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^---+\r?\n")
static ref HEADER_REGEX: ByteRegex =
ByteRegexBuilder::new(r"^===+(?P<suffix1>[^=\r\n][^\r\n]*)?\r?\n(?P<test_name>[^=\r\n][^\r\n]*)\r?\n===+(?P<suffix2>[^=\r\n][^\r\n]*)?\r?\n")
.multi_line(true)
.build()
.unwrap();
static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^---+(?P<suffix>[^-\r\n][^\r\n]*)?\r?\n")
.multi_line(true)
.build()
.unwrap();
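The new `HEADER_REGEX` captures an optional suffix on the `===` lines plus the test name as named groups, which is what lets a test file declare a custom header suffix and then safely contain `===`/`---` lines in its body. Exercising the same expression (copied from the diff) on a minimal header:

```rust
use regex::bytes::RegexBuilder;

fn main() {
    let header_regex = RegexBuilder::new(
        r"^===+(?P<suffix1>[^=\r\n][^\r\n]*)?\r?\n(?P<test_name>[^=\r\n][^\r\n]*)\r?\n===+(?P<suffix2>[^=\r\n][^\r\n]*)?\r?\n",
    )
    .multi_line(true)
    .build()
    .unwrap();

    let bytes = b"===abc\nFirst test\n===abc\n(a)\n";
    let caps = header_regex.captures(&bytes[..]).unwrap();
    // The suffix and the test name come back as separate named groups.
    assert_eq!(&caps["suffix1"], &b"abc"[..]);
    assert_eq!(&caps["test_name"], &b"First test"[..]);
}
```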
@ -114,7 +114,9 @@ pub fn run_tests_at_path(
print_diff_key();
for (i, (name, actual, expected)) in failures.iter().enumerate() {
println!("\n {}. {}:", i + 1, name);
print_diff(actual, expected);
let actual = format_sexp_indented(&actual, 2);
let expected = format_sexp_indented(&expected, 2);
print_diff(&actual, &expected);
}
Err(anyhow!(""))
}
@ -153,8 +155,7 @@ pub fn print_diff_key() {
}
pub fn print_diff(actual: &String, expected: &String) {
let changeset = Changeset::new(actual, expected, " ");
print!(" ");
let changeset = Changeset::new(actual, expected, "\n");
for diff in &changeset.diffs {
match diff {
Difference::Same(part) => {
@ -263,9 +264,13 @@ fn run_tests(
}
fn format_sexp(sexp: &String) -> String {
format_sexp_indented(sexp, 0)
}
fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String {
let mut formatted = String::new();
let mut indent_level = 0;
let mut indent_level = initial_indent_level;
let mut has_field = false;
let mut s_iter = sexp.split(|c| c == ' ' || c == ')');
while let Some(s) = s_iter.next() {
@ -375,22 +380,58 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
let mut prev_name = String::new();
let mut prev_header_end = 0;
// Identify all of the test descriptions using the `======` headers.
for (header_start, header_end) in HEADER_REGEX
.find_iter(&bytes)
.map(|m| (m.start(), m.end()))
.chain(Some((bytes.len(), bytes.len())))
{
// Find the longest line of dashes following each test description.
// That is the divider between input and expected output.
// Find the first test header in the file, and determine if it has a
// custom suffix. If so, then this suffix will be used to identify
// all subsequent headers and divider lines in the file.
let first_suffix = HEADER_REGEX
.captures(bytes)
.and_then(|c| c.name("suffix1"))
.map(|m| String::from_utf8_lossy(m.as_bytes()));
// Find all of the `===` test headers, which contain the test names.
// Ignore any matches whose suffix does not match the first header
// suffix in the file.
let header_matches = HEADER_REGEX.captures_iter(&bytes).filter_map(|c| {
let suffix1 = c
.name("suffix1")
.map(|m| String::from_utf8_lossy(m.as_bytes()));
let suffix2 = c
.name("suffix2")
.map(|m| String::from_utf8_lossy(m.as_bytes()));
if suffix1 == first_suffix && suffix2 == first_suffix {
let header_range = c.get(0).unwrap().range();
let test_name = c
.name("test_name")
.map(|c| String::from_utf8_lossy(c.as_bytes()).to_string());
Some((header_range, test_name))
} else {
None
}
});
for (header_range, test_name) in header_matches.chain(Some((bytes.len()..bytes.len(), None))) {
// Find the longest line of dashes following each test description. That line
// separates the input from the expected output. Ignore any matches whose suffix
// does not match the first suffix in the file.
if prev_header_end > 0 {
let divider_match = DIVIDER_REGEX
.find_iter(&bytes[prev_header_end..header_start])
.map(|m| (prev_header_end + m.start(), prev_header_end + m.end()))
.max_by_key(|(start, end)| end - start);
if let Some((divider_start, divider_end)) = divider_match {
if let Ok(output) = str::from_utf8(&bytes[divider_end..header_start]) {
let mut input = bytes[prev_header_end..divider_start].to_vec();
let divider_range = DIVIDER_REGEX
.captures_iter(&bytes[prev_header_end..header_range.start])
.filter_map(|m| {
let suffix = m
.name("suffix")
.map(|m| String::from_utf8_lossy(m.as_bytes()));
if suffix == first_suffix {
let range = m.get(0).unwrap().range();
Some((prev_header_end + range.start)..(prev_header_end + range.end))
} else {
None
}
})
.max_by_key(|range| range.len());
if let Some(divider_range) = divider_range {
if let Ok(output) = str::from_utf8(&bytes[divider_range.end..header_range.start]) {
let mut input = bytes[prev_header_end..divider_range.start].to_vec();
// Remove trailing newline from the input.
input.pop();
@ -400,6 +441,7 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
// Remove all comments
let output = COMMENT_REGEX.replace_all(output, "").to_string();
// Normalize the whitespace in the expected output.
let output = WHITESPACE_REGEX.replace_all(output.trim(), " ");
let output = output.replace(" )", ")");
@ -417,10 +459,8 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
}
}
}
prev_name = String::from_utf8_lossy(&bytes[header_start..header_end])
.trim_matches(|c| char::is_whitespace(c) || c == '=')
.to_string();
prev_header_end = header_end;
prev_name = test_name.unwrap_or(String::new());
prev_header_end = header_range.end;
}
TestEntry::Group {
name,
@ -434,7 +474,7 @@ mod tests {
use super::*;
#[test]
fn test_parse_test_content() {
fn test_parse_test_content_simple() {
let entry = parse_test_content(
"the-filename".to_string(),
r#"
@ -664,4 +704,88 @@ code
}
);
}
#[test]
fn test_parse_test_content_with_suffixes() {
let entry = parse_test_content(
"the-filename".to_string(),
r#"
==================asdf\()[]|{}*+?^$.-
First test
==================asdf\()[]|{}*+?^$.-
=========================
NOT A TEST HEADER
=========================
-------------------------
---asdf\()[]|{}*+?^$.-
(a)
==================asdf\()[]|{}*+?^$.-
Second test
==================asdf\()[]|{}*+?^$.-
=========================
NOT A TEST HEADER
=========================
-------------------------
---asdf\()[]|{}*+?^$.-
(a)
=========================asdf\()[]|{}*+?^$.-
Test name with = symbol
=========================asdf\()[]|{}*+?^$.-
=========================
NOT A TEST HEADER
=========================
-------------------------
---asdf\()[]|{}*+?^$.-
(a)
"#
.trim()
.to_string(),
None,
);
let expected_input = "\n=========================\n\
NOT A TEST HEADER\n\
=========================\n\
-------------------------\n"
.as_bytes()
.to_vec();
assert_eq!(
entry,
TestEntry::Group {
name: "the-filename".to_string(),
children: vec![
TestEntry::Example {
name: "First test".to_string(),
input: expected_input.clone(),
output: "(a)".to_string(),
has_fields: false,
},
TestEntry::Example {
name: "Second test".to_string(),
input: expected_input.clone(),
output: "(a)".to_string(),
has_fields: false,
},
TestEntry::Example {
name: "Test name with = symbol".to_string(),
input: expected_input.clone(),
output: "(a)".to_string(),
has_fields: false,
}
],
file_path: None,
}
);
}
}


@ -63,9 +63,14 @@ fn test_parsing_with_logging() {
)));
assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string())));
let mut row_starts_from_0 = false;
for (_, m) in &messages {
assert!(!m.contains("row:0"));
if m.contains("row:0") {
row_starts_from_0 = true;
break;
}
}
assert!(row_starts_from_0);
}
#[test]
@ -849,7 +854,10 @@ fn test_parsing_with_multiple_included_ranges() {
hello_text_node.start_byte(),
source_code.find("Hello").unwrap()
);
assert_eq!(hello_text_node.end_byte(), source_code.find("<b>").unwrap());
assert_eq!(
hello_text_node.end_byte(),
source_code.find(" <b>").unwrap()
);
assert_eq!(b_start_tag_node.kind(), "start_tag");
assert_eq!(


@ -17,6 +17,7 @@ fn test_highlight_test_with_basic_test() {
],
);
let source = [
"// hi",
"var abc = function(d) {",
" // ^ function",
" // ^ keyword",
@ -32,15 +33,15 @@ fn test_highlight_test_with_basic_test() {
assertions,
&[
Assertion {
position: Point::new(0, 5),
position: Point::new(1, 5),
expected_capture_name: "function".to_string()
},
Assertion {
position: Point::new(0, 11),
position: Point::new(1, 11),
expected_capture_name: "keyword".to_string()
},
Assertion {
position: Point::new(3, 9),
position: Point::new(4, 9),
expected_capture_name: "variable.parameter".to_string()
},
]
@ -53,12 +54,12 @@ fn test_highlight_test_with_basic_test() {
assert_eq!(
highlight_positions,
&[
(Point::new(0, 0), Point::new(0, 3), Highlight(2)), // "var"
(Point::new(0, 4), Point::new(0, 7), Highlight(0)), // "abc"
(Point::new(0, 10), Point::new(0, 18), Highlight(2)), // "function"
(Point::new(0, 19), Point::new(0, 20), Highlight(1)), // "d"
(Point::new(3, 2), Point::new(3, 8), Highlight(2)), // "return"
(Point::new(3, 9), Point::new(3, 10), Highlight(1)), // "d"
(Point::new(1, 0), Point::new(1, 3), Highlight(2)), // "var"
(Point::new(1, 4), Point::new(1, 7), Highlight(0)), // "abc"
(Point::new(1, 10), Point::new(1, 18), Highlight(2)), // "function"
(Point::new(1, 19), Point::new(1, 20), Highlight(1)), // "d"
(Point::new(4, 2), Point::new(4, 8), Highlight(2)), // "return"
(Point::new(4, 9), Point::new(4, 10), Highlight(1)), // "d"
]
);
}


@ -6,8 +6,8 @@ GEM
minitest (~> 5.1)
thread_safe (~> 0.3, >= 0.3.4)
tzinfo (~> 1.1)
addressable (2.5.2)
public_suffix (>= 2.0.2, < 4.0)
addressable (2.8.0)
public_suffix (>= 2.0.2, < 5.0)
coffee-script (2.4.1)
coffee-script-source
execjs
@ -16,12 +16,27 @@ GEM
commonmarker (0.17.8)
ruby-enum (~> 0.5)
concurrent-ruby (1.0.5)
ethon (0.11.0)
ffi (>= 1.3.0)
ethon (0.14.0)
ffi (>= 1.15.0)
execjs (2.7.0)
faraday (0.14.0)
faraday (1.5.1)
faraday-em_http (~> 1.0)
faraday-em_synchrony (~> 1.0)
faraday-excon (~> 1.1)
faraday-httpclient (~> 1.0.1)
faraday-net_http (~> 1.0)
faraday-net_http_persistent (~> 1.1)
faraday-patron (~> 1.0)
multipart-post (>= 1.2, < 3)
ffi (1.9.23)
ruby2_keywords (>= 0.0.4)
faraday-em_http (1.0.0)
faraday-em_synchrony (1.0.0)
faraday-excon (1.1.0)
faraday-httpclient (1.0.1)
faraday-net_http (1.0.1)
faraday-net_http_persistent (1.2.0)
faraday-patron (1.0.0)
ffi (1.15.3)
forwardable-extended (2.6.0)
gemoji (3.0.0)
github-pages (177)
@ -195,33 +210,35 @@ GEM
minima (2.1.1)
jekyll (~> 3.3)
minitest (5.11.3)
multipart-post (2.0.0)
net-dns (0.8.0)
multipart-post (2.1.1)
net-dns (0.9.0)
nokogiri (1.11.4)
mini_portile2 (~> 2.5.0)
racc (~> 1.4)
octokit (4.8.0)
octokit (4.21.0)
faraday (>= 0.9)
sawyer (~> 0.8.0, >= 0.5.3)
pathutil (0.16.1)
pathutil (0.16.2)
forwardable-extended (~> 2.6)
public_suffix (2.0.5)
racc (1.5.2)
rb-fsevent (0.10.2)
rb-inotify (0.9.10)
ffi (>= 0.5.0, < 2)
rb-fsevent (0.11.0)
rb-inotify (0.10.1)
ffi (~> 1.0)
rouge (2.2.1)
ruby-enum (0.7.2)
i18n
ruby2_keywords (0.0.4)
rubyzip (2.0.0)
safe_yaml (1.0.4)
sass (3.5.5)
safe_yaml (1.0.5)
sass (3.7.4)
sass-listen (~> 4.0.0)
sass-listen (4.0.0)
rb-fsevent (~> 0.9, >= 0.9.4)
rb-inotify (~> 0.9, >= 0.9.7)
sawyer (0.8.1)
addressable (>= 2.3.5, < 2.6)
faraday (~> 0.8, < 1.0)
sawyer (0.8.2)
addressable (>= 2.3.5)
faraday (> 0.8, < 2.0)
terminal-table (1.8.0)
unicode-display_width (~> 1.1, >= 1.1.1)
thread_safe (0.3.6)


@ -15,12 +15,13 @@ Tree-sitter is a parser generator tool and an incremental parsing library. It ca
There are currently bindings that allow Tree-sitter to be used from the following languages:
* [Rust](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust)
* [JavaScript (Wasm)](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web)
* [Haskell](https://github.com/tree-sitter/haskell-tree-sitter)
* [JavaScript (Node.js)](https://github.com/tree-sitter/node-tree-sitter)
* [JavaScript (Wasm)](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web)
* [OCaml](https://github.com/returntocorp/ocaml-tree-sitter-core)
* [Python](https://github.com/tree-sitter/py-tree-sitter)
* [Ruby](https://github.com/tree-sitter/ruby-tree-sitter)
* [Haskell](https://github.com/tree-sitter/haskell-tree-sitter)
* [Rust](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust)
### Available Parsers
@ -31,11 +32,13 @@ Parsers for these languages are fairly complete:
* [C#](https://github.com/tree-sitter/tree-sitter-c-sharp)
* [C++](https://github.com/tree-sitter/tree-sitter-cpp)
* [CSS](https://github.com/tree-sitter/tree-sitter-css)
* [DOT](https://github.com/rydesun/tree-sitter-dot)
* [Elm](https://github.com/elm-tooling/tree-sitter-elm)
* [Eno](https://github.com/eno-lang/tree-sitter-eno)
* [ERB / EJS](https://github.com/tree-sitter/tree-sitter-embedded-template)
* [Fennel](https://github.com/travonted/tree-sitter-fennel)
* [Go](https://github.com/tree-sitter/tree-sitter-go)
* [HCL](https://github.com/MichaHoffmann/tree-sitter-hcl)
* [HTML](https://github.com/tree-sitter/tree-sitter-html)
* [Java](https://github.com/tree-sitter/tree-sitter-java)
* [JavaScript](https://github.com/tree-sitter/tree-sitter-javascript)
@ -60,6 +63,7 @@ Parsers for these languages are fairly complete:
* [Vue](https://github.com/ikatyang/tree-sitter-vue)
* [YAML](https://github.com/ikatyang/tree-sitter-yaml)
* [WASM](https://github.com/wasm-lsp/tree-sitter-wasm)
* [WGSL WebGPU Shading Language](https://github.com/mehmetoguzderin/tree-sitter-wgsl)
Parsers for these languages are in development:
@ -67,10 +71,12 @@ Parsers for these languages are in development:
* [Erlang](https://github.com/AbstractMachinesLab/tree-sitter-erlang/)
* [Dockerfile](https://github.com/camdencheek/tree-sitter-dockerfile)
* [Go mod](https://github.com/camdencheek/tree-sitter-go-mod)
* [Hack](https://github.com/slackhq/tree-sitter-hack)
* [Haskell](https://github.com/tree-sitter/tree-sitter-haskell)
* [Julia](https://github.com/tree-sitter/tree-sitter-julia)
* [Kotlin](https://github.com/fwcd/tree-sitter-kotlin)
* [Nix](https://github.com/cstrahan/tree-sitter-nix)
* [Objective-C](https://github.com/jiyee/tree-sitter-objc)
* [Perl](https://github.com/ganezdragon/tree-sitter-perl)
* [Scala](https://github.com/tree-sitter/tree-sitter-scala)
* [Sourcepawn](https://github.com/nilshelmig/tree-sitter-sourcepawn)
@ -89,8 +95,8 @@ Parsers for these languages are in development:
The design of Tree-sitter was greatly influenced by the following research papers:
- [Practical Algorithms for Incremental Software Development Environments](https://www2.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-946.pdf)
- [Context Aware Scanning for Parsing Extensible Languages](http://www.umsec.umn.edu/publications/Context-Aware-Scanning-Parsing-Extensible)
- [Efficient and Flexible Incremental Parsing](http://ftp.cs.berkeley.edu/sggs/toplas-parsing.ps)
- [Incremental Analysis of Real Programming Languages](https://pdfs.semanticscholar.org/ca69/018c29cc415820ed207d7e1d391e2da1656f.pdf)
- [Context Aware Scanning for Parsing Extensible Languages](https://www-users.cse.umn.edu/~evw/pubs/vanwyk07gpce/vanwyk07gpce.pdf)
- [Efficient and Flexible Incremental Parsing](http://harmonia.cs.berkeley.edu/papers/twagner-parsing.pdf)
- [Incremental Analysis of Real Programming Languages](http://harmonia.cs.berkeley.edu/papers/twagner-glr.pdf)
- [Error Detection and Recovery in LR Parsers](http://what-when-how.com/compiler-writing/bottom-up-parsing-compiler-writing-part-13)
- [Error Recovery for LR Parsers](http://www.dtic.mil/dtic/tr/fulltext/u2/a043470.pdf)
- [Error Recovery for LR Parsers](https://apps.dtic.mil/sti/pdfs/ADA043470.pdf)


@ -464,7 +464,7 @@ In general, it's a good idea to make patterns more specific by specifying [field
#### Negated Fields
You can also constrain a pattern so that it only mathces nodes that *lack* a certain field. To do this, add a field name prefixed by a `!` within the parent pattern. For example, this pattern would match a class declaration with no type parameters:
You can also constrain a pattern so that it only matches nodes that *lack* a certain field. To do this, add a field name prefixed by a `!` within the parent pattern. For example, this pattern would match a class declaration with no type parameters:
```
(class_declaration
@ -586,8 +586,10 @@ This pattern would match a set of possible keyword tokens, capturing them as `@k
#### Wildcard Node
A wildcard node is represented with an underscore (`(_)`), it matches any node.
A wildcard node is represented with an underscore (`_`); it matches any node.
This is similar to `.` in regular expressions.
There are two forms: `(_)` matches any named node,
and `_` matches any named or anonymous node.
For example, this pattern would match any node inside a call:


@ -84,7 +84,7 @@ tree-sitter parse example-file
This should print the following:
```
(source_file [1, 0] - [1, 5])
(source_file [0, 0] - [1, 0])
```
You now have a working parser.
@ -95,7 +95,7 @@ Let's go over all of the functionality of the `tree-sitter` command line tool.
### Command: `generate`
The most important command you'll use is `tree-sitter generate`. This command reads the `grammar.js` file in your current working directory and creates a file called `src/parser.c`, which implements the parser. After making changes to your grammar, just run `tree-sitter` generate again.
The most important command you'll use is `tree-sitter generate`. This command reads the `grammar.js` file in your current working directory and creates a file called `src/parser.c`, which implements the parser. After making changes to your grammar, just run `tree-sitter generate` again.
The first time you run `tree-sitter generate`, it will also generate a few other files:
@ -674,7 +674,7 @@ This function is responsible for recognizing external tokens. It should return `
* **`TSSymbol result_symbol`** - The symbol that was recognized. Your scan function should *assign* to this field one of the values from the `TokenType` enum, described above.
* **`void (*advance)(TSLexer *, bool skip)`** - A function for advancing to the next character. If you pass `true` for the second argument, the current character will be treated as whitespace.
* **`void (*mark_end)(TSLexer *)`** - A function for marking the end of the recognized token. This allows matching tokens that require multiple characters of lookahead. By default (if you don't call `mark_end`), any character that you moved past using the `advance` function will be included in the size of the token. But once you call `mark_end`, then any later calls to `advance` will *not* increase the size of the returned token. You can call `mark_end` multiple times to increase the size of the token.
* **`uint32_t (*get_column)(TSLexer *)`** - **(Experimental)** A function for querying the current column position of the lexer. It returns the number of unicode code points (not bytes) since the start of the current line.
* **`uint32_t (*get_column)(TSLexer *)`** - A function for querying the current column position of the lexer. It returns the number of bytes (not characters) since the start of the current line.
* **`bool (*is_at_included_range_start)(TSLexer *)`** - A function for checking if the parser has just skipped some characters in the document. When parsing an embedded document using the `ts_parser_set_included_ranges` function (described in the [multi-language document section][multi-language-section]), your scanner may want to apply some special behavior when moving to a disjoint part of the document. For example, in [EJS documents][ejs], the JavaScript parser uses this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`.
The third argument to the `scan` function is an array of booleans that indicates which of your external tokens are currently expected by the parser. You should only look for a given token if it is valid according to this array. At the same time, you cannot backtrack, so you may need to combine certain pieces of logic.


@ -29,7 +29,7 @@ git clone https://github.com/tree-sitter/tree-sitter
cd tree-sitter
```
Optionally, build the WASM library. If you skip this step, then the `tree-sitter web-ui` command will require an internet connection. If you have emscripten installed, this will use your `emcc` compiler. Otherwise, it will use Docker:
Optionally, build the WASM library. If you skip this step, then the `tree-sitter playground` command will require an internet connection. If you have emscripten installed, this will use your `emcc` compiler. Otherwise, it will use Docker:
```sh
./script/build-wasm


@ -4,7 +4,7 @@ description = "Library for performing syntax highlighting with Tree-sitter"
version = "0.20.0"
authors = [
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
"Tim Clem <timothy.clem@gmail.com>"
"Tim Clem <timothy.clem@gmail.com>",
]
license = "MIT"
readme = "README.md"
@ -21,5 +21,5 @@ regex = "1"
thiserror = "1.0"
[dependencies.tree-sitter]
version = ">= 0.3.7"
version = "0.20"
path = "../lib"


@ -586,7 +586,7 @@ where
break;
}
if i > 0 {
&self.layers[0..(i + 1)].rotate_left(1);
self.layers[0..(i + 1)].rotate_left(1);
}
break;
} else {


@ -133,6 +133,7 @@ pub const TSQueryError_TSQueryErrorNodeType: TSQueryError = 2;
pub const TSQueryError_TSQueryErrorField: TSQueryError = 3;
pub const TSQueryError_TSQueryErrorCapture: TSQueryError = 4;
pub const TSQueryError_TSQueryErrorStructure: TSQueryError = 5;
pub const TSQueryError_TSQueryErrorLanguage: TSQueryError = 6;
pub type TSQueryError = u32;
extern "C" {
#[doc = " Create a new parser."]


@ -202,6 +202,7 @@ pub enum QueryErrorKind {
Capture,
Predicate,
Structure,
Language,
}
#[derive(Debug)]
@ -629,7 +630,7 @@ impl Parser {
/// If a pointer is assigned, then the parser will periodically read from
/// this pointer during parsing. If it reads a non-zero value, it will halt early,
/// returning `None`. See [parse](Parser::parse) for more information.
pub unsafe fn set_cancellation_flag(&self, flag: Option<&AtomicUsize>) {
pub unsafe fn set_cancellation_flag(&mut self, flag: Option<&AtomicUsize>) {
if let Some(flag) = flag {
ffi::ts_parser_set_cancellation_flag(
self.0.as_ptr(),
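The receiver change above (`&self` to `&mut self`) only tightens the Rust signature; the behavior it wraps is the C-level flag. A hedged sketch of that underlying C API, assuming a `tree_sitter_json` grammar is linked in:

```c
#include <tree_sitter/api.h>
#include <string.h>

const TSLanguage *tree_sitter_json(void);  // assumption: JSON grammar linked in

static size_t cancel_flag = 0;  // set to non-zero from another thread to cancel

TSTree *parse_with_cancellation(const char *source) {
  TSParser *parser = ts_parser_new();
  ts_parser_set_language(parser, tree_sitter_json());

  // The parser periodically reads this pointer while parsing; a non-zero
  // value makes the parse halt early and return NULL.
  ts_parser_set_cancellation_flag(parser, &cancel_flag);

  TSTree *tree =
    ts_parser_parse_string(parser, NULL, source, (uint32_t)strlen(source));
  ts_parser_delete(parser);
  return tree;  // NULL if the flag became non-zero mid-parse
}
```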
@ -1231,6 +1232,19 @@ impl Query {
// On failure, build an error based on the error code and offset.
if ptr.is_null() {
if error_type == ffi::TSQueryError_TSQueryErrorLanguage {
return Err(QueryError {
row: 0,
column: 0,
offset: 0,
message: LanguageError {
version: language.version(),
}
.to_string(),
kind: QueryErrorKind::Language,
});
}
let offset = error_offset as usize;
let mut line_start = 0;
let mut row = 0;
@ -1739,6 +1753,10 @@ impl QueryCursor {
}
impl<'a, 'tree> QueryMatch<'a, 'tree> {
pub fn id(&self) -> u32 {
self.id
}
pub fn remove(self) {
unsafe { ffi::ts_query_cursor_remove_match(self.cursor, self.id) }
}
@ -1803,21 +1821,36 @@ impl<'a, 'tree> QueryMatch<'a, 'tree> {
.iter()
.all(|predicate| match predicate {
TextPredicate::CaptureEqCapture(i, j, is_positive) => {
let node1 = self.nodes_for_capture_index(*i).next().unwrap();
let node2 = self.nodes_for_capture_index(*j).next().unwrap();
let text1 = get_text(buffer1, text_provider.text(node1));
let text2 = get_text(buffer2, text_provider.text(node2));
(text1 == text2) == *is_positive
let node1 = self.nodes_for_capture_index(*i).next();
let node2 = self.nodes_for_capture_index(*j).next();
match (node1, node2) {
(Some(node1), Some(node2)) => {
let text1 = get_text(buffer1, text_provider.text(node1));
let text2 = get_text(buffer2, text_provider.text(node2));
(text1 == text2) == *is_positive
}
_ => true,
}
}
TextPredicate::CaptureEqString(i, s, is_positive) => {
let node = self.nodes_for_capture_index(*i).next().unwrap();
let text = get_text(buffer1, text_provider.text(node));
(text == s.as_bytes()) == *is_positive
let node = self.nodes_for_capture_index(*i).next();
match node {
Some(node) => {
let text = get_text(buffer1, text_provider.text(node));
(text == s.as_bytes()) == *is_positive
}
None => true,
}
}
TextPredicate::CaptureMatchString(i, r, is_positive) => {
let node = self.nodes_for_capture_index(*i).next().unwrap();
let text = get_text(buffer1, text_provider.text(node));
r.is_match(text) == *is_positive
let node = self.nodes_for_capture_index(*i).next();
match node {
Some(node) => {
let text = get_text(buffer1, text_provider.text(node));
r.is_match(text) == *is_positive
}
None => true,
}
}
})
}
@ -2105,21 +2138,27 @@ impl fmt::Display for LanguageError {
impl fmt::Display for QueryError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"Query error at {}:{}. {}{}",
self.row + 1,
self.column + 1,
match self.kind {
QueryErrorKind::Field => "Invalid field name ",
QueryErrorKind::NodeType => "Invalid node type ",
QueryErrorKind::Capture => "Invalid capture name ",
QueryErrorKind::Predicate => "Invalid predicate: ",
QueryErrorKind::Structure => "Impossible pattern:\n",
QueryErrorKind::Syntax => "Invalid syntax:\n",
},
self.message
)
let msg = match self.kind {
QueryErrorKind::Field => "Invalid field name ",
QueryErrorKind::NodeType => "Invalid node type ",
QueryErrorKind::Capture => "Invalid capture name ",
QueryErrorKind::Predicate => "Invalid predicate: ",
QueryErrorKind::Structure => "Impossible pattern:\n",
QueryErrorKind::Syntax => "Invalid syntax:\n",
QueryErrorKind::Language => "",
};
if msg.len() > 0 {
write!(
f,
"Query error at {}:{}. {}{}",
self.row + 1,
self.column + 1,
msg,
self.message
)
} else {
write!(f, "{}", self.message)
}
}
}

View file

@ -3,3 +3,4 @@
package-lock.json
node_modules
*.tgz
LICENSE

View file

@ -17,24 +17,15 @@ var MIN_COMPATIBLE_VERSION;
var TRANSFER_BUFFER;
var currentParseCallback;
var currentLogCallback;
var initPromise = new Promise(resolve => {
Module.onRuntimeInitialized = resolve
}).then(() => {
TRANSFER_BUFFER = C._ts_init();
VERSION = getValue(TRANSFER_BUFFER, 'i32');
MIN_COMPATIBLE_VERSION = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
});
class Parser {
class ParserImpl {
static init() {
return initPromise;
TRANSFER_BUFFER = C._ts_init();
VERSION = getValue(TRANSFER_BUFFER, 'i32');
MIN_COMPATIBLE_VERSION = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
}
constructor() {
if (TRANSFER_BUFFER == null) {
throw new Error('You must first call Parser.init() and wait for it to resolve.');
}
initialize() {
C._ts_parser_new_wasm();
this[0] = getValue(TRANSFER_BUFFER, 'i32');
this[1] = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
@ -794,6 +785,7 @@ class Language {
if (c.name === captureName1) node1 = c.node;
if (c.name === captureName2) node2 = c.node;
}
if(node1 === undefined || node2 === undefined) return true;
return (node1.text === node2.text) === isPositive;
});
} else {
@ -805,7 +797,7 @@ class Language {
return (c.node.text === stringValue) === isPositive;
};
}
return false;
return true;
});
}
break;
@ -828,7 +820,7 @@ class Language {
for (const c of captures) {
if (c.name === captureName) return regex.test(c.node.text) === isPositive;
}
return false;
return true;
});
break;
@ -1203,6 +1195,3 @@ function marshalEdit(edit) {
setValue(address, edit.oldEndIndex, 'i32'); address += SIZE_OF_INT;
setValue(address, edit.newEndIndex, 'i32'); address += SIZE_OF_INT;
}
Parser.Language = Language;
Parser.Parser = Parser;

View file

@ -23,6 +23,7 @@
"_memchr",
"_memcmp",
"_memcpy",
"_memmove",
"_strlen",
"_towupper",

View file

@ -9,6 +9,7 @@
},
"scripts": {
"test": "mocha",
"prepack": "cp ../../LICENSE .",
"prepublishOnly": "node check-artifacts-fresh.js"
},
"repository": {

View file

@ -1,9 +1,15 @@
(function (root, factory) {
if (typeof define === 'function' && define.amd) {
define([], factory);
} else if (typeof exports === 'object') {
module.exports = factory();
} else {
window.TreeSitter = factory();
}
}(this, function () {
var TreeSitter = function() {
var initPromise;
class Parser {
constructor() {
this.initialize();
}
initialize() {
throw new Error("cannot construct a Parser before calling `init()`");
}
static init(moduleOptions) {
if (initPromise) return initPromise;
Module = Object.assign({ }, Module, moduleOptions);
return initPromise = new Promise((resolveInitPromise) => {

View file

@ -1,2 +1,23 @@
return Parser;
}));
for (const name of Object.getOwnPropertyNames(ParserImpl.prototype)) {
Object.defineProperty(Parser.prototype, name, {
value: ParserImpl.prototype[name],
enumerable: false,
writable: false,
})
}
Parser.Language = Language;
Module.onRuntimeInitialized = () => {
ParserImpl.init();
resolveInitPromise();
};
});
}
}
return Parser;
}();
if (typeof exports === 'object') {
module.exports = TreeSitter;
}

View file

@ -1,12 +1,19 @@
declare module 'web-tree-sitter' {
class Parser {
static init(): Promise<void>;
/**
*
* @param moduleOptions Optional Emscripten module object, see https://emscripten.org/docs/api_reference/module.html
*/
static init(moduleOptions?: object): Promise<void>;
delete(): void;
parse(input: string | Parser.Input, previousTree?: Parser.Tree, options?: Parser.Options): Parser.Tree;
getLanguage(): any;
setLanguage(language: any): void;
reset(): void;
getLanguage(): Parser.Language;
setLanguage(language?: Parser.Language | undefined | null): void;
getLogger(): Parser.Logger;
setLogger(logFunc: Parser.Logger): void;
setLogger(logFunc?: Parser.Logger | undefined | null): void;
setTimeoutMicros(value: number): void;
getTimeoutMicros(): number;
}
namespace Parser {
@ -96,8 +103,11 @@ declare module 'web-tree-sitter' {
export interface TreeCursor {
nodeType: string;
nodeTypeId: number;
nodeText: string;
nodeId: number;
nodeIsNamed: boolean;
nodeIsMissing: boolean;
startPosition: Point;
endPosition: Point;
startIndex: number;
@ -123,7 +133,7 @@ declare module 'web-tree-sitter' {
walk(): TreeCursor;
getChangedRanges(other: Tree): Range[];
getEditedRange(other: Tree): Range;
getLanguage(): any;
getLanguage(): Language;
}
class Language {

View file

@ -131,6 +131,7 @@ typedef enum {
TSQueryErrorField,
TSQueryErrorCapture,
TSQueryErrorStructure,
TSQueryErrorLanguage,
} TSQueryError;
/********************/
@ -618,7 +619,7 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *);
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *);
/**
* Get the field name of the tree cursor's current node.
* Get the field id of the tree cursor's current node.
*
* This returns zero if the current node doesn't have a field.
* See also `ts_node_child_by_field_id`, `ts_language_field_id_for_name`.
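A small sketch of the API this corrected comment describes: walking a node's children with a cursor and mapping each field id back to its name (construction of the tree itself is assumed):

```c
#include <stdio.h>
#include <tree_sitter/api.h>

void print_field_names(TSNode root, const TSLanguage *language) {
  TSTreeCursor cursor = ts_tree_cursor_new(root);
  if (ts_tree_cursor_goto_first_child(&cursor)) {
    do {
      // Zero means the current node is not the value of any field.
      TSFieldId id = ts_tree_cursor_current_field_id(&cursor);
      if (id != 0) {
        printf("field: %s\n", ts_language_field_name_for_id(language, id));
      }
    } while (ts_tree_cursor_goto_next_sibling(&cursor));
  }
  ts_tree_cursor_delete(&cursor);
}
```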

View file

@ -417,7 +417,7 @@ static Subtree ts_parser__lex(
LOG(
"lex_external state:%d, row:%u, column:%u",
lex_mode.external_lex_state,
current_position.extent.row + 1,
current_position.extent.row,
current_position.extent.column
);
ts_lexer_start(&self->lexer);
@ -456,7 +456,7 @@ static Subtree ts_parser__lex(
LOG(
"lex_internal state:%d, row:%u, column:%u",
lex_mode.lex_state,
current_position.extent.row + 1,
current_position.extent.row,
current_position.extent.column
);
ts_lexer_start(&self->lexer);
@ -1884,7 +1884,7 @@ TSTree *ts_parser_parse(
LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u",
version, ts_stack_version_count(self->stack),
ts_stack_state(self->stack, version),
ts_stack_position(self->stack, version).extent.row + 1,
ts_stack_position(self->stack, version).extent.row,
ts_stack_position(self->stack, version).extent.column);
if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL;

View file

@ -2069,6 +2069,15 @@ TSQuery *ts_query_new(
uint32_t *error_offset,
TSQueryError *error_type
) {
if (
!language ||
language->version > TREE_SITTER_LANGUAGE_VERSION ||
language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
) {
*error_type = TSQueryErrorLanguage;
return NULL;
}
TSQuery *self = ts_malloc(sizeof(TSQuery));
*self = (TSQuery) {
.steps = array_new(),
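From the caller's side, the new guard surfaces as `TSQueryErrorLanguage` before any parsing of the query string happens. A hedged usage sketch (the pattern string is a placeholder):

```c
#include <stdio.h>
#include <string.h>
#include <tree_sitter/api.h>

void try_query(const TSLanguage *language) {
  const char *source = "(identifier) @name";  // placeholder pattern
  uint32_t error_offset;
  TSQueryError error_type;
  TSQuery *query = ts_query_new(
    language, source, (uint32_t)strlen(source), &error_offset, &error_type
  );
  if (!query) {
    if (error_type == TSQueryErrorLanguage) {
      // NULL or version-incompatible language; error_offset is not meaningful.
      fprintf(stderr, "incompatible language version\n");
    } else {
      fprintf(stderr, "query error at offset %u\n", error_offset);
    }
    return;
  }
  ts_query_delete(query);
}
```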
@ -2552,6 +2561,7 @@ static void ts_query_cursor__add_state(
pattern->step_index
);
array_insert(&self->states, index, ((QueryState) {
.id = UINT32_MAX,
.capture_list_id = NONE,
.step_index = pattern->step_index,
.pattern_index = pattern->pattern_index,
@ -2716,7 +2726,6 @@ static inline bool ts_query_cursor__advance(
if (step->depth == PATTERN_DONE_MARKER) {
if (state->start_depth > self->depth || self->halted) {
LOG(" finish pattern %u\n", state->pattern_index);
state->id = self->next_state_id++;
array_push(&self->finished_states, *state);
did_match = true;
deleted_count++;
@ -3105,7 +3114,6 @@ static inline bool ts_query_cursor__advance(
LOG(" defer finishing pattern %u\n", state->pattern_index);
} else {
LOG(" finish pattern %u\n", state->pattern_index);
state->id = self->next_state_id++;
array_push(&self->finished_states, *state);
array_erase(&self->states, state - self->states.contents);
did_match = true;
@ -3160,6 +3168,7 @@ bool ts_query_cursor_next_match(
}
QueryState *state = &self->finished_states.contents[0];
if (state->id == UINT32_MAX) state->id = self->next_state_id++;
match->id = state->id;
match->pattern_index = state->pattern_index;
const CaptureList *captures = capture_list_pool_get(
@ -3269,6 +3278,7 @@ bool ts_query_cursor_next_capture(
}
if (state) {
if (state->id == UINT32_MAX) state->id = self->next_state_id++;
match->id = state->id;
match->pattern_index = state->pattern_index;
const CaptureList *captures = capture_list_pool_get(
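Putting the lazily-assigned id together, a minimal sketch of consuming matches from C (query construction omitted); `match.id` is the value the Rust binding exposes as `QueryMatch::id` and accepts in `remove`:

```c
#include <stdio.h>
#include <tree_sitter/api.h>

void list_matches(const TSQuery *query, TSNode root) {
  TSQueryCursor *cursor = ts_query_cursor_new();
  ts_query_cursor_exec(cursor, query, root);

  TSQueryMatch match;
  while (ts_query_cursor_next_match(cursor, &match)) {
    // The id is assigned the first time a finished state is handed out,
    // so every match returned here carries a valid, stable id.
    printf("match %u: pattern %u, %u captures\n",
           match.id,
           (unsigned)match.pattern_index,
           (unsigned)match.capture_count);
  }
  ts_query_cursor_delete(cursor);
}
```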

View file

@ -33,7 +33,7 @@ web_dir=lib/binding_web
emscripten_flags="-O3"
minify_js=1
force_docker=0
emscripen_version=$(cat "$(dirname "$0")"/../emscripten-version)
emscripen_version=$(cat "$(dirname "$0")"/../cli/emscripten-version)
while [[ $# > 0 ]]; do
case "$1" in

View file

@ -2,7 +2,7 @@
set -e
EMSCRIPTEN_VERSION=$(cat "$(dirname "$0")/../emscripten-version")
EMSCRIPTEN_VERSION=$(cat "$(dirname "$0")/../cli/emscripten-version")
mkdir -p target
EMSDK_DIR="./target/emsdk"

View file

@ -4,10 +4,14 @@
const CATEGORY_OUTPUT_PATH = './cli/src/generate/prepare_grammar/unicode-categories.json'
const PROPERTY_OUTPUT_PATH = './cli/src/generate/prepare_grammar/unicode-properties.json'
const CATEGORY_ALIAS_OUTPUT_PATH = './cli/src/generate/prepare_grammar/unicode-category-aliases.json'
const PROPERTY_ALIAS_OUTPUT_PATH = './cli/src/generate/prepare_grammar/unicode-property-aliases.json'
const CATEGORY_URL = 'https://unicode.org/Public/13.0.0/ucd/UnicodeData.txt'
const PROPERTY_URL = 'https://unicode.org/Public/13.0.0/ucd/PropList.txt'
const DERIVED_PROPERTY_URL = 'https://unicode.org/Public/13.0.0/ucd/DerivedCoreProperties.txt'
const CATEGORY_ALIAS_URL = 'https://unicode.org/Public/13.0.0/ucd/PropertyValueAliases.txt'
const PROPERTY_ALIAS_URL = 'https://unicode.org/Public/13.0.0/ucd/PropertyAliases.txt'
const fs = require('fs');
const path = require('path');
@ -16,7 +20,9 @@ const {spawnSync} = require('child_process');
// Download the unicode data files, caching them inside the 'target' directory.
const categoryData = cachedDownload(CATEGORY_URL);
const propertyData = cachedDownload(PROPERTY_URL);
const derivedPopertyData = cachedDownload(DERIVED_PROPERTY_URL);
const derivedPropertyData = cachedDownload(DERIVED_PROPERTY_URL);
const categoryAliasData = cachedDownload(CATEGORY_ALIAS_URL);
const propertyAliasData = cachedDownload(PROPERTY_ALIAS_URL);
function cachedDownload(url) {
let downloadPath = path.join('.', 'target', path.basename(url))
if (fs.existsSync(downloadPath)) {
@ -30,10 +36,12 @@ function cachedDownload(url) {
const categories = {};
const properties = {};
const categoryAliases = {};
const propertyAliases = {}
let data, row, lineStart, lineEnd;
// Parse the properties
data = propertyData + derivedPopertyData;
data = propertyData + derivedPropertyData;
row = 0;
lineStart = 0;
lineEnd = -1;
@ -106,7 +114,7 @@ while (lineStart < data.length) {
if (
nameStart === 0 ||
categoryStart == 0 ||
categoryEnd === 0
categoryEnd === -1
) {
throw new Error(`Unexpected format on line ${row}`);
}
@ -124,5 +132,110 @@ while (lineStart < data.length) {
categories[category].push(codePoint);
}
// Parse the category aliases
data = categoryAliasData;
row = 0;
lineStart = 0;
lineEnd = -1;
const IGNORE = /[#\s]/
while (lineStart < data.length) {
row++;
lineStart = lineEnd + 1;
lineEnd = data.indexOf('\n', lineStart);
if (lineEnd === -1) break;
// Skip over blank and comment lines
if (IGNORE.test(data[lineStart])) continue;
// Parse the first three semicolon-separated fields:
// * property value type
// * short name
// * long name
// Other aliases may be listed in additional fields
const propertyValueTypeEnd = data.indexOf(';', lineStart);
const shortNameStart = propertyValueTypeEnd + 1;
const shortNameEnd = data.indexOf(';', shortNameStart);
const longNameStart = shortNameEnd + 1;
if (
shortNameStart === 0 ||
longNameStart === 0
) {
throw new Error(`Unexpected format on line ${row}`);
}
const propertyValueType = data.slice(lineStart, propertyValueTypeEnd).trim();
const shortName = data.slice(shortNameStart, shortNameEnd).trim();
// Filter for General_Category lines
if (propertyValueType !== 'gc') continue;
let aliasStart = longNameStart;
let lineDone = false;
do {
let aliasEnd = data.indexOf(';', aliasStart);
if (aliasEnd === -1 || aliasEnd > lineEnd) {
aliasEnd = data.indexOf('#', aliasStart);
if (aliasEnd === -1 || aliasEnd > lineEnd) {
aliasEnd = lineEnd;
}
lineDone = true;
}
const alias = data.slice(aliasStart, aliasEnd).trim();
console.log(alias, shortName);
categoryAliases[alias] = shortName;
aliasStart = aliasEnd + 1;
} while (!lineDone);
}
// Parse the property aliases
data = propertyAliasData;
row = 0;
lineStart = 0;
lineEnd = -1;
while (lineStart < data.length) {
row++;
lineStart = lineEnd + 1;
lineEnd = data.indexOf('\n', lineStart);
if (lineEnd === -1) break;
// Skip over blank and comment lines
if (IGNORE.test(data[lineStart])) continue;
// Parse the first two semicolon-separated fields:
// * short name
// * long name
const shortNameEnd = data.indexOf(';', lineStart);
const longNameStart = shortNameEnd + 1;
if (longNameStart == 0) {
throw new Error(`Unexpected format on line ${row}`);
}
let alias = data.slice(lineStart, shortNameEnd).trim();
let longName = null;
let nameStart = longNameStart;
let lineDone = false;
do {
let nameEnd = data.indexOf(';', nameStart);
if (nameEnd === -1 || nameEnd > lineEnd) {
nameEnd = data.indexOf('#', nameStart);
if (nameEnd === -1 || nameEnd > lineEnd) {
nameEnd = lineEnd;
}
lineDone = true;
}
if (longName == null) {
longName = data.slice(nameStart, nameEnd).trim();
} else {
alias = data.slice(nameStart, nameEnd).trim();
}
console.log(alias, longName);
propertyAliases[alias] = longName;
nameStart = nameEnd + 1;
} while (!lineDone);
}
fs.writeFileSync(CATEGORY_OUTPUT_PATH, JSON.stringify(categories), 'utf8');
fs.writeFileSync(PROPERTY_OUTPUT_PATH, JSON.stringify(properties), 'utf8');
fs.writeFileSync(CATEGORY_ALIAS_OUTPUT_PATH, JSON.stringify(categoryAliases), 'utf8');
fs.writeFileSync(PROPERTY_ALIAS_OUTPUT_PATH, JSON.stringify(propertyAliases), 'utf8');

View file

@ -4,7 +4,7 @@ description = "Library for extracting tag information"
version = "0.20.0"
authors = [
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
"Patrick Thomson <patrickt@github.com>"
"Patrick Thomson <patrickt@github.com>",
]
license = "MIT"
readme = "README.md"
@ -22,5 +22,5 @@ memchr = "2.3"
thiserror = "1.0"
[dependencies.tree-sitter]
version = ">= 0.17.0"
version = "0.20"
path = "../lib"

View file

@ -30,3 +30,14 @@ Math symbols
(program
(math_sym) (math_sym) (math_sym) (math_sym) (math_sym))
================================
Letterlike numeric characters
================================
ᛯ Ⅵ 〩
---
(program
(letter_number) (letter_number) (letter_number))

View file

@ -13,7 +13,8 @@
"members": [
{"type": "SYMBOL", "name": "lower"},
{"type": "SYMBOL", "name": "upper"},
{"type": "SYMBOL", "name": "math_sym"}
{"type": "SYMBOL", "name": "math_sym"},
{"type": "SYMBOL", "name": "letter_number"}
]
}
},
@ -31,6 +32,11 @@
"math_sym": {
"type": "PATTERN",
"value": "\\p{Sm}+"
},
"letter_number": {
"type": "PATTERN",
"value": "\\p{Letter_Number}"
}
}
}