Merge branch 'master' into m-novikov-add-parsers

commit e7dcd2b7c4
Max Brunsfeld, 2021-09-24 09:04:30 -07:00, committed by GitHub
57 changed files with 822 additions and 353 deletions


@ -36,7 +36,7 @@ jobs:
- name: Read Emscripten version
run: |
printf 'EMSCRIPTEN_VERSION=%s\n' "$(cat emscripten-version)" >> $GITHUB_ENV
printf 'EMSCRIPTEN_VERSION=%s\n' "$(cat cli/emscripten-version)" >> $GITHUB_ENV
- name: Cache artifacts
id: cache

.gitignore vendored

@ -2,6 +2,7 @@ log*.html
.idea
*.xcodeproj
.vscode
fuzz-results

Cargo.lock generated

@ -495,6 +495,12 @@ dependencies = [
"crossbeam-utils",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "ryu"
version = "1.0.5"
@ -541,9 +547,9 @@ dependencies = [
[[package]]
name = "smallbitvec"
version = "2.5.0"
version = "2.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "797a4eaffb90d896f29698d45676f9f940a71936d7574996a7df54593ba209fa"
checksum = "75ce4f9dc4a41b4c3476cc925f1efb11b66df373a8fde5d4b8915fa91b5d995e"
[[package]]
name = "spin"
@ -689,11 +695,13 @@ dependencies = [
"dirs",
"glob",
"html-escape",
"indexmap",
"lazy_static",
"log",
"rand",
"regex",
"regex-syntax",
"rustc-hash",
"serde",
"serde_derive",
"serde_json",


@ -1,6 +1,6 @@
The MIT License (MIT)
Copyright (c) 2018 Max Brunsfeld
Copyright (c) 2018-2021 Max Brunsfeld
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal


@ -27,40 +27,42 @@ difference = "2.0"
dirs = "3.0"
glob = "0.3.0"
html-escape = "0.2.6"
indexmap = "1"
lazy_static = "1.2.0"
regex = "1"
regex-syntax = "0.6.4"
rustc-hash = "1"
serde = "1.0"
serde_derive = "1.0"
smallbitvec = "2.3.0"
smallbitvec = "2.5.1"
tiny_http = "0.8"
walkdir = "2.3"
webbrowser = "0.5.1"
which = "4.1.0"
[dependencies.tree-sitter]
version = ">= 0.17.0"
version = "0.20"
path = "../lib"
[dev-dependencies.tree-sitter]
version = ">= 0.17.0"
version = "0.20"
path = "../lib"
features = ["allocation-tracking"]
[dependencies.tree-sitter-config]
version = ">= 0.19.0"
version = "0.19.0"
path = "config"
[dependencies.tree-sitter-highlight]
version = ">= 0.3.0"
version = "0.20"
path = "../highlight"
[dependencies.tree-sitter-loader]
version = ">= 0.19.0"
version = "0.19.0"
path = "loader"
[dependencies.tree-sitter-tags]
version = ">= 0.1.0"
version = "0.20"
path = "../tags"
[dependencies.serde_json]


@ -36,4 +36,4 @@ The `tree-sitter` binary itself has no dependencies, but specific commands have
* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation](http://tree-sitter.github.io/tree-sitter/creating-parsers) for more information.
* `parse` - The `tree-sitter parse` command will parse a file (or list of file) using Tree-sitter parsers.
* `parse` - The `tree-sitter parse` command will parse a file (or list of files) using Tree-sitter parsers.


@ -6,7 +6,7 @@ fn main() {
println!("cargo:rustc-env={}={}", "BUILD_SHA", git_sha);
}
if wasm_files_present() {
if web_playground_files_present() {
println!("cargo:rustc-cfg={}", "TREE_SITTER_EMBED_WASM_BINDING");
}
@ -16,15 +16,16 @@ fn main() {
"RUST_BINDING_VERSION", rust_binding_version,
);
let emscripten_version = fs::read_to_string("../emscripten-version").unwrap();
let emscripten_version = fs::read_to_string("emscripten-version").unwrap();
println!(
"cargo:rustc-env={}={}",
"EMSCRIPTEN_VERSION", emscripten_version,
);
}
fn wasm_files_present() -> bool {
fn web_playground_files_present() -> bool {
let paths = [
"../docs/assets/js/playground.js",
"../lib/binding_web/tree-sitter.js",
"../lib/binding_web/tree-sitter.wasm",
];
@ -81,10 +82,10 @@ fn read_git_sha() -> Option<String> {
}
fn read_rust_binding_version() -> String {
let path = "../lib/Cargo.toml";
let path = "Cargo.toml";
let text = fs::read_to_string(path).unwrap();
let cargo_toml = toml::from_str::<toml::Value>(text.as_ref()).unwrap();
cargo_toml["package"]["version"]
cargo_toml["dependencies"]["tree-sitter"]["version"]
.as_str()
.unwrap()
.trim_matches('"')
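The build script now reads the embedded Emscripten version from `cli/emscripten-version` and derives the Rust binding version from the CLI's own `Cargo.toml` (its `tree-sitter` dependency entry) rather than from `../lib/Cargo.toml`. A condensed sketch of that lookup, assuming the `toml` crate the build script already uses; error handling is simplified:

```rust
use std::fs;

fn main() {
    // Parse Cargo.toml as a generic TOML value and read the
    // `tree-sitter` dependency's `version` field, as the diff does.
    let text = fs::read_to_string("Cargo.toml").unwrap();
    let cargo_toml = toml::from_str::<toml::Value>(&text).unwrap();
    let version = cargo_toml["dependencies"]["tree-sitter"]["version"]
        .as_str()
        .unwrap()
        .trim_matches('"');
    println!("cargo:rustc-env=RUST_BINDING_VERSION={}", version);
}
```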


@ -1,6 +1,6 @@
//! Manages tree-sitter's configuration file.
use anyhow::{anyhow, Result};
use anyhow::{anyhow, Context, Result};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::path::PathBuf;
@ -14,6 +14,7 @@ use std::{env, fs};
/// This type holds the generic JSON content of the configuration file. Individual tree-sitter
/// components will use the [`get`][] method to parse that JSON to extract configuration fields
/// that are specific to that component.
#[derive(Debug)]
pub struct Config {
pub location: PathBuf,
pub config: Value,
@ -64,8 +65,10 @@ impl Config {
Some(location) => location,
None => return Config::initial(),
};
let content = fs::read_to_string(&location)?;
let config = serde_json::from_str(&content)?;
let content = fs::read_to_string(&location)
.with_context(|| format!("Failed to read {}", &location.to_string_lossy()))?;
let config = serde_json::from_str(&content)
.with_context(|| format!("Bad JSON config {}", &location.to_string_lossy()))?;
Ok(Config { location, config })
}
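The `with_context` calls wrap the underlying I/O and JSON errors so that failures name the offending file. A minimal self-contained sketch of the same `anyhow` pattern (the function name and path handling here are illustrative, not part of the diff):

```rust
use anyhow::{Context, Result};
use std::{fs, path::Path};

fn load_json(path: &Path) -> Result<serde_json::Value> {
    // Each `?` now carries a message naming the file that failed.
    let content = fs::read_to_string(path)
        .with_context(|| format!("Failed to read {}", path.display()))?;
    let config = serde_json::from_str(&content)
        .with_context(|| format!("Bad JSON config {}", path.display()))?;
    Ok(config)
}
```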


@ -25,13 +25,13 @@ version = "1.0"
features = ["preserve_order"]
[dependencies.tree-sitter]
version = ">= 0.19"
version = "0.20"
path = "../../lib"
[dependencies.tree-sitter-highlight]
version = ">= 0.19"
version = "0.20"
path = "../../highlight"
[dependencies.tree-sitter-tags]
version = ">= 0.19"
version = "0.20"
path = "../../tags"


@ -12,7 +12,7 @@ use std::process::Command;
use std::sync::Mutex;
use std::time::SystemTime;
use std::{fs, mem};
use tree_sitter::{Language, QueryError};
use tree_sitter::{Language, QueryError, QueryErrorKind};
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
@ -101,6 +101,7 @@ pub struct Loader {
language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
highlight_names: Box<Mutex<Vec<String>>>,
use_all_highlight_names: bool,
debug_build: bool,
}
unsafe impl Send for Loader {}
@ -122,6 +123,7 @@ impl Loader {
language_configuration_ids_by_file_type: HashMap::new(),
highlight_names: Box::new(Mutex::new(Vec::new())),
use_all_highlight_names: true,
debug_build: false,
}
}
@ -347,7 +349,11 @@ impl Loader {
parser_path: &Path,
scanner_path: &Option<PathBuf>,
) -> Result<Language> {
let mut library_path = self.parser_lib_path.join(name);
let mut lib_name = name.to_string();
if self.debug_build {
lib_name.push_str(".debug._");
}
let mut library_path = self.parser_lib_path.join(lib_name);
library_path.set_extension(DYLIB_EXTENSION);
let recompile = needs_recompile(&library_path, &parser_path, &scanner_path)
@ -369,11 +375,13 @@ impl Loader {
}
if cfg!(windows) {
command
.args(&["/nologo", "/LD", "/I"])
.arg(header_path)
.arg("/Od")
.arg(parser_path);
command.args(&["/nologo", "/LD", "/I"]).arg(header_path);
if self.debug_build {
command.arg("/Od");
} else {
command.arg("/O2");
}
command.arg(parser_path);
if let Some(scanner_path) = scanner_path.as_ref() {
command.arg(scanner_path);
}
@ -389,8 +397,18 @@ impl Loader {
.arg("-I")
.arg(header_path)
.arg("-o")
.arg(&library_path)
.arg("-O2");
.arg(&library_path);
if self.debug_build {
command.arg("-O0");
} else {
command.arg("-O2");
}
// For conditional compilation of external scanner code when
// used internally by `tree-sitter parse` and other subcommands.
command.arg("-DTREE_SITTER_INTERNAL_BUILD");
if let Some(scanner_path) = scanner_path.as_ref() {
if scanner_path.extension() == Some("c".as_ref()) {
command.arg("-xc").arg("-std=c99").arg(scanner_path);
@ -639,6 +657,10 @@ impl Loader {
Err(anyhow!("No language found"))
}
}
pub fn use_debug_build(&mut self, flag: bool) {
self.debug_build = flag;
}
}
impl<'a> LanguageConfiguration<'a> {
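The new `debug_build` flag changes two things: the compiled parser library gets a distinct cache file name, and the C compiler runs with `-O0` (or `/Od` under MSVC) instead of `-O2`. The naming trick relies on `set_extension` replacing the trailing `_` placeholder; a small sketch of just that part (the paths and extension here are illustrative):

```rust
use std::path::{Path, PathBuf};

// Mirrors the naming scheme above: debug artifacts get a distinct
// file name so they don't collide with optimized ones in the cache.
fn library_path(cache_dir: &Path, name: &str, debug_build: bool) -> PathBuf {
    let mut lib_name = name.to_string();
    if debug_build {
        lib_name.push_str(".debug._");
    }
    let mut path = cache_dir.join(lib_name);
    // `set_extension` swaps the "_" placeholder after the final dot.
    path.set_extension("so"); // DYLIB_EXTENSION in the real loader
    path
}

fn main() {
    let dir = Path::new("/tmp/tree-sitter/lib");
    assert_eq!(
        library_path(dir, "javascript", false),
        PathBuf::from("/tmp/tree-sitter/lib/javascript.so")
    );
    assert_eq!(
        library_path(dir, "javascript", true),
        PathBuf::from("/tmp/tree-sitter/lib/javascript.debug.so")
    );
}
```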
@ -662,28 +684,31 @@ impl<'a> LanguageConfiguration<'a> {
&injections_query,
&locals_query,
)
.map_err(|error| {
if error.offset < injections_query.len() {
Self::include_path_in_query_error(
error,
&injection_ranges,
&injections_query,
0,
)
} else if error.offset < injections_query.len() + locals_query.len() {
Self::include_path_in_query_error(
error,
&locals_ranges,
&locals_query,
injections_query.len(),
)
} else {
Self::include_path_in_query_error(
error,
&highlight_ranges,
&highlights_query,
injections_query.len() + locals_query.len(),
)
.map_err(|error| match error.kind {
QueryErrorKind::Language => Error::from(error),
_ => {
if error.offset < injections_query.len() {
Self::include_path_in_query_error(
error,
&injection_ranges,
&injections_query,
0,
)
} else if error.offset < injections_query.len() + locals_query.len() {
Self::include_path_in_query_error(
error,
&locals_ranges,
&locals_query,
injections_query.len(),
)
} else {
Self::include_path_in_query_error(
error,
&highlight_ranges,
&highlights_query,
injections_query.len() + locals_query.len(),
)
}
}
})?;
let mut all_highlight_names = self.highlight_names.lock().unwrap();

1
cli/npm/.gitignore vendored

@ -2,3 +2,4 @@ tree-sitter
tree-sitter.exe
*.gz
*.tgz
LICENSE


@ -14,7 +14,8 @@
],
"main": "lib/api/index.js",
"scripts": {
"install": "node install.js"
"install": "node install.js",
"prepack": "cp ../../LICENSE ."
},
"bin": {
"tree-sitter": "cli.js"


@ -347,7 +347,7 @@ fn lex_states_differ(
fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) {
// Get a mapping of old state index -> new_state_index
let mut old_ids_by_new_id = (0..table.states.len()).collect::<Vec<_>>();
&old_ids_by_new_id[1..].sort_by_key(|id| &table.states[*id]);
old_ids_by_new_id[1..].sort_by_key(|id| &table.states[*id]);
// Get the inverse mapping
let mut new_ids_by_old_id = vec![0; old_ids_by_new_id.len()];
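The leading `&` removed here borrowed the `()` that `sort_by_key` returns: method calls bind tighter than `&`, so the sort itself always ran and the borrow was dead weight that newer compilers warn about. A tiny demonstration:

```rust
fn main() {
    let mut v = vec![3usize, 1, 2, 0];
    // The old form `&v[1..].sort_unstable();` parses as
    // `&(v[1..].sort_unstable())`: the slice is still sorted, but the
    // statement uselessly borrows the returned `()`.
    v[1..].sort_unstable();
    assert_eq!(v, [3, 0, 1, 2]);
}
```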


@ -11,10 +11,14 @@ use crate::generate::tables::{
ProductionInfo, ProductionInfoId,
};
use anyhow::{anyhow, Result};
use std::cmp::Ordering;
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
use std::fmt::Write;
use std::hash::BuildHasherDefault;
use std::u32;
use std::{cmp::Ordering, collections::hash_map::Entry};
use indexmap::{map::Entry, IndexMap};
use rustc_hash::FxHasher;
// For conflict reporting, each parse state is associated with an example
// sequence of symbols that could lead to that parse state.
@ -49,7 +53,7 @@ struct ParseTableBuilder<'a> {
lexical_grammar: &'a LexicalGrammar,
variable_info: &'a Vec<VariableInfo>,
core_ids_by_core: HashMap<ParseItemSetCore<'a>, usize>,
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
state_ids_by_item_set: IndexMap<ParseItemSet<'a>, ParseStateId, BuildHasherDefault<FxHasher>>,
parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
parse_state_queue: VecDeque<ParseStateQueueEntry>,
non_terminal_extra_states: Vec<(Symbol, usize)>,
@ -147,13 +151,7 @@ impl<'a> ParseTableBuilder<'a> {
Entry::Vacant(v) => {
let core = v.key().core();
let core_count = self.core_ids_by_core.len();
let core_id = match self.core_ids_by_core.entry(core) {
Entry::Occupied(e) => *e.get(),
Entry::Vacant(e) => {
e.insert(core_count);
core_count
}
};
let core_id = *self.core_ids_by_core.entry(core).or_insert(core_count);
let state_id = self.parse_table.states.len();
self.parse_state_info_by_id
@ -163,8 +161,8 @@ impl<'a> ParseTableBuilder<'a> {
id: state_id,
lex_state_id: 0,
external_lex_state_id: 0,
terminal_entries: HashMap::new(),
nonterminal_entries: HashMap::new(),
terminal_entries: IndexMap::default(),
nonterminal_entries: IndexMap::default(),
core_id,
});
self.parse_state_queue.push_back(ParseStateQueueEntry {
@ -981,7 +979,7 @@ pub(crate) fn build_parse_table<'a>(
item_set_builder,
variable_info,
non_terminal_extra_states: Vec::new(),
state_ids_by_item_set: HashMap::new(),
state_ids_by_item_set: IndexMap::default(),
core_ids_by_core: HashMap::new(),
parse_state_info_by_id: Vec::new(),
parse_state_queue: VecDeque::new(),
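`std::collections::HashMap` iterates in a randomized order, which made the generated tables differ from run to run; `IndexMap` iterates in insertion order, and `FxHasher` keeps lookups cheap. A minimal sketch of the property being relied on (the keys are placeholders):

```rust
use std::hash::BuildHasherDefault;

use indexmap::IndexMap;
use rustc_hash::FxHasher;

fn main() {
    // Unlike std's HashMap, IndexMap preserves insertion order, so code
    // generation that iterates over it is deterministic across runs.
    let mut m: IndexMap<&str, usize, BuildHasherDefault<FxHasher>> =
        IndexMap::default();
    m.insert("shift", 0);
    m.insert("reduce", 1);
    m.insert("accept", 2);
    let keys: Vec<_> = m.keys().copied().collect();
    assert_eq!(keys, ["shift", "reduce", "accept"]);
}
```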


@ -479,7 +479,7 @@ impl<'a> Minimizer<'a> {
fn reorder_states_by_descending_size(&mut self) {
// Get a mapping of old state index -> new_state_index
let mut old_ids_by_new_id = (0..self.parse_table.states.len()).collect::<Vec<_>>();
&old_ids_by_new_id.sort_unstable_by_key(|i| {
old_ids_by_new_id.sort_unstable_by_key(|i| {
// Don't change states 0 (the error state) or 1 (the start state).
if *i <= 1 {
return *i as i64 - 1_000_000;


@ -169,6 +169,7 @@ fn load_grammar_file(grammar_path: &Path) -> Result<String> {
}
fn load_js_grammar_file(grammar_path: &Path) -> Result<String> {
let grammar_path = fs::canonicalize(grammar_path)?;
let mut node_process = Command::new("node")
.env("TREE_SITTER_GRAMMAR_PATH", grammar_path)
.stdin(Stdio::piped())


@ -19,10 +19,16 @@ lazy_static! {
serde_json::from_str(UNICODE_CATEGORIES_JSON).unwrap();
static ref UNICODE_PROPERTIES: HashMap<&'static str, Vec<u32>> =
serde_json::from_str(UNICODE_PROPERTIES_JSON).unwrap();
static ref UNICODE_CATEGORY_ALIASES: HashMap<&'static str, String> =
serde_json::from_str(UNICODE_CATEGORY_ALIASES_JSON).unwrap();
static ref UNICODE_PROPERTY_ALIASES: HashMap<&'static str, String> =
serde_json::from_str(UNICODE_PROPERTY_ALIASES_JSON).unwrap();
}
const UNICODE_CATEGORIES_JSON: &'static str = include_str!("./unicode-categories.json");
const UNICODE_PROPERTIES_JSON: &'static str = include_str!("./unicode-properties.json");
const UNICODE_CATEGORY_ALIASES_JSON: &'static str = include_str!("./unicode-category-aliases.json");
const UNICODE_PROPERTY_ALIASES_JSON: &'static str = include_str!("./unicode-property-aliases.json");
const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/'];
struct NfaBuilder {
@ -394,12 +400,16 @@ impl NfaBuilder {
category_letter = le.to_string();
}
ClassUnicodeKind::Named(class_name) => {
if class_name.len() == 1 {
category_letter = class_name.clone();
let actual_class_name = UNICODE_CATEGORY_ALIASES
.get(class_name.as_str())
.or_else(|| UNICODE_PROPERTY_ALIASES.get(class_name.as_str()))
.unwrap_or(class_name);
if actual_class_name.len() == 1 {
category_letter = actual_class_name.clone();
} else {
let code_points = UNICODE_CATEGORIES
.get(class_name.as_str())
.or_else(|| UNICODE_PROPERTIES.get(class_name.as_str()))
.get(actual_class_name.as_str())
.or_else(|| UNICODE_PROPERTIES.get(actual_class_name.as_str()))
.ok_or_else(|| {
anyhow!(
"Regex error: Unsupported unicode character class {}",

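With the alias tables (added as JSON files below), a class written as `\p{Letter}` first resolves to its canonical one-letter form `L` before the category lookup. A small sketch of the two-step lookup using a hypothetical slice of that data:

```rust
use std::collections::HashMap;

fn main() {
    // Hypothetical slice of the alias tables embedded from the JSON files.
    let category_aliases: HashMap<&str, &str> =
        [("Letter", "L"), ("Punctuation", "P")].into_iter().collect();

    let class_name = "Letter";
    // Resolve an alias like "Letter" to its canonical name "L" first,
    // falling back to the name as written.
    let actual = category_aliases
        .get(class_name)
        .copied()
        .unwrap_or(class_name);
    assert_eq!(actual, "L");
}
```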

@ -0,0 +1 @@
{"Other":"C","Control":"Cc","cntrl":"Cc","Format":"Cf","Unassigned":"Cn","Private_Use":"Co","Surrogate":"Cs","Letter":"L","Cased_Letter":"LC","Lowercase_Letter":"Ll","Modifier_Letter":"Lm","Other_Letter":"Lo","Titlecase_Letter":"Lt","Uppercase_Letter":"Lu","Mark":"M","Combining_Mark":"M","Spacing_Mark":"Mc","Enclosing_Mark":"Me","Nonspacing_Mark":"Mn","Number":"N","Decimal_Number":"Nd","digit":"Nd","Letter_Number":"Nl","Other_Number":"No","Punctuation":"P","punct":"P","Connector_Punctuation":"Pc","Dash_Punctuation":"Pd","Close_Punctuation":"Pe","Final_Punctuation":"Pf","Initial_Punctuation":"Pi","Other_Punctuation":"Po","Open_Punctuation":"Ps","Symbol":"S","Currency_Symbol":"Sc","Modifier_Symbol":"Sk","Math_Symbol":"Sm","Other_Symbol":"So","Separator":"Z","Line_Separator":"Zl","Paragraph_Separator":"Zp","Space_Separator":"Zs"}


@ -0,0 +1 @@
{"cjkAccountingNumeric":"kAccountingNumeric","cjkOtherNumeric":"kOtherNumeric","cjkPrimaryNumeric":"kPrimaryNumeric","nv":"Numeric_Value","cf":"Case_Folding","cjkCompatibilityVariant":"kCompatibilityVariant","dm":"Decomposition_Mapping","FC_NFKC":"FC_NFKC_Closure","lc":"Lowercase_Mapping","NFKC_CF":"NFKC_Casefold","scf":"Simple_Case_Folding","sfc":"Simple_Case_Folding","slc":"Simple_Lowercase_Mapping","stc":"Simple_Titlecase_Mapping","suc":"Simple_Uppercase_Mapping","tc":"Titlecase_Mapping","uc":"Uppercase_Mapping","bmg":"Bidi_Mirroring_Glyph","bpb":"Bidi_Paired_Bracket","cjkIICore":"kIICore","cjkIRG_GSource":"kIRG_GSource","cjkIRG_HSource":"kIRG_HSource","cjkIRG_JSource":"kIRG_JSource","cjkIRG_KPSource":"kIRG_KPSource","cjkIRG_KSource":"kIRG_KSource","cjkIRG_MSource":"kIRG_MSource","cjkIRG_SSource":"kIRG_SSource","cjkIRG_TSource":"kIRG_TSource","cjkIRG_UKSource":"kIRG_UKSource","cjkIRG_USource":"kIRG_USource","cjkIRG_VSource":"kIRG_VSource","cjkRSUnicode":"kRSUnicode","Unicode_Radical_Stroke":"kRSUnicode","URS":"kRSUnicode","EqUIdeo":"Equivalent_Unified_Ideograph","isc":"ISO_Comment","JSN":"Jamo_Short_Name","na":"Name","na1":"Unicode_1_Name","Name_Alias":"Name_Alias","scx":"Script_Extensions","age":"Age","blk":"Block","sc":"Script","bc":"Bidi_Class","bpt":"Bidi_Paired_Bracket_Type","ccc":"Canonical_Combining_Class","dt":"Decomposition_Type","ea":"East_Asian_Width","gc":"General_Category","GCB":"Grapheme_Cluster_Break","hst":"Hangul_Syllable_Type","InPC":"Indic_Positional_Category","InSC":"Indic_Syllabic_Category","jg":"Joining_Group","jt":"Joining_Type","lb":"Line_Break","NFC_QC":"NFC_Quick_Check","NFD_QC":"NFD_Quick_Check","NFKC_QC":"NFKC_Quick_Check","NFKD_QC":"NFKD_Quick_Check","nt":"Numeric_Type","SB":"Sentence_Break","vo":"Vertical_Orientation","WB":"Word_Break","AHex":"ASCII_Hex_Digit","Alpha":"Alphabetic","Bidi_C":"Bidi_Control","Bidi_M":"Bidi_Mirrored","Cased":"Cased","CE":"Composition_Exclusion","CI":"Case_Ignorable","Comp_Ex":"Full_Composition_Exclusion","CWCF":"Changes_When_Casefolded","CWCM":"Changes_When_Casemapped","CWKCF":"Changes_When_NFKC_Casefolded","CWL":"Changes_When_Lowercased","CWT":"Changes_When_Titlecased","CWU":"Changes_When_Uppercased","Dash":"Dash","Dep":"Deprecated","DI":"Default_Ignorable_Code_Point","Dia":"Diacritic","EBase":"Emoji_Modifier_Base","EComp":"Emoji_Component","EMod":"Emoji_Modifier","Emoji":"Emoji","EPres":"Emoji_Presentation","Ext":"Extender","ExtPict":"Extended_Pictographic","Gr_Base":"Grapheme_Base","Gr_Ext":"Grapheme_Extend","Gr_Link":"Grapheme_Link","Hex":"Hex_Digit","Hyphen":"Hyphen","IDC":"ID_Continue","Ideo":"Ideographic","IDS":"ID_Start","IDSB":"IDS_Binary_Operator","IDST":"IDS_Trinary_Operator","Join_C":"Join_Control","LOE":"Logical_Order_Exception","Lower":"Lowercase","Math":"Math","NChar":"Noncharacter_Code_Point","OAlpha":"Other_Alphabetic","ODI":"Other_Default_Ignorable_Code_Point","OGr_Ext":"Other_Grapheme_Extend","OIDC":"Other_ID_Continue","OIDS":"Other_ID_Start","OLower":"Other_Lowercase","OMath":"Other_Math","OUpper":"Other_Uppercase","Pat_Syn":"Pattern_Syntax","Pat_WS":"Pattern_White_Space","PCM":"Prepended_Concatenation_Mark","QMark":"Quotation_Mark","Radical":"Radical","RI":"Regional_Indicator","SD":"Soft_Dotted","STerm":"Sentence_Terminal","Term":"Terminal_Punctuation","UIdeo":"Unified_Ideograph","Upper":"Uppercase","VS":"Variation_Selector","WSpace":"White_Space","space":"White_Space","XIDC":"XID_Continue","XIDS":"XID_Start","XO_NFC":"Expands_On_NFC","XO_NFD":"Expands_On_NFD","XO_NFKC":"Expands_On_NFKC","XO_NFKD":"Expands_On_NFKD"}


@ -1057,7 +1057,7 @@ impl Generator {
}
fn add_parse_table(&mut self) {
let mut parse_table_entries = Vec::new();
let mut parse_table_entries = HashMap::new();
let mut next_parse_action_list_index = 0;
self.get_parse_action_list_id(
@ -1224,6 +1224,11 @@ impl Generator {
add_line!(self, "");
}
let mut parse_table_entries: Vec<_> = parse_table_entries
.into_iter()
.map(|(entry, i)| (i, entry))
.collect();
parse_table_entries.sort_by_key(|(index, _)| *index);
self.add_parse_action_list(parse_table_entries);
}
@ -1404,17 +1409,17 @@ impl Generator {
fn get_parse_action_list_id(
&self,
entry: &ParseTableEntry,
parse_table_entries: &mut Vec<(usize, ParseTableEntry)>,
parse_table_entries: &mut HashMap<ParseTableEntry, usize>,
next_parse_action_list_index: &mut usize,
) -> usize {
if let Some((index, _)) = parse_table_entries.iter().find(|(_, e)| *e == *entry) {
return *index;
if let Some(&index) = parse_table_entries.get(entry) {
index
} else {
let result = *next_parse_action_list_index;
parse_table_entries.insert(entry.clone(), result);
*next_parse_action_list_index += 1 + entry.actions.len();
result
}
let result = *next_parse_action_list_index;
parse_table_entries.push((result, entry.clone()));
*next_parse_action_list_index += 1 + entry.actions.len();
result
}
fn get_field_map_id(

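Replacing the `Vec` with a `HashMap<ParseTableEntry, usize>` turns the duplicate-entry check from a linear scan into a constant-time lookup; the map is then inverted and sorted by index before emitting, so output order is unchanged. The same intern-style pattern in isolation:

```rust
use std::collections::HashMap;

// Intern a value, returning a stable index; duplicates share one index.
fn intern<T: std::hash::Hash + Eq + Clone>(
    table: &mut HashMap<T, usize>,
    next_index: &mut usize,
    value: &T,
) -> usize {
    if let Some(&index) = table.get(value) {
        index
    } else {
        let result = *next_index;
        table.insert(value.clone(), result);
        *next_index += 1;
        result
    }
}

fn main() {
    let mut table = HashMap::new();
    let mut next = 0;
    assert_eq!(intern(&mut table, &mut next, &"shift"), 0);
    assert_eq!(intern(&mut table, &mut next, &"reduce"), 1);
    assert_eq!(intern(&mut table, &mut next, &"shift"), 0); // deduplicated

    // As in the diff, recover a deterministic listing by sorting on index.
    let mut entries: Vec<_> = table.into_iter().map(|(e, i)| (i, e)).collect();
    entries.sort_by_key(|(i, _)| *i);
    assert_eq!(entries, vec![(0, "shift"), (1, "reduce")]);
}
```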

@ -1,11 +1,16 @@
use super::nfa::CharacterSet;
use super::rules::{Alias, Symbol, TokenSet};
use std::collections::{BTreeMap, HashMap};
use std::collections::BTreeMap;
pub(crate) type ProductionInfoId = usize;
pub(crate) type ParseStateId = usize;
pub(crate) type LexStateId = usize;
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
use std::hash::BuildHasherDefault;
use indexmap::IndexMap;
use rustc_hash::FxHasher;
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum ParseAction {
Accept,
Shift {
@ -28,7 +33,7 @@ pub(crate) enum GotoAction {
ShiftExtra,
}
#[derive(Clone, Debug, PartialEq, Eq)]
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) struct ParseTableEntry {
pub actions: Vec<ParseAction>,
pub reusable: bool,
@ -37,8 +42,8 @@ pub(crate) struct ParseTableEntry {
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct ParseState {
pub id: ParseStateId,
pub terminal_entries: HashMap<Symbol, ParseTableEntry>,
pub nonterminal_entries: HashMap<Symbol, GotoAction>,
pub terminal_entries: IndexMap<Symbol, ParseTableEntry, BuildHasherDefault<FxHasher>>,
pub nonterminal_entries: IndexMap<Symbol, GotoAction, BuildHasherDefault<FxHasher>>,
pub lex_state_id: usize,
pub external_lex_state_id: usize,
pub core_id: usize,


@ -2,6 +2,7 @@ pub mod generate;
pub mod highlight;
pub mod logger;
pub mod parse;
pub mod playground;
pub mod query;
pub mod query_testing;
pub mod tags;
@ -9,7 +10,6 @@ pub mod test;
pub mod test_highlight;
pub mod util;
pub mod wasm;
pub mod web_ui;
#[cfg(test)]
mod tests;


@ -1,5 +1,6 @@
use log::{LevelFilter, Log, Metadata, Record};
#[allow(dead_code)]
struct Logger {
pub filter: Option<String>,
}


@ -4,7 +4,7 @@ use glob::glob;
use std::path::Path;
use std::{env, fs, u64};
use tree_sitter_cli::{
generate, highlight, logger, parse, query, tags, test, test_highlight, util, wasm, web_ui,
generate, highlight, logger, parse, playground, query, tags, test, test_highlight, util, wasm,
};
use tree_sitter_config::Config;
use tree_sitter_loader as loader;
@ -35,6 +35,45 @@ fn run() -> Result<()> {
BUILD_VERSION.to_string()
};
let debug_arg = Arg::with_name("debug")
.help("Show parsing debug log")
.long("debug")
.short("d");
let debug_graph_arg = Arg::with_name("debug-graph")
.help("Produce the log.html file with debug graphs")
.long("debug-graph")
.short("D");
let debug_build_arg = Arg::with_name("debug-build")
.help("Compile a parser in debug mode")
.long("debug-build")
.short("0");
let paths_file_arg = Arg::with_name("paths-file")
.help("The path to a file with paths to source file(s)")
.long("paths")
.takes_value(true);
let paths_arg = Arg::with_name("paths")
.help("The source file(s) to use")
.multiple(true);
let scope_arg = Arg::with_name("scope")
.help("Select a language by the scope instead of a file extension")
.long("scope")
.takes_value(true);
let time_arg = Arg::with_name("time")
.help("Measure execution time")
.long("time")
.short("t");
let quiet_arg = Arg::with_name("quiet")
.help("Suppress main output")
.long("quiet")
.short("q");
let matches = App::new("tree-sitter")
.author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
.about("Generates and tests parsers")
@ -65,23 +104,30 @@ fn run() -> Result<()> {
SubCommand::with_name("parse")
.alias("p")
.about("Parse files")
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("paths")
.index(1)
.multiple(true)
.required(false),
)
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("debug").long("debug").short("d"))
.arg(Arg::with_name("debug-graph").long("debug-graph").short("D"))
.arg(&paths_file_arg)
.arg(&paths_arg)
.arg(&scope_arg)
.arg(&debug_arg)
.arg(&debug_build_arg)
.arg(&debug_graph_arg)
.arg(Arg::with_name("debug-xml").long("xml").short("x"))
.arg(Arg::with_name("quiet").long("quiet").short("q"))
.arg(Arg::with_name("stat").long("stat").short("s"))
.arg(Arg::with_name("time").long("time").short("t"))
.arg(Arg::with_name("timeout").long("timeout").takes_value(true))
.arg(
Arg::with_name("stat")
.help("Show parsing statistics")
.long("stat")
.short("s"),
)
.arg(
Arg::with_name("timeout")
.help("Interrupt the parsing process by timeout (µs)")
.long("timeout")
.takes_value(true),
)
.arg(&time_arg)
.arg(&quiet_arg)
.arg(
Arg::with_name("edits")
.help("Apply edits in the format: \"row,col del_count insert_text\"")
.long("edit")
.short("edit")
.takes_value(true)
@ -93,36 +139,32 @@ fn run() -> Result<()> {
SubCommand::with_name("query")
.alias("q")
.about("Search files using a syntax tree query")
.arg(Arg::with_name("query-path").index(1).required(true))
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("paths")
.index(2)
.multiple(true)
.required(false),
Arg::with_name("query-path")
.help("Path to a file with queries")
.index(1)
.required(true),
)
.arg(&paths_file_arg)
.arg(&paths_arg.clone().index(2))
.arg(
Arg::with_name("byte-range")
.help("The range of byte offsets in which the query will be executed")
.long("byte-range")
.takes_value(true),
)
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(&scope_arg)
.arg(Arg::with_name("captures").long("captures").short("c"))
.arg(Arg::with_name("test").long("test")),
)
.subcommand(
SubCommand::with_name("tags")
.arg(Arg::with_name("quiet").long("quiet").short("q"))
.arg(Arg::with_name("time").long("time").short("t"))
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("paths")
.help("The source file to use")
.index(1)
.multiple(true),
),
.about("Generate a list of tags")
.arg(&scope_arg)
.arg(&time_arg)
.arg(&quiet_arg)
.arg(&paths_file_arg)
.arg(&paths_arg),
)
.subcommand(
SubCommand::with_name("test")
@ -141,23 +183,24 @@ fn run() -> Result<()> {
.short("u")
.help("Update all syntax trees in corpus files with current parser output"),
)
.arg(Arg::with_name("debug").long("debug").short("d"))
.arg(Arg::with_name("debug-graph").long("debug-graph").short("D")),
.arg(&debug_arg)
.arg(&debug_build_arg)
.arg(&debug_graph_arg),
)
.subcommand(
SubCommand::with_name("highlight")
.about("Highlight a file")
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("paths")
.index(1)
.multiple(true)
.required(false),
Arg::with_name("html")
.help("Generate highlighting as an HTML document")
.long("html")
.short("H"),
)
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("html").long("html").short("H"))
.arg(Arg::with_name("time").long("time").short("t"))
.arg(Arg::with_name("quiet").long("quiet").short("q")),
.arg(&scope_arg)
.arg(&time_arg)
.arg(&quiet_arg)
.arg(&paths_file_arg)
.arg(&paths_arg),
)
.subcommand(
SubCommand::with_name("build-wasm")
@ -180,7 +223,7 @@ fn run() -> Result<()> {
Arg::with_name("quiet")
.long("quiet")
.short("q")
.help("open in default browser"),
.help("Don't open in default browser"),
),
)
.subcommand(
@ -237,8 +280,12 @@ fn run() -> Result<()> {
("test", Some(matches)) => {
let debug = matches.is_present("debug");
let debug_graph = matches.is_present("debug-graph");
let debug_build = matches.is_present("debug-build");
let update = matches.is_present("update");
let filter = matches.value_of("filter");
loader.use_debug_build(debug_build);
let languages = loader.languages_at_path(&current_dir)?;
let language = languages
.first()
@ -274,6 +321,7 @@ fn run() -> Result<()> {
("parse", Some(matches)) => {
let debug = matches.is_present("debug");
let debug_graph = matches.is_present("debug-graph");
let debug_build = matches.is_present("debug-build");
let debug_xml = matches.is_present("debug-xml");
let quiet = matches.is_present("quiet");
let time = matches.is_present("time");
@ -287,6 +335,8 @@ fn run() -> Result<()> {
env::set_var("TREE_SITTER_DEBUG", "1");
}
loader.use_debug_build(debug_build);
let timeout = matches
.value_of("timeout")
.map_or(0, |t| u64::from_str_radix(t, 10).unwrap());
@ -418,11 +468,10 @@ fn run() -> Result<()> {
if let Some(highlight_config) = language_config.highlight_config(language)? {
let source = fs::read(path)?;
let theme_config = config.get()?;
if html_mode {
highlight::html(
&loader,
&theme_config,
&theme_config.theme,
&source,
highlight_config,
quiet,
@ -431,7 +480,7 @@ fn run() -> Result<()> {
} else {
highlight::ansi(
&loader,
&theme_config,
&theme_config.theme,
&source,
highlight_config,
time,
@ -455,7 +504,7 @@ fn run() -> Result<()> {
("playground", Some(matches)) => {
let open_in_browser = !matches.is_present("quiet");
web_ui::serve(&current_dir, open_in_browser);
playground::serve(&current_dir, open_in_browser);
}
("dump-languages", Some(_)) => {

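The repeated inline `Arg` definitions are hoisted into shared values that several subcommands reference; clap 2.x accepts `&Arg` wherever it takes an argument, cloning it into each subcommand. A reduced sketch of the pattern:

```rust
use clap::{App, Arg, SubCommand};

fn main() {
    // Define a flag once, reuse it across subcommands (clap 2.x clones
    // a borrowed Arg into each App it is added to).
    let debug_arg = Arg::with_name("debug")
        .help("Show parsing debug log")
        .long("debug")
        .short("d");

    let matches = App::new("tree-sitter")
        .subcommand(SubCommand::with_name("parse").arg(&debug_arg))
        .subcommand(SubCommand::with_name("test").arg(&debug_arg))
        .get_matches();

    if let ("parse", Some(m)) = matches.subcommand() {
        println!("debug = {}", m.is_present("debug"));
    }
}
```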

@ -9,28 +9,6 @@ use tiny_http::{Header, Response, Server};
use webbrowser;
macro_rules! resource {
($name: tt, $path: tt) => {
#[cfg(TREE_SITTER_EMBED_WASM_BINDING)]
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Vec<u8> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
fs::read(tree_sitter_dir.join($path)).unwrap()
} else {
include_bytes!(concat!("../../", $path)).to_vec()
}
}
#[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))]
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Vec<u8> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
fs::read(tree_sitter_dir.join($path)).unwrap()
} else {
include_bytes!(concat!("../../", $path)).to_vec()
}
}
};
}
macro_rules! optional_resource {
($name: tt, $path: tt) => {
#[cfg(TREE_SITTER_EMBED_WASM_BINDING)]
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Vec<u8> {
@ -52,15 +30,15 @@ macro_rules! optional_resource {
};
}
resource!(get_main_html, "cli/src/web_ui.html");
resource!(get_main_html, "cli/src/playground.html");
resource!(get_playground_js, "docs/assets/js/playground.js");
optional_resource!(get_lib_js, "lib/binding_web/tree-sitter.js");
optional_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm");
resource!(get_lib_js, "lib/binding_web/tree-sitter.js");
resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm");
pub fn serve(grammar_path: &Path, open_in_browser: bool) {
let port = get_available_port().expect("Couldn't find an available port");
let url = format!("127.0.0.1:{}", port);
let server = Server::http(&url).expect("Failed to start web server");
let addr = format!("127.0.0.1:{}", port);
let server = Server::http(&addr).expect("Failed to start web server");
let grammar_name = wasm::get_grammar_name(&grammar_path.join("src"))
.with_context(|| "Failed to get wasm filename")
.unwrap();
@ -73,8 +51,10 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) {
)
})
.unwrap();
let url = format!("http://{}", addr);
println!("Started playground on: {}", url);
if open_in_browser {
if let Err(_) = webbrowser::open(&format!("http://127.0.0.1:{}", port)) {
if let Err(_) = webbrowser::open(&url) {
eprintln!("Failed to open '{}' in a web browser", url);
}
}
@ -95,17 +75,23 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) {
for request in server.incoming_requests() {
let res = match request.url() {
"/" => response(&main_html, &html_header),
"/playground.js" => response(&playground_js, &js_header),
"/tree-sitter-parser.wasm" => response(&language_wasm, &wasm_header),
"/playground.js" => {
if playground_js.is_empty() {
redirect("https://tree-sitter.github.io/tree-sitter/assets/js/playground.js")
} else {
response(&playground_js, &js_header)
}
}
"/tree-sitter.js" => {
if cfg!(windows) {
if lib_js.is_empty() {
redirect("https://tree-sitter.github.io/tree-sitter.js")
} else {
response(&lib_js, &js_header)
}
}
"/tree-sitter.wasm" => {
if cfg!(windows) {
if lib_wasm.is_empty() {
redirect("https://tree-sitter.github.io/tree-sitter.wasm")
} else {
response(&lib_wasm, &wasm_header)
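Instead of keying the fallback on `cfg!(windows)`, the playground now serves the embedded `tree-sitter.js`/`tree-sitter.wasm` payloads when they were compiled in and redirects to the hosted copies when they are empty. A stripped-down sketch of that serve-or-redirect loop with `tiny_http` (the route, port, and payload here are placeholders):

```rust
use tiny_http::{Header, Response, ResponseBox, Server};

fn redirect(url: &str) -> ResponseBox {
    Response::empty(302)
        .with_header(Header::from_bytes(&b"Location"[..], url.as_bytes()).unwrap())
        .boxed()
}

fn main() {
    let server = Server::http("127.0.0.1:8000").expect("Failed to start web server");
    let lib_js: Vec<u8> = Vec::new(); // empty when the wasm binding wasn't embedded

    for request in server.incoming_requests() {
        let res: ResponseBox = match request.url() {
            // Fall back to the hosted copy rather than serving nothing.
            "/tree-sitter.js" if lib_js.is_empty() => {
                redirect("https://tree-sitter.github.io/tree-sitter.js")
            }
            _ => Response::from_string("playground").boxed(),
        };
        let _ = request.respond(res);
    }
}
```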


@ -48,10 +48,12 @@ pub fn query_files_at_paths(
let capture_name = &query.capture_names()[capture.index as usize];
writeln!(
&mut stdout,
" pattern: {}, capture: {}, row: {}, text: {:?}",
" pattern: {:>2}, capture: {} - {}, start: {}, end: {}, text: `{}`",
mat.pattern_index,
capture.index,
capture_name,
capture.node.start_position().row,
capture.node.start_position(),
capture.node.end_position(),
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
results.push(query_testing::CaptureInfo {
@ -70,9 +72,11 @@ pub fn query_files_at_paths(
if end.row == start.row {
writeln!(
&mut stdout,
" capture: {}, start: {}, text: {:?}",
" capture: {} - {}, start: {}, end: {}, text: `{}`",
capture.index,
capture_name,
start,
end,
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
} else {


@ -48,40 +48,38 @@ pub fn parse_position_comments(
if node.kind().contains("comment") {
if let Ok(text) = node.utf8_text(source) {
let mut position = node.start_position();
if position.row == 0 {
continue;
}
// Find the arrow character ("^" or "<-") in the comment. A left arrow
// refers to the column where the comment node starts. An up arrow refers
// to its own column.
let mut has_left_caret = false;
let mut has_arrow = false;
let mut arrow_end = 0;
for (i, c) in text.char_indices() {
arrow_end = i + 1;
if c == '-' && has_left_caret {
has_arrow = true;
break;
if position.row > 0 {
// Find the arrow character ("^" or "<-") in the comment. A left arrow
// refers to the column where the comment node starts. An up arrow refers
// to its own column.
let mut has_left_caret = false;
let mut has_arrow = false;
let mut arrow_end = 0;
for (i, c) in text.char_indices() {
arrow_end = i + 1;
if c == '-' && has_left_caret {
has_arrow = true;
break;
}
if c == '^' {
has_arrow = true;
position.column += i;
break;
}
has_left_caret = c == '<';
}
if c == '^' {
has_arrow = true;
position.column += i;
break;
}
has_left_caret = c == '<';
}
// If the comment node contains an arrow and a highlight name, record the
// highlight name and the position.
if let (true, Some(mat)) =
(has_arrow, CAPTURE_NAME_REGEX.find(&text[arrow_end..]))
{
assertion_ranges.push((node.start_position(), node.end_position()));
result.push(Assertion {
position: position,
expected_capture_name: mat.as_str().to_string(),
});
// If the comment node contains an arrow and a highlight name, record the
// highlight name and the position.
if let (true, Some(mat)) =
(has_arrow, CAPTURE_NAME_REGEX.find(&text[arrow_end..]))
{
assertion_ranges.push((node.start_position(), node.end_position()));
result.push(Assertion {
position: position,
expected_capture_name: mat.as_str().to_string(),
});
}
}
}
}


@ -5,7 +5,6 @@ use difference::{Changeset, Difference};
use lazy_static::lazy_static;
use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder};
use regex::Regex;
use std::char;
use std::ffi::OsStr;
use std::fmt::Write as FmtWrite;
use std::fs;
@ -16,11 +15,12 @@ use tree_sitter::{Language, LogType, Parser, Query};
use walkdir::WalkDir;
lazy_static! {
static ref HEADER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^===+\r?\n([^=]*)\r?\n===+\r?\n")
.multi_line(true)
.build()
.unwrap();
static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^---+\r?\n")
static ref HEADER_REGEX: ByteRegex =
ByteRegexBuilder::new(r"^===+(?P<suffix1>[^=\r\n][^\r\n]*)?\r?\n(?P<test_name>[^=\r\n][^\r\n]*)\r?\n===+(?P<suffix2>[^=\r\n][^\r\n]*)?\r?\n")
.multi_line(true)
.build()
.unwrap();
static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^---+(?P<suffix>[^-\r\n][^\r\n]*)?\r?\n")
.multi_line(true)
.build()
.unwrap();
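The new `HEADER_REGEX` captures an optional suffix on the `===` lines plus the test name as named groups, which is what lets a test file declare a custom header suffix and then safely contain `===`/`---` lines in its body. Exercising the same expression (copied from the diff) on a minimal header:

```rust
use regex::bytes::RegexBuilder;

fn main() {
    let header_regex = RegexBuilder::new(
        r"^===+(?P<suffix1>[^=\r\n][^\r\n]*)?\r?\n(?P<test_name>[^=\r\n][^\r\n]*)\r?\n===+(?P<suffix2>[^=\r\n][^\r\n]*)?\r?\n",
    )
    .multi_line(true)
    .build()
    .unwrap();

    let bytes = b"===abc\nFirst test\n===abc\n(a)\n";
    let caps = header_regex.captures(&bytes[..]).unwrap();
    // The suffix and the test name come back as separate named groups.
    assert_eq!(&caps["suffix1"], &b"abc"[..]);
    assert_eq!(&caps["test_name"], &b"First test"[..]);
}
```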
@ -114,7 +114,9 @@ pub fn run_tests_at_path(
print_diff_key();
for (i, (name, actual, expected)) in failures.iter().enumerate() {
println!("\n {}. {}:", i + 1, name);
print_diff(actual, expected);
let actual = format_sexp_indented(&actual, 2);
let expected = format_sexp_indented(&expected, 2);
print_diff(&actual, &expected);
}
Err(anyhow!(""))
}
@ -153,8 +155,7 @@ pub fn print_diff_key() {
}
pub fn print_diff(actual: &String, expected: &String) {
let changeset = Changeset::new(actual, expected, " ");
print!(" ");
let changeset = Changeset::new(actual, expected, "\n");
for diff in &changeset.diffs {
match diff {
Difference::Same(part) => {
@ -263,9 +264,13 @@ fn run_tests(
}
fn format_sexp(sexp: &String) -> String {
format_sexp_indented(sexp, 0)
}
fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String {
let mut formatted = String::new();
let mut indent_level = 0;
let mut indent_level = initial_indent_level;
let mut has_field = false;
let mut s_iter = sexp.split(|c| c == ' ' || c == ')');
while let Some(s) = s_iter.next() {
@ -375,22 +380,58 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
let mut prev_name = String::new();
let mut prev_header_end = 0;
// Identify all of the test descriptions using the `======` headers.
for (header_start, header_end) in HEADER_REGEX
.find_iter(&bytes)
.map(|m| (m.start(), m.end()))
.chain(Some((bytes.len(), bytes.len())))
{
// Find the longest line of dashes following each test description.
// That is the divider between input and expected output.
// Find the first test header in the file, and determine if it has a
// custom suffix. If so, then this suffix will be used to identify
// all subsequent headers and divider lines in the file.
let first_suffix = HEADER_REGEX
.captures(bytes)
.and_then(|c| c.name("suffix1"))
.map(|m| String::from_utf8_lossy(m.as_bytes()));
// Find all of the `===` test headers, which contain the test names.
// Ignore any matches whose suffix does not match the first header
// suffix in the file.
let header_matches = HEADER_REGEX.captures_iter(&bytes).filter_map(|c| {
let suffix1 = c
.name("suffix1")
.map(|m| String::from_utf8_lossy(m.as_bytes()));
let suffix2 = c
.name("suffix2")
.map(|m| String::from_utf8_lossy(m.as_bytes()));
if suffix1 == first_suffix && suffix2 == first_suffix {
let header_range = c.get(0).unwrap().range();
let test_name = c
.name("test_name")
.map(|c| String::from_utf8_lossy(c.as_bytes()).to_string());
Some((header_range, test_name))
} else {
None
}
});
for (header_range, test_name) in header_matches.chain(Some((bytes.len()..bytes.len(), None))) {
// Find the longest line of dashes following each test description. That line
// separates the input from the expected output. Ignore any matches whose suffix
// does not match the first suffix in the file.
if prev_header_end > 0 {
let divider_match = DIVIDER_REGEX
.find_iter(&bytes[prev_header_end..header_start])
.map(|m| (prev_header_end + m.start(), prev_header_end + m.end()))
.max_by_key(|(start, end)| end - start);
if let Some((divider_start, divider_end)) = divider_match {
if let Ok(output) = str::from_utf8(&bytes[divider_end..header_start]) {
let mut input = bytes[prev_header_end..divider_start].to_vec();
let divider_range = DIVIDER_REGEX
.captures_iter(&bytes[prev_header_end..header_range.start])
.filter_map(|m| {
let suffix = m
.name("suffix")
.map(|m| String::from_utf8_lossy(m.as_bytes()));
if suffix == first_suffix {
let range = m.get(0).unwrap().range();
Some((prev_header_end + range.start)..(prev_header_end + range.end))
} else {
None
}
})
.max_by_key(|range| range.len());
if let Some(divider_range) = divider_range {
if let Ok(output) = str::from_utf8(&bytes[divider_range.end..header_range.start]) {
let mut input = bytes[prev_header_end..divider_range.start].to_vec();
// Remove trailing newline from the input.
input.pop();
@ -400,6 +441,7 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
// Remove all comments
let output = COMMENT_REGEX.replace_all(output, "").to_string();
// Normalize the whitespace in the expected output.
let output = WHITESPACE_REGEX.replace_all(output.trim(), " ");
let output = output.replace(" )", ")");
@ -417,10 +459,8 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
}
}
}
prev_name = String::from_utf8_lossy(&bytes[header_start..header_end])
.trim_matches(|c| char::is_whitespace(c) || c == '=')
.to_string();
prev_header_end = header_end;
prev_name = test_name.unwrap_or(String::new());
prev_header_end = header_range.end;
}
TestEntry::Group {
name,
@ -434,7 +474,7 @@ mod tests {
use super::*;
#[test]
fn test_parse_test_content() {
fn test_parse_test_content_simple() {
let entry = parse_test_content(
"the-filename".to_string(),
r#"
@ -664,4 +704,88 @@ code
}
);
}
#[test]
fn test_parse_test_content_with_suffixes() {
let entry = parse_test_content(
"the-filename".to_string(),
r#"
==================asdf\()[]|{}*+?^$.-
First test
==================asdf\()[]|{}*+?^$.-
=========================
NOT A TEST HEADER
=========================
-------------------------
---asdf\()[]|{}*+?^$.-
(a)
==================asdf\()[]|{}*+?^$.-
Second test
==================asdf\()[]|{}*+?^$.-
=========================
NOT A TEST HEADER
=========================
-------------------------
---asdf\()[]|{}*+?^$.-
(a)
=========================asdf\()[]|{}*+?^$.-
Test name with = symbol
=========================asdf\()[]|{}*+?^$.-
=========================
NOT A TEST HEADER
=========================
-------------------------
---asdf\()[]|{}*+?^$.-
(a)
"#
.trim()
.to_string(),
None,
);
let expected_input = "\n=========================\n\
NOT A TEST HEADER\n\
=========================\n\
-------------------------\n"
.as_bytes()
.to_vec();
assert_eq!(
entry,
TestEntry::Group {
name: "the-filename".to_string(),
children: vec![
TestEntry::Example {
name: "First test".to_string(),
input: expected_input.clone(),
output: "(a)".to_string(),
has_fields: false,
},
TestEntry::Example {
name: "Second test".to_string(),
input: expected_input.clone(),
output: "(a)".to_string(),
has_fields: false,
},
TestEntry::Example {
name: "Test name with = symbol".to_string(),
input: expected_input.clone(),
output: "(a)".to_string(),
has_fields: false,
}
],
file_path: None,
}
);
}
}


@ -63,9 +63,14 @@ fn test_parsing_with_logging() {
)));
assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string())));
let mut row_starts_from_0 = false;
for (_, m) in &messages {
assert!(!m.contains("row:0"));
if m.contains("row:0") {
row_starts_from_0 = true;
break;
}
}
assert!(row_starts_from_0);
}
#[test]
@ -849,7 +854,10 @@ fn test_parsing_with_multiple_included_ranges() {
hello_text_node.start_byte(),
source_code.find("Hello").unwrap()
);
assert_eq!(hello_text_node.end_byte(), source_code.find("<b>").unwrap());
assert_eq!(
hello_text_node.end_byte(),
source_code.find(" <b>").unwrap()
);
assert_eq!(b_start_tag_node.kind(), "start_tag");
assert_eq!(


@ -17,6 +17,7 @@ fn test_highlight_test_with_basic_test() {
],
);
let source = [
"// hi",
"var abc = function(d) {",
" // ^ function",
" // ^ keyword",
@ -32,15 +33,15 @@ fn test_highlight_test_with_basic_test() {
assertions,
&[
Assertion {
position: Point::new(0, 5),
position: Point::new(1, 5),
expected_capture_name: "function".to_string()
},
Assertion {
position: Point::new(0, 11),
position: Point::new(1, 11),
expected_capture_name: "keyword".to_string()
},
Assertion {
position: Point::new(3, 9),
position: Point::new(4, 9),
expected_capture_name: "variable.parameter".to_string()
},
]
@ -53,12 +54,12 @@ fn test_highlight_test_with_basic_test() {
assert_eq!(
highlight_positions,
&[
(Point::new(0, 0), Point::new(0, 3), Highlight(2)), // "var"
(Point::new(0, 4), Point::new(0, 7), Highlight(0)), // "abc"
(Point::new(0, 10), Point::new(0, 18), Highlight(2)), // "function"
(Point::new(0, 19), Point::new(0, 20), Highlight(1)), // "d"
(Point::new(3, 2), Point::new(3, 8), Highlight(2)), // "return"
(Point::new(3, 9), Point::new(3, 10), Highlight(1)), // "d"
(Point::new(1, 0), Point::new(1, 3), Highlight(2)), // "var"
(Point::new(1, 4), Point::new(1, 7), Highlight(0)), // "abc"
(Point::new(1, 10), Point::new(1, 18), Highlight(2)), // "function"
(Point::new(1, 19), Point::new(1, 20), Highlight(1)), // "d"
(Point::new(4, 2), Point::new(4, 8), Highlight(2)), // "return"
(Point::new(4, 9), Point::new(4, 10), Highlight(1)), // "d"
]
);
}


@ -6,8 +6,8 @@ GEM
minitest (~> 5.1)
thread_safe (~> 0.3, >= 0.3.4)
tzinfo (~> 1.1)
addressable (2.5.2)
public_suffix (>= 2.0.2, < 4.0)
addressable (2.8.0)
public_suffix (>= 2.0.2, < 5.0)
coffee-script (2.4.1)
coffee-script-source
execjs
@ -16,12 +16,27 @@ GEM
commonmarker (0.17.8)
ruby-enum (~> 0.5)
concurrent-ruby (1.0.5)
ethon (0.11.0)
ffi (>= 1.3.0)
ethon (0.14.0)
ffi (>= 1.15.0)
execjs (2.7.0)
faraday (0.14.0)
faraday (1.5.1)
faraday-em_http (~> 1.0)
faraday-em_synchrony (~> 1.0)
faraday-excon (~> 1.1)
faraday-httpclient (~> 1.0.1)
faraday-net_http (~> 1.0)
faraday-net_http_persistent (~> 1.1)
faraday-patron (~> 1.0)
multipart-post (>= 1.2, < 3)
ffi (1.9.23)
ruby2_keywords (>= 0.0.4)
faraday-em_http (1.0.0)
faraday-em_synchrony (1.0.0)
faraday-excon (1.1.0)
faraday-httpclient (1.0.1)
faraday-net_http (1.0.1)
faraday-net_http_persistent (1.2.0)
faraday-patron (1.0.0)
ffi (1.15.3)
forwardable-extended (2.6.0)
gemoji (3.0.0)
github-pages (177)
@ -195,33 +210,35 @@ GEM
minima (2.1.1)
jekyll (~> 3.3)
minitest (5.11.3)
multipart-post (2.0.0)
net-dns (0.8.0)
multipart-post (2.1.1)
net-dns (0.9.0)
nokogiri (1.11.4)
mini_portile2 (~> 2.5.0)
racc (~> 1.4)
octokit (4.8.0)
octokit (4.21.0)
faraday (>= 0.9)
sawyer (~> 0.8.0, >= 0.5.3)
pathutil (0.16.1)
pathutil (0.16.2)
forwardable-extended (~> 2.6)
public_suffix (2.0.5)
racc (1.5.2)
rb-fsevent (0.10.2)
rb-inotify (0.9.10)
ffi (>= 0.5.0, < 2)
rb-fsevent (0.11.0)
rb-inotify (0.10.1)
ffi (~> 1.0)
rouge (2.2.1)
ruby-enum (0.7.2)
i18n
ruby2_keywords (0.0.4)
rubyzip (2.0.0)
safe_yaml (1.0.4)
sass (3.5.5)
safe_yaml (1.0.5)
sass (3.7.4)
sass-listen (~> 4.0.0)
sass-listen (4.0.0)
rb-fsevent (~> 0.9, >= 0.9.4)
rb-inotify (~> 0.9, >= 0.9.7)
sawyer (0.8.1)
addressable (>= 2.3.5, < 2.6)
faraday (~> 0.8, < 1.0)
sawyer (0.8.2)
addressable (>= 2.3.5)
faraday (> 0.8, < 2.0)
terminal-table (1.8.0)
unicode-display_width (~> 1.1, >= 1.1.1)
thread_safe (0.3.6)


@ -15,12 +15,13 @@ Tree-sitter is a parser generator tool and an incremental parsing library. It ca
There are currently bindings that allow Tree-sitter to be used from the following languages:
* [Rust](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust)
* [JavaScript (Wasm)](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web)
* [Haskell](https://github.com/tree-sitter/haskell-tree-sitter)
* [JavaScript (Node.js)](https://github.com/tree-sitter/node-tree-sitter)
* [JavaScript (Wasm)](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web)
* [OCaml](https://github.com/returntocorp/ocaml-tree-sitter-core)
* [Python](https://github.com/tree-sitter/py-tree-sitter)
* [Ruby](https://github.com/tree-sitter/ruby-tree-sitter)
* [Haskell](https://github.com/tree-sitter/haskell-tree-sitter)
* [Rust](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust)
### Available Parsers
@ -31,11 +32,13 @@ Parsers for these languages are fairly complete:
* [C#](https://github.com/tree-sitter/tree-sitter-c-sharp)
* [C++](https://github.com/tree-sitter/tree-sitter-cpp)
* [CSS](https://github.com/tree-sitter/tree-sitter-css)
* [DOT](https://github.com/rydesun/tree-sitter-dot)
* [Elm](https://github.com/elm-tooling/tree-sitter-elm)
* [Eno](https://github.com/eno-lang/tree-sitter-eno)
* [ERB / EJS](https://github.com/tree-sitter/tree-sitter-embedded-template)
* [Fennel](https://github.com/travonted/tree-sitter-fennel)
* [Go](https://github.com/tree-sitter/tree-sitter-go)
* [HCL](https://github.com/MichaHoffmann/tree-sitter-hcl)
* [HTML](https://github.com/tree-sitter/tree-sitter-html)
* [Java](https://github.com/tree-sitter/tree-sitter-java)
* [JavaScript](https://github.com/tree-sitter/tree-sitter-javascript)
@ -60,6 +63,7 @@ Parsers for these languages are fairly complete:
* [Vue](https://github.com/ikatyang/tree-sitter-vue)
* [YAML](https://github.com/ikatyang/tree-sitter-yaml)
* [WASM](https://github.com/wasm-lsp/tree-sitter-wasm)
* [WGSL WebGPU Shading Language](https://github.com/mehmetoguzderin/tree-sitter-wgsl)
Parsers for these languages are in development:
@ -67,10 +71,12 @@ Parsers for these languages are in development:
* [Erlang](https://github.com/AbstractMachinesLab/tree-sitter-erlang/)
* [Dockerfile](https://github.com/camdencheek/tree-sitter-dockerfile)
* [Go mod](https://github.com/camdencheek/tree-sitter-go-mod)
* [Hack](https://github.com/slackhq/tree-sitter-hack)
* [Haskell](https://github.com/tree-sitter/tree-sitter-haskell)
* [Julia](https://github.com/tree-sitter/tree-sitter-julia)
* [Kotlin](https://github.com/fwcd/tree-sitter-kotlin)
* [Nix](https://github.com/cstrahan/tree-sitter-nix)
* [Objective-C](https://github.com/jiyee/tree-sitter-objc)
* [Perl](https://github.com/ganezdragon/tree-sitter-perl)
* [Scala](https://github.com/tree-sitter/tree-sitter-scala)
* [Sourcepawn](https://github.com/nilshelmig/tree-sitter-sourcepawn)
@ -89,8 +95,8 @@ Parsers for these languages are in development:
The design of Tree-sitter was greatly influenced by the following research papers:
- [Practical Algorithms for Incremental Software Development Environments](https://www2.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-946.pdf)
- [Context Aware Scanning for Parsing Extensible Languages](http://www.umsec.umn.edu/publications/Context-Aware-Scanning-Parsing-Extensible)
- [Efficient and Flexible Incremental Parsing](http://ftp.cs.berkeley.edu/sggs/toplas-parsing.ps)
- [Incremental Analysis of Real Programming Languages](https://pdfs.semanticscholar.org/ca69/018c29cc415820ed207d7e1d391e2da1656f.pdf)
- [Context Aware Scanning for Parsing Extensible Languages](https://www-users.cse.umn.edu/~evw/pubs/vanwyk07gpce/vanwyk07gpce.pdf)
- [Efficient and Flexible Incremental Parsing](http://harmonia.cs.berkeley.edu/papers/twagner-parsing.pdf)
- [Incremental Analysis of Real Programming Languages](http://harmonia.cs.berkeley.edu/papers/twagner-glr.pdf)
- [Error Detection and Recovery in LR Parsers](http://what-when-how.com/compiler-writing/bottom-up-parsing-compiler-writing-part-13)
- [Error Recovery for LR Parsers](http://www.dtic.mil/dtic/tr/fulltext/u2/a043470.pdf)
- [Error Recovery for LR Parsers](https://apps.dtic.mil/sti/pdfs/ADA043470.pdf)


@ -464,7 +464,7 @@ In general, it's a good idea to make patterns more specific by specifying [field
#### Negated Fields
You can also constrain a pattern so that it only mathces nodes that *lack* a certain field. To do this, add a field name prefixed by a `!` within the parent pattern. For example, this pattern would match a class declaration with no type parameters:
You can also constrain a pattern so that it only matches nodes that *lack* a certain field. To do this, add a field name prefixed by a `!` within the parent pattern. For example, this pattern would match a class declaration with no type parameters:
```
(class_declaration
@ -586,8 +586,10 @@ This pattern would match a set of possible keyword tokens, capturing them as `@k
#### Wildcard Node
A wildcard node is represented with an underscore (`(_)`), it matches any node.
A wildcard node is represented with an underscore (`_`); it matches any node.
This is similar to `.` in regular expressions.
There are two forms: `(_)` matches any named node,
and `_` matches any named or anonymous node.
For example, this pattern would match any node inside a call:


@ -84,7 +84,7 @@ tree-sitter parse example-file
This should print the following:
```
(source_file [1, 0] - [1, 5])
(source_file [0, 0] - [1, 0])
```
You now have a working parser.
@ -95,7 +95,7 @@ Let's go over all of the functionality of the `tree-sitter` command line tool.
### Command: `generate`
The most important command you'll use is `tree-sitter generate`. This command reads the `grammar.js` file in your current working directory and creates a file called `src/parser.c`, which implements the parser. After making changes to your grammar, just run `tree-sitter` generate again.
The most important command you'll use is `tree-sitter generate`. This command reads the `grammar.js` file in your current working directory and creates a file called `src/parser.c`, which implements the parser. After making changes to your grammar, just run `tree-sitter generate` again.
The first time you run `tree-sitter generate`, it will also generate a few other files:
@ -674,7 +674,7 @@ This function is responsible for recognizing external tokens. It should return `
* **`TSSymbol result_symbol`** - The symbol that was recognized. Your scan function should *assign* to this field one of the values from the `TokenType` enum, described above.
* **`void (*advance)(TSLexer *, bool skip)`** - A function for advancing to the next character. If you pass `true` for the second argument, the current character will be treated as whitespace.
* **`void (*mark_end)(TSLexer *)`** - A function for marking the end of the recognized token. This allows matching tokens that require multiple characters of lookahead. By default (if you don't call `mark_end`), any character that you moved past using the `advance` function will be included in the size of the token. But once you call `mark_end`, then any later calls to `advance` will *not* increase the size of the returned token. You can call `mark_end` multiple times to increase the size of the token.
* **`uint32_t (*get_column)(TSLexer *)`** - **(Experimental)** A function for querying the current column position of the lexer. It returns the number of unicode code points (not bytes) since the start of the current line.
* **`uint32_t (*get_column)(TSLexer *)`** - A function for querying the current column position of the lexer. It returns the number of bytes (not characters) since the start of the current line.
* **`bool (*is_at_included_range_start)(TSLexer *)`** - A function for checking if the parser has just skipped some characters in the document. When parsing an embedded document using the `ts_parser_set_included_ranges` function (described in the [multi-language document section][multi-language-section]), your scanner may want to apply some special behavior when moving to a disjoint part of the document. For example, in [EJS documents][ejs], the JavaScript parser uses this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`.
The third argument to the `scan` function is an array of booleans that indicates which of your external tokens are currently expected by the parser. You should only look for a given token if it is valid according to this array. At the same time, you cannot backtrack, so you may need to combine certain pieces of logic.


@ -29,7 +29,7 @@ git clone https://github.com/tree-sitter/tree-sitter
cd tree-sitter
```
Optionally, build the WASM library. If you skip this step, then the `tree-sitter web-ui` command will require an internet connection. If you have emscripten installed, this will use your `emcc` compiler. Otherwise, it will use Docker:
Optionally, build the WASM library. If you skip this step, then the `tree-sitter playground` command will require an internet connection. If you have emscripten installed, this will use your `emcc` compiler. Otherwise, it will use Docker:
```sh
./script/build-wasm


@ -4,7 +4,7 @@ description = "Library for performing syntax highlighting with Tree-sitter"
version = "0.20.0"
authors = [
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
"Tim Clem <timothy.clem@gmail.com>"
"Tim Clem <timothy.clem@gmail.com>",
]
license = "MIT"
readme = "README.md"
@ -21,5 +21,5 @@ regex = "1"
thiserror = "1.0"
[dependencies.tree-sitter]
version = ">= 0.3.7"
version = "0.20"
path = "../lib"


@ -586,7 +586,7 @@ where
break;
}
if i > 0 {
&self.layers[0..(i + 1)].rotate_left(1);
self.layers[0..(i + 1)].rotate_left(1);
}
break;
} else {


@ -133,6 +133,7 @@ pub const TSQueryError_TSQueryErrorNodeType: TSQueryError = 2;
pub const TSQueryError_TSQueryErrorField: TSQueryError = 3;
pub const TSQueryError_TSQueryErrorCapture: TSQueryError = 4;
pub const TSQueryError_TSQueryErrorStructure: TSQueryError = 5;
pub const TSQueryError_TSQueryErrorLanguage: TSQueryError = 6;
pub type TSQueryError = u32;
extern "C" {
#[doc = " Create a new parser."]


@ -202,6 +202,7 @@ pub enum QueryErrorKind {
Capture,
Predicate,
Structure,
Language,
}
#[derive(Debug)]
@ -629,7 +630,7 @@ impl Parser {
/// If a pointer is assigned, then the parser will periodically read from
/// this pointer during parsing. If it reads a non-zero value, it will halt early,
/// returning `None`. See [parse](Parser::parse) for more information.
pub unsafe fn set_cancellation_flag(&self, flag: Option<&AtomicUsize>) {
pub unsafe fn set_cancellation_flag(&mut self, flag: Option<&AtomicUsize>) {
if let Some(flag) = flag {
ffi::ts_parser_set_cancellation_flag(
self.0.as_ptr(),
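The receiver change above (`&self` to `&mut self`) only tightens the Rust signature; the behavior it wraps is the C-level flag. A hedged sketch of that underlying C API, assuming a `tree_sitter_json` grammar is linked in:

```c
#include <tree_sitter/api.h>
#include <string.h>

const TSLanguage *tree_sitter_json(void);  // assumption: JSON grammar linked in

static size_t cancel_flag = 0;  // set to non-zero from another thread to cancel

TSTree *parse_with_cancellation(const char *source) {
  TSParser *parser = ts_parser_new();
  ts_parser_set_language(parser, tree_sitter_json());

  // The parser periodically reads this pointer while parsing; a non-zero
  // value makes the parse halt early and return NULL.
  ts_parser_set_cancellation_flag(parser, &cancel_flag);

  TSTree *tree =
    ts_parser_parse_string(parser, NULL, source, (uint32_t)strlen(source));
  ts_parser_delete(parser);
  return tree;  // NULL if the flag became non-zero mid-parse
}
```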
@ -1231,6 +1232,19 @@ impl Query {
// On failure, build an error based on the error code and offset.
if ptr.is_null() {
if error_type == ffi::TSQueryError_TSQueryErrorLanguage {
return Err(QueryError {
row: 0,
column: 0,
offset: 0,
message: LanguageError {
version: language.version(),
}
.to_string(),
kind: QueryErrorKind::Language,
});
}
let offset = error_offset as usize;
let mut line_start = 0;
let mut row = 0;
@ -1739,6 +1753,10 @@ impl QueryCursor {
}
impl<'a, 'tree> QueryMatch<'a, 'tree> {
pub fn id(&self) -> u32 {
self.id
}
pub fn remove(self) {
unsafe { ffi::ts_query_cursor_remove_match(self.cursor, self.id) }
}
@ -1803,21 +1821,36 @@ impl<'a, 'tree> QueryMatch<'a, 'tree> {
.iter()
.all(|predicate| match predicate {
TextPredicate::CaptureEqCapture(i, j, is_positive) => {
let node1 = self.nodes_for_capture_index(*i).next().unwrap();
let node2 = self.nodes_for_capture_index(*j).next().unwrap();
let text1 = get_text(buffer1, text_provider.text(node1));
let text2 = get_text(buffer2, text_provider.text(node2));
(text1 == text2) == *is_positive
let node1 = self.nodes_for_capture_index(*i).next();
let node2 = self.nodes_for_capture_index(*j).next();
match (node1, node2) {
(Some(node1), Some(node2)) => {
let text1 = get_text(buffer1, text_provider.text(node1));
let text2 = get_text(buffer2, text_provider.text(node2));
(text1 == text2) == *is_positive
}
_ => true,
}
}
TextPredicate::CaptureEqString(i, s, is_positive) => {
let node = self.nodes_for_capture_index(*i).next().unwrap();
let text = get_text(buffer1, text_provider.text(node));
(text == s.as_bytes()) == *is_positive
let node = self.nodes_for_capture_index(*i).next();
match node {
Some(node) => {
let text = get_text(buffer1, text_provider.text(node));
(text == s.as_bytes()) == *is_positive
}
None => true,
}
}
TextPredicate::CaptureMatchString(i, r, is_positive) => {
let node = self.nodes_for_capture_index(*i).next().unwrap();
let text = get_text(buffer1, text_provider.text(node));
r.is_match(text) == *is_positive
let node = self.nodes_for_capture_index(*i).next();
match node {
Some(node) => {
let text = get_text(buffer1, text_provider.text(node));
r.is_match(text) == *is_positive
}
None => true,
}
}
})
}
@ -2105,21 +2138,27 @@ impl fmt::Display for LanguageError {
impl fmt::Display for QueryError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"Query error at {}:{}. {}{}",
self.row + 1,
self.column + 1,
match self.kind {
QueryErrorKind::Field => "Invalid field name ",
QueryErrorKind::NodeType => "Invalid node type ",
QueryErrorKind::Capture => "Invalid capture name ",
QueryErrorKind::Predicate => "Invalid predicate: ",
QueryErrorKind::Structure => "Impossible pattern:\n",
QueryErrorKind::Syntax => "Invalid syntax:\n",
},
self.message
)
let msg = match self.kind {
QueryErrorKind::Field => "Invalid field name ",
QueryErrorKind::NodeType => "Invalid node type ",
QueryErrorKind::Capture => "Invalid capture name ",
QueryErrorKind::Predicate => "Invalid predicate: ",
QueryErrorKind::Structure => "Impossible pattern:\n",
QueryErrorKind::Syntax => "Invalid syntax:\n",
QueryErrorKind::Language => "",
};
if msg.len() > 0 {
write!(
f,
"Query error at {}:{}. {}{}",
self.row + 1,
self.column + 1,
msg,
self.message
)
} else {
write!(f, "{}", self.message)
}
}
}

View file

@ -3,3 +3,4 @@
package-lock.json
node_modules
*.tgz
LICENSE

View file

@ -17,24 +17,15 @@ var MIN_COMPATIBLE_VERSION;
var TRANSFER_BUFFER;
var currentParseCallback;
var currentLogCallback;
var initPromise = new Promise(resolve => {
Module.onRuntimeInitialized = resolve
}).then(() => {
TRANSFER_BUFFER = C._ts_init();
VERSION = getValue(TRANSFER_BUFFER, 'i32');
MIN_COMPATIBLE_VERSION = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
});
class Parser {
class ParserImpl {
static init() {
return initPromise;
TRANSFER_BUFFER = C._ts_init();
VERSION = getValue(TRANSFER_BUFFER, 'i32');
MIN_COMPATIBLE_VERSION = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
}
constructor() {
if (TRANSFER_BUFFER == null) {
throw new Error('You must first call Parser.init() and wait for it to resolve.');
}
initialize() {
C._ts_parser_new_wasm();
this[0] = getValue(TRANSFER_BUFFER, 'i32');
this[1] = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
@ -794,6 +785,7 @@ class Language {
if (c.name === captureName1) node1 = c.node;
if (c.name === captureName2) node2 = c.node;
}
if(node1 === undefined || node2 === undefined) return true;
return (node1.text === node2.text) === isPositive;
});
} else {
@ -805,7 +797,7 @@ class Language {
return (c.node.text === stringValue) === isPositive;
};
}
return false;
return true;
});
}
break;
@ -828,7 +820,7 @@ class Language {
for (const c of captures) {
if (c.name === captureName) return regex.test(c.node.text) === isPositive;
}
return false;
return true;
});
break;
@ -1203,6 +1195,3 @@ function marshalEdit(edit) {
setValue(address, edit.oldEndIndex, 'i32'); address += SIZE_OF_INT;
setValue(address, edit.newEndIndex, 'i32'); address += SIZE_OF_INT;
}
Parser.Language = Language;
Parser.Parser = Parser;

View file

@ -23,6 +23,7 @@
"_memchr",
"_memcmp",
"_memcpy",
"_memmove",
"_strlen",
"_towupper",

View file

@ -9,6 +9,7 @@
},
"scripts": {
"test": "mocha",
"prepack": "cp ../../LICENSE .",
"prepublishOnly": "node check-artifacts-fresh.js"
},
"repository": {

View file

@ -1,9 +1,15 @@
(function (root, factory) {
if (typeof define === 'function' && define.amd) {
define([], factory);
} else if (typeof exports === 'object') {
module.exports = factory();
} else {
window.TreeSitter = factory();
}
}(this, function () {
var TreeSitter = function() {
var initPromise;
class Parser {
constructor() {
this.initialize();
}
initialize() {
throw new Error("cannot construct a Parser before calling `init()`");
}
static init(moduleOptions) {
if (initPromise) return initPromise;
Module = Object.assign({ }, Module, moduleOptions);
return initPromise = new Promise((resolveInitPromise) => {

View file

@ -1,2 +1,23 @@
return Parser;
}));
for (const name of Object.getOwnPropertyNames(ParserImpl.prototype)) {
Object.defineProperty(Parser.prototype, name, {
value: ParserImpl.prototype[name],
enumerable: false,
writable: false,
})
}
Parser.Language = Language;
Module.onRuntimeInitialized = () => {
ParserImpl.init();
resolveInitPromise();
};
});
}
}
return Parser;
}();
if (typeof exports === 'object') {
module.exports = TreeSitter;
}

View file

@ -1,12 +1,19 @@
declare module 'web-tree-sitter' {
class Parser {
static init(): Promise<void>;
/**
*
* @param moduleOptions Optional Emscripten module object, see https://emscripten.org/docs/api_reference/module.html
*/
static init(moduleOptions?: object): Promise<void>;
delete(): void;
parse(input: string | Parser.Input, previousTree?: Parser.Tree, options?: Parser.Options): Parser.Tree;
getLanguage(): any;
setLanguage(language: any): void;
reset(): void;
getLanguage(): Parser.Language;
setLanguage(language?: Parser.Language | undefined | null): void;
getLogger(): Parser.Logger;
setLogger(logFunc: Parser.Logger): void;
setLogger(logFunc?: Parser.Logger | undefined | null): void;
setTimeoutMicros(value: number): void;
getTimeoutMicros(): number;
}
namespace Parser {
@ -96,8 +103,11 @@ declare module 'web-tree-sitter' {
export interface TreeCursor {
nodeType: string;
nodeTypeId: number;
nodeText: string;
nodeId: number;
nodeIsNamed: boolean;
nodeIsMissing: boolean;
startPosition: Point;
endPosition: Point;
startIndex: number;
@ -123,7 +133,7 @@ declare module 'web-tree-sitter' {
walk(): TreeCursor;
getChangedRanges(other: Tree): Range[];
getEditedRange(other: Tree): Range;
getLanguage(): any;
getLanguage(): Language;
}
class Language {

View file

@ -131,6 +131,7 @@ typedef enum {
TSQueryErrorField,
TSQueryErrorCapture,
TSQueryErrorStructure,
TSQueryErrorLanguage,
} TSQueryError;
/********************/
@ -618,7 +619,7 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *);
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *);
/**
* Get the field name of the tree cursor's current node.
* Get the field id of the tree cursor's current node.
*
* This returns zero if the current node doesn't have a field.
* See also `ts_node_child_by_field_id`, `ts_language_field_id_for_name`.
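A small sketch of the API this corrected comment describes: walking a node's children with a cursor and mapping each field id back to its name (construction of the tree itself is assumed):

```c
#include <stdio.h>
#include <tree_sitter/api.h>

void print_field_names(TSNode root, const TSLanguage *language) {
  TSTreeCursor cursor = ts_tree_cursor_new(root);
  if (ts_tree_cursor_goto_first_child(&cursor)) {
    do {
      // Zero means the current node is not the value of any field.
      TSFieldId id = ts_tree_cursor_current_field_id(&cursor);
      if (id != 0) {
        printf("field: %s\n", ts_language_field_name_for_id(language, id));
      }
    } while (ts_tree_cursor_goto_next_sibling(&cursor));
  }
  ts_tree_cursor_delete(&cursor);
}
```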

View file

@ -417,7 +417,7 @@ static Subtree ts_parser__lex(
LOG(
"lex_external state:%d, row:%u, column:%u",
lex_mode.external_lex_state,
current_position.extent.row + 1,
current_position.extent.row,
current_position.extent.column
);
ts_lexer_start(&self->lexer);
@ -456,7 +456,7 @@ static Subtree ts_parser__lex(
LOG(
"lex_internal state:%d, row:%u, column:%u",
lex_mode.lex_state,
current_position.extent.row + 1,
current_position.extent.row,
current_position.extent.column
);
ts_lexer_start(&self->lexer);
@ -1884,7 +1884,7 @@ TSTree *ts_parser_parse(
LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u",
version, ts_stack_version_count(self->stack),
ts_stack_state(self->stack, version),
ts_stack_position(self->stack, version).extent.row + 1,
ts_stack_position(self->stack, version).extent.row,
ts_stack_position(self->stack, version).extent.column);
if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL;

View file

@ -2069,6 +2069,15 @@ TSQuery *ts_query_new(
uint32_t *error_offset,
TSQueryError *error_type
) {
if (
!language ||
language->version > TREE_SITTER_LANGUAGE_VERSION ||
language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
) {
*error_type = TSQueryErrorLanguage;
return NULL;
}
TSQuery *self = ts_malloc(sizeof(TSQuery));
*self = (TSQuery) {
.steps = array_new(),
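From the caller's side, the new guard surfaces as `TSQueryErrorLanguage` before any parsing of the query string happens. A hedged usage sketch (the pattern string is a placeholder):

```c
#include <stdio.h>
#include <string.h>
#include <tree_sitter/api.h>

void try_query(const TSLanguage *language) {
  const char *source = "(identifier) @name";  // placeholder pattern
  uint32_t error_offset;
  TSQueryError error_type;
  TSQuery *query = ts_query_new(
    language, source, (uint32_t)strlen(source), &error_offset, &error_type
  );
  if (!query) {
    if (error_type == TSQueryErrorLanguage) {
      // NULL or version-incompatible language; error_offset is not meaningful.
      fprintf(stderr, "incompatible language version\n");
    } else {
      fprintf(stderr, "query error at offset %u\n", error_offset);
    }
    return;
  }
  ts_query_delete(query);
}
```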
@ -2552,6 +2561,7 @@ static void ts_query_cursor__add_state(
pattern->step_index
);
array_insert(&self->states, index, ((QueryState) {
.id = UINT32_MAX,
.capture_list_id = NONE,
.step_index = pattern->step_index,
.pattern_index = pattern->pattern_index,
@ -2716,7 +2726,6 @@ static inline bool ts_query_cursor__advance(
if (step->depth == PATTERN_DONE_MARKER) {
if (state->start_depth > self->depth || self->halted) {
LOG(" finish pattern %u\n", state->pattern_index);
state->id = self->next_state_id++;
array_push(&self->finished_states, *state);
did_match = true;
deleted_count++;
@ -3105,7 +3114,6 @@ static inline bool ts_query_cursor__advance(
LOG(" defer finishing pattern %u\n", state->pattern_index);
} else {
LOG(" finish pattern %u\n", state->pattern_index);
state->id = self->next_state_id++;
array_push(&self->finished_states, *state);
array_erase(&self->states, state - self->states.contents);
did_match = true;
@ -3160,6 +3168,7 @@ bool ts_query_cursor_next_match(
}
QueryState *state = &self->finished_states.contents[0];
if (state->id == UINT32_MAX) state->id = self->next_state_id++;
match->id = state->id;
match->pattern_index = state->pattern_index;
const CaptureList *captures = capture_list_pool_get(
@ -3269,6 +3278,7 @@ bool ts_query_cursor_next_capture(
}
if (state) {
if (state->id == UINT32_MAX) state->id = self->next_state_id++;
match->id = state->id;
match->pattern_index = state->pattern_index;
const CaptureList *captures = capture_list_pool_get(
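Putting the lazily-assigned id together, a minimal sketch of consuming matches from C (query construction omitted); `match.id` is the value the Rust binding exposes as `QueryMatch::id` and accepts in `remove`:

```c
#include <stdio.h>
#include <tree_sitter/api.h>

void list_matches(const TSQuery *query, TSNode root) {
  TSQueryCursor *cursor = ts_query_cursor_new();
  ts_query_cursor_exec(cursor, query, root);

  TSQueryMatch match;
  while (ts_query_cursor_next_match(cursor, &match)) {
    // The id is assigned the first time a finished state is handed out,
    // so every match returned here carries a valid, stable id.
    printf("match %u: pattern %u, %u captures\n",
           match.id,
           (unsigned)match.pattern_index,
           (unsigned)match.capture_count);
  }
  ts_query_cursor_delete(cursor);
}
```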

View file

@ -33,7 +33,7 @@ web_dir=lib/binding_web
emscripten_flags="-O3"
minify_js=1
force_docker=0
emscripen_version=$(cat "$(dirname "$0")"/../emscripten-version)
emscripen_version=$(cat "$(dirname "$0")"/../cli/emscripten-version)
while [[ $# > 0 ]]; do
case "$1" in

View file

@ -2,7 +2,7 @@
set -e
EMSCRIPTEN_VERSION=$(cat "$(dirname "$0")/../emscripten-version")
EMSCRIPTEN_VERSION=$(cat "$(dirname "$0")/../cli/emscripten-version")
mkdir -p target
EMSDK_DIR="./target/emsdk"

View file

@ -4,10 +4,14 @@
const CATEGORY_OUTPUT_PATH = './cli/src/generate/prepare_grammar/unicode-categories.json'
const PROPERTY_OUTPUT_PATH = './cli/src/generate/prepare_grammar/unicode-properties.json'
const CATEGORY_ALIAS_OUTPUT_PATH = './cli/src/generate/prepare_grammar/unicode-category-aliases.json'
const PROPERTY_ALIAS_OUTPUT_PATH = './cli/src/generate/prepare_grammar/unicode-property-aliases.json'
const CATEGORY_URL = 'https://unicode.org/Public/13.0.0/ucd/UnicodeData.txt'
const PROPERTY_URL = 'https://unicode.org/Public/13.0.0/ucd/PropList.txt'
const DERIVED_PROPERTY_URL = 'https://unicode.org/Public/13.0.0/ucd/DerivedCoreProperties.txt'
const CATEGORY_ALIAS_URL = 'https://unicode.org/Public/13.0.0/ucd/PropertyValueAliases.txt'
const PROPERTY_ALIAS_URL = 'https://unicode.org/Public/13.0.0/ucd/PropertyAliases.txt'
const fs = require('fs');
const path = require('path');
@ -16,7 +20,9 @@ const {spawnSync} = require('child_process');
// Download the unicode data files, caching them inside the 'target' directory.
const categoryData = cachedDownload(CATEGORY_URL);
const propertyData = cachedDownload(PROPERTY_URL);
const derivedPopertyData = cachedDownload(DERIVED_PROPERTY_URL);
const derivedPropertyData = cachedDownload(DERIVED_PROPERTY_URL);
const categoryAliasData = cachedDownload(CATEGORY_ALIAS_URL);
const propertyAliasData = cachedDownload(PROPERTY_ALIAS_URL);
function cachedDownload(url) {
let downloadPath = path.join('.', 'target', path.basename(url))
if (fs.existsSync(downloadPath)) {
@ -30,10 +36,12 @@ function cachedDownload(url) {
const categories = {};
const properties = {};
const categoryAliases = {};
const propertyAliases = {}
let data, row, lineStart, lineEnd;
// Parse the properties
data = propertyData + derivedPopertyData;
data = propertyData + derivedPropertyData;
row = 0;
lineStart = 0;
lineEnd = -1;
@ -106,7 +114,7 @@ while (lineStart < data.length) {
if (
nameStart === 0 ||
categoryStart == 0 ||
categoryEnd === 0
categoryEnd === -1
) {
throw new Error(`Unexpected format on line ${row}`);
}
@ -124,5 +132,110 @@ while (lineStart < data.length) {
categories[category].push(codePoint);
}
// Parse the category aliases
data = categoryAliasData;
row = 0;
lineStart = 0;
lineEnd = -1;
const IGNORE = /[#\s]/
while (lineStart < data.length) {
row++;
lineStart = lineEnd + 1;
lineEnd = data.indexOf('\n', lineStart);
if (lineEnd === -1) break;
// Skip over blank and comment lines
if (IGNORE.test(data[lineStart])) continue;
// Parse the first three semicolon-separated fields:
// * property value type
// * short name
// * long name
// Other aliases may be listed in additional fields
const propertyValueTypeEnd = data.indexOf(';', lineStart);
const shortNameStart = propertyValueTypeEnd + 1;
const shortNameEnd = data.indexOf(';', shortNameStart);
const longNameStart = shortNameEnd + 1;
if (
shortNameStart === 0 ||
longNameStart === 0
) {
throw new Error(`Unexpected format on line ${row}`);
}
const propertyValueType = data.slice(lineStart, propertyValueTypeEnd).trim();
const shortName = data.slice(shortNameStart, shortNameEnd).trim();
// Filter for General_Category lines
if (propertyValueType !== 'gc') continue;
let aliasStart = longNameStart;
let lineDone = false;
do {
let aliasEnd = data.indexOf(';', aliasStart);
if (aliasEnd === -1 || aliasEnd > lineEnd) {
aliasEnd = data.indexOf('#', aliasStart);
if (aliasEnd === -1 || aliasEnd > lineEnd) {
aliasEnd = lineEnd;
}
lineDone = true;
}
const alias = data.slice(aliasStart, aliasEnd).trim();
console.log(alias, shortName);
categoryAliases[alias] = shortName;
aliasStart = aliasEnd + 1;
} while (!lineDone);
}
// Parse the property aliases
data = propertyAliasData;
row = 0;
lineStart = 0;
lineEnd = -1;
while (lineStart < data.length) {
row++;
lineStart = lineEnd + 1;
lineEnd = data.indexOf('\n', lineStart);
if (lineEnd === -1) break;
// Skip over blank and comment lines
if (IGNORE.test(data[lineStart])) continue;
// Parse the first two semicolon-separated fields:
// * short name
// * long name
const shortNameEnd = data.indexOf(';', lineStart);
const longNameStart = shortNameEnd + 1;
if (longNameStart == 0) {
throw new Error(`Unexpected format on line ${row}`);
}
let alias = data.slice(lineStart, shortNameEnd).trim();
let longName = null;
let nameStart = longNameStart;
let lineDone = false;
do {
let nameEnd = data.indexOf(';', nameStart);
if (nameEnd === -1 || nameEnd > lineEnd) {
nameEnd = data.indexOf('#', nameStart);
if (nameEnd === -1 || nameEnd > lineEnd) {
nameEnd = lineEnd;
}
lineDone = true;
}
if (longName == null) {
longName = data.slice(nameStart, nameEnd).trim();
} else {
alias = data.slice(nameStart, nameEnd).trim();
}
console.log(alias, longName);
propertyAliases[alias] = longName;
nameStart = nameEnd + 1;
} while (!lineDone);
}
fs.writeFileSync(CATEGORY_OUTPUT_PATH, JSON.stringify(categories), 'utf8');
fs.writeFileSync(PROPERTY_OUTPUT_PATH, JSON.stringify(properties), 'utf8');
fs.writeFileSync(CATEGORY_ALIAS_OUTPUT_PATH, JSON.stringify(categoryAliases), 'utf8');
fs.writeFileSync(PROPERTY_ALIAS_OUTPUT_PATH, JSON.stringify(propertyAliases), 'utf8');

View file

@ -4,7 +4,7 @@ description = "Library for extracting tag information"
version = "0.20.0"
authors = [
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
"Patrick Thomson <patrickt@github.com>"
"Patrick Thomson <patrickt@github.com>",
]
license = "MIT"
readme = "README.md"
@ -22,5 +22,5 @@ memchr = "2.3"
thiserror = "1.0"
[dependencies.tree-sitter]
version = ">= 0.17.0"
version = "0.20"
path = "../lib"

View file

@ -30,3 +30,14 @@ Math symbols
(program
(math_sym) (math_sym) (math_sym) (math_sym) (math_sym))
================================
Letterlike numeric characters
================================
ᛯ Ⅵ 〩
---
(program
(letter_number) (letter_number) (letter_number))

View file

@ -13,7 +13,8 @@
"members": [
{"type": "SYMBOL", "name": "lower"},
{"type": "SYMBOL", "name": "upper"},
{"type": "SYMBOL", "name": "math_sym"}
{"type": "SYMBOL", "name": "math_sym"},
{"type": "SYMBOL", "name": "letter_number"}
]
}
},
@ -31,6 +32,11 @@
"math_sym": {
"type": "PATTERN",
"value": "\\p{Sm}+"
},
"letter_number": {
"type": "PATTERN",
"value": "\\p{Letter_Number}"
}
}
}