diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..4fcce330 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +/lib/src/unicode/*.h linguist-vendored +/lib/src/unicode/LICENSE linguist-vendored diff --git a/.gitignore b/.gitignore index ed31e54a..58d73daa 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ docs/assets/js/tree-sitter.js /target *.rs.bk *.a +*.dylib *.o *.obj *.exp diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 6e45ee19..00000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "externals/utf8proc"] - path = lib/utf8proc - url = https://github.com/julialang/utf8proc diff --git a/.travis.yml b/.travis.yml index 44d989a1..282ba02d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,9 @@ language: rust rust: - stable +env: + CFLAGS="-Wall -Wextra -Werror -Wstrict-prototypes" + matrix: include: - os: osx @@ -11,8 +14,8 @@ matrix: before_install: # Install node - - nvm install 10 - - nvm use 10 + - nvm install 12 + - nvm use 12 # Download emscripten and create a shorthand for adding it to the PATH. 
# Don't add it to the path globally because it overrides the default @@ -23,6 +26,9 @@ script: # Build the WASM binding - (eval "$WASM_ENV" && script/build-wasm) + # build the shared/static libraries + - make + # Build the CLI - cargo build --release @@ -32,7 +38,6 @@ script: - (eval "$WASM_ENV" && script/generate-fixtures-wasm) # Run the tests - - export TREE_SITTER_STATIC_ANALYSIS=1 - script/test - script/test-wasm - script/benchmark @@ -53,8 +58,6 @@ deploy: file_glob: true file: - "tree-sitter-*.gz" - - "target/release/tree-sitter.js" - - "target/release/tree-sitter.wasm" draft: true overwrite: true skip_cleanup: true @@ -65,5 +68,3 @@ cache: cargo: true directories: - target/emsdk - - test/fixtures/grammars - - /home/travis/.emscripten_cache diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 120000 index 00000000..4f643710 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1 @@ +docs/section-6-contributing.md \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 4494a3dc..cd411095 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,1066 +4,988 @@ name = "aho-corasick" version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e" dependencies = [ - "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr", ] [[package]] name = "ansi_term" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" dependencies = [ - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi", ] [[package]] name = "arrayref" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee" [[package]] name = "arrayvec" version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b8d73f9beda665eaa98ab9e4f7442bd4e7de6652587de55b2525e52e29c1b0ba" dependencies = [ - "nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)", + "nodrop", ] [[package]] name = "ascii" version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97be891acc47ca214468e09425d02cef3af2c94d0d82081cd02061f996802f14" [[package]] name = "atty" version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" dependencies = [ - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc", + "termion", + "winapi", ] [[package]] name = "autocfg" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e5f34df7a019573fb8bdc7e24a2bfebe51a2a1d6bfdbaeccedb3c41fc574727" [[package]] name = "backtrace" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89a47830402e9981c5c41223151efcced65a0510c13097c769cede7efb34782a" dependencies = [ - "backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)", - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "backtrace-sys", + "cfg-if", + "libc", + "rustc-demangle", + "winapi", ] [[package]] name = "backtrace-sys" version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0" dependencies = [ - "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 
(registry+https://github.com/rust-lang/crates.io-index)", + "cc", + "libc", ] [[package]] name = "base64" version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e" dependencies = [ - "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "byteorder", ] [[package]] name = "bitflags" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" [[package]] name = "blake2b_simd" version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "461f4b879a8eb70c1debf7d0788a9a5ff15f1ea9d25925fea264ef4258bed6b2" dependencies = [ - "arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", - "arrayvec 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", - "constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "arrayref", + "arrayvec", + "constant_time_eq", ] -[[package]] -name = "bytecount" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "byteorder" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" [[package]] name = "c2-chacha" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d64d04786e0f528460fc884753cf8dddcc466be308f6026f8e355c41a0e4101" dependencies = [ - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "ppv-lite86 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static", + "ppv-lite86", ] [[package]] name = "cc" version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16" [[package]] name = 
"cfg-if" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" [[package]] name = "chrono" version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45912881121cb26fad7c38c17ba7daa18764771836b34fab7d3fbd93ed633878" dependencies = [ - "num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)", - "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", - "time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)", + "num-integer", + "num-traits", + "time", ] [[package]] name = "chunked_transfer" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "498d20a7aaf62625b9bf26e637cf7736417cde1d0c99f1d04d1170229a85cf87" [[package]] name = "clap" version = "2.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" dependencies = [ - "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", - "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", - "textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", ] [[package]] name = "cloudabi" version = "0.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" dependencies = [ - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags", ] 
[[package]] name = "constant_time_eq" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ff012e225ce166d4422e0e78419d901719760f62ae2b7969ca6b564d1b54a9e" [[package]] name = "crossbeam-utils" version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04973fa96e96579258a5091af6003abde64af786b860f18622b82e026cca60e6" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "lazy_static", ] [[package]] name = "difference" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" [[package]] name = "dirs" version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "dirs-sys 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "dirs-sys", ] [[package]] name = "dirs-sys" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afa0b23de8fd801745c471deffa6e12d248f962c9fd4b4c33787b055599bde7b" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_users 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "libc", + "redox_users", + "winapi", ] [[package]] name = "failure" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd377bcc1b1b7ce911967e3ec24fa19c3224394ec05b54aa7b083d498341ac7" dependencies = [ - "backtrace 0.3.9 
(registry+https://github.com/rust-lang/crates.io-index)", - "failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "backtrace", + "failure_derive", ] [[package]] name = "failure_derive" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64c2d913fe8ed3b6c6518eedf4538255b989945c14c2a7d5cbff62a5e2120596" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)", - "synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "syn", + "synstructure", ] [[package]] name = "fuchsia-zircon" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" dependencies = [ - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags", + "fuchsia-zircon-sys", ] [[package]] name = "fuchsia-zircon-sys" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" [[package]] name = "getrandom" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34f33de6f0ae7c9cb5e574502a562e2b512799e32abb801cd1e79ad952b62b49" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "libc", ] +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + [[package]] name = "idna" version = "0.1.5" 
source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e" dependencies = [ - "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "matches", + "unicode-bidi", + "unicode-normalization", ] [[package]] name = "indexmap" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d" [[package]] name = "itoa" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" [[package]] name = "lazy_static" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "lexical-core" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - "stackvector 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", - "static_assertions 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" [[package]] name = "libc" version = "0.2.61" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c665266eb592905e8503ba3403020f4b8794d26263f412ca33171600eca9a6fa" [[package]] name = "libloading" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3ad660d7cb8c5822cd83d10897b0f1f1526792737a179e73896152f85b88c2" dependencies = [ - "cc 1.0.25 
(registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cc", + "winapi", ] [[package]] name = "lock_api" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c" dependencies = [ - "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "scopeguard", ] [[package]] name = "log" version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", ] [[package]] name = "matches" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" [[package]] name = "memchr" -version = "2.1.1" +version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" [[package]] name = "nodrop" version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "nom" -version = "5.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "lexical-core 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" [[package]] name = "num-integer" 
version = "0.1.39" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea" dependencies = [ - "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "num-rational" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)", - "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits", ] [[package]] name = "num-traits" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1" [[package]] name = "once_cell" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "532c29a261168a45ce28948f9537ddd7a5dd272cc513b3017b1e82a88f962c37" dependencies = [ - "parking_lot 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", + "parking_lot", ] [[package]] name = "parking_lot" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab41b4aed082705d1056416ae4468b6ea99d52599ecf3169b00088d43113e337" dependencies = [ - "lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "parking_lot_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lock_api", + "parking_lot_core", ] [[package]] name = "parking_lot_core" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94c8c7923936b28d546dfd14d4472eaf34c99b14e1c973a32b3e6d4eb04298c9" dependencies = [ - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "smallvec 0.6.8 
(registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc", + "rand 0.6.4", + "rustc_version", + "smallvec", + "winapi", ] [[package]] name = "percent-encoding" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831" [[package]] name = "ppv-lite86" version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3cbf9f658cdb5000fcf6f362b8ea2ba154b9f146a61c7a20d647034c6b6561b" [[package]] name = "proc-macro2" version = "0.4.24" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77619697826f31a02ae974457af0b29b723e5619e113e9397b8b82c6bd253f09" dependencies = [ - "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid", ] [[package]] name = "quote" version = "0.6.10" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53fa22a1994bd0f9372d7a816207d8a2677ad0325b073f5c5332760f0fb62b5c" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", ] [[package]] name = "rand" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3906503e80ac6cbcacb2c2973fa8e473f24d7e2747c8c92bb230c2441cad96b5" dependencies = [ - "autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_os 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_pcg 0.1.1 
(registry+https://github.com/rust-lang/crates.io-index)", - "rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "autocfg", + "libc", + "rand_chacha 0.1.1", + "rand_core 0.3.0", + "rand_hc 0.1.0", + "rand_isaac", + "rand_os", + "rand_pcg", + "rand_xorshift", + "winapi", ] [[package]] name = "rand" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d47eab0e83d9693d40f825f86948aa16eff6750ead4bdffc4ab95b8b3a7f052c" dependencies = [ - "getrandom 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "getrandom", + "libc", + "rand_chacha 0.2.1", + "rand_core 0.5.0", + "rand_hc 0.2.0", ] [[package]] name = "rand_chacha" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef" dependencies = [ - "autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "autocfg", + "rand_core 0.3.0", ] [[package]] name = "rand_chacha" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" dependencies = [ - "c2-chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "c2-chacha", + "rand_core 0.5.0", ] [[package]] name = "rand_core" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0905b6b7079ec73b314d4c748701f6931eb79fd97c668caa3f1899b22b32c6db" [[package]] name = "rand_core" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "615e683324e75af5d43d8f7a39ffe3ee4a9dc42c5c701167a71dc59c3a493aca" dependencies = [ - "getrandom 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "getrandom", ] [[package]] name = "rand_hc" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", ] [[package]] name = "rand_hc" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" dependencies = [ - "rand_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.5.0", ] [[package]] name = "rand_isaac" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", ] [[package]] name = "rand_os" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46fbd5550acf75b0c2730f5dd1873751daf9beb8f11b44027778fae50d7feca" dependencies = [ - "cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", - "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cloudabi", + "fuchsia-zircon", + 
"libc", + "rand_core 0.3.0", + "rdrand", + "winapi", ] [[package]] name = "rand_pcg" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "086bd09a33c7044e56bb44d5bdde5a60e7f119a9e95b0775f545de759a32fe05" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", + "rustc_version", ] [[package]] name = "rand_xorshift" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", ] [[package]] name = "rdrand" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", ] [[package]] name = "redox_syscall" version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "679da7508e9a6390aeaf7fbd02a800fdc64b73fe2204dd2c8ae66d22d9d5ad5d" [[package]] name = "redox_termios" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" dependencies = [ - "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall", ] [[package]] name = "redox_users" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ecedbca3bf205f8d8f5c2b44d83cd0690e39ee84b951ed649e9f1841132b66d" dependencies = [ - "failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_os 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.43 
(registry+https://github.com/rust-lang/crates.io-index)", - "rust-argon2 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "failure", + "rand_os", + "redox_syscall", + "rust-argon2", ] [[package]] name = "regex" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" dependencies = [ - "aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", - "utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", + "utf8-ranges", ] [[package]] name = "regex-syntax" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" dependencies = [ - "ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "ucd-util", ] [[package]] name = "remove_dir_all" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5" dependencies = [ - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rsass" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bytecount 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "nom 5.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - "num-rational 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "num-traits 0.2.6 
(registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi", ] [[package]] name = "rust-argon2" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca4eaef519b494d1f2848fc602d18816fed808a981aedf4f1f00ceb7c9d32cf" dependencies = [ - "base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", - "blake2b_simd 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", - "crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)", + "base64", + "blake2b_simd", + "crossbeam-utils", ] [[package]] name = "rustc-demangle" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395" [[package]] name = "rustc_version" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" dependencies = [ - "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "semver", ] [[package]] name = "ryu" version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "ryu" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" [[package]] name = "scopeguard" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" [[package]] name = "semver" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" dependencies = [ - "semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "semver-parser", ] [[package]] name = "semver-parser" version = 
"0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15c141fc7027dd265a47c090bf864cf62b42c4d228bbcf4e51a0c9e2b0d3f7ef" [[package]] name = "serde_derive" version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "syn", ] [[package]] name = "serde_json" version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c37ccd6be3ed1fdf419ee848f7c758eb31b054d7cd3ae3600e3bae0adf569811" dependencies = [ - "indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "indexmap", + "itoa", + "ryu", + "serde", ] [[package]] name = "smallbitvec" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1764fe2b30ee783bfe3b9b37b2649d8d590b3148bb12e0079715d4d5c673562e" [[package]] name = "smallvec" version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88aea073965ab29f6edb5493faf96ad662fb18aa9eeb186a3b7057951605ed15" dependencies = [ - "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unreachable", ] [[package]] name = "spin" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] 
-name = "stackvector" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "static_assertions" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44363f6f51401c34e7be73db0db371c04705d35efbe9f7d6082e03a921a32c55" [[package]] name = "strsim" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" [[package]] name = "syn" version = "0.15.22" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "unicode-xid", ] [[package]] name = "synstructure" version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "syn", + "unicode-xid", ] [[package]] name = "tempfile" version = "3.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b86c784c88d98c801132806dadd3819ed29d8600836c4088e855cdf3e178ed8a" dependencies = [ - "cfg-if 0.1.6 
(registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", - "remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "libc", + "rand 0.6.4", + "redox_syscall", + "remove_dir_all", + "winapi", ] [[package]] name = "termion" version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" dependencies = [ - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "libc", + "redox_syscall", + "redox_termios", ] [[package]] name = "textwrap" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" dependencies = [ - "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-width", ] [[package]] name = "thread_local" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" dependencies = [ - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static", ] [[package]] name = "time" version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" dependencies = [ - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.43 
(registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc", + "redox_syscall", + "winapi", ] [[package]] name = "tiny_http" version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1661fa0a44c95d01604bd05c66732a446c657efb62b5164a7a083a3b552b4951" dependencies = [ - "ascii 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)", - "chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "chunked_transfer 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)", + "ascii", + "chrono", + "chunked_transfer", + "log", + "url", ] [[package]] name = "tree-sitter" -version = "0.3.10" +version = "0.17.1" dependencies = [ - "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", + "cc", + "regex", ] [[package]] name = "tree-sitter-cli" -version = "0.15.8" +version = "0.17.3" dependencies = [ - "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", - "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", - "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", - "difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - "dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.6 
(registry+https://github.com/rust-lang/crates.io-index)", - "once_cell 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "rsass 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", - "smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tempfile 3.0.7 (registry+https://github.com/rust-lang/crates.io-index)", - "tiny_http 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.3.10", - "tree-sitter-highlight 0.1.6", - "webbrowser 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "ansi_term", + "atty", + "cc", + "clap", + "difference", + "dirs", + "glob", + "lazy_static", + "libloading", + "log", + "once_cell", + "rand 0.7.0", + "regex", + "regex-syntax", + "serde", + "serde_derive", + "serde_json", + "smallbitvec", + "spin", + "tempfile", + "tiny_http", + "tree-sitter", + "tree-sitter-highlight", + "tree-sitter-tags", + "webbrowser", ] [[package]] name = "tree-sitter-highlight" -version = "0.1.6" +version = "0.3.0" dependencies = [ - "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.3.10", + "regex", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-tags" +version = "0.3.0" 
+dependencies = [ + "memchr", + "regex", + "tree-sitter", ] [[package]] name = "ucd-util" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" [[package]] name = "unicode-bidi" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" dependencies = [ - "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "matches", ] [[package]] name = "unicode-normalization" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "141339a08b982d942be2ca06ff8b076563cbe223d1befd5450716790d44e2426" dependencies = [ - "smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec", ] [[package]] name = "unicode-width" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" [[package]] name = "unicode-xid" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" [[package]] name = "unreachable" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" dependencies = [ - "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "void", ] [[package]] name = "url" version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd4e7c0d531266369519a4aa4f399d748bd37043b00bde1e4ff1f60a120b355a" dependencies = [ - "idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + 
"idna", + "matches", + "percent-encoding", ] [[package]] name = "utf8-ranges" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" [[package]] name = "vec_map" version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "version_check" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" [[package]] name = "void" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" [[package]] name = "webbrowser" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c01efd7cb6939b7f34983f1edff0550e5b21b49e2db4495656295922df8939ac" dependencies = [ - "widestring 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "widestring", + "winapi", ] [[package]] name = "widestring" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "effc0e4ff8085673ea7b9b2e3c73f6bd4d118810c9009ed8f1e16bd96c331db6" [[package]] name = "winapi" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0" dependencies = [ - "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" - -[metadata] -"checksum aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e" -"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" -"checksum arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee" -"checksum arrayvec 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)" = "b8d73f9beda665eaa98ab9e4f7442bd4e7de6652587de55b2525e52e29c1b0ba" -"checksum ascii 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)" = "97be891acc47ca214468e09425d02cef3af2c94d0d82081cd02061f996802f14" -"checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" -"checksum autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4e5f34df7a019573fb8bdc7e24a2bfebe51a2a1d6bfdbaeccedb3c41fc574727" -"checksum backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "89a47830402e9981c5c41223151efcced65a0510c13097c769cede7efb34782a" -"checksum backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0" -"checksum base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e" -"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" -"checksum blake2b_simd 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = 
"461f4b879a8eb70c1debf7d0788a9a5ff15f1ea9d25925fea264ef4258bed6b2" -"checksum bytecount 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "be0fdd54b507df8f22012890aadd099979befdba27713c767993f8380112ca7c" -"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" -"checksum c2-chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7d64d04786e0f528460fc884753cf8dddcc466be308f6026f8e355c41a0e4101" -"checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16" -"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" -"checksum chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "45912881121cb26fad7c38c17ba7daa18764771836b34fab7d3fbd93ed633878" -"checksum chunked_transfer 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "498d20a7aaf62625b9bf26e637cf7736417cde1d0c99f1d04d1170229a85cf87" -"checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" -"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" -"checksum constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8ff012e225ce166d4422e0e78419d901719760f62ae2b7969ca6b564d1b54a9e" -"checksum crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "04973fa96e96579258a5091af6003abde64af786b860f18622b82e026cca60e6" -"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" -"checksum dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = 
"13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3" -"checksum dirs-sys 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "afa0b23de8fd801745c471deffa6e12d248f962c9fd4b4c33787b055599bde7b" -"checksum failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6dd377bcc1b1b7ce911967e3ec24fa19c3224394ec05b54aa7b083d498341ac7" -"checksum failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "64c2d913fe8ed3b6c6518eedf4538255b989945c14c2a7d5cbff62a5e2120596" -"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" -"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" -"checksum getrandom 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "34f33de6f0ae7c9cb5e574502a562e2b512799e32abb801cd1e79ad952b62b49" -"checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e" -"checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d" -"checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" -"checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" -"checksum lexical-core 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "b8b0f90c979adde96d19eb10eb6431ba0c441e2f9e9bdff868b2f6f5114ff519" -"checksum libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)" = "c665266eb592905e8503ba3403020f4b8794d26263f412ca33171600eca9a6fa" -"checksum libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"9c3ad660d7cb8c5822cd83d10897b0f1f1526792737a179e73896152f85b88c2" -"checksum lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c" -"checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6" -"checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" -"checksum memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0a3eb002f0535929f1199681417029ebea04aadc0c7a4224b46be99c7f5d6a16" -"checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" -"checksum nom 5.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e9761d859320e381010a4f7f8ed425f2c924de33ad121ace447367c713ad561b" -"checksum num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea" -"checksum num-rational 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4e96f040177bb3da242b5b1ecf3f54b5d5af3efbbfb18608977a5d2767b22f10" -"checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1" -"checksum once_cell 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "532c29a261168a45ce28948f9537ddd7a5dd272cc513b3017b1e82a88f962c37" -"checksum parking_lot 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ab41b4aed082705d1056416ae4468b6ea99d52599ecf3169b00088d43113e337" -"checksum parking_lot_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "94c8c7923936b28d546dfd14d4472eaf34c99b14e1c973a32b3e6d4eb04298c9" -"checksum percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831" -"checksum ppv-lite86 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e3cbf9f658cdb5000fcf6f362b8ea2ba154b9f146a61c7a20d647034c6b6561b" -"checksum proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)" = "77619697826f31a02ae974457af0b29b723e5619e113e9397b8b82c6bd253f09" -"checksum quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)" = "53fa22a1994bd0f9372d7a816207d8a2677ad0325b073f5c5332760f0fb62b5c" -"checksum rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3906503e80ac6cbcacb2c2973fa8e473f24d7e2747c8c92bb230c2441cad96b5" -"checksum rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d47eab0e83d9693d40f825f86948aa16eff6750ead4bdffc4ab95b8b3a7f052c" -"checksum rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef" -"checksum rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" -"checksum rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0905b6b7079ec73b314d4c748701f6931eb79fd97c668caa3f1899b22b32c6db" -"checksum rand_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "615e683324e75af5d43d8f7a39ffe3ee4a9dc42c5c701167a71dc59c3a493aca" -"checksum rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4" -"checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" -"checksum rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" -"checksum rand_os 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"f46fbd5550acf75b0c2730f5dd1873751daf9beb8f11b44027778fae50d7feca" -"checksum rand_pcg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "086bd09a33c7044e56bb44d5bdde5a60e7f119a9e95b0775f545de759a32fe05" -"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" -"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" -"checksum redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "679da7508e9a6390aeaf7fbd02a800fdc64b73fe2204dd2c8ae66d22d9d5ad5d" -"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" -"checksum redox_users 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ecedbca3bf205f8d8f5c2b44d83cd0690e39ee84b951ed649e9f1841132b66d" -"checksum regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" -"checksum regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" -"checksum remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5" -"checksum rsass 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4520dc8a2786c0319f3947e3d79e735b27f0c63c555b854aaa802e49e3f45098" -"checksum rust-argon2 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ca4eaef519b494d1f2848fc602d18816fed808a981aedf4f1f00ceb7c9d32cf" -"checksum rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395" -"checksum rustc_version 0.2.3 
(registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" -"checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" -"checksum ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997" -"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" -"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" -"checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" -"checksum serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "15c141fc7027dd265a47c090bf864cf62b42c4d228bbcf4e51a0c9e2b0d3f7ef" -"checksum serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c" -"checksum serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)" = "c37ccd6be3ed1fdf419ee848f7c758eb31b054d7cd3ae3600e3bae0adf569811" -"checksum smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1764fe2b30ee783bfe3b9b37b2649d8d590b3148bb12e0079715d4d5c673562e" -"checksum smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "88aea073965ab29f6edb5493faf96ad662fb18aa9eeb186a3b7057951605ed15" -"checksum spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "44363f6f51401c34e7be73db0db371c04705d35efbe9f7d6082e03a921a32c55" -"checksum stackvector 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "1c4725650978235083241fab0fdc8e694c3de37821524e7534a1a9061d1068af" -"checksum static_assertions 0.3.4 
(registry+https://github.com/rust-lang/crates.io-index)" = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3" -"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" -"checksum syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)" = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7" -"checksum synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015" -"checksum tempfile 3.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "b86c784c88d98c801132806dadd3819ed29d8600836c4088e855cdf3e178ed8a" -"checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" -"checksum textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" -"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" -"checksum time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" -"checksum tiny_http 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1661fa0a44c95d01604bd05c66732a446c657efb62b5164a7a083a3b552b4951" -"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" -"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" -"checksum unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "141339a08b982d942be2ca06ff8b076563cbe223d1befd5450716790d44e2426" -"checksum unicode-width 0.1.5 
(registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" -"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" -"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" -"checksum url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dd4e7c0d531266369519a4aa4f399d748bd37043b00bde1e4ff1f60a120b355a" -"checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" -"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" -"checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" -"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" -"checksum webbrowser 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c01efd7cb6939b7f34983f1edff0550e5b21b49e2db4495656295922df8939ac" -"checksum widestring 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "effc0e4ff8085673ea7b9b2e3c73f6bd4d118810c9009ed8f1e16bd96c331db6" -"checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0" -"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..764f411a --- /dev/null +++ b/Makefile @@ -0,0 +1,71 @@ +VERSION := 0.6.3 + +# install directory layout +PREFIX ?= /usr/local +INCLUDEDIR ?= $(PREFIX)/include +LIBDIR ?= $(PREFIX)/lib +PCLIBDIR ?= $(LIBDIR)/pkgconfig + +# collect sources +ifneq ($(AMALGAMATED),1) + SRC := $(wildcard lib/src/*.c) + # do not double-include amalgamation + SRC := $(filter-out lib/src/lib.c,$(SRC)) +else + # use amalgamated build + SRC := lib/src/lib.c +endif +OBJ := $(SRC:.c=.o) + +# define default flags, and override to append mandatory flags +CFLAGS ?= -O3 -Wall -Wextra -Werror +override CFLAGS += -std=gnu99 -fPIC -Ilib/src -Ilib/include + +# ABI versioning +SONAME_MAJOR := 0 +SONAME_MINOR := 0 + +# OS-specific bits +ifeq ($(shell uname),Darwin) + SOEXT = dylib + SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib + SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib + LINKSHARED += -dynamiclib -Wl,-install_name,$(LIBDIR)/libtree-sitter.$(SONAME_MAJOR).dylib +else + SOEXT = so + SOEXTVER_MAJOR = so.$(SONAME_MAJOR) + SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR) + LINKSHARED += -shared -Wl,-soname,libtree-sitter.so.$(SONAME_MAJOR) +endif +ifneq (,$(filter $(shell uname),FreeBSD NetBSD DragonFly)) + PCLIBDIR := $(PREFIX)/libdata/pkgconfig +endif + +all: libtree-sitter.a libtree-sitter.$(SOEXTVER) + +libtree-sitter.a: $(OBJ) + $(AR) rcs $@ $^ + +libtree-sitter.$(SOEXTVER): $(OBJ) + $(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@ + ln -sf $@ libtree-sitter.$(SOEXT) + ln -sf $@ libtree-sitter.$(SOEXTVER_MAJOR) + +install: all + install -d '$(DESTDIR)$(LIBDIR)' + install -m755 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a + install -m755 libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER) + ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR) + ln -sf libtree-sitter.$(SOEXTVER) 
'$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) + install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter + install -m644 lib/include/tree_sitter/*.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/ + install -d '$(DESTDIR)$(PCLIBDIR)' + sed -e 's|@LIBDIR@|$(LIBDIR)|;s|@INCLUDEDIR@|$(INCLUDEDIR)|;s|@VERSION@|$(VERSION)|' \ + -e 's|=$(PREFIX)|=$${prefix}|' \ + -e 's|@PREFIX@|$(PREFIX)|' \ + tree-sitter.pc.in > '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc + +clean: + rm -f lib/src/*.o libtree-sitter.a libtree-sitter.$(SOEXT) libtree-sitter.$(SOEXTVER_MAJOR) libtree-sitter.$(SOEXTVER) + +.PHONY: all install clean diff --git a/README.md b/README.md index b6df76e9..a4bb7a48 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,14 @@ Tree-sitter is a parser generator tool and an incremental parsing library. It can build a concrete syntax tree for a source file and efficiently update the syntax tree as the source file is edited. Tree-sitter aims to be: -* **General** enough to parse any programming language -* **Fast** enough to parse on every keystroke in a text editor -* **Robust** enough to provide useful results even in the presence of syntax errors -* **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application +- **General** enough to parse any programming language +- **Fast** enough to parse on every keystroke in a text editor +- **Robust** enough to provide useful results even in the presence of syntax errors +- **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application -[Documentation](https://tree-sitter.github.io/tree-sitter/) +## Links + +- [Documentation](https://tree-sitter.github.io) +- [Rust binding](lib/binding_rust/README.md) +- [WASM binding](lib/binding_web/README.md) +- [Command-line interface](cli/README.md) diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 2bf83f8d..48dbbff7 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-cli" 
description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.15.8" +version = "0.17.3" authors = ["Max Brunsfeld "] edition = "2018" license = "MIT" @@ -19,31 +19,36 @@ name = "benchmark" harness = false [dependencies] -cc = "1.0" ansi_term = "0.11" -difference = "2.0" -lazy_static = "1.2.0" -smallbitvec = "2.3.0" +cc = "1.0" +atty = "0.2" clap = "2.32" +difference = "2.0" dirs = "2.0.2" +glob = "0.3.0" +lazy_static = "1.2.0" libloading = "0.5" once_cell = "0.1.8" +regex = "1" +regex-syntax = "0.6.4" serde = "1.0" serde_derive = "1.0" -regex-syntax = "0.6.4" -regex = "1" -rsass = "^0.11.0" +smallbitvec = "2.3.0" tiny_http = "0.6" webbrowser = "0.5.1" [dependencies.tree-sitter] -version = ">= 0.3.7" +version = ">= 0.17.0" path = "../lib" [dependencies.tree-sitter-highlight] -version = ">= 0.1.0" +version = ">= 0.3.0" path = "../highlight" +[dependencies.tree-sitter-tags] +version = ">= 0.1.0" +path = "../tags" + [dependencies.serde_json] version = "1.0" features = ["preserve_order"] diff --git a/cli/benches/benchmark.rs b/cli/benches/benchmark.rs index 294c8a97..53ab3fea 100644 --- a/cli/benches/benchmark.rs +++ b/cli/benches/benchmark.rs @@ -2,8 +2,8 @@ use lazy_static::lazy_static; use std::collections::BTreeMap; use std::path::{Path, PathBuf}; use std::time::Instant; -use std::{env, fs, usize}; -use tree_sitter::{Language, Parser}; +use std::{env, fs, str, usize}; +use tree_sitter::{Language, Parser, Query}; use tree_sitter_cli::error::Error; use tree_sitter_cli::loader::Loader; @@ -17,28 +17,34 @@ lazy_static! 
{ static ref REPETITION_COUNT: usize = env::var("TREE_SITTER_BENCHMARK_REPETITION_COUNT") .map(|s| usize::from_str_radix(&s, 10).unwrap()) .unwrap_or(5); - static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone()); - static ref EXAMPLE_PATHS_BY_LANGUAGE_DIR: BTreeMap> = { - fn process_dir(result: &mut BTreeMap>, dir: &Path) { + static ref EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR: BTreeMap, Vec)> = { + fn process_dir(result: &mut BTreeMap, Vec)>, dir: &Path) { if dir.join("grammar.js").exists() { let relative_path = dir.strip_prefix(GRAMMARS_DIR.as_path()).unwrap(); + let (example_paths, query_paths) = + result.entry(relative_path.to_owned()).or_default(); + if let Ok(example_files) = fs::read_dir(&dir.join("examples")) { - result.insert( - relative_path.to_owned(), - example_files - .filter_map(|p| { - let p = p.unwrap().path(); - if p.is_file() { - Some(p) - } else { - None - } - }) - .collect(), - ); - } else { - result.insert(relative_path.to_owned(), Vec::new()); + example_paths.extend(example_files.filter_map(|p| { + let p = p.unwrap().path(); + if p.is_file() { + Some(p.to_owned()) + } else { + None + } + })); + } + + if let Ok(query_files) = fs::read_dir(&dir.join("queries")) { + query_paths.extend(query_files.filter_map(|p| { + let p = p.unwrap().path(); + if p.is_file() { + Some(p.to_owned()) + } else { + None + } + })); } } else { for entry in fs::read_dir(&dir).unwrap() { @@ -57,20 +63,25 @@ lazy_static! 
{ } fn main() { - let mut parser = Parser::new(); - let max_path_length = EXAMPLE_PATHS_BY_LANGUAGE_DIR - .iter() - .flat_map(|(_, paths)| paths.iter()) - .map(|p| p.file_name().unwrap().to_str().unwrap().chars().count()) + let max_path_length = EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR + .values() + .flat_map(|(e, q)| { + e.iter() + .chain(q.iter()) + .map(|s| s.file_name().unwrap().to_str().unwrap().len()) + }) .max() - .unwrap(); - - let mut all_normal_speeds = Vec::new(); - let mut all_error_speeds = Vec::new(); + .unwrap_or(0); eprintln!("Benchmarking with {} repetitions", *REPETITION_COUNT); - for (language_path, example_paths) in EXAMPLE_PATHS_BY_LANGUAGE_DIR.iter() { + let mut parser = Parser::new(); + let mut all_normal_speeds = Vec::new(); + let mut all_error_speeds = Vec::new(); + + for (language_path, (example_paths, query_paths)) in + EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR.iter() + { let language_name = language_path.file_name().unwrap().to_str().unwrap(); if let Some(filter) = LANGUAGE_FILTER.as_ref() { @@ -80,9 +91,24 @@ fn main() { } eprintln!("\nLanguage: {}", language_name); - parser.set_language(get_language(language_path)).unwrap(); + let language = get_language(language_path); + parser.set_language(language).unwrap(); - eprintln!(" Normal examples:"); + eprintln!(" Constructing Queries"); + for path in query_paths { + if let Some(filter) = EXAMPLE_FILTER.as_ref() { + if !path.to_str().unwrap().contains(filter.as_str()) { + continue; + } + } + + parse(&path, max_path_length, |source| { + Query::new(language, str::from_utf8(source).unwrap()) + .expect("Failed to parse query"); + }); + } + + eprintln!(" Parsing Valid Code:"); let mut normal_speeds = Vec::new(); for example_path in example_paths { if let Some(filter) = EXAMPLE_FILTER.as_ref() { @@ -91,12 +117,16 @@ fn main() { } } - normal_speeds.push(parse(&mut parser, example_path, max_path_length)); + normal_speeds.push(parse(example_path, max_path_length, |code| { + parser.parse(code, 
None).expect("Failed to parse"); + })); } - eprintln!(" Error examples (mismatched languages):"); + eprintln!(" Parsing Invalid Code (mismatched languages):"); let mut error_speeds = Vec::new(); - for (other_language_path, example_paths) in EXAMPLE_PATHS_BY_LANGUAGE_DIR.iter() { + for (other_language_path, (example_paths, _)) in + EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR.iter() + { if other_language_path != language_path { for example_path in example_paths { if let Some(filter) = EXAMPLE_FILTER.as_ref() { @@ -105,7 +135,9 @@ fn main() { } } - error_speeds.push(parse(&mut parser, example_path, max_path_length)); + error_speeds.push(parse(example_path, max_path_length, |code| { + parser.parse(code, None).expect("Failed to parse"); + })); } } } @@ -124,7 +156,7 @@ fn main() { all_error_speeds.extend(error_speeds); } - eprintln!("\nOverall"); + eprintln!("\n Overall"); if let Some((average_normal, worst_normal)) = aggregate(&all_normal_speeds) { eprintln!(" Average Speed (normal): {} bytes/ms", average_normal); eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal); @@ -137,7 +169,7 @@ fn main() { eprintln!(""); } -fn aggregate(speeds: &Vec<(usize)>) -> Option<(usize, usize)> { +fn aggregate(speeds: &Vec) -> Option<(usize, usize)> { if speeds.is_empty() { return None; } @@ -152,28 +184,25 @@ fn aggregate(speeds: &Vec<(usize)>) -> Option<(usize, usize)> { Some((total / speeds.len(), max)) } -fn parse(parser: &mut Parser, example_path: &Path, max_path_length: usize) -> usize { +fn parse(path: &Path, max_path_length: usize, mut action: impl FnMut(&[u8])) -> usize { eprint!( " {:width$}\t", - example_path.file_name().unwrap().to_str().unwrap(), + path.file_name().unwrap().to_str().unwrap(), width = max_path_length ); - let source_code = fs::read(example_path) - .map_err(Error::wrap(|| format!("Failed to read {:?}", example_path))) + let source_code = fs::read(path) + .map_err(Error::wrap(|| format!("Failed to read {:?}", path))) .unwrap(); let time = Instant::now(); 
for _ in 0..*REPETITION_COUNT { - parser - .parse(&source_code, None) - .expect("Incompatible language version"); + action(&source_code); } let duration = time.elapsed() / (*REPETITION_COUNT as u32); - let duration_ms = - duration.as_secs() as f64 * 1000.0 + duration.subsec_nanos() as f64 / 1000000.0; - let speed = (source_code.len() as f64 / duration_ms) as usize; + let duration_ms = duration.as_millis(); + let speed = source_code.len() as u128 / (duration_ms + 1); eprintln!("time {} ms\tspeed {} bytes/ms", duration_ms as usize, speed); - speed + speed as usize } fn get_language(path: &Path) -> Language { diff --git a/cli/build.rs b/cli/build.rs index 0ed9ef06..47506018 100644 --- a/cli/build.rs +++ b/cli/build.rs @@ -1,4 +1,4 @@ -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::{env, fs}; fn main() { @@ -6,12 +6,25 @@ fn main() { println!("cargo:rustc-env={}={}", "BUILD_SHA", git_sha); } + if wasm_files_present() { + println!("cargo:rustc-cfg={}", "TREE_SITTER_EMBED_WASM_BINDING"); + } + println!( "cargo:rustc-env=BUILD_TARGET={}", std::env::var("TARGET").unwrap() ); } +fn wasm_files_present() -> bool { + let paths = [ + "../lib/binding_web/tree-sitter.js", + "../lib/binding_web/tree-sitter.wasm", + ]; + + paths.iter().all(|p| Path::new(p).exists()) +} + fn read_git_sha() -> Option { let mut repo_path = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); @@ -51,7 +64,6 @@ fn read_git_sha() -> Option { } return fs::read_to_string(&ref_filename).ok(); } - // If we're on a detached commit, then the `HEAD` file itself contains the sha. 
else if head_content.len() == 40 { return Some(head_content); diff --git a/cli/npm/dsl.d.ts b/cli/npm/dsl.d.ts new file mode 100644 index 00000000..b9bf1c98 --- /dev/null +++ b/cli/npm/dsl.d.ts @@ -0,0 +1,356 @@ +type AliasRule = {type: 'ALIAS'; named: boolean; content: Rule; value: string}; +type BlankRule = {type: 'BLANK'}; +type ChoiceRule = {type: 'CHOICE'; members: Rule[]}; +type FieldRule = {type: 'FIELD'; name: string; content: Rule}; +type ImmediateTokenRule = {type: 'IMMEDIATE_TOKEN'; content: Rule}; +type PatternRule = {type: 'PATTERN'; value: string}; +type PrecDynamicRule = {type: 'PREC_DYNAMIC'; content: Rule; value: number}; +type PrecLeftRule = {type: 'PREC_LEFT'; content: Rule; value: number}; +type PrecRightRule = {type: 'PREC_RIGHT'; content: Rule; value: number}; +type PrecRule = {type: 'PREC'; content: Rule; value: number}; +type Repeat1Rule = {type: 'REPEAT1'; content: Rule}; +type RepeatRule = {type: 'REPEAT'; content: Rule}; +type SeqRule = {type: 'SEQ'; members: Rule[]}; +type StringRule = {type: 'STRING'; value: string}; +type SymbolRule = {type: 'SYMBOL'; name: Name}; +type TokenRule = {type: 'TOKEN'; content: Rule}; + +type Rule = + | AliasRule + | BlankRule + | ChoiceRule + | FieldRule + | ImmediateTokenRule + | PatternRule + | PrecDynamicRule + | PrecLeftRule + | PrecRightRule + | PrecRule + | Repeat1Rule + | RepeatRule + | SeqRule + | StringRule + | SymbolRule + | TokenRule; + +type RuleOrLiteral = Rule | RegExp | string; + +type GrammarSymbols = { + [name in RuleName]: SymbolRule; +} & + Record>; + +type RuleBuilder = ( + $: GrammarSymbols, +) => RuleOrLiteral; + +type RuleBuilders< + RuleName extends string, + BaseGrammarRuleName extends string +> = { + [name in RuleName]: RuleBuilder; +}; + +interface Grammar< + RuleName extends string, + BaseGrammarRuleName extends string = never, + Rules extends RuleBuilders = RuleBuilders< + RuleName, + BaseGrammarRuleName + > +> { + /** + * Name of the grammar language. 
+ */ + name: string; + + /** Mapping of grammar rule names to rule builder functions. */ + rules: Rules; + + /** + * An array of arrays of rule names. Each inner array represents a set of + * rules that's involved in an _LR(1) conflict_ that is _intended to exist_ + * in the grammar. When these conflicts occur at runtime, Tree-sitter will + * use the GLR algorithm to explore all of the possible interpretations. If + * _multiple_ parses end up succeeding, Tree-sitter will pick the subtree + * whose corresponding rule has the highest total _dynamic precedence_. + * + * @param $ grammar rules + */ + conflicts?: ( + $: GrammarSymbols, + ) => RuleOrLiteral[][]; + + /** + * An array of token names which can be returned by an _external scanner_. + * External scanners allow you to write custom C code which runs during the + * lexing process in order to handle lexical rules (e.g. Python's indentation + * tokens) that cannot be described by regular expressions. + * + * @param $ grammar rules + * @param previous array of externals from the base schema, if any + * + * @see https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners + */ + externals?: ( + $: Record>, + previous: Rule[], + ) => SymbolRule[]; + + /** + * An array of tokens that may appear anywhere in the language. This + * is often used for whitespace and comments. The default value of + * extras is to accept whitespace. To control whitespace explicitly, + * specify extras: `$ => []` in your grammar. + * + * @param $ grammar rules + */ + extras?: ( + $: GrammarSymbols, + ) => RuleOrLiteral[]; + + /** + * An array of rules that should be automatically removed from the + * grammar by replacing all of their usages with a copy of their definition. + * This is useful for rules that are used in multiple places but for which + * you don't want to create syntax tree nodes at runtime. 
+ * + * @param $ grammar rules + */ + inline?: ( + $: GrammarSymbols, + ) => RuleOrLiteral[]; + + /** + * A list of hidden rule names that should be considered supertypes in the + * generated node types file. + * + * @param $ grammar rules + * + * @see http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types + */ + supertypes?: ( + $: GrammarSymbols, + ) => RuleOrLiteral[]; + + /** + * The name of a token that will match keywords for the purpose of the + * keyword extraction optimization. + * + * @param $ grammar rules + * + * @see https://tree-sitter.github.io/tree-sitter/creating-parsers#keyword-extraction + */ + word?: ($: GrammarSymbols) => RuleOrLiteral; +} + +type GrammarSchema = { + [K in keyof Grammar]: K extends 'rules' + ? Record + : Grammar[K]; +}; + +/** + * Causes the given rule to appear with an alternative name in the syntax tree. + * For instance with `alias($.foo, 'bar')`, the aliased rule will appear as an + * anonymous node, as if the rule had been written as the simple string. + * + * @param rule rule that will be aliased + * @param name target name for the alias + */ +declare function alias(rule: RuleOrLiteral, name: string): AliasRule; + +/** + * Causes the given rule to appear as an alternative named node, for instance + * with `alias($.foo, $.bar)`, the aliased rule `foo` will appear as a named + * node called `bar`. + * + * @param rule rule that will be aliased + * @param symbol target symbol for the alias + */ +declare function alias( + rule: RuleOrLiteral, + symbol: SymbolRule, +): AliasRule; + +/** + * Creates a blank rule, matching nothing. + */ +declare function blank(): BlankRule; + +/** + * Assigns a field name to the child node(s) matched by the given rule. + * In the resulting syntax tree, you can then use that field name to + * access specific children. 
+ * + * @param name name of the field + * @param rule rule the field should match + */ +declare function field(name: string, rule: RuleOrLiteral): FieldRule; + +/** + * Creates a rule that matches one of a set of possible rules. The order + * of the arguments does not matter. This is analogous to the `|` (pipe) + * operator in EBNF notation. + * + * @param options possible rule choices + */ +declare function choice(...options: RuleOrLiteral[]): ChoiceRule; + +/** + * Creates a rule that matches zero or one occurrence of a given rule. + * It is analogous to the `[x]` (square bracket) syntax in EBNF notation. + * + * @param value rule to be made optional + */ +declare function optional(rule: RuleOrLiteral): ChoiceRule; + +/** + * Marks the given rule with a numerical precedence which will be used to + * resolve LR(1) conflicts at parser-generation time. When two rules overlap + * in a way that represents either a true ambiguity or a _local_ ambiguity + * given one token of lookahead, Tree-sitter will try to resolve the conflict by + * matching the rule with the higher precedence. The default precedence of all + * rules is zero. This works similarly to the precedence directives in Yacc grammars. + * + * @param number precedence weight + * @param rule rule being weighted + * + * @see https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables + * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html + */ +declare const prec: { + (number: number, rule: RuleOrLiteral): PrecRule; + + /** + * Marks the given rule as left-associative (and optionally applies a + * numerical precedence). When an LR(1) conflict arises in which all of the + * rules have the same numerical precedence, Tree-sitter will consult the + * rules' associativity. If there is a left-associative rule, Tree-sitter + * will prefer matching a rule that ends _earlier_. This works similarly to + * associativity directives in Yacc grammars. 
+ * + * @param number (optional) precedence weight + * @param rule rule to mark as left-associative + * + * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html + */ + left(rule: RuleOrLiteral): PrecLeftRule; + left(number: number, rule: RuleOrLiteral): PrecLeftRule; + + /** + * Marks the given rule as right-associative (and optionally applies a + * numerical precedence). When an LR(1) conflict arises in which all of the + * rules have the same numerical precedence, Tree-sitter will consult the + * rules' associativity. If there is a right-associative rule, Tree-sitter + * will prefer matching a rule that ends _later_. This works similarly to + * associativity directives in Yacc grammars. + * + * @param number (optional) precedence weight + * @param rule rule to mark as right-associative + * + * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html + */ + right(rule: RuleOrLiteral): PrecRightRule; + right(number: number, rule: RuleOrLiteral): PrecRightRule; + + /** + * Marks the given rule with a numerical precedence which will be used to + * resolve LR(1) conflicts at _runtime_ instead of parser-generation time. + * This is only necessary when handling a conflict dynamically using the + * `conflicts` field in the grammar, and when there is a genuine _ambiguity_: + * multiple rules correctly match a given piece of code. In that event, + * Tree-sitter compares the total dynamic precedence associated with each + * rule, and selects the one with the highest total. This is similar to + * dynamic precedence directives in Bison grammars. + * + * @param number precedence weight + * @param rule rule being weighted + * + * @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html + */ + dynamic(number: number, rule: RuleOrLiteral): PrecDynamicRule; +}; + +/** + * Creates a rule that matches _zero-or-more_ occurrences of a given rule. + * It is analogous to the `{x}` (curly brace) syntax in EBNF notation. 
This + * rule is implemented in terms of `repeat1` but is included because it + * is very commonly used. + * + * @param rule rule to repeat, zero or more times + */ +declare function repeat(rule: RuleOrLiteral): RepeatRule; + +/** + * Creates a rule that matches one-or-more occurrences of a given rule. + * + * @param rule rule to repeat, one or more times + */ +declare function repeat1(rule: RuleOrLiteral): Repeat1Rule; + +/** + * Creates a rule that matches any number of other rules, one after another. + * It is analogous to simply writing multiple symbols next to each other + * in EBNF notation. + * + * @param rules ordered rules that comprise the sequence + */ +declare function seq(...rules: RuleOrLiteral[]): SeqRule; + +/** + * Creates a symbol rule, representing another rule in the grammar by name. + * + * @param name name of the target rule + */ +declare function sym(name: Name): SymbolRule; + +/** + * Marks the given rule as producing only a single token. Tree-sitter's + * default is to treat each String or RegExp literal in the grammar as a + * separate token. Each token is matched separately by the lexer and + * returned as its own leaf node in the tree. The token function allows + * you to express a complex rule using the DSL functions (rather + * than as a single regular expression) but still have Tree-sitter treat + * it as a single token. + * + * @param rule rule to represent as a single token + */ +declare const token: { + (rule: RuleOrLiteral): TokenRule; + + /** + * Marks the given rule as producing an immediate token. This allows + * the parser to produce a different token based on whether or not + * there are `extras` preceding the token's main content. When there + * are _no_ leading `extras`, an immediate token is preferred over a + * normal token which would otherwise match. 
+ * + * @param rule rule to represent as an immediate token + */ + immediate(rule: RuleOrLiteral): ImmediateTokenRule; +}; + +/** + * Creates a new language grammar with the provided schema. + * + * @param options grammar options + */ +declare function grammar( + options: Grammar, +): GrammarSchema; + +/** + * Extends an existing language grammar with the provided options, + * creating a new language. + * + * @param baseGrammar base grammar schema to extend from + * @param options grammar options for the new extended language + */ +declare function grammar< + BaseGrammarRuleName extends string, + RuleName extends string +>( + baseGrammar: GrammarSchema, + options: Grammar, +): GrammarSchema; diff --git a/cli/npm/package.json b/cli/npm/package.json index 4f4d08cc..4c6dfe90 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.15.8", + "version": "0.17.3", "author": "Max Brunsfeld", "license": "MIT", "repository": { diff --git a/cli/src/error.rs b/cli/src/error.rs index 968486f4..63b57c9e 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -1,6 +1,7 @@ +use super::test_highlight; use std::fmt::Write; use std::io; -use tree_sitter_highlight::PropertySheetError; +use tree_sitter::{QueryError, QueryErrorKind}; #[derive(Debug)] pub struct Error(pub Vec); @@ -50,6 +51,34 @@ impl Error { } } +impl<'a> From<(&str, QueryError)> for Error { + fn from((path, error): (&str, QueryError)) -> Self { + let mut msg = format!("Query error at {}:{}. 
", path, error.row + 1); + match error.kind { + QueryErrorKind::Capture => write!(&mut msg, "Invalid capture name {}", error.message), + QueryErrorKind::Field => write!(&mut msg, "Invalid field name {}", error.message), + QueryErrorKind::NodeType => write!(&mut msg, "Invalid node type {}", error.message), + QueryErrorKind::Syntax => write!(&mut msg, "Invalid syntax:\n{}", error.message), + QueryErrorKind::Structure => write!(&mut msg, "Impossible pattern:\n{}", error.message), + QueryErrorKind::Predicate => write!(&mut msg, "Invalid predicate: {}", error.message), + } + .unwrap(); + Self::new(msg) + } +} + +impl<'a> From for Error { + fn from(error: tree_sitter_highlight::Error) -> Self { + Error::new(format!("{:?}", error)) + } +} + +impl<'a> From for Error { + fn from(error: tree_sitter_tags::Error) -> Self { + Error::new(format!("{}", error)) + } +} + impl From for Error { fn from(error: serde_json::Error) -> Self { Error::new(error.to_string()) @@ -62,8 +91,14 @@ impl From for Error { } } -impl From for Error { - fn from(error: rsass::Error) -> Self { +impl From for Error { + fn from(error: glob::PatternError) -> Self { + Error::new(error.to_string()) + } +} + +impl From for Error { + fn from(error: glob::GlobError) -> Self { Error::new(error.to_string()) } } @@ -74,18 +109,14 @@ impl From for Error { } } +impl From for Error { + fn from(error: test_highlight::Failure) -> Self { + Error::new(error.message()) + } +} + impl From for Error { fn from(error: String) -> Self { Error::new(error) } } - -impl From for Error { - fn from(error: PropertySheetError) -> Self { - match error { - PropertySheetError::InvalidFormat(e) => Self::from(e), - PropertySheetError::InvalidRegex(e) => Self::regex(&e.to_string()), - PropertySheetError::InvalidJSON(e) => Self::from(e), - } - } -} diff --git a/cli/src/generate/build_tables/build_lex_table.rs b/cli/src/generate/build_tables/build_lex_table.rs index 21594253..b365feb1 100644 --- 
a/cli/src/generate/build_tables/build_lex_table.rs +++ b/cli/src/generate/build_tables/build_lex_table.rs @@ -2,7 +2,7 @@ use super::coincident_tokens::CoincidentTokenIndex; use super::token_conflicts::TokenConflictMap; use crate::generate::dedup::split_state_id_groups; use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; -use crate::generate::nfa::{CharacterSet, NfaCursor}; +use crate::generate::nfa::NfaCursor; use crate::generate::rules::{Symbol, TokenSet}; use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable}; use log::info; @@ -189,13 +189,10 @@ impl<'a> LexTableBuilder<'a> { // character that leads to the empty set of NFA states. if eof_valid { let (next_state_id, _) = self.add_state(Vec::new(), false); - self.table.states[state_id].advance_actions.push(( - CharacterSet::empty().add_char('\0'), - AdvanceAction { - state: next_state_id, - in_main_token: true, - }, - )); + self.table.states[state_id].eof_action = Some(AdvanceAction { + state: next_state_id, + in_main_token: true, + }); } for transition in transitions { @@ -273,6 +270,7 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { let signature = ( i == 0, state.accept_action, + state.eof_action.is_some(), state .advance_actions .iter() @@ -320,6 +318,9 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { for (_, advance_action) in new_state.advance_actions.iter_mut() { advance_action.state = group_ids_by_state_id[advance_action.state]; } + if let Some(eof_action) = &mut new_state.eof_action { + eof_action.state = group_ids_by_state_id[eof_action.state]; + } new_states.push(new_state); } @@ -364,6 +365,9 @@ fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) { for (_, advance_action) in state.advance_actions.iter_mut() { advance_action.state = new_ids_by_old_id[advance_action.state]; } + if let Some(eof_action) = &mut state.eof_action { + eof_action.state = 
new_ids_by_old_id[eof_action.state]; + } state }) .collect(); diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 41d3932c..c63701ee 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -7,7 +7,7 @@ use crate::generate::grammars::{ use crate::generate::node_types::VariableInfo; use crate::generate::rules::{Associativity, Symbol, SymbolType, TokenSet}; use crate::generate::tables::{ - FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, + FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, ProductionInfo, ProductionInfoId, }; use core::ops::Range; @@ -16,17 +16,19 @@ use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; use std::fmt::Write; use std::u32; +// For conflict reporting, each parse state is associated with an example +// sequence of symbols that could lead to that parse state. +type SymbolSequence = Vec; + +type AuxiliarySymbolSequence = Vec; +pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>); + #[derive(Clone)] struct AuxiliarySymbolInfo { auxiliary_symbol: Symbol, parent_symbols: Vec, } -type SymbolSequence = Vec; -type AuxiliarySymbolSequence = Vec; - -pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>); - struct ParseStateQueueEntry { state_id: ParseStateId, preceding_auxiliary_symbols: AuxiliarySymbolSequence, @@ -41,6 +43,7 @@ struct ParseTableBuilder<'a> { state_ids_by_item_set: HashMap, ParseStateId>, parse_state_info_by_id: Vec>, parse_state_queue: VecDeque, + non_terminal_extra_states: Vec<(Symbol, usize)>, parse_table: ParseTable, } @@ -52,7 +55,7 @@ impl<'a> ParseTableBuilder<'a> { .push(ProductionInfo::default()); // Add the error state at index 0. 
- self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); + self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default(), false); // Add the starting state at index 1. self.add_parse_state( @@ -66,8 +69,40 @@ impl<'a> ParseTableBuilder<'a> { .iter() .cloned(), ), + false, ); + // Compute the possible item sets for non-terminal extras. + let mut non_terminal_extra_item_sets_by_first_terminal = BTreeMap::new(); + for extra_non_terminal in self + .syntax_grammar + .extra_symbols + .iter() + .filter(|s| s.is_non_terminal()) + { + let variable = &self.syntax_grammar.variables[extra_non_terminal.index]; + for production in &variable.productions { + non_terminal_extra_item_sets_by_first_terminal + .entry(production.first_symbol().unwrap()) + .or_insert(ParseItemSet::default()) + .insert( + ParseItem { + variable_index: extra_non_terminal.index as u32, + production, + step_index: 1, + }, + &[Symbol::end()].iter().cloned().collect(), + ); + } + } + + // Add a state for each starting terminal of a non-terminal extra rule. + for (terminal, item_set) in non_terminal_extra_item_sets_by_first_terminal { + self.non_terminal_extra_states + .push((terminal, self.parse_table.states.len())); + self.add_parse_state(&Vec::new(), &Vec::new(), item_set, true); + } + while let Some(entry) = self.parse_state_queue.pop_front() { let item_set = self .item_set_builder @@ -91,9 +126,15 @@ impl<'a> ParseTableBuilder<'a> { preceding_symbols: &SymbolSequence, preceding_auxiliary_symbols: &AuxiliarySymbolSequence, item_set: ParseItemSet<'a>, + is_non_terminal_extra: bool, ) -> ParseStateId { match self.state_ids_by_item_set.entry(item_set) { + // If an equivalent item set has already been processed, then return + // the existing parse state index. Entry::Occupied(o) => *o.get(), + + // Otherwise, insert a new parse state and add it to the queue of + // parse states to populate. 
Entry::Vacant(v) => { let core = v.key().core(); let core_count = self.core_ids_by_core.len(); @@ -116,6 +157,7 @@ impl<'a> ParseTableBuilder<'a> { terminal_entries: HashMap::new(), nonterminal_entries: HashMap::new(), core_id, + is_non_terminal_extra, }); self.parse_state_queue.push_back(ParseStateQueueEntry { state_id, @@ -138,7 +180,12 @@ impl<'a> ParseTableBuilder<'a> { let mut non_terminal_successors = BTreeMap::new(); let mut lookaheads_with_conflicts = TokenSet::new(); + // Each item in the item set contributes to either or a Shift action or a Reduce + // action in this state. for (item, lookaheads) in &item_set.entries { + // If the item is unfinished, then this state has a transition for the item's + // next symbol. Advance the item to its next step and insert the resulting + // item into the successor item set. if let Some(next_symbol) = item.symbol() { let successor = item.successor(); if next_symbol.is_non_terminal() { @@ -160,7 +207,10 @@ impl<'a> ParseTableBuilder<'a> { .or_insert_with(|| ParseItemSet::default()) .insert(successor, lookaheads); } - } else { + } + // If the item is finished, then add a Reduce action to this state based + // on this item. + else { let action = if item.is_augmented() { ParseAction::Accept } else { @@ -179,6 +229,10 @@ impl<'a> ParseTableBuilder<'a> { .terminal_entries .entry(lookahead); let entry = entry.or_insert_with(|| ParseTableEntry::new()); + + // While inserting Reduce actions, eagerly resolve conflicts related + // to precedence: avoid inserting lower-precedence reductions, and + // clear the action list when inserting higher-precedence reductions. if entry.actions.is_empty() { entry.actions.push(action); } else if action.precedence() > entry.actions[0].precedence() { @@ -193,12 +247,16 @@ impl<'a> ParseTableBuilder<'a> { } } + // Having computed the the successor item sets for each symbol, add a new + // parse state for each of these item sets, and add a corresponding Shift + // action to this state. 
for (symbol, next_item_set) in terminal_successors { preceding_symbols.push(symbol); let next_state_id = self.add_parse_state( &preceding_symbols, &preceding_auxiliary_symbols, next_item_set, + self.parse_table.states[state_id].is_non_terminal_extra, ); preceding_symbols.pop(); @@ -226,13 +284,19 @@ impl<'a> ParseTableBuilder<'a> { &preceding_symbols, &preceding_auxiliary_symbols, next_item_set, + self.parse_table.states[state_id].is_non_terminal_extra, ); preceding_symbols.pop(); self.parse_table.states[state_id] .nonterminal_entries - .insert(symbol, next_state_id); + .insert(symbol, GotoAction::Goto(next_state_id)); } + // For any symbol with multiple actions, perform conflict resolution. + // This will either + // * choose one action over the others using precedence or associativity + // * keep multiple actions if this conflict has been whitelisted in the grammar + // * fail, terminating the parser generation process for symbol in lookaheads_with_conflicts.iter() { self.handle_conflict( &item_set, @@ -243,15 +307,50 @@ impl<'a> ParseTableBuilder<'a> { )?; } + // Finally, add actions for the grammar's `extra` symbols. let state = &mut self.parse_table.states[state_id]; - for extra_token in &self.syntax_grammar.extra_tokens { - state - .terminal_entries - .entry(*extra_token) - .or_insert(ParseTableEntry { - reusable: true, - actions: vec![ParseAction::ShiftExtra], - }); + let is_non_terminal_extra = state.is_non_terminal_extra; + let is_end_of_non_terminal_extra = + is_non_terminal_extra && state.terminal_entries.len() == 1; + + // Add actions for the start tokens of each non-terminal extra rule. + // These actions are added to every state except for the states that are + // alread within non-terminal extras. Non-terminal extras are not allowed + // to nest within each other. 
+ if !is_non_terminal_extra { + for (terminal, state_id) in &self.non_terminal_extra_states { + state + .terminal_entries + .entry(*terminal) + .or_insert(ParseTableEntry { + reusable: true, + actions: vec![ParseAction::Shift { + state: *state_id, + is_repetition: false, + }], + }); + } + } + + // Add ShiftExtra actions for the terminal extra tokens. These actions + // are added to every state except for those at the ends of non-terminal + // extras. + if !is_end_of_non_terminal_extra { + for extra_token in &self.syntax_grammar.extra_symbols { + if extra_token.is_non_terminal() { + state + .nonterminal_entries + .insert(*extra_token, GotoAction::ShiftExtra); + } else { + state + .terminal_entries + .entry(*extra_token) + .or_insert(ParseTableEntry { + reusable: true, + actions: vec![ParseAction::ShiftExtra], + }); + } + } } Ok(()) @@ -362,8 +461,8 @@ impl<'a> ParseTableBuilder<'a> { } } - // If all reduce actions are left associative, remove the SHIFT action. - // If all reduce actions are right associative, remove the REDUCE actions. + // If all Reduce actions are left associative, remove the SHIFT action. + // If all Reduce actions are right associative, remove the REDUCE actions. 
match (has_left, has_non, has_right) { (true, false, false) => { entry.actions.pop(); @@ -744,7 +843,7 @@ fn populate_following_tokens( } } } - for extra in &grammar.extra_tokens { + for extra in &grammar.extra_symbols { if extra.is_terminal() { for entry in result.iter_mut() { entry.insert(*extra); @@ -774,6 +873,7 @@ pub(crate) fn build_parse_table<'a>( lexical_grammar, item_set_builder, variable_info, + non_terminal_extra_states: Vec::new(), state_ids_by_item_set: HashMap::new(), core_ids_by_core: HashMap::new(), parse_state_info_by_id: Vec::new(), diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index 5ecde0fd..d159a2c4 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -2,7 +2,9 @@ use super::token_conflicts::TokenConflictMap; use crate::generate::dedup::split_state_id_groups; use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; use crate::generate::rules::{AliasMap, Symbol, TokenSet}; -use crate::generate::tables::{ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry}; +use crate::generate::tables::{ + GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, +}; use log::info; use std::collections::{HashMap, HashSet}; use std::mem; @@ -66,6 +68,7 @@ impl<'a> Minimizer<'a> { .. 
} => { if !self.simple_aliases.contains_key(&symbol) + && !self.syntax_grammar.supertype_symbols.contains(&symbol) && !aliased_symbols.contains(&symbol) && self.syntax_grammar.variables[symbol.index].kind != VariableType::Named @@ -101,7 +104,10 @@ impl<'a> Minimizer<'a> { state.update_referenced_states(|other_state_id, state| { if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) { done = false; - state.nonterminal_entries[symbol] + match state.nonterminal_entries.get(symbol) { + Some(GotoAction::Goto(state_id)) => *state_id, + _ => other_state_id, + } } else { other_state_id } @@ -194,6 +200,9 @@ impl<'a> Minimizer<'a> { right_state: &ParseState, group_ids_by_state_id: &Vec, ) -> bool { + if left_state.is_non_terminal_extra != right_state.is_non_terminal_extra { + return true; + } for (token, left_entry) in &left_state.terminal_entries { if let Some(right_entry) = right_state.terminal_entries.get(token) { if self.entries_conflict( @@ -262,18 +271,24 @@ impl<'a> Minimizer<'a> { for (symbol, s1) in &state1.nonterminal_entries { if let Some(s2) = state2.nonterminal_entries.get(symbol) { - let group1 = group_ids_by_state_id[*s1]; - let group2 = group_ids_by_state_id[*s2]; - if group1 != group2 { - info!( - "split states {} {} - successors for {} are split: {} {}", - state1.id, - state2.id, - self.symbol_name(symbol), - s1, - s2, - ); - return true; + match (s1, s2) { + (GotoAction::ShiftExtra, GotoAction::ShiftExtra) => continue, + (GotoAction::Goto(s1), GotoAction::Goto(s2)) => { + let group1 = group_ids_by_state_id[*s1]; + let group2 = group_ids_by_state_id[*s2]; + if group1 != group2 { + info!( + "split states {} {} - successors for {} are split: {} {}", + state1.id, + state2.id, + self.symbol_name(symbol), + s1, + s2, + ); + return true; + } + } + _ => return true, } } } diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index e0f84244..2e5d2f57 100644 --- a/cli/src/generate/build_tables/mod.rs +++ 
b/cli/src/generate/build_tables/mod.rs @@ -271,6 +271,7 @@ fn identify_keywords( cursor.reset(vec![variable.start_state]); if all_chars_are_alphabetical(&cursor) && token_conflict_map.does_match_same_string(i, word_token.index) + && !token_conflict_map.does_match_different_string(i, word_token.index) { info!( "Keywords - add candidate {}", diff --git a/cli/src/generate/build_tables/token_conflicts.rs b/cli/src/generate/build_tables/token_conflicts.rs index edb92108..64e7564b 100644 --- a/cli/src/generate/build_tables/token_conflicts.rs +++ b/cli/src/generate/build_tables/token_conflicts.rs @@ -1,9 +1,9 @@ -use crate::generate::build_tables::item::{TokenSetDisplay}; +use crate::generate::build_tables::item::TokenSetDisplay; use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition}; use crate::generate::rules::TokenSet; -use std::collections::HashSet; use std::cmp::Ordering; +use std::collections::HashSet; use std::fmt; #[derive(Clone, Debug, Default, PartialEq, Eq)] @@ -13,6 +13,7 @@ struct TokenConflictStatus { does_match_valid_continuation: bool, does_match_separators: bool, matches_same_string: bool, + matches_different_string: bool, } pub(crate) struct TokenConflictMap<'a> { @@ -25,6 +26,12 @@ pub(crate) struct TokenConflictMap<'a> { } impl<'a> TokenConflictMap<'a> { + /// Create a token conflict map based on a lexical grammar, which describes the structure + /// of each token, and a `following_tokens` map, which indicates which tokens may appear + /// immediately after each other token. + /// + /// This analyzes the possible kinds of overlap between each pair of tokens and stores + /// them in a matrix.
pub fn new(grammar: &'a LexicalGrammar, following_tokens: Vec) -> Self { let mut cursor = NfaCursor::new(&grammar.nfa, Vec::new()); let starting_chars = get_starting_chars(&mut cursor, grammar); @@ -50,12 +57,21 @@ impl<'a> TokenConflictMap<'a> { } } + /// Do tokens `a` and `b` have the same conflict status with respect to + /// token `other`? pub fn has_same_conflict_status(&self, a: usize, b: usize, other: usize) -> bool { let left = &self.status_matrix[matrix_index(self.n, a, other)]; let right = &self.status_matrix[matrix_index(self.n, b, other)]; left == right } + /// Does token `i` match any strings that token `j` does *not* match? + pub fn does_match_different_string(&self, i: usize, j: usize) -> bool { + self.status_matrix[matrix_index(self.n, i, j)].matches_different_string + } + + /// Does token `i` match any strings that token `j` also matches, where + /// token `i` is preferred over token `j`? pub fn does_match_same_string(&self, i: usize, j: usize) -> bool { self.status_matrix[matrix_index(self.n, i, j)].matches_same_string } @@ -67,6 +83,7 @@ impl<'a> TokenConflictMap<'a> { || entry.matches_same_string } + /// Does token `i` match any strings that are *prefixes* of strings matched by `j`? pub fn does_match_prefix(&self, i: usize, j: usize) -> bool { self.status_matrix[matrix_index(self.n, i, j)].matches_prefix } @@ -239,19 +256,29 @@ fn compute_conflict_status( ); while let Some(state_set) = state_set_queue.pop() { - // Don't pursue states where there's no potential for conflict. - if grammar.variable_indices_for_nfa_states(&state_set).count() > 1 { - cursor.reset(state_set); - } else { + let mut live_variable_indices = grammar.variable_indices_for_nfa_states(&state_set); + + // If only one of the two tokens could possibly match from this state, then + // there is no reason to analyze any of its successors. Just record the fact + // that the token matches a string that the other token does not match.
+ let first_live_variable_index = live_variable_indices.next().unwrap(); + if live_variable_indices.count() == 0 { + if first_live_variable_index == i { + result.0.matches_different_string = true; + } else { + result.1.matches_different_string = true; + } continue; } - let has_sep = cursor.transition_chars().any(|(_, sep)| sep); + // Don't pursue states where there's no potential for conflict. + cursor.reset(state_set); + let within_separator = cursor.transition_chars().any(|(_, sep)| sep); // Examine each possible completed token in this state. let mut completion = None; for (id, precedence) in cursor.completions() { - if has_sep { + if within_separator { if id == i { result.0.does_match_separators = true; } else { @@ -316,7 +343,7 @@ fn compute_conflict_status( &transition, completed_id, completed_precedence, - has_sep, + within_separator, ) { can_advance = true; if advanced_id == i { diff --git a/cli/src/generate/dsl.js b/cli/src/generate/dsl.js index 55594871..62fb1d70 100644 --- a/cli/src/generate/dsl.js +++ b/cli/src/generate/dsl.js @@ -292,7 +292,12 @@ function grammar(baseGrammar, options) { extras = options.extras .call(ruleBuilder, ruleBuilder, baseGrammar.extras) - .map(normalize); + + if (!Array.isArray(extras)) { + throw new Error("Grammar's 'extras' function must return an array.") + } + + extras = extras.map(normalize); } let word = baseGrammar.word; diff --git a/cli/src/generate/grammar-schema.json b/cli/src/generate/grammar-schema.json index 55388364..5ca35370 100644 --- a/cli/src/generate/grammar-schema.json +++ b/cli/src/generate/grammar-schema.json @@ -1,15 +1,15 @@ { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "tree-sitter grammar specification", "type": "object", - "required": [ - "name", - "rules" - ], + "required": ["name", "rules"], "additionalProperties": false, "properties": { "name": { + "description": "the name of the grammar", "type": "string", "pattern": "^[a-zA-Z_]\\w*" }, @@ -60,6 +60,15 @@ "word": { "type": 
"string", "pattern": "^[a-zA-Z_]\\w*" + }, + + "supertypes": { + "description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.", + "type": "array", + "items": { + "description": "the name of a rule in `rules` or `extras`", + "type": "string" + } } }, @@ -96,20 +105,19 @@ "type": "string", "pattern": "^PATTERN$" }, - "value": {"type": "string"} + "value": { "type": "string" } }, "required": ["type", "value"] }, "symbol-rule": { - "required": ["name"], "type": "object", "properties": { "type": { "type": "string", "pattern": "^SYMBOL$" }, - "name": {"type": "string"} + "name": { "type": "string" } }, "required": ["type", "name"] }, @@ -210,6 +218,20 @@ "required": ["type", "content"] }, + "field-rule": { + "properties": { + "name": { "type": "string" }, + "type": { + "type": "string", + "pattern": "^FIELD$" + }, + "content": { + "$ref": "#/definitions/rule" + } + }, + "required": ["name", "type", "content"] + }, + "prec-rule": { "type": "object", "properties": { @@ -239,6 +261,7 @@ { "$ref": "#/definitions/repeat1-rule" }, { "$ref": "#/definitions/repeat-rule" }, { "$ref": "#/definitions/token-rule" }, + { "$ref": "#/definitions/field-rule" }, { "$ref": "#/definitions/prec-rule" } ] } diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs index 0b42c4c3..6cf325dd 100644 --- a/cli/src/generate/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -23,7 +23,7 @@ pub(crate) struct Variable { pub(crate) struct InputGrammar { pub name: String, pub variables: Vec, - pub extra_tokens: Vec, + pub extra_symbols: Vec, pub expected_conflicts: Vec>, pub external_tokens: Vec, pub variables_to_inline: Vec, @@ -87,7 +87,7 @@ pub(crate) struct ExternalToken { #[derive(Debug, Default)] pub(crate) struct SyntaxGrammar { pub variables: Vec, - pub extra_tokens: Vec, + pub extra_symbols: Vec, pub expected_conflicts: Vec>, pub external_tokens: 
Vec, pub supertype_symbols: Vec, diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 5446e4af..830c4a65 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -6,13 +6,12 @@ mod node_types; mod npm_files; pub mod parse_grammar; mod prepare_grammar; -pub mod properties; mod render; mod rules; mod tables; use self::build_tables::build_tables; -use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType}; +use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; use self::parse_grammar::parse_grammar; use self::prepare_grammar::prepare_grammar; use self::render::render_c_code; @@ -20,9 +19,8 @@ use self::rules::AliasMap; use crate::error::{Error, Result}; use lazy_static::lazy_static; use regex::{Regex, RegexBuilder}; -use std::collections::HashSet; -use std::fs::{self, File}; -use std::io::{BufWriter, Write}; +use std::fs; +use std::io::Write; use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; @@ -33,15 +31,9 @@ lazy_static! { .unwrap(); } -const NEW_HEADER_PARTS: [&'static str; 2] = [ - " - uint32_t large_state_count; - const uint16_t *small_parse_table; - const uint32_t *small_parse_table_map;", - " -#define SMALL_STATE(id) id - LARGE_STATE_COUNT -", -]; +const NEW_HEADER_PARTS: &[&'static str] = &[" + const uint16_t *alias_map; + uint32_t state_count;"]; struct GeneratedParser { c_code: String, @@ -51,13 +43,11 @@ struct GeneratedParser { pub fn generate_parser_in_directory( repo_path: &PathBuf, grammar_path: Option<&str>, - properties_only: bool, next_abi: bool, report_symbol_name: Option<&str>, ) -> Result<()> { let src_path = repo_path.join("src"); let header_path = src_path.join("tree_sitter"); - let properties_dir_path = repo_path.join("properties"); // Ensure that the output directories exist. 
fs::create_dir_all(&src_path)?; @@ -82,71 +72,48 @@ pub fn generate_parser_in_directory( prepare_grammar(&input_grammar)?; let language_name = input_grammar.name; - // If run with no arguments, read all of the property sheets and compile them to JSON. - if grammar_path.is_none() { - let token_names = get_token_names(&syntax_grammar, &lexical_grammar); - if let Ok(entries) = fs::read_dir(properties_dir_path) { - for entry in entries { - let css_path = entry?.path(); - let css = fs::read_to_string(&css_path)?; - let sheet = properties::generate_property_sheet(&css_path, &css, &token_names)?; - let property_sheet_json_path = src_path - .join(css_path.file_name().unwrap()) - .with_extension("json"); - let property_sheet_json_file = - File::create(&property_sheet_json_path).map_err(Error::wrap(|| { - format!("Failed to create {:?}", property_sheet_json_path) - }))?; - let mut writer = BufWriter::new(property_sheet_json_file); - serde_json::to_writer_pretty(&mut writer, &sheet)?; - } - } - } - // Generate the parser and related files. 
- if !properties_only { - let GeneratedParser { - c_code, - node_types_json, - } = generate_parser_for_grammar_with_opts( - &language_name, - syntax_grammar, - lexical_grammar, - inlines, - simple_aliases, - next_abi, - report_symbol_name, - )?; + let GeneratedParser { + c_code, + node_types_json, + } = generate_parser_for_grammar_with_opts( + &language_name, + syntax_grammar, + lexical_grammar, + inlines, + simple_aliases, + next_abi, + report_symbol_name, + )?; - write_file(&src_path.join("parser.c"), c_code)?; - write_file(&src_path.join("node-types.json"), node_types_json)?; + write_file(&src_path.join("parser.c"), c_code)?; + write_file(&src_path.join("node-types.json"), node_types_json)?; - if next_abi { - write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?; - } else { - let mut header = tree_sitter::PARSER_HEADER.to_string(); + if next_abi { + write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?; + } else { + let mut header = tree_sitter::PARSER_HEADER.to_string(); - for part in &NEW_HEADER_PARTS { - let pos = header - .find(part) - .expect("Missing expected part of parser.h header"); - header.replace_range(pos..(pos + part.len()), ""); - } - - write_file(&header_path.join("parser.h"), header)?; + for part in NEW_HEADER_PARTS.iter() { + let pos = header + .find(part) + .expect("Missing expected part of parser.h header"); + header.replace_range(pos..(pos + part.len()), ""); } - ensure_file(&repo_path.join("index.js"), || { - npm_files::index_js(&language_name) - })?; - ensure_file(&src_path.join("binding.cc"), || { - npm_files::binding_cc(&language_name) - })?; - ensure_file(&repo_path.join("binding.gyp"), || { - npm_files::binding_gyp(&language_name) - })?; + write_file(&header_path.join("parser.h"), header)?; } + ensure_file(&repo_path.join("index.js"), || { + npm_files::index_js(&language_name) + })?; + ensure_file(&src_path.join("binding.cc"), || { + npm_files::binding_cc(&language_name) + })?; + 
ensure_file(&repo_path.join("binding.gyp"), || { + npm_files::binding_gyp(&language_name) + })?; + Ok(()) } @@ -176,7 +143,8 @@ fn generate_parser_for_grammar_with_opts( next_abi: bool, report_symbol_name: Option<&str>, ) -> Result { - let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &inlines)?; + let variable_info = + node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?; let node_types_json = node_types::generate_node_types_json( &syntax_grammar, &lexical_grammar, @@ -208,35 +176,6 @@ fn generate_parser_for_grammar_with_opts( }) } -fn get_token_names( - syntax_grammar: &SyntaxGrammar, - lexical_grammar: &LexicalGrammar, -) -> HashSet { - let mut result = HashSet::new(); - for variable in &lexical_grammar.variables { - if variable.kind == VariableType::Named { - result.insert(variable.name.clone()); - } - } - for token in &syntax_grammar.external_tokens { - if token.kind == VariableType::Named { - result.insert(token.name.clone()); - } - } - for variable in &syntax_grammar.variables { - for production in &variable.productions { - for step in &production.steps { - if let Some(alias) = &step.alias { - if !step.symbol.is_non_terminal() && alias.is_named { - result.insert(alias.value.clone()); - } - } - } - } - } - result -} - fn load_grammar_file(grammar_path: &Path) -> Result { match grammar_path.extension().and_then(|e| e.to_str()) { Some("js") => Ok(load_js_grammar_file(grammar_path)?), diff --git a/cli/src/generate/nfa.rs b/cli/src/generate/nfa.rs index 2e23dc1e..4cbfaaa3 100644 --- a/cli/src/generate/nfa.rs +++ b/cli/src/generate/nfa.rs @@ -1,8 +1,10 @@ use std::char; use std::cmp::max; use std::cmp::Ordering; +use std::collections::HashSet; use std::fmt; use std::mem::swap; +use std::ops::Range; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum CharacterSet { @@ -178,6 +180,40 @@ impl CharacterSet { } } + pub fn ranges<'a>( + chars: &'a Vec, + ruled_out_characters: &'a HashSet, + ) -> impl 
Iterator> + 'a { + let mut prev_range: Option> = None; + chars + .iter() + .map(|c| (*c, false)) + .chain(Some(('\0', true))) + .filter_map(move |(c, done)| { + if done { + return prev_range.clone(); + } + if ruled_out_characters.contains(&(c as u32)) { + return None; + } + if let Some(range) = prev_range.clone() { + let mut prev_range_successor = range.end as u32 + 1; + while prev_range_successor < c as u32 { + if !ruled_out_characters.contains(&prev_range_successor) { + prev_range = Some(c..c); + return Some(range); + } + prev_range_successor += 1; + } + prev_range = Some(range.start..c); + None + } else { + prev_range = Some(c..c); + None + } + }) + } + #[cfg(test)] pub fn contains(&self, c: char) -> bool { match self { @@ -266,6 +302,13 @@ fn compare_chars(left: &Vec, right: &Vec) -> SetComparision { result.common = true; } } + + match (i, j) { + (Some(_), _) => result.left_only = true, + (_, Some(_)) => result.right_only = true, + _ => {} + } + result } @@ -718,7 +761,7 @@ mod tests { .add_range('d', 'e') ); - // A whitelist and an intersecting blacklist. + // An inclusion and an intersecting exclusion. // Both sets contain 'e', 'f', and 'm' let mut a = CharacterSet::empty() .add_range('c', 'h') @@ -748,7 +791,7 @@ mod tests { assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l'])); assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate()); - // A blacklist and an overlapping blacklist. + // An exclusion and an overlapping inclusion. // Both sets exclude 'c', 'd', and 'e' let mut a = CharacterSet::empty().add_range('a', 'e').negate(); let mut b = CharacterSet::empty().add_range('c', 'h').negate(); @@ -759,7 +802,7 @@ mod tests { assert_eq!(a, CharacterSet::Include(vec!['f', 'g', 'h'])); assert_eq!(b, CharacterSet::Include(vec!['a', 'b'])); - // A blacklist and a larger blacklist. + // An exclusion and a larger exclusion. 
let mut a = CharacterSet::empty().add_range('b', 'c').negate(); let mut b = CharacterSet::empty().add_range('a', 'd').negate(); assert_eq!( @@ -810,5 +853,53 @@ mod tests { ); assert!(a.does_intersect(&b)); assert!(b.does_intersect(&a)); + + let (a, b) = ( + CharacterSet::Include(vec!['c']), + CharacterSet::Exclude(vec!['a']), + ); + assert!(a.does_intersect(&b)); + assert!(b.does_intersect(&a)); + } + + #[test] + fn test_character_set_get_ranges() { + struct Row { + chars: Vec, + ruled_out_chars: Vec, + expected_ranges: Vec>, + } + + let table = [ + Row { + chars: vec!['a'], + ruled_out_chars: vec![], + expected_ranges: vec!['a'..'a'], + }, + Row { + chars: vec!['a', 'b', 'c', 'e', 'z'], + ruled_out_chars: vec![], + expected_ranges: vec!['a'..'c', 'e'..'e', 'z'..'z'], + }, + Row { + chars: vec!['a', 'b', 'c', 'e', 'h', 'z'], + ruled_out_chars: vec!['d', 'f', 'g'], + expected_ranges: vec!['a'..'h', 'z'..'z'], + }, + ]; + + for Row { + chars, + ruled_out_chars, + expected_ranges, + } in table.iter() + { + let ruled_out_chars = ruled_out_chars + .into_iter() + .map(|c: &char| *c as u32) + .collect(); + let ranges = CharacterSet::ranges(chars, &ruled_out_chars).collect::>(); + assert_eq!(ranges, *expected_ranges); + } } } diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 4dab4470..bc5a836f 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -1,12 +1,8 @@ -use super::grammars::{ - InlinedProductionMap, LexicalGrammar, Production, ProductionStep, SyntaxGrammar, - SyntaxVariable, VariableType, -}; +use super::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; use super::rules::{Alias, AliasMap, Symbol, SymbolType}; use crate::error::{Error, Result}; use serde_derive::Serialize; -use std::collections::{BTreeMap, HashMap}; -use std::mem; +use std::collections::{BTreeMap, HashMap, HashSet}; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub(crate) enum ChildType { @@ -16,20 +12,19 @@ 
pub(crate) enum ChildType { #[derive(Clone, Debug, Default, PartialEq, Eq)] pub(crate) struct FieldInfo { - pub required: bool, - pub multiple: bool, + pub quantity: ChildQuantity, pub types: Vec, } -#[derive(Debug, Default, PartialEq, Eq)] +#[derive(Clone, Debug, Default, PartialEq, Eq)] pub(crate) struct VariableInfo { pub fields: HashMap, - pub child_types: Vec, + pub children: FieldInfo, pub children_without_fields: FieldInfo, pub has_multi_step_production: bool, } -#[derive(Debug, Serialize, PartialEq, Eq, Default)] +#[derive(Debug, Serialize, PartialEq, Eq, Default, PartialOrd, Ord)] pub(crate) struct NodeInfoJSON { #[serde(rename = "type")] kind: String, @@ -42,252 +37,296 @@ pub(crate) struct NodeInfoJSON { subtypes: Option>, } -#[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Clone, Debug, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)] pub(crate) struct NodeTypeJSON { #[serde(rename = "type")] kind: String, named: bool, } -#[derive(Debug, Serialize, PartialEq, Eq)] +#[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)] pub(crate) struct FieldInfoJSON { multiple: bool, required: bool, types: Vec, } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct ChildQuantity { + exists: bool, + required: bool, + multiple: bool, +} + +impl Default for FieldInfoJSON { + fn default() -> Self { + FieldInfoJSON { + multiple: false, + required: true, + types: Vec::new(), + } + } +} + +impl Default for ChildQuantity { + fn default() -> Self { + Self::one() + } +} + +impl ChildQuantity { + fn zero() -> Self { + ChildQuantity { + exists: false, + required: false, + multiple: false, + } + } + + fn one() -> Self { + ChildQuantity { + exists: true, + required: true, + multiple: false, + } + } + + fn append(&mut self, other: ChildQuantity) { + if other.exists { + if self.exists || other.multiple { + self.multiple = true; + } + if other.required { + self.required = true; + } + self.exists = true; + } + } + + fn union(&mut self, other: 
ChildQuantity) -> bool { + let mut result = false; + if !self.exists && other.exists { + result = true; + self.exists = true; + } + if self.required && !other.required { + result = true; + self.required = false; + } + if !self.multiple && other.multiple { + result = true; + self.multiple = true; + } + result + } +} + +/// Compute a summary of the public-facing structure of each variable in the +/// grammar. Each variable in the grammar corresponds to a distinct public-facing +/// node type. +/// +/// The information collected about each node type `N` is: +/// 1. `child_types` - The types of visible children that can appear within `N`. +/// 2. `fields` - The fields that `N` can have. Data regarding each field: +/// * `types` - The types of visible children the field can contain. +/// * `optional` - Do `N` nodes always have this field? +/// * `multiple` - Can `N` nodes have multiple children for this field? +/// 3. `children_without_fields` - The *other* named children of `N` that are +/// not associated with fields. Data regarding these children: +/// * `types` - The types of named children with no field. +/// * `optional` - Do `N` nodes always have at least one named child with no field? +/// * `multiple` - Can `N` nodes have multiple named children with no field? +/// +/// Each summary must account for some indirect factors: +/// 1. hidden nodes. When a parent node `N` has a hidden child `C`, the visible +/// children of `C` *appear* to be direct children of `N`. +/// 2. aliases. If a parent node type `M` is aliased as some other type `N`, +/// then nodes which *appear* to have type `N` may have internal structure based +/// on `M`. 
pub(crate) fn get_variable_info( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, - inlines: &InlinedProductionMap, + default_aliases: &AliasMap, ) -> Result> { - let mut result = Vec::new(); + let child_type_is_visible = |t: &ChildType| { + variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous + }; - // Determine which field names and child node types can appear directly - // within each type of node. - let mut steps = Vec::new(); - for (i, variable) in syntax_grammar.variables.iter().enumerate() { - let mut info = VariableInfo { - fields: HashMap::new(), - child_types: Vec::new(), - children_without_fields: FieldInfo { - multiple: false, - required: true, - types: Vec::new(), - }, - has_multi_step_production: false, - }; + let child_type_is_named = |t: &ChildType| { + variable_type_for_child_type(t, syntax_grammar, lexical_grammar) == VariableType::Named + }; - steps.clear(); - if get_all_child_steps(variable, inlines, &mut steps) > 1 { - info.has_multi_step_production = true; - } + // Each variable's summary can depend on the summaries of other hidden variables, + // and variables can have mutually recursive structure. So we compute the summaries + // iteratively, in a loop that terminates only when no more changes are possible. 
+ let mut did_change = true; + let mut all_initialized = false; + let mut result = vec![VariableInfo::default(); syntax_grammar.variables.len()]; + while did_change { + did_change = false; - let is_recursive = steps.iter().any(|s| s.symbol == Symbol::non_terminal(i)); - - for step in &steps { - let child_type = if let Some(alias) = &step.alias { - ChildType::Aliased(alias.clone()) - } else { - ChildType::Normal(step.symbol) - }; - - if let Some(field_name) = &step.field_name { - let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo { - multiple: false, - required: true, - types: Vec::new(), - }); - field_info.multiple |= is_recursive; - if let Err(i) = field_info.types.binary_search(&child_type) { - field_info.types.insert(i, child_type.clone()); - } - } else if variable_type_for_child_type(&child_type, syntax_grammar, lexical_grammar) - == VariableType::Named - { - let children_info = &mut info.children_without_fields; - children_info.multiple |= is_recursive; - if let Err(i) = children_info.types.binary_search(&child_type) { - children_info.types.insert(i, child_type.clone()); - } - } - - if let Err(i) = info.child_types.binary_search(&child_type) { - info.child_types.insert(i, child_type.clone()); - } - } - - for production in &variable.productions { - let production_fields: Vec<&String> = production - .steps - .iter() - .filter_map(|s| s.field_name.as_ref()) - .collect(); - for (field_name, field_info) in info.fields.iter_mut() { - let mut occurrence_count = 0; - for f in &production_fields { - if *f == field_name { - occurrence_count += 1; - } - } - if occurrence_count == 0 { - field_info.required = false; - } - if occurrence_count > 1 { - field_info.multiple = true; - } - } - - let named_children_without_fields_count = production - .steps - .iter() - .filter(|s| { - if s.field_name.is_some() { - false - } else if let Some(alias) = &s.alias { - alias.is_named - } else if s.symbol.is_non_terminal() { - true - } else if 
s.symbol.is_external() { - syntax_grammar.external_tokens[s.symbol.index].kind == VariableType::Named - } else { - lexical_grammar.variables[s.symbol.index].kind == VariableType::Named - } - }) - .count(); - if named_children_without_fields_count == 0 { - info.children_without_fields.required = false; - } - if named_children_without_fields_count > 1 { - info.children_without_fields.multiple = true; - } - } - - result.push(info); - } - - // Expand each node type's information recursively to inherit the properties of - // hidden children. - let mut done = false; - while !done { - done = true; for (i, variable) in syntax_grammar.variables.iter().enumerate() { - // Move this variable's info out of the vector so it can be modified - // while reading from other entries of the vector. - let mut variable_info = VariableInfo::default(); - mem::swap(&mut variable_info, &mut result[i]); + let mut variable_info = result[i].clone(); - steps.clear(); - get_all_child_steps(variable, inlines, &mut steps); + // Examine each of the variable's productions. The variable's child types can be + // immediately combined across all productions, but the child quantities must be + // recorded separately for each production. 
+ for production in &variable.productions { + let mut production_field_quantities = HashMap::new(); + let mut production_children_quantity = ChildQuantity::zero(); + let mut production_children_without_fields_quantity = ChildQuantity::zero(); + let mut production_has_uninitialized_invisible_children = false; - for step in &steps { - let child_symbol = step.symbol; - if step.alias.is_none() - && child_symbol.kind == SymbolType::NonTerminal - && !syntax_grammar.variables[child_symbol.index] - .kind - .is_visible() - && !syntax_grammar.supertype_symbols.contains(&child_symbol) - { - let child_variable_info = &result[child_symbol.index]; + if production.steps.len() > 1 { + variable_info.has_multi_step_production = true; + } - // If a hidden child can have multiple children, then this - // node can appear to have multiple children. - if child_variable_info.has_multi_step_production { - variable_info.has_multi_step_production = true; + for step in &production.steps { + let child_symbol = step.symbol; + let child_type = if let Some(alias) = &step.alias { + ChildType::Aliased(alias.clone()) + } else if let Some(alias) = default_aliases.get(&step.symbol) { + ChildType::Aliased(alias.clone()) + } else { + ChildType::Normal(child_symbol) + }; + + let child_is_hidden = !child_type_is_visible(&child_type) + && !syntax_grammar.supertype_symbols.contains(&child_symbol); + + // Maintain the set of all child types for this variable, and the quantity of + // visible children in this production. + did_change |= + extend_sorted(&mut variable_info.children.types, Some(&child_type)); + if !child_is_hidden { + production_children_quantity.append(ChildQuantity::one()); } - // Inherit fields from this hidden child - for (field_name, child_field_info) in &child_variable_info.fields { + // Maintain the set of child types associated with each field, and the quantity + // of children associated with each field in this production. 
+ if let Some(field_name) = &step.field_name { let field_info = variable_info .fields .entry(field_name.clone()) - .or_insert_with(|| { - done = false; - child_field_info.clone() - }); - if child_field_info.multiple && !field_info.multiple { - field_info.multiple = child_field_info.multiple; - done = false; + .or_insert(FieldInfo::default()); + did_change |= extend_sorted(&mut field_info.types, Some(&child_type)); + + let production_field_quantity = production_field_quantities + .entry(field_name) + .or_insert(ChildQuantity::zero()); + + // Inherit the types and quantities of hidden children associated with fields. + if child_is_hidden && child_symbol.is_non_terminal() { + let child_variable_info = &result[child_symbol.index]; + did_change |= extend_sorted( + &mut field_info.types, + &child_variable_info.children.types, + ); + production_field_quantity.append(child_variable_info.children.quantity); + } else { + production_field_quantity.append(ChildQuantity::one()); } - if !child_field_info.required && field_info.required { - field_info.required = child_field_info.required; - done = false; + } + // Maintain the set of named children without fields within this variable. + else if child_type_is_named(&child_type) { + production_children_without_fields_quantity.append(ChildQuantity::one()); + did_change |= extend_sorted( + &mut variable_info.children_without_fields.types, + Some(&child_type), + ); + } + + // Inherit all child information from hidden children. + if child_is_hidden && child_symbol.is_non_terminal() { + let child_variable_info = &result[child_symbol.index]; + + // If a hidden child can have multiple children, then its parent node can + // appear to have multiple children. 
+ if child_variable_info.has_multi_step_production { + variable_info.has_multi_step_production = true; } - for child_type in &child_field_info.types { - if let Err(i) = field_info.types.binary_search(&child_type) { - field_info.types.insert(i, child_type.clone()); - done = false; + + // If a hidden child has fields, then the parent node can appear to have + // those same fields. + for (field_name, child_field_info) in &child_variable_info.fields { + production_field_quantities + .entry(field_name) + .or_insert(ChildQuantity::zero()) + .append(child_field_info.quantity); + did_change |= extend_sorted( + &mut variable_info + .fields + .entry(field_name.clone()) + .or_insert(FieldInfo::default()) + .types, + &child_field_info.types, + ); + } + + // If a hidden child has children, then the parent node can appear to have + // those same children. + production_children_quantity.append(child_variable_info.children.quantity); + did_change |= extend_sorted( + &mut variable_info.children.types, + &child_variable_info.children.types, + ); + + // If a hidden child can have named children without fields, then the parent + // node can appear to have those same children. + if step.field_name.is_none() { + let grandchildren_info = &child_variable_info.children_without_fields; + if !grandchildren_info.types.is_empty() { + production_children_without_fields_quantity + .append(child_variable_info.children_without_fields.quantity); + did_change |= extend_sorted( + &mut variable_info.children_without_fields.types, + &child_variable_info.children_without_fields.types, + ); } } } - // Inherit child types from this hidden child - for child_type in &child_variable_info.child_types { - if let Err(i) = variable_info.child_types.binary_search(&child_type) { - variable_info.child_types.insert(i, child_type.clone()); - done = false; - } + // Note whether or not this production contains children whose summaries + // have not yet been computed. 
+ if child_symbol.index >= i && !all_initialized { + production_has_uninitialized_invisible_children = true; } + } - // If any field points to this hidden child, inherit child types - // for the field. - if let Some(field_name) = &step.field_name { - let field_info = variable_info.fields.get_mut(field_name).unwrap(); - for child_type in &child_variable_info.child_types { - if let Err(i) = field_info.types.binary_search(&child_type) { - field_info.types.insert(i, child_type.clone()); - done = false; - } - } - } else { - // Inherit child types without fields from this hidden child - // Inherit info about children w/o fields from this hidden child - let grandchildren_info = &child_variable_info.children_without_fields; - if grandchildren_info.multiple - && !variable_info.children_without_fields.multiple - { - variable_info.children_without_fields.multiple = true; - done = false; - } - // if !grandchildren_info.required - // && variable_info.children_without_fields.required - // { - // variable_info.children_without_fields.required = false; - // done = false; - // } - for child_type in &grandchildren_info.types { - if let Err(i) = variable_info - .children_without_fields - .types - .binary_search(&child_type) - { - variable_info - .children_without_fields - .types - .insert(i, child_type.clone()); - done = false; - } - } + // If this production's children all have had their summaries initialized, + // then expand the quantity information with all of the possibilities introduced + // by this production. 
+ if !production_has_uninitialized_invisible_children { + did_change |= variable_info + .children + .quantity + .union(production_children_quantity); + + did_change |= variable_info + .children_without_fields + .quantity + .union(production_children_without_fields_quantity); + + for (field_name, info) in variable_info.fields.iter_mut() { + did_change |= info.quantity.union( + production_field_quantities + .get(field_name) + .cloned() + .unwrap_or(ChildQuantity::zero()), + ); } } } - // Move this variable's info back into the vector. result[i] = variable_info; } + + all_initialized = true; } for supertype_symbol in &syntax_grammar.supertype_symbols { - let variable = &syntax_grammar.variables[supertype_symbol.index]; - if variable.kind != VariableType::Hidden { - return Err(Error::grammar(&format!( - "Supertype symbols must be hidden, but `{}` is not", - variable.name - ))); - } - if result[supertype_symbol.index].has_multi_step_production { + let variable = &syntax_grammar.variables[supertype_symbol.index]; return Err(Error::grammar(&format!( "Supertype symbols must always have a single visible child, but `{}` can have multiple", variable.name @@ -295,178 +334,31 @@ pub(crate) fn get_variable_info( } } - let child_type_is_visible = |t: &ChildType| { - variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous - }; - + // Update all of the node type lists to eliminate hidden nodes. for supertype_symbol in &syntax_grammar.supertype_symbols { result[supertype_symbol.index] - .child_types + .children + .types .retain(child_type_is_visible); } - - for i in 0..result.len() { - let mut variable_info = VariableInfo::default(); - mem::swap(&mut variable_info, &mut result[i]); - - // For each field, make the `types` list more concise by replacing sets of - // subtypes with a single supertype. 
+ for variable_info in result.iter_mut() { for (_, field_info) in variable_info.fields.iter_mut() { - for supertype_symbol in &syntax_grammar.supertype_symbols { - sorted_vec_replace( - &mut field_info.types, - &result[supertype_symbol.index].child_types, - ChildType::Normal(*supertype_symbol), - ); - } - field_info.types.retain(child_type_is_visible); } - - for supertype_symbol in &syntax_grammar.supertype_symbols { - sorted_vec_replace( - &mut variable_info.children_without_fields.types, - &result[supertype_symbol.index].child_types, - ChildType::Normal(*supertype_symbol), - ); - } - + variable_info.fields.retain(|_, v| !v.types.is_empty()); variable_info .children_without_fields .types .retain(child_type_is_visible); - - result[i] = variable_info; } Ok(result) } -// Summarize information about this variable's possible children by walking -// all of its productions. -fn get_all_child_steps( - variable: &SyntaxVariable, - inlines: &InlinedProductionMap, - output: &mut Vec, -) -> usize { - // For each of the given variable's productions, insert all of the reachable steps - // into the output vector, and return the longest possible production length. - return variable - .productions - .iter() - .map(|p| process_production(inlines, p, 0, output)) - .max() - .unwrap_or(0); - - // For the given production suffix, add all of the remaining steps into the output - // vector and return the longest possible production length. - fn process_production( - inlines: &InlinedProductionMap, - production: &Production, - step_index: usize, - output: &mut Vec, - ) -> usize { - let mut max_length = production.steps.len(); - - // Process each of the remaining steps of the production. - for (i, step) in production.steps.iter().enumerate().skip(step_index) { - // If this step is inlined, then process the corresponding suffixes of - // all of the inlined productions instead. 
- if let Some(inlined_productions) = inlines.inlined_productions(production, i as u32) { - for inlined_production in inlined_productions { - let length = process_production(inlines, inlined_production, i, output); - if length > max_length { - max_length = length; - } - } - break; - } - - // Otherwise, insert this step into the output vector unless it is already - // present. - if let Err(i) = output.binary_search(step) { - output.insert(i, step.clone()); - } - } - - return max_length; - } -} - -fn variable_type_for_child_type( - child_type: &ChildType, - syntax_grammar: &SyntaxGrammar, - lexical_grammar: &LexicalGrammar, -) -> VariableType { - match child_type { - ChildType::Aliased(alias) => { - if alias.is_named { - VariableType::Named - } else { - VariableType::Anonymous - } - } - ChildType::Normal(symbol) => { - if syntax_grammar.supertype_symbols.contains(&symbol) { - return VariableType::Named; - } else { - match symbol.kind { - SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind, - SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind, - SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind, - _ => VariableType::Hidden, - } - } - } - } -} - -fn sorted_vec_replace(left: &mut Vec, right: &Vec, value: T) -> bool -where - T: Eq + Ord, -{ - if left.len() == 0 { - return false; - } - - let mut i = 0; - for right_elem in right.iter() { - while left[i] < *right_elem { - i += 1; - if i == left.len() { - return false; - } - } - if left[i] != *right_elem { - return false; - } - } - - i = 0; - left.retain(|left_elem| { - if i == right.len() { - return true; - } - while right[i] < *left_elem { - i += 1; - if i == right.len() { - return true; - } - } - right[i] != *left_elem - }); - - if let Err(i) = left.binary_search(&value) { - left.insert(i, value); - } - - true -} - pub(crate) fn generate_node_types_json( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, - simple_aliases: &AliasMap, + 
default_aliases: &AliasMap, variable_info: &Vec, ) -> Vec { let mut node_types_json = BTreeMap::new(); @@ -477,7 +369,7 @@ pub(crate) fn generate_node_types_json( named: alias.is_named, }, ChildType::Normal(symbol) => { - if let Some(alias) = simple_aliases.get(&symbol) { + if let Some(alias) = default_aliases.get(&symbol) { NodeTypeJSON { kind: alias.value.clone(), named: alias.is_named, @@ -511,105 +403,303 @@ pub(crate) fn generate_node_types_json( } }; + let populate_field_info_json = |json: &mut FieldInfoJSON, info: &FieldInfo| { + if info.types.len() > 0 { + json.multiple |= info.quantity.multiple; + json.required &= info.quantity.required; + json.types + .extend(info.types.iter().map(child_type_to_node_type)); + json.types.sort_unstable(); + json.types.dedup(); + } else { + json.required = false; + } + }; + + let mut aliases_by_symbol = HashMap::new(); + for (symbol, alias) in default_aliases { + aliases_by_symbol.insert(*symbol, { + let mut aliases = HashSet::new(); + aliases.insert(Some(alias.clone())); + aliases + }); + } + for extra_symbol in &syntax_grammar.extra_symbols { + if !default_aliases.contains_key(extra_symbol) { + aliases_by_symbol + .entry(*extra_symbol) + .or_insert(HashSet::new()) + .insert(None); + } + } + for variable in &syntax_grammar.variables { + for production in &variable.productions { + for step in &production.steps { + aliases_by_symbol + .entry(step.symbol) + .or_insert(HashSet::new()) + .insert( + step.alias + .as_ref() + .or_else(|| default_aliases.get(&step.symbol)) + .cloned(), + ); + } + } + } + aliases_by_symbol.insert(Symbol::non_terminal(0), [None].iter().cloned().collect()); + + let mut subtype_map = HashMap::new(); for (i, info) in variable_info.iter().enumerate() { let symbol = Symbol::non_terminal(i); let variable = &syntax_grammar.variables[i]; - let name = simple_aliases - .get(&Symbol::non_terminal(i)) - .map_or(&variable.name, |alias| &alias.value); - if syntax_grammar.supertype_symbols.contains(&symbol) { let 
node_type_json = node_types_json - .entry(name.clone()) + .entry(variable.name.clone()) .or_insert_with(|| NodeInfoJSON { - kind: name.clone(), + kind: variable.name.clone(), named: true, fields: None, children: None, subtypes: None, }); let mut subtypes = info - .child_types - .iter() - .map(child_type_to_node_type) - .collect::>(); - subtypes.sort_unstable(); - subtypes.dedup(); - node_type_json.subtypes = Some(subtypes); - } else if variable.kind.is_visible() - && !syntax_grammar.variables_to_inline.contains(&symbol) - { - let node_type_json = - node_types_json - .entry(name.clone()) - .or_insert_with(|| NodeInfoJSON { - kind: name.clone(), - named: true, - fields: None, - children: None, - subtypes: None, - }); - let mut fields_json = BTreeMap::new(); - for (field, field_info) in info.fields.iter() { - let field_info_json = fields_json.entry(field.clone()).or_insert(FieldInfoJSON { - multiple: false, - required: true, - types: Vec::new(), - }); - - field_info_json.multiple |= field_info.multiple; - field_info_json.required &= field_info.required; - field_info_json - .types - .extend(field_info.types.iter().map(child_type_to_node_type)); - field_info_json.types.sort_unstable(); - field_info_json.types.dedup(); - } - node_type_json.fields = Some(fields_json); - let mut children_types = info - .children_without_fields + .children .types .iter() .map(child_type_to_node_type) .collect::>(); - if children_types.len() > 0 { - children_types.sort_unstable(); - children_types.dedup(); - node_type_json.children = Some(FieldInfoJSON { - multiple: info.children_without_fields.multiple, - required: info.children_without_fields.required, - types: children_types, + subtype_map.insert( + NodeTypeJSON { + kind: node_type_json.kind.clone(), + named: true, + }, + subtypes.clone(), + ); + subtypes.sort_unstable(); + subtypes.dedup(); + node_type_json.subtypes = Some(subtypes); + } else if !syntax_grammar.variables_to_inline.contains(&symbol) { + // If a rule is aliased under 
multiple names, then its information + // contributes to multiple entries in the final JSON. + for alias in aliases_by_symbol + .get(&Symbol::non_terminal(i)) + .unwrap_or(&HashSet::new()) + { + let kind; + let is_named; + if let Some(alias) = alias { + kind = &alias.value; + is_named = alias.is_named; + } else if variable.kind.is_visible() { + kind = &variable.name; + is_named = variable.kind == VariableType::Named; + } else { + continue; + } + + // There may already be an entry with this name, because multiple + // rules may be aliased with the same name. + let mut node_type_existed = true; + let node_type_json = node_types_json.entry(kind.clone()).or_insert_with(|| { + node_type_existed = false; + NodeInfoJSON { + kind: kind.clone(), + named: is_named, + fields: Some(BTreeMap::new()), + children: None, + subtypes: None, + } }); + + let fields_json = node_type_json.fields.as_mut().unwrap(); + for (new_field, field_info) in info.fields.iter() { + let field_json = fields_json.entry(new_field.clone()).or_insert_with(|| { + // If another rule is aliased with the same name, and does *not* have this field, + // then this field cannot be required. + let mut field_json = FieldInfoJSON::default(); + if node_type_existed { + field_json.required = false; + } + field_json + }); + populate_field_info_json(field_json, field_info); + } + + // If another rule is aliased with the same name, any fields that aren't present in this + // cannot be required. 
+ for (existing_field, field_json) in fields_json.iter_mut() { + if !info.fields.contains_key(existing_field) { + field_json.required = false; + } + } + + populate_field_info_json( + node_type_json + .children + .get_or_insert(FieldInfoJSON::default()), + &info.children_without_fields, + ); } } } - let mut result = node_types_json.into_iter().map(|e| e.1).collect::>(); + for (_, node_type_json) in node_types_json.iter_mut() { + if node_type_json + .children + .as_ref() + .map_or(false, |c| c.types.is_empty()) + { + node_type_json.children = None; + } - for variable in &lexical_grammar.variables { - if variable.kind == VariableType::Named { - result.push(NodeInfoJSON { - kind: variable.name.clone(), - named: true, - fields: None, - children: None, - subtypes: None, + if let Some(children) = &mut node_type_json.children { + process_supertypes(children, &subtype_map); + } + if let Some(fields) = &mut node_type_json.fields { + for (_, field_info) in fields.iter_mut() { + process_supertypes(field_info, &subtype_map); + } + } + } + + let mut anonymous_node_types = Vec::new(); + + let empty = HashSet::new(); + let regular_tokens = lexical_grammar + .variables + .iter() + .enumerate() + .flat_map(|(i, variable)| { + aliases_by_symbol + .get(&Symbol::terminal(i)) + .unwrap_or(&empty) + .iter() + .map(move |alias| { + if let Some(alias) = alias { + (&alias.value, alias.kind()) + } else { + (&variable.name, variable.kind) + } + }) + }); + let external_tokens = + syntax_grammar + .external_tokens + .iter() + .enumerate() + .flat_map(|(i, token)| { + aliases_by_symbol + .get(&Symbol::external(i)) + .unwrap_or(&empty) + .iter() + .map(move |alias| { + if let Some(alias) = alias { + (&alias.value, alias.kind()) + } else { + (&token.name, token.kind) + } + }) }); - } else if variable.kind == VariableType::Anonymous { - result.push(NodeInfoJSON { - kind: variable.name.clone(), + + for (name, kind) in regular_tokens.chain(external_tokens) { + match kind { + VariableType::Named => { 
+ let node_type_json = node_types_json.entry(name.clone()).or_insert(NodeInfoJSON { + kind: name.clone(), + named: true, + fields: None, + children: None, + subtypes: None, + }); + if let Some(children) = &mut node_type_json.children { + children.required = false; + } + if let Some(fields) = &mut node_type_json.fields { + for (_, field) in fields.iter_mut() { + field.required = false; + } + } + } + VariableType::Anonymous => anonymous_node_types.push(NodeInfoJSON { + kind: name.clone(), named: false, fields: None, children: None, subtypes: None, - }); + }), + _ => {} } } + let mut result = node_types_json.into_iter().map(|e| e.1).collect::>(); + result.extend(anonymous_node_types.into_iter()); + result.sort_unstable_by(|a, b| { + b.subtypes + .is_some() + .cmp(&a.subtypes.is_some()) + .then_with(|| { + let a_is_leaf = a.children.is_none() && a.fields.is_none(); + let b_is_leaf = b.children.is_none() && b.fields.is_none(); + a_is_leaf.cmp(&b_is_leaf) + }) + .then_with(|| a.kind.cmp(&b.kind)) + }); + result.dedup(); result } +fn process_supertypes( + info: &mut FieldInfoJSON, + subtype_map: &HashMap>, +) { + for (supertype, subtypes) in subtype_map { + if info.types.contains(supertype) { + info.types.retain(|t| !subtypes.contains(t)); + } + } +} + +fn variable_type_for_child_type( + child_type: &ChildType, + syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, +) -> VariableType { + match child_type { + ChildType::Aliased(alias) => alias.kind(), + ChildType::Normal(symbol) => { + if syntax_grammar.supertype_symbols.contains(&symbol) { + VariableType::Named + } else if syntax_grammar.variables_to_inline.contains(&symbol) { + VariableType::Hidden + } else { + match symbol.kind { + SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind, + SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind, + SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind, + _ => VariableType::Hidden, + } + } + } + } +} + +fn 
extend_sorted<'a, T>(vec: &mut Vec, values: impl IntoIterator) -> bool +where + T: Clone + Eq + Ord, + T: 'a, +{ + values.into_iter().any(|value| { + if let Err(i) = vec.binary_search(&value) { + vec.insert(i, value.clone()); + true + } else { + false + } + }) +} + #[cfg(test)] mod tests { use super::*; @@ -623,7 +713,7 @@ mod tests { fn test_node_types_simple() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), @@ -643,9 +733,18 @@ mod tests { kind: VariableType::Named, rule: Rule::string("x"), }, + // This rule is not reachable from the start symbol + // so it won't be present in the node_types + Variable { + name: "v3".to_string(), + kind: VariableType::Named, + rule: Rule::string("y"), + }, ], }); + assert_eq!(node_types.len(), 3); + assert_eq!( node_types[0], NodeInfoJSON { @@ -705,11 +804,117 @@ mod tests { ); } + #[test] + fn test_node_types_simple_extras() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_symbols: vec![Rule::named("v3")], + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![ + Variable { + name: "v1".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::field("f1".to_string(), Rule::named("v2")), + Rule::field("f2".to_string(), Rule::string(";")), + ]), + }, + Variable { + name: "v2".to_string(), + kind: VariableType::Named, + rule: Rule::string("x"), + }, + // This rule is not reachable from the start symbol, but + // it is reachable from the 'extra_symbols' so it + // should be present in the node_types + Variable { + name: "v3".to_string(), + kind: VariableType::Named, + rule: Rule::string("y"), + }, + ], + }); + + assert_eq!(node_types.len(), 4); + + assert_eq!( + node_types[0], + NodeInfoJSON { + kind: 
"v1".to_string(), + named: true, + subtypes: None, + children: None, + fields: Some( + vec![ + ( + "f1".to_string(), + FieldInfoJSON { + multiple: false, + required: true, + types: vec![NodeTypeJSON { + kind: "v2".to_string(), + named: true, + }] + } + ), + ( + "f2".to_string(), + FieldInfoJSON { + multiple: false, + required: true, + types: vec![NodeTypeJSON { + kind: ";".to_string(), + named: false, + }] + } + ), + ] + .into_iter() + .collect() + ) + } + ); + assert_eq!( + node_types[1], + NodeInfoJSON { + kind: ";".to_string(), + named: false, + subtypes: None, + children: None, + fields: None + } + ); + assert_eq!( + node_types[2], + NodeInfoJSON { + kind: "v2".to_string(), + named: true, + subtypes: None, + children: None, + fields: None + } + ); + assert_eq!( + node_types[3], + NodeInfoJSON { + kind: "v3".to_string(), + named: true, + subtypes: None, + children: None, + fields: None + } + ); + } + #[test] fn test_node_types_with_supertypes() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), @@ -796,7 +1001,7 @@ mod tests { fn test_node_types_for_children_without_fields() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), @@ -890,6 +1095,421 @@ mod tests { ); } + #[test] + fn test_node_types_with_inlined_rules() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + word_token: None, + extra_symbols: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: vec!["v2".to_string()], + supertype_symbols: vec![], + variables: vec![ + Variable { + name: "v1".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![Rule::named("v2"), Rule::named("v3")]), + }, + 
// v2 should not appear in the node types, since it is inlined + Variable { + name: "v2".to_string(), + kind: VariableType::Named, + rule: Rule::alias(Rule::string("a"), "x".to_string(), true), + }, + Variable { + name: "v3".to_string(), + kind: VariableType::Named, + rule: Rule::string("b"), + }, + ], + }); + + assert_eq!( + node_types[0], + NodeInfoJSON { + kind: "v1".to_string(), + named: true, + subtypes: None, + children: Some(FieldInfoJSON { + multiple: true, + required: true, + types: vec![ + NodeTypeJSON { + kind: "v3".to_string(), + named: true, + }, + NodeTypeJSON { + kind: "x".to_string(), + named: true, + }, + ] + }), + fields: Some(BTreeMap::new()), + } + ); + } + + #[test] + fn test_node_types_for_aliased_nodes() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_symbols: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![ + Variable { + name: "thing".to_string(), + kind: VariableType::Named, + rule: Rule::choice(vec![Rule::named("type"), Rule::named("expression")]), + }, + Variable { + name: "type".to_string(), + kind: VariableType::Named, + rule: Rule::choice(vec![ + Rule::alias( + Rule::named("identifier"), + "type_identifier".to_string(), + true, + ), + Rule::string("void"), + ]), + }, + Variable { + name: "expression".to_string(), + kind: VariableType::Named, + rule: Rule::choice(vec![ + Rule::named("identifier"), + Rule::alias( + Rule::named("foo_identifier"), + "identifier".to_string(), + true, + ), + ]), + }, + Variable { + name: "identifier".to_string(), + kind: VariableType::Named, + rule: Rule::pattern("\\w+"), + }, + Variable { + name: "foo_identifier".to_string(), + kind: VariableType::Named, + rule: Rule::pattern("[\\w-]+"), + }, + ], + }); + + assert_eq!(node_types.iter().find(|t| t.kind == "foo_identifier"), None); + assert_eq!( + node_types.iter().find(|t| t.kind == 
"identifier"), + Some(&NodeInfoJSON { + kind: "identifier".to_string(), + named: true, + subtypes: None, + children: None, + fields: None, + }) + ); + assert_eq!( + node_types.iter().find(|t| t.kind == "type_identifier"), + Some(&NodeInfoJSON { + kind: "type_identifier".to_string(), + named: true, + subtypes: None, + children: None, + fields: None, + }) + ); + } + + #[test] + fn test_node_types_with_multiple_valued_fields() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_symbols: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![ + Variable { + name: "a".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::choice(vec![ + Rule::Blank, + Rule::repeat(Rule::field("f1".to_string(), Rule::named("b"))), + ]), + Rule::repeat(Rule::named("c")), + ]), + }, + Variable { + name: "b".to_string(), + kind: VariableType::Named, + rule: Rule::string("b"), + }, + Variable { + name: "c".to_string(), + kind: VariableType::Named, + rule: Rule::string("c"), + }, + ], + }); + + assert_eq!( + node_types[0], + NodeInfoJSON { + kind: "a".to_string(), + named: true, + subtypes: None, + children: Some(FieldInfoJSON { + multiple: true, + required: true, + types: vec![NodeTypeJSON { + kind: "c".to_string(), + named: true, + },] + }), + fields: Some( + vec![( + "f1".to_string(), + FieldInfoJSON { + multiple: true, + required: false, + types: vec![NodeTypeJSON { + kind: "b".to_string(), + named: true, + }] + } + )] + .into_iter() + .collect() + ), + } + ); + } + + #[test] + fn test_node_types_with_fields_on_hidden_tokens() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_symbols: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![Variable { + name: 
"script".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::field("a".to_string(), Rule::pattern("hi")), + Rule::field("b".to_string(), Rule::pattern("bye")), + ]), + }], + }); + + assert_eq!( + node_types, + [NodeInfoJSON { + kind: "script".to_string(), + named: true, + fields: Some(BTreeMap::new()), + children: None, + subtypes: None + }] + ); + } + + #[test] + fn test_node_types_with_multiple_rules_same_alias_name() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_symbols: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![ + Variable { + name: "script".to_string(), + kind: VariableType::Named, + rule: Rule::choice(vec![ + Rule::named("a"), + // Rule `b` is aliased as rule `a` + Rule::alias(Rule::named("b"), "a".to_string(), true), + ]), + }, + Variable { + name: "a".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::field("f1".to_string(), Rule::string("1")), + Rule::field("f2".to_string(), Rule::string("2")), + ]), + }, + Variable { + name: "b".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::field("f2".to_string(), Rule::string("22")), + Rule::field("f2".to_string(), Rule::string("222")), + Rule::field("f3".to_string(), Rule::string("3")), + ]), + }, + ], + }); + + assert_eq!( + &node_types + .iter() + .map(|t| t.kind.as_str()) + .collect::>(), + &["a", "script", "1", "2", "22", "222", "3"] + ); + + assert_eq!( + &node_types[0..2], + &[ + // A combination of the types for `a` and `b`. 
+ NodeInfoJSON { + kind: "a".to_string(), + named: true, + subtypes: None, + children: None, + fields: Some( + vec![ + ( + "f1".to_string(), + FieldInfoJSON { + multiple: false, + required: false, + types: vec![NodeTypeJSON { + kind: "1".to_string(), + named: false, + }] + } + ), + ( + "f2".to_string(), + FieldInfoJSON { + multiple: true, + required: true, + types: vec![ + NodeTypeJSON { + kind: "2".to_string(), + named: false, + }, + NodeTypeJSON { + kind: "22".to_string(), + named: false, + }, + NodeTypeJSON { + kind: "222".to_string(), + named: false, + } + ] + }, + ), + ( + "f3".to_string(), + FieldInfoJSON { + multiple: false, + required: false, + types: vec![NodeTypeJSON { + kind: "3".to_string(), + named: false, + }] + } + ), + ] + .into_iter() + .collect() + ), + }, + NodeInfoJSON { + kind: "script".to_string(), + named: true, + subtypes: None, + // Only one node + children: Some(FieldInfoJSON { + multiple: false, + required: true, + types: vec![NodeTypeJSON { + kind: "a".to_string(), + named: true, + }] + }), + fields: Some(BTreeMap::new()), + } + ] + ); + } + + #[test] + fn test_node_types_with_tokens_aliased_to_match_rules() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_symbols: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![ + Variable { + name: "a".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![Rule::named("b"), Rule::named("c")]), + }, + // Ordinarily, `b` nodes have two named `c` children. + Variable { + name: "b".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![Rule::named("c"), Rule::string("B"), Rule::named("c")]), + }, + Variable { + name: "c".to_string(), + kind: VariableType::Named, + rule: Rule::choice(vec![ + Rule::string("C"), + // This token is aliased as a `b`, which will produce a `b` node + // with no children. 
+ Rule::alias(Rule::string("D"), "b".to_string(), true), + ]), + }, + ], + }); + + assert_eq!( + node_types.iter().map(|n| &n.kind).collect::>(), + &["a", "b", "c", "B", "C"] + ); + assert_eq!( + node_types[1], + NodeInfoJSON { + kind: "b".to_string(), + named: true, + subtypes: None, + children: Some(FieldInfoJSON { + multiple: true, + required: false, + types: vec![NodeTypeJSON { + kind: "c".to_string(), + named: true, + }] + }), + fields: Some(BTreeMap::new()), + } + ); + } + #[test] fn test_get_variable_info() { let variable_info = get_variable_info( @@ -948,7 +1568,7 @@ mod tests { vec![], ), &build_lexical_grammar(), - &InlinedProductionMap::default(), + &AliasMap::new(), ) .unwrap(); @@ -957,8 +1577,11 @@ mod tests { vec![( "field1".to_string(), FieldInfo { - required: true, - multiple: false, + quantity: ChildQuantity { + exists: true, + required: true, + multiple: false, + }, types: vec![ChildType::Normal(Symbol::terminal(1))], } )] @@ -971,8 +1594,11 @@ mod tests { vec![( "field2".to_string(), FieldInfo { - required: false, - multiple: false, + quantity: ChildQuantity { + exists: true, + required: false, + multiple: false, + }, types: vec![ ChildType::Normal(Symbol::terminal(2)), ChildType::Normal(Symbol::terminal(3)), @@ -984,6 +1610,71 @@ mod tests { ); } + #[test] + fn test_get_variable_info_with_repetitions_inside_fields() { + let variable_info = get_variable_info( + &build_syntax_grammar( + vec![ + // Field associated with a repetition. 
+ SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::non_terminal(1)) + .with_field_name("field1")], + }, + Production { + dynamic_precedence: 0, + steps: vec![], + }, + ], + }, + // Repetition node + SyntaxVariable { + name: "_rule0_repeat".to_string(), + kind: VariableType::Hidden, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(1))], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::non_terminal(1)), + ], + }, + ], + }, + ], + vec![], + ), + &build_lexical_grammar(), + &AliasMap::new(), + ) + .unwrap(); + + assert_eq!( + variable_info[0].fields, + vec![( + "field1".to_string(), + FieldInfo { + quantity: ChildQuantity { + exists: true, + required: false, + multiple: true, + }, + types: vec![ChildType::Normal(Symbol::terminal(1))], + } + )] + .into_iter() + .collect::>() + ); + } + #[test] fn test_get_variable_info_with_inherited_fields() { let variable_info = get_variable_info( @@ -992,14 +1683,20 @@ mod tests { SyntaxVariable { name: "rule0".to_string(), kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::non_terminal(1)), - ProductionStep::new(Symbol::terminal(1)), - ], - }], + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::terminal(1)), + ], + }, + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::non_terminal(1))], + }, + ], }, // Hidden node with fields SyntaxVariable { @@ -1008,7 +1705,7 @@ mod tests { productions: vec![Production { dynamic_precedence: 0, steps: vec![ - 
ProductionStep::new(Symbol::terminal(2)), + ProductionStep::new(Symbol::terminal(2)).with_alias(".", false), ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"), ], }], @@ -1017,7 +1714,7 @@ mod tests { vec![], ), &build_lexical_grammar(), - &InlinedProductionMap::default(), + &AliasMap::new(), ) .unwrap(); @@ -1026,14 +1723,32 @@ mod tests { vec![( "field1".to_string(), FieldInfo { - required: true, - multiple: false, + quantity: ChildQuantity { + exists: true, + required: true, + multiple: false, + }, types: vec![ChildType::Normal(Symbol::terminal(3))], } )] .into_iter() .collect::>() ); + + assert_eq!( + variable_info[0].children_without_fields, + FieldInfo { + quantity: ChildQuantity { + exists: true, + required: false, + multiple: true, + }, + types: vec![ + ChildType::Normal(Symbol::terminal(0)), + ChildType::Normal(Symbol::terminal(1)), + ], + } + ); } #[test] @@ -1073,7 +1788,7 @@ mod tests { vec![Symbol::non_terminal(1)], ), &build_lexical_grammar(), - &InlinedProductionMap::default(), + &AliasMap::new(), ) .unwrap(); @@ -1082,8 +1797,11 @@ mod tests { vec![( "field1".to_string(), FieldInfo { - required: true, - multiple: false, + quantity: ChildQuantity { + exists: true, + required: true, + multiple: false, + }, types: vec![ChildType::Normal(Symbol::non_terminal(1))], } )] @@ -1093,18 +1811,14 @@ mod tests { } fn get_node_types(grammar: InputGrammar) -> Vec { - let (syntax_grammar, lexical_grammar, _, simple_aliases) = + let (syntax_grammar, lexical_grammar, _, default_aliases) = prepare_grammar(&grammar).unwrap(); - let variable_info = get_variable_info( - &syntax_grammar, - &lexical_grammar, - &InlinedProductionMap::default(), - ) - .unwrap(); + let variable_info = + get_variable_info(&syntax_grammar, &lexical_grammar, &default_aliases).unwrap(); generate_node_types_json( &syntax_grammar, &lexical_grammar, - &simple_aliases, + &default_aliases, &variable_info, ) } diff --git a/cli/src/generate/parse_grammar.rs 
b/cli/src/generate/parse_grammar.rs index feb560a9..c01dbd99 100644 --- a/cli/src/generate/parse_grammar.rs +++ b/cli/src/generate/parse_grammar.rs @@ -87,7 +87,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result { }) } - let extra_tokens = grammar_json + let extra_symbols = grammar_json .extras .unwrap_or(Vec::new()) .into_iter() @@ -107,7 +107,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result { name: grammar_json.name, word_token: grammar_json.word, variables, - extra_tokens, + extra_symbols, expected_conflicts, external_tokens, supertype_symbols, diff --git a/cli/src/generate/prepare_grammar/expand_repeats.rs b/cli/src/generate/prepare_grammar/expand_repeats.rs index ccc83d97..0660f06e 100644 --- a/cli/src/generate/prepare_grammar/expand_repeats.rs +++ b/cli/src/generate/prepare_grammar/expand_repeats.rs @@ -283,7 +283,7 @@ mod tests { fn build_grammar(variables: Vec) -> ExtractedSyntaxGrammar { ExtractedSyntaxGrammar { variables, - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), diff --git a/cli/src/generate/prepare_grammar/extract_default_aliases.rs b/cli/src/generate/prepare_grammar/extract_default_aliases.rs new file mode 100644 index 00000000..3e08e3ad --- /dev/null +++ b/cli/src/generate/prepare_grammar/extract_default_aliases.rs @@ -0,0 +1,293 @@ +use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; +use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType}; + +#[derive(Clone, Default)] +struct SymbolStatus { + aliases: Vec<(Alias, usize)>, + appears_unaliased: bool, +} + +// Update the grammar by finding symbols that always are aliased, and for each such symbol, +// promoting one of its aliases to a "default alias", which is applied globally instead +// of in a context-specific way. +// +// This has two benefits: +// * It reduces the overhead of storing production-specific alias info in the parse table. 
+// * Within an `ERROR` node, no context-specific aliases will be applied. This transformation +// ensures that the children of an `ERROR` node have symbols that are consistent with the +// way that they would appear in a valid syntax tree. +pub(super) fn extract_default_aliases( + syntax_grammar: &mut SyntaxGrammar, + lexical_grammar: &LexicalGrammar, +) -> AliasMap { + let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()]; + let mut non_terminal_status_list = + vec![SymbolStatus::default(); syntax_grammar.variables.len()]; + let mut external_status_list = + vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()]; + + // For each grammar symbol, find all of the aliases under which the symbol appears, + // and determine whether or not the symbol ever appears *unaliased*. + for variable in syntax_grammar.variables.iter() { + for production in variable.productions.iter() { + for step in production.steps.iter() { + let mut status = match step.symbol.kind { + SymbolType::External => &mut external_status_list[step.symbol.index], + SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index], + SymbolType::Terminal => &mut terminal_status_list[step.symbol.index], + SymbolType::End => panic!("Unexpected end token"), + }; + + // Default aliases don't work for inlined variables. 
+ if syntax_grammar.variables_to_inline.contains(&step.symbol) { + continue; + } + + if let Some(alias) = &step.alias { + if let Some(count_for_alias) = status + .aliases + .iter_mut() + .find_map(|(a, count)| if a == alias { Some(count) } else { None }) + { + *count_for_alias += 1; + } else { + status.aliases.push((alias.clone(), 1)); + } + } else { + status.appears_unaliased = true; + } + } + } + } + + let symbols_with_statuses = (terminal_status_list + .iter_mut() + .enumerate() + .map(|(i, status)| (Symbol::terminal(i), status))) + .chain( + non_terminal_status_list + .iter_mut() + .enumerate() + .map(|(i, status)| (Symbol::non_terminal(i), status)), + ) + .chain( + external_status_list + .iter_mut() + .enumerate() + .map(|(i, status)| (Symbol::external(i), status)), + ); + + // For each symbol that always appears aliased, find the alias the occurs most often, + // and designate that alias as the symbol's "default alias". Store all of these + // default aliases in a map that will be returned. + let mut result = AliasMap::new(); + for (symbol, status) in symbols_with_statuses { + if status.appears_unaliased { + status.aliases.clear(); + } else { + if let Some(default_entry) = status + .aliases + .iter() + .enumerate() + .max_by_key(|(i, (_, count))| (count, -(*i as i64))) + .map(|(_, entry)| entry.clone()) + { + status.aliases.clear(); + status.aliases.push(default_entry.clone()); + result.insert(symbol, default_entry.0); + } + } + } + + // Wherever a symbol is aliased as its default alias, remove the usage of the alias, + // because it will now be redundant. 
+ let mut alias_positions_to_clear = Vec::new(); + for variable in syntax_grammar.variables.iter_mut() { + alias_positions_to_clear.clear(); + + for (i, production) in variable.productions.iter().enumerate() { + for (j, step) in production.steps.iter().enumerate() { + let status = match step.symbol.kind { + SymbolType::External => &mut external_status_list[step.symbol.index], + SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index], + SymbolType::Terminal => &mut terminal_status_list[step.symbol.index], + SymbolType::End => panic!("Unexpected end token"), + }; + + // If this step is aliased as the symbol's default alias, then remove that alias. + if step.alias.is_some() + && step.alias.as_ref() == status.aliases.get(0).map(|t| &t.0) + { + let mut other_productions_must_use_this_alias_at_this_index = false; + for (other_i, other_production) in variable.productions.iter().enumerate() { + if other_i != i + && other_production.steps.len() > j + && other_production.steps[j].alias == step.alias + && result.get(&other_production.steps[j].symbol) != step.alias.as_ref() + { + other_productions_must_use_this_alias_at_this_index = true; + break; + } + } + + if !other_productions_must_use_this_alias_at_this_index { + alias_positions_to_clear.push((i, j)); + } + } + } + } + + for (production_index, step_index) in &alias_positions_to_clear { + variable.productions[*production_index].steps[*step_index].alias = None; + } + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::generate::grammars::{ + LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType, + }; + use crate::generate::nfa::Nfa; + + #[test] + fn test_extract_simple_aliases() { + let mut syntax_grammar = SyntaxGrammar { + variables: vec![ + SyntaxVariable { + name: "v1".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), + 
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), + ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), + ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true), + ], + }], + }, + SyntaxVariable { + name: "v2".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + // Token 0 is always aliased as "a1". + ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), + // Token 1 is aliased within rule `v1` above, but not here. + ProductionStep::new(Symbol::terminal(1)), + // Token 2 is aliased differently here than in `v1`. The alias from + // `v1` should be promoted to the default alias, because `v1` appears + // first in the grammar. + ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true), + // Token 3 is also aliased differently here than in `v1`. In this case, + // this alias should be promoted to the default alias, because it is + // used a greater number of times (twice). + ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true), + ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true), + ], + }], + }, + ], + extra_symbols: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + supertype_symbols: Vec::new(), + external_tokens: Vec::new(), + word_token: None, + }; + + let lexical_grammar = LexicalGrammar { + nfa: Nfa::new(), + variables: vec![ + LexicalVariable { + name: "t0".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + LexicalVariable { + name: "t1".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + LexicalVariable { + name: "t2".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + LexicalVariable { + name: "t3".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + ], + }; + + let default_aliases = extract_default_aliases(&mut 
syntax_grammar, &lexical_grammar); + assert_eq!(default_aliases.len(), 3); + + assert_eq!( + default_aliases.get(&Symbol::terminal(0)), + Some(&Alias { + value: "a1".to_string(), + is_named: true, + }) + ); + assert_eq!( + default_aliases.get(&Symbol::terminal(2)), + Some(&Alias { + value: "a3".to_string(), + is_named: true, + }) + ); + assert_eq!( + default_aliases.get(&Symbol::terminal(3)), + Some(&Alias { + value: "a6".to_string(), + is_named: true, + }) + ); + assert_eq!(default_aliases.get(&Symbol::terminal(1)), None); + + assert_eq!( + syntax_grammar.variables, + vec![ + SyntaxVariable { + name: "v1".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), + ProductionStep::new(Symbol::terminal(2)), + ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true), + ], + },], + }, + SyntaxVariable { + name: "v2".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(1)), + ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true), + ProductionStep::new(Symbol::terminal(3)), + ProductionStep::new(Symbol::terminal(3)), + ], + },], + }, + ] + ); + } +} diff --git a/cli/src/generate/prepare_grammar/extract_simple_aliases.rs b/cli/src/generate/prepare_grammar/extract_simple_aliases.rs deleted file mode 100644 index 9a0b7fbb..00000000 --- a/cli/src/generate/prepare_grammar/extract_simple_aliases.rs +++ /dev/null @@ -1,223 +0,0 @@ -use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; -use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType}; - -#[derive(Clone, Default)] -struct SymbolStatus { - alias: Option, - conflicting: bool, -} - -pub(super) fn extract_simple_aliases( - syntax_grammar: &mut SyntaxGrammar, - lexical_grammar: 
&LexicalGrammar, -) -> AliasMap { - // Determine which symbols in the grammars are *always* aliased to a single name. - let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()]; - let mut non_terminal_status_list = - vec![SymbolStatus::default(); syntax_grammar.variables.len()]; - let mut external_status_list = - vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()]; - for variable in syntax_grammar.variables.iter() { - for production in variable.productions.iter() { - for step in production.steps.iter() { - let mut status = match step.symbol { - Symbol { - kind: SymbolType::External, - index, - } => &mut external_status_list[index], - Symbol { - kind: SymbolType::NonTerminal, - index, - } => &mut non_terminal_status_list[index], - Symbol { - kind: SymbolType::Terminal, - index, - } => &mut terminal_status_list[index], - Symbol { - kind: SymbolType::End, - .. - } => panic!("Unexpected end token"), - }; - - if step.alias.is_none() { - status.alias = None; - status.conflicting = true; - } - - if !status.conflicting { - if status.alias.is_none() { - status.alias = step.alias.clone(); - } else if status.alias != step.alias { - status.alias = None; - status.conflicting = true; - } - } - } - } - } - - // Remove the aliases for those symbols. - for variable in syntax_grammar.variables.iter_mut() { - for production in variable.productions.iter_mut() { - for step in production.steps.iter_mut() { - let status = match step.symbol { - Symbol { - kind: SymbolType::External, - index, - } => &external_status_list[index], - Symbol { - kind: SymbolType::NonTerminal, - index, - } => &non_terminal_status_list[index], - Symbol { - kind: SymbolType::Terminal, - index, - } => &terminal_status_list[index], - Symbol { - kind: SymbolType::End, - .. - } => panic!("Unexpected end token"), - }; - - if status.alias.is_some() { - step.alias = None; - } - } - } - } - - // Populate a map of the symbols to their aliases. 
- let mut result = AliasMap::new(); - for (i, status) in terminal_status_list.into_iter().enumerate() { - if let Some(alias) = status.alias { - result.insert(Symbol::terminal(i), alias); - } - } - for (i, status) in non_terminal_status_list.into_iter().enumerate() { - if let Some(alias) = status.alias { - result.insert(Symbol::non_terminal(i), alias); - } - } - for (i, status) in external_status_list.into_iter().enumerate() { - if let Some(alias) = status.alias { - result.insert(Symbol::external(i), alias); - } - } - result -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::generate::grammars::{ - LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType, - }; - use crate::generate::nfa::Nfa; - - #[test] - fn test_extract_simple_aliases() { - let mut syntax_grammar = SyntaxGrammar { - variables: vec![ - SyntaxVariable { - name: "v1".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), - ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), - ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), - ], - }], - }, - SyntaxVariable { - name: "v2".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - // Token 0 is always aliased as "a1". - ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), - // Token 1 is aliased above, but not here. - ProductionStep::new(Symbol::terminal(1)), - // Token 2 is aliased differently than above. 
- ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true), - ], - }], - }, - ], - extra_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), - supertype_symbols: Vec::new(), - external_tokens: Vec::new(), - word_token: None, - }; - - let lexical_grammar = LexicalGrammar { - nfa: Nfa::new(), - variables: vec![ - LexicalVariable { - name: "t1".to_string(), - kind: VariableType::Anonymous, - implicit_precedence: 0, - start_state: 0, - }, - LexicalVariable { - name: "t2".to_string(), - kind: VariableType::Anonymous, - implicit_precedence: 0, - start_state: 0, - }, - LexicalVariable { - name: "t3".to_string(), - kind: VariableType::Anonymous, - implicit_precedence: 0, - start_state: 0, - }, - ], - }; - - let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar); - assert_eq!(simple_aliases.len(), 1); - assert_eq!( - simple_aliases[&Symbol::terminal(0)], - Alias { - value: "a1".to_string(), - is_named: true, - } - ); - - assert_eq!( - syntax_grammar.variables, - vec![ - SyntaxVariable { - name: "v1".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - // 'Simple' alias removed - ProductionStep::new(Symbol::terminal(0)), - // Other aliases unchanged - ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), - ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), - ], - },], - }, - SyntaxVariable { - name: "v2".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::terminal(1)), - ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true), - ], - },], - }, - ] - ); - } -} diff --git a/cli/src/generate/prepare_grammar/extract_tokens.rs b/cli/src/generate/prepare_grammar/extract_tokens.rs index def35b97..ae6e7244 100644 --- a/cli/src/generate/prepare_grammar/extract_tokens.rs +++ 
b/cli/src/generate/prepare_grammar/extract_tokens.rs @@ -90,21 +90,13 @@ pub(super) fn extract_tokens( .collect(); let mut separators = Vec::new(); - let mut extra_tokens = Vec::new(); - for rule in grammar.extra_tokens { + let mut extra_symbols = Vec::new(); + for rule in grammar.extra_symbols { if let Rule::Symbol(symbol) = rule { - let new_symbol = symbol_replacer.replace_symbol(symbol); - if new_symbol.is_non_terminal() { - return Error::err(format!( - "Non-token symbol '{}' cannot be used as an extra token", - &variables[new_symbol.index].name - )); - } else { - extra_tokens.push(new_symbol); - } + extra_symbols.push(symbol_replacer.replace_symbol(symbol)); } else { if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) { - extra_tokens.push(Symbol::terminal(index)); + extra_symbols.push(Symbol::terminal(index)); } else { separators.push(rule); } @@ -158,7 +150,7 @@ pub(super) fn extract_tokens( ExtractedSyntaxGrammar { variables, expected_conflicts, - extra_tokens, + extra_symbols, variables_to_inline, supertype_symbols, external_tokens, @@ -415,15 +407,15 @@ mod test { } #[test] - fn test_extracting_extra_tokens() { + fn test_extracting_extra_symbols() { let mut grammar = build_grammar(vec![ Variable::named("rule_0", Rule::string("x")), Variable::named("comment", Rule::pattern("//.*")), ]); - grammar.extra_tokens = vec![Rule::string(" "), Rule::non_terminal(1)]; + grammar.extra_symbols = vec![Rule::string(" "), Rule::non_terminal(1)]; let (syntax_grammar, lexical_grammar) = extract_tokens(grammar).unwrap(); - assert_eq!(syntax_grammar.extra_tokens, vec![Symbol::terminal(1),]); + assert_eq!(syntax_grammar.extra_symbols, vec![Symbol::terminal(1),]); assert_eq!(lexical_grammar.separators, vec![Rule::string(" "),]); } @@ -472,28 +464,6 @@ mod test { ); } - #[test] - fn test_error_on_non_terminal_symbol_extras() { - let mut grammar = build_grammar(vec![ - Variable::named("rule_0", Rule::non_terminal(1)), - Variable::named("rule_1", 
Rule::non_terminal(2)), - Variable::named("rule_2", Rule::string("x")), - ]); - grammar.extra_tokens = vec![Rule::non_terminal(1)]; - - match extract_tokens(grammar) { - Err(e) => { - assert_eq!( - e.message(), - "Non-token symbol 'rule_1' cannot be used as an extra token" - ); - } - _ => { - panic!("Expected an error but got no error"); - } - } - } - #[test] fn test_error_on_external_with_same_name_as_non_terminal() { let mut grammar = build_grammar(vec![ @@ -522,7 +492,7 @@ mod test { fn build_grammar(variables: Vec) -> InternedGrammar { InternedGrammar { variables, - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), diff --git a/cli/src/generate/prepare_grammar/flatten_grammar.rs b/cli/src/generate/prepare_grammar/flatten_grammar.rs index e325776c..f2b43a04 100644 --- a/cli/src/generate/prepare_grammar/flatten_grammar.rs +++ b/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -199,7 +199,7 @@ unless they are used only as the grammar's start rule. 
} } Ok(SyntaxGrammar { - extra_tokens: grammar.extra_tokens, + extra_symbols: grammar.extra_symbols, expected_conflicts: grammar.expected_conflicts, variables_to_inline: grammar.variables_to_inline, external_tokens: grammar.external_tokens, diff --git a/cli/src/generate/prepare_grammar/intern_symbols.rs b/cli/src/generate/prepare_grammar/intern_symbols.rs index 4c0fc5c7..276f13ff 100644 --- a/cli/src/generate/prepare_grammar/intern_symbols.rs +++ b/cli/src/generate/prepare_grammar/intern_symbols.rs @@ -30,9 +30,9 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result external_tokens.push(Variable { name, kind, rule }); } - let mut extra_tokens = Vec::with_capacity(grammar.extra_tokens.len()); - for extra_token in grammar.extra_tokens.iter() { - extra_tokens.push(interner.intern_rule(extra_token)?); + let mut extra_symbols = Vec::with_capacity(grammar.extra_symbols.len()); + for extra_token in grammar.extra_symbols.iter() { + extra_symbols.push(interner.intern_rule(extra_token)?); } let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len()); @@ -73,10 +73,16 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result ); } + for (i, variable) in variables.iter_mut().enumerate() { + if supertype_symbols.contains(&Symbol::non_terminal(i)) { + variable.kind = VariableType::Hidden; + } + } + Ok(InternedGrammar { variables, external_tokens, - extra_tokens, + extra_symbols, expected_conflicts, variables_to_inline, supertype_symbols, @@ -236,7 +242,7 @@ mod tests { InputGrammar { variables, name: "the_language".to_string(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), diff --git a/cli/src/generate/prepare_grammar/mod.rs b/cli/src/generate/prepare_grammar/mod.rs index a574aefb..8b094c56 100644 --- a/cli/src/generate/prepare_grammar/mod.rs +++ b/cli/src/generate/prepare_grammar/mod.rs @@ -1,6 +1,6 @@ mod expand_repeats; mod 
expand_tokens; -mod extract_simple_aliases; +mod extract_default_aliases; mod extract_tokens; mod flatten_grammar; mod intern_symbols; @@ -8,7 +8,7 @@ mod process_inlines; use self::expand_repeats::expand_repeats; pub(crate) use self::expand_tokens::expand_tokens; -use self::extract_simple_aliases::extract_simple_aliases; +use self::extract_default_aliases::extract_default_aliases; use self::extract_tokens::extract_tokens; use self::flatten_grammar::flatten_grammar; use self::intern_symbols::intern_symbols; @@ -21,7 +21,7 @@ use crate::generate::rules::{AliasMap, Rule, Symbol}; pub(crate) struct IntermediateGrammar { variables: Vec, - extra_tokens: Vec, + extra_symbols: Vec, expected_conflicts: Vec>, external_tokens: Vec, variables_to_inline: Vec, @@ -52,7 +52,7 @@ pub(crate) fn prepare_grammar( let syntax_grammar = expand_repeats(syntax_grammar); let mut syntax_grammar = flatten_grammar(syntax_grammar)?; let lexical_grammar = expand_tokens(lexical_grammar)?; - let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar); + let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar); let inlines = process_inlines(&syntax_grammar); - Ok((syntax_grammar, lexical_grammar, inlines, simple_aliases)) + Ok((syntax_grammar, lexical_grammar, inlines, default_aliases)) } diff --git a/cli/src/generate/prepare_grammar/process_inlines.rs b/cli/src/generate/prepare_grammar/process_inlines.rs index 68568419..f83658b2 100644 --- a/cli/src/generate/prepare_grammar/process_inlines.rs +++ b/cli/src/generate/prepare_grammar/process_inlines.rs @@ -127,6 +127,9 @@ impl InlinedProductionMapBuilder { last_inserted_step.associativity = removed_step.associativity; } } + if p.dynamic_precedence.abs() > production.dynamic_precedence.abs() { + production.dynamic_precedence = p.dynamic_precedence; + } production }), ); @@ -196,7 +199,7 @@ mod tests { fn test_basic_inlining() { let grammar = SyntaxGrammar { expected_conflicts: Vec::new(), - 
extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), supertype_symbols: Vec::new(), word_token: None, @@ -226,7 +229,7 @@ mod tests { ], }, Production { - dynamic_precedence: 0, + dynamic_precedence: -2, steps: vec![ProductionStep::new(Symbol::terminal(14))], }, ], @@ -258,7 +261,7 @@ mod tests { ], }, Production { - dynamic_precedence: 0, + dynamic_precedence: -2, steps: vec![ ProductionStep::new(Symbol::terminal(10)), ProductionStep::new(Symbol::terminal(14)), @@ -327,7 +330,7 @@ mod tests { Symbol::non_terminal(3), ], expected_conflicts: Vec::new(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), supertype_symbols: Vec::new(), word_token: None, @@ -429,7 +432,7 @@ mod tests { }, ], expected_conflicts: Vec::new(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), supertype_symbols: Vec::new(), word_token: None, diff --git a/cli/src/generate/properties.rs b/cli/src/generate/properties.rs deleted file mode 100644 index 5091eafc..00000000 --- a/cli/src/generate/properties.rs +++ /dev/null @@ -1,1499 +0,0 @@ -use crate::error::{Error, Result}; -use crate::generate::dedup::split_state_id_groups; -use rsass; -use rsass::sass::Value; -use rsass::selectors::SelectorPart; -use serde_derive::Serialize; -use std::cmp::Ordering; -use std::collections::hash_map::Entry; -use std::collections::{btree_map, BTreeMap, HashMap, HashSet, VecDeque}; -use std::fmt::{self, Write}; -use std::hash::{Hash, Hasher}; -use std::mem; -use std::path::{Path, PathBuf}; -use tree_sitter::{self, PropertyStateJSON, PropertyTransitionJSON}; - -#[derive(Clone, Debug, PartialEq, Eq, Serialize)] -#[serde(untagged)] -pub(crate) enum PropertyValue { - Number(isize), - Boolean(bool), - String(String), - Object(PropertySet), - Array(Vec), -} - -type PropertySet = BTreeMap; -type PropertySheetJSON = tree_sitter::PropertySheetJSON; -type StateId = usize; -type PropertySetId = usize; - -#[derive(Clone, 
PartialEq, Eq, PartialOrd, Ord, Hash)] -struct SelectorStep { - kind: Option, - field: Option, - child_index: Option, - text_pattern: Option, - is_named: Option, - is_immediate: bool, -} - -#[derive(PartialEq, Eq, PartialOrd, Ord)] -struct Selector(Vec); - -#[derive(Debug, PartialEq, Eq)] -struct Rule { - selectors: Vec, - properties: PropertySet, -} - -#[derive(Clone, Copy, Debug)] -struct Item<'a> { - rule_id: u32, - selector: &'a Selector, - step_id: u32, -} - -#[derive(Clone, PartialEq, Eq)] -struct ItemSet<'a>(Vec>); - -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] -struct SelectorMatch { - specificity: u32, - rule_id: u32, -} - -struct Builder<'a> { - rules: &'a Vec, - output: PropertySheetJSON, - start_item_set: ItemSet<'a>, - token_names: &'a HashSet, - ids_by_item_set: HashMap, StateId>, - item_set_queue: VecDeque<(ItemSet<'a>, StateId)>, - item_set_list: Vec>, -} - -impl<'a> Item<'a> { - fn next_step(&self) -> Option<&SelectorStep> { - self.selector.0.get(self.step_id as usize) - } - - fn is_done(&self) -> bool { - self.step_id as usize == self.selector.0.len() - } -} - -impl<'a> Ord for Item<'a> { - fn cmp(&self, other: &Item) -> Ordering { - self.rule_id - .cmp(&other.rule_id) - .then_with(|| self.selector.0.len().cmp(&other.selector.0.len())) - .then_with(|| { - for (i, step) in self - .selector - .0 - .iter() - .enumerate() - .skip(self.step_id as usize) - { - let result = step.cmp(&other.selector.0[i]); - if result != Ordering::Equal { - return result; - } - } - Ordering::Equal - }) - } -} - -impl<'a> PartialOrd for Item<'a> { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl<'a> Hash for Item<'a> { - fn hash(&self, hasher: &mut H) { - hasher.write_u32(self.rule_id); - hasher.write_usize(self.selector.0.len()); - hasher.write_u32(self.step_id); - for step in &self.selector.0[self.step_id as usize..] 
{ - step.hash(hasher); - } - } -} - -impl<'a> PartialEq for Item<'a> { - fn eq(&self, other: &Self) -> bool { - if self.rule_id != other.rule_id - || self.selector.0.len() != other.selector.0.len() - || self.step_id != other.step_id - { - return false; - } - - for (i, step) in self - .selector - .0 - .iter() - .enumerate() - .skip(self.step_id as usize) - { - if *step != other.selector.0[i] { - return false; - } - } - - true - } -} - -impl<'a> Eq for Item<'a> {} - -impl<'a> Hash for ItemSet<'a> { - fn hash(&self, hasher: &mut H) { - hasher.write_usize(self.0.len()); - for item in &self.0 { - item.hash(hasher); - } - } -} - -impl<'a> ItemSet<'a> { - fn new() -> Self { - ItemSet(Vec::new()) - } - - fn insert(&mut self, item: Item<'a>) { - match self.0.binary_search(&item) { - Err(i) => self.0.insert(i, item), - _ => {} - } - } -} - -impl<'a> Builder<'a> { - fn new(rules: &'a Vec, token_names: &'a HashSet) -> Self { - Builder { - rules, - start_item_set: ItemSet::new(), - item_set_list: Vec::new(), - output: PropertySheetJSON { - states: Vec::new(), - property_sets: Vec::new(), - }, - token_names, - ids_by_item_set: HashMap::new(), - item_set_queue: VecDeque::new(), - } - } - - fn build(mut self) -> PropertySheetJSON { - for (i, rule) in self.rules.iter().enumerate() { - for selector in &rule.selectors { - self.start_item_set.insert(Item { - rule_id: i as u32, - selector, - step_id: 0, - }); - } - } - - self.add_state(ItemSet::new()); - self.output.states[0].id = Some(0); - while let Some((item_set, state_id)) = self.item_set_queue.pop_front() { - self.populate_state(item_set, state_id); - } - - self.remove_duplicate_states(); - - for (i, state) in self.output.states.iter_mut().enumerate() { - state.id = Some(i); - } - - self.output - } - - fn add_state(&mut self, item_set: ItemSet<'a>) -> StateId { - match self.ids_by_item_set.entry(item_set) { - Entry::Occupied(o) => *o.get(), - Entry::Vacant(v) => { - let state_id = self.output.states.len(); - 
self.output.states.push(PropertyStateJSON { - id: None, - transitions: Vec::new(), - property_set_id: 0, - default_next_state_id: 0, - }); - self.item_set_queue.push_back((v.key().clone(), state_id)); - v.insert(state_id); - state_id - } - } - } - - fn add_property_set(&mut self, properties: PropertySet) -> PropertySetId { - if let Some(index) = self - .output - .property_sets - .iter() - .position(|i| *i == properties) - { - index - } else { - self.output.property_sets.push(properties); - self.output.property_sets.len() - 1 - } - } - - fn populate_state(&mut self, item_set: ItemSet<'a>, state_id: StateId) { - let is_start_state = state_id == 0; - let mut transitions: HashMap = HashMap::new(); - let mut selector_matches = Vec::new(); - - // First, compute all of the possible state transition conditions for - // this state, and all of the rules that are currently matching. - for item in item_set.0.iter().chain(self.start_item_set.0.iter()) { - // If this item has more elements remaining in its selector, then - // add a state transition based on the next step. - if let Some(step) = item.next_step() { - transitions - .entry(PropertyTransitionJSON { - kind: step.kind.clone(), - field: step.field.clone(), - named: step.is_named, - index: step.child_index, - text: step.text_pattern.clone(), - state_id: 0, - }) - .and_modify(|rule_id| { - if item.rule_id > *rule_id { - *rule_id = item.rule_id; - } - }) - .or_insert(item.rule_id); - } - // If the item has matched its entire selector, then the item's - // properties are applicable to this state. - else { - selector_matches.push(SelectorMatch { - rule_id: item.rule_id, - specificity: selector_specificity(item.selector), - }); - } - } - - // Compute the merged properties that apply in the current state. - // Sort the matching property sets by ascending specificity and by - // their order in the sheet. This way, more specific selectors and later - // rules will override less specific selectors and earlier rules. 
- let mut properties = PropertySet::new(); - selector_matches.sort_unstable_by(|a, b| { - (a.specificity.cmp(&b.specificity)).then_with(|| a.rule_id.cmp(&b.rule_id)) - }); - selector_matches.dedup(); - for selector_match in selector_matches { - let rule = &self.rules[selector_match.rule_id as usize]; - for (property, value) in &rule.properties { - properties.insert(property.clone(), value.clone()); - } - } - self.output.states[state_id].property_set_id = self.add_property_set(properties); - - // If there are multiple transitions that could *both* match (e.g. one based on a - // a node type and one based on a field name), then create an additional transition - // for the intersection of the two. - let mut i = 0; - let mut transition_list = transitions.into_iter().collect::>(); - while i < transition_list.len() { - for j in 0..i { - if let Some(intersection) = - self.intersect_transitions(&transition_list[j].0, &transition_list[i].0) - { - transition_list.push(( - intersection, - u32::max(transition_list[i].1, transition_list[j].1), - )); - } - } - i += 1; - } - - // Ensure that for a given node type, more specific transitions are tried - // first, and in the event of a tie, transitions corresponding to later rules - // in the cascade are tried first. Also, sort the non-intersecting transitions - // by name to guarantee a deterministic order. - transition_list.sort_by(|a, b| { - (transition_specificity(&b.0).cmp(&transition_specificity(&a.0))) - .then_with(|| b.1.cmp(&a.1)) - .then_with(|| a.0.kind.cmp(&b.0.kind)) - .then_with(|| a.0.named.cmp(&b.0.named)) - .then_with(|| a.0.field.cmp(&b.0.field)) - }); - - // For eacy possible state transition, compute the set of items in that transition's - // destination state. 
- i = 0; - while i < transition_list.len() { - let transition = &mut transition_list[i].0; - let transition_is_leaf = transition.named == Some(false) - || transition - .kind - .as_ref() - .map_or(false, |kind| self.token_names.contains(kind)); - - let mut next_item_set = ItemSet::new(); - let mut transition_differs_from_start_state = false; - for item in item_set.0.iter().chain(self.start_item_set.0.iter()) { - if let Some(next_step) = item.next_step() { - // If the next step of the item's selector satisfies this transition, - // advance the item to the next part of its selector and add the - // resulting item to this transition's destination state. - if step_matches_transition(next_step, transition) { - let next_item = Item { - rule_id: item.rule_id, - selector: item.selector, - step_id: item.step_id + 1, - }; - if !transition_is_leaf || next_item.is_done() { - next_item_set.insert(next_item); - if item.step_id > 0 { - transition_differs_from_start_state = true; - } - } - } - - // If the next step of the item is not an immediate child, then - // include this item in this transition's destination state, because - // the next step of the item might match a descendant node. - if !transition_is_leaf && !next_step.is_immediate && item.step_id > 0 { - next_item_set.insert(*item); - transition_differs_from_start_state = true; - } - } - } - - if (is_start_state || transition_differs_from_start_state) - && !next_item_set.0.is_empty() - { - transition.state_id = self.add_state(next_item_set); - if is_start_state || !self.output.states[0].transitions.contains(&transition) { - i += 1; - continue; - } - } - transition_list.remove(i); - } - - self.output.states[state_id] - .transitions - .extend(transition_list.into_iter().map(|i| i.0)); - - // Compute the default successor item set - the item set that - // we should advance to if the next element doesn't match any - // of the next elements in the item set's selectors. 
- let mut default_next_item_set = ItemSet::new(); - for item in &item_set.0 { - let next_step = item.selector.0.get(item.step_id as usize); - if let Some(step) = next_step { - if !step.is_immediate { - default_next_item_set.insert(*item); - } - } - } - self.output.states[state_id].default_next_state_id = self.add_state(default_next_item_set); - - self.item_set_list.push(item_set); - } - - fn intersect_transitions( - &self, - left: &PropertyTransitionJSON, - right: &PropertyTransitionJSON, - ) -> Option { - let mut left_contributes = false; - let mut right_contributes = false; - let mut result = left.clone(); - - if let Some(left_kind) = &left.kind { - if let Some(right_kind) = &right.kind { - if left_kind != right_kind || left.named != right.named { - return None; - } - } else { - left_contributes = true; - } - } else if let Some(right_kind) = &right.kind { - result.kind = Some(right_kind.clone()); - result.named = right.named; - right_contributes = true; - } - - if let Some(left_field) = &left.field { - if let Some(right_field) = &right.field { - if left_field != right_field { - return None; - } - } else { - left_contributes = true; - } - } else if let Some(right_field) = &right.field { - result.field = Some(right_field.clone()); - right_contributes = true; - } - - if let Some(left_text) = &left.text { - if let Some(right_text) = &right.text { - if left_text != right_text { - return None; - } - } else { - left_contributes = true; - } - } else if let Some(right_text) = &right.text { - result.text = Some(right_text.clone()); - right_contributes = true; - } - - if let Some(left_index) = &left.index { - if let Some(right_index) = &right.index { - if left_index != right_index { - return None; - } - } else { - left_contributes = true; - } - } else if let Some(right_index) = &right.index { - result.index = Some(right_index.clone()); - right_contributes = true; - } - - if left_contributes && right_contributes { - Some(result) - } else { - None - } - } - - fn 
remove_duplicate_states(&mut self) { - let mut state_ids_by_properties = HashMap::new(); - for (i, state) in self.output.states.iter().enumerate() { - state_ids_by_properties - .entry(state.property_set_id) - .or_insert(Vec::new()) - .push(i); - } - let mut state_ids_by_group_id = state_ids_by_properties - .into_iter() - .map(|e| e.1) - .collect::>(); - state_ids_by_group_id.sort(); - let start_group_index = state_ids_by_group_id - .iter() - .position(|g| g.contains(&0)) - .unwrap(); - state_ids_by_group_id.swap(start_group_index, 0); - - let mut group_ids_by_state_id = vec![0; self.output.states.len()]; - for (group_id, state_ids) in state_ids_by_group_id.iter().enumerate() { - for state_id in state_ids { - group_ids_by_state_id[*state_id] = group_id; - } - } - - while split_state_id_groups( - &self.output.states, - &mut state_ids_by_group_id, - &mut group_ids_by_state_id, - 0, - property_states_differ, - ) { - continue; - } - - let mut new_states = Vec::with_capacity(state_ids_by_group_id.len()); - for state_ids in state_ids_by_group_id.iter() { - let mut new_state = PropertyStateJSON::default(); - mem::swap(&mut new_state, &mut self.output.states[state_ids[0]]); - for transition in new_state.transitions.iter_mut() { - transition.state_id = group_ids_by_state_id[transition.state_id]; - } - new_state.default_next_state_id = - group_ids_by_state_id[new_state.default_next_state_id]; - new_states.push(new_state); - } - self.output.states = new_states; - } -} - -fn property_states_differ( - left: &PropertyStateJSON, - right: &PropertyStateJSON, - group_ids_by_state_id: &Vec, -) -> bool { - if group_ids_by_state_id[left.default_next_state_id] - != group_ids_by_state_id[right.default_next_state_id] - { - return true; - } - - left.transitions - .iter() - .zip(right.transitions.iter()) - .any(|(left, right)| { - left.kind != right.kind - || left.named != right.named - || left.index != right.index - || left.field != right.field - || left.text != right.text - || 
group_ids_by_state_id[left.state_id] != group_ids_by_state_id[right.state_id] - }) -} - -fn selector_specificity(selector: &Selector) -> u32 { - let mut result = 0; - for step in &selector.0 { - if step.kind.is_some() { - result += 1; - } - if step.field.is_some() { - result += 1; - } - if step.child_index.is_some() { - result += 1; - } - if step.text_pattern.is_some() { - result += 1; - } - } - result -} - -fn transition_specificity(transition: &PropertyTransitionJSON) -> u32 { - let mut result = 0; - if transition.kind.is_some() { - result += 1; - } - if transition.field.is_some() { - result += 1; - } - if transition.index.is_some() { - result += 1; - } - if transition.text.is_some() { - result += 1; - } - result -} - -fn step_matches_transition(step: &SelectorStep, transition: &PropertyTransitionJSON) -> bool { - step.kind - .as_ref() - .map_or(true, |kind| transition.kind.as_ref() == Some(kind)) - && step - .is_named - .map_or(true, |named| transition.named == Some(named)) - && step - .field - .as_ref() - .map_or(true, |field| transition.field.as_ref() == Some(field)) - && step - .child_index - .map_or(true, |index| transition.index == Some(index)) - && step - .text_pattern - .as_ref() - .map_or(true, |text| transition.text.as_ref() == Some(text)) -} - -impl fmt::Debug for SelectorStep { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if self.is_immediate { - write!(f, "> ")?; - } - write!(f, "(")?; - if let Some(kind) = &self.kind { - if self.is_named.unwrap() { - write!(f, "{}", kind)?; - } else { - write!(f, "[token='{}']", kind)?; - } - } - if let Some(field) = &self.field { - write!(f, ".{}", field)?; - } - if let Some(n) = self.child_index { - write!(f, ":nth-child({})", n)?; - } - if let Some(t) = &self.text_pattern { - write!(f, "[text='{}']", t)?; - } - write!(f, ")")?; - Ok(()) - } -} - -impl fmt::Debug for Selector { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "[")?; - for (i, step) in self.0.iter().enumerate() { - 
if step.is_immediate { - write!(f, " > ")?; - } else if i > 0 { - write!(f, " ")?; - } - write!(f, "{:?}", step)?; - } - write!(f, " (specificity: {})]", selector_specificity(self))?; - Ok(()) - } -} - -pub(crate) fn generate_property_sheet( - path: impl AsRef, - css: &str, - token_names: &HashSet, -) -> Result { - let rules = parse_property_sheet(path.as_ref(), &css)?; - Ok(Builder::new(&rules, token_names).build()) -} - -fn parse_property_sheet(path: &Path, css: &str) -> Result> { - let mut schema_paths = Vec::new(); - let css = css.as_bytes(); - let mut items = rsass::parse_scss_data(css).map_err(|(pos, kind)| rsass::Error::ParseError { - file: path.to_string_lossy().into(), - pos: rsass::ErrPos::pos_of(pos, &css), - kind, - })?; - - process_at_rules(&mut items, &mut schema_paths, path)?; - let mut result = Vec::new(); - let selector_prefixes = vec![Vec::new()]; - parse_sass_items(items, &selector_prefixes, &mut result)?; - Ok(result) -} - -fn parse_sass_items( - items: Vec, - selector_prefixes: &Vec>, - result: &mut Vec, -) -> Result<()> { - let mut properties = PropertySet::new(); - for item in items { - match item { - rsass::Item::None | rsass::Item::Comment(_) => {} - rsass::Item::Property(name, value) => { - let value = parse_sass_value(&value)?; - match properties.entry(name.to_string()) { - btree_map::Entry::Vacant(v) => { - v.insert(value); - } - btree_map::Entry::Occupied(mut o) => { - let existing_value = o.get_mut(); - if let PropertyValue::Array(items) = existing_value { - items.push(value); - continue; - } else { - let v = existing_value.clone(); - *existing_value = PropertyValue::Array(vec![v, value]); - } - } - } - } - rsass::Item::Rule(selectors, items) => { - let mut full_selectors = Vec::new(); - for prefix in selector_prefixes { - for selector in &selectors.s { - let mut prefix = prefix.clone(); - let mut operator_was_immediate: Option = Some(false); - for part in &selector.0 { - match part { - SelectorPart::BackRef => { - 
operator_was_immediate = None; - } - SelectorPart::Simple(value) => { - if let Some(value) = value.single_raw() { - for (i, value) in value.split('.').enumerate() { - if value.is_empty() { - continue; - } - let value = value.to_string(); - check_node_kind(&value)?; - if i > 0 { - if let Some(immediate) = operator_was_immediate { - prefix.push(SelectorStep { - kind: None, - field: Some(value), - is_named: None, - child_index: None, - text_pattern: None, - is_immediate: immediate, - }) - } else { - prefix.last_mut().unwrap().field = Some(value); - } - } else { - if let Some(immediate) = operator_was_immediate { - prefix.push(SelectorStep { - kind: Some(value.to_string()), - field: None, - child_index: None, - text_pattern: None, - is_named: Some(true), - is_immediate: immediate, - }); - } else { - return Error::err(format!("Node type {} must be separated by whitespace or the `>` operator", value)); - } - } - operator_was_immediate = None; - } - } else { - return Err(interpolation_error()); - } - operator_was_immediate = None; - } - SelectorPart::Attribute { name, val, .. } => { - match name.single_raw() { - None => return Err(interpolation_error()), - Some("text") => { - if operator_was_immediate.is_some() { - return Error::err("The `text` attribute must be used in combination with a node type or field".to_string()); - } - if let Some(last_step) = prefix.last_mut() { - last_step.text_pattern = - Some(get_string_value(val.to_string())?) 
- } - } - Some("token") => { - if let Some(immediate) = operator_was_immediate { - prefix.push(SelectorStep { - kind: Some(get_string_value(val.to_string())?), - field: None, - is_named: Some(false), - child_index: None, - text_pattern: None, - is_immediate: immediate, - }); - operator_was_immediate = None; - } else { - return Error::err("The `token` attribute canot be used in combination with a node type".to_string()); - } - } - _ => { - return Error::err(format!( - "Unsupported attribute {}", - part - )); - } - } - } - SelectorPart::PseudoElement { .. } => { - return Error::err( - "Pseudo elements are not supported".to_string(), - ); - } - SelectorPart::Pseudo { name, arg } => match name.single_raw() { - None => return Err(interpolation_error()), - Some("nth-child") => { - if let Some(arg) = arg { - let mut arg_str = String::new(); - write!(&mut arg_str, "{}", arg).unwrap(); - if let Some(last_step) = prefix.last_mut() { - if let Ok(i) = usize::from_str_radix(&arg_str, 10) { - last_step.child_index = Some(i); - } else { - return Error::err(format!( - "Invalid child index {}", - arg - )); - } - } - } - } - _ => { - return Error::err(format!( - "Unsupported pseudo-class {}", - part - )); - } - }, - SelectorPart::Descendant => { - operator_was_immediate = Some(false); - } - SelectorPart::RelOp(operator) => { - let operator = *operator as char; - if operator == '>' { - operator_was_immediate = Some(true); - } else { - return Error::err(format!( - "Unsupported operator {}", - operator - )); - } - } - } - } - full_selectors.push(prefix); - } - } - parse_sass_items(items, &full_selectors, result)?; - } - _ => return Error::err(format!("Unsupported syntax type {:?}", item)), - } - } - - if !properties.is_empty() { - result.push(Rule { - selectors: selector_prefixes.iter().cloned().map(Selector).collect(), - properties, - }); - } - - Ok(()) -} - -fn process_at_rules( - items: &mut Vec, - schema_paths: &mut Vec, - path: &Path, -) -> Result<()> { - let mut i = 0; - while i 
< items.len() { - match &items[i] { - rsass::Item::Import(arg) => { - if let Some(s) = get_sass_string(arg) { - let import_path = resolve_path(path, s)?; - let mut imported_items = rsass::parse_scss_file(&import_path)?; - process_at_rules(&mut imported_items, schema_paths, &import_path)?; - items.splice(i..(i + 1), imported_items); - continue; - } else { - return Err(Error::new("@import arguments must be strings".to_string())); - } - } - rsass::Item::AtRule { name, args, .. } => match name.as_str() { - "schema" => { - if let Some(s) = get_sass_string(args) { - let schema_path = resolve_path(path, s)?; - schema_paths.push(schema_path); - items.remove(i); - continue; - } else { - return Error::err("@schema arguments must be strings".to_string()); - } - } - _ => return Error::err(format!("Unsupported at-rule '{}'", name)), - }, - _ => {} - } - i += 1; - } - Ok(()) -} - -fn parse_sass_value(value: &Value) -> Result { - match value { - Value::Literal(s) => { - if let Some(s) = s.single_raw() { - Ok(PropertyValue::String(s.to_string())) - } else { - Err(interpolation_error()) - } - } - Value::Call(name, raw_args) => { - if let Some(name) = name.single_raw() { - let mut args = Vec::new(); - for (_, arg) in raw_args.iter() { - args.push(parse_sass_value(arg)?); - } - let mut result = PropertySet::new(); - result.insert("name".to_string(), PropertyValue::String(name.to_string())); - result.insert("args".to_string(), PropertyValue::Array(args)); - Ok(PropertyValue::Object(result)) - } else { - Err(Error::new( - "String interpolation is not supported".to_string(), - )) - } - } - Value::List(elements, ..) 
=> { - let mut result = Vec::new(); - for element in elements { - result.push(parse_sass_value(element)?); - } - Ok(PropertyValue::Array(result)) - } - Value::Color(_, Some(name)) => Ok(PropertyValue::String(name.clone())), - Value::Numeric(n, _) => Ok(PropertyValue::Number(n.to_integer())), - Value::True => Ok(PropertyValue::Boolean(true)), - Value::False => Ok(PropertyValue::Boolean(false)), - _ => Err(Error::new(format!( - "Property values must be strings or function calls. Got {:?}", - value - ))), - } -} - -fn get_sass_string(value: &Value) -> Option<&str> { - if let Value::Literal(s) = value { - s.single_raw() - } else { - None - } -} - -fn resolve_path(base: &Path, p: &str) -> Result { - let path = Path::new(p); - let mut base = base.to_owned(); - base.pop(); - if path.starts_with(".") { - base.push(path); - if base.exists() { - return Ok(base); - } - } else { - loop { - let mut result = base.clone(); - result.push("node_modules"); - result.push(path); - if result.exists() { - return Ok(result); - } - if !base.pop() { - break; - } - } - } - Err(Error::new(format!("Could not resolve import path `{}`", p))) -} - -fn check_node_kind(name: &String) -> Result<()> { - for c in name.chars() { - if !c.is_alphanumeric() && c != '_' { - return Err(Error::new(format!("Invalid identifier '{}'", name))); - } - } - Ok(()) -} - -fn get_string_value(mut s: String) -> Result { - if s.starts_with("'") && s.ends_with("'") || s.starts_with('"') && s.ends_with('"') { - s.pop(); - s.remove(0); - Ok(s) - } else { - Err(Error::new(format!("Unsupported string literal {}", s))) - } -} - -fn interpolation_error() -> Error { - Error::new("String interpolation is not supported".to_string()) -} - -#[cfg(test)] -mod tests { - use super::*; - use regex::Regex; - use std::fs; - use tempfile::TempDir; - - #[test] - fn test_property_sheet_with_immediate_child_and_descendant_selectors() { - let sheet = generate_property_sheet( - "foo.css", - " - f1 { - color: red; - - & > f2 { - color: green; 
- } - - & f3 { - color: blue; - } - } - - f2 { - color: indigo; - height: 2; - } - - f3 { - color: violet; - height: 3; - } - ", - &HashSet::new(), - ) - .unwrap(); - - // f1 single-element selector - assert_eq!( - *query_simple(&sheet, vec!["f1"]), - props(&[("color", string("red"))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f2", "f1"]), - props(&[("color", string("red"))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f2", "f3", "f1"]), - props(&[("color", string("red"))]) - ); - - // f2 single-element selector - assert_eq!( - *query_simple(&sheet, vec!["f2"]), - props(&[("color", string("indigo")), ("height", num(2))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f2", "f2"]), - props(&[("color", string("indigo")), ("height", num(2))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f1", "f3", "f2"]), - props(&[("color", string("indigo")), ("height", num(2))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f1", "f6", "f2"]), - props(&[("color", string("indigo")), ("height", num(2))]) - ); - - // f3 single-element selector - assert_eq!( - *query_simple(&sheet, vec!["f3"]), - props(&[("color", string("violet")), ("height", num(3))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f2", "f3"]), - props(&[("color", string("violet")), ("height", num(3))]) - ); - - // f2 child selector - assert_eq!( - *query_simple(&sheet, vec!["f1", "f2"]), - props(&[("color", string("green")), ("height", num(2))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f2", "f1", "f2"]), - props(&[("color", string("green")), ("height", num(2))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f3", "f1", "f2"]), - props(&[("color", string("green")), ("height", num(2))]) - ); - - // f3 descendant selector - assert_eq!( - *query_simple(&sheet, vec!["f1", "f3"]), - props(&[("color", string("blue")), ("height", num(3))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f1", "f2", "f3"]), - props(&[("color", string("blue")), ("height", num(3))]) - ); - assert_eq!( - 
*query_simple(&sheet, vec!["f1", "f6", "f7", "f8", "f3"]), - props(&[("color", string("blue")), ("height", num(3))]) - ); - - // no match - assert_eq!(*query_simple(&sheet, vec!["f1", "f3", "f4"]), props(&[])); - assert_eq!(*query_simple(&sheet, vec!["f1", "f2", "f5"]), props(&[])); - } - - #[test] - fn test_property_sheet_with_text_attribute() { - let sheet = generate_property_sheet( - "foo.css", - " - f1 { - color: red; - - &[text='^[A-Z]'] { - color: green; - } - - &[text='^[A-Z_]+$'] { - color: blue; - } - } - - f2[text='^[A-Z_]+$'] { - color: purple; - } - ", - &HashSet::new(), - ) - .unwrap(); - - assert_eq!( - *query(&sheet, vec![("f1", None, true, 0)], "abc"), - props(&[("color", string("red"))]) - ); - assert_eq!( - *query(&sheet, vec![("f1", None, true, 0)], "Abc"), - props(&[("color", string("green"))]) - ); - assert_eq!( - *query(&sheet, vec![("f1", None, true, 0)], "AB_CD"), - props(&[("color", string("blue"))]) - ); - assert_eq!( - *query(&sheet, vec![("f2", None, true, 0)], "Abc"), - props(&[]) - ); - assert_eq!( - *query(&sheet, vec![("f2", None, true, 0)], "ABC"), - props(&[("color", string("purple"))]) - ); - } - - #[test] - fn test_property_sheet_with_fields() { - let sheet = generate_property_sheet( - "foo.css", - " - a { - color: red; - &.x { - color: green; - b { - color: blue; - &.y { color: yellow; } - } - } - b { color: orange; } - b.y { color: indigo; } - } - .x { color: violet; } - ", - &HashSet::new(), - ) - .unwrap(); - - assert_eq!( - *query(&sheet, vec![("a", None, true, 0)], ""), - props(&[("color", string("red"))]) - ); - assert_eq!( - *query(&sheet, vec![("a", Some("x"), true, 0)], ""), - props(&[("color", string("green"))]) - ); - assert_eq!( - *query( - &sheet, - vec![("a", Some("x"), true, 0), ("b", None, true, 0)], - "" - ), - props(&[("color", string("blue"))]) - ); - assert_eq!( - *query( - &sheet, - vec![("a", Some("x"), true, 0), ("b", Some("y"), true, 0)], - "" - ), - props(&[("color", string("yellow"))]) - ); - 
assert_eq!( - *query(&sheet, vec![("b", Some("x"), true, 0)], ""), - props(&[("color", string("violet"))]) - ); - assert_eq!( - *query(&sheet, vec![("a", None, true, 0), ("b", None, true, 0)], ""), - props(&[("color", string("orange"))]) - ); - assert_eq!( - *query( - &sheet, - vec![("a", None, true, 0), ("b", Some("y"), true, 0)], - "" - ), - props(&[("color", string("indigo"))]) - ); - } - - #[test] - fn test_property_sheet_with_cascade_ordering_as_tie_breaker() { - let sheet = generate_property_sheet( - "foo.css", - " - f1 f2:nth-child(1) { color: red; } - f1:nth-child(1) f2 { color: green; } - f1 f2[text='a'] { color: blue; } - f1 f2[text='b'] { color: violet; } - ", - &HashSet::new(), - ) - .unwrap(); - - assert_eq!( - *query( - &sheet, - vec![("f1", None, true, 0), ("f2", None, true, 0)], - "x" - ), - props(&[]) - ); - assert_eq!( - *query( - &sheet, - vec![("f1", None, true, 0), ("f2", None, true, 1)], - "x" - ), - props(&[("color", string("red"))]) - ); - assert_eq!( - *query( - &sheet, - vec![("f1", None, true, 1), ("f2", None, true, 1)], - "x" - ), - props(&[("color", string("green"))]) - ); - assert_eq!( - *query( - &sheet, - vec![("f1", None, true, 1), ("f2", None, true, 1)], - "a" - ), - props(&[("color", string("blue"))]) - ); - assert_eq!( - *query( - &sheet, - vec![("f1", None, true, 1), ("f2", None, true, 1)], - "ab" - ), - props(&[("color", string("violet"))]) - ); - } - - #[test] - fn test_property_sheet_with_css_function_calls() { - let sheet = generate_property_sheet( - "foo.css", - " - a { - b: f(); - c: f(g(h), i, \"j\", 10); - } - ", - &HashSet::new(), - ) - .unwrap(); - - let p = query_simple(&sheet, vec!["a"]); - - assert_eq!( - p["b"], - object(&[("name", string("f")), ("args", array(vec![])),]) - ); - - assert_eq!( - p["c"], - object(&[ - ("name", string("f")), - ( - "args", - array(vec![ - object(&[("name", string("g")), ("args", array(vec![string("h"),]))]), - string("i"), - string("j"), - num(10), - ]) - ), - ]) - ); - - // Handle 
differently-formatted calls - let sheet2 = generate_property_sheet( - "foo.css", - " - a { - b: f(); - c: f( - g(h), - i, - \"j\", - 10 - ); - } - ", - &HashSet::new(), - ) - .unwrap(); - - assert_eq!( - query_simple(&sheet2, vec!["a"])["c"], - query_simple(&sheet, vec!["a"])["c"] - ); - } - - #[test] - fn test_property_sheet_with_array_by_declaring_property_multiple_times() { - let sheet = generate_property_sheet( - "foo.css", - " - a { - b: 'foo'; - b: 'bar'; - b: 'baz'; - c: f(g()); - c: h(); - } - ", - &HashSet::new(), - ) - .unwrap(); - - let p = query_simple(&sheet, vec!["a"]); - - assert_eq!( - p["b"], - array(vec![string("foo"), string("bar"), string("baz"),]) - ); - - assert_eq!( - p["c"], - array(vec![ - object(&[ - ("name", string("f")), - ( - "args", - array(vec![object(&[ - ("name", string("g")), - ("args", array(vec![])), - ])]) - ) - ]), - object(&[("name", string("h")), ("args", array(vec![])),]), - ]), - ); - } - - #[test] - fn test_property_sheet_with_imports() { - let repo_dir = TempDir::new().unwrap(); - let properties_dir = repo_dir.path().join("properties"); - let dependency_properties_dir = repo_dir - .path() - .join("node_modules") - .join("the-dependency") - .join("properties"); - fs::create_dir_all(&properties_dir).unwrap(); - fs::create_dir_all(&dependency_properties_dir).unwrap(); - let sheet_path1 = properties_dir.join("sheet1.css"); - let sheet_path2 = properties_dir.join("sheet2.css"); - let dependency_sheet_path1 = dependency_properties_dir.join("dependency-sheet1.css"); - let dependency_sheet_path2 = dependency_properties_dir.join("dependency-sheet2.css"); - - fs::write( - sheet_path2, - r#" - a { x: '1'; } - "#, - ) - .unwrap(); - fs::write( - dependency_sheet_path1, - r#" - @import "./dependency-sheet2.css"; - a { y: '2'; } - "#, - ) - .unwrap(); - fs::write( - dependency_sheet_path2, - r#" - b { x: '3'; } - "#, - ) - .unwrap(); - let sheet = generate_property_sheet( - sheet_path1, - r#" - @import "./sheet2.css"; - @import 
"the-dependency/properties/dependency-sheet1.css"; - b { y: '4'; } - "#, - &HashSet::new(), - ) - .unwrap(); - - let a = query_simple(&sheet, vec!["a"]); - assert_eq!(a["x"], string("1"),); - assert_eq!(a["y"], string("2"),); - let b = query_simple(&sheet, vec!["b"]); - assert_eq!(b["x"], string("3"),); - assert_eq!(b["y"], string("4"),); - } - - fn query_simple<'a>( - sheet: &'a PropertySheetJSON, - node_stack: Vec<&'static str>, - ) -> &'a PropertySet { - query( - sheet, - node_stack.into_iter().map(|s| (s, None, true, 0)).collect(), - "", - ) - } - - fn query<'a>( - sheet: &'a PropertySheetJSON, - node_stack: Vec<(&'static str, Option<&'static str>, bool, usize)>, - leaf_text: &str, - ) -> &'a PropertySet { - let mut state_id = 0; - for (kind, field, is_named, child_index) in node_stack { - let state = &sheet.states[state_id]; - state_id = state - .transitions - .iter() - .chain(sheet.states[0].transitions.iter()) - .find(|transition| { - transition.kind.as_ref().map_or(true, |k| k == kind) - && transition.named.map_or(true, |n| n == is_named) - && transition.field.as_ref().map_or(true, |f| field == Some(f)) - && transition.index.map_or(true, |index| index == child_index) - && (transition - .text - .as_ref() - .map_or(true, |text| Regex::new(text).unwrap().is_match(leaf_text))) - }) - .map_or(state.default_next_state_id, |t| t.state_id); - } - &sheet.property_sets[sheet.states[state_id].property_set_id] - } - - fn array(s: Vec) -> PropertyValue { - PropertyValue::Array(s) - } - - fn object<'a>(s: &'a [(&'a str, PropertyValue)]) -> PropertyValue { - PropertyValue::Object( - s.into_iter() - .map(|(a, b)| (a.to_string(), b.clone())) - .collect(), - ) - } - - fn string(s: &str) -> PropertyValue { - PropertyValue::String(s.to_string()) - } - - fn num(n: isize) -> PropertyValue { - PropertyValue::Number(n) - } - - fn props<'a>(s: &'a [(&'a str, PropertyValue)]) -> PropertySet { - s.into_iter() - .map(|(a, b)| (a.to_string(), b.clone())) - .collect() - } -} diff --git 
a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 34d8f391..58d99cc4 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -2,11 +2,12 @@ use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType use super::nfa::CharacterSet; use super::rules::{Alias, AliasMap, Symbol, SymbolType}; use super::tables::{ - AdvanceAction, FieldLocation, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry, + AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable, + ParseTableEntry, }; use core::ops::Range; use std::cmp; -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{HashMap, HashSet}; use std::fmt::Write; use std::mem::swap; @@ -14,6 +15,8 @@ use std::mem::swap; // stabilized, and the parser generation does not use it by default. const STABLE_LANGUAGE_VERSION: usize = tree_sitter::LANGUAGE_VERSION - 1; +const LARGE_CHARACTER_RANGE_COUNT: usize = 8; + macro_rules! add { ($this: tt, $($arg: tt)*) => {{ $this.buffer.write_fmt(format_args!($($arg)*)).unwrap(); @@ -62,15 +65,29 @@ struct Generator { keyword_capture_token: Option, syntax_grammar: SyntaxGrammar, lexical_grammar: LexicalGrammar, - simple_aliases: AliasMap, + default_aliases: AliasMap, symbol_order: HashMap, symbol_ids: HashMap, alias_ids: HashMap, - alias_map: BTreeMap>, + unique_aliases: Vec, + symbol_map: HashMap, field_names: Vec, next_abi: bool, } +struct TransitionSummary { + is_included: bool, + ranges: Vec>, + call_id: Option, +} + +struct LargeCharacterSetInfo { + ranges: Vec>, + symbol: Symbol, + index: usize, + usage_count: usize, +} + impl Generator { fn generate(mut self) -> String { self.init(); @@ -79,6 +96,7 @@ impl Generator { self.add_stats(); self.add_symbol_enum(); self.add_symbol_names_list(); + self.add_unique_symbol_map(); self.add_symbol_metadata_list(); if !self.field_names.is_empty() { @@ -91,14 +109,18 @@ impl Generator { self.add_alias_sequences(); } + if self.next_abi { + 
self.add_non_terminal_alias_map(); + } + let mut main_lex_table = LexTable::default(); swap(&mut main_lex_table, &mut self.main_lex_table); - self.add_lex_function("ts_lex", main_lex_table); + self.add_lex_function("ts_lex", main_lex_table, true); if self.keyword_capture_token.is_some() { let mut keyword_lex_table = LexTable::default(); swap(&mut keyword_lex_table, &mut self.keyword_lex_table); - self.add_lex_function("ts_lex_keywords", keyword_lex_table); + self.add_lex_function("ts_lex_keywords", keyword_lex_table, false); } self.add_lex_modes_list(); @@ -121,59 +143,105 @@ impl Generator { self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers); } - let mut field_names = Vec::new(); + self.symbol_map = self + .parse_table + .symbols + .iter() + .map(|symbol| { + let mut mapping = symbol; + + // There can be multiple symbols in the grammar that have the same name and kind, + // due to simple aliases. When that happens, ensure that they map to the same + // public-facing symbol. If one of the symbols is not aliased, choose that one + // to be the public-facing symbol. Otherwise, pick the symbol with the lowest + // numeric value. + if let Some(alias) = self.default_aliases.get(symbol) { + let kind = alias.kind(); + for other_symbol in &self.parse_table.symbols { + if let Some(other_alias) = self.default_aliases.get(other_symbol) { + if other_symbol < mapping && other_alias == alias { + mapping = other_symbol; + } + } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) { + mapping = other_symbol; + break; + } + } + } + // Two anonymous tokens with different flags but the same string value + // should be represented with the same symbol in the public API. 
Examples: + // * "<" and token(prec(1, "<")) + // * "(" and token.immediate("(") + else if symbol.is_terminal() { + let metadata = self.metadata_for_symbol(*symbol); + for other_symbol in &self.parse_table.symbols { + let other_metadata = self.metadata_for_symbol(*other_symbol); + if other_metadata == metadata { + mapping = other_symbol; + break; + } + } + } + + (*symbol, *mapping) + }) + .collect(); + for production_info in &self.parse_table.production_infos { + // Build a list of all field names for field_name in production_info.field_map.keys() { - field_names.push(field_name); + if let Err(i) = self.field_names.binary_search(&field_name) { + self.field_names.insert(i, field_name.clone()); + } } for alias in &production_info.alias_sequence { + // Generate a mapping from aliases to C identifiers. if let Some(alias) = &alias { - let alias_kind = if alias.is_named { - VariableType::Named - } else { - VariableType::Anonymous - }; - let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { - let (name, kind) = self.metadata_for_symbol(*symbol); - name == alias.value && kind == alias_kind + let existing_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { + if let Some(default_alias) = self.default_aliases.get(symbol) { + default_alias == alias + } else { + let (name, kind) = self.metadata_for_symbol(*symbol); + name == alias.value && kind == alias.kind() + } }); - let alias_id = if let Some(symbol) = matching_symbol { - self.symbol_ids[&symbol].clone() - } else if alias.is_named { - format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) - } else { - format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) - }; + + // Some aliases match an existing symbol in the grammar. + let alias_id; + if let Some(existing_symbol) = existing_symbol { + alias_id = self.symbol_ids[&self.symbol_map[&existing_symbol]].clone(); + } + // Other aliases don't match any existing symbol, and need their own identifiers. 
+ else { + if let Err(i) = self.unique_aliases.binary_search(alias) { + self.unique_aliases.insert(i, alias.clone()); + } + + alias_id = if alias.is_named { + format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) + } else { + format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) + }; + } + self.alias_ids.entry(alias.clone()).or_insert(alias_id); - self.alias_map - .entry(alias.clone()) - .or_insert(matching_symbol); } } } - field_names.sort_unstable(); - field_names.dedup(); - self.field_names = field_names.into_iter().cloned().collect(); - - // If we are opting in to the new unstable language ABI, then use the concept of - // "small parse states". Otherwise, use the same representation for all parse - // states. - if self.next_abi { - let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2); - self.large_state_count = self - .parse_table - .states - .iter() - .enumerate() - .take_while(|(i, s)| { - *i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold - }) - .count(); - } else { - self.large_state_count = self.parse_table.states.len(); - } + // Determine which states should use the "small state" representation, and which should + // use the normal array representation. 
+ let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2); + self.large_state_count = self + .parse_table + .states + .iter() + .enumerate() + .take_while(|(i, s)| { + *i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold + }) + .count(); } fn add_includes(&mut self) { @@ -239,21 +307,14 @@ impl Generator { "#define STATE_COUNT {}", self.parse_table.states.len() ); - - if self.next_abi { - add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count); - } + add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count); add_line!( self, "#define SYMBOL_COUNT {}", self.parse_table.symbols.len() ); - add_line!( - self, - "#define ALIAS_COUNT {}", - self.alias_map.iter().filter(|e| e.1.is_none()).count() - ); + add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len(),); add_line!(self, "#define TOKEN_COUNT {}", token_count); add_line!( self, @@ -281,11 +342,9 @@ impl Generator { i += 1; } } - for (alias, symbol) in &self.alias_map { - if symbol.is_none() { - add_line!(self, "{} = {},", self.alias_ids[&alias], i); - i += 1; - } + for alias in &self.unique_aliases { + add_line!(self, "{} = {},", self.alias_ids[&alias], i); + i += 1; } dedent!(self); add_line!(self, "}};"); @@ -297,28 +356,52 @@ impl Generator { indent!(self); for symbol in self.parse_table.symbols.iter() { let name = self.sanitize_string( - self.simple_aliases + self.default_aliases .get(symbol) .map(|alias| alias.value.as_str()) .unwrap_or(self.metadata_for_symbol(*symbol).0), ); add_line!(self, "[{}] = \"{}\",", self.symbol_ids[&symbol], name); } - for (alias, symbol) in &self.alias_map { - if symbol.is_none() { - add_line!( - self, - "[{}] = \"{}\",", - self.alias_ids[&alias], - self.sanitize_string(&alias.value) - ); - } + for alias in &self.unique_aliases { + add_line!( + self, + "[{}] = \"{}\",", + self.alias_ids[&alias], + self.sanitize_string(&alias.value) + ); } dedent!(self); add_line!(self, "}};"); 
add_line!(self, ""); } + fn add_unique_symbol_map(&mut self) { + add_line!(self, "static TSSymbol ts_symbol_map[] = {{"); + indent!(self); + for symbol in &self.parse_table.symbols { + add_line!( + self, + "[{}] = {},", + self.symbol_ids[symbol], + self.symbol_ids[&self.symbol_map[symbol]], + ); + } + + for alias in &self.unique_aliases { + add_line!( + self, + "[{}] = {},", + self.alias_ids[&alias], + self.alias_ids[&alias], + ); + } + + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } + fn add_field_name_enum(&mut self) { add_line!(self, "enum {{"); indent!(self); @@ -356,7 +439,7 @@ impl Generator { for symbol in &self.parse_table.symbols { add_line!(self, "[{}] = {{", self.symbol_ids[&symbol]); indent!(self); - if let Some(Alias { is_named, .. }) = self.simple_aliases.get(symbol) { + if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) { add_line!(self, ".visible = true,"); add_line!(self, ".named = {},", is_named); } else { @@ -372,6 +455,9 @@ impl Generator { VariableType::Hidden => { add_line!(self, ".visible = false,"); add_line!(self, ".named = true,"); + if self.syntax_grammar.supertype_symbols.contains(symbol) { + add_line!(self, ".supertype = true,"); + } } VariableType::Auxiliary => { add_line!(self, ".visible = false,"); @@ -382,15 +468,13 @@ impl Generator { dedent!(self); add_line!(self, "}},"); } - for (alias, matching_symbol) in &self.alias_map { - if matching_symbol.is_none() { - add_line!(self, "[{}] = {{", self.alias_ids[&alias]); - indent!(self); - add_line!(self, ".visible = true,"); - add_line!(self, ".named = {},", alias.is_named); - dedent!(self); - add_line!(self, "}},"); - } + for alias in &self.unique_aliases { + add_line!(self, "[{}] = {{", self.alias_ids[&alias]); + indent!(self); + add_line!(self, ".visible = true,"); + add_line!(self, ".named = {},", alias.is_named); + dedent!(self); + add_line!(self, "}},"); } dedent!(self); add_line!(self, "}};"); @@ -429,6 +513,53 @@ impl Generator { 
add_line!(self, ""); } + fn add_non_terminal_alias_map(&mut self) { + let mut alias_ids_by_symbol = HashMap::new(); + for variable in &self.syntax_grammar.variables { + for production in &variable.productions { + for step in &production.steps { + if let Some(alias) = &step.alias { + if step.symbol.is_non_terminal() + && Some(alias) != self.default_aliases.get(&step.symbol) + { + if self.symbol_ids.contains_key(&step.symbol) { + if let Some(alias_id) = self.alias_ids.get(&alias) { + let alias_ids = alias_ids_by_symbol + .entry(step.symbol) + .or_insert(Vec::new()); + if let Err(i) = alias_ids.binary_search(&alias_id) { + alias_ids.insert(i, alias_id); + } + } + } + } + } + } + } + } + + let mut alias_ids_by_symbol = alias_ids_by_symbol.iter().collect::>(); + alias_ids_by_symbol.sort_unstable_by_key(|e| e.0); + + add_line!(self, "static uint16_t ts_non_terminal_alias_map[] = {{"); + indent!(self); + for (symbol, alias_ids) in alias_ids_by_symbol { + let symbol_id = &self.symbol_ids[symbol]; + let public_symbol_id = &self.symbol_ids[&self.symbol_map[&symbol]]; + add_line!(self, "{}, {},", symbol_id, 1 + alias_ids.len()); + indent!(self); + add_line!(self, "{},", public_symbol_id); + for alias_id in alias_ids { + add_line!(self, "{},", alias_id); + } + dedent!(self); + } + add_line!(self, "0,"); + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } + fn add_field_sequences(&mut self) { let mut flat_field_maps = vec![]; let mut next_flat_field_map_index = 0; @@ -462,7 +593,8 @@ impl Generator { add_line!( self, - "static const TSFieldMapSlice ts_field_map_slices[] = {{", + "static const TSFieldMapSlice ts_field_map_slices[{}] = {{", + self.parse_table.production_infos.len(), ); indent!(self); for (production_id, (row_id, length)) in field_map_ids.into_iter().enumerate() { @@ -504,21 +636,120 @@ impl Generator { add_line!(self, ""); } - fn add_lex_function(&mut self, name: &str, lex_table: LexTable) { + fn add_lex_function( + &mut self, + name: &str, + 
lex_table: LexTable, + extract_helper_functions: bool, + ) { + let mut ruled_out_chars = HashSet::new(); + let mut large_character_sets = Vec::::new(); + + // For each lex state, compute a summary of the code that needs to be + // generated. + let state_transition_summaries: Vec> = lex_table + .states + .iter() + .map(|state| { + ruled_out_chars.clear(); + + // For each state transition, compute the set of character ranges + // that need to be checked. + state + .advance_actions + .iter() + .map(|(chars, action)| { + let (chars, is_included) = match chars { + CharacterSet::Include(c) => (c, true), + CharacterSet::Exclude(c) => (c, false), + }; + let mut call_id = None; + let mut ranges = + CharacterSet::ranges(chars, &ruled_out_chars).collect::>(); + if is_included { + ruled_out_chars.extend(chars.iter().map(|c| *c as u32)); + } else { + ranges.insert(0, '\0'..'\0') + } + + // Record any large character sets so that they can be extracted + // into helper functions, reducing code duplication. + if extract_helper_functions && ranges.len() > LARGE_CHARACTER_RANGE_COUNT { + let char_set_symbol = self + .symbol_for_advance_action(action, &lex_table) + .expect("No symbol for lex state"); + let mut count_for_symbol = 0; + for (i, info) in large_character_sets.iter_mut().enumerate() { + if info.ranges == ranges { + call_id = Some(i); + info.usage_count += 1; + break; + } + if info.symbol == char_set_symbol { + count_for_symbol += 1; + } + } + if call_id.is_none() { + call_id = Some(large_character_sets.len()); + large_character_sets.push(LargeCharacterSetInfo { + symbol: char_set_symbol, + index: count_for_symbol + 1, + ranges: ranges.clone(), + usage_count: 1, + }); + } + } + + TransitionSummary { + is_included, + ranges, + call_id, + } + }) + .collect() + }) + .collect(); + + // Generate a helper function for each large character set. 
+ let mut sorted_large_char_sets: Vec<_> = large_character_sets.iter().map(|e| e).collect(); + sorted_large_char_sets.sort_unstable_by_key(|info| (info.symbol, info.index)); + for info in sorted_large_char_sets { + if info.usage_count > 1 { + add_line!( + self, + "static inline bool {}_character_set_{}(int32_t lookahead) {{", + self.symbol_ids[&info.symbol], + info.index + ); + indent!(self); + add_line!(self, "return"); + indent!(self); + add_whitespace!(self); + self.add_character_range_conditions(&info.ranges, true, 0); + add!(self, ";\n"); + dedent!(self); + dedent!(self); + add_line!(self, "}}"); + add_line!(self, ""); + } + } + add_line!( self, "static bool {}(TSLexer *lexer, TSStateId state) {{", name ); indent!(self); - add_line!(self, "START_LEXER();"); - add_line!(self, "switch (state) {{"); - indent!(self); + add_line!(self, "START_LEXER();"); + add_line!(self, "eof = lexer->eof(lexer);"); + add_line!(self, "switch (state) {{"); + + indent!(self); for (i, state) in lex_table.states.into_iter().enumerate() { add_line!(self, "case {}:", i); indent!(self); - self.add_lex_state(state); + self.add_lex_state(state, &state_transition_summaries[i], &large_character_sets); dedent!(self); } @@ -529,85 +760,102 @@ impl Generator { dedent!(self); add_line!(self, "}}"); + dedent!(self); add_line!(self, "}}"); add_line!(self, ""); } - fn add_lex_state(&mut self, state: LexState) { + fn symbol_for_advance_action( + &self, + action: &AdvanceAction, + lex_table: &LexTable, + ) -> Option { + let mut state_ids = vec![action.state]; + let mut i = 0; + while i < state_ids.len() { + let id = state_ids[i]; + let state = &lex_table.states[id]; + if let Some(accept) = state.accept_action { + return Some(accept); + } + for (_, action) in &state.advance_actions { + if !state_ids.contains(&action.state) { + state_ids.push(action.state); + } + } + i += 1; + } + return None; + } + + fn add_lex_state( + &mut self, + state: LexState, + transition_info: &Vec, + large_character_sets: 
&Vec, + ) { if let Some(accept_action) = state.accept_action { add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]); } - let mut ruled_out_characters = HashSet::new(); - for (characters, action) in state.advance_actions { - let previous_length = self.buffer.len(); + if let Some(eof_action) = state.eof_action { + add_line!(self, "if (eof) ADVANCE({});", eof_action.state); + } + for (i, (_, action)) in state.advance_actions.into_iter().enumerate() { + let transition = &transition_info[i]; add_whitespace!(self); - add!(self, "if ("); - if self.add_character_set_condition(&characters, &ruled_out_characters) { - add!(self, ") "); - self.add_advance_action(&action); - if let CharacterSet::Include(chars) = characters { - ruled_out_characters.extend(chars.iter().map(|c| *c as u32)); + + // If there is a helper function for this transition's character + // set, then generate a call to that helper function. + if let Some(call_id) = transition.call_id { + let info = &large_character_sets[call_id]; + if info.usage_count > 1 { + add!(self, "if ("); + if !transition.is_included { + add!(self, "!"); + } + add!( + self, + "{}_character_set_{}(lookahead)) ", + self.symbol_ids[&info.symbol], + info.index + ); + self.add_advance_action(&action); + add!(self, "\n"); + continue; } - } else { - self.buffer.truncate(previous_length); - self.add_advance_action(&action); } + + // Otherwise, generate code to compare the lookahead character + // with all of the character ranges. 
+ if transition.ranges.len() > 0 { + add!(self, "if ("); + self.add_character_range_conditions(&transition.ranges, transition.is_included, 2); + add!(self, ") "); + } + self.add_advance_action(&action); add!(self, "\n"); } add_line!(self, "END_STATE();"); } - fn add_character_set_condition( - &mut self, - characters: &CharacterSet, - ruled_out_characters: &HashSet, - ) -> bool { - match characters { - CharacterSet::Include(chars) => { - let ranges = Self::get_ranges(chars, ruled_out_characters); - self.add_character_range_conditions(ranges, false) - } - CharacterSet::Exclude(chars) => { - let ranges = Some('\0'..'\0') - .into_iter() - .chain(Self::get_ranges(chars, ruled_out_characters)); - self.add_character_range_conditions(ranges, true) - } - } - } - fn add_character_range_conditions( &mut self, - ranges: impl Iterator>, - is_negated: bool, + ranges: &[Range], + is_included: bool, + indent_count: usize, ) -> bool { - let line_break = "\n "; + let mut line_break = "\n".to_string(); + for _ in 0..self.indent_level + indent_count { + line_break.push_str(" "); + } + let mut did_add = false; for range in ranges { - if is_negated { - if did_add { - add!(self, " &&{}", line_break); - } - if range.end == range.start { - add!(self, "lookahead != "); - self.add_character(range.start); - } else if range.end as u32 == range.start as u32 + 1 { - add!(self, "lookahead != "); - self.add_character(range.start); - add!(self, " &&{}lookahead != ", line_break); - self.add_character(range.end); - } else { - add!(self, "(lookahead < "); - self.add_character(range.start); - add!(self, " || "); - self.add_character(range.end); - add!(self, " < lookahead)"); - } - } else { + if is_included { if did_add { add!(self, " ||{}", line_break); } @@ -626,46 +874,31 @@ impl Generator { self.add_character(range.end); add!(self, ")"); } + } else { + if did_add { + add!(self, " &&{}", line_break); + } + if range.end == range.start { + add!(self, "lookahead != "); + self.add_character(range.start); 
+ } else if range.end as u32 == range.start as u32 + 1 { + add!(self, "lookahead != "); + self.add_character(range.start); + add!(self, " &&{}lookahead != ", line_break); + self.add_character(range.end); + } else { + add!(self, "(lookahead < "); + self.add_character(range.start); + add!(self, " || "); + self.add_character(range.end); + add!(self, " < lookahead)"); + } } did_add = true; } did_add } - fn get_ranges<'a>( - chars: &'a Vec, - ruled_out_characters: &'a HashSet, - ) -> impl Iterator> + 'a { - let mut prev_range: Option> = None; - chars - .iter() - .map(|c| (*c, false)) - .chain(Some(('\0', true))) - .filter_map(move |(c, done)| { - if done { - return prev_range.clone(); - } - if ruled_out_characters.contains(&(c as u32)) { - return None; - } - if let Some(range) = prev_range.clone() { - let mut prev_range_successor = range.end as u32 + 1; - while prev_range_successor < c as u32 { - if !ruled_out_characters.contains(&prev_range_successor) { - prev_range = Some(c..c); - return Some(range); - } - prev_range_successor += 1; - } - prev_range = Some(range.start..c); - None - } else { - prev_range = Some(c..c); - None - } - }) - } - fn add_advance_action(&mut self, action: &AdvanceAction) { if action.in_main_token { add!(self, "ADVANCE({});", action.state); @@ -678,7 +911,12 @@ impl Generator { add_line!(self, "static TSLexMode ts_lex_modes[STATE_COUNT] = {{"); indent!(self); for (i, state) in self.parse_table.states.iter().enumerate() { - if state.external_lex_state_id > 0 { + if state.is_non_terminal_extra + && state.terminal_entries.len() == 1 + && *state.terminal_entries.iter().next().unwrap().0 == Symbol::end() + { + add_line!(self, "[{}] = {{(TSStateId)(-1)}},", i,); + } else if state.external_lex_state_id > 0 { add_line!( self, "[{}] = {{.lex_state = {}, .external_lex_state = {}}},", @@ -776,12 +1014,7 @@ impl Generator { add_line!( self, - "static uint16_t ts_parse_table[{}][SYMBOL_COUNT] = {{", - if self.next_abi { - "LARGE_STATE_COUNT" - } else { - 
"STATE_COUNT" - } + "static uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {{", ); indent!(self); @@ -807,12 +1040,15 @@ impl Generator { terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0)); nonterminal_entries.sort_unstable_by_key(|k| k.0); - for (symbol, state_id) in &nonterminal_entries { + for (symbol, action) in &nonterminal_entries { add_line!( self, "[{}] = STATE({}),", self.symbol_ids[symbol], - *state_id + match action { + GotoAction::Goto(state) => *state, + GotoAction::ShiftExtra => i, + } ); } @@ -865,9 +1101,15 @@ impl Generator { .or_default() .push(**symbol); } - for (symbol, state_id) in &state.nonterminal_entries { + for (symbol, action) in &state.nonterminal_entries { + let state_id = match action { + GotoAction::Goto(i) => *i, + GotoAction::ShiftExtra => { + self.large_state_count + small_state_indices.len() - 1 + } + }; symbols_by_value - .entry((*state_id, SymbolType::NonTerminal)) + .entry((state_id, SymbolType::NonTerminal)) .or_default() .push(*symbol); } @@ -931,7 +1173,7 @@ impl Generator { for (i, entry) in parse_table_entries { add!( self, - " [{}] = {{.count = {}, .reusable = {}}},", + " [{}] = {{.entry = {{.count = {}, .reusable = {}}}}},", i, entry.actions.len(), entry.reusable @@ -982,6 +1224,10 @@ impl Generator { let language_function_name = format!("tree_sitter_{}", self.language_name); let external_scanner_name = format!("{}_external_scanner", language_function_name); + add_line!(self, "#ifdef __cplusplus"); + add_line!(self, r#"extern "C" {{"#); + add_line!(self, "#endif"); + if !self.syntax_grammar.external_tokens.is_empty() { add_line!(self, "void *{}_create(void);", external_scanner_name); add_line!(self, "void {}_destroy(void *);", external_scanner_name); @@ -1020,31 +1266,12 @@ impl Generator { add_line!(self, ".symbol_count = SYMBOL_COUNT,"); add_line!(self, ".alias_count = ALIAS_COUNT,"); add_line!(self, ".token_count = TOKEN_COUNT,"); - - if self.next_abi { - add_line!(self, 
".large_state_count = LARGE_STATE_COUNT,"); - } - + add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,"); + add_line!(self, ".symbol_names = ts_symbol_names,"); add_line!(self, ".symbol_metadata = ts_symbol_metadata,"); - add_line!( - self, - ".parse_table = (const unsigned short *)ts_parse_table," - ); - - if self.large_state_count < self.parse_table.states.len() { - add_line!( - self, - ".small_parse_table = (const uint16_t *)ts_small_parse_table," - ); - add_line!( - self, - ".small_parse_table_map = (const uint32_t *)ts_small_parse_table_map," - ); - } - + add_line!(self, ".parse_table = (const uint16_t *)ts_parse_table,"); add_line!(self, ".parse_actions = ts_parse_actions,"); add_line!(self, ".lex_modes = ts_lex_modes,"); - add_line!(self, ".symbol_names = ts_symbol_names,"); if !self.parse_table.production_infos.is_empty() { add_line!( @@ -1052,27 +1279,12 @@ impl Generator { ".alias_sequences = (const TSSymbol *)ts_alias_sequences," ); } - - add_line!(self, ".field_count = FIELD_COUNT,"); - - if !self.field_names.is_empty() { - add_line!(self, ".field_names = ts_field_names,"); - add_line!( - self, - ".field_map_slices = (const TSFieldMapSlice *)ts_field_map_slices," - ); - add_line!( - self, - ".field_map_entries = (const TSFieldMapEntry *)ts_field_map_entries," - ); - } - add_line!( self, ".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH," ); - add_line!(self, ".lex_fn = ts_lex,"); + add_line!(self, ".lex_fn = ts_lex,"); if let Some(keyword_capture_token) = self.keyword_capture_token { add_line!(self, ".keyword_lex_fn = ts_lex_keywords,"); add_line!( @@ -1082,8 +1294,6 @@ impl Generator { ); } - add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,"); - if !self.syntax_grammar.external_tokens.is_empty() { add_line!(self, ".external_scanner = {{"); indent!(self); @@ -1097,12 +1307,47 @@ impl Generator { dedent!(self); add_line!(self, "}},"); } - dedent!(self); + add_line!(self, ".field_count = FIELD_COUNT,"); + if 
!self.field_names.is_empty() { + add_line!( + self, + ".field_map_slices = (const TSFieldMapSlice *)ts_field_map_slices," + ); + add_line!( + self, + ".field_map_entries = (const TSFieldMapEntry *)ts_field_map_entries," + ); + add_line!(self, ".field_names = ts_field_names,"); + } + + add_line!(self, ".large_state_count = LARGE_STATE_COUNT,"); + if self.large_state_count < self.parse_table.states.len() { + add_line!( + self, + ".small_parse_table = (const uint16_t *)ts_small_parse_table," + ); + add_line!( + self, + ".small_parse_table_map = (const uint32_t *)ts_small_parse_table_map," + ); + } + + add_line!(self, ".public_symbol_map = ts_symbol_map,"); + + if self.next_abi { + add_line!(self, ".alias_map = ts_non_terminal_alias_map,"); + add_line!(self, ".state_count = STATE_COUNT,"); + } + + dedent!(self); add_line!(self, "}};"); add_line!(self, "return &language;"); dedent!(self); add_line!(self, "}}"); + add_line!(self, "#ifdef __cplusplus"); + add_line!(self, "}}"); + add_line!(self, "#endif"); } fn get_parse_action_list_id( @@ -1255,10 +1500,12 @@ impl Generator { for c in name.chars() { match c { '\"' => result += "\\\"", + '?' 
=> result += "\\?", '\\' => result += "\\\\", - '\t' => result += "\\t", + '\u{000c}' => result += "\\f", '\n' => result += "\\n", '\r' => result += "\\r", + '\t' => result += "\\t", _ => result.push(c), } } @@ -1266,18 +1513,20 @@ impl Generator { } fn add_character(&mut self, c: char) { - if c.is_ascii() { - match c { - '\0' => add!(self, "0"), - '\'' => add!(self, "'\\''"), - '\\' => add!(self, "'\\\\'"), - '\t' => add!(self, "'\\t'"), - '\n' => add!(self, "'\\n'"), - '\r' => add!(self, "'\\r'"), - _ => add!(self, "'{}'", c), + match c { + '\'' => add!(self, "'\\''"), + '\\' => add!(self, "'\\\\'"), + '\u{000c}' => add!(self, "'\\f'"), + '\n' => add!(self, "'\\n'"), + '\t' => add!(self, "'\\t'"), + '\r' => add!(self, "'\\r'"), + _ => { + if c == ' ' || c.is_ascii_graphic() { + add!(self, "'{}'", c) + } else { + add!(self, "{}", c as u32) + } } - } else { - add!(self, "{}", c as u32) } } } @@ -1294,7 +1543,7 @@ impl Generator { /// for keyword capture, if any. /// * `syntax_grammar` - The syntax grammar extracted from the language's grammar /// * `lexical_grammar` - The lexical grammar extracted from the language's grammar -/// * `simple_aliases` - A map describing the global rename rules that should apply. +/// * `default_aliases` - A map describing the global rename rules that should apply. /// the keys are symbols that are *always* aliased in the same way, and the values /// are the aliases that are applied to those symbols. 
/// * `next_abi` - A boolean indicating whether to opt into the new, unstable parse @@ -1307,7 +1556,7 @@ pub(crate) fn render_c_code( keyword_capture_token: Option, syntax_grammar: SyntaxGrammar, lexical_grammar: LexicalGrammar, - simple_aliases: AliasMap, + default_aliases: AliasMap, next_abi: bool, ) -> String { Generator { @@ -1321,59 +1570,14 @@ pub(crate) fn render_c_code( keyword_capture_token, syntax_grammar, lexical_grammar, - simple_aliases, + default_aliases, symbol_ids: HashMap::new(), symbol_order: HashMap::new(), alias_ids: HashMap::new(), - alias_map: BTreeMap::new(), + symbol_map: HashMap::new(), + unique_aliases: Vec::new(), field_names: Vec::new(), next_abi, } .generate() } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_get_char_ranges() { - struct Row { - chars: Vec, - ruled_out_chars: Vec, - expected_ranges: Vec>, - } - - let table = [ - Row { - chars: vec!['a'], - ruled_out_chars: vec![], - expected_ranges: vec!['a'..'a'], - }, - Row { - chars: vec!['a', 'b', 'c', 'e', 'z'], - ruled_out_chars: vec![], - expected_ranges: vec!['a'..'c', 'e'..'e', 'z'..'z'], - }, - Row { - chars: vec!['a', 'b', 'c', 'e', 'h', 'z'], - ruled_out_chars: vec!['d', 'f', 'g'], - expected_ranges: vec!['a'..'h', 'z'..'z'], - }, - ]; - - for Row { - chars, - ruled_out_chars, - expected_ranges, - } in table.iter() - { - let ruled_out_chars = ruled_out_chars - .into_iter() - .map(|c: &char| *c as u32) - .collect(); - let ranges = Generator::get_ranges(chars, &ruled_out_chars).collect::>(); - assert_eq!(ranges, *expected_ranges); - } - } -} diff --git a/cli/src/generate/rules.rs b/cli/src/generate/rules.rs index f3e39ebf..7676d61d 100644 --- a/cli/src/generate/rules.rs +++ b/cli/src/generate/rules.rs @@ -1,3 +1,4 @@ +use super::grammars::VariableType; use smallbitvec::SmallBitVec; use std::collections::HashMap; use std::iter::FromIterator; @@ -139,6 +140,16 @@ impl Rule { } } +impl Alias { + pub fn kind(&self) -> VariableType { + if self.is_named { + 
VariableType::Named + } else { + VariableType::Anonymous + } + } +} + #[cfg(test)] impl Rule { pub fn terminal(index: usize) -> Self { @@ -366,7 +377,7 @@ impl FromIterator for TokenSet { fn add_metadata(input: Rule, f: T) -> Rule { match input { - Rule::Metadata { rule, mut params } => { + Rule::Metadata { rule, mut params } if !params.is_token => { f(&mut params); Rule::Metadata { rule, params } } diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index fb593953..15b18a97 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -24,6 +24,12 @@ pub(crate) enum ParseAction { }, } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(crate) enum GotoAction { + Goto(ParseStateId), + ShiftExtra, +} + #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct ParseTableEntry { pub actions: Vec, @@ -34,10 +40,11 @@ pub(crate) struct ParseTableEntry { pub(crate) struct ParseState { pub id: ParseStateId, pub terminal_entries: HashMap, - pub nonterminal_entries: HashMap, + pub nonterminal_entries: HashMap, pub lex_state_id: usize, pub external_lex_state_id: usize, pub core_id: usize, + pub is_non_terminal_extra: bool, } #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] @@ -70,6 +77,7 @@ pub(crate) struct AdvanceAction { #[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] pub(crate) struct LexState { pub accept_action: Option, + pub eof_action: Option, pub advance_actions: Vec<(CharacterSet, AdvanceAction)>, } @@ -103,7 +111,13 @@ impl ParseState { _ => None, }) }) - .chain(self.nonterminal_entries.iter().map(|(_, state)| *state)) + .chain(self.nonterminal_entries.iter().filter_map(|(_, action)| { + if let GotoAction::Goto(state) = action { + Some(*state) + } else { + None + } + })) } pub fn update_referenced_states(&mut self, mut f: F) @@ -121,15 +135,18 @@ impl ParseState { } } } - for (symbol, other_state) in &self.nonterminal_entries { - let result = f(*other_state, self); - if result != *other_state { - 
updates.push((*symbol, 0, result)); + for (symbol, action) in &self.nonterminal_entries { + if let GotoAction::Goto(other_state) = action { + let result = f(*other_state, self); + if result != *other_state { + updates.push((*symbol, 0, result)); + } } } for (symbol, action_index, new_state) in updates { if symbol.is_non_terminal() { - self.nonterminal_entries.insert(symbol, new_state); + self.nonterminal_entries + .insert(symbol, GotoAction::Goto(new_state)); } else { let entry = self.terminal_entries.get_mut(&symbol).unwrap(); if let ParseAction::Shift { is_repetition, .. } = entry.actions[action_index] { diff --git a/cli/src/highlight.rs b/cli/src/highlight.rs index dff8fd2c..330c9e57 100644 --- a/cli/src/highlight.rs +++ b/cli/src/highlight.rs @@ -1,26 +1,58 @@ +use super::util; use crate::error::Result; use crate::loader::Loader; -use ansi_term::{Color, Style}; +use ansi_term::Color; use lazy_static::lazy_static; use serde::ser::SerializeMap; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_json::{json, Value}; use std::collections::HashMap; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Arc; +use std::sync::atomic::AtomicUsize; use std::time::Instant; -use std::{fmt, fs, io, path, thread}; -use tree_sitter::{Language, PropertySheet}; -use tree_sitter_highlight::{highlight, highlight_html, Highlight, HighlightEvent, Properties}; +use std::{fs, io, path, str, usize}; +use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer}; + +pub const HTML_HEADER: &'static str = " + + + Tree-sitter Highlighting + + + +"; + +pub const HTML_FOOTER: &'static str = " + +"; lazy_static! 
{ static ref CSS_STYLES_BY_COLOR_ID: Vec = serde_json::from_str(include_str!("../vendor/xterm-colors.json")).unwrap(); } +#[derive(Debug, Default)] +pub struct Style { + pub ansi: ansi_term::Style, + pub css: Option, +} + +#[derive(Debug)] pub struct Theme { - ansi_styles: Vec>, - css_styles: Vec>, + pub styles: Vec - - -"; - -pub const HTML_FOOTER: &'static str = " - -"; - pub fn html( loader: &Loader, theme: &Theme, source: &[u8], - language: Language, - property_sheet: &PropertySheet, + config: &HighlightConfiguration, + quiet: bool, + print_time: bool, ) -> Result<()> { use std::io::Write; + let stdout = io::stdout(); let mut stdout = stdout.lock(); - write!(&mut stdout, "\n")?; + let time = Instant::now(); + let cancellation_flag = util::cancel_on_stdin(); + let mut highlighter = Highlighter::new(); - let cancellation_flag = cancel_on_stdin(); - let lines = highlight_html( - source, - language, - property_sheet, - Some(cancellation_flag.as_ref()), - |s| language_for_injection_string(loader, s), - |highlight| { - if let Some(css_style) = theme.css_style(highlight) { - css_style - } else { - "" - } - }, - ) - .map_err(|e| e.to_string())?; - for (i, line) in lines.into_iter().enumerate() { - write!( - &mut stdout, - "\n", - i + 1, - line - )?; + let events = highlighter.highlight(config, source, Some(&cancellation_flag), |string| { + loader.highlight_config_for_injection_string(string) + })?; + + let mut renderer = HtmlRenderer::new(); + renderer.render(events, source, &move |highlight| { + if let Some(css_style) = &theme.styles[highlight.0].css { + css_style.as_bytes() + } else { + "".as_bytes() + } + })?; + + if !quiet { + write!(&mut stdout, "
{}{}
\n")?; + for (i, line) in renderer.lines().enumerate() { + write!( + &mut stdout, + "\n", + i + 1, + line + )?; + } + + write!(&mut stdout, "
{}{}
\n")?; } - write!(&mut stdout, "\n")?; + + if print_time { + eprintln!("Time: {}ms", time.elapsed().as_millis()); + } + Ok(()) } - -fn language_for_injection_string<'a>( - loader: &'a Loader, - string: &str, -) -> Option<(Language, &'a PropertySheet)> { - match loader.language_configuration_for_injection_string(string) { - Err(e) => { - eprintln!( - "Failed to load language for injection string '{}': {}", - string, - e.message() - ); - None - } - Ok(None) => None, - Ok(Some((language, configuration))) => { - match configuration.highlight_property_sheet(language) { - Err(e) => { - eprintln!( - "Failed to load property sheet for injection string '{}': {}", - string, - e.message() - ); - None - } - Ok(None) => None, - Ok(Some(sheet)) => Some((language, sheet)), - } - } - } -} diff --git a/cli/src/lib.rs b/cli/src/lib.rs index 33a9904f..e00323b7 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -5,7 +5,11 @@ pub mod highlight; pub mod loader; pub mod logger; pub mod parse; +pub mod query; +pub mod query_testing; +pub mod tags; pub mod test; +pub mod test_highlight; pub mod util; pub mod wasm; pub mod web_ui; diff --git a/cli/src/loader.rs b/cli/src/loader.rs index 237718bb..3d5a9377 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -5,12 +5,15 @@ use regex::{Regex, RegexBuilder}; use serde_derive::Deserialize; use std::collections::HashMap; use std::io::BufReader; +use std::ops::Range; use std::path::{Path, PathBuf}; use std::process::Command; +use std::sync::Mutex; use std::time::SystemTime; use std::{fs, mem}; -use tree_sitter::{Language, PropertySheet}; -use tree_sitter_highlight::{load_property_sheet, Properties}; +use tree_sitter::{Language, QueryError}; +use tree_sitter_highlight::HighlightConfiguration; +use tree_sitter_tags::{Error as TagsError, TagsConfiguration}; #[cfg(unix)] const DYLIB_EXTENSION: &'static str = "so"; @@ -20,23 +23,31 @@ const DYLIB_EXTENSION: &'static str = "dll"; const BUILD_TARGET: &'static str = env!("BUILD_TARGET"); 
-#[derive(Default)] -pub struct LanguageConfiguration { +pub struct LanguageConfiguration<'a> { pub scope: Option, pub content_regex: Option, pub _first_line_regex: Option, pub injection_regex: Option, pub file_types: Vec, - pub highlight_property_sheet_path: Option, + pub root_path: PathBuf, + pub highlights_filenames: Option>, + pub injections_filenames: Option>, + pub locals_filenames: Option>, + pub tags_filenames: Option>, language_id: usize, - highlight_property_sheet: OnceCell>>, + highlight_config: OnceCell>, + tags_config: OnceCell>, + highlight_names: &'a Mutex>, + use_all_highlight_names: bool, } pub struct Loader { parser_lib_path: PathBuf, languages_by_id: Vec<(PathBuf, OnceCell)>, - language_configurations: Vec, + language_configurations: Vec>, language_configuration_ids_by_file_type: HashMap>, + highlight_names: Box>>, + use_all_highlight_names: bool, } unsafe impl Send for Loader {} @@ -49,9 +60,22 @@ impl Loader { languages_by_id: Vec::new(), language_configurations: Vec::new(), language_configuration_ids_by_file_type: HashMap::new(), + highlight_names: Box::new(Mutex::new(Vec::new())), + use_all_highlight_names: true, } } + pub fn configure_highlights(&mut self, names: &Vec) { + self.use_all_highlight_names = false; + let mut highlights = self.highlight_names.lock().unwrap(); + highlights.clear(); + highlights.extend(names.iter().cloned()); + } + + pub fn highlight_names(&self) -> Vec { + self.highlight_names.lock().unwrap().clone() + } + pub fn find_all_languages(&mut self, parser_src_paths: &Vec) -> Result<()> { for parser_container_dir in parser_src_paths.iter() { if let Ok(entries) = fs::read_dir(parser_container_dir) { @@ -134,11 +158,12 @@ impl Loader { if configuration_ids.len() == 1 { configuration = &self.language_configurations[configuration_ids[0]]; } - // If multiple language configurations match, then determine which // one to use by applying the configurations' content regexes. 
else { - let file_contents = fs::read_to_string(path)?; + let file_contents = fs::read(path) + .map_err(Error::wrap(|| format!("Failed to read path {:?}", path)))?; + let file_contents = String::from_utf8_lossy(&file_contents); let mut best_score = -2isize; let mut best_configuration_id = None; for configuration_id in configuration_ids { @@ -151,7 +176,6 @@ impl Loader { if let Some(mat) = content_regex.find(&file_contents) { score = (mat.end() - mat.start()) as isize; } - // If the content regex does not match, then *penalize* this // language configuration, so that language configurations // without content regexes are preferred over those with @@ -338,10 +362,63 @@ impl Loader { Ok(language) } - fn find_language_configurations_at_path<'a>( + pub fn highlight_config_for_injection_string<'a>( + &'a self, + string: &str, + ) -> Option<&'a HighlightConfiguration> { + match self.language_configuration_for_injection_string(string) { + Err(e) => { + eprintln!( + "Failed to load language for injection string '{}': {}", + string, + e.message() + ); + None + } + Ok(None) => None, + Ok(Some((language, configuration))) => match configuration.highlight_config(language) { + Err(e) => { + eprintln!( + "Failed to load property sheet for injection string '{}': {}", + string, + e.message() + ); + None + } + Ok(None) => None, + Ok(Some(config)) => Some(config), + }, + } + } + + pub fn find_language_configurations_at_path<'a>( &'a mut self, parser_path: &Path, ) -> Result<&[LanguageConfiguration]> { + #[derive(Deserialize)] + #[serde(untagged)] + enum PathsJSON { + Empty, + Single(String), + Multiple(Vec), + } + + impl Default for PathsJSON { + fn default() -> Self { + PathsJSON::Empty + } + } + + impl PathsJSON { + fn into_vec(self) -> Option> { + match self { + PathsJSON::Empty => None, + PathsJSON::Single(s) => Some(vec![s]), + PathsJSON::Multiple(s) => Some(s), + } + } + } + #[derive(Deserialize)] struct LanguageConfigurationJSON { #[serde(default)] @@ -355,7 +432,14 @@ impl 
Loader { first_line_regex: Option, #[serde(rename = "injection-regex")] injection_regex: Option, - highlights: Option, + #[serde(default)] + highlights: PathsJSON, + #[serde(default)] + injections: PathsJSON, + #[serde(default)] + locals: PathsJSON, + #[serde(default)] + tags: PathsJSON, } #[derive(Deserialize)] @@ -394,22 +478,21 @@ impl Loader { }); let configuration = LanguageConfiguration { + root_path: parser_path.to_path_buf(), scope: config_json.scope, language_id, file_types: config_json.file_types.unwrap_or(Vec::new()), - content_regex: config_json - .content_regex - .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), - _first_line_regex: config_json - .first_line_regex - .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), - injection_regex: config_json - .injection_regex - .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), - highlight_property_sheet_path: config_json - .highlights - .map(|h| parser_path.join(h)), - highlight_property_sheet: OnceCell::new(), + content_regex: Self::regex(config_json.content_regex), + _first_line_regex: Self::regex(config_json.first_line_regex), + injection_regex: Self::regex(config_json.injection_regex), + injections_filenames: config_json.injections.into_vec(), + locals_filenames: config_json.locals.into_vec(), + tags_filenames: config_json.tags.into_vec(), + highlights_filenames: config_json.highlights.into_vec(), + highlight_config: OnceCell::new(), + tags_config: OnceCell::new(), + highlight_names: &*self.highlight_names, + use_all_highlight_names: self.use_all_highlight_names, }; for file_type in &configuration.file_types { @@ -419,7 +502,8 @@ impl Loader { .push(self.language_configurations.len()); } - self.language_configurations.push(configuration); + self.language_configurations + .push(unsafe { mem::transmute(configuration) }); } } } @@ -427,52 +511,184 @@ impl Loader { if self.language_configurations.len() == initial_language_configuration_count && 
parser_path.join("src").join("grammar.json").exists() { - self.language_configurations.push(LanguageConfiguration { + let configuration = LanguageConfiguration { + root_path: parser_path.to_owned(), language_id: self.languages_by_id.len(), + file_types: Vec::new(), scope: None, content_regex: None, - injection_regex: None, - file_types: Vec::new(), _first_line_regex: None, - highlight_property_sheet_path: None, - highlight_property_sheet: OnceCell::new(), - }); + injection_regex: None, + injections_filenames: None, + locals_filenames: None, + highlights_filenames: None, + tags_filenames: None, + highlight_config: OnceCell::new(), + tags_config: OnceCell::new(), + highlight_names: &*self.highlight_names, + use_all_highlight_names: self.use_all_highlight_names, + }; + self.language_configurations + .push(unsafe { mem::transmute(configuration) }); self.languages_by_id .push((parser_path.to_owned(), OnceCell::new())); } Ok(&self.language_configurations[initial_language_configuration_count..]) } + + fn regex(pattern: Option) -> Option { + pattern.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()) + } } -impl LanguageConfiguration { - pub fn highlight_property_sheet( - &self, - language: Language, - ) -> Result>> { - self.highlight_property_sheet +impl<'a> LanguageConfiguration<'a> { + pub fn highlight_config(&self, language: Language) -> Result> { + return self + .highlight_config .get_or_try_init(|| { - if let Some(path) = &self.highlight_property_sheet_path { - let sheet_json = fs::read_to_string(path).map_err(Error::wrap(|| { - format!( - "Failed to read property sheet {:?}", - path.file_name().unwrap() - ) - }))?; - let sheet = - load_property_sheet(language, &sheet_json).map_err(Error::wrap(|| { - format!( - "Failed to parse property sheet {:?}", - path.file_name().unwrap() - ) - }))?; - Ok(Some(sheet)) - } else { + let (highlights_query, highlight_ranges) = + self.read_queries(&self.highlights_filenames, "highlights.scm")?; + let (injections_query, 
injection_ranges) = + self.read_queries(&self.injections_filenames, "injections.scm")?; + let (locals_query, locals_ranges) = + self.read_queries(&self.locals_filenames, "locals.scm")?; + + if highlights_query.is_empty() { Ok(None) + } else { + let mut result = HighlightConfiguration::new( + language, + &highlights_query, + &injections_query, + &locals_query, + ) + .map_err(|error| { + if error.offset < injections_query.len() { + Self::include_path_in_query_error( + error, + &injection_ranges, + &injections_query, + 0, + ) + } else if error.offset < injections_query.len() + locals_query.len() { + Self::include_path_in_query_error( + error, + &locals_ranges, + &locals_query, + injections_query.len(), + ) + } else { + Self::include_path_in_query_error( + error, + &highlight_ranges, + &highlights_query, + injections_query.len() + locals_query.len(), + ) + } + })?; + let mut all_highlight_names = self.highlight_names.lock().unwrap(); + if self.use_all_highlight_names { + for capture_name in result.query.capture_names() { + if !all_highlight_names.contains(capture_name) { + all_highlight_names.push(capture_name.clone()); + } + } + } + result.configure(&all_highlight_names); + Ok(Some(result)) + } + }) + .map(Option::as_ref); + } + + pub fn tags_config(&self, language: Language) -> Result> { + self.tags_config + .get_or_try_init(|| { + let (tags_query, tags_ranges) = + self.read_queries(&self.tags_filenames, "tags.scm")?; + let (locals_query, locals_ranges) = + self.read_queries(&self.locals_filenames, "locals.scm")?; + if tags_query.is_empty() { + Ok(None) + } else { + TagsConfiguration::new(language, &tags_query, &locals_query) + .map(Some) + .map_err(|error| { + if let TagsError::Query(error) = error { + if error.offset < locals_query.len() { + Self::include_path_in_query_error( + error, + &locals_ranges, + &locals_query, + 0, + ) + } else { + Self::include_path_in_query_error( + error, + &tags_ranges, + &tags_query, + locals_query.len(), + ) + } + .into() + } else { 
+ error.into() + } + }) } }) .map(Option::as_ref) } + + fn include_path_in_query_error<'b>( + mut error: QueryError, + ranges: &'b Vec<(String, Range)>, + source: &str, + start_offset: usize, + ) -> (&'b str, QueryError) { + let offset_within_section = error.offset - start_offset; + let (path, range) = ranges + .iter() + .find(|(_, range)| range.contains(&offset_within_section)) + .unwrap(); + error.offset = offset_within_section - range.start; + error.row = source[range.start..offset_within_section] + .chars() + .filter(|c| *c == '\n') + .count(); + (path.as_ref(), error) + } + + fn read_queries( + &self, + paths: &Option>, + default_path: &str, + ) -> Result<(String, Vec<(String, Range)>)> { + let mut query = String::new(); + let mut path_ranges = Vec::new(); + if let Some(paths) = paths.as_ref() { + for path in paths { + let abs_path = self.root_path.join(path); + let prev_query_len = query.len(); + query += &fs::read_to_string(&abs_path).map_err(Error::wrap(|| { + format!("Failed to read query file {:?}", path) + }))?; + path_ranges.push((path.clone(), prev_query_len..query.len())); + } + } else { + let queries_path = self.root_path.join("queries"); + let path = queries_path.join(default_path); + if path.exists() { + query = fs::read_to_string(&path).map_err(Error::wrap(|| { + format!("Failed to read query file {:?}", path) + }))?; + path_ranges.push((default_path.to_string(), 0..query.len())); + } + } + + Ok((query, path_ranges)) + } } fn needs_recompile( diff --git a/cli/src/main.rs b/cli/src/main.rs index 84b13da8..36ca5b77 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,10 +1,13 @@ use clap::{App, AppSettings, Arg, SubCommand}; use error::Error; +use glob::glob; use std::path::Path; use std::process::exit; use std::{env, fs, u64}; +use tree_sitter::Language; use tree_sitter_cli::{ - config, error, generate, highlight, loader, logger, parse, test, wasm, web_ui, + config, error, generate, highlight, loader, logger, parse, query, tags, test, 
test_highlight, + util, wasm, web_ui, }; const BUILD_VERSION: &'static str = env!("CARGO_PKG_VERSION"); @@ -38,8 +41,7 @@ fn run() -> error::Result<()> { .about("Generate a parser") .arg(Arg::with_name("grammar-path").index(1)) .arg(Arg::with_name("log").long("log")) - .arg(Arg::with_name("next-abi").long("next-abi")) - .arg(Arg::with_name("properties-only").long("properties")) + .arg(Arg::with_name("prev-abi").long("prev-abi")) .arg( Arg::with_name("report-states-for-rule") .long("report-states-for-rule") @@ -50,19 +52,20 @@ fn run() -> error::Result<()> { ) .subcommand( SubCommand::with_name("parse") - .about("Parse a file") + .about("Parse files") + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) .arg( - Arg::with_name("path") + Arg::with_name("paths") .index(1) .multiple(true) - .required(true), + .required(false), ) .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("debug").long("debug").short("d")) .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")) .arg(Arg::with_name("quiet").long("quiet").short("q")) + .arg(Arg::with_name("stat").long("stat").short("s")) .arg(Arg::with_name("time").long("time").short("t")) - .arg(Arg::with_name("allow-cancellation").long("cancel")) .arg(Arg::with_name("timeout").long("timeout").takes_value(true)) .arg( Arg::with_name("edits") @@ -73,6 +76,40 @@ fn run() -> error::Result<()> { .number_of_values(1), ), ) + .subcommand( + SubCommand::with_name("query") + .about("Search files using a syntax tree query") + .arg(Arg::with_name("query-path").index(1).required(true)) + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) + .arg( + Arg::with_name("paths") + .index(2) + .multiple(true) + .required(false), + ) + .arg( + Arg::with_name("byte-range") + .help("The range of byte offsets in which the query will be executed") + .long("byte-range") + .takes_value(true), + ) + .arg(Arg::with_name("scope").long("scope").takes_value(true)) + 
.arg(Arg::with_name("captures").long("captures").short("c")) + .arg(Arg::with_name("test").long("test")), + ) + .subcommand( + SubCommand::with_name("tags") + .arg(Arg::with_name("quiet").long("quiet").short("q")) + .arg(Arg::with_name("time").long("time").short("t")) + .arg(Arg::with_name("scope").long("scope").takes_value(true)) + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) + .arg( + Arg::with_name("paths") + .help("The source file to use") + .index(1) + .multiple(true), + ), + ) .subcommand( SubCommand::with_name("test") .about("Run a parser's tests") @@ -89,15 +126,17 @@ fn run() -> error::Result<()> { .subcommand( SubCommand::with_name("highlight") .about("Highlight a file") + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) .arg( - Arg::with_name("path") + Arg::with_name("paths") .index(1) .multiple(true) - .required(true), + .required(false), ) .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("html").long("html").short("h")) - .arg(Arg::with_name("time").long("time").short("t")), + .arg(Arg::with_name("time").long("time").short("t")) + .arg(Arg::with_name("quiet").long("quiet").short("q")), ) .subcommand( SubCommand::with_name("build-wasm") @@ -110,7 +149,14 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("path").index(1).multiple(true)), ) .subcommand( - SubCommand::with_name("web-ui").about("Test a parser interactively in the browser"), + SubCommand::with_name("web-ui") + .about("Test a parser interactively in the browser") + .arg( + Arg::with_name("quiet") + .long("quiet") + .short("q") + .help("open in default browser"), + ), ) .subcommand( SubCommand::with_name("dump-languages") @@ -128,7 +174,6 @@ fn run() -> error::Result<()> { config.save(&home_dir)?; } else if let Some(matches) = matches.subcommand_matches("generate") { let grammar_path = matches.value_of("grammar-path"); - let properties_only = matches.is_present("properties-only"); let report_symbol_name = 
matches.value_of("report-states-for-rule").or_else(|| { if matches.is_present("report-states") { Some("") @@ -139,24 +184,40 @@ fn run() -> error::Result<()> { if matches.is_present("log") { logger::init(); } - let next_abi = matches.is_present("next-abi"); + let prev_abi = matches.is_present("prev-abi"); generate::generate_parser_in_directory( ¤t_dir, grammar_path, - properties_only, - next_abi, + !prev_abi, report_symbol_name, )?; } else if let Some(matches) = matches.subcommand_matches("test") { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph"); - let filter = matches.value_of("filter"); let update = matches.is_present("update"); - let corpus_path = current_dir.join("corpus"); - if let Some(language) = loader.languages_at_path(¤t_dir)?.first() { - test::run_tests_at_path(*language, &corpus_path, debug, debug_graph, filter, update)?; - } else { - eprintln!("No language found"); + let filter = matches.value_of("filter"); + let languages = loader.languages_at_path(¤t_dir)?; + let language = languages + .first() + .ok_or_else(|| "No language found".to_string())?; + let test_dir = current_dir.join("test"); + + // Run the corpus tests. Look for them at two paths: `test/corpus` and `corpus`. + let mut test_corpus_dir = test_dir.join("corpus"); + if !test_corpus_dir.is_dir() { + test_corpus_dir = current_dir.join("corpus"); + } + if test_corpus_dir.is_dir() { + test::run_tests_at_path(*language, &test_corpus_dir, debug, debug_graph, filter, update)?; + } + + // Check that all of the queries are valid. + test::check_queries_at_path(*language, ¤t_dir.join("queries"))?; + + // Run the syntax highlighting tests. 
+ let test_highlight_dir = test_dir.join("highlight"); + if test_highlight_dir.is_dir() { + test_highlight::test_highlights(&loader, &test_highlight_dir)?; } } else if let Some(matches) = matches.subcommand_matches("parse") { let debug = matches.is_present("debug"); @@ -166,56 +227,27 @@ fn run() -> error::Result<()> { let edits = matches .values_of("edits") .map_or(Vec::new(), |e| e.collect()); - let allow_cancellation = matches.is_present("allow-cancellation"); + let cancellation_flag = util::cancel_on_stdin(); + let timeout = matches .value_of("timeout") .map_or(0, |t| u64::from_str_radix(t, 10).unwrap()); - loader.find_all_languages(&config.parser_directories)?; - let paths = matches - .values_of("path") - .unwrap() - .into_iter() - .collect::>(); + + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + let max_path_length = paths.iter().map(|p| p.chars().count()).max().unwrap(); let mut has_error = false; + loader.find_all_languages(&config.parser_directories)?; + + let should_track_stats = matches.is_present("stat"); + let mut stats = parse::Stats::default(); + for path in paths { - let path = Path::new(path); - let language = if let Some(scope) = matches.value_of("scope") { - if let Some(config) = - loader - .language_configuration_for_scope(scope) - .map_err(Error::wrap(|| { - format!("Failed to load language for scope '{}'", scope) - }))? - { - config.0 - } else { - return Error::err(format!("Unknown scope '{}'", scope)); - } - } else if let Some((lang, _)) = loader - .language_configuration_for_file_name(path) - .map_err(Error::wrap(|| { - format!( - "Failed to load language for file name {:?}", - path.file_name().unwrap() - ) - }))? - { - lang - } else if let Some(lang) = loader - .languages_at_path(¤t_dir) - .map_err(Error::wrap(|| { - "Failed to load language in current directory" - }))? 
- .first() - .cloned() - { - lang - } else { - eprintln!("No language found"); - return Ok(()); - }; - has_error |= parse::parse_file_at_path( + let path = Path::new(&path); + let language = + select_language(&mut loader, path, ¤t_dir, matches.value_of("scope"))?; + + let this_file_errored = parse::parse_file_at_path( language, path, &edits, @@ -225,36 +257,86 @@ fn run() -> error::Result<()> { timeout, debug, debug_graph, - allow_cancellation, + Some(&cancellation_flag), )?; + + if should_track_stats { + stats.total_parses += 1; + if !this_file_errored { + stats.successful_parses += 1; + } + } + + has_error |= this_file_errored; + } + + if should_track_stats { + println!("{}", stats) } if has_error { return Error::err(String::new()); } + } else if let Some(matches) = matches.subcommand_matches("query") { + let ordered_captures = matches.values_of("captures").is_some(); + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + loader.find_all_languages(&config.parser_directories)?; + let language = select_language( + &mut loader, + Path::new(&paths[0]), + ¤t_dir, + matches.value_of("scope"), + )?; + let query_path = Path::new(matches.value_of("query-path").unwrap()); + let range = matches.value_of("byte-range").map(|br| { + let r: Vec<&str> = br.split(":").collect(); + (r[0].parse().unwrap(), r[1].parse().unwrap()) + }); + let should_test = matches.is_present("test"); + query::query_files_at_paths( + language, + paths, + query_path, + ordered_captures, + range, + should_test, + )?; + } else if let Some(matches) = matches.subcommand_matches("tags") { + loader.find_all_languages(&config.parser_directories)?; + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + tags::generate_tags( + &loader, + matches.value_of("scope"), + &paths, + matches.is_present("quiet"), + matches.is_present("time"), + )?; } else if let Some(matches) = matches.subcommand_matches("highlight") { - let paths = 
matches.values_of("path").unwrap().into_iter(); - let html_mode = matches.is_present("html"); - let time = matches.is_present("time"); + loader.configure_highlights(&config.theme.highlight_names); loader.find_all_languages(&config.parser_directories)?; - if html_mode { + let time = matches.is_present("time"); + let quiet = matches.is_present("quiet"); + let html_mode = quiet || matches.is_present("html"); + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + + if html_mode && !quiet { println!("{}", highlight::HTML_HEADER); } - let language_config; + let cancellation_flag = util::cancel_on_stdin(); + + let mut lang = None; if let Some(scope) = matches.value_of("scope") { - language_config = loader.language_configuration_for_scope(scope)?; - if language_config.is_none() { + lang = loader.language_configuration_for_scope(scope)?; + if lang.is_none() { return Error::err(format!("Unknown scope '{}'", scope)); } - } else { - language_config = None; } for path in paths { - let path = Path::new(path); - let (language, language_config) = match language_config { + let path = Path::new(&path); + let (language, language_config) = match lang { Some(v) => v, None => match loader.language_configuration_for_file_name(path)? { Some(v) => v, @@ -265,30 +347,56 @@ fn run() -> error::Result<()> { }, }; - if let Some(sheet) = language_config.highlight_property_sheet(language)? { + if let Some(highlight_config) = language_config.highlight_config(language)? 
{ let source = fs::read(path)?; if html_mode { - highlight::html(&loader, &config.theme, &source, language, sheet)?; + highlight::html( + &loader, + &config.theme, + &source, + highlight_config, + quiet, + time, + )?; } else { - highlight::ansi(&loader, &config.theme, &source, language, sheet, time)?; + highlight::ansi( + &loader, + &config.theme, + &source, + highlight_config, + time, + Some(&cancellation_flag), + )?; } } else { - return Error::err(format!("No syntax highlighting property sheet specified")); + eprintln!("No syntax highlighting config found for path {:?}", path); } } + + if html_mode && !quiet { + println!("{}", highlight::HTML_FOOTER); + } } else if let Some(matches) = matches.subcommand_matches("build-wasm") { let grammar_path = current_dir.join(matches.value_of("path").unwrap_or("")); wasm::compile_language_to_wasm(&grammar_path, matches.is_present("docker"))?; - } else if matches.subcommand_matches("web-ui").is_some() { - web_ui::serve(¤t_dir); + } else if let Some(matches) = matches.subcommand_matches("web-ui") { + let open_in_browser = !matches.is_present("quiet"); + web_ui::serve(¤t_dir, open_in_browser); } else if matches.subcommand_matches("dump-languages").is_some() { loader.find_all_languages(&config.parser_directories)?; for (configuration, language_path) in loader.get_all_language_configurations() { println!( - "scope: {}\nparser: {:?}\nproperties: {:?}\nfile_types: {:?}\ncontent_regex: {:?}\ninjection_regex: {:?}\n", + concat!( + "scope: {}\n", + "parser: {:?}\n", + "highlights: {:?}\n", + "file_types: {:?}\n", + "content_regex: {:?}\n", + "injection_regex: {:?}\n", + ), configuration.scope.as_ref().unwrap_or(&String::new()), language_path, - configuration.highlight_property_sheet_path, + configuration.highlights_filenames, configuration.file_types, configuration.content_regex, configuration.injection_regex, @@ -298,3 +406,107 @@ fn run() -> error::Result<()> { Ok(()) } + +fn collect_paths<'a>( + paths_file: Option<&str>, + paths: 
Option>, +) -> error::Result> { + if let Some(paths_file) = paths_file { + return Ok(fs::read_to_string(paths_file) + .map_err(Error::wrap(|| { + format!("Failed to read paths file {}", paths_file) + }))? + .trim() + .split_ascii_whitespace() + .map(String::from) + .collect::>()); + } + + if let Some(paths) = paths { + let mut result = Vec::new(); + + let mut incorporate_path = |path: &str, positive| { + if positive { + result.push(path.to_string()); + } else { + if let Some(index) = result.iter().position(|p| p == path) { + result.remove(index); + } + } + }; + + for mut path in paths { + let mut positive = true; + if path.starts_with("!") { + positive = false; + path = path.trim_start_matches("!"); + } + + if Path::new(path).exists() { + incorporate_path(path, positive); + } else { + let paths = glob(path) + .map_err(Error::wrap(|| format!("Invalid glob pattern {:?}", path)))?; + for path in paths { + if let Some(path) = path?.to_str() { + incorporate_path(path, positive); + } + } + } + } + + if result.is_empty() { + Error::err( + "No files were found at or matched by the provided pathname/glob".to_string(), + )?; + } + + return Ok(result); + } + + Err(Error::new("Must provide one or more paths".to_string())) +} + +fn select_language( + loader: &mut loader::Loader, + path: &Path, + current_dir: &Path, + scope: Option<&str>, +) -> Result { + if let Some(scope) = scope { + if let Some(config) = + loader + .language_configuration_for_scope(scope) + .map_err(Error::wrap(|| { + format!("Failed to load language for scope '{}'", scope) + }))? + { + Ok(config.0) + } else { + return Error::err(format!("Unknown scope '{}'", scope)); + } + } else if let Some((lang, _)) = + loader + .language_configuration_for_file_name(path) + .map_err(Error::wrap(|| { + format!( + "Failed to load language for file name {:?}", + path.file_name().unwrap() + ) + }))? 
+ { + Ok(lang) + } else if let Some(lang) = loader + .languages_at_path(¤t_dir) + .map_err(Error::wrap(|| { + "Failed to load language in current directory" + }))? + .first() + .cloned() + { + Ok(lang) + } else { + eprintln!("No language found"); + Error::err("No language found".to_string()) + } +} diff --git a/cli/src/parse.rs b/cli/src/parse.rs index d1ddb499..4d66df1d 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -2,9 +2,9 @@ use super::error::{Error, Result}; use super::util; use std::io::{self, Write}; use std::path::Path; -use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::atomic::AtomicUsize; use std::time::Instant; -use std::{fs, thread, usize}; +use std::{fmt, fs, usize}; use tree_sitter::{InputEdit, Language, LogType, Parser, Point, Tree}; #[derive(Debug)] @@ -14,6 +14,22 @@ pub struct Edit { pub inserted_text: Vec, } +#[derive(Debug, Default)] +pub struct Stats { + pub successful_parses: usize, + pub total_parses: usize, +} + +impl fmt::Display for Stats { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + return writeln!(f, "Total parses: {}; successful parses: {}; failed parses: {}; success percentage: {:.2}%", + self.total_parses, + self.successful_parses, + self.total_parses - self.successful_parses, + (self.successful_parses as f64) / (self.total_parses as f64) * 100.0); + } +} + pub fn parse_file_at_path( language: Language, path: &Path, @@ -24,7 +40,7 @@ pub fn parse_file_at_path( timeout: u64, debug: bool, debug_graph: bool, - allow_cancellation: bool, + cancellation_flag: Option<&AtomicUsize>, ) -> Result { let mut _log_session = None; let mut parser = Parser::new(); @@ -35,16 +51,7 @@ pub fn parse_file_at_path( // If the `--cancel` flag was passed, then cancel the parse // when the user types a newline. 
- if allow_cancellation { - let flag = Box::new(AtomicUsize::new(0)); - unsafe { parser.set_cancellation_flag(Some(&flag)) }; - thread::spawn(move || { - let mut line = String::new(); - io::stdin().read_line(&mut line).unwrap(); - eprintln!("Cancelling"); - flag.store(1, Ordering::Relaxed); - }); - } + unsafe { parser.set_cancellation_flag(cancellation_flag) }; // Set a timeout based on the `--time` flag. parser.set_timeout_micros(timeout); @@ -70,10 +77,18 @@ pub fn parse_file_at_path( let mut stdout = stdout.lock(); if let Some(mut tree) = tree { - for edit in edits { + if debug_graph && !edits.is_empty() { + println!("BEFORE:\n{}", String::from_utf8_lossy(&source_code)); + } + + for (i, edit) in edits.iter().enumerate() { let edit = parse_edit_flag(&source_code, edit)?; perform_edit(&mut tree, &mut source_code, &edit); tree = parser.parse(&source_code, Some(&tree)).unwrap(); + + if debug_graph { + println!("AFTER {}:\n{}", i, String::from_utf8_lossy(&source_code)); + } } let duration = time.elapsed(); diff --git a/cli/src/query.rs b/cli/src/query.rs new file mode 100644 index 00000000..485fdb82 --- /dev/null +++ b/cli/src/query.rs @@ -0,0 +1,100 @@ +use super::error::{Error, Result}; +use crate::query_testing; +use std::fs; +use std::io::{self, Write}; +use std::path::Path; +use tree_sitter::{Language, Node, Parser, Query, QueryCursor}; + +pub fn query_files_at_paths( + language: Language, + paths: Vec, + query_path: &Path, + ordered_captures: bool, + range: Option<(usize, usize)>, + should_test: bool, +) -> Result<()> { + let stdout = io::stdout(); + let mut stdout = stdout.lock(); + + let query_source = fs::read_to_string(query_path).map_err(Error::wrap(|| { + format!("Error reading query file {:?}", query_path) + }))?; + let query = Query::new(language, &query_source) + .map_err(|e| Error::new(format!("Query compilation failed: {:?}", e)))?; + + let mut query_cursor = QueryCursor::new(); + if let Some((beg, end)) = range { + query_cursor.set_byte_range(beg, 
end); + } + + let mut parser = Parser::new(); + parser.set_language(language).map_err(|e| e.to_string())?; + + for path in paths { + let mut results = Vec::new(); + + writeln!(&mut stdout, "{}", path)?; + + let source_code = fs::read(&path).map_err(Error::wrap(|| { + format!("Error reading source file {:?}", path) + }))?; + let text_callback = |n: Node| &source_code[n.byte_range()]; + let tree = parser.parse(&source_code, None).unwrap(); + + if ordered_captures { + for (mat, capture_index) in + query_cursor.captures(&query, tree.root_node(), text_callback) + { + let capture = mat.captures[capture_index]; + let capture_name = &query.capture_names()[capture.index as usize]; + writeln!( + &mut stdout, + " pattern: {}, capture: {}, row: {}, text: {:?}", + mat.pattern_index, + capture_name, + capture.node.start_position().row, + capture.node.utf8_text(&source_code).unwrap_or("") + )?; + results.push(query_testing::CaptureInfo { + name: capture_name.to_string(), + start: capture.node.start_position(), + end: capture.node.end_position(), + }); + } + } else { + for m in query_cursor.matches(&query, tree.root_node(), text_callback) { + writeln!(&mut stdout, " pattern: {}", m.pattern_index)?; + for capture in m.captures { + let start = capture.node.start_position(); + let end = capture.node.end_position(); + let capture_name = &query.capture_names()[capture.index as usize]; + if end.row == start.row { + writeln!( + &mut stdout, + " capture: {}, start: {}, text: {:?}", + capture_name, + start, + capture.node.utf8_text(&source_code).unwrap_or("") + )?; + } else { + writeln!( + &mut stdout, + " capture: {}, start: {}, end: {}", + capture_name, start, end, + )?; + } + results.push(query_testing::CaptureInfo { + name: capture_name.to_string(), + start: capture.node.start_position(), + end: capture.node.end_position(), + }); + } + } + } + if should_test { + query_testing::assert_expected_captures(results, path, &mut parser, language)? 
+ } + } + + Ok(()) +} diff --git a/cli/src/query_testing.rs b/cli/src/query_testing.rs new file mode 100644 index 00000000..ef02ec69 --- /dev/null +++ b/cli/src/query_testing.rs @@ -0,0 +1,150 @@ +use crate::error; +use crate::error::Result; +use lazy_static::lazy_static; +use regex::Regex; +use std::fs; +use tree_sitter::{Language, Parser, Point}; + +lazy_static! { + static ref CAPTURE_NAME_REGEX: Regex = Regex::new("[\\w_\\-.]+").unwrap(); +} + +#[derive(Debug, Eq, PartialEq)] +pub struct CaptureInfo { + pub name: String, + pub start: Point, + pub end: Point, +} + +#[derive(Debug, PartialEq, Eq)] +pub struct Assertion { + pub position: Point, + pub expected_capture_name: String, +} + +/// Parse the given source code, finding all of the comments that contain +/// highlighting assertions. Return a vector of (position, expected highlight name) +/// pairs. +pub fn parse_position_comments( + parser: &mut Parser, + language: Language, + source: &[u8], +) -> Result> { + let mut result = Vec::new(); + let mut assertion_ranges = Vec::new(); + + // Parse the code. + parser.set_included_ranges(&[]).unwrap(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + + // Walk the tree, finding comment nodes that contain assertions. + let mut ascending = false; + let mut cursor = tree.root_node().walk(); + loop { + if ascending { + let node = cursor.node(); + + // Find every comment node. + if node.kind().contains("comment") { + if let Ok(text) = node.utf8_text(source) { + let mut position = node.start_position(); + if position.row == 0 { + continue; + } + + // Find the arrow character ("^" or '<-") in the comment. A left arrow + // refers to the column where the comment node starts. An up arrow refers + // to its own column. 
+ let mut has_left_caret = false; + let mut has_arrow = false; + let mut arrow_end = 0; + for (i, c) in text.char_indices() { + arrow_end = i + 1; + if c == '-' && has_left_caret { + has_arrow = true; + break; + } + if c == '^' { + has_arrow = true; + position.column += i; + break; + } + has_left_caret = c == '<'; + } + + // If the comment node contains an arrow and a highlight name, record the + // highlight name and the position. + if let (true, Some(mat)) = + (has_arrow, CAPTURE_NAME_REGEX.find(&text[arrow_end..])) + { + assertion_ranges.push((node.start_position(), node.end_position())); + result.push(Assertion { + position: position, + expected_capture_name: mat.as_str().to_string(), + }); + } + } + } + + // Continue walking the tree. + if cursor.goto_next_sibling() { + ascending = false; + } else if !cursor.goto_parent() { + break; + } + } else if !cursor.goto_first_child() { + ascending = true; + } + } + + // Adjust the row number in each assertion's position to refer to the line of + // code *above* the assertion. There can be multiple lines of assertion comments, + // so the positions may have to be decremented by more than one row. + let mut i = 0; + for assertion in result.iter_mut() { + loop { + let on_assertion_line = assertion_ranges[i..] + .iter() + .any(|(start, _)| start.row == assertion.position.row); + if on_assertion_line { + assertion.position.row -= 1; + } else { + while i < assertion_ranges.len() + && assertion_ranges[i].0.row < assertion.position.row + { + i += 1; + } + break; + } + } + } + + // The assertions can end up out of order due to the line adjustments. 
+ result.sort_unstable_by_key(|a| a.position); + + Ok(result) +} + +pub fn assert_expected_captures( + infos: Vec, + path: String, + parser: &mut Parser, + language: Language, +) -> Result<()> { + let contents = fs::read_to_string(path)?; + let pairs = parse_position_comments(parser, language, contents.as_bytes())?; + for info in &infos { + if let Some(found) = pairs.iter().find(|p| { + p.position.row == info.start.row && p.position >= info.start && p.position < info.end + }) { + if found.expected_capture_name != info.name && info.name != "name" { + Err(error::Error::new(format!( + "Assertion failed: at {}, found {}, expected {}", + info.start, found.expected_capture_name, info.name + )))? + } + } + } + Ok(()) +} diff --git a/cli/src/tags.rs b/cli/src/tags.rs new file mode 100644 index 00000000..802d8d06 --- /dev/null +++ b/cli/src/tags.rs @@ -0,0 +1,98 @@ +use super::loader::Loader; +use super::util; +use crate::error::{Error, Result}; +use std::io::{self, Write}; +use std::path::Path; +use std::time::Instant; +use std::{fs, str}; +use tree_sitter_tags::TagsContext; + +pub fn generate_tags( + loader: &Loader, + scope: Option<&str>, + paths: &[String], + quiet: bool, + time: bool, +) -> Result<()> { + let mut lang = None; + if let Some(scope) = scope { + lang = loader.language_configuration_for_scope(scope)?; + if lang.is_none() { + return Error::err(format!("Unknown scope '{}'", scope)); + } + } + + let mut context = TagsContext::new(); + let cancellation_flag = util::cancel_on_stdin(); + let stdout = io::stdout(); + let mut stdout = stdout.lock(); + + for path in paths { + let path = Path::new(&path); + let (language, language_config) = match lang { + Some(v) => v, + None => match loader.language_configuration_for_file_name(path)? { + Some(v) => v, + None => { + eprintln!("No language found for path {:?}", path); + continue; + } + }, + }; + + if let Some(tags_config) = language_config.tags_config(language)? 
{ + let indent; + if paths.len() > 1 { + if !quiet { + writeln!(&mut stdout, "{}", path.to_string_lossy())?; + } + indent = "\t" + } else { + indent = ""; + }; + + let source = fs::read(path)?; + let t0 = Instant::now(); + for tag in context + .generate_tags(tags_config, &source, Some(&cancellation_flag))? + .0 + { + let tag = tag?; + if !quiet { + write!( + &mut stdout, + "{}{:<10}\t | {:<8}\t{} {} - {} `{}`", + indent, + str::from_utf8(&source[tag.name_range]).unwrap_or(""), + &tags_config.syntax_type_name(tag.syntax_type_id), + if tag.is_definition { "def" } else { "ref" }, + tag.span.start, + tag.span.end, + str::from_utf8(&source[tag.line_range]).unwrap_or(""), + )?; + if let Some(docs) = tag.docs { + if docs.len() > 120 { + write!(&mut stdout, "\t{:?}...", docs.get(0..120).unwrap_or(""))?; + } else { + write!(&mut stdout, "\t{:?}", &docs)?; + } + } + writeln!(&mut stdout, "")?; + } + } + + if time { + writeln!( + &mut stdout, + "{}time: {}ms", + indent, + t0.elapsed().as_millis(), + )?; + } + } else { + eprintln!("No tags config found for path {:?}", path); + } + } + + Ok(()) +} diff --git a/cli/src/test.rs b/cli/src/test.rs index 544ec249..50c27220 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -11,7 +11,7 @@ use std::fs; use std::io::{self, Write}; use std::path::{Path, PathBuf}; use std::str; -use tree_sitter::{Language, LogType, Parser}; +use tree_sitter::{Language, LogType, Parser, Query}; lazy_static! { static ref HEADER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^===+\r?\n([^=]*)\r?\n===+\r?\n") @@ -112,6 +112,24 @@ pub fn run_tests_at_path( } } +pub fn check_queries_at_path(language: Language, path: &Path) -> Result<()> { + if path.exists() { + for entry in fs::read_dir(path)? 
{ + let entry = entry?; + let filepath = entry.file_name(); + let filepath = filepath.to_str().unwrap_or(""); + let hidden = filepath.starts_with("."); + if !hidden { + let content = fs::read_to_string(entry.path()).map_err(Error::wrap(|| { + format!("Error reading query file {:?}", entry.file_name()) + }))?; + Query::new(language, &content).map_err(|e| (filepath, e))?; + } + } + } + Ok(()) +} + pub fn print_diff_key() { println!( "\n{} / {}", diff --git a/cli/src/test_highlight.rs b/cli/src/test_highlight.rs new file mode 100644 index 00000000..df870bf6 --- /dev/null +++ b/cli/src/test_highlight.rs @@ -0,0 +1,275 @@ +use super::error::Result; +use crate::loader::Loader; +use crate::query_testing::{parse_position_comments, Assertion}; +use ansi_term::Colour; +use std::fs; +use std::path::Path; +use tree_sitter::Point; +use tree_sitter_highlight::{Highlight, HighlightConfiguration, HighlightEvent, Highlighter}; + +pub struct Failure { + row: usize, + column: usize, + expected_highlight: String, + actual_highlights: Vec, +} + +impl Failure { + pub fn message(&self) -> String { + let mut result = format!( + "Failure - row: {}, column: {}, expected highlight '{}', actual highlights: ", + self.row, self.column, self.expected_highlight + ); + if self.actual_highlights.is_empty() { + result += "none."; + } else { + for (i, actual_highlight) in self.actual_highlights.iter().enumerate() { + if i > 0 { + result += ", "; + } + result += "'"; + result += actual_highlight; + result += "'"; + } + } + result + } +} + +pub fn test_highlights(loader: &Loader, directory: &Path) -> Result<()> { + let mut failed = false; + let mut highlighter = Highlighter::new(); + + println!("syntax highlighting:"); + for highlight_test_file in fs::read_dir(directory)? 
{ + let highlight_test_file = highlight_test_file?; + let test_file_path = highlight_test_file.path(); + let test_file_name = highlight_test_file.file_name(); + let (language, language_config) = loader + .language_configuration_for_file_name(&test_file_path)? + .ok_or_else(|| format!("No language found for path {:?}", test_file_path))?; + let highlight_config = language_config + .highlight_config(language)? + .ok_or_else(|| format!("No highlighting config found for {:?}", test_file_path))?; + match test_highlight( + &loader, + &mut highlighter, + highlight_config, + fs::read(&test_file_path)?.as_slice(), + ) { + Ok(assertion_count) => { + println!( + " āœ“ {} ({} assertions)", + Colour::Green.paint(test_file_name.to_string_lossy().as_ref()), + assertion_count + ); + } + Err(e) => { + println!( + " āœ— {}", + Colour::Red.paint(test_file_name.to_string_lossy().as_ref()) + ); + println!(" {}", e.message()); + failed = true; + } + } + } + + if failed { + Err(String::new().into()) + } else { + Ok(()) + } +} +pub fn iterate_assertions( + assertions: &Vec, + highlights: &Vec<(Point, Point, Highlight)>, + highlight_names: &Vec, +) -> Result { + // Iterate through all of the highlighting assertions, checking each one against the + // actual highlights. + let mut i = 0; + let mut actual_highlights = Vec::<&String>::new(); + for Assertion { + position, + expected_capture_name: expected_highlight, + } in assertions + { + let mut passed = false; + actual_highlights.clear(); + + 'highlight_loop: loop { + // The assertions are ordered by position, so skip past all of the highlights that + // end at or before this assertion's position. + if let Some(highlight) = highlights.get(i) { + if highlight.1 <= *position { + i += 1; + continue; + } + + // Iterate through all of the highlights that start at or before this assertion's, + // position, looking for one that matches the assertion. 
+ let mut j = i; + while let (false, Some(highlight)) = (passed, highlights.get(j)) { + if highlight.0 > *position { + break 'highlight_loop; + } + + // If the highlight matches the assertion, this test passes. Otherwise, + // add this highlight to the list of actual highlights that span the + // assertion's position, in order to generate an error message in the event + // of a failure. + let highlight_name = &highlight_names[(highlight.2).0]; + if *highlight_name == *expected_highlight { + passed = true; + break 'highlight_loop; + } else { + actual_highlights.push(highlight_name); + } + + j += 1; + } + } else { + break; + } + } + + if !passed { + return Err(Failure { + row: position.row, + column: position.column, + expected_highlight: expected_highlight.clone(), + actual_highlights: actual_highlights.into_iter().cloned().collect(), + } + .into()); + } + } + + Ok(assertions.len()) +} + +pub fn test_highlight( + loader: &Loader, + highlighter: &mut Highlighter, + highlight_config: &HighlightConfiguration, + source: &[u8], +) -> Result { + // Highlight the file, and parse out all of the highlighting assertions. + let highlight_names = loader.highlight_names(); + let highlights = get_highlight_positions(loader, highlighter, highlight_config, source)?; + let assertions = + parse_position_comments(highlighter.parser(), highlight_config.language, source)?; + + iterate_assertions(&assertions, &highlights, &highlight_names)?; + + // Iterate through all of the highlighting assertions, checking each one against the + // actual highlights. + let mut i = 0; + let mut actual_highlights = Vec::<&String>::new(); + for Assertion { + position, + expected_capture_name: expected_highlight, + } in &assertions + { + let mut passed = false; + actual_highlights.clear(); + + 'highlight_loop: loop { + // The assertions are ordered by position, so skip past all of the highlights that + // end at or before this assertion's position. 
+ if let Some(highlight) = highlights.get(i) { + if highlight.1 <= *position { + i += 1; + continue; + } + + // Iterate through all of the highlights that start at or before this assertion's, + // position, looking for one that matches the assertion. + let mut j = i; + while let (false, Some(highlight)) = (passed, highlights.get(j)) { + if highlight.0 > *position { + break 'highlight_loop; + } + + // If the highlight matches the assertion, this test passes. Otherwise, + // add this highlight to the list of actual highlights that span the + // assertion's position, in order to generate an error message in the event + // of a failure. + let highlight_name = &highlight_names[(highlight.2).0]; + if *highlight_name == *expected_highlight { + passed = true; + break 'highlight_loop; + } else { + actual_highlights.push(highlight_name); + } + + j += 1; + } + } else { + break; + } + } + + if !passed { + return Err(Failure { + row: position.row, + column: position.column, + expected_highlight: expected_highlight.clone(), + actual_highlights: actual_highlights.into_iter().cloned().collect(), + } + .into()); + } + } + + Ok(assertions.len()) +} + +pub fn get_highlight_positions( + loader: &Loader, + highlighter: &mut Highlighter, + highlight_config: &HighlightConfiguration, + source: &[u8], +) -> Result> { + let mut row = 0; + let mut column = 0; + let mut byte_offset = 0; + let mut was_newline = false; + let mut result = Vec::new(); + let mut highlight_stack = Vec::new(); + let source = String::from_utf8_lossy(source); + let mut char_indices = source.char_indices(); + for event in highlighter.highlight(highlight_config, source.as_bytes(), None, |string| { + loader.highlight_config_for_injection_string(string) + })? { + match event? 
{ + HighlightEvent::HighlightStart(h) => highlight_stack.push(h), + HighlightEvent::HighlightEnd => { + highlight_stack.pop(); + } + HighlightEvent::Source { start, end } => { + let mut start_position = Point::new(row, column); + while byte_offset < end { + if byte_offset <= start { + start_position = Point::new(row, column); + } + if let Some((i, c)) = char_indices.next() { + if was_newline { + row += 1; + column = 0; + } else { + column += i - byte_offset; + } + was_newline = c == '\n'; + byte_offset = i; + } else { + break; + } + } + if let Some(highlight) = highlight_stack.last() { + result.push((start_position, Point::new(row, column), *highlight)) + } + } + } + } + Ok(result) +} diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index ed6226f2..202dcd70 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -21,7 +21,11 @@ const LANGUAGES: &'static [&'static str] = &[ "go", "html", "javascript", + "json", + "php", "python", + "ruby", + "rust", ]; lazy_static! 
{ @@ -57,7 +61,11 @@ fn test_real_language_corpus_files() { } let language = get_language(language_name); - let corpus_dir = grammars_dir.join(language_name).join("corpus"); + let mut corpus_dir = grammars_dir.join(language_name).join("corpus"); + if !corpus_dir.is_dir() { + corpus_dir = grammars_dir.join(language_name).join("test").join("corpus"); + } + let error_corpus_file = error_corpus_dir.join(&format!("{}_errors.txt", language_name)); let main_tests = parse_tests(&corpus_dir).unwrap(); let error_tests = parse_tests(&error_corpus_file).unwrap_or(TestEntry::default()); @@ -300,7 +308,8 @@ fn check_consistent_sizes(tree: &Tree, input: &Vec) { let mut last_child_end_point = start_point; let mut some_child_has_changes = false; let mut actual_named_child_count = 0; - for child in node.children() { + for i in 0..node.child_count() { + let child = node.child(i).unwrap(); assert!(child.start_byte() >= last_child_end_byte); assert!(child.start_position() >= last_child_end_point); check(child, line_offsets); diff --git a/cli/src/tests/helpers/allocations.rs b/cli/src/tests/helpers/allocations.rs index c64762bd..2f89c173 100644 --- a/cli/src/tests/helpers/allocations.rs +++ b/cli/src/tests/helpers/allocations.rs @@ -51,6 +51,12 @@ pub fn stop_recording() { } } +pub fn record(f: impl FnOnce()) { + start_recording(); + f(); + stop_recording(); +} + fn record_alloc(ptr: *mut c_void) { let mut recorder = RECORDER.lock(); if recorder.enabled { diff --git a/cli/src/tests/helpers/fixtures.rs b/cli/src/tests/helpers/fixtures.rs index 4389797e..fc459777 100644 --- a/cli/src/tests/helpers/fixtures.rs +++ b/cli/src/tests/helpers/fixtures.rs @@ -2,8 +2,8 @@ use crate::loader::Loader; use lazy_static::lazy_static; use std::fs; use std::path::{Path, PathBuf}; -use tree_sitter::{Language, PropertySheet}; -use tree_sitter_highlight::{load_property_sheet, Properties}; +use tree_sitter::Language; +use tree_sitter_highlight::HighlightConfiguration; include!("./dirs.rs"); @@ -11,6 +11,10 
@@ lazy_static! { static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone()); } +pub fn test_loader<'a>() -> &'a Loader { + &*TEST_LOADER +} + pub fn fixtures_dir<'a>() -> &'static Path { &FIXTURES_DIR } @@ -21,18 +25,33 @@ pub fn get_language(name: &str) -> Language { .unwrap() } -pub fn get_property_sheet_json(language_name: &str, sheet_name: &str) -> String { - let path = GRAMMARS_DIR - .join(language_name) - .join("src") - .join(sheet_name); - fs::read_to_string(path).unwrap() +pub fn get_language_queries_path(language_name: &str) -> PathBuf { + GRAMMARS_DIR.join(language_name).join("queries") } -pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet { - let json = get_property_sheet_json(language_name, sheet_name); +pub fn get_highlight_config( + language_name: &str, + injection_query_filename: Option<&str>, + highlight_names: &[String], +) -> HighlightConfiguration { let language = get_language(language_name); - load_property_sheet(language, &json).unwrap() + let queries_path = get_language_queries_path(language_name); + let highlights_query = fs::read_to_string(queries_path.join("highlights.scm")).unwrap(); + let injections_query = if let Some(injection_query_filename) = injection_query_filename { + fs::read_to_string(queries_path.join(injection_query_filename)).unwrap() + } else { + String::new() + }; + let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or(String::new()); + let mut result = HighlightConfiguration::new( + language, + &highlights_query, + &injections_query, + &locals_query, + ) + .unwrap(); + result.configure(highlight_names); + result } pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language { diff --git a/cli/src/tests/helpers/mod.rs b/cli/src/tests/helpers/mod.rs index 2d1ce574..88928d55 100644 --- a/cli/src/tests/helpers/mod.rs +++ b/cli/src/tests/helpers/mod.rs @@ -1,5 +1,5 @@ pub(super) mod allocations; +pub(super) mod edits; pub(super) mod 
fixtures; pub(super) mod random; pub(super) mod scope_sequence; -pub(super) mod edits; diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index 34b545ff..6b09d64c 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -1,32 +1,92 @@ -use super::helpers::fixtures::{get_language, get_property_sheet, get_property_sheet_json}; +use super::helpers::fixtures::{get_highlight_config, get_language, get_language_queries_path}; use lazy_static::lazy_static; use std::ffi::CString; - use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{ptr, slice, str}; -use tree_sitter::{Language, PropertySheet}; +use std::{fs, ptr, slice, str}; use tree_sitter_highlight::{ - c, highlight, highlight_html, Error, Highlight, HighlightEvent, Properties, + c, Error, Highlight, HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer, }; lazy_static! { - static ref JS_SHEET: PropertySheet = - get_property_sheet("javascript", "highlights.json"); - static ref HTML_SHEET: PropertySheet = - get_property_sheet("html", "highlights.json"); - static ref EJS_SHEET: PropertySheet = - get_property_sheet("embedded-template", "highlights-ejs.json"); - static ref RUST_SHEET: PropertySheet = - get_property_sheet("rust", "highlights.json"); - static ref SCOPE_CLASS_STRINGS: Vec = { - let mut result = Vec::new(); - let mut i = 0; - while let Some(highlight) = Highlight::from_usize(i) { - result.push(format!("class={:?}", highlight)); - i += 1; - } - result - }; + static ref JS_HIGHLIGHT: HighlightConfiguration = + get_highlight_config("javascript", Some("injections.scm"), &HIGHLIGHT_NAMES); + static ref JSDOC_HIGHLIGHT: HighlightConfiguration = + get_highlight_config("jsdoc", None, &HIGHLIGHT_NAMES); + static ref HTML_HIGHLIGHT: HighlightConfiguration = + get_highlight_config("html", Some("injections.scm"), &HIGHLIGHT_NAMES); + static ref EJS_HIGHLIGHT: HighlightConfiguration = get_highlight_config( + "embedded-template", + 
Some("injections-ejs.scm"), + &HIGHLIGHT_NAMES + ); + static ref RUST_HIGHLIGHT: HighlightConfiguration = + get_highlight_config("rust", Some("injections.scm"), &HIGHLIGHT_NAMES); + static ref HIGHLIGHT_NAMES: Vec = [ + "attribute", + "carriage-return", + "comment", + "constant", + "constructor", + "function.builtin", + "function", + "embedded", + "keyword", + "operator", + "property.builtin", + "property", + "punctuation", + "punctuation.bracket", + "punctuation.delimiter", + "punctuation.special", + "string", + "tag", + "type.builtin", + "type", + "variable.builtin", + "variable.parameter", + "variable", + ] + .iter() + .cloned() + .map(String::from) + .collect(); + static ref HTML_ATTRS: Vec = HIGHLIGHT_NAMES + .iter() + .map(|s| format!("class={}", s)) + .collect(); +} + +#[test] +fn test_highlighting_javascript() { + let source = "const a = function(b) { return b + c; }"; + assert_eq!( + &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), + &[vec![ + ("const", vec!["keyword"]), + (" ", vec![]), + ("a", vec!["function"]), + (" ", vec![]), + ("=", vec!["operator"]), + (" ", vec![]), + ("function", vec!["keyword"]), + ("(", vec!["punctuation.bracket"]), + ("b", vec!["variable.parameter"]), + (")", vec!["punctuation.bracket"]), + (" ", vec![]), + ("{", vec!["punctuation.bracket"]), + (" ", vec![]), + ("return", vec!["keyword"]), + (" ", vec![]), + ("b", vec!["variable.parameter"]), + (" ", vec![]), + ("+", vec!["operator"]), + (" ", vec![]), + ("c", vec!["variable"]), + (";", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("}", vec!["punctuation.bracket"]), + ]] + ); } #[test] @@ -34,57 +94,65 @@ fn test_highlighting_injected_html_in_javascript() { let source = vec!["const s = html `
${a < b}
`;"].join("\n"); assert_eq!( - &to_token_vector(&source, get_language("javascript"), &JS_SHEET).unwrap(), + &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), &[vec![ - ("const", vec![Highlight::Keyword]), + ("const", vec!["keyword"]), (" ", vec![]), - ("s", vec![Highlight::Variable]), + ("s", vec!["variable"]), (" ", vec![]), - ("=", vec![Highlight::Operator]), + ("=", vec!["operator"]), (" ", vec![]), - ("html", vec![Highlight::Function]), + ("html", vec!["function"]), (" ", vec![]), - ("`<", vec![Highlight::String]), - ("div", vec![Highlight::String, Highlight::Tag]), - (">", vec![Highlight::String]), - ( - "${", - vec![ - Highlight::String, - Highlight::Embedded, - Highlight::PunctuationSpecial - ] - ), - ( - "a", - vec![Highlight::String, Highlight::Embedded, Highlight::Variable] - ), - (" ", vec![Highlight::String, Highlight::Embedded]), - ( - "<", - vec![Highlight::String, Highlight::Embedded, Highlight::Operator] - ), - (" ", vec![Highlight::String, Highlight::Embedded]), - ( - "b", - vec![Highlight::String, Highlight::Embedded, Highlight::Variable] - ), - ( - "}", - vec![ - Highlight::String, - Highlight::Embedded, - Highlight::PunctuationSpecial - ] - ), - ("`", vec![Highlight::String]), - (";", vec![Highlight::PunctuationDelimiter]), + ("`", vec!["string"]), + ("<", vec!["string", "punctuation.bracket"]), + ("div", vec!["string", "tag"]), + (">", vec!["string", "punctuation.bracket"]), + ("${", vec!["string", "embedded", "punctuation.special"]), + ("a", vec!["string", "embedded", "variable"]), + (" ", vec!["string", "embedded"]), + ("<", vec!["string", "embedded", "operator"]), + (" ", vec!["string", "embedded"]), + ("b", vec!["string", "embedded", "variable"]), + ("}", vec!["string", "embedded", "punctuation.special"]), + ("", vec!["string", "punctuation.bracket"]), + ("`", vec!["string"]), + (";", vec!["punctuation.delimiter"]), ]] ); } +#[test] +fn test_highlighting_injected_javascript_in_html_mini() { + let source = ""; + + assert_eq!( + 
&to_token_vector(source, &HTML_HIGHLIGHT).unwrap(), + &[vec![ + ("<", vec!["punctuation.bracket"]), + ("script", vec!["tag"]), + (">", vec!["punctuation.bracket"]), + ("const", vec!["keyword"]), + (" ", vec![]), + ("x", vec!["variable"]), + (" ", vec![]), + ("=", vec!["operator"]), + (" ", vec![]), + ("new", vec!["keyword"]), + (" ", vec![]), + ("Thing", vec!["constructor"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), + (";", vec!["punctuation.delimiter"]), + ("", vec!["punctuation.bracket"]), + ],] + ); +} + #[test] fn test_highlighting_injected_javascript_in_html() { let source = vec![ @@ -97,38 +165,44 @@ fn test_highlighting_injected_javascript_in_html() { .join("\n"); assert_eq!( - &to_token_vector(&source, get_language("html"), &HTML_SHEET).unwrap(), + &to_token_vector(&source, &HTML_HIGHLIGHT).unwrap(), &[ - vec![("<", vec![]), ("body", vec![Highlight::Tag]), (">", vec![]),], vec![ - (" <", vec![]), - ("script", vec![Highlight::Tag]), - (">", vec![]), + ("<", vec!["punctuation.bracket"]), + ("body", vec!["tag"]), + (">", vec!["punctuation.bracket"]), + ], + vec![ + (" ", vec![]), + ("<", vec!["punctuation.bracket"]), + ("script", vec!["tag"]), + (">", vec!["punctuation.bracket"]), ], vec![ (" ", vec![]), - ("const", vec![Highlight::Keyword]), + ("const", vec!["keyword"]), (" ", vec![]), - ("x", vec![Highlight::Variable]), + ("x", vec!["variable"]), (" ", vec![]), - ("=", vec![Highlight::Operator]), + ("=", vec!["operator"]), (" ", vec![]), - ("new", vec![Highlight::Keyword]), + ("new", vec!["keyword"]), (" ", vec![]), - ("Thing", vec![Highlight::Constructor]), - ("(", vec![Highlight::PunctuationBracket]), - (")", vec![Highlight::PunctuationBracket]), - (";", vec![Highlight::PunctuationDelimiter]), + ("Thing", vec!["constructor"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), + (";", vec!["punctuation.delimiter"]), ], vec![ - (" ", vec![]), + (" ", vec![]), + ("", vec!["punctuation.bracket"]), ], 
vec![ - ("", vec![]), + ("", vec!["punctuation.bracket"]), ], ] ); @@ -147,13 +221,13 @@ fn test_highlighting_multiline_nodes_to_html() { .join("\n"); assert_eq!( - &to_html(&source, get_language("javascript"), &JS_SHEET,).unwrap(), + &to_html(&source, &JS_HIGHLIGHT).unwrap(), &[ - "const SOMETHING = `\n".to_string(), - " one ${\n".to_string(), - " two()\n".to_string(), - " } three\n".to_string(), - "`\n".to_string(), + "const SOMETHING = `\n".to_string(), + " one ${\n".to_string(), + " two()\n".to_string(), + " } three\n".to_string(), + "`\n".to_string(), ] ); } @@ -169,51 +243,51 @@ fn test_highlighting_with_local_variable_tracking() { .join("\n"); assert_eq!( - &to_token_vector(&source, get_language("javascript"), &JS_SHEET).unwrap(), + &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), &[ vec![ - ("module", vec![Highlight::VariableBuiltin]), - (".", vec![Highlight::PunctuationDelimiter]), - ("exports", vec![Highlight::Property]), + ("module", vec!["variable.builtin"]), + (".", vec!["punctuation.delimiter"]), + ("exports", vec!["function"]), (" ", vec![]), - ("=", vec![Highlight::Operator]), + ("=", vec!["operator"]), (" ", vec![]), - ("function", vec![Highlight::Keyword]), + ("function", vec!["keyword"]), (" ", vec![]), - ("a", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), - ("b", vec![Highlight::VariableParameter]), - (")", vec![Highlight::PunctuationBracket]), + ("a", vec!["function"]), + ("(", vec!["punctuation.bracket"]), + ("b", vec!["variable.parameter"]), + (")", vec!["punctuation.bracket"]), (" ", vec![]), - ("{", vec![Highlight::PunctuationBracket]) + ("{", vec!["punctuation.bracket"]) ], vec![ (" ", vec![]), - ("const", vec![Highlight::Keyword]), + ("const", vec!["keyword"]), (" ", vec![]), - ("module", vec![Highlight::Variable]), + ("module", vec!["variable"]), (" ", vec![]), - ("=", vec![Highlight::Operator]), + ("=", vec!["operator"]), (" ", vec![]), - ("c", vec![Highlight::Variable]), - (";", 
vec![Highlight::PunctuationDelimiter]) + ("c", vec!["variable"]), + (";", vec!["punctuation.delimiter"]) ], vec![ (" ", vec![]), - ("console", vec![Highlight::VariableBuiltin]), - (".", vec![Highlight::PunctuationDelimiter]), - ("log", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), + ("console", vec!["variable.builtin"]), + (".", vec!["punctuation.delimiter"]), + ("log", vec!["function"]), + ("(", vec!["punctuation.bracket"]), // Not a builtin, because `module` was defined as a variable above. - ("module", vec![Highlight::Variable]), - (",", vec![Highlight::PunctuationDelimiter]), + ("module", vec!["variable"]), + (",", vec!["punctuation.delimiter"]), (" ", vec![]), // A parameter, because `b` was defined as a parameter above. - ("b", vec![Highlight::VariableParameter]), - (")", vec![Highlight::PunctuationBracket]), - (";", vec![Highlight::PunctuationDelimiter]), + ("b", vec!["variable.parameter"]), + (")", vec!["punctuation.bracket"]), + (";", vec!["punctuation.delimiter"]), ], - vec![("}", vec![Highlight::PunctuationBracket])] + vec![("}", vec!["punctuation.bracket"])] ], ); } @@ -234,41 +308,95 @@ fn test_highlighting_empty_lines() { .join("\n"); assert_eq!( - &to_html(&source, get_language("javascript"), &JS_SHEET,).unwrap(), + &to_html(&source, &JS_HIGHLIGHT,).unwrap(), &[ - "class A {\n".to_string(), + "class A {\n".to_string(), "\n".to_string(), - " b(c) {\n".to_string(), + " b(c) {\n".to_string(), "\n".to_string(), - " d(e)\n".to_string(), + " d(e)\n".to_string(), "\n".to_string(), - " }\n".to_string(), + " }\n".to_string(), "\n".to_string(), - "}\n".to_string(), + "}\n".to_string(), ] ); } #[test] -fn test_highlighting_ejs() { - let source = vec!["
<% foo() %>
"].join("\n"); +fn test_highlighting_carriage_returns() { + let source = "a = \"a\rb\"\r\nb\r"; assert_eq!( - &to_token_vector(&source, get_language("embedded-template"), &EJS_SHEET).unwrap(), + &to_html(&source, &JS_HIGHLIGHT).unwrap(), + &[ + "a = "ab"\n", + "b\n", + ], + ); +} + +#[test] +fn test_highlighting_ejs_with_html_and_javascript() { + let source = vec!["
<% foo() %>
"].join("\n"); + + assert_eq!( + &to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(), &[[ - ("<", vec![]), - ("div", vec![Highlight::Tag]), - (">", vec![]), - ("<%", vec![Highlight::Keyword]), + ("<", vec!["punctuation.bracket"]), + ("div", vec!["tag"]), + (">", vec!["punctuation.bracket"]), + ("<%", vec!["keyword"]), (" ", vec![]), - ("foo", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), - (")", vec![Highlight::PunctuationBracket]), + ("foo", vec!["function"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), (" ", vec![]), - ("%>", vec![Highlight::Keyword]), - ("", vec![]) + ("%>", vec!["keyword"]), + ("", vec!["punctuation.bracket"]), + ("<", vec!["punctuation.bracket"]), + ("script", vec!["tag"]), + (">", vec!["punctuation.bracket"]), + (" ", vec![]), + ("bar", vec!["function"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), + (" ", vec![]), + ("", vec!["punctuation.bracket"]), + ]], + ); +} + +#[test] +fn test_highlighting_javascript_with_jsdoc() { + // Regression test: the middle comment has no highlights. This should not prevent + // later injections from highlighting properly. 
+ let source = vec!["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n"); + + assert_eq!( + &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), + &[[ + ("a", vec!["variable"]), + (" ", vec![]), + ("/* ", vec!["comment"]), + ("@see", vec!["comment", "keyword"]), + (" a */", vec!["comment"]), + (" ", vec![]), + ("b", vec!["variable"]), + (";", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("/* nothing */", vec!["comment"]), + (" ", vec![]), + ("c", vec!["variable"]), + (";", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("/* ", vec!["comment"]), + ("@see", vec!["comment", "keyword"]), + (" b */", vec!["comment"]) ]], ); } @@ -278,33 +406,36 @@ fn test_highlighting_with_content_children_included() { let source = vec!["assert!(", " a.b.c() < D::e::()", ");"].join("\n"); assert_eq!( - &to_token_vector(&source, get_language("rust"), &RUST_SHEET).unwrap(), + &to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(), &[ vec![ - ("assert", vec![Highlight::Function]), - ("!", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), + ("assert", vec!["function"]), + ("!", vec!["function"]), + ("(", vec!["punctuation.bracket"]), ], vec![ (" a", vec![]), - (".", vec![Highlight::PunctuationDelimiter]), - ("b", vec![Highlight::Property]), - (".", vec![Highlight::PunctuationDelimiter]), - ("c", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), - (")", vec![Highlight::PunctuationBracket]), + (".", vec!["punctuation.delimiter"]), + ("b", vec!["property"]), + (".", vec!["punctuation.delimiter"]), + ("c", vec!["function"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), (" < ", vec![]), - ("D", vec![Highlight::Type]), - ("::", vec![Highlight::PunctuationDelimiter]), - ("e", vec![Highlight::Function]), - ("::", vec![Highlight::PunctuationDelimiter]), - ("<", vec![Highlight::PunctuationBracket]), - ("F", vec![Highlight::Type]), - (">", vec![Highlight::PunctuationBracket]), - ("(", 
vec![Highlight::PunctuationBracket]), - (")", vec![Highlight::PunctuationBracket]), + ("D", vec!["type"]), + ("::", vec!["punctuation.delimiter"]), + ("e", vec!["function"]), + ("::", vec!["punctuation.delimiter"]), + ("<", vec!["punctuation.bracket"]), + ("F", vec!["type"]), + (">", vec!["punctuation.bracket"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), ], - vec![(")", vec![Highlight::PunctuationBracket]), (";", vec![]),] + vec![ + (")", vec!["punctuation.bracket"]), + (";", vec!["punctuation.delimiter"]), + ] ], ); } @@ -325,73 +456,97 @@ fn test_highlighting_cancellation() { test_language_for_injection_string(name) }; - // Constructing the highlighter, which eagerly parses the outer document, - // should not fail. - let highlighter = highlight( - source.as_bytes(), - get_language("html"), - &HTML_SHEET, - Some(&cancellation_flag), - injection_callback, - ) - .unwrap(); + // The initial `highlight` call, which eagerly parses the outer document, should not fail. + let mut highlighter = Highlighter::new(); + let events = highlighter + .highlight( + &HTML_HIGHLIGHT, + source.as_bytes(), + Some(&cancellation_flag), + injection_callback, + ) + .unwrap(); - // Iterating the scopes should not panic. It should return an error - // once the cancellation is detected. - for event in highlighter { + // Iterating the scopes should not panic. It should return an error once the + // cancellation is detected. 
+ for event in events { if let Err(e) = event { assert_eq!(e, Error::Cancelled); return; } } + panic!("Expected an error while iterating highlighter"); } #[test] fn test_highlighting_via_c_api() { - let js_lang = get_language("javascript"); - let html_lang = get_language("html"); - let js_sheet = get_property_sheet_json("javascript", "highlights.json"); - let js_sheet = c_string(&js_sheet); - let html_sheet = get_property_sheet_json("html", "highlights.json"); - let html_sheet = c_string(&html_sheet); + let highlights = vec![ + "class=tag\0", + "class=function\0", + "class=string\0", + "class=keyword\0", + ]; + let highlight_names = highlights + .iter() + .map(|h| h["class=".len()..].as_ptr() as *const i8) + .collect::>(); + let highlight_attrs = highlights + .iter() + .map(|h| h.as_bytes().as_ptr() as *const i8) + .collect::>(); + let highlighter = c::ts_highlighter_new( + &highlight_names[0] as *const *const i8, + &highlight_attrs[0] as *const *const i8, + highlights.len() as u32, + ); - let class_tag = c_string("class=tag"); - let class_function = c_string("class=function"); - let class_string = c_string("class=string"); - let class_keyword = c_string("class=keyword"); - - let js_scope_name = c_string("source.js"); - let html_scope_name = c_string("text.html.basic"); - let injection_regex = c_string("^(javascript|js)$"); let source_code = c_string(""); - let attribute_strings = &mut [ptr::null(); Highlight::Unknown as usize + 1]; - attribute_strings[Highlight::Tag as usize] = class_tag.as_ptr(); - attribute_strings[Highlight::String as usize] = class_string.as_ptr(); - attribute_strings[Highlight::Keyword as usize] = class_keyword.as_ptr(); - attribute_strings[Highlight::Function as usize] = class_function.as_ptr(); + let js_scope = c_string("source.js"); + let js_injection_regex = c_string("^javascript"); + let language = get_language("javascript"); + let queries = get_language_queries_path("javascript"); + let highlights_query = 
fs::read_to_string(queries.join("highlights.scm")).unwrap(); + let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap(); + let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap(); + c::ts_highlighter_add_language( + highlighter, + js_scope.as_ptr(), + js_injection_regex.as_ptr(), + language, + highlights_query.as_ptr() as *const i8, + injections_query.as_ptr() as *const i8, + locals_query.as_ptr() as *const i8, + highlights_query.len() as u32, + injections_query.len() as u32, + locals_query.len() as u32, + ); + + let html_scope = c_string("text.html.basic"); + let html_injection_regex = c_string("^html"); + let language = get_language("html"); + let queries = get_language_queries_path("html"); + let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap(); + let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap(); + c::ts_highlighter_add_language( + highlighter, + html_scope.as_ptr(), + html_injection_regex.as_ptr(), + language, + highlights_query.as_ptr() as *const i8, + injections_query.as_ptr() as *const i8, + ptr::null(), + highlights_query.len() as u32, + injections_query.len() as u32, + 0, + ); - let highlighter = c::ts_highlighter_new(attribute_strings.as_ptr()); let buffer = c::ts_highlight_buffer_new(); - c::ts_highlighter_add_language( - highlighter, - html_scope_name.as_ptr(), - html_lang, - html_sheet.as_ptr(), - ptr::null_mut(), - ); - c::ts_highlighter_add_language( - highlighter, - js_scope_name.as_ptr(), - js_lang, - js_sheet.as_ptr(), - injection_regex.as_ptr(), - ); c::ts_highlighter_highlight( highlighter, - html_scope_name.as_ptr(), + html_scope.as_ptr(), source_code.as_ptr(), source_code.as_bytes().len() as u32, buffer, @@ -421,8 +576,8 @@ fn test_highlighting_via_c_api() { lines, vec![ "<script>\n", - "const a = b('c');\n", - "c.d();\n", + "const a = b('c');\n", + "c.d();\n", "</script>\n", ] ); @@ -433,7 +588,7 @@ fn test_highlighting_via_c_api() { 
#[test] fn test_decode_utf8_lossy() { - use tree_sitter_highlight::util::LossyUtf8; + use tree_sitter::LossyUtf8; let parts = LossyUtf8::new(b"hi").collect::>(); assert_eq!(parts, vec!["hi"]); @@ -452,50 +607,60 @@ fn c_string(s: &str) -> CString { CString::new(s.as_bytes().to_vec()).unwrap() } -fn test_language_for_injection_string<'a>( - string: &str, -) -> Option<(Language, &'a PropertySheet)> { +fn test_language_for_injection_string<'a>(string: &str) -> Option<&'a HighlightConfiguration> { match string { - "javascript" => Some((get_language("javascript"), &JS_SHEET)), - "html" => Some((get_language("html"), &HTML_SHEET)), - "rust" => Some((get_language("rust"), &RUST_SHEET)), + "javascript" => Some(&JS_HIGHLIGHT), + "html" => Some(&HTML_HIGHLIGHT), + "rust" => Some(&RUST_HIGHLIGHT), + "jsdoc" => Some(&JSDOC_HIGHLIGHT), _ => None, } } fn to_html<'a>( src: &'a str, - language: Language, - property_sheet: &'a PropertySheet, + language_config: &'a HighlightConfiguration, ) -> Result, Error> { - highlight_html( - src.as_bytes(), - language, - property_sheet, + let src = src.as_bytes(); + let mut renderer = HtmlRenderer::new(); + let mut highlighter = Highlighter::new(); + let events = highlighter.highlight( + language_config, + src, None, &test_language_for_injection_string, - &|highlight| SCOPE_CLASS_STRINGS[highlight as usize].as_str(), - ) + )?; + + renderer.set_carriage_return_highlight( + HIGHLIGHT_NAMES + .iter() + .position(|s| s == "carriage-return") + .map(Highlight), + ); + renderer + .render(events, src, &|highlight| HTML_ATTRS[highlight.0].as_bytes()) + .unwrap(); + Ok(renderer.lines().map(|s| s.to_string()).collect()) } fn to_token_vector<'a>( src: &'a str, - language: Language, - property_sheet: &'a PropertySheet, -) -> Result)>>, Error> { + language_config: &'a HighlightConfiguration, +) -> Result)>>, Error> { let src = src.as_bytes(); + let mut highlighter = Highlighter::new(); let mut lines = Vec::new(); let mut highlights = Vec::new(); let mut line 
= Vec::new(); - for event in highlight( + let events = highlighter.highlight( + language_config, src, - language, - property_sheet, None, &test_language_for_injection_string, - )? { + )?; + for event in events { match event? { - HighlightEvent::HighlightStart(s) => highlights.push(s), + HighlightEvent::HighlightStart(s) => highlights.push(HIGHLIGHT_NAMES[s.0].as_str()), HighlightEvent::HighlightEnd => { highlights.pop(); } diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index 143e8297..24e8160e 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -3,5 +3,8 @@ mod helpers; mod highlight_test; mod node_test; mod parser_test; -mod properties_test; +mod pathological_test; +mod query_test; +mod tags_test; +mod test_highlight_test; mod tree_test; diff --git a/cli/src/tests/node_refs.rs b/cli/src/tests/node_refs.rs deleted file mode 100644 index 143ae7f6..00000000 --- a/cli/src/tests/node_refs.rs +++ /dev/null @@ -1,62 +0,0 @@ -use super::helpers::fixtures::get_test_language; -use crate::generate::generate_parser_for_grammar; -use tree_sitter::Parser; - -#[test] -fn test_basic_node_refs() { - let (parser_name, parser_code) = generate_parser_for_grammar( - r#" - { - "name": "test_grammar_with_refs", - "extras": [ - {"type": "PATTERN", "value": "\\s+"} - ], - "rules": { - "rule_a": { - "type": "SEQ", - "members": [ - { - "type": "REF", - "value": "ref_1", - "content": { - "type": "STRING", - "value": "child-1" - } - }, - { - "type": "CHOICE", - "members": [ - { - "type": "STRING", - "value": "child-2" - }, - { - "type": "BLANK" - } - ] - }, - { - "type": "REF", - "value": "ref_2", - "content": { - "type": "STRING", - "value": "child-3" - } - } - ] - } - } - } - "#, - ) - .unwrap(); - - let mut parser = Parser::new(); - let language = get_test_language(&parser_name, &parser_code, None); - parser.set_language(language).unwrap(); - - let tree = parser.parse("child-1 child-2 child-3", None).unwrap(); - let root_node = tree.root_node(); - 
assert_eq!(root_node.child_by_ref("ref_1"), root_node.child(0)); - assert_eq!(root_node.child_by_ref("ref_2"), root_node.child(2)); -} diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs index 74e123f4..7e652cd5 100644 --- a/cli/src/tests/node_test.rs +++ b/cli/src/tests/node_test.rs @@ -167,6 +167,79 @@ fn test_node_child() { assert_eq!(tree.root_node().parent(), None); } +#[test] +fn test_node_children() { + let tree = parse_json_example(); + let mut cursor = tree.walk(); + let array_node = tree.root_node().child(0).unwrap(); + assert_eq!( + array_node + .children(&mut cursor) + .map(|n| n.kind()) + .collect::>(), + &["[", "number", ",", "false", ",", "object", "]",] + ); + assert_eq!( + array_node + .named_children(&mut cursor) + .map(|n| n.kind()) + .collect::>(), + &["number", "false", "object"] + ); + let object_node = array_node + .named_children(&mut cursor) + .find(|n| n.kind() == "object") + .unwrap(); + assert_eq!( + object_node + .children(&mut cursor) + .map(|n| n.kind()) + .collect::>(), + &["{", "pair", "}",] + ); +} + +#[test] +fn test_node_children_by_field_name() { + let mut parser = Parser::new(); + parser.set_language(get_language("python")).unwrap(); + let source = " + if one: + a() + elif two: + b() + elif three: + c() + elif four: + d() + "; + + let tree = parser.parse(source, None).unwrap(); + let node = tree.root_node().child(0).unwrap(); + assert_eq!(node.kind(), "if_statement"); + let mut cursor = tree.walk(); + let alternatives = node.children_by_field_name("alternative", &mut cursor); + let alternative_texts = + alternatives.map(|n| &source[n.child_by_field_name("condition").unwrap().byte_range()]); + assert_eq!( + alternative_texts.collect::>(), + &["two", "three", "four",] + ); +} + +#[test] +fn test_node_parent_of_child_by_field_name() { + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + let tree = parser.parse("foo(a().b[0].c.d.e())", None).unwrap(); + let call_node = 
tree.root_node().named_child(0).unwrap().named_child(0).unwrap(); + assert_eq!(call_node.kind(), "call_expression"); + + // Regression test - when a field points to a hidden node (in this case, `_expression`) + // the hidden node should not be added to the node parent cache. + assert_eq!(call_node.child_by_field_name("function").unwrap().parent(), Some(call_node)); +} + #[test] fn test_node_named_child() { let tree = parse_json_example(); @@ -627,6 +700,63 @@ fn test_node_is_named_but_aliased_as_anonymous() { assert_eq!(root_node.named_child(0).unwrap().kind(), "c"); } +#[test] +fn test_node_numeric_symbols_respect_simple_aliases() { + let mut parser = Parser::new(); + parser.set_language(get_language("python")).unwrap(); + + // Example 1: + // Python argument lists can contain "splat" arguments, which are not allowed within + // other expressions. This includes `parenthesized_list_splat` nodes like `(*b)`. These + // `parenthesized_list_splat` nodes are aliased as `parenthesized_expression`. Their numeric + // `symbol`, aka `kind_id` should match that of a normal `parenthesized_expression`. + let tree = parser.parse("(a((*b)))", None).unwrap(); + let root = tree.root_node(); + assert_eq!( + root.to_sexp(), + "(module (expression_statement (parenthesized_expression (call function: (identifier) arguments: (argument_list (parenthesized_expression (list_splat (identifier))))))))", + ); + + let outer_expr_node = root.child(0).unwrap().child(0).unwrap(); + assert_eq!(outer_expr_node.kind(), "parenthesized_expression"); + + let inner_expr_node = outer_expr_node + .named_child(0) + .unwrap() + .child_by_field_name("arguments") + .unwrap() + .named_child(0) + .unwrap(); + assert_eq!(inner_expr_node.kind(), "parenthesized_expression"); + assert_eq!(inner_expr_node.kind_id(), outer_expr_node.kind_id()); + + // Example 2: + // Ruby handles the unary (negative) and binary (minus) `-` operators using two different + // tokens. 
One or more of these is an external token that's aliased as `-`. Their numeric + // kind ids should match. + parser.set_language(get_language("ruby")).unwrap(); + let tree = parser.parse("-a - b", None).unwrap(); + let root = tree.root_node(); + assert_eq!( + root.to_sexp(), + "(program (binary left: (unary operand: (identifier)) right: (identifier)))", + ); + + let binary_node = root.child(0).unwrap(); + assert_eq!(binary_node.kind(), "binary"); + + let unary_minus_node = binary_node + .child_by_field_name("left") + .unwrap() + .child(0) + .unwrap(); + assert_eq!(unary_minus_node.kind(), "-"); + + let binary_minus_node = binary_node.child_by_field_name("operator").unwrap(); + assert_eq!(binary_minus_node.kind(), "-"); + assert_eq!(unary_minus_node.kind_id(), binary_minus_node.kind_id()); +} + fn get_all_nodes(tree: &Tree) -> Vec { let mut result = Vec::new(); let mut visited_children = false; diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 882f5963..b2b2560e 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -1,13 +1,14 @@ +use super::helpers::allocations; use super::helpers::edits::ReadRecorder; use super::helpers::fixtures::{get_language, get_test_language}; use crate::generate::generate_parser_for_grammar; use crate::parse::{perform_edit, Edit}; use std::sync::atomic::{AtomicUsize, Ordering}; use std::{thread, time}; -use tree_sitter::{InputEdit, LogType, Parser, Point, Range}; +use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range}; #[test] -fn test_basic_parsing() { +fn test_parsing_simple_string() { let mut parser = Parser::new(); parser.set_language(get_language("rust")).unwrap(); @@ -26,7 +27,11 @@ fn test_basic_parsing() { assert_eq!( root_node.to_sexp(), - "(source_file (struct_item (type_identifier) (field_declaration_list)) (function_item (identifier) (parameters) (block)))" + concat!( + "(source_file ", + "(struct_item name: (type_identifier) body: 
(field_declaration_list)) ", + "(function_item name: (identifier) parameters: (parameters) body: (block)))" + ) ); let struct_node = root_node.child(0).unwrap(); @@ -118,7 +123,17 @@ fn test_parsing_with_custom_utf8_input() { .unwrap(); let root = tree.root_node(); - assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); + assert_eq!( + root.to_sexp(), + concat!( + "(source_file ", + "(function_item ", + "(visibility_modifier) ", + "name: (identifier) ", + "parameters: (parameters) ", + "body: (block (integer_literal))))" + ) + ); assert_eq!(root.kind(), "source_file"); assert_eq!(root.has_error(), false); assert_eq!(root.child(0).unwrap().kind(), "function_item"); @@ -154,7 +169,10 @@ fn test_parsing_with_custom_utf16_input() { .unwrap(); let root = tree.root_node(); - assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); + assert_eq!( + root.to_sexp(), + "(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))" + ); assert_eq!(root.kind(), "source_file"); assert_eq!(root.has_error(), false); assert_eq!(root.child(0).unwrap().kind(), "function_item"); @@ -175,7 +193,10 @@ fn test_parsing_with_callback_returning_owned_strings() { .unwrap(); let root = tree.root_node(); - assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); + assert_eq!( + root.to_sexp(), + "(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))" + ); } #[test] @@ -192,7 +213,7 @@ fn test_parsing_text_with_byte_order_mark() { .unwrap(); assert_eq!( tree.root_node().to_sexp(), - "(source_file (function_item (identifier) (parameters) (block)))" + "(source_file (function_item name: (identifier) parameters: (parameters) 
body: (block)))" ); assert_eq!(tree.root_node().start_byte(), 2); @@ -200,7 +221,7 @@ fn test_parsing_text_with_byte_order_mark() { let mut tree = parser.parse("\u{FEFF}fn a() {}", None).unwrap(); assert_eq!( tree.root_node().to_sexp(), - "(source_file (function_item (identifier) (parameters) (block)))" + "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))" ); assert_eq!(tree.root_node().start_byte(), 3); @@ -216,7 +237,7 @@ fn test_parsing_text_with_byte_order_mark() { let mut tree = parser.parse(" \u{FEFF}fn a() {}", Some(&tree)).unwrap(); assert_eq!( tree.root_node().to_sexp(), - "(source_file (ERROR (UNEXPECTED 65279)) (function_item (identifier) (parameters) (block)))" + "(source_file (ERROR (UNEXPECTED 65279)) (function_item name: (identifier) parameters: (parameters) body: (block)))" ); assert_eq!(tree.root_node().start_byte(), 1); @@ -232,11 +253,52 @@ fn test_parsing_text_with_byte_order_mark() { let tree = parser.parse("\u{FEFF}fn a() {}", Some(&tree)).unwrap(); assert_eq!( tree.root_node().to_sexp(), - "(source_file (function_item (identifier) (parameters) (block)))" + "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))" ); assert_eq!(tree.root_node().start_byte(), 3); } +#[test] +fn test_parsing_invalid_chars_at_eof() { + let mut parser = Parser::new(); + parser.set_language(get_language("json")).unwrap(); + let tree = parser.parse(b"\xdf", None).unwrap(); + assert_eq!(tree.root_node().to_sexp(), "(ERROR (UNEXPECTED INVALID))"); +} + +#[test] +fn test_parsing_unexpected_null_characters_within_source() { + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + let tree = parser.parse(b"var \0 something;", None).unwrap(); + assert_eq!( + tree.root_node().to_sexp(), + "(program (variable_declaration (ERROR (UNEXPECTED '\\0')) (variable_declarator name: (identifier))))" + ); +} + +#[test] +fn test_parsing_ends_when_input_callback_returns_empty() 
{ + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + let mut i = 0; + let source = b"abcdefghijklmnoqrs"; + let tree = parser + .parse_with( + &mut |offset, _| { + i += 1; + if offset >= 6 { + b"" + } else { + &source[offset..usize::min(source.len(), offset + 3)] + } + }, + None, + ) + .unwrap(); + assert_eq!(tree.root_node().end_byte(), 6); +} + // Incremental parsing #[test] @@ -333,6 +395,18 @@ fn test_parsing_after_editing_end_of_code() { assert_eq!(recorder.strings_read(), vec![" * ", "abc.d)",]); } +#[test] +fn test_parsing_empty_file_with_reused_tree() { + let mut parser = Parser::new(); + parser.set_language(get_language("rust")).unwrap(); + + let tree = parser.parse("", None); + parser.parse("", tree.as_ref()); + + let tree = parser.parse("\n ", None); + parser.parse("\n ", tree.as_ref()); +} + // Thread safety #[test] @@ -388,7 +462,7 @@ fn test_parsing_on_multiple_threads() { #[test] fn test_parsing_cancelled_by_another_thread() { - let cancellation_flag = Box::new(AtomicUsize::new(0)); + let cancellation_flag = std::sync::Arc::new(AtomicUsize::new(0)); let mut parser = Parser::new(); parser.set_language(get_language("javascript")).unwrap(); @@ -409,9 +483,10 @@ fn test_parsing_cancelled_by_another_thread() { ); assert!(tree.is_some()); + let flag = cancellation_flag.clone(); let cancel_thread = thread::spawn(move || { thread::sleep(time::Duration::from_millis(100)); - cancellation_flag.store(1, Ordering::SeqCst); + flag.store(1, Ordering::SeqCst); }); // Infinite input @@ -547,6 +622,56 @@ fn test_parsing_with_a_timeout_and_a_reset() { ); } +#[test] +fn test_parsing_with_a_timeout_and_implicit_reset() { + allocations::record(|| { + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + + parser.set_timeout_micros(5); + let tree = parser.parse( + "[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", 
+ None, + ); + assert!(tree.is_none()); + + // Changing the parser's language implicitly resets, discarding + // the previous partial parse. + parser.set_language(get_language("json")).unwrap(); + parser.set_timeout_micros(0); + let tree = parser.parse( + "[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", + None, + ).unwrap(); + assert_eq!( + tree.root_node() + .named_child(0) + .unwrap() + .named_child(0) + .unwrap() + .kind(), + "null" + ); + }); +} + +#[test] +fn test_parsing_with_timeout_and_no_completion() { + allocations::record(|| { + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + + parser.set_timeout_micros(5); + let tree = parser.parse( + "[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", + None, + ); + assert!(tree.is_none()); + + // drop the parser when it has an unfinished parse + }); +} + // Included Ranges #[test] @@ -559,7 +684,9 @@ fn test_parsing_with_one_included_range() { let script_content_node = html_tree.root_node().child(1).unwrap().child(1).unwrap(); assert_eq!(script_content_node.kind(), "raw_text"); - parser.set_included_ranges(&[script_content_node.range()]); + parser + .set_included_ranges(&[script_content_node.range()]) + .unwrap(); parser.set_language(get_language("javascript")).unwrap(); let js_tree = parser.parse(source_code, None).unwrap(); @@ -599,26 +726,28 @@ fn test_parsing_with_multiple_included_ranges() { let close_quote_node = template_string_node.child(3).unwrap(); parser.set_language(get_language("html")).unwrap(); - parser.set_included_ranges(&[ - Range { - start_byte: open_quote_node.end_byte(), - start_point: open_quote_node.end_position(), - end_byte: interpolation_node1.start_byte(), - end_point: interpolation_node1.start_position(), - }, - Range { - start_byte: interpolation_node1.end_byte(), - start_point: 
interpolation_node1.end_position(), - end_byte: interpolation_node2.start_byte(), - end_point: interpolation_node2.start_position(), - }, - Range { - start_byte: interpolation_node2.end_byte(), - start_point: interpolation_node2.end_position(), - end_byte: close_quote_node.start_byte(), - end_point: close_quote_node.start_position(), - }, - ]); + parser + .set_included_ranges(&[ + Range { + start_byte: open_quote_node.end_byte(), + start_point: open_quote_node.end_position(), + end_byte: interpolation_node1.start_byte(), + end_point: interpolation_node1.start_position(), + }, + Range { + start_byte: interpolation_node1.end_byte(), + start_point: interpolation_node1.end_position(), + end_byte: interpolation_node2.start_byte(), + end_point: interpolation_node2.start_position(), + }, + Range { + start_byte: interpolation_node2.end_byte(), + start_point: interpolation_node2.end_position(), + end_byte: close_quote_node.start_byte(), + end_point: close_quote_node.start_position(), + }, + ]) + .unwrap(); let html_tree = parser.parse(source_code, None).unwrap(); assert_eq!( @@ -667,6 +796,47 @@ fn test_parsing_with_multiple_included_ranges() { ); } +#[test] +fn test_parsing_error_in_invalid_included_ranges() { + let mut parser = Parser::new(); + + // Ranges are not ordered + let error = parser + .set_included_ranges(&[ + Range { + start_byte: 23, + end_byte: 29, + start_point: Point::new(0, 23), + end_point: Point::new(0, 29), + }, + Range { + start_byte: 0, + end_byte: 5, + start_point: Point::new(0, 0), + end_point: Point::new(0, 5), + }, + Range { + start_byte: 50, + end_byte: 60, + start_point: Point::new(0, 50), + end_point: Point::new(0, 60), + }, + ]) + .unwrap_err(); + assert_eq!(error, IncludedRangesError(1)); + + // Range ends before it starts + let error = parser + .set_included_ranges(&[Range { + start_byte: 10, + end_byte: 5, + start_point: Point::new(0, 10), + end_point: Point::new(0, 5), + }]) + .unwrap_err(); + assert_eq!(error, IncludedRangesError(0)); +} 
+ #[test] fn test_parsing_utf16_code_with_errors_at_the_end_of_an_included_range() { let source_code = ""; @@ -677,12 +847,14 @@ fn test_parsing_utf16_code_with_errors_at_the_end_of_an_included_range() { let mut parser = Parser::new(); parser.set_language(get_language("javascript")).unwrap(); - parser.set_included_ranges(&[Range { - start_byte, - end_byte, - start_point: Point::new(0, start_byte), - end_point: Point::new(0, end_byte), - }]); + parser + .set_included_ranges(&[Range { + start_byte, + end_byte, + start_point: Point::new(0, start_byte), + end_point: Point::new(0, end_byte), + }]) + .unwrap(); let tree = parser.parse_utf16(&utf16_source_code, None).unwrap(); assert_eq!(tree.root_node().to_sexp(), "(program (ERROR (identifier)))"); } @@ -697,20 +869,22 @@ fn test_parsing_with_external_scanner_that_uses_included_range_boundaries() { let mut parser = Parser::new(); parser.set_language(get_language("javascript")).unwrap(); - parser.set_included_ranges(&[ - Range { - start_byte: range1_start_byte, - end_byte: range1_end_byte, - start_point: Point::new(0, range1_start_byte), - end_point: Point::new(0, range1_end_byte), - }, - Range { - start_byte: range2_start_byte, - end_byte: range2_end_byte, - start_point: Point::new(0, range2_start_byte), - end_point: Point::new(0, range2_end_byte), - }, - ]); + parser + .set_included_ranges(&[ + Range { + start_byte: range1_start_byte, + end_byte: range1_end_byte, + start_point: Point::new(0, range1_start_byte), + end_point: Point::new(0, range1_end_byte), + }, + Range { + start_byte: range2_start_byte, + end_byte: range2_end_byte, + start_point: Point::new(0, range2_start_byte), + end_point: Point::new(0, range2_end_byte), + }, + ]) + .unwrap(); let tree = parser.parse(source_code, None).unwrap(); let root = tree.root_node(); @@ -758,20 +932,22 @@ fn test_parsing_with_a_newly_excluded_range() { let directive_start = source_code.find("<%=").unwrap(); let directive_end = source_code.find("").unwrap(); let source_code_end 
= source_code.len(); - parser.set_included_ranges(&[ - Range { - start_byte: 0, - end_byte: directive_start, - start_point: Point::new(0, 0), - end_point: Point::new(0, directive_start), - }, - Range { - start_byte: directive_end, - end_byte: source_code_end, - start_point: Point::new(0, directive_end), - end_point: Point::new(0, source_code_end), - }, - ]); + parser + .set_included_ranges(&[ + Range { + start_byte: 0, + end_byte: directive_start, + start_point: Point::new(0, 0), + end_point: Point::new(0, directive_start), + }, + Range { + start_byte: directive_end, + end_byte: source_code_end, + start_point: Point::new(0, directive_end), + end_point: Point::new(0, source_code_end), + }, + ]) + .unwrap(); let tree = parser.parse(&source_code, Some(&first_tree)).unwrap(); assert_eq!( @@ -809,59 +985,73 @@ fn test_parsing_with_a_newly_excluded_range() { #[test] fn test_parsing_with_a_newly_included_range() { - let source_code = "
<%= foo() %>
<%= bar() %>"; - let first_code_start_index = source_code.find(" foo").unwrap(); - let first_code_end_index = first_code_start_index + 7; - let second_code_start_index = source_code.find(" bar").unwrap(); - let second_code_end_index = second_code_start_index + 7; - let ranges = [ - Range { - start_byte: first_code_start_index, - end_byte: first_code_end_index, - start_point: Point::new(0, first_code_start_index), - end_point: Point::new(0, first_code_end_index), - }, - Range { - start_byte: second_code_start_index, - end_byte: second_code_end_index, - start_point: Point::new(0, second_code_start_index), - end_point: Point::new(0, second_code_end_index), - }, - ]; + let source_code = "
<%= foo() %>
<%= bar() %><%= baz() %>"; + let range1_start = source_code.find(" foo").unwrap(); + let range2_start = source_code.find(" bar").unwrap(); + let range3_start = source_code.find(" baz").unwrap(); + let range1_end = range1_start + 7; + let range2_end = range2_start + 7; + let range3_end = range3_start + 7; // Parse only the first code directive as JavaScript let mut parser = Parser::new(); parser.set_language(get_language("javascript")).unwrap(); - parser.set_included_ranges(&ranges[0..1]); - let first_tree = parser.parse(source_code, None).unwrap(); + parser + .set_included_ranges(&[simple_range(range1_start, range1_end)]) + .unwrap(); + let tree = parser.parse(source_code, None).unwrap(); assert_eq!( - first_tree.root_node().to_sexp(), + tree.root_node().to_sexp(), concat!( "(program", " (expression_statement (call_expression function: (identifier) arguments: (arguments))))", ) ); - // Parse both the code directives as JavaScript, using the old tree as a reference. - parser.set_included_ranges(&ranges); - let tree = parser.parse(&source_code, Some(&first_tree)).unwrap(); + // Parse both the first and third code directives as JavaScript, using the old tree as a + // reference. 
+ parser + .set_included_ranges(&[ + simple_range(range1_start, range1_end), + simple_range(range3_start, range3_end), + ]) + .unwrap(); + let tree2 = parser.parse(&source_code, Some(&tree)).unwrap(); assert_eq!( - tree.root_node().to_sexp(), + tree2.root_node().to_sexp(), concat!( "(program", " (expression_statement (call_expression function: (identifier) arguments: (arguments)))", " (expression_statement (call_expression function: (identifier) arguments: (arguments))))", ) ); - assert_eq!( - tree.changed_ranges(&first_tree).collect::>(), - vec![Range { - start_byte: first_code_end_index + 1, - end_byte: second_code_end_index + 1, - start_point: Point::new(0, first_code_end_index + 1), - end_point: Point::new(0, second_code_end_index + 1), - }] + tree2.changed_ranges(&tree).collect::>(), + &[simple_range(range1_end, range3_end)] + ); + + // Parse all three code directives as JavaScript, using the old tree as a + // reference. + parser + .set_included_ranges(&[ + simple_range(range1_start, range1_end), + simple_range(range2_start, range2_end), + simple_range(range3_start, range3_end), + ]) + .unwrap(); + let tree3 = parser.parse(&source_code, Some(&tree)).unwrap(); + assert_eq!( + tree3.root_node().to_sexp(), + concat!( + "(program", + " (expression_statement (call_expression function: (identifier) arguments: (arguments)))", + " (expression_statement (call_expression function: (identifier) arguments: (arguments)))", + " (expression_statement (call_expression function: (identifier) arguments: (arguments))))", + ) + ); + assert_eq!( + tree3.changed_ranges(&tree2).collect::>(), + &[simple_range(range2_start + 1, range2_end - 1)] ); } @@ -899,20 +1089,22 @@ fn test_parsing_with_included_ranges_and_missing_tokens() { // There's a missing `a` token at the beginning of the code. It must be inserted // at the beginning of the first included range, not at {0, 0}. 
let source_code = "__bc__bc__"; - parser.set_included_ranges(&[ - Range { - start_byte: 2, - end_byte: 4, - start_point: Point::new(0, 2), - end_point: Point::new(0, 4), - }, - Range { - start_byte: 6, - end_byte: 8, - start_point: Point::new(0, 6), - end_point: Point::new(0, 8), - }, - ]); + parser + .set_included_ranges(&[ + Range { + start_byte: 2, + end_byte: 4, + start_point: Point::new(0, 2), + end_point: Point::new(0, 4), + }, + Range { + start_byte: 6, + end_byte: 8, + start_point: Point::new(0, 6), + end_point: Point::new(0, 8), + }, + ]) + .unwrap(); let tree = parser.parse(source_code, None).unwrap(); let root = tree.root_node(); @@ -923,3 +1115,12 @@ fn test_parsing_with_included_ranges_and_missing_tokens() { assert_eq!(root.start_byte(), 2); assert_eq!(root.child(3).unwrap().start_byte(), 4); } + +fn simple_range(start: usize, end: usize) -> Range { + Range { + start_byte: start, + end_byte: end, + start_point: Point::new(0, start), + end_point: Point::new(0, end), + } +} diff --git a/cli/src/tests/pathological_test.rs b/cli/src/tests/pathological_test.rs new file mode 100644 index 00000000..7ebd5439 --- /dev/null +++ b/cli/src/tests/pathological_test.rs @@ -0,0 +1,15 @@ +use super::helpers::allocations; +use super::helpers::fixtures::get_language; +use tree_sitter::Parser; + +#[test] +fn test_pathological_example_1() { + let language = "cpp"; + let source = r#"*ss(qqX, - b: Option, -} - -#[test] -fn test_walk_with_properties_with_nth_child() { - let language = get_language("javascript"); - let property_sheet = PropertySheet::::new( - language, - &generate_property_sheet_string( - "/some/path.css", - " - binary_expression > identifier:nth-child(2) { - a: x; - } - - binary_expression > identifier { - a: y; - } - - identifier { - a: z; - } - ", - ), - ) - .unwrap(); - - let source_code = "a = b || c;"; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source_code, None).unwrap(); - - let mut cursor = 
tree.walk_with_properties(&property_sheet, source_code.as_bytes()); - assert_eq!(cursor.node().kind(), "program"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "expression_statement"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "assignment_expression"); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("z".to_string()), - b: None - } - ); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "="); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "binary_expression"); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("y".to_string()), - b: None - } - ); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "||"); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("x".to_string()), - b: None - } - ); -} - -#[test] -fn test_walk_with_properties_with_regexes() { - let language = get_language("javascript"); - let property_sheet = PropertySheet::::new( - language, - &generate_property_sheet_string( - "/some/path.css", - " - identifier { - &[text='^[A-Z]'] { - a: y; - } - - &[text='^[A-Z_]+$'] { - a: z; - } - - a: x; - } - ", - ), - ) - .unwrap(); - - let source_code = "const ABC = Def(ghi);"; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source_code, None).unwrap(); - - let mut cursor = tree.walk_with_properties(&property_sheet, source_code.as_bytes()); - assert_eq!(cursor.node().kind(), "program"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "lexical_declaration"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), 
"const"); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "variable_declarator"); - - // The later selector with a text regex overrides the earlier one. - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("z".to_string()), - b: None - } - ); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "="); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "call_expression"); - - // The selectors with text regexes override the selector without one. - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("y".to_string()), - b: None - } - ); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "arguments"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "("); - - // This node doesn't match either of the regexes. 
- assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("x".to_string()), - b: None - } - ); -} - -#[test] -fn test_walk_with_properties_based_on_fields() { - let language = get_language("javascript"); - let property_sheet = PropertySheet::::new( - language, - &generate_property_sheet_string( - "/some/path.css", - " - arrow_function > .parameter { - a: x; - } - - function_declaration { - & > .parameters > identifier { - a: y; - } - - & > .name { - b: z; - } - } - - identifier { - a: w; - } - ", - ), - ) - .unwrap(); - - let source_code = "function a(b) { return c => c + b; }"; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source_code, None).unwrap(); - let mut cursor = tree.walk_with_properties(&property_sheet, source_code.as_bytes()); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "function_declaration"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "function"); - assert_eq!(*cursor.node_properties(), Properties::default()); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("w".to_string()), - b: Some("z".to_string()) - } - ); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "formal_parameters"); - assert_eq!(*cursor.node_properties(), Properties::default()); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "("); - assert_eq!(*cursor.node_properties(), Properties::default()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("y".to_string()), - b: None, - } - ); - - assert!(cursor.goto_parent()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "statement_block"); 
- assert!(cursor.goto_first_child()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "return_statement"); - assert!(cursor.goto_first_child()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "arrow_function"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("x".to_string()), - b: None, - } - ); -} - -fn generate_property_sheet_string(path: &str, css: &str) -> String { - serde_json::to_string(&properties::generate_property_sheet(path, css, &HashSet::new()).unwrap()) - .unwrap() -} diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs new file mode 100644 index 00000000..02f222bb --- /dev/null +++ b/cli/src/tests/query_test.rs @@ -0,0 +1,3027 @@ +use super::helpers::allocations; +use super::helpers::fixtures::get_language; +use lazy_static::lazy_static; +use std::env; +use std::fmt::Write; +use tree_sitter::{ + Language, Node, Parser, Query, QueryCapture, QueryCursor, QueryError, QueryErrorKind, + QueryMatch, QueryPredicate, QueryPredicateArg, QueryProperty, +}; + +lazy_static! { + static ref EXAMPLE_FILTER: Option = env::var("TREE_SITTER_TEST_EXAMPLE_FILTER").ok(); +} + +#[test] +fn test_query_errors_on_invalid_syntax() { + allocations::record(|| { + let language = get_language("javascript"); + + assert!(Query::new(language, "(if_statement)").is_ok()); + assert!(Query::new( + language, + "(if_statement condition:(parenthesized_expression (identifier)))" + ) + .is_ok()); + + // Mismatched parens + assert_eq!( + Query::new(language, "(if_statement").unwrap_err().message, + [ + "(if_statement", // + " ^", + ] + .join("\n") + ); + assert_eq!( + Query::new(language, "; comment 1\n; comment 2\n (if_statement))") + .unwrap_err() + .message, + [ + " (if_statement))", // + " ^", + ] + .join("\n") + ); + + // Return an error at the *beginning* of a bare identifier not followed a colon. 
+ // If there's a colon but no pattern, return an error at the end of the colon. + assert_eq!( + Query::new(language, "(if_statement identifier)") + .unwrap_err() + .message, + [ + "(if_statement identifier)", // + " ^", + ] + .join("\n") + ); + assert_eq!( + Query::new(language, "(if_statement condition:)") + .unwrap_err() + .message, + [ + "(if_statement condition:)", // + " ^", + ] + .join("\n") + ); + + // Return an error at the beginning of an unterminated string. + assert_eq!( + Query::new(language, r#"(identifier) "h "#) + .unwrap_err() + .message, + [ + r#"(identifier) "h "#, // + r#" ^"#, + ] + .join("\n") + ); + + assert_eq!( + Query::new(language, r#"((identifier) ()"#) + .unwrap_err() + .message, + [ + "((identifier) ()", // + " ^", + ] + .join("\n") + ); + assert_eq!( + Query::new(language, r#"((identifier) [])"#) + .unwrap_err() + .message, + [ + "((identifier) [])", // + " ^", + ] + .join("\n") + ); + assert_eq!( + Query::new(language, r#"((identifier) (#a)"#) + .unwrap_err() + .message, + [ + "((identifier) (#a)", // + " ^", + ] + .join("\n") + ); + assert_eq!( + Query::new(language, r#"((identifier) @x (#eq? @x a"#) + .unwrap_err() + .message, + [ + r#"((identifier) @x (#eq? 
@x a"#, + r#" ^"#, + ] + .join("\n") + ); + }); +} + +#[test] +fn test_query_errors_on_invalid_symbols() { + allocations::record(|| { + let language = get_language("javascript"); + + assert_eq!( + Query::new(language, "(clas)").unwrap_err(), + QueryError { + row: 0, + offset: 1, + column: 1, + kind: QueryErrorKind::NodeType, + message: "clas".to_string() + } + ); + assert_eq!( + Query::new(language, "(if_statement (arrayyyyy))").unwrap_err(), + QueryError { + row: 0, + offset: 15, + column: 15, + kind: QueryErrorKind::NodeType, + message: "arrayyyyy".to_string() + }, + ); + assert_eq!( + Query::new(language, "(if_statement condition: (non_existent3))").unwrap_err(), + QueryError { + row: 0, + offset: 26, + column: 26, + kind: QueryErrorKind::NodeType, + message: "non_existent3".to_string() + }, + ); + assert_eq!( + Query::new(language, "(if_statement condit: (identifier))").unwrap_err(), + QueryError { + row: 0, + offset: 14, + column: 14, + kind: QueryErrorKind::Field, + message: "condit".to_string() + }, + ); + assert_eq!( + Query::new(language, "(if_statement conditioning: (identifier))").unwrap_err(), + QueryError { + row: 0, + offset: 14, + column: 14, + kind: QueryErrorKind::Field, + message: "conditioning".to_string() + } + ); + }); +} + +#[test] +fn test_query_errors_on_invalid_predicates() { + allocations::record(|| { + let language = get_language("javascript"); + + assert_eq!( + Query::new(language, "((identifier) @id (@id))").unwrap_err(), + QueryError { + kind: QueryErrorKind::Syntax, + row: 0, + column: 19, + offset: 19, + message: [ + "((identifier) @id (@id))", // + " ^" + ] + .join("\n") + } + ); + assert_eq!( + Query::new(language, "((identifier) @id (#eq? @id))").unwrap_err(), + QueryError { + kind: QueryErrorKind::Predicate, + row: 0, + column: 0, + offset: 0, + message: "Wrong number of arguments to #eq? predicate. Expected 2, got 1." + .to_string() + } + ); + assert_eq!( + Query::new(language, "((identifier) @id (#eq? 
@id @ok))").unwrap_err(), + QueryError { + kind: QueryErrorKind::Capture, + row: 0, + column: 29, + offset: 29, + message: "ok".to_string(), + } + ); + }); +} + +#[test] +fn test_query_errors_on_impossible_patterns() { + let js_lang = get_language("javascript"); + let rb_lang = get_language("ruby"); + + allocations::record(|| { + assert_eq!( + Query::new( + js_lang, + "(binary_expression left: (identifier) left: (identifier))" + ), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 38, + column: 38, + message: [ + "(binary_expression left: (identifier) left: (identifier))", + " ^" + ] + .join("\n"), + }) + ); + + Query::new( + js_lang, + "(function_declaration name: (identifier) (statement_block))", + ) + .unwrap(); + assert_eq!( + Query::new(js_lang, "(function_declaration name: (statement_block))"), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 22, + column: 22, + message: [ + "(function_declaration name: (statement_block))", + " ^", + ] + .join("\n") + }) + ); + + Query::new(rb_lang, "(call receiver:(call))").unwrap(); + assert_eq!( + Query::new(rb_lang, "(call receiver:(binary))"), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 6, + column: 6, + message: [ + "(call receiver:(binary))", // + " ^", + ] + .join("\n") + }) + ); + + Query::new( + js_lang, + "[ + (function (identifier)) + (function_declaration (identifier)) + (generator_function_declaration (identifier)) + ]", + ) + .unwrap(); + assert_eq!( + Query::new( + js_lang, + "[ + (function (identifier)) + (function_declaration (object)) + (generator_function_declaration (identifier)) + ]", + ), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 2, + offset: 88, + column: 42, + message: [ + " (function_declaration (object))", // + " ^", + ] + .join("\n") + }) + ); + + assert_eq!( + Query::new(js_lang, "(identifier (identifier))",), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 12, + column: 12, 
+ message: [ + "(identifier (identifier))", // + " ^", + ] + .join("\n") + }) + ); + assert_eq!( + Query::new(js_lang, "(true (true))",), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 6, + column: 6, + message: [ + "(true (true))", // + " ^", + ] + .join("\n") + }) + ); + + Query::new( + js_lang, + "(if_statement + condition: (parenthesized_expression (_expression) @cond))", + ) + .unwrap(); + + assert_eq!( + Query::new(js_lang, "(if_statement condition: (_expression))",), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 14, + column: 14, + message: [ + "(if_statement condition: (_expression))", // + " ^", + ] + .join("\n") + }) + ); + }); +} + +#[test] +fn test_query_verifies_possible_patterns_with_aliased_parent_nodes() { + allocations::record(|| { + let ruby = get_language("ruby"); + + Query::new(ruby, "(destructured_parameter (identifier))").unwrap(); + + assert_eq!( + Query::new(ruby, "(destructured_parameter (string))",), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 24, + column: 24, + message: [ + "(destructured_parameter (string))", // + " ^", + ] + .join("\n") + }) + ); + }); +} + +#[test] +fn test_query_matches_with_simple_pattern() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + "(function_declaration name: (identifier) @fn-name)", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + "function one() { two(); function three() {} }", + &[ + (0, vec![("fn-name", "one")]), + (0, vec![("fn-name", "three")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_multiple_on_same_root() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + "(class_declaration + name: (identifier) @the-class-name + (class_body + (method_definition + name: (property_identifier) @the-method-name)))", + ) + .unwrap(); + + assert_query_matches( + language, + 
&query, + " + class Person { + // the constructor + constructor(name) { this.name = name; } + + // the getter + getFullName() { return this.name; } + } + ", + &[ + ( + 0, + vec![ + ("the-class-name", "Person"), + ("the-method-name", "constructor"), + ], + ), + ( + 0, + vec![ + ("the-class-name", "Person"), + ("the-method-name", "getFullName"), + ], + ), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_multiple_patterns_different_roots() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (function_declaration name:(identifier) @fn-def) + (call_expression function:(identifier) @fn-ref) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + function f1() { + f2(f3()); + } + ", + &[ + (0, vec![("fn-def", "f1")]), + (1, vec![("fn-ref", "f2")]), + (1, vec![("fn-ref", "f3")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_multiple_patterns_same_root() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (pair + key: (property_identifier) @method-def + value: (function)) + + (pair + key: (property_identifier) @method-def + value: (arrow_function)) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + a = { + b: () => { return c; }, + d: function() { return d; } + }; + ", + &[ + (1, vec![("method-def", "b")]), + (0, vec![("method-def", "d")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_nesting_and_no_fields() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (array + (array + (identifier) @x1 + (identifier) @x2)) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + [[a]]; + [[c, d], [e, f, g, h]]; + [[h], [i]]; + ", + &[ + (0, vec![("x1", "c"), ("x2", "d")]), + (0, vec![("x1", "e"), ("x2", "f")]), + (0, vec![("x1", "e"), ("x2", "g")]), + (0, vec![("x1", "f"), ("x2", "g")]), + 
(0, vec![("x1", "e"), ("x2", "h")]), + (0, vec![("x1", "f"), ("x2", "h")]), + (0, vec![("x1", "g"), ("x2", "h")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_many_results() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new(language, "(array (identifier) @element)").unwrap(); + + assert_query_matches( + language, + &query, + &"[hello];\n".repeat(50), + &vec![(0, vec![("element", "hello")]); 50], + ); + }); +} + +#[test] +fn test_query_matches_with_many_overlapping_results() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (call_expression + function: (member_expression + property: (property_identifier) @method)) + (call_expression + function: (identifier) @function) + ((identifier) @constant + (#match? @constant "[A-Z\\d_]+")) + "#, + ) + .unwrap(); + + let count = 1024; + + // Deeply nested chained function calls: + // a + // .foo(bar(BAZ)) + // .foo(bar(BAZ)) + // .foo(bar(BAZ)) + // ... 
+ let mut source = "a".to_string(); + source += &"\n .foo(bar(BAZ))".repeat(count); + + assert_query_matches( + language, + &query, + &source, + &[ + (0, vec![("method", "foo")]), + (1, vec![("function", "bar")]), + (2, vec![("constant", "BAZ")]), + ] + .iter() + .cloned() + .cycle() + .take(3 * count) + .collect::>(), + ); + }); +} + +#[test] +fn test_query_matches_capturing_error_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (ERROR (identifier) @the-error-identifier) @the-error + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + "function a(b,, c, d :e:) {}", + &[(0, vec![("the-error", ":e:"), ("the-error-identifier", "e")])], + ); + }); +} + +#[test] +fn test_query_matches_with_named_wildcard() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (return_statement (_) @the-return-value) + (binary_expression operator: _ @the-operator) + ", + ) + .unwrap(); + + let source = "return a + b - c;"; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + + assert_eq!( + collect_matches(matches, &query, source), + &[ + (0, vec![("the-return-value", "a + b - c")]), + (1, vec![("the-operator", "+")]), + (1, vec![("the-operator", "-")]), + ] + ); + }); +} + +#[test] +fn test_query_matches_with_wildcard_at_the_root() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (_ + (comment) @doc + . 
+ (function_declaration + name: (identifier) @name)) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + "/* one */ var x; /* two */ function y() {} /* three */ class Z {}", + &[(0, vec![("doc", "/* two */"), ("name", "y")])], + ); + + let query = Query::new( + language, + " + (_ (string) @a) + (_ (number) @b) + (_ (true) @c) + (_ (false) @d) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + "['hi', x(true), {y: false}]", + &[ + (0, vec![("a", "'hi'")]), + (2, vec![("c", "true")]), + (3, vec![("d", "false")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_immediate_siblings() { + allocations::record(|| { + let language = get_language("python"); + + // The immediate child operator '.' can be used in three similar ways: + // 1. Before the first child node in a pattern, it means that there cannot be any + // named siblings before that child node. + // 2. After the last child node in a pattern, it means that there cannot be any named + // sibling after that child node. + // 2. Between two child nodes in a pattern, it specifies that there cannot be any + // named siblings between those two child snodes. + let query = Query::new( + language, + " + (dotted_name + (identifier) @parent + . + (identifier) @child) + (dotted_name + (identifier) @last-child + .) + (list + . + (_) @first-element) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + "import a.b.c.d; return [w, [1, y], z]", + &[ + (0, vec![("parent", "a"), ("child", "b")]), + (0, vec![("parent", "b"), ("child", "c")]), + (0, vec![("parent", "c"), ("child", "d")]), + (1, vec![("last-child", "d")]), + (2, vec![("first-element", "w")]), + (2, vec![("first-element", "1")]), + ], + ); + + let query = Query::new( + language, + " + (block . (_) @first-stmt) + (block (_) @stmt) + (block (_) @last-stmt .) 
+ ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + if a: + b() + c() + if d(): e(); f() + g() + ", + &[ + (0, vec![("first-stmt", "b()")]), + (1, vec![("stmt", "b()")]), + (1, vec![("stmt", "c()")]), + (1, vec![("stmt", "if d(): e(); f()")]), + (0, vec![("first-stmt", "e()")]), + (1, vec![("stmt", "e()")]), + (1, vec![("stmt", "f()")]), + (2, vec![("last-stmt", "f()")]), + (1, vec![("stmt", "g()")]), + (2, vec![("last-stmt", "g()")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_repeated_leaf_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + + let query = Query::new( + language, + " + ( + (comment)+ @doc + . + (class_declaration + name: (identifier) @name) + ) + + ( + (comment)+ @doc + . + (function_declaration + name: (identifier) @name) + ) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + // one + // two + a(); + + // three + { + // four + // five + // six + class B {} + + // seven + c(); + + // eight + function d() {} + } + ", + &[ + ( + 0, + vec![ + ("doc", "// four"), + ("doc", "// five"), + ("doc", "// six"), + ("name", "B"), + ], + ), + (1, vec![("doc", "// eight"), ("name", "d")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_optional_nodes_inside_of_repetitions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new(language, r#"(array (","? 
(number) @num)+)"#).unwrap(); + + assert_query_matches( + language, + &query, + r#" + var a = [1, 2, 3, 4] + "#, + &[( + 0, + vec![("num", "1"), ("num", "2"), ("num", "3"), ("num", "4")], + )], + ); + }); +} + +#[test] +fn test_query_matches_with_top_level_repetitions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (comment)+ @doc + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#" + // a + // b + // c + + d() + + // e + "#, + &[ + (0, vec![("doc", "// a"), ("doc", "// b"), ("doc", "// c")]), + (0, vec![("doc", "// e")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_non_terminal_repetitions_within_root() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (_ + (expression_statement + (identifier) @id)+) + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#" + a; + b; + c; + "#, + &[(0, vec![("id", "a"), ("id", "b"), ("id", "c")])], + ); + }); +} + +#[test] +fn test_query_matches_with_nested_repetitions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (variable_declaration + (","? (variable_declarator name: (identifier) @x))+)+ + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#" + var a = b, c, d + var e, f + + // more + var g + "#, + &[ + ( + 0, + vec![("x", "a"), ("x", "c"), ("x", "d"), ("x", "e"), ("x", "f")], + ), + (0, vec![("x", "g")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_multiple_repetition_patterns_that_intersect_other_pattern() { + allocations::record(|| { + let language = get_language("javascript"); + + // When this query sees a comment, it must keep track of several potential + // matches: up to two for each pattern that begins with a comment. 
+ let query = Query::new( + language, + r#" + (call_expression + function: (member_expression + property: (property_identifier) @name)) @ref.method + + ((comment)* @doc (function_declaration)) + ((comment)* @doc (generator_function_declaration)) + ((comment)* @doc (class_declaration)) + ((comment)* @doc (lexical_declaration)) + ((comment)* @doc (variable_declaration)) + ((comment)* @doc (method_definition)) + + (comment) @comment + "#, + ) + .unwrap(); + + // Here, a series of comments occurs in the middle of a match of the first + // pattern. To avoid exceeding the storage limits and discarding that outer + // match, the comment-related matches need to be managed efficiently. + let source = format!( + "theObject\n{}\n.theMethod()", + " // the comment\n".repeat(64) + ); + + assert_query_matches( + language, + &query, + &source, + &vec![(7, vec![("comment", "// the comment")]); 64] + .into_iter() + .chain(vec![( + 0, + vec![("ref.method", source.as_str()), ("name", "theMethod")], + )]) + .collect::>(), + ); + }); +} + +#[test] +fn test_query_matches_with_leading_zero_or_more_repeated_leaf_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + + let query = Query::new( + language, + " + ( + (comment)* @doc + . + (function_declaration + name: (identifier) @name) + ) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + function a() { + // one + var b; + + function c() {} + + // two + // three + var d; + + // four + // five + function e() { + + } + } + + // six + ", + &[ + (0, vec![("name", "a")]), + (0, vec![("name", "c")]), + ( + 0, + vec![("doc", "// four"), ("doc", "// five"), ("name", "e")], + ), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_trailing_optional_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + + let query = Query::new( + language, + " + (class_declaration + name: (identifier) @class + (class_heritage + (identifier) @superclass)?) 
+ ", + ) + .unwrap(); + + assert_query_matches(language, &query, "class A {}", &[(0, vec![("class", "A")])]); + + assert_query_matches( + language, + &query, + " + class A {} + class B extends C {} + class D extends (E.F) {} + ", + &[ + (0, vec![("class", "A")]), + (0, vec![("class", "B"), ("superclass", "C")]), + (0, vec![("class", "D")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_nested_optional_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + + // A function call, optionally containing a function call, which optionally contains a number + let query = Query::new( + language, + " + (call_expression + function: (identifier) @outer-fn + arguments: (arguments + (call_expression + function: (identifier) @inner-fn + arguments: (arguments + (number)? @num))?)) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#" + a(b, c(), d(null, 1, 2)) + e() + f(g()) + "#, + &[ + (0, vec![("outer-fn", "a"), ("inner-fn", "c")]), + (0, vec![("outer-fn", "c")]), + (0, vec![("outer-fn", "a"), ("inner-fn", "d"), ("num", "1")]), + (0, vec![("outer-fn", "a"), ("inner-fn", "d"), ("num", "2")]), + (0, vec![("outer-fn", "d")]), + (0, vec![("outer-fn", "e")]), + (0, vec![("outer-fn", "f"), ("inner-fn", "g")]), + (0, vec![("outer-fn", "g")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_repeated_internal_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (_ + (method_definition + (decorator (identifier) @deco)+ + name: (property_identifier) @name)) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + class A { + @c + @d + e() {} + } + ", + &[(0, vec![("deco", "c"), ("deco", "d"), ("name", "e")])], + ); + }) +} + +#[test] +fn test_query_matches_with_simple_alternatives() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (pair + key: 
[(property_identifier) (string)] @key + value: [(function) @val1 (arrow_function) @val2]) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + a = { + b: c, + 'd': e => f, + g: { + h: function i() {}, + 'x': null, + j: _ => k + }, + 'l': function m() {}, + }; + ", + &[ + (0, vec![("key", "'d'"), ("val2", "e => f")]), + (0, vec![("key", "h"), ("val1", "function i() {}")]), + (0, vec![("key", "j"), ("val2", "_ => k")]), + (0, vec![("key", "'l'"), ("val1", "function m() {}")]), + ], + ); + }) +} + +#[test] +fn test_query_matches_with_alternatives_in_repetitions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (array + [(identifier) (string)] @el + . + ( + "," + . + [(identifier) (string)] @el + )*) + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + a = [b, 'c', d, 1, e, 'f', 'g', h]; + ", + &[ + (0, vec![("el", "b"), ("el", "'c'"), ("el", "d")]), + ( + 0, + vec![("el", "e"), ("el", "'f'"), ("el", "'g'"), ("el", "h")], + ), + ], + ); + }) +} + +#[test] +fn test_query_matches_with_alternatives_at_root() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + [ + "if" + "else" + "function" + "throw" + "return" + ] @keyword + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + function a(b, c, d) { + if (b) { + return c; + } else { + throw d; + } + } + ", + &[ + (0, vec![("keyword", "function")]), + (0, vec![("keyword", "if")]), + (0, vec![("keyword", "return")]), + (0, vec![("keyword", "else")]), + (0, vec![("keyword", "throw")]), + ], + ); + }) +} + +#[test] +fn test_query_matches_with_alternatives_under_fields() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (assignment_expression + left: [ + (identifier) @variable + (member_expression property: (property_identifier) @variable) + ]) + "#, + ) 
+ .unwrap(); + + assert_query_matches( + language, + &query, + " + a = b; + b = c.d; + e.f = g; + h.i = j.k; + ", + &[ + (0, vec![("variable", "a")]), + (0, vec![("variable", "b")]), + (0, vec![("variable", "f")]), + (0, vec![("variable", "i")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_in_language_with_simple_aliases() { + allocations::record(|| { + let language = get_language("html"); + + // HTML uses different tokens to track start tags names, end + // tag names, script tag names, and style tag names. All of + // these tokens are aliased to `tag_name`. + let query = Query::new(language, "(tag_name) @tag").unwrap(); + + assert_query_matches( + language, + &query, + " +
+ + +
+ ", + &[ + (0, vec![("tag", "div")]), + (0, vec![("tag", "script")]), + (0, vec![("tag", "script")]), + (0, vec![("tag", "style")]), + (0, vec![("tag", "style")]), + (0, vec![("tag", "div")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_different_tokens_with_the_same_string_value() { + allocations::record(|| { + // In Rust, there are two '<' tokens: one for the binary operator, + // and one with higher precedence for generics. + let language = get_language("rust"); + let query = Query::new( + language, + r#" + "<" @less + ">" @greater + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + "const A: B = d < e || f > g;", + &[ + (0, vec![("less", "<")]), + (1, vec![("greater", ">")]), + (0, vec![("less", "<")]), + (1, vec![("greater", ">")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_too_many_permutations_to_track() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (array (identifier) @pre (identifier) @post) + ", + ) + .unwrap(); + + let mut source = "hello, ".repeat(50); + source.insert(0, '['); + source.push_str("];"); + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(&source)); + + // For this pathological query, some match permutations will be dropped. + // Just check that a subset of the results are returned, and crash or + // leak occurs. 
+ assert_eq!( + collect_matches(matches, &query, source.as_str())[0], + (0, vec![("pre", "hello"), ("post", "hello")]), + ); + }); +} + +#[test] +fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + ( + (comment) @doc + ; not immediate + (class_declaration) @class + ) + + (call_expression + function: [ + (identifier) @function + (member_expression property: (property_identifier) @method) + ]) + ", + ) + .unwrap(); + + let source = "/* hi */ a.b(); ".repeat(50); + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(&source)); + + assert_eq!( + collect_matches(matches, &query, source.as_str()), + vec![(1, vec![("method", "b")]); 50], + ); + }); +} + +#[test] +fn test_query_matches_with_anonymous_tokens() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + ";" @punctuation + "&&" @operator + "\"" @quote + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#"foo(a && "b");"#, + &[ + (1, vec![("operator", "&&")]), + (2, vec![("quote", "\"")]), + (2, vec![("quote", "\"")]), + (0, vec![("punctuation", ";")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_supertypes() { + allocations::record(|| { + let language = get_language("python"); + let query = Query::new( + language, + r#" + (argument_list (expression) @arg) + + (keyword_argument + value: (expression) @kw_arg) + + (assignment + left: (identifier) @var_def) + + (primary_expression/identifier) @var_ref + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + a = b.c( + [d], + # a comment + e=f + ) + ", + &[ + (2, vec![("var_def", "a")]), + (3, vec![("var_ref", "b")]), + (0, 
vec![("arg", "[d]")]), + (3, vec![("var_ref", "d")]), + (1, vec![("kw_arg", "f")]), + (3, vec![("var_ref", "f")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_within_byte_range() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new(language, "(identifier) @element").unwrap(); + + let source = "[a, b, c, d, e, f, g]"; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let mut cursor = QueryCursor::new(); + let matches = + cursor + .set_byte_range(5, 15) + .matches(&query, tree.root_node(), to_callback(source)); + + assert_eq!( + collect_matches(matches, &query, source), + &[ + (0, vec![("element", "c")]), + (0, vec![("element", "d")]), + (0, vec![("element", "e")]), + ] + ); + }); +} + +#[test] +fn test_query_captures_within_byte_range() { + allocations::record(|| { + let language = get_language("c"); + let query = Query::new( + language, + " + (call_expression + function: (identifier) @function + arguments: (argument_list (string_literal) @string.arg)) + + (string_literal) @string + ", + ) + .unwrap(); + + let source = r#"DEFUN ("safe-length", Fsafe_length, Ssafe_length, 1, 1, 0)"#; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let mut cursor = QueryCursor::new(); + let captures = + cursor + .set_byte_range(3, 27) + .captures(&query, tree.root_node(), to_callback(source)); + + assert_eq!( + collect_captures(captures, &query, source), + &[ + ("function", "DEFUN"), + ("string.arg", "\"safe-length\""), + ("string", "\"safe-length\""), + ] + ); + }); +} + +#[test] +fn test_query_matches_different_queries_same_cursor() { + allocations::record(|| { + let language = get_language("javascript"); + let query1 = Query::new( + language, + " + (array (identifier) @id1) + ", + ) + .unwrap(); + let query2 = Query::new( + language, + " + (array (identifier) 
@id1) + (pair (identifier) @id2) + ", + ) + .unwrap(); + let query3 = Query::new( + language, + " + (array (identifier) @id1) + (pair (identifier) @id2) + (parenthesized_expression (identifier) @id3) + ", + ) + .unwrap(); + + let source = "[a, {b: b}, (c)];"; + + let mut parser = Parser::new(); + let mut cursor = QueryCursor::new(); + + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let matches = cursor.matches(&query1, tree.root_node(), to_callback(source)); + assert_eq!( + collect_matches(matches, &query1, source), + &[(0, vec![("id1", "a")]),] + ); + + let matches = cursor.matches(&query3, tree.root_node(), to_callback(source)); + assert_eq!( + collect_matches(matches, &query3, source), + &[ + (0, vec![("id1", "a")]), + (1, vec![("id2", "b")]), + (2, vec![("id3", "c")]), + ] + ); + + let matches = cursor.matches(&query2, tree.root_node(), to_callback(source)); + assert_eq!( + collect_matches(matches, &query2, source), + &[(0, vec![("id1", "a")]), (1, vec![("id2", "b")]),] + ); + }); +} + +#[test] +fn test_query_matches_with_multiple_captures_on_a_node() { + allocations::record(|| { + let language = get_language("javascript"); + let mut query = Query::new( + language, + "(function_declaration + (identifier) @name1 @name2 @name3 + (statement_block) @body1 @body2)", + ) + .unwrap(); + + let source = "function foo() { return 1; }"; + let mut parser = Parser::new(); + let mut cursor = QueryCursor::new(); + + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_matches(matches, &query, source), + &[( + 0, + vec![ + ("name1", "foo"), + ("name2", "foo"), + ("name3", "foo"), + ("body1", "{ return 1; }"), + ("body2", "{ return 1; }"), + ] + ),] + ); + + // disabling captures still works when there are multiple captures on a + // single node. 
+ query.disable_capture("name2"); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_matches(matches, &query, source), + &[( + 0, + vec![ + ("name1", "foo"), + ("name3", "foo"), + ("body1", "{ return 1; }"), + ("body2", "{ return 1; }"), + ] + ),] + ); + }); +} + +#[test] +fn test_query_matches_with_captured_wildcard_at_root() { + allocations::record(|| { + let language = get_language("python"); + let query = Query::new( + language, + " + ; captured wildcard at the root + (_ [ + (except_clause (block) @block) + (finally_clause (block) @block) + ]) @stmt + + [ + (while_statement (block) @block) + (if_statement (block) @block) + + ; captured wildcard at the root within an alternation + (_ [ + (else_clause (block) @block) + (elif_clause (block) @block) + ]) + + (try_statement (block) @block) + (for_statement (block) @block) + ] @stmt + ", + ) + .unwrap(); + + let source = " + for i in j: + while True: + if a: + print b + elif c: + print d + else: + try: + print f + except: + print g + finally: + print h + else: + print i + " + .trim(); + + let mut parser = Parser::new(); + let mut cursor = QueryCursor::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let match_capture_names_and_rows = cursor + .matches(&query, tree.root_node(), to_callback(source)) + .map(|m| { + m.captures + .iter() + .map(|c| { + ( + query.capture_names()[c.index as usize].as_str(), + c.node.kind(), + c.node.start_position().row, + ) + }) + .collect::>() + }) + .collect::>(); + + assert_eq!( + match_capture_names_and_rows, + &[ + vec![("stmt", "for_statement", 0), ("block", "block", 1)], + vec![("stmt", "while_statement", 1), ("block", "block", 2)], + vec![("stmt", "if_statement", 2), ("block", "block", 3)], + vec![("stmt", "if_statement", 2), ("block", "block", 5)], + vec![("stmt", "if_statement", 2), ("block", "block", 7)], + vec![("stmt", "try_statement", 7), ("block", "block", 8)], + 
vec![("stmt", "try_statement", 7), ("block", "block", 10)], + vec![("stmt", "try_statement", 7), ("block", "block", 12)], + vec![("stmt", "while_statement", 1), ("block", "block", 14)], + ] + ) + }); +} + +#[test] +fn test_query_matches_with_no_captures() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (identifier) + (string) @s + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + a = 'hi'; + b = 'bye'; + ", + &[ + (0, vec![]), + (1, vec![("s", "'hi'")]), + (0, vec![]), + (1, vec![("s", "'bye'")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_repeated_fields() { + allocations::record(|| { + let language = get_language("c"); + let query = Query::new( + language, + "(field_declaration declarator: (field_identifier) @field)", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + struct S { + int a, b, c; + } + ", + &[ + (0, vec![("field", "a")]), + (0, vec![("field", "b")]), + (0, vec![("field", "c")]), + ], + ); + }); +} + +#[test] +fn test_query_captures_basic() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (pair + key: _ @method.def + (function + name: (identifier) @method.alias)) + + (variable_declarator + name: _ @function.def + value: (function + name: (identifier) @function.alias)) + + ":" @delimiter + "=" @operator + "#, + ) + .unwrap(); + + let source = " + a({ + bc: function de() { + const fg = function hi() {} + }, + jk: function lm() { + const no = function pq() {} + }, + }); + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + + assert_eq!( + collect_matches(matches, &query, source), + &[ + (2, vec![("delimiter", ":")]), + (0, vec![("method.def", "bc"), 
("method.alias", "de")]), + (3, vec![("operator", "=")]), + (1, vec![("function.def", "fg"), ("function.alias", "hi")]), + (2, vec![("delimiter", ":")]), + (0, vec![("method.def", "jk"), ("method.alias", "lm")]), + (3, vec![("operator", "=")]), + (1, vec![("function.def", "no"), ("function.alias", "pq")]), + ], + ); + + let captures = cursor.captures(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_captures(captures, &query, source), + &[ + ("method.def", "bc"), + ("delimiter", ":"), + ("method.alias", "de"), + ("function.def", "fg"), + ("operator", "="), + ("function.alias", "hi"), + ("method.def", "jk"), + ("delimiter", ":"), + ("method.alias", "lm"), + ("function.def", "no"), + ("operator", "="), + ("function.alias", "pq"), + ] + ); + }); +} + +#[test] +fn test_query_captures_with_text_conditions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + ((identifier) @constant + (#match? @constant "^[A-Z]{2,}$")) + + ((identifier) @constructor + (#match? @constructor "^[A-Z]")) + + ((identifier) @function.builtin + (#eq? @function.builtin "require")) + + ((identifier) @variable + (#not-match? 
@variable "^(lambda|load)$")) + "#, + ) + .unwrap(); + + let source = " + toad + load + panda + lambda + const ab = require('./ab'); + new Cd(EF); + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + let captures = cursor.captures(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_captures(captures, &query, source), + &[ + ("variable", "toad"), + ("variable", "panda"), + ("variable", "ab"), + ("function.builtin", "require"), + ("variable", "require"), + ("constructor", "Cd"), + ("variable", "Cd"), + ("constant", "EF"), + ("constructor", "EF"), + ("variable", "EF"), + ], + ); + }); +} + +#[test] +fn test_query_captures_with_predicates() { + allocations::record(|| { + let language = get_language("javascript"); + + let query = Query::new( + language, + r#" + ((call_expression (identifier) @foo) + (#set! name something) + (#set! cool) + (#something! @foo omg)) + + ((property_identifier) @bar + (#is? cool) + (#is-not? 
name something))"#, + ) + .unwrap(); + + assert_eq!( + query.property_settings(0), + &[ + QueryProperty::new("name", Some("something"), None), + QueryProperty::new("cool", None, None), + ] + ); + assert_eq!( + query.general_predicates(0), + &[QueryPredicate { + operator: "something!".to_string().into_boxed_str(), + args: vec![ + QueryPredicateArg::Capture(0), + QueryPredicateArg::String("omg".to_string().into_boxed_str()), + ], + },] + ); + assert_eq!(query.property_settings(1), &[]); + assert_eq!(query.property_predicates(0), &[]); + assert_eq!( + query.property_predicates(1), + &[ + (QueryProperty::new("cool", None, None), true), + (QueryProperty::new("name", Some("something"), None), false), + ] + ); + }); +} + +#[test] +fn test_query_captures_with_quoted_predicate_args() { + allocations::record(|| { + let language = get_language("javascript"); + + // Double-quoted strings can contain: + // * special escape sequences like \n and \r + // * escaped double quotes with \* + // * literal backslashes with \\ + let query = Query::new( + language, + r#" + ((call_expression (identifier) @foo) + (#set! one "\"something\ngreat\"")) + + ((identifier) + (#set! two "\\s(\r?\n)*$")) + + ((function_declaration) + (#set! 
three "\"something\ngreat\"")) + "#, + ) + .unwrap(); + + assert_eq!( + query.property_settings(0), + &[QueryProperty::new( + "one", + Some("\"something\ngreat\""), + None + )] + ); + assert_eq!( + query.property_settings(1), + &[QueryProperty::new("two", Some("\\s(\r?\n)*$"), None)] + ); + assert_eq!( + query.property_settings(2), + &[QueryProperty::new( + "three", + Some("\"something\ngreat\""), + None + )] + ); + }); +} + +#[test] +fn test_query_captures_with_duplicates() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (variable_declarator + name: (identifier) @function + value: (function)) + + (identifier) @variable + "#, + ) + .unwrap(); + + let source = " + var x = function() {}; + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + let captures = cursor.captures(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_captures(captures, &query, source), + &[("function", "x"), ("variable", "x"),], + ); + }); +} + +#[test] +fn test_query_captures_with_many_nested_results_without_fields() { + allocations::record(|| { + let language = get_language("javascript"); + + // Search for key-value pairs whose values are anonymous functions. + let query = Query::new( + language, + r#" + (pair + key: _ @method-def + (arrow_function)) + + ":" @colon + "," @comma + "#, + ) + .unwrap(); + + // The `pair` node for key `y` does not match any pattern, but inside of + // its value, it contains many other `pair` nodes that do match the pattern. + // The match for the *outer* pair should be terminated *before* descending into + // the object value, so that we can avoid needing to buffer all of the inner + // matches. 
+ let method_count = 50; + let mut source = "x = { y: {\n".to_owned(); + for i in 0..method_count { + writeln!(&mut source, " method{}: $ => null,", i).unwrap(); + } + source.push_str("}};\n"); + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + let captures = cursor.captures(&query, tree.root_node(), to_callback(&source)); + let captures = collect_captures(captures, &query, &source); + + assert_eq!( + &captures[0..13], + &[ + ("colon", ":"), + ("method-def", "method0"), + ("colon", ":"), + ("comma", ","), + ("method-def", "method1"), + ("colon", ":"), + ("comma", ","), + ("method-def", "method2"), + ("colon", ":"), + ("comma", ","), + ("method-def", "method3"), + ("colon", ":"), + ("comma", ","), + ] + ); + + // Ensure that we don't drop matches because of needing to buffer too many. + assert_eq!(captures.len(), 1 + 3 * method_count); + }); +} + +#[test] +fn test_query_captures_with_many_nested_results_with_fields() { + allocations::record(|| { + let language = get_language("javascript"); + + // Search expressions like `a ? a.b : null` + let query = Query::new( + language, + r#" + ((ternary_expression + condition: (identifier) @left + consequence: (member_expression + object: (identifier) @right) + alternative: (null)) + (#eq? @left @right)) + "#, + ) + .unwrap(); + + // The outer expression does not match the pattern, but the consequence of the ternary + // is an object that *does* contain many occurences of the pattern. + let count = 50; + let mut source = "a ? {".to_owned(); + for i in 0..count { + writeln!(&mut source, " x: y{} ? 
y{}.z : null,", i, i).unwrap(); + } + source.push_str("} : null;\n"); + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + let captures = cursor.captures(&query, tree.root_node(), to_callback(&source)); + let captures = collect_captures(captures, &query, &source); + + assert_eq!( + &captures[0..20], + &[ + ("left", "y0"), + ("right", "y0"), + ("left", "y1"), + ("right", "y1"), + ("left", "y2"), + ("right", "y2"), + ("left", "y3"), + ("right", "y3"), + ("left", "y4"), + ("right", "y4"), + ("left", "y5"), + ("right", "y5"), + ("left", "y6"), + ("right", "y6"), + ("left", "y7"), + ("right", "y7"), + ("left", "y8"), + ("right", "y8"), + ("left", "y9"), + ("right", "y9"), + ] + ); + + // Ensure that we don't drop matches because of needing to buffer too many. + assert_eq!(captures.len(), 2 * count); + }); +} + +#[test] +fn test_query_captures_with_too_many_nested_results() { + allocations::record(|| { + let language = get_language("javascript"); + + // Search for method calls in general, and also method calls with a template string + // in place of an argument list (aka "tagged template strings") in particular. + // + // This second pattern, which looks for the tagged template strings, is expensive to + // use with the `captures()` method, because: + // 1. When calling `captures`, all of the captures must be returned in order of their + // appearance. + // 2. This pattern captures the root `call_expression`. + // 3. This pattern's result also depends on the final child (the template string). + // 4. In between the `call_expression` and the possible `template_string`, there can + // be an arbitrarily deep subtree. 
+ // + // This means that, if any patterns match *after* the initial `call_expression` is + // captured, but before the final `template_string` is found, those matches must + // be buffered, in order to prevent captures from being returned out-of-order. + let query = Query::new( + language, + r#" + ;; easy šŸ‘‡ + (call_expression + function: (member_expression + property: (property_identifier) @method-name)) + + ;; hard šŸ‘‡ + (call_expression + function: (member_expression + property: (property_identifier) @template-tag) + arguments: (template_string)) @template-call + "#, + ) + .unwrap(); + + // There are a *lot* of matches in between the beginning of the outer `call_expression` + // (the call to `a(...).f`), which starts at the beginning of the file, and the final + // template string, which occurs at the end of the file. The query algorithm imposes a + // limit on the total number of matches which can be buffered at a time. But we don't + // want to neglect the inner matches just because of the expensive outer match, so we + // abandon the outer match (which would have captured `f` as a `template-tag`). 
+ let source = " + a(b => { + b.c0().d0 `šŸ˜„`; + b.c1().d1 `šŸ˜„`; + b.c2().d2 `šŸ˜„`; + b.c3().d3 `šŸ˜„`; + b.c4().d4 `šŸ˜„`; + b.c5().d5 `šŸ˜„`; + b.c6().d6 `šŸ˜„`; + b.c7().d7 `šŸ˜„`; + b.c8().d8 `šŸ˜„`; + b.c9().d9 `šŸ˜„`; + }).e().f ``; + " + .trim(); + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let captures = cursor.captures(&query, tree.root_node(), to_callback(&source)); + let captures = collect_captures(captures, &query, &source); + + assert_eq!( + &captures[0..4], + &[ + ("template-call", "b.c0().d0 `šŸ˜„`"), + ("method-name", "c0"), + ("method-name", "d0"), + ("template-tag", "d0"), + ] + ); + assert_eq!( + &captures[36..40], + &[ + ("template-call", "b.c9().d9 `šŸ˜„`"), + ("method-name", "c9"), + ("method-name", "d9"), + ("template-tag", "d9"), + ] + ); + assert_eq!( + &captures[40..], + &[("method-name", "e"), ("method-name", "f"),] + ); + }); +} + +#[test] +fn test_query_captures_with_definite_pattern_containing_many_nested_matches() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (array + "[" @l-bracket + "]" @r-bracket) + + "." @dot + "#, + ) + .unwrap(); + + // The '[' node must be returned before all of the '.' nodes, + // even though its pattern does not finish until the ']' node + // at the end of the document. But because the '[' is definite, + // it can be returned before the pattern finishes matching. 
+ let source = " + [ + a.b.c.d.e.f.g.h.i, + a.b.c.d.e.f.g.h.i, + a.b.c.d.e.f.g.h.i, + a.b.c.d.e.f.g.h.i, + a.b.c.d.e.f.g.h.i, + ] + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + let captures = cursor.captures(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_captures(captures, &query, source), + [("l-bracket", "[")] + .iter() + .chain([("dot", "."); 40].iter()) + .chain([("r-bracket", "]")].iter()) + .cloned() + .collect::>(), + ); + }); +} + +#[test] +fn test_query_captures_ordered_by_both_start_and_end_positions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (call_expression) @call + (member_expression) @member + (identifier) @variable + "#, + ) + .unwrap(); + + let source = " + a.b(c.d().e).f; + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + let captures = cursor.captures(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_captures(captures, &query, source), + &[ + ("member", "a.b(c.d().e).f"), + ("call", "a.b(c.d().e)"), + ("member", "a.b"), + ("variable", "a"), + ("member", "c.d().e"), + ("call", "c.d()"), + ("member", "c.d"), + ("variable", "c"), + ], + ); + }); +} + +#[test] +fn test_query_captures_with_matches_removed() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (binary_expression + left: (identifier) @left + operator: _ @op + right: (identifier) @right) + "#, + ) + .unwrap(); + + let source = " + a === b && c > d && e < f; + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + let mut 
captured_strings = Vec::new(); + for (m, i) in cursor.captures(&query, tree.root_node(), to_callback(source)) { + let capture = m.captures[i]; + let text = capture.node.utf8_text(source.as_bytes()).unwrap(); + if text == "a" { + m.remove(); + continue; + } + captured_strings.push(text); + } + + assert_eq!(captured_strings, &["c", ">", "d", "e", "<", "f",]); + }); +} + +#[test] +fn test_query_captures_and_matches_iterators_are_fused() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (comment) @comment + "#, + ) + .unwrap(); + + let source = " + // one + // two + // three + /* unfinished + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let mut captures = cursor.captures(&query, tree.root_node(), to_callback(source)); + + assert_eq!(captures.next().unwrap().0.captures[0].index, 0); + assert_eq!(captures.next().unwrap().0.captures[0].index, 0); + assert_eq!(captures.next().unwrap().0.captures[0].index, 0); + assert!(captures.next().is_none()); + assert!(captures.next().is_none()); + assert!(captures.next().is_none()); + drop(captures); + + let mut matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + assert_eq!(matches.next().unwrap().captures[0].index, 0); + assert_eq!(matches.next().unwrap().captures[0].index, 0); + assert_eq!(matches.next().unwrap().captures[0].index, 0); + assert!(matches.next().is_none()); + assert!(matches.next().is_none()); + assert!(matches.next().is_none()); + }); +} + +#[test] +fn test_query_start_byte_for_pattern() { + let language = get_language("javascript"); + + let patterns_1 = r#" + "+" @operator + "-" @operator + "*" @operator + "=" @operator + "=>" @operator + "# + .trim_start(); + + let patterns_2 = " + (identifier) @a + (string) @b + " + .trim_start(); + + let patterns_3 = " + ((identifier) @b (#match? 
@b i)) + (function_declaration name: (identifier) @c) + (method_definition name: (property_identifier) @d) + " + .trim_start(); + + let mut source = String::new(); + source += patterns_1; + source += patterns_2; + source += patterns_3; + + let query = Query::new(language, &source).unwrap(); + + assert_eq!(query.start_byte_for_pattern(0), 0); + assert_eq!(query.start_byte_for_pattern(5), patterns_1.len()); + assert_eq!( + query.start_byte_for_pattern(7), + patterns_1.len() + patterns_2.len() + ); +} + +#[test] +fn test_query_capture_names() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (if_statement + condition: (parenthesized_expression (binary_expression + left: _ @left-operand + operator: "||" + right: _ @right-operand)) + consequence: (statement_block) @body) + + (while_statement + condition: _ @loop-condition) + "#, + ) + .unwrap(); + + assert_eq!( + query.capture_names(), + &[ + "left-operand".to_string(), + "right-operand".to_string(), + "body".to_string(), + "loop-condition".to_string(), + ] + ); + }); +} + +#[test] +fn test_query_with_no_patterns() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new(language, "").unwrap(); + assert!(query.capture_names().is_empty()); + assert_eq!(query.pattern_count(), 0); + }); +} + +#[test] +fn test_query_comments() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + ; this is my first comment + ; i have two comments here + (function_declaration + ; there is also a comment here + ; and here + name: (identifier) @fn-name)", + ) + .unwrap(); + + let source = "function one() { }"; + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + assert_eq!( + 
collect_matches(matches, &query, source), + &[(0, vec![("fn-name", "one")]),], + ); + }); +} + +#[test] +fn test_query_disable_pattern() { + allocations::record(|| { + let language = get_language("javascript"); + let mut query = Query::new( + language, + " + (function_declaration + name: (identifier) @name) + (function_declaration + body: (statement_block) @body) + (class_declaration + name: (identifier) @name) + (class_declaration + body: (class_body) @body) + ", + ) + .unwrap(); + + // disable the patterns that match names + query.disable_pattern(0); + query.disable_pattern(2); + + let source = "class A { constructor() {} } function b() { return 1; }"; + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_matches(matches, &query, source), + &[ + (3, vec![("body", "{ constructor() {} }")]), + (1, vec![("body", "{ return 1; }")]), + ], + ); + }); +} + +#[test] +fn test_query_alternative_predicate_prefix() { + allocations::record(|| { + let language = get_language("c"); + let query = Query::new( + language, + r#" + ((call_expression + function: (identifier) @keyword + arguments: (argument_list + (string_literal) @function)) + (.eq? @keyword "DEFUN")) + "#, + ) + .unwrap(); + let source = r#" + DEFUN ("identity", Fidentity, Sidentity, 1, 1, 0, + doc: /* Return the argument unchanged. 
*/ + attributes: const) + (Lisp_Object arg) + { + return arg; + } + "#; + assert_query_matches( + language, + &query, + source, + &[(0, vec![("keyword", "DEFUN"), ("function", "\"identity\"")])], + ); + }); +} + +#[test] +fn test_query_step_is_definite() { + struct Row { + language: Language, + description: &'static str, + pattern: &'static str, + results_by_substring: &'static [(&'static str, bool)], + } + + let rows = &[ + Row { + description: "no definite steps", + language: get_language("python"), + pattern: r#"(expression_statement (string))"#, + results_by_substring: &[("expression_statement", false), ("string", false)], + }, + Row { + description: "all definite steps", + language: get_language("javascript"), + pattern: r#"(object "{" "}")"#, + results_by_substring: &[("object", false), ("{", true), ("}", true)], + }, + Row { + description: "an indefinite step that is optional", + language: get_language("javascript"), + pattern: r#"(object "{" (identifier)? @foo "}")"#, + results_by_substring: &[ + ("object", false), + ("{", true), + ("(identifier)?", false), + ("}", true), + ], + }, + Row { + description: "multiple indefinite steps that are optional", + language: get_language("javascript"), + pattern: r#"(object "{" (identifier)? @id1 ("," (identifier) @id2)? "}")"#, + results_by_substring: &[ + ("object", false), + ("{", true), + ("(identifier)? @id1", false), + ("\",\"", false), + ("}", true), + ], + }, + Row { + description: "definite step after indefinite step", + language: get_language("javascript"), + pattern: r#"(pair (property_identifier) ":")"#, + results_by_substring: &[("pair", false), ("property_identifier", false), (":", true)], + }, + Row { + description: "indefinite step in between two definite steps", + language: get_language("javascript"), + pattern: r#"(ternary_expression + condition: (_) + "?" 
+ consequence: (call_expression) + ":" + alternative: (_))"#, + results_by_substring: &[ + ("condition:", false), + ("\"?\"", false), + ("consequence:", false), + ("\":\"", true), + ("alternative:", true), + ], + }, + Row { + description: "one definite step after a repetition", + language: get_language("javascript"), + pattern: r#"(object "{" (_) "}")"#, + results_by_substring: &[("object", false), ("{", false), ("(_)", false), ("}", true)], + }, + Row { + description: "definite steps after multiple repetitions", + language: get_language("json"), + pattern: r#"(object "{" (pair) "," (pair) "," (_) "}")"#, + results_by_substring: &[ + ("object", false), + ("{", false), + ("(pair) \",\" (pair)", false), + ("(pair) \",\" (_)", false), + ("\",\" (_)", false), + ("(_)", true), + ("}", true), + ], + }, + Row { + description: "a definite with a field", + language: get_language("javascript"), + pattern: r#"(binary_expression left: (identifier) right: (_))"#, + results_by_substring: &[ + ("binary_expression", false), + ("(identifier)", false), + ("(_)", true), + ], + }, + Row { + description: "multiple definite steps with fields", + language: get_language("javascript"), + pattern: r#"(function_declaration name: (identifier) body: (statement_block))"#, + results_by_substring: &[ + ("function_declaration", false), + ("identifier", true), + ("statement_block", true), + ], + }, + Row { + description: "nesting, one definite step", + language: get_language("javascript"), + pattern: r#" + (function_declaration + name: (identifier) + body: (statement_block "{" (expression_statement) "}"))"#, + results_by_substring: &[ + ("function_declaration", false), + ("identifier", false), + ("statement_block", false), + ("{", false), + ("expression_statement", false), + ("}", true), + ], + }, + Row { + description: "definite step after some deeply nested hidden nodes", + language: get_language("ruby"), + pattern: r#" + (singleton_class + value: (constant) + "end") + "#, + results_by_substring: 
&[ + ("singleton_class", false), + ("constant", false), + ("end", true), + ], + }, + Row { + description: "nesting, no definite steps", + language: get_language("javascript"), + pattern: r#" + (call_expression + function: (member_expression + property: (property_identifier) @template-tag) + arguments: (template_string)) @template-call + "#, + results_by_substring: &[("property_identifier", false), ("template_string", false)], + }, + Row { + description: "a definite step after a nested node", + language: get_language("javascript"), + pattern: r#" + (subscript_expression + object: (member_expression + object: (identifier) @obj + property: (property_identifier) @prop) + "[") + "#, + results_by_substring: &[ + ("identifier", false), + ("property_identifier", true), + ("[", true), + ], + }, + Row { + description: "a step that is indefinite due to a predicate", + language: get_language("javascript"), + pattern: r#" + (subscript_expression + object: (member_expression + object: (identifier) @obj + property: (property_identifier) @prop) + "[" + (#match? 
@prop "foo")) + "#, + results_by_substring: &[ + ("identifier", false), + ("property_identifier", false), + ("[", true), + ], + }, + Row { + description: "alternation where one branch has definite steps", + language: get_language("javascript"), + pattern: r#" + [ + (unary_expression (identifier)) + (call_expression + function: (_) + arguments: (_)) + (binary_expression right:(call_expression)) + ] + "#, + results_by_substring: &[ + ("identifier", false), + ("right:", false), + ("function:", true), + ("arguments:", true), + ], + }, + Row { + description: "aliased parent node", + language: get_language("ruby"), + pattern: r#" + (method_parameters "(" (identifier) @id")") + "#, + results_by_substring: &[("\"(\"", false), ("(identifier)", false), ("\")\"", true)], + }, + Row { + description: "long, but not too long to analyze", + language: get_language("javascript"), + pattern: r#" + (object "{" (pair) (pair) (pair) (pair) "}") + "#, + results_by_substring: &[ + ("\"{\"", false), + ("(pair)", false), + ("(pair) \"}\"", false), + ("\"}\"", true), + ], + }, + Row { + description: "too long to analyze", + language: get_language("javascript"), + pattern: r#" + (object "{" (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) "}") + "#, + results_by_substring: &[ + ("\"{\"", false), + ("(pair)", false), + ("(pair) \"}\"", false), + ("\"}\"", false), + ], + }, + Row { + description: "hidden nodes that have several fields", + language: get_language("java"), + pattern: r#" + (method_declaration name: (identifier)) + "#, + results_by_substring: &[("name:", true)], + }, + ]; + + allocations::record(|| { + eprintln!(""); + + for row in rows.iter() { + if let Some(filter) = EXAMPLE_FILTER.as_ref() { + if !row.description.contains(filter.as_str()) { + continue; + } + } + eprintln!(" query example: {:?}", row.description); + let query = Query::new(row.language, row.pattern).unwrap(); + for (substring, is_definite) in row.results_by_substring { + let 
offset = row.pattern.find(substring).unwrap(); + assert_eq!( + query.step_is_definite(offset), + *is_definite, + "Description: {}, Pattern: {:?}, substring: {:?}, expected is_definite to be {}", + row.description, + row.pattern + .split_ascii_whitespace() + .collect::>() + .join(" "), + substring, + is_definite, + ) + } + } + }); +} + +fn assert_query_matches( + language: Language, + query: &Query, + source: &str, + expected: &[(usize, Vec<(&str, &str)>)], +) { + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + assert_eq!(collect_matches(matches, &query, source), expected); +} + +fn collect_matches<'a>( + matches: impl Iterator>, + query: &'a Query, + source: &'a str, +) -> Vec<(usize, Vec<(&'a str, &'a str)>)> { + matches + .map(|m| { + ( + m.pattern_index, + format_captures(m.captures.iter().cloned(), query, source), + ) + }) + .collect() +} + +fn collect_captures<'a>( + captures: impl Iterator, usize)>, + query: &'a Query, + source: &'a str, +) -> Vec<(&'a str, &'a str)> { + format_captures(captures.map(|(m, i)| m.captures[i]), query, source) +} + +fn format_captures<'a>( + captures: impl Iterator>, + query: &'a Query, + source: &'a str, +) -> Vec<(&'a str, &'a str)> { + captures + .map(|capture| { + ( + query.capture_names()[capture.index as usize].as_str(), + capture.node.utf8_text(source.as_bytes()).unwrap(), + ) + }) + .collect() +} + +fn to_callback<'a>(source: &'a str) -> impl Fn(Node) -> &'a [u8] { + move |n| &source.as_bytes()[n.byte_range()] +} diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs new file mode 100644 index 00000000..2b058c0b --- /dev/null +++ b/cli/src/tests/tags_test.rs @@ -0,0 +1,437 @@ +use super::helpers::allocations; +use super::helpers::fixtures::{get_language, get_language_queries_path}; +use std::ffi::CStr; +use 
std::ffi::CString; +use std::{fs, ptr, slice, str}; +use tree_sitter::Point; +use tree_sitter_tags::c_lib as c; +use tree_sitter_tags::{Error, TagsConfiguration, TagsContext}; + +const PYTHON_TAG_QUERY: &'static str = r#" +( + (function_definition + name: (identifier) @name + body: (block . (expression_statement (string) @doc))) @definition.function + (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") +) + +(function_definition + name: (identifier) @name) @definition.function + +( + (class_definition + name: (identifier) @name + body: (block + . (expression_statement (string) @doc))) @definition.class + (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") +) + +(class_definition + name: (identifier) @name) @definition.class + +(call + function: (identifier) @name) @reference.call + +(call + function: (attribute + attribute: (identifier) @name)) @reference.call +"#; + +const JS_TAG_QUERY: &'static str = r#" +( + (comment)* @doc . + (class_declaration + name: (identifier) @name) @definition.class + (#select-adjacent! @doc @definition.class) + (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") +) + +( + (comment)* @doc . + (method_definition + name: (property_identifier) @name) @definition.method + (#select-adjacent! @doc @definition.method) + (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") +) + +( + (comment)* @doc . + (function_declaration + name: (identifier) @name) @definition.function + (#select-adjacent! @doc @definition.function) + (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") +) + +(call_expression + function: (identifier) @name) @reference.call +"#; + +const RUBY_TAG_QUERY: &'static str = r#" +(method + name: (_) @name) @definition.method + +(method_call + method: (identifier) @name) @reference.call + +(setter (identifier) @ignore) + +((identifier) @name @reference.call + (#is-not? 
local)) +"#; + +#[test] +fn test_tags_python() { + let language = get_language("python"); + let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap(); + let mut tag_context = TagsContext::new(); + + let source = br#" + class Customer: + """ + Data about a customer + """ + + def age(self): + ''' + Get the customer's age + ''' + compute_age(self.id) + } + "#; + + let tags = tag_context + .generate_tags(&tags_config, source, None) + .unwrap() + .0 + .collect::, _>>() + .unwrap(); + + assert_eq!( + tags.iter() + .map(|t| ( + substr(source, &t.name_range), + tags_config.syntax_type_name(t.syntax_type_id) + )) + .collect::>(), + &[ + ("Customer", "class"), + ("age", "function"), + ("compute_age", "call"), + ] + ); + + assert_eq!(substr(source, &tags[0].line_range), "class Customer:"); + assert_eq!(substr(source, &tags[1].line_range), "def age(self):"); + assert_eq!(tags[0].docs.as_ref().unwrap(), "Data about a customer"); + assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age"); +} + +#[test] +fn test_tags_javascript() { + let language = get_language("javascript"); + let tags_config = TagsConfiguration::new(language, JS_TAG_QUERY, "").unwrap(); + let source = br#" + // hi + + // Data about a customer. 
+ // bla bla bla + class Customer { + /* + * Get the customer's age + */ + getAge() { + } + } + + // ok + + class Agent { + + } + "#; + + let mut tag_context = TagsContext::new(); + let tags = tag_context + .generate_tags(&tags_config, source, None) + .unwrap() + .0 + .collect::, _>>() + .unwrap(); + + assert_eq!( + tags.iter() + .map(|t| ( + substr(source, &t.name_range), + t.span.clone(), + tags_config.syntax_type_name(t.syntax_type_id) + )) + .collect::>(), + &[ + ("Customer", Point::new(5, 10)..Point::new(5, 18), "class",), + ("getAge", Point::new(9, 8)..Point::new(9, 14), "method",), + ("Agent", Point::new(15, 10)..Point::new(15, 15), "class",) + ] + ); + assert_eq!( + tags[0].docs.as_ref().unwrap(), + "Data about a customer.\nbla bla bla" + ); + assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age"); + assert_eq!(tags[2].docs, None); +} + +#[test] +fn test_tags_columns_measured_in_utf16_code_units() { + let language = get_language("python"); + let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap(); + let mut tag_context = TagsContext::new(); + + let source = r#""ā¤ļøā¤ļøā¤ļø".hello_α_ω()"#.as_bytes(); + + let tag = tag_context + .generate_tags(&tags_config, source, None) + .unwrap() + .0 + .next() + .unwrap() + .unwrap(); + + assert_eq!(substr(source, &tag.name_range), "hello_α_ω"); + assert_eq!(tag.span, Point::new(0, 21)..Point::new(0, 32)); + assert_eq!(tag.utf16_column_range, 9..18); +} + +#[test] +fn test_tags_ruby() { + let language = get_language("ruby"); + let locals_query = + fs::read_to_string(get_language_queries_path("ruby").join("locals.scm")).unwrap(); + let tags_config = TagsConfiguration::new(language, RUBY_TAG_QUERY, &locals_query).unwrap(); + let source = strip_whitespace( + 8, + " + b = 1 + + def foo=() + c = 1 + + # a is a method because it is not in scope + # b is a method because `b` doesn't capture variables from its containing scope + bar a, b, c + + [1, 2, 3].each do |a| + # a is a 
parameter + # b is a method + # c is a variable, because the block captures variables from its containing scope. + baz a, b, c + end + end", + ); + + let mut tag_context = TagsContext::new(); + let tags = tag_context + .generate_tags(&tags_config, source.as_bytes(), None) + .unwrap() + .0 + .collect::, _>>() + .unwrap(); + + assert_eq!( + tags.iter() + .map(|t| ( + substr(source.as_bytes(), &t.name_range), + tags_config.syntax_type_name(t.syntax_type_id), + (t.span.start.row, t.span.start.column), + )) + .collect::>(), + &[ + ("foo=", "method", (2, 4)), + ("bar", "call", (7, 4)), + ("a", "call", (7, 8)), + ("b", "call", (7, 11)), + ("each", "call", (9, 14)), + ("baz", "call", (13, 8)), + ("b", "call", (13, 15),), + ] + ); +} + +#[test] +fn test_tags_cancellation() { + use std::sync::atomic::{AtomicUsize, Ordering}; + + allocations::record(|| { + // Large javascript document + let source = (0..500) + .map(|_| "/* hi */ class A { /* ok */ b() {} }\n") + .collect::(); + + let cancellation_flag = AtomicUsize::new(0); + let language = get_language("javascript"); + let tags_config = TagsConfiguration::new(language, JS_TAG_QUERY, "").unwrap(); + + let mut tag_context = TagsContext::new(); + let tags = tag_context + .generate_tags(&tags_config, source.as_bytes(), Some(&cancellation_flag)) + .unwrap(); + + for (i, tag) in tags.0.enumerate() { + if i == 150 { + cancellation_flag.store(1, Ordering::SeqCst); + } + if let Err(e) = tag { + assert_eq!(e, Error::Cancelled); + return; + } + } + + panic!("Expected to halt tagging with an error"); + }); +} + +#[test] +fn test_invalid_capture() { + let language = get_language("python"); + let e = TagsConfiguration::new(language, "(identifier) @method", "") + .expect_err("expected InvalidCapture error"); + assert_eq!(e, Error::InvalidCapture("method".to_string())); +} + +#[test] +fn test_tags_with_parse_error() { + let language = get_language("python"); + let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, 
"").unwrap(); + let mut tag_context = TagsContext::new(); + + let source = br#" + class Fine: pass + class Bad + "#; + + let (tags, failed) = tag_context + .generate_tags(&tags_config, source, None) + .unwrap(); + + let newtags = tags.collect::, _>>().unwrap(); + + assert!(failed, "syntax error should have been detected"); + + assert_eq!( + newtags.iter() + .map(|t| ( + substr(source, &t.name_range), + tags_config.syntax_type_name(t.syntax_type_id) + )) + .collect::>(), + &[ + ("Fine", "class"), + ] + ); +} + + +#[test] +fn test_tags_via_c_api() { + allocations::record(|| { + let tagger = c::ts_tagger_new(); + let buffer = c::ts_tags_buffer_new(); + let scope_name = "source.js"; + let language = get_language("javascript"); + + let source_code = strip_whitespace( + 12, + " + var a = 1; + + // one + // two + // three + function b() { + } + + // four + // five + class C extends D { + + } + + b(a);", + ); + + let c_scope_name = CString::new(scope_name).unwrap(); + let result = c::ts_tagger_add_language( + tagger, + c_scope_name.as_ptr(), + language, + JS_TAG_QUERY.as_ptr(), + ptr::null(), + JS_TAG_QUERY.len() as u32, + 0, + ); + assert_eq!(result, c::TSTagsError::Ok); + + let result = c::ts_tagger_tag( + tagger, + c_scope_name.as_ptr(), + source_code.as_ptr(), + source_code.len() as u32, + buffer, + ptr::null(), + ); + assert_eq!(result, c::TSTagsError::Ok); + let tags = unsafe { + slice::from_raw_parts( + c::ts_tags_buffer_tags(buffer), + c::ts_tags_buffer_tags_len(buffer) as usize, + ) + }; + let docs = str::from_utf8(unsafe { + slice::from_raw_parts( + c::ts_tags_buffer_docs(buffer) as *const u8, + c::ts_tags_buffer_docs_len(buffer) as usize, + ) + }) + .unwrap(); + + let syntax_types: Vec<&str> = unsafe { + let mut len: u32 = 0; + let ptr = + c::ts_tagger_syntax_kinds_for_scope_name(tagger, c_scope_name.as_ptr(), &mut len); + slice::from_raw_parts(ptr, len as usize) + .iter() + .map(|i| CStr::from_ptr(*i).to_str().unwrap()) + .collect() + }; + + assert_eq!( + 
tags.iter() + .map(|tag| ( + syntax_types[tag.syntax_type_id as usize], + &source_code[tag.name_start_byte as usize..tag.name_end_byte as usize], + &source_code[tag.line_start_byte as usize..tag.line_end_byte as usize], + &docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize], + )) + .collect::>(), + &[ + ("function", "b", "function b() {", "one\ntwo\nthree"), + ("class", "C", "class C extends D {", "four\nfive"), + ("call", "b", "b(a);", "") + ] + ); + + c::ts_tags_buffer_delete(buffer); + c::ts_tagger_delete(tagger); + }); +} + +fn substr<'a>(source: &'a [u8], range: &std::ops::Range) -> &'a str { + std::str::from_utf8(&source[range.clone()]).unwrap() +} + +fn strip_whitespace(indent: usize, s: &str) -> String { + s.lines() + .skip(1) + .map(|line| &line[line.len().min(indent)..]) + .collect::>() + .join("\n") +} diff --git a/cli/src/tests/test_highlight_test.rs b/cli/src/tests/test_highlight_test.rs new file mode 100644 index 00000000..1a658281 --- /dev/null +++ b/cli/src/tests/test_highlight_test.rs @@ -0,0 +1,64 @@ +use super::helpers::fixtures::{get_highlight_config, get_language, test_loader}; +use crate::query_testing::{parse_position_comments, Assertion}; +use crate::test_highlight::get_highlight_positions; +use tree_sitter::{Parser, Point}; +use tree_sitter_highlight::{Highlight, Highlighter}; + +#[test] +fn test_highlight_test_with_basic_test() { + let language = get_language("javascript"); + let config = get_highlight_config( + "javascript", + Some("injections.scm"), + &[ + "function".to_string(), + "variable.parameter".to_string(), + "keyword".to_string(), + ], + ); + let source = [ + "var abc = function(d) {", + " // ^ function", + " // ^ keyword", + " return d + e;", + " // ^ variable.parameter", + "};", + ] + .join("\n"); + + let assertions = + parse_position_comments(&mut Parser::new(), language, source.as_bytes()).unwrap(); + assert_eq!( + assertions, + &[ + Assertion { + position: Point::new(0, 5), + expected_capture_name: 
"function".to_string() + }, + Assertion { + position: Point::new(0, 11), + expected_capture_name: "keyword".to_string() + }, + Assertion { + position: Point::new(3, 9), + expected_capture_name: "variable.parameter".to_string() + }, + ] + ); + + let mut highlighter = Highlighter::new(); + let highlight_positions = + get_highlight_positions(test_loader(), &mut highlighter, &config, source.as_bytes()) + .unwrap(); + assert_eq!( + highlight_positions, + &[ + (Point::new(0, 0), Point::new(0, 3), Highlight(2)), // "var" + (Point::new(0, 4), Point::new(0, 7), Highlight(0)), // "abc" + (Point::new(0, 10), Point::new(0, 18), Highlight(2)), // "function" + (Point::new(0, 19), Point::new(0, 20), Highlight(1)), // "d" + (Point::new(3, 2), Point::new(3, 8), Highlight(2)), // "return" + (Point::new(3, 9), Point::new(3, 10), Highlight(1)), // "d" + ] + ); +} diff --git a/cli/src/util.rs b/cli/src/util.rs index e880bea1..acafa662 100644 --- a/cli/src/util.rs +++ b/cli/src/util.rs @@ -1,12 +1,32 @@ +use super::error::{Error, Result}; +use std::io; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use std::thread; +use tree_sitter::Parser; + #[cfg(unix)] use std::path::PathBuf; #[cfg(unix)] use std::process::{Child, ChildStdin, Command, Stdio}; -use tree_sitter::Parser; #[cfg(unix)] const HTML_HEADER: &[u8] = b"\n\n\n"; +pub fn cancel_on_stdin() -> Arc { + let result = Arc::new(AtomicUsize::new(0)); + if atty::is(atty::Stream::Stdin) { + thread::spawn({ + let flag = result.clone(); + move || { + let mut line = String::new(); + io::stdin().read_line(&mut line).unwrap(); + flag.store(1, Ordering::Relaxed); + } + }); + } + result +} #[cfg(windows)] pub struct LogSession(); @@ -14,12 +34,12 @@ pub struct LogSession(); pub struct LogSession(PathBuf, Option, Option); #[cfg(windows)] -pub fn log_graphs(_parser: &mut Parser, _path: &str) -> std::io::Result { +pub fn log_graphs(_parser: &mut Parser, _path: &str) -> Result { Ok(LogSession()) } #[cfg(unix)] -pub fn 
log_graphs(parser: &mut Parser, path: &str) -> std::io::Result { +pub fn log_graphs(parser: &mut Parser, path: &str) -> Result { use std::io::Write; let mut dot_file = std::fs::File::create(path)?; @@ -29,11 +49,13 @@ pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result Resu } // Run `emcc` in a container using the `emscripten-slim` image - command.args(&["trzeci/emscripten-slim", "emcc"]); + command.args(&["emscripten/emsdk", "emcc"]); } else { - return Error::err("You must have either emcc or docker on your PATH to run this command".to_string()); + return Error::err( + "You must have either emcc or docker on your PATH to run this command".to_string(), + ); } command.args(&[ @@ -81,31 +83,22 @@ pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Resu "src", ]); - // Find source files to pass to emscripten - let src_entries = fs::read_dir(&src_dir).map_err(Error::wrap(|| { - format!("Failed to read source directory {:?}", src_dir) - }))?; + let src = Path::new("src"); + let parser_c_path = src.join("parser.c"); + let scanner_c_path = src.join("scanner.c"); + let scanner_cc_path = src.join("scanner.cc"); + let scanner_cpp_path = src.join("scanner.cpp"); - for entry in src_entries { - let entry = entry?; - let file_name = entry.file_name(); - - // Do not compile the node.js binding file. 
- if file_name - .to_str() - .map_or(false, |s| s.starts_with("binding")) - { - continue; - } - - // Compile any .c, .cc, or .cpp files - if let Some(extension) = Path::new(&file_name).extension().and_then(|s| s.to_str()) { - if extension == "c" || extension == "cc" || extension == "cpp" { - command.arg(Path::new("src").join(entry.file_name())); - } - } + if language_dir.join(&scanner_cc_path).exists() { + command.arg("-xc++").arg(&scanner_cc_path); + } else if language_dir.join(&scanner_cpp_path).exists() { + command.arg("-xc++").arg(&scanner_cpp_path); + } else if language_dir.join(&scanner_c_path).exists() { + command.arg(&scanner_c_path); } + command.arg(&parser_c_path); + let output = command .output() .map_err(Error::wrap(|| "Failed to run emcc command"))?; diff --git a/cli/src/web_ui.html b/cli/src/web_ui.html index 2422a3d8..45dd5db0 100644 --- a/cli/src/web_ui.html +++ b/cli/src/web_ui.html @@ -1,4 +1,5 @@ + tree-sitter THE_LANGUAGE_NAME @@ -7,7 +8,7 @@ -
+