diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..4fcce330 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +/lib/src/unicode/*.h linguist-vendored +/lib/src/unicode/LICENSE linguist-vendored diff --git a/.gitignore b/.gitignore index ed31e54a..58d73daa 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ docs/assets/js/tree-sitter.js /target *.rs.bk *.a +*.dylib *.o *.obj *.exp diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 6e45ee19..00000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "externals/utf8proc"] - path = lib/utf8proc - url = https://github.com/julialang/utf8proc diff --git a/.travis.yml b/.travis.yml index 44d989a1..282ba02d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,9 @@ language: rust rust: - stable +env: + CFLAGS="-Wall -Wextra -Werror -Wstrict-prototypes" + matrix: include: - os: osx @@ -11,8 +14,8 @@ matrix: before_install: # Install node - - nvm install 10 - - nvm use 10 + - nvm install 12 + - nvm use 12 # Download emscripten and create a shorthand for adding it to the PATH. 
# Don't add it to the path globally because it overrides the default @@ -23,6 +26,9 @@ script: # Build the WASM binding - (eval "$WASM_ENV" && script/build-wasm) + # build the shared/static libraries + - make + # Build the CLI - cargo build --release @@ -32,7 +38,6 @@ script: - (eval "$WASM_ENV" && script/generate-fixtures-wasm) # Run the tests - - export TREE_SITTER_STATIC_ANALYSIS=1 - script/test - script/test-wasm - script/benchmark @@ -53,8 +58,6 @@ deploy: file_glob: true file: - "tree-sitter-*.gz" - - "target/release/tree-sitter.js" - - "target/release/tree-sitter.wasm" draft: true overwrite: true skip_cleanup: true @@ -65,5 +68,3 @@ cache: cargo: true directories: - target/emsdk - - test/fixtures/grammars - - /home/travis/.emscripten_cache diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 120000 index 00000000..4f643710 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1 @@ +docs/section-6-contributing.md \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 4494a3dc..cd411095 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,1066 +4,988 @@ name = "aho-corasick" version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e" dependencies = [ - "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr", ] [[package]] name = "ansi_term" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" dependencies = [ - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi", ] [[package]] name = "arrayref" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee" [[package]] name = "arrayvec" version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b8d73f9beda665eaa98ab9e4f7442bd4e7de6652587de55b2525e52e29c1b0ba" dependencies = [ - "nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)", + "nodrop", ] [[package]] name = "ascii" version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97be891acc47ca214468e09425d02cef3af2c94d0d82081cd02061f996802f14" [[package]] name = "atty" version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" dependencies = [ - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc", + "termion", + "winapi", ] [[package]] name = "autocfg" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e5f34df7a019573fb8bdc7e24a2bfebe51a2a1d6bfdbaeccedb3c41fc574727" [[package]] name = "backtrace" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89a47830402e9981c5c41223151efcced65a0510c13097c769cede7efb34782a" dependencies = [ - "backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)", - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "backtrace-sys", + "cfg-if", + "libc", + "rustc-demangle", + "winapi", ] [[package]] name = "backtrace-sys" version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0" dependencies = [ - "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 
(registry+https://github.com/rust-lang/crates.io-index)", + "cc", + "libc", ] [[package]] name = "base64" version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e" dependencies = [ - "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "byteorder", ] [[package]] name = "bitflags" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" [[package]] name = "blake2b_simd" version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "461f4b879a8eb70c1debf7d0788a9a5ff15f1ea9d25925fea264ef4258bed6b2" dependencies = [ - "arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", - "arrayvec 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", - "constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "arrayref", + "arrayvec", + "constant_time_eq", ] -[[package]] -name = "bytecount" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "byteorder" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" [[package]] name = "c2-chacha" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d64d04786e0f528460fc884753cf8dddcc466be308f6026f8e355c41a0e4101" dependencies = [ - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "ppv-lite86 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static", + "ppv-lite86", ] [[package]] name = "cc" version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16" [[package]] name = 
"cfg-if" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" [[package]] name = "chrono" version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45912881121cb26fad7c38c17ba7daa18764771836b34fab7d3fbd93ed633878" dependencies = [ - "num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)", - "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", - "time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)", + "num-integer", + "num-traits", + "time", ] [[package]] name = "chunked_transfer" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "498d20a7aaf62625b9bf26e637cf7736417cde1d0c99f1d04d1170229a85cf87" [[package]] name = "clap" version = "2.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" dependencies = [ - "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", - "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", - "textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", ] [[package]] name = "cloudabi" version = "0.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" dependencies = [ - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags", ] 
[[package]] name = "constant_time_eq" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ff012e225ce166d4422e0e78419d901719760f62ae2b7969ca6b564d1b54a9e" [[package]] name = "crossbeam-utils" version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04973fa96e96579258a5091af6003abde64af786b860f18622b82e026cca60e6" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "lazy_static", ] [[package]] name = "difference" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" [[package]] name = "dirs" version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "dirs-sys 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "dirs-sys", ] [[package]] name = "dirs-sys" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afa0b23de8fd801745c471deffa6e12d248f962c9fd4b4c33787b055599bde7b" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_users 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "libc", + "redox_users", + "winapi", ] [[package]] name = "failure" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd377bcc1b1b7ce911967e3ec24fa19c3224394ec05b54aa7b083d498341ac7" dependencies = [ - "backtrace 0.3.9 
(registry+https://github.com/rust-lang/crates.io-index)", - "failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "backtrace", + "failure_derive", ] [[package]] name = "failure_derive" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64c2d913fe8ed3b6c6518eedf4538255b989945c14c2a7d5cbff62a5e2120596" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)", - "synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "syn", + "synstructure", ] [[package]] name = "fuchsia-zircon" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" dependencies = [ - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags", + "fuchsia-zircon-sys", ] [[package]] name = "fuchsia-zircon-sys" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" [[package]] name = "getrandom" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34f33de6f0ae7c9cb5e574502a562e2b512799e32abb801cd1e79ad952b62b49" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "libc", ] +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + [[package]] name = "idna" version = "0.1.5" 
source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e" dependencies = [ - "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "matches", + "unicode-bidi", + "unicode-normalization", ] [[package]] name = "indexmap" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d" [[package]] name = "itoa" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" [[package]] name = "lazy_static" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "lexical-core" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - "stackvector 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", - "static_assertions 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" [[package]] name = "libc" version = "0.2.61" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c665266eb592905e8503ba3403020f4b8794d26263f412ca33171600eca9a6fa" [[package]] name = "libloading" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3ad660d7cb8c5822cd83d10897b0f1f1526792737a179e73896152f85b88c2" dependencies = [ - "cc 1.0.25 
(registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cc", + "winapi", ] [[package]] name = "lock_api" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c" dependencies = [ - "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "scopeguard", ] [[package]] name = "log" version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", ] [[package]] name = "matches" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" [[package]] name = "memchr" -version = "2.1.1" +version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" [[package]] name = "nodrop" version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "nom" -version = "5.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "lexical-core 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" [[package]] name = "num-integer" 
version = "0.1.39" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea" dependencies = [ - "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "num-rational" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)", - "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits", ] [[package]] name = "num-traits" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1" [[package]] name = "once_cell" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "532c29a261168a45ce28948f9537ddd7a5dd272cc513b3017b1e82a88f962c37" dependencies = [ - "parking_lot 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", + "parking_lot", ] [[package]] name = "parking_lot" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab41b4aed082705d1056416ae4468b6ea99d52599ecf3169b00088d43113e337" dependencies = [ - "lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "parking_lot_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lock_api", + "parking_lot_core", ] [[package]] name = "parking_lot_core" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94c8c7923936b28d546dfd14d4472eaf34c99b14e1c973a32b3e6d4eb04298c9" dependencies = [ - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "smallvec 0.6.8 
(registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc", + "rand 0.6.4", + "rustc_version", + "smallvec", + "winapi", ] [[package]] name = "percent-encoding" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831" [[package]] name = "ppv-lite86" version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3cbf9f658cdb5000fcf6f362b8ea2ba154b9f146a61c7a20d647034c6b6561b" [[package]] name = "proc-macro2" version = "0.4.24" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77619697826f31a02ae974457af0b29b723e5619e113e9397b8b82c6bd253f09" dependencies = [ - "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid", ] [[package]] name = "quote" version = "0.6.10" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53fa22a1994bd0f9372d7a816207d8a2677ad0325b073f5c5332760f0fb62b5c" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", ] [[package]] name = "rand" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3906503e80ac6cbcacb2c2973fa8e473f24d7e2747c8c92bb230c2441cad96b5" dependencies = [ - "autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_os 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_pcg 0.1.1 
(registry+https://github.com/rust-lang/crates.io-index)", - "rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "autocfg", + "libc", + "rand_chacha 0.1.1", + "rand_core 0.3.0", + "rand_hc 0.1.0", + "rand_isaac", + "rand_os", + "rand_pcg", + "rand_xorshift", + "winapi", ] [[package]] name = "rand" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d47eab0e83d9693d40f825f86948aa16eff6750ead4bdffc4ab95b8b3a7f052c" dependencies = [ - "getrandom 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "getrandom", + "libc", + "rand_chacha 0.2.1", + "rand_core 0.5.0", + "rand_hc 0.2.0", ] [[package]] name = "rand_chacha" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef" dependencies = [ - "autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "autocfg", + "rand_core 0.3.0", ] [[package]] name = "rand_chacha" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" dependencies = [ - "c2-chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "c2-chacha", + "rand_core 0.5.0", ] [[package]] name = "rand_core" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0905b6b7079ec73b314d4c748701f6931eb79fd97c668caa3f1899b22b32c6db" [[package]] name = "rand_core" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "615e683324e75af5d43d8f7a39ffe3ee4a9dc42c5c701167a71dc59c3a493aca" dependencies = [ - "getrandom 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "getrandom", ] [[package]] name = "rand_hc" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", ] [[package]] name = "rand_hc" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" dependencies = [ - "rand_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.5.0", ] [[package]] name = "rand_isaac" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", ] [[package]] name = "rand_os" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46fbd5550acf75b0c2730f5dd1873751daf9beb8f11b44027778fae50d7feca" dependencies = [ - "cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", - "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cloudabi", + "fuchsia-zircon", + 
"libc", + "rand_core 0.3.0", + "rdrand", + "winapi", ] [[package]] name = "rand_pcg" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "086bd09a33c7044e56bb44d5bdde5a60e7f119a9e95b0775f545de759a32fe05" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", + "rustc_version", ] [[package]] name = "rand_xorshift" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", ] [[package]] name = "rdrand" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", ] [[package]] name = "redox_syscall" version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "679da7508e9a6390aeaf7fbd02a800fdc64b73fe2204dd2c8ae66d22d9d5ad5d" [[package]] name = "redox_termios" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" dependencies = [ - "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall", ] [[package]] name = "redox_users" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ecedbca3bf205f8d8f5c2b44d83cd0690e39ee84b951ed649e9f1841132b66d" dependencies = [ - "failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_os 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.43 
(registry+https://github.com/rust-lang/crates.io-index)", - "rust-argon2 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "failure", + "rand_os", + "redox_syscall", + "rust-argon2", ] [[package]] name = "regex" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" dependencies = [ - "aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", - "utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", + "utf8-ranges", ] [[package]] name = "regex-syntax" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" dependencies = [ - "ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "ucd-util", ] [[package]] name = "remove_dir_all" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5" dependencies = [ - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rsass" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bytecount 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "nom 5.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - "num-rational 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "num-traits 0.2.6 
(registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi", ] [[package]] name = "rust-argon2" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca4eaef519b494d1f2848fc602d18816fed808a981aedf4f1f00ceb7c9d32cf" dependencies = [ - "base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", - "blake2b_simd 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", - "crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)", + "base64", + "blake2b_simd", + "crossbeam-utils", ] [[package]] name = "rustc-demangle" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395" [[package]] name = "rustc_version" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" dependencies = [ - "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "semver", ] [[package]] name = "ryu" version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "ryu" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" [[package]] name = "scopeguard" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" [[package]] name = "semver" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" dependencies = [ - "semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "semver-parser", ] [[package]] name = "semver-parser" version = 
"0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15c141fc7027dd265a47c090bf864cf62b42c4d228bbcf4e51a0c9e2b0d3f7ef" [[package]] name = "serde_derive" version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "syn", ] [[package]] name = "serde_json" version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c37ccd6be3ed1fdf419ee848f7c758eb31b054d7cd3ae3600e3bae0adf569811" dependencies = [ - "indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "indexmap", + "itoa", + "ryu", + "serde", ] [[package]] name = "smallbitvec" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1764fe2b30ee783bfe3b9b37b2649d8d590b3148bb12e0079715d4d5c673562e" [[package]] name = "smallvec" version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88aea073965ab29f6edb5493faf96ad662fb18aa9eeb186a3b7057951605ed15" dependencies = [ - "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unreachable", ] [[package]] name = "spin" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] 
-name = "stackvector" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "static_assertions" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44363f6f51401c34e7be73db0db371c04705d35efbe9f7d6082e03a921a32c55" [[package]] name = "strsim" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" [[package]] name = "syn" version = "0.15.22" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "unicode-xid", ] [[package]] name = "synstructure" version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "syn", + "unicode-xid", ] [[package]] name = "tempfile" version = "3.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b86c784c88d98c801132806dadd3819ed29d8600836c4088e855cdf3e178ed8a" dependencies = [ - "cfg-if 0.1.6 
(registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", - "remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "libc", + "rand 0.6.4", + "redox_syscall", + "remove_dir_all", + "winapi", ] [[package]] name = "termion" version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" dependencies = [ - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "libc", + "redox_syscall", + "redox_termios", ] [[package]] name = "textwrap" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" dependencies = [ - "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-width", ] [[package]] name = "thread_local" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" dependencies = [ - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static", ] [[package]] name = "time" version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" dependencies = [ - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.43 
(registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc", + "redox_syscall", + "winapi", ] [[package]] name = "tiny_http" version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1661fa0a44c95d01604bd05c66732a446c657efb62b5164a7a083a3b552b4951" dependencies = [ - "ascii 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)", - "chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "chunked_transfer 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)", + "ascii", + "chrono", + "chunked_transfer", + "log", + "url", ] [[package]] name = "tree-sitter" -version = "0.3.10" +version = "0.17.1" dependencies = [ - "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", + "cc", + "regex", ] [[package]] name = "tree-sitter-cli" -version = "0.15.8" +version = "0.17.3" dependencies = [ - "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", - "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", - "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", - "difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - "dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.6 
(registry+https://github.com/rust-lang/crates.io-index)", - "once_cell 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "rsass 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", - "smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tempfile 3.0.7 (registry+https://github.com/rust-lang/crates.io-index)", - "tiny_http 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.3.10", - "tree-sitter-highlight 0.1.6", - "webbrowser 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "ansi_term", + "atty", + "cc", + "clap", + "difference", + "dirs", + "glob", + "lazy_static", + "libloading", + "log", + "once_cell", + "rand 0.7.0", + "regex", + "regex-syntax", + "serde", + "serde_derive", + "serde_json", + "smallbitvec", + "spin", + "tempfile", + "tiny_http", + "tree-sitter", + "tree-sitter-highlight", + "tree-sitter-tags", + "webbrowser", ] [[package]] name = "tree-sitter-highlight" -version = "0.1.6" +version = "0.3.0" dependencies = [ - "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.3.10", + "regex", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-tags" +version = "0.3.0" 
+dependencies = [ + "memchr", + "regex", + "tree-sitter", ] [[package]] name = "ucd-util" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" [[package]] name = "unicode-bidi" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" dependencies = [ - "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "matches", ] [[package]] name = "unicode-normalization" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "141339a08b982d942be2ca06ff8b076563cbe223d1befd5450716790d44e2426" dependencies = [ - "smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec", ] [[package]] name = "unicode-width" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" [[package]] name = "unicode-xid" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" [[package]] name = "unreachable" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" dependencies = [ - "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "void", ] [[package]] name = "url" version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd4e7c0d531266369519a4aa4f399d748bd37043b00bde1e4ff1f60a120b355a" dependencies = [ - "idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + 
"idna", + "matches", + "percent-encoding", ] [[package]] name = "utf8-ranges" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" [[package]] name = "vec_map" version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "version_check" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" [[package]] name = "void" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" [[package]] name = "webbrowser" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c01efd7cb6939b7f34983f1edff0550e5b21b49e2db4495656295922df8939ac" dependencies = [ - "widestring 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "widestring", + "winapi", ] [[package]] name = "widestring" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "effc0e4ff8085673ea7b9b2e3c73f6bd4d118810c9009ed8f1e16bd96c331db6" [[package]] name = "winapi" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0" dependencies = [ - "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" - -[metadata] -"checksum aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e" -"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" -"checksum arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee" -"checksum arrayvec 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)" = "b8d73f9beda665eaa98ab9e4f7442bd4e7de6652587de55b2525e52e29c1b0ba" -"checksum ascii 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)" = "97be891acc47ca214468e09425d02cef3af2c94d0d82081cd02061f996802f14" -"checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" -"checksum autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4e5f34df7a019573fb8bdc7e24a2bfebe51a2a1d6bfdbaeccedb3c41fc574727" -"checksum backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "89a47830402e9981c5c41223151efcced65a0510c13097c769cede7efb34782a" -"checksum backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0" -"checksum base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e" -"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" -"checksum blake2b_simd 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = 
"461f4b879a8eb70c1debf7d0788a9a5ff15f1ea9d25925fea264ef4258bed6b2" -"checksum bytecount 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "be0fdd54b507df8f22012890aadd099979befdba27713c767993f8380112ca7c" -"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" -"checksum c2-chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7d64d04786e0f528460fc884753cf8dddcc466be308f6026f8e355c41a0e4101" -"checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16" -"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" -"checksum chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "45912881121cb26fad7c38c17ba7daa18764771836b34fab7d3fbd93ed633878" -"checksum chunked_transfer 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "498d20a7aaf62625b9bf26e637cf7736417cde1d0c99f1d04d1170229a85cf87" -"checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" -"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" -"checksum constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8ff012e225ce166d4422e0e78419d901719760f62ae2b7969ca6b564d1b54a9e" -"checksum crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "04973fa96e96579258a5091af6003abde64af786b860f18622b82e026cca60e6" -"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" -"checksum dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = 
"13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3" -"checksum dirs-sys 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "afa0b23de8fd801745c471deffa6e12d248f962c9fd4b4c33787b055599bde7b" -"checksum failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6dd377bcc1b1b7ce911967e3ec24fa19c3224394ec05b54aa7b083d498341ac7" -"checksum failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "64c2d913fe8ed3b6c6518eedf4538255b989945c14c2a7d5cbff62a5e2120596" -"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" -"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" -"checksum getrandom 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "34f33de6f0ae7c9cb5e574502a562e2b512799e32abb801cd1e79ad952b62b49" -"checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e" -"checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d" -"checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" -"checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" -"checksum lexical-core 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "b8b0f90c979adde96d19eb10eb6431ba0c441e2f9e9bdff868b2f6f5114ff519" -"checksum libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)" = "c665266eb592905e8503ba3403020f4b8794d26263f412ca33171600eca9a6fa" -"checksum libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"9c3ad660d7cb8c5822cd83d10897b0f1f1526792737a179e73896152f85b88c2" -"checksum lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c" -"checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6" -"checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" -"checksum memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0a3eb002f0535929f1199681417029ebea04aadc0c7a4224b46be99c7f5d6a16" -"checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" -"checksum nom 5.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e9761d859320e381010a4f7f8ed425f2c924de33ad121ace447367c713ad561b" -"checksum num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea" -"checksum num-rational 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4e96f040177bb3da242b5b1ecf3f54b5d5af3efbbfb18608977a5d2767b22f10" -"checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1" -"checksum once_cell 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "532c29a261168a45ce28948f9537ddd7a5dd272cc513b3017b1e82a88f962c37" -"checksum parking_lot 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ab41b4aed082705d1056416ae4468b6ea99d52599ecf3169b00088d43113e337" -"checksum parking_lot_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "94c8c7923936b28d546dfd14d4472eaf34c99b14e1c973a32b3e6d4eb04298c9" -"checksum percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831" -"checksum ppv-lite86 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e3cbf9f658cdb5000fcf6f362b8ea2ba154b9f146a61c7a20d647034c6b6561b" -"checksum proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)" = "77619697826f31a02ae974457af0b29b723e5619e113e9397b8b82c6bd253f09" -"checksum quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)" = "53fa22a1994bd0f9372d7a816207d8a2677ad0325b073f5c5332760f0fb62b5c" -"checksum rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3906503e80ac6cbcacb2c2973fa8e473f24d7e2747c8c92bb230c2441cad96b5" -"checksum rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d47eab0e83d9693d40f825f86948aa16eff6750ead4bdffc4ab95b8b3a7f052c" -"checksum rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef" -"checksum rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" -"checksum rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0905b6b7079ec73b314d4c748701f6931eb79fd97c668caa3f1899b22b32c6db" -"checksum rand_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "615e683324e75af5d43d8f7a39ffe3ee4a9dc42c5c701167a71dc59c3a493aca" -"checksum rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4" -"checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" -"checksum rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" -"checksum rand_os 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"f46fbd5550acf75b0c2730f5dd1873751daf9beb8f11b44027778fae50d7feca" -"checksum rand_pcg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "086bd09a33c7044e56bb44d5bdde5a60e7f119a9e95b0775f545de759a32fe05" -"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" -"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" -"checksum redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "679da7508e9a6390aeaf7fbd02a800fdc64b73fe2204dd2c8ae66d22d9d5ad5d" -"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" -"checksum redox_users 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ecedbca3bf205f8d8f5c2b44d83cd0690e39ee84b951ed649e9f1841132b66d" -"checksum regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" -"checksum regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" -"checksum remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5" -"checksum rsass 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4520dc8a2786c0319f3947e3d79e735b27f0c63c555b854aaa802e49e3f45098" -"checksum rust-argon2 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ca4eaef519b494d1f2848fc602d18816fed808a981aedf4f1f00ceb7c9d32cf" -"checksum rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395" -"checksum rustc_version 0.2.3 
(registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" -"checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" -"checksum ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997" -"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" -"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" -"checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" -"checksum serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "15c141fc7027dd265a47c090bf864cf62b42c4d228bbcf4e51a0c9e2b0d3f7ef" -"checksum serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c" -"checksum serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)" = "c37ccd6be3ed1fdf419ee848f7c758eb31b054d7cd3ae3600e3bae0adf569811" -"checksum smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1764fe2b30ee783bfe3b9b37b2649d8d590b3148bb12e0079715d4d5c673562e" -"checksum smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "88aea073965ab29f6edb5493faf96ad662fb18aa9eeb186a3b7057951605ed15" -"checksum spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "44363f6f51401c34e7be73db0db371c04705d35efbe9f7d6082e03a921a32c55" -"checksum stackvector 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "1c4725650978235083241fab0fdc8e694c3de37821524e7534a1a9061d1068af" -"checksum static_assertions 0.3.4 
(registry+https://github.com/rust-lang/crates.io-index)" = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3" -"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" -"checksum syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)" = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7" -"checksum synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015" -"checksum tempfile 3.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "b86c784c88d98c801132806dadd3819ed29d8600836c4088e855cdf3e178ed8a" -"checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" -"checksum textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" -"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" -"checksum time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" -"checksum tiny_http 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1661fa0a44c95d01604bd05c66732a446c657efb62b5164a7a083a3b552b4951" -"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" -"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" -"checksum unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "141339a08b982d942be2ca06ff8b076563cbe223d1befd5450716790d44e2426" -"checksum unicode-width 0.1.5 
(registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" -"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" -"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" -"checksum url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dd4e7c0d531266369519a4aa4f399d748bd37043b00bde1e4ff1f60a120b355a" -"checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" -"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" -"checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" -"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" -"checksum webbrowser 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c01efd7cb6939b7f34983f1edff0550e5b21b49e2db4495656295922df8939ac" -"checksum widestring 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "effc0e4ff8085673ea7b9b2e3c73f6bd4d118810c9009ed8f1e16bd96c331db6" -"checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0" -"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..764f411a --- /dev/null +++ b/Makefile @@ -0,0 +1,71 @@ +VERSION := 0.6.3 + +# install directory layout +PREFIX ?= /usr/local +INCLUDEDIR ?= $(PREFIX)/include +LIBDIR ?= $(PREFIX)/lib +PCLIBDIR ?= $(LIBDIR)/pkgconfig + +# collect sources +ifneq ($(AMALGAMATED),1) + SRC := $(wildcard lib/src/*.c) + # do not double-include amalgamation + SRC := $(filter-out lib/src/lib.c,$(SRC)) +else + # use amalgamated build + SRC := lib/src/lib.c +endif +OBJ := $(SRC:.c=.o) + +# define default flags, and override to append mandatory flags +CFLAGS ?= -O3 -Wall -Wextra -Werror +override CFLAGS += -std=gnu99 -fPIC -Ilib/src -Ilib/include + +# ABI versioning +SONAME_MAJOR := 0 +SONAME_MINOR := 0 + +# OS-specific bits +ifeq ($(shell uname),Darwin) + SOEXT = dylib + SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib + SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib + LINKSHARED += -dynamiclib -Wl,-install_name,$(LIBDIR)/libtree-sitter.$(SONAME_MAJOR).dylib +else + SOEXT = so + SOEXTVER_MAJOR = so.$(SONAME_MAJOR) + SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR) + LINKSHARED += -shared -Wl,-soname,libtree-sitter.so.$(SONAME_MAJOR) +endif +ifneq (,$(filter $(shell uname),FreeBSD NetBSD DragonFly)) + PCLIBDIR := $(PREFIX)/libdata/pkgconfig +endif + +all: libtree-sitter.a libtree-sitter.$(SOEXTVER) + +libtree-sitter.a: $(OBJ) + $(AR) rcs $@ $^ + +libtree-sitter.$(SOEXTVER): $(OBJ) + $(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@ + ln -sf $@ libtree-sitter.$(SOEXT) + ln -sf $@ libtree-sitter.$(SOEXTVER_MAJOR) + +install: all + install -d '$(DESTDIR)$(LIBDIR)' + install -m755 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a + install -m755 libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER) + ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR) + ln -sf libtree-sitter.$(SOEXTVER) 
'$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) + install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter + install -m644 lib/include/tree_sitter/*.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/ + install -d '$(DESTDIR)$(PCLIBDIR)' + sed -e 's|@LIBDIR@|$(LIBDIR)|;s|@INCLUDEDIR@|$(INCLUDEDIR)|;s|@VERSION@|$(VERSION)|' \ + -e 's|=$(PREFIX)|=$${prefix}|' \ + -e 's|@PREFIX@|$(PREFIX)|' \ + tree-sitter.pc.in > '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc + +clean: + rm -f lib/src/*.o libtree-sitter.a libtree-sitter.$(SOEXT) libtree-sitter.$(SOEXTVER_MAJOR) libtree-sitter.$(SOEXTVER) + +.PHONY: all install clean diff --git a/README.md b/README.md index b6df76e9..a4bb7a48 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,14 @@ Tree-sitter is a parser generator tool and an incremental parsing library. It can build a concrete syntax tree for a source file and efficiently update the syntax tree as the source file is edited. Tree-sitter aims to be: -* **General** enough to parse any programming language -* **Fast** enough to parse on every keystroke in a text editor -* **Robust** enough to provide useful results even in the presence of syntax errors -* **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application +- **General** enough to parse any programming language +- **Fast** enough to parse on every keystroke in a text editor +- **Robust** enough to provide useful results even in the presence of syntax errors +- **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application -[Documentation](https://tree-sitter.github.io/tree-sitter/) +## Links + +- [Documentation](https://tree-sitter.github.io) +- [Rust binding](lib/binding_rust/README.md) +- [WASM binding](lib/binding_web/README.md) +- [Command-line interface](cli/README.md) diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 2bf83f8d..48dbbff7 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-cli" 
description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.15.8" +version = "0.17.3" authors = ["Max Brunsfeld "] edition = "2018" license = "MIT" @@ -19,31 +19,36 @@ name = "benchmark" harness = false [dependencies] -cc = "1.0" ansi_term = "0.11" -difference = "2.0" -lazy_static = "1.2.0" -smallbitvec = "2.3.0" +cc = "1.0" +atty = "0.2" clap = "2.32" +difference = "2.0" dirs = "2.0.2" +glob = "0.3.0" +lazy_static = "1.2.0" libloading = "0.5" once_cell = "0.1.8" +regex = "1" +regex-syntax = "0.6.4" serde = "1.0" serde_derive = "1.0" -regex-syntax = "0.6.4" -regex = "1" -rsass = "^0.11.0" +smallbitvec = "2.3.0" tiny_http = "0.6" webbrowser = "0.5.1" [dependencies.tree-sitter] -version = ">= 0.3.7" +version = ">= 0.17.0" path = "../lib" [dependencies.tree-sitter-highlight] -version = ">= 0.1.0" +version = ">= 0.3.0" path = "../highlight" +[dependencies.tree-sitter-tags] +version = ">= 0.1.0" +path = "../tags" + [dependencies.serde_json] version = "1.0" features = ["preserve_order"] diff --git a/cli/benches/benchmark.rs b/cli/benches/benchmark.rs index 294c8a97..53ab3fea 100644 --- a/cli/benches/benchmark.rs +++ b/cli/benches/benchmark.rs @@ -2,8 +2,8 @@ use lazy_static::lazy_static; use std::collections::BTreeMap; use std::path::{Path, PathBuf}; use std::time::Instant; -use std::{env, fs, usize}; -use tree_sitter::{Language, Parser}; +use std::{env, fs, str, usize}; +use tree_sitter::{Language, Parser, Query}; use tree_sitter_cli::error::Error; use tree_sitter_cli::loader::Loader; @@ -17,28 +17,34 @@ lazy_static! 
{ static ref REPETITION_COUNT: usize = env::var("TREE_SITTER_BENCHMARK_REPETITION_COUNT") .map(|s| usize::from_str_radix(&s, 10).unwrap()) .unwrap_or(5); - static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone()); - static ref EXAMPLE_PATHS_BY_LANGUAGE_DIR: BTreeMap> = { - fn process_dir(result: &mut BTreeMap>, dir: &Path) { + static ref EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR: BTreeMap, Vec)> = { + fn process_dir(result: &mut BTreeMap, Vec)>, dir: &Path) { if dir.join("grammar.js").exists() { let relative_path = dir.strip_prefix(GRAMMARS_DIR.as_path()).unwrap(); + let (example_paths, query_paths) = + result.entry(relative_path.to_owned()).or_default(); + if let Ok(example_files) = fs::read_dir(&dir.join("examples")) { - result.insert( - relative_path.to_owned(), - example_files - .filter_map(|p| { - let p = p.unwrap().path(); - if p.is_file() { - Some(p) - } else { - None - } - }) - .collect(), - ); - } else { - result.insert(relative_path.to_owned(), Vec::new()); + example_paths.extend(example_files.filter_map(|p| { + let p = p.unwrap().path(); + if p.is_file() { + Some(p.to_owned()) + } else { + None + } + })); + } + + if let Ok(query_files) = fs::read_dir(&dir.join("queries")) { + query_paths.extend(query_files.filter_map(|p| { + let p = p.unwrap().path(); + if p.is_file() { + Some(p.to_owned()) + } else { + None + } + })); } } else { for entry in fs::read_dir(&dir).unwrap() { @@ -57,20 +63,25 @@ lazy_static! 
{ } fn main() { - let mut parser = Parser::new(); - let max_path_length = EXAMPLE_PATHS_BY_LANGUAGE_DIR - .iter() - .flat_map(|(_, paths)| paths.iter()) - .map(|p| p.file_name().unwrap().to_str().unwrap().chars().count()) + let max_path_length = EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR + .values() + .flat_map(|(e, q)| { + e.iter() + .chain(q.iter()) + .map(|s| s.file_name().unwrap().to_str().unwrap().len()) + }) .max() - .unwrap(); - - let mut all_normal_speeds = Vec::new(); - let mut all_error_speeds = Vec::new(); + .unwrap_or(0); eprintln!("Benchmarking with {} repetitions", *REPETITION_COUNT); - for (language_path, example_paths) in EXAMPLE_PATHS_BY_LANGUAGE_DIR.iter() { + let mut parser = Parser::new(); + let mut all_normal_speeds = Vec::new(); + let mut all_error_speeds = Vec::new(); + + for (language_path, (example_paths, query_paths)) in + EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR.iter() + { let language_name = language_path.file_name().unwrap().to_str().unwrap(); if let Some(filter) = LANGUAGE_FILTER.as_ref() { @@ -80,9 +91,24 @@ fn main() { } eprintln!("\nLanguage: {}", language_name); - parser.set_language(get_language(language_path)).unwrap(); + let language = get_language(language_path); + parser.set_language(language).unwrap(); - eprintln!(" Normal examples:"); + eprintln!(" Constructing Queries"); + for path in query_paths { + if let Some(filter) = EXAMPLE_FILTER.as_ref() { + if !path.to_str().unwrap().contains(filter.as_str()) { + continue; + } + } + + parse(&path, max_path_length, |source| { + Query::new(language, str::from_utf8(source).unwrap()) + .expect("Failed to parse query"); + }); + } + + eprintln!(" Parsing Valid Code:"); let mut normal_speeds = Vec::new(); for example_path in example_paths { if let Some(filter) = EXAMPLE_FILTER.as_ref() { @@ -91,12 +117,16 @@ fn main() { } } - normal_speeds.push(parse(&mut parser, example_path, max_path_length)); + normal_speeds.push(parse(example_path, max_path_length, |code| { + parser.parse(code, 
None).expect("Failed to parse"); + })); } - eprintln!(" Error examples (mismatched languages):"); + eprintln!(" Parsing Invalid Code (mismatched languages):"); let mut error_speeds = Vec::new(); - for (other_language_path, example_paths) in EXAMPLE_PATHS_BY_LANGUAGE_DIR.iter() { + for (other_language_path, (example_paths, _)) in + EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR.iter() + { if other_language_path != language_path { for example_path in example_paths { if let Some(filter) = EXAMPLE_FILTER.as_ref() { @@ -105,7 +135,9 @@ fn main() { } } - error_speeds.push(parse(&mut parser, example_path, max_path_length)); + error_speeds.push(parse(example_path, max_path_length, |code| { + parser.parse(code, None).expect("Failed to parse"); + })); } } } @@ -124,7 +156,7 @@ fn main() { all_error_speeds.extend(error_speeds); } - eprintln!("\nOverall"); + eprintln!("\n Overall"); if let Some((average_normal, worst_normal)) = aggregate(&all_normal_speeds) { eprintln!(" Average Speed (normal): {} bytes/ms", average_normal); eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal); @@ -137,7 +169,7 @@ fn main() { eprintln!(""); } -fn aggregate(speeds: &Vec<(usize)>) -> Option<(usize, usize)> { +fn aggregate(speeds: &Vec) -> Option<(usize, usize)> { if speeds.is_empty() { return None; } @@ -152,28 +184,25 @@ fn aggregate(speeds: &Vec<(usize)>) -> Option<(usize, usize)> { Some((total / speeds.len(), max)) } -fn parse(parser: &mut Parser, example_path: &Path, max_path_length: usize) -> usize { +fn parse(path: &Path, max_path_length: usize, mut action: impl FnMut(&[u8])) -> usize { eprint!( " {:width$}\t", - example_path.file_name().unwrap().to_str().unwrap(), + path.file_name().unwrap().to_str().unwrap(), width = max_path_length ); - let source_code = fs::read(example_path) - .map_err(Error::wrap(|| format!("Failed to read {:?}", example_path))) + let source_code = fs::read(path) + .map_err(Error::wrap(|| format!("Failed to read {:?}", path))) .unwrap(); let time = Instant::now(); 
for _ in 0..*REPETITION_COUNT { - parser - .parse(&source_code, None) - .expect("Incompatible language version"); + action(&source_code); } let duration = time.elapsed() / (*REPETITION_COUNT as u32); - let duration_ms = - duration.as_secs() as f64 * 1000.0 + duration.subsec_nanos() as f64 / 1000000.0; - let speed = (source_code.len() as f64 / duration_ms) as usize; + let duration_ms = duration.as_millis(); + let speed = source_code.len() as u128 / (duration_ms + 1); eprintln!("time {} ms\tspeed {} bytes/ms", duration_ms as usize, speed); - speed + speed as usize } fn get_language(path: &Path) -> Language { diff --git a/cli/build.rs b/cli/build.rs index 0ed9ef06..47506018 100644 --- a/cli/build.rs +++ b/cli/build.rs @@ -1,4 +1,4 @@ -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::{env, fs}; fn main() { @@ -6,12 +6,25 @@ fn main() { println!("cargo:rustc-env={}={}", "BUILD_SHA", git_sha); } + if wasm_files_present() { + println!("cargo:rustc-cfg={}", "TREE_SITTER_EMBED_WASM_BINDING"); + } + println!( "cargo:rustc-env=BUILD_TARGET={}", std::env::var("TARGET").unwrap() ); } +fn wasm_files_present() -> bool { + let paths = [ + "../lib/binding_web/tree-sitter.js", + "../lib/binding_web/tree-sitter.wasm", + ]; + + paths.iter().all(|p| Path::new(p).exists()) +} + fn read_git_sha() -> Option { let mut repo_path = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); @@ -51,7 +64,6 @@ fn read_git_sha() -> Option { } return fs::read_to_string(&ref_filename).ok(); } - // If we're on a detached commit, then the `HEAD` file itself contains the sha. 
else if head_content.len() == 40 { return Some(head_content); diff --git a/cli/npm/dsl.d.ts b/cli/npm/dsl.d.ts new file mode 100644 index 00000000..b9bf1c98 --- /dev/null +++ b/cli/npm/dsl.d.ts @@ -0,0 +1,356 @@ +type AliasRule = {type: 'ALIAS'; named: boolean; content: Rule; value: string}; +type BlankRule = {type: 'BLANK'}; +type ChoiceRule = {type: 'CHOICE'; members: Rule[]}; +type FieldRule = {type: 'FIELD'; name: string; content: Rule}; +type ImmediateTokenRule = {type: 'IMMEDIATE_TOKEN'; content: Rule}; +type PatternRule = {type: 'PATTERN'; value: string}; +type PrecDynamicRule = {type: 'PREC_DYNAMIC'; content: Rule; value: number}; +type PrecLeftRule = {type: 'PREC_LEFT'; content: Rule; value: number}; +type PrecRightRule = {type: 'PREC_RIGHT'; content: Rule; value: number}; +type PrecRule = {type: 'PREC'; content: Rule; value: number}; +type Repeat1Rule = {type: 'REPEAT1'; content: Rule}; +type RepeatRule = {type: 'REPEAT'; content: Rule}; +type SeqRule = {type: 'SEQ'; members: Rule[]}; +type StringRule = {type: 'STRING'; value: string}; +type SymbolRule = {type: 'SYMBOL'; name: Name}; +type TokenRule = {type: 'TOKEN'; content: Rule}; + +type Rule = + | AliasRule + | BlankRule + | ChoiceRule + | FieldRule + | ImmediateTokenRule + | PatternRule + | PrecDynamicRule + | PrecLeftRule + | PrecRightRule + | PrecRule + | Repeat1Rule + | RepeatRule + | SeqRule + | StringRule + | SymbolRule + | TokenRule; + +type RuleOrLiteral = Rule | RegExp | string; + +type GrammarSymbols = { + [name in RuleName]: SymbolRule; +} & + Record>; + +type RuleBuilder = ( + $: GrammarSymbols, +) => RuleOrLiteral; + +type RuleBuilders< + RuleName extends string, + BaseGrammarRuleName extends string +> = { + [name in RuleName]: RuleBuilder; +}; + +interface Grammar< + RuleName extends string, + BaseGrammarRuleName extends string = never, + Rules extends RuleBuilders = RuleBuilders< + RuleName, + BaseGrammarRuleName + > +> { + /** + * Name of the grammar language. 
+ */ + name: string; + + /** Mapping of grammar rule names to rule builder functions. */ + rules: Rules; + + /** + * An array of arrays of rule names. Each inner array represents a set of + * rules that's involved in an _LR(1) conflict_ that is _intended to exist_ + * in the grammar. When these conflicts occur at runtime, Tree-sitter will + * use the GLR algorithm to explore all of the possible interpretations. If + * _multiple_ parses end up succeeding, Tree-sitter will pick the subtree + * whose corresponding rule has the highest total _dynamic precedence_. + * + * @param $ grammar rules + */ + conflicts?: ( + $: GrammarSymbols, + ) => RuleOrLiteral[][]; + + /** + * An array of token names which can be returned by an _external scanner_. + * External scanners allow you to write custom C code which runs during the + * lexing process in order to handle lexical rules (e.g. Python's indentation + * tokens) that cannot be described by regular expressions. + * + * @param $ grammar rules + * @param previous array of externals from the base schema, if any + * + * @see https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners + */ + externals?: ( + $: Record>, + previous: Rule[], + ) => SymbolRule[]; + + /** + * An array of tokens that may appear anywhere in the language. This + * is often used for whitespace and comments. The default value of + * extras is to accept whitespace. To control whitespace explicitly, + * specify extras: `$ => []` in your grammar. + * + * @param $ grammar rules + */ + extras?: ( + $: GrammarSymbols, + ) => RuleOrLiteral[]; + + /** + * An array of rules that should be automatically removed from the + * grammar by replacing all of their usages with a copy of their definition. + * This is useful for rules that are used in multiple places but for which + * you don't want to create syntax tree nodes at runtime. 
+ * + * @param $ grammar rules + */ + inline?: ( + $: GrammarSymbols, + ) => RuleOrLiteral[]; + + /** + * A list of hidden rule names that should be considered supertypes in the + * generated node types file. + * + * @param $ grammar rules + * + * @see http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types + */ + supertypes?: ( + $: GrammarSymbols, + ) => RuleOrLiteral[]; + + /** + * The name of a token that will match keywords for the purpose of the + * keyword extraction optimization. + * + * @param $ grammar rules + * + * @see https://tree-sitter.github.io/tree-sitter/creating-parsers#keyword-extraction + */ + word?: ($: GrammarSymbols) => RuleOrLiteral; +} + +type GrammarSchema = { + [K in keyof Grammar]: K extends 'rules' + ? Record + : Grammar[K]; +}; + +/** + * Causes the given rule to appear with an alternative name in the syntax tree. + * For instance with `alias($.foo, 'bar')`, the aliased rule will appear as an + * anonymous node, as if the rule had been written as the simple string. + * + * @param rule rule that will be aliased + * @param name target name for the alias + */ +declare function alias(rule: RuleOrLiteral, name: string): AliasRule; + +/** + * Causes the given rule to appear as an alternative named node, for instance + * with `alias($.foo, $.bar)`, the aliased rule `foo` will appear as a named + * node called `bar`. + * + * @param rule rule that will be aliased + * @param symbol target symbol for the alias + */ +declare function alias( + rule: RuleOrLiteral, + symbol: SymbolRule, +): AliasRule; + +/** + * Creates a blank rule, matching nothing. + */ +declare function blank(): BlankRule; + +/** + * Assigns a field name to the child node(s) matched by the given rule. + * In the resulting syntax tree, you can then use that field name to + * access specific children. 
+ * + * @param name name of the field + * @param rule rule the field should match + */ +declare function field(name: string, rule: RuleOrLiteral): FieldRule; + +/** + * Creates a rule that matches one of a set of possible rules. The order + * of the arguments does not matter. This is analogous to the `|` (pipe) + * operator in EBNF notation. + * + * @param options possible rule choices + */ +declare function choice(...options: RuleOrLiteral[]): ChoiceRule; + +/** + * Creates a rule that matches zero or one occurrence of a given rule. + * It is analogous to the `[x]` (square bracket) syntax in EBNF notation. + * + * @param value rule to be made optional + */ +declare function optional(rule: RuleOrLiteral): ChoiceRule; + +/** + * Marks the given rule with a numerical precedence which will be used to + * resolve LR(1) conflicts at parser-generation time. When two rules overlap + * in a way that represents either a true ambiguity or a _local_ ambiguity + * given one token of lookahead, Tree-sitter will try to resolve the conflict by + * matching the rule with the higher precedence. The default precedence of all + * rules is zero. This works similarly to the precedence directives in Yacc grammars. + * + * @param number precedence weight + * @param rule rule being weighted + * + * @see https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables + * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html + */ +declare const prec: { + (number: number, rule: RuleOrLiteral): PrecRule; + + /** + * Marks the given rule as left-associative (and optionally applies a + * numerical precedence). When an LR(1) conflict arises in which all of the + * rules have the same numerical precedence, Tree-sitter will consult the + * rules' associativity. If there is a left-associative rule, Tree-sitter + * will prefer matching a rule that ends _earlier_. This works similarly to + * associativity directives in Yacc grammars. 
+ * + * @param number (optional) precedence weight + * @param rule rule to mark as left-associative + * + * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html + */ + left(rule: RuleOrLiteral): PrecLeftRule; + left(number: number, rule: RuleOrLiteral): PrecLeftRule; + + /** + * Marks the given rule as right-associative (and optionally applies a + * numerical precedence). When an LR(1) conflict arises in which all of the + * rules have the same numerical precedence, Tree-sitter will consult the + * rules' associativity. If there is a right-associative rule, Tree-sitter + * will prefer matching a rule that ends _later_. This works similarly to + * associativity directives in Yacc grammars. + * + * @param number (optional) precedence weight + * @param rule rule to mark as right-associative + * + * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html + */ + right(rule: RuleOrLiteral): PrecRightRule; + right(number: number, rule: RuleOrLiteral): PrecRightRule; + + /** + * Marks the given rule with a numerical precedence which will be used to + * resolve LR(1) conflicts at _runtime_ instead of parser-generation time. + * This is only necessary when handling a conflict dynamically using the + * `conflicts` field in the grammar, and when there is a genuine _ambiguity_: + * multiple rules correctly match a given piece of code. In that event, + * Tree-sitter compares the total dynamic precedence associated with each + * rule, and selects the one with the highest total. This is similar to + * dynamic precedence directives in Bison grammars. + * + * @param number precedence weight + * @param rule rule being weighted + * + * @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html + */ + dynamic(number: number, rule: RuleOrLiteral): PrecDynamicRule; +}; + +/** + * Creates a rule that matches _zero-or-more_ occurrences of a given rule. + * It is analogous to the `{x}` (curly brace) syntax in EBNF notation. 
This + * rule is implemented in terms of `repeat1` but is included because it + * is very commonly used. + * + * @param rule rule to repeat, zero or more times + */ +declare function repeat(rule: RuleOrLiteral): RepeatRule; + +/** + * Creates a rule that matches one-or-more occurrences of a given rule. + * + * @param rule rule to repeat, one or more times + */ +declare function repeat1(rule: RuleOrLiteral): Repeat1Rule; + +/** + * Creates a rule that matches any number of other rules, one after another. + * It is analogous to simply writing multiple symbols next to each other + * in EBNF notation. + * + * @param rules ordered rules that comprise the sequence + */ +declare function seq(...rules: RuleOrLiteral[]): SeqRule; + +/** + * Creates a symbol rule, representing another rule in the grammar by name. + * + * @param name name of the target rule + */ +declare function sym(name: Name): SymbolRule; + +/** + * Marks the given rule as producing only a single token. Tree-sitter's + * default is to treat each String or RegExp literal in the grammar as a + * separate token. Each token is matched separately by the lexer and + * returned as its own leaf node in the tree. The token function allows + * you to express a complex rule using the DSL functions (rather + * than as a single regular expression) but still have Tree-sitter treat + * it as a single token. + * + * @param rule rule to represent as a single token + */ +declare const token: { + (rule: RuleOrLiteral): TokenRule; + + /** + * Marks the given rule as producing an immediate token. This allows + * the parser to produce a different token based on whether or not + * there are `extras` preceding the token's main content. When there + * are _no_ leading `extras`, an immediate token is preferred over a + * normal token which would otherwise match. 
+ * + * @param rule rule to represent as an immediate token + */ + immediate(rule: RuleOrLiteral): ImmediateTokenRule; +}; + +/** + * Creates a new language grammar with the provided schema. + * + * @param options grammar options + */ +declare function grammar( + options: Grammar, +): GrammarSchema; + +/** + * Extends an existing language grammar with the provided options, + * creating a new language. + * + * @param baseGrammar base grammar schema to extend from + * @param options grammar options for the new extended language + */ +declare function grammar< + BaseGrammarRuleName extends string, + RuleName extends string +>( + baseGrammar: GrammarSchema, + options: Grammar, +): GrammarSchema; diff --git a/cli/npm/package.json b/cli/npm/package.json index 4f4d08cc..4c6dfe90 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.15.8", + "version": "0.17.3", "author": "Max Brunsfeld", "license": "MIT", "repository": { diff --git a/cli/src/error.rs b/cli/src/error.rs index 968486f4..63b57c9e 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -1,6 +1,7 @@ +use super::test_highlight; use std::fmt::Write; use std::io; -use tree_sitter_highlight::PropertySheetError; +use tree_sitter::{QueryError, QueryErrorKind}; #[derive(Debug)] pub struct Error(pub Vec); @@ -50,6 +51,34 @@ impl Error { } } +impl<'a> From<(&str, QueryError)> for Error { + fn from((path, error): (&str, QueryError)) -> Self { + let mut msg = format!("Query error at {}:{}. 
", path, error.row + 1); + match error.kind { + QueryErrorKind::Capture => write!(&mut msg, "Invalid capture name {}", error.message), + QueryErrorKind::Field => write!(&mut msg, "Invalid field name {}", error.message), + QueryErrorKind::NodeType => write!(&mut msg, "Invalid node type {}", error.message), + QueryErrorKind::Syntax => write!(&mut msg, "Invalid syntax:\n{}", error.message), + QueryErrorKind::Structure => write!(&mut msg, "Impossible pattern:\n{}", error.message), + QueryErrorKind::Predicate => write!(&mut msg, "Invalid predicate: {}", error.message), + } + .unwrap(); + Self::new(msg) + } +} + +impl<'a> From for Error { + fn from(error: tree_sitter_highlight::Error) -> Self { + Error::new(format!("{:?}", error)) + } +} + +impl<'a> From for Error { + fn from(error: tree_sitter_tags::Error) -> Self { + Error::new(format!("{}", error)) + } +} + impl From for Error { fn from(error: serde_json::Error) -> Self { Error::new(error.to_string()) @@ -62,8 +91,14 @@ impl From for Error { } } -impl From for Error { - fn from(error: rsass::Error) -> Self { +impl From for Error { + fn from(error: glob::PatternError) -> Self { + Error::new(error.to_string()) + } +} + +impl From for Error { + fn from(error: glob::GlobError) -> Self { Error::new(error.to_string()) } } @@ -74,18 +109,14 @@ impl From for Error { } } +impl From for Error { + fn from(error: test_highlight::Failure) -> Self { + Error::new(error.message()) + } +} + impl From for Error { fn from(error: String) -> Self { Error::new(error) } } - -impl From for Error { - fn from(error: PropertySheetError) -> Self { - match error { - PropertySheetError::InvalidFormat(e) => Self::from(e), - PropertySheetError::InvalidRegex(e) => Self::regex(&e.to_string()), - PropertySheetError::InvalidJSON(e) => Self::from(e), - } - } -} diff --git a/cli/src/generate/build_tables/build_lex_table.rs b/cli/src/generate/build_tables/build_lex_table.rs index 21594253..b365feb1 100644 --- 
a/cli/src/generate/build_tables/build_lex_table.rs +++ b/cli/src/generate/build_tables/build_lex_table.rs @@ -2,7 +2,7 @@ use super::coincident_tokens::CoincidentTokenIndex; use super::token_conflicts::TokenConflictMap; use crate::generate::dedup::split_state_id_groups; use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; -use crate::generate::nfa::{CharacterSet, NfaCursor}; +use crate::generate::nfa::NfaCursor; use crate::generate::rules::{Symbol, TokenSet}; use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable}; use log::info; @@ -189,13 +189,10 @@ impl<'a> LexTableBuilder<'a> { // character that leads to the empty set of NFA states. if eof_valid { let (next_state_id, _) = self.add_state(Vec::new(), false); - self.table.states[state_id].advance_actions.push(( - CharacterSet::empty().add_char('\0'), - AdvanceAction { - state: next_state_id, - in_main_token: true, - }, - )); + self.table.states[state_id].eof_action = Some(AdvanceAction { + state: next_state_id, + in_main_token: true, + }); } for transition in transitions { @@ -273,6 +270,7 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { let signature = ( i == 0, state.accept_action, + state.eof_action.is_some(), state .advance_actions .iter() @@ -320,6 +318,9 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { for (_, advance_action) in new_state.advance_actions.iter_mut() { advance_action.state = group_ids_by_state_id[advance_action.state]; } + if let Some(eof_action) = &mut new_state.eof_action { + eof_action.state = group_ids_by_state_id[eof_action.state]; + } new_states.push(new_state); } @@ -364,6 +365,9 @@ fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) { for (_, advance_action) in state.advance_actions.iter_mut() { advance_action.state = new_ids_by_old_id[advance_action.state]; } + if let Some(eof_action) = &mut state.eof_action { + eof_action.state = 
new_ids_by_old_id[eof_action.state]; + } state }) .collect(); diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 41d3932c..c63701ee 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -7,7 +7,7 @@ use crate::generate::grammars::{ use crate::generate::node_types::VariableInfo; use crate::generate::rules::{Associativity, Symbol, SymbolType, TokenSet}; use crate::generate::tables::{ - FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, + FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, ProductionInfo, ProductionInfoId, }; use core::ops::Range; @@ -16,17 +16,19 @@ use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; use std::fmt::Write; use std::u32; +// For conflict reporting, each parse state is associated with an example +// sequence of symbols that could lead to that parse state. +type SymbolSequence = Vec; + +type AuxiliarySymbolSequence = Vec; +pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>); + #[derive(Clone)] struct AuxiliarySymbolInfo { auxiliary_symbol: Symbol, parent_symbols: Vec, } -type SymbolSequence = Vec; -type AuxiliarySymbolSequence = Vec; - -pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>); - struct ParseStateQueueEntry { state_id: ParseStateId, preceding_auxiliary_symbols: AuxiliarySymbolSequence, @@ -41,6 +43,7 @@ struct ParseTableBuilder<'a> { state_ids_by_item_set: HashMap, ParseStateId>, parse_state_info_by_id: Vec>, parse_state_queue: VecDeque, + non_terminal_extra_states: Vec<(Symbol, usize)>, parse_table: ParseTable, } @@ -52,7 +55,7 @@ impl<'a> ParseTableBuilder<'a> { .push(ProductionInfo::default()); // Add the error state at index 0. 
- self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); + self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default(), false); // Add the starting state at index 1. self.add_parse_state( @@ -66,8 +69,40 @@ impl<'a> ParseTableBuilder<'a> { .iter() .cloned(), ), + false, ); + // Compute the possible item sets for non-terminal extras. + let mut non_terminal_extra_item_sets_by_first_terminal = BTreeMap::new(); + for extra_non_terminal in self + .syntax_grammar + .extra_symbols + .iter() + .filter(|s| s.is_non_terminal()) + { + let variable = &self.syntax_grammar.variables[extra_non_terminal.index]; + for production in &variable.productions { + non_terminal_extra_item_sets_by_first_terminal + .entry(production.first_symbol().unwrap()) + .or_insert(ParseItemSet::default()) + .insert( + ParseItem { + variable_index: extra_non_terminal.index as u32, + production, + step_index: 1, + }, + &[Symbol::end()].iter().cloned().collect(), + ); + } + } + + // Add a state for each starting terminal of a non-terminal extra rule. + for (terminal, item_set) in non_terminal_extra_item_sets_by_first_terminal { + self.non_terminal_extra_states + .push((terminal, self.parse_table.states.len())); + self.add_parse_state(&Vec::new(), &Vec::new(), item_set, true); + } + while let Some(entry) = self.parse_state_queue.pop_front() { let item_set = self .item_set_builder @@ -91,9 +126,15 @@ impl<'a> ParseTableBuilder<'a> { preceding_symbols: &SymbolSequence, preceding_auxiliary_symbols: &AuxiliarySymbolSequence, item_set: ParseItemSet<'a>, + is_non_terminal_extra: bool, ) -> ParseStateId { match self.state_ids_by_item_set.entry(item_set) { + // If an equivalent item set has already been processed, then return + // the existing parse state index. Entry::Occupied(o) => *o.get(), + + // Otherwise, insert a new parse state and add it to the queue of + // parse states to populate. 
Entry::Vacant(v) => { let core = v.key().core(); let core_count = self.core_ids_by_core.len(); @@ -116,6 +157,7 @@ impl<'a> ParseTableBuilder<'a> { terminal_entries: HashMap::new(), nonterminal_entries: HashMap::new(), core_id, + is_non_terminal_extra, }); self.parse_state_queue.push_back(ParseStateQueueEntry { state_id, @@ -138,7 +180,12 @@ impl<'a> ParseTableBuilder<'a> { let mut non_terminal_successors = BTreeMap::new(); let mut lookaheads_with_conflicts = TokenSet::new(); + // Each item in the item set contributes to either or a Shift action or a Reduce + // action in this state. for (item, lookaheads) in &item_set.entries { + // If the item is unfinished, then this state has a transition for the item's + // next symbol. Advance the item to its next step and insert the resulting + // item into the successor item set. if let Some(next_symbol) = item.symbol() { let successor = item.successor(); if next_symbol.is_non_terminal() { @@ -160,7 +207,10 @@ impl<'a> ParseTableBuilder<'a> { .or_insert_with(|| ParseItemSet::default()) .insert(successor, lookaheads); } - } else { + } + // If the item is finished, then add a Reduce action to this state based + // on this item. + else { let action = if item.is_augmented() { ParseAction::Accept } else { @@ -179,6 +229,10 @@ impl<'a> ParseTableBuilder<'a> { .terminal_entries .entry(lookahead); let entry = entry.or_insert_with(|| ParseTableEntry::new()); + + // While inserting Reduce actions, eagerly resolve conflicts related + // to precedence: avoid inserting lower-precedence reductions, and + // clear the action list when inserting higher-precedence reductions. if entry.actions.is_empty() { entry.actions.push(action); } else if action.precedence() > entry.actions[0].precedence() { @@ -193,12 +247,16 @@ impl<'a> ParseTableBuilder<'a> { } } + // Having computed the the successor item sets for each symbol, add a new + // parse state for each of these item sets, and add a corresponding Shift + // action to this state. 
for (symbol, next_item_set) in terminal_successors { preceding_symbols.push(symbol); let next_state_id = self.add_parse_state( &preceding_symbols, &preceding_auxiliary_symbols, next_item_set, + self.parse_table.states[state_id].is_non_terminal_extra, ); preceding_symbols.pop(); @@ -226,13 +284,19 @@ impl<'a> ParseTableBuilder<'a> { &preceding_symbols, &preceding_auxiliary_symbols, next_item_set, + self.parse_table.states[state_id].is_non_terminal_extra, ); preceding_symbols.pop(); self.parse_table.states[state_id] .nonterminal_entries - .insert(symbol, next_state_id); + .insert(symbol, GotoAction::Goto(next_state_id)); } + // For any symbol with multiple actions, perform conflict resolution. + // This will either + // * choose one action over the others using precedence or associativity + // * keep multiple actions if this conflict has been whitelisted in the grammar + // * fail, terminating the parser generation process for symbol in lookaheads_with_conflicts.iter() { self.handle_conflict( &item_set, @@ -243,15 +307,50 @@ impl<'a> ParseTableBuilder<'a> { )?; } + // Finally, add actions for the grammar's `extra` symbols. let state = &mut self.parse_table.states[state_id]; - for extra_token in &self.syntax_grammar.extra_tokens { - state - .terminal_entries - .entry(*extra_token) - .or_insert(ParseTableEntry { - reusable: true, - actions: vec![ParseAction::ShiftExtra], - }); + let is_non_terminal_extra = state.is_non_terminal_extra; + let is_end_of_non_terminal_extra = + is_non_terminal_extra && state.terminal_entries.len() == 1; + + // Add actions for the start tokens of each non-terminal extra rule. + // These actions are added to every state except for the states that are + // alread within non-terminal extras. Non-terminal extras are not allowed + // to nest within each other. 
+ if !is_non_terminal_extra { + for (terminal, state_id) in &self.non_terminal_extra_states { + state + .terminal_entries + .entry(*terminal) + .or_insert(ParseTableEntry { + reusable: true, + actions: vec![ParseAction::Shift { + state: *state_id, + is_repetition: false, + }], + }); + } + } + + // Add ShiftExtra actions for the terminal extra tokens. These actions + // are added to every state except for those at the ends of non-terminal + // extras. + if !is_end_of_non_terminal_extra { + for extra_token in &self.syntax_grammar.extra_symbols { + if extra_token.is_non_terminal() { + state + .nonterminal_entries + .insert(*extra_token, GotoAction::ShiftExtra); + } else { + state + .terminal_entries + .entry(*extra_token) + .or_insert(ParseTableEntry { + reusable: true, + actions: vec![ParseAction::ShiftExtra], + }); + } + } } Ok(()) @@ -362,8 +461,8 @@ impl<'a> ParseTableBuilder<'a> { } } - // If all reduce actions are left associative, remove the SHIFT action. - // If all reduce actions are right associative, remove the REDUCE actions. + // If all Reduce actions are left associative, remove the SHIFT action. + // If all Reduce actions are right associative, remove the REDUCE actions. 
match (has_left, has_non, has_right) { (true, false, false) => { entry.actions.pop(); @@ -744,7 +843,7 @@ fn populate_following_tokens( } } } - for extra in &grammar.extra_tokens { + for extra in &grammar.extra_symbols { if extra.is_terminal() { for entry in result.iter_mut() { entry.insert(*extra); @@ -774,6 +873,7 @@ pub(crate) fn build_parse_table<'a>( lexical_grammar, item_set_builder, variable_info, + non_terminal_extra_states: Vec::new(), state_ids_by_item_set: HashMap::new(), core_ids_by_core: HashMap::new(), parse_state_info_by_id: Vec::new(), diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index 5ecde0fd..d159a2c4 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -2,7 +2,9 @@ use super::token_conflicts::TokenConflictMap; use crate::generate::dedup::split_state_id_groups; use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; use crate::generate::rules::{AliasMap, Symbol, TokenSet}; -use crate::generate::tables::{ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry}; +use crate::generate::tables::{ + GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, +}; use log::info; use std::collections::{HashMap, HashSet}; use std::mem; @@ -66,6 +68,7 @@ impl<'a> Minimizer<'a> { .. 
} => { if !self.simple_aliases.contains_key(&symbol) + && !self.syntax_grammar.supertype_symbols.contains(&symbol) && !aliased_symbols.contains(&symbol) && self.syntax_grammar.variables[symbol.index].kind != VariableType::Named @@ -101,7 +104,10 @@ impl<'a> Minimizer<'a> { state.update_referenced_states(|other_state_id, state| { if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) { done = false; - state.nonterminal_entries[symbol] + match state.nonterminal_entries.get(symbol) { + Some(GotoAction::Goto(state_id)) => *state_id, + _ => other_state_id, + } } else { other_state_id } @@ -194,6 +200,9 @@ impl<'a> Minimizer<'a> { right_state: &ParseState, group_ids_by_state_id: &Vec, ) -> bool { + if left_state.is_non_terminal_extra != right_state.is_non_terminal_extra { + return true; + } for (token, left_entry) in &left_state.terminal_entries { if let Some(right_entry) = right_state.terminal_entries.get(token) { if self.entries_conflict( @@ -262,18 +271,24 @@ impl<'a> Minimizer<'a> { for (symbol, s1) in &state1.nonterminal_entries { if let Some(s2) = state2.nonterminal_entries.get(symbol) { - let group1 = group_ids_by_state_id[*s1]; - let group2 = group_ids_by_state_id[*s2]; - if group1 != group2 { - info!( - "split states {} {} - successors for {} are split: {} {}", - state1.id, - state2.id, - self.symbol_name(symbol), - s1, - s2, - ); - return true; + match (s1, s2) { + (GotoAction::ShiftExtra, GotoAction::ShiftExtra) => continue, + (GotoAction::Goto(s1), GotoAction::Goto(s2)) => { + let group1 = group_ids_by_state_id[*s1]; + let group2 = group_ids_by_state_id[*s2]; + if group1 != group2 { + info!( + "split states {} {} - successors for {} are split: {} {}", + state1.id, + state2.id, + self.symbol_name(symbol), + s1, + s2, + ); + return true; + } + } + _ => return true, } } } diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index e0f84244..2e5d2f57 100644 --- a/cli/src/generate/build_tables/mod.rs +++ 
b/cli/src/generate/build_tables/mod.rs @@ -271,6 +271,7 @@ fn identify_keywords( cursor.reset(vec![variable.start_state]); if all_chars_are_alphabetical(&cursor) && token_conflict_map.does_match_same_string(i, word_token.index) + && !token_conflict_map.does_match_different_string(i, word_token.index) { info!( "Keywords - add candidate {}", diff --git a/cli/src/generate/build_tables/token_conflicts.rs b/cli/src/generate/build_tables/token_conflicts.rs index edb92108..64e7564b 100644 --- a/cli/src/generate/build_tables/token_conflicts.rs +++ b/cli/src/generate/build_tables/token_conflicts.rs @@ -1,9 +1,9 @@ -use crate::generate::build_tables::item::{TokenSetDisplay}; +use crate::generate::build_tables::item::TokenSetDisplay; use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition}; use crate::generate::rules::TokenSet; -use std::collections::HashSet; use std::cmp::Ordering; +use std::collections::HashSet; use std::fmt; #[derive(Clone, Debug, Default, PartialEq, Eq)] @@ -13,6 +13,7 @@ struct TokenConflictStatus { does_match_valid_continuation: bool, does_match_separators: bool, matches_same_string: bool, + matches_different_string: bool, } pub(crate) struct TokenConflictMap<'a> { @@ -25,6 +26,12 @@ pub(crate) struct TokenConflictMap<'a> { } impl<'a> TokenConflictMap<'a> { + /// Create a token conflict map based on a lexical grammar, which describes the structure + /// of each token, and a `following_tokens` map, which indicates which tokens may appear + /// immediately after each other token. + /// + /// This analyzes the possible kinds of overlap between each pair of tokens and stores + /// them in a matrix.
pub fn new(grammar: &'a LexicalGrammar, following_tokens: Vec) -> Self { let mut cursor = NfaCursor::new(&grammar.nfa, Vec::new()); let starting_chars = get_starting_chars(&mut cursor, grammar); @@ -50,12 +57,21 @@ impl<'a> TokenConflictMap<'a> { } } + /// Do tokens `a` and `b` have the same conflict status with respect to + /// token `other`? pub fn has_same_conflict_status(&self, a: usize, b: usize, other: usize) -> bool { let left = &self.status_matrix[matrix_index(self.n, a, other)]; let right = &self.status_matrix[matrix_index(self.n, b, other)]; left == right } + /// Does token `i` match any strings that token `j` does *not* match? + pub fn does_match_different_string(&self, i: usize, j: usize) -> bool { + self.status_matrix[matrix_index(self.n, i, j)].matches_different_string + } + + /// Does token `i` match any strings that token `j` also matches, where + /// token `i` is preferred over token `j`? pub fn does_match_same_string(&self, i: usize, j: usize) -> bool { self.status_matrix[matrix_index(self.n, i, j)].matches_same_string } @@ -67,6 +83,7 @@ impl<'a> TokenConflictMap<'a> { || entry.matches_same_string } + /// Does token `i` match any strings that are *prefixes* of strings matched by `j`? pub fn does_match_prefix(&self, i: usize, j: usize) -> bool { self.status_matrix[matrix_index(self.n, i, j)].matches_prefix } @@ -239,19 +256,29 @@ fn compute_conflict_status( ); while let Some(state_set) = state_set_queue.pop() { - // Don't pursue states where there's no potential for conflict. - if grammar.variable_indices_for_nfa_states(&state_set).count() > 1 { - cursor.reset(state_set); - } else { + let mut live_variable_indices = grammar.variable_indices_for_nfa_states(&state_set); + + // If only one of the two tokens could possibly match from this state, then + // there is no reason to analyze any of its successors. Just record the fact + // that the token matches a string that the other token does not match.
+ let first_live_variable_index = live_variable_indices.next().unwrap(); + if live_variable_indices.count() == 0 { + if first_live_variable_index == i { + result.0.matches_different_string = true; + } else { + result.1.matches_different_string = true; + } continue; } - let has_sep = cursor.transition_chars().any(|(_, sep)| sep); + // Don't pursue states where there's no potential for conflict. + cursor.reset(state_set); + let within_separator = cursor.transition_chars().any(|(_, sep)| sep); // Examine each possible completed token in this state. let mut completion = None; for (id, precedence) in cursor.completions() { - if has_sep { + if within_separator { if id == i { result.0.does_match_separators = true; } else { @@ -316,7 +343,7 @@ fn compute_conflict_status( &transition, completed_id, completed_precedence, - has_sep, + within_separator, ) { can_advance = true; if advanced_id == i { diff --git a/cli/src/generate/dsl.js b/cli/src/generate/dsl.js index 55594871..62fb1d70 100644 --- a/cli/src/generate/dsl.js +++ b/cli/src/generate/dsl.js @@ -292,7 +292,12 @@ function grammar(baseGrammar, options) { extras = options.extras .call(ruleBuilder, ruleBuilder, baseGrammar.extras) - .map(normalize); + + if (!Array.isArray(extras)) { + throw new Error("Grammar's 'extras' function must return an array.") + } + + extras = extras.map(normalize); } let word = baseGrammar.word; diff --git a/cli/src/generate/grammar-schema.json b/cli/src/generate/grammar-schema.json index 55388364..5ca35370 100644 --- a/cli/src/generate/grammar-schema.json +++ b/cli/src/generate/grammar-schema.json @@ -1,15 +1,15 @@ { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "tree-sitter grammar specification", "type": "object", - "required": [ - "name", - "rules" - ], + "required": ["name", "rules"], "additionalProperties": false, "properties": { "name": { + "description": "the name of the grammar", "type": "string", "pattern": "^[a-zA-Z_]\\w*" }, @@ -60,6 +60,15 @@ "word": { "type": 
"string", "pattern": "^[a-zA-Z_]\\w*" + }, + + "supertypes": { + "description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.", + "type": "array", + "items": { + "description": "the name of a rule in `rules` or `extras`", + "type": "string" + } } }, @@ -96,20 +105,19 @@ "type": "string", "pattern": "^PATTERN$" }, - "value": {"type": "string"} + "value": { "type": "string" } }, "required": ["type", "value"] }, "symbol-rule": { - "required": ["name"], "type": "object", "properties": { "type": { "type": "string", "pattern": "^SYMBOL$" }, - "name": {"type": "string"} + "name": { "type": "string" } }, "required": ["type", "name"] }, @@ -210,6 +218,20 @@ "required": ["type", "content"] }, + "field-rule": { + "properties": { + "name": { "type": "string" }, + "type": { + "type": "string", + "pattern": "^FIELD$" + }, + "content": { + "$ref": "#/definitions/rule" + } + }, + "required": ["name", "type", "content"] + }, + "prec-rule": { "type": "object", "properties": { @@ -239,6 +261,7 @@ { "$ref": "#/definitions/repeat1-rule" }, { "$ref": "#/definitions/repeat-rule" }, { "$ref": "#/definitions/token-rule" }, + { "$ref": "#/definitions/field-rule" }, { "$ref": "#/definitions/prec-rule" } ] } diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs index 0b42c4c3..6cf325dd 100644 --- a/cli/src/generate/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -23,7 +23,7 @@ pub(crate) struct Variable { pub(crate) struct InputGrammar { pub name: String, pub variables: Vec, - pub extra_tokens: Vec, + pub extra_symbols: Vec, pub expected_conflicts: Vec>, pub external_tokens: Vec, pub variables_to_inline: Vec, @@ -87,7 +87,7 @@ pub(crate) struct ExternalToken { #[derive(Debug, Default)] pub(crate) struct SyntaxGrammar { pub variables: Vec, - pub extra_tokens: Vec, + pub extra_symbols: Vec, pub expected_conflicts: Vec>, pub external_tokens: 
Vec, pub supertype_symbols: Vec, diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 5446e4af..830c4a65 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -6,13 +6,12 @@ mod node_types; mod npm_files; pub mod parse_grammar; mod prepare_grammar; -pub mod properties; mod render; mod rules; mod tables; use self::build_tables::build_tables; -use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType}; +use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; use self::parse_grammar::parse_grammar; use self::prepare_grammar::prepare_grammar; use self::render::render_c_code; @@ -20,9 +19,8 @@ use self::rules::AliasMap; use crate::error::{Error, Result}; use lazy_static::lazy_static; use regex::{Regex, RegexBuilder}; -use std::collections::HashSet; -use std::fs::{self, File}; -use std::io::{BufWriter, Write}; +use std::fs; +use std::io::Write; use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; @@ -33,15 +31,9 @@ lazy_static! { .unwrap(); } -const NEW_HEADER_PARTS: [&'static str; 2] = [ - " - uint32_t large_state_count; - const uint16_t *small_parse_table; - const uint32_t *small_parse_table_map;", - " -#define SMALL_STATE(id) id - LARGE_STATE_COUNT -", -]; +const NEW_HEADER_PARTS: &[&'static str] = &[" + const uint16_t *alias_map; + uint32_t state_count;"]; struct GeneratedParser { c_code: String, @@ -51,13 +43,11 @@ struct GeneratedParser { pub fn generate_parser_in_directory( repo_path: &PathBuf, grammar_path: Option<&str>, - properties_only: bool, next_abi: bool, report_symbol_name: Option<&str>, ) -> Result<()> { let src_path = repo_path.join("src"); let header_path = src_path.join("tree_sitter"); - let properties_dir_path = repo_path.join("properties"); // Ensure that the output directories exist. 
fs::create_dir_all(&src_path)?; @@ -82,71 +72,48 @@ pub fn generate_parser_in_directory( prepare_grammar(&input_grammar)?; let language_name = input_grammar.name; - // If run with no arguments, read all of the property sheets and compile them to JSON. - if grammar_path.is_none() { - let token_names = get_token_names(&syntax_grammar, &lexical_grammar); - if let Ok(entries) = fs::read_dir(properties_dir_path) { - for entry in entries { - let css_path = entry?.path(); - let css = fs::read_to_string(&css_path)?; - let sheet = properties::generate_property_sheet(&css_path, &css, &token_names)?; - let property_sheet_json_path = src_path - .join(css_path.file_name().unwrap()) - .with_extension("json"); - let property_sheet_json_file = - File::create(&property_sheet_json_path).map_err(Error::wrap(|| { - format!("Failed to create {:?}", property_sheet_json_path) - }))?; - let mut writer = BufWriter::new(property_sheet_json_file); - serde_json::to_writer_pretty(&mut writer, &sheet)?; - } - } - } - // Generate the parser and related files. 
- if !properties_only { - let GeneratedParser { - c_code, - node_types_json, - } = generate_parser_for_grammar_with_opts( - &language_name, - syntax_grammar, - lexical_grammar, - inlines, - simple_aliases, - next_abi, - report_symbol_name, - )?; + let GeneratedParser { + c_code, + node_types_json, + } = generate_parser_for_grammar_with_opts( + &language_name, + syntax_grammar, + lexical_grammar, + inlines, + simple_aliases, + next_abi, + report_symbol_name, + )?; - write_file(&src_path.join("parser.c"), c_code)?; - write_file(&src_path.join("node-types.json"), node_types_json)?; + write_file(&src_path.join("parser.c"), c_code)?; + write_file(&src_path.join("node-types.json"), node_types_json)?; - if next_abi { - write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?; - } else { - let mut header = tree_sitter::PARSER_HEADER.to_string(); + if next_abi { + write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?; + } else { + let mut header = tree_sitter::PARSER_HEADER.to_string(); - for part in &NEW_HEADER_PARTS { - let pos = header - .find(part) - .expect("Missing expected part of parser.h header"); - header.replace_range(pos..(pos + part.len()), ""); - } - - write_file(&header_path.join("parser.h"), header)?; + for part in NEW_HEADER_PARTS.iter() { + let pos = header + .find(part) + .expect("Missing expected part of parser.h header"); + header.replace_range(pos..(pos + part.len()), ""); } - ensure_file(&repo_path.join("index.js"), || { - npm_files::index_js(&language_name) - })?; - ensure_file(&src_path.join("binding.cc"), || { - npm_files::binding_cc(&language_name) - })?; - ensure_file(&repo_path.join("binding.gyp"), || { - npm_files::binding_gyp(&language_name) - })?; + write_file(&header_path.join("parser.h"), header)?; } + ensure_file(&repo_path.join("index.js"), || { + npm_files::index_js(&language_name) + })?; + ensure_file(&src_path.join("binding.cc"), || { + npm_files::binding_cc(&language_name) + })?; + 
ensure_file(&repo_path.join("binding.gyp"), || { + npm_files::binding_gyp(&language_name) + })?; + Ok(()) } @@ -176,7 +143,8 @@ fn generate_parser_for_grammar_with_opts( next_abi: bool, report_symbol_name: Option<&str>, ) -> Result { - let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &inlines)?; + let variable_info = + node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?; let node_types_json = node_types::generate_node_types_json( &syntax_grammar, &lexical_grammar, @@ -208,35 +176,6 @@ fn generate_parser_for_grammar_with_opts( }) } -fn get_token_names( - syntax_grammar: &SyntaxGrammar, - lexical_grammar: &LexicalGrammar, -) -> HashSet { - let mut result = HashSet::new(); - for variable in &lexical_grammar.variables { - if variable.kind == VariableType::Named { - result.insert(variable.name.clone()); - } - } - for token in &syntax_grammar.external_tokens { - if token.kind == VariableType::Named { - result.insert(token.name.clone()); - } - } - for variable in &syntax_grammar.variables { - for production in &variable.productions { - for step in &production.steps { - if let Some(alias) = &step.alias { - if !step.symbol.is_non_terminal() && alias.is_named { - result.insert(alias.value.clone()); - } - } - } - } - } - result -} - fn load_grammar_file(grammar_path: &Path) -> Result { match grammar_path.extension().and_then(|e| e.to_str()) { Some("js") => Ok(load_js_grammar_file(grammar_path)?), diff --git a/cli/src/generate/nfa.rs b/cli/src/generate/nfa.rs index 2e23dc1e..4cbfaaa3 100644 --- a/cli/src/generate/nfa.rs +++ b/cli/src/generate/nfa.rs @@ -1,8 +1,10 @@ use std::char; use std::cmp::max; use std::cmp::Ordering; +use std::collections::HashSet; use std::fmt; use std::mem::swap; +use std::ops::Range; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum CharacterSet { @@ -178,6 +180,40 @@ impl CharacterSet { } } + pub fn ranges<'a>( + chars: &'a Vec, + ruled_out_characters: &'a HashSet, + ) -> impl 
Iterator> + 'a { + let mut prev_range: Option> = None; + chars + .iter() + .map(|c| (*c, false)) + .chain(Some(('\0', true))) + .filter_map(move |(c, done)| { + if done { + return prev_range.clone(); + } + if ruled_out_characters.contains(&(c as u32)) { + return None; + } + if let Some(range) = prev_range.clone() { + let mut prev_range_successor = range.end as u32 + 1; + while prev_range_successor < c as u32 { + if !ruled_out_characters.contains(&prev_range_successor) { + prev_range = Some(c..c); + return Some(range); + } + prev_range_successor += 1; + } + prev_range = Some(range.start..c); + None + } else { + prev_range = Some(c..c); + None + } + }) + } + #[cfg(test)] pub fn contains(&self, c: char) -> bool { match self { @@ -266,6 +302,13 @@ fn compare_chars(left: &Vec, right: &Vec) -> SetComparision { result.common = true; } } + + match (i, j) { + (Some(_), _) => result.left_only = true, + (_, Some(_)) => result.right_only = true, + _ => {} + } + result } @@ -718,7 +761,7 @@ mod tests { .add_range('d', 'e') ); - // A whitelist and an intersecting blacklist. + // An inclusion and an intersecting exclusion. // Both sets contain 'e', 'f', and 'm' let mut a = CharacterSet::empty() .add_range('c', 'h') @@ -748,7 +791,7 @@ mod tests { assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l'])); assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate()); - // A blacklist and an overlapping blacklist. + // An exclusion and an overlapping inclusion. // Both sets exclude 'c', 'd', and 'e' let mut a = CharacterSet::empty().add_range('a', 'e').negate(); let mut b = CharacterSet::empty().add_range('c', 'h').negate(); @@ -759,7 +802,7 @@ mod tests { assert_eq!(a, CharacterSet::Include(vec!['f', 'g', 'h'])); assert_eq!(b, CharacterSet::Include(vec!['a', 'b'])); - // A blacklist and a larger blacklist. + // An exclusion and a larger exclusion. 
let mut a = CharacterSet::empty().add_range('b', 'c').negate(); let mut b = CharacterSet::empty().add_range('a', 'd').negate(); assert_eq!( @@ -810,5 +853,53 @@ mod tests { ); assert!(a.does_intersect(&b)); assert!(b.does_intersect(&a)); + + let (a, b) = ( + CharacterSet::Include(vec!['c']), + CharacterSet::Exclude(vec!['a']), + ); + assert!(a.does_intersect(&b)); + assert!(b.does_intersect(&a)); + } + + #[test] + fn test_character_set_get_ranges() { + struct Row { + chars: Vec, + ruled_out_chars: Vec, + expected_ranges: Vec>, + } + + let table = [ + Row { + chars: vec!['a'], + ruled_out_chars: vec![], + expected_ranges: vec!['a'..'a'], + }, + Row { + chars: vec!['a', 'b', 'c', 'e', 'z'], + ruled_out_chars: vec![], + expected_ranges: vec!['a'..'c', 'e'..'e', 'z'..'z'], + }, + Row { + chars: vec!['a', 'b', 'c', 'e', 'h', 'z'], + ruled_out_chars: vec!['d', 'f', 'g'], + expected_ranges: vec!['a'..'h', 'z'..'z'], + }, + ]; + + for Row { + chars, + ruled_out_chars, + expected_ranges, + } in table.iter() + { + let ruled_out_chars = ruled_out_chars + .into_iter() + .map(|c: &char| *c as u32) + .collect(); + let ranges = CharacterSet::ranges(chars, &ruled_out_chars).collect::>(); + assert_eq!(ranges, *expected_ranges); + } } } diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 4dab4470..bc5a836f 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -1,12 +1,8 @@ -use super::grammars::{ - InlinedProductionMap, LexicalGrammar, Production, ProductionStep, SyntaxGrammar, - SyntaxVariable, VariableType, -}; +use super::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; use super::rules::{Alias, AliasMap, Symbol, SymbolType}; use crate::error::{Error, Result}; use serde_derive::Serialize; -use std::collections::{BTreeMap, HashMap}; -use std::mem; +use std::collections::{BTreeMap, HashMap, HashSet}; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub(crate) enum ChildType { @@ -16,20 +12,19 @@ 
pub(crate) enum ChildType { #[derive(Clone, Debug, Default, PartialEq, Eq)] pub(crate) struct FieldInfo { - pub required: bool, - pub multiple: bool, + pub quantity: ChildQuantity, pub types: Vec, } -#[derive(Debug, Default, PartialEq, Eq)] +#[derive(Clone, Debug, Default, PartialEq, Eq)] pub(crate) struct VariableInfo { pub fields: HashMap, - pub child_types: Vec, + pub children: FieldInfo, pub children_without_fields: FieldInfo, pub has_multi_step_production: bool, } -#[derive(Debug, Serialize, PartialEq, Eq, Default)] +#[derive(Debug, Serialize, PartialEq, Eq, Default, PartialOrd, Ord)] pub(crate) struct NodeInfoJSON { #[serde(rename = "type")] kind: String, @@ -42,252 +37,296 @@ pub(crate) struct NodeInfoJSON { subtypes: Option>, } -#[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Clone, Debug, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)] pub(crate) struct NodeTypeJSON { #[serde(rename = "type")] kind: String, named: bool, } -#[derive(Debug, Serialize, PartialEq, Eq)] +#[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)] pub(crate) struct FieldInfoJSON { multiple: bool, required: bool, types: Vec, } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct ChildQuantity { + exists: bool, + required: bool, + multiple: bool, +} + +impl Default for FieldInfoJSON { + fn default() -> Self { + FieldInfoJSON { + multiple: false, + required: true, + types: Vec::new(), + } + } +} + +impl Default for ChildQuantity { + fn default() -> Self { + Self::one() + } +} + +impl ChildQuantity { + fn zero() -> Self { + ChildQuantity { + exists: false, + required: false, + multiple: false, + } + } + + fn one() -> Self { + ChildQuantity { + exists: true, + required: true, + multiple: false, + } + } + + fn append(&mut self, other: ChildQuantity) { + if other.exists { + if self.exists || other.multiple { + self.multiple = true; + } + if other.required { + self.required = true; + } + self.exists = true; + } + } + + fn union(&mut self, other: 
ChildQuantity) -> bool { + let mut result = false; + if !self.exists && other.exists { + result = true; + self.exists = true; + } + if self.required && !other.required { + result = true; + self.required = false; + } + if !self.multiple && other.multiple { + result = true; + self.multiple = true; + } + result + } +} + +/// Compute a summary of the public-facing structure of each variable in the +/// grammar. Each variable in the grammar corresponds to a distinct public-facing +/// node type. +/// +/// The information collected about each node type `N` is: +/// 1. `child_types` - The types of visible children that can appear within `N`. +/// 2. `fields` - The fields that `N` can have. Data regarding each field: +/// * `types` - The types of visible children the field can contain. +/// * `optional` - Do `N` nodes always have this field? +/// * `multiple` - Can `N` nodes have multiple children for this field? +/// 3. `children_without_fields` - The *other* named children of `N` that are +/// not associated with fields. Data regarding these children: +/// * `types` - The types of named children with no field. +/// * `optional` - Do `N` nodes always have at least one named child with no field? +/// * `multiple` - Can `N` nodes have multiple named children with no field? +/// +/// Each summary must account for some indirect factors: +/// 1. hidden nodes. When a parent node `N` has a hidden child `C`, the visible +/// children of `C` *appear* to be direct children of `N`. +/// 2. aliases. If a parent node type `M` is aliased as some other type `N`, +/// then nodes which *appear* to have type `N` may have internal structure based +/// on `M`. 
pub(crate) fn get_variable_info( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, - inlines: &InlinedProductionMap, + default_aliases: &AliasMap, ) -> Result> { - let mut result = Vec::new(); + let child_type_is_visible = |t: &ChildType| { + variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous + }; - // Determine which field names and child node types can appear directly - // within each type of node. - let mut steps = Vec::new(); - for (i, variable) in syntax_grammar.variables.iter().enumerate() { - let mut info = VariableInfo { - fields: HashMap::new(), - child_types: Vec::new(), - children_without_fields: FieldInfo { - multiple: false, - required: true, - types: Vec::new(), - }, - has_multi_step_production: false, - }; + let child_type_is_named = |t: &ChildType| { + variable_type_for_child_type(t, syntax_grammar, lexical_grammar) == VariableType::Named + }; - steps.clear(); - if get_all_child_steps(variable, inlines, &mut steps) > 1 { - info.has_multi_step_production = true; - } + // Each variable's summary can depend on the summaries of other hidden variables, + // and variables can have mutually recursive structure. So we compute the summaries + // iteratively, in a loop that terminates only when no more changes are possible. 
+ let mut did_change = true; + let mut all_initialized = false; + let mut result = vec![VariableInfo::default(); syntax_grammar.variables.len()]; + while did_change { + did_change = false; - let is_recursive = steps.iter().any(|s| s.symbol == Symbol::non_terminal(i)); - - for step in &steps { - let child_type = if let Some(alias) = &step.alias { - ChildType::Aliased(alias.clone()) - } else { - ChildType::Normal(step.symbol) - }; - - if let Some(field_name) = &step.field_name { - let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo { - multiple: false, - required: true, - types: Vec::new(), - }); - field_info.multiple |= is_recursive; - if let Err(i) = field_info.types.binary_search(&child_type) { - field_info.types.insert(i, child_type.clone()); - } - } else if variable_type_for_child_type(&child_type, syntax_grammar, lexical_grammar) - == VariableType::Named - { - let children_info = &mut info.children_without_fields; - children_info.multiple |= is_recursive; - if let Err(i) = children_info.types.binary_search(&child_type) { - children_info.types.insert(i, child_type.clone()); - } - } - - if let Err(i) = info.child_types.binary_search(&child_type) { - info.child_types.insert(i, child_type.clone()); - } - } - - for production in &variable.productions { - let production_fields: Vec<&String> = production - .steps - .iter() - .filter_map(|s| s.field_name.as_ref()) - .collect(); - for (field_name, field_info) in info.fields.iter_mut() { - let mut occurrence_count = 0; - for f in &production_fields { - if *f == field_name { - occurrence_count += 1; - } - } - if occurrence_count == 0 { - field_info.required = false; - } - if occurrence_count > 1 { - field_info.multiple = true; - } - } - - let named_children_without_fields_count = production - .steps - .iter() - .filter(|s| { - if s.field_name.is_some() { - false - } else if let Some(alias) = &s.alias { - alias.is_named - } else if s.symbol.is_non_terminal() { - true - } else if 
s.symbol.is_external() { - syntax_grammar.external_tokens[s.symbol.index].kind == VariableType::Named - } else { - lexical_grammar.variables[s.symbol.index].kind == VariableType::Named - } - }) - .count(); - if named_children_without_fields_count == 0 { - info.children_without_fields.required = false; - } - if named_children_without_fields_count > 1 { - info.children_without_fields.multiple = true; - } - } - - result.push(info); - } - - // Expand each node type's information recursively to inherit the properties of - // hidden children. - let mut done = false; - while !done { - done = true; for (i, variable) in syntax_grammar.variables.iter().enumerate() { - // Move this variable's info out of the vector so it can be modified - // while reading from other entries of the vector. - let mut variable_info = VariableInfo::default(); - mem::swap(&mut variable_info, &mut result[i]); + let mut variable_info = result[i].clone(); - steps.clear(); - get_all_child_steps(variable, inlines, &mut steps); + // Examine each of the variable's productions. The variable's child types can be + // immediately combined across all productions, but the child quantities must be + // recorded separately for each production. 
+ for production in &variable.productions { + let mut production_field_quantities = HashMap::new(); + let mut production_children_quantity = ChildQuantity::zero(); + let mut production_children_without_fields_quantity = ChildQuantity::zero(); + let mut production_has_uninitialized_invisible_children = false; - for step in &steps { - let child_symbol = step.symbol; - if step.alias.is_none() - && child_symbol.kind == SymbolType::NonTerminal - && !syntax_grammar.variables[child_symbol.index] - .kind - .is_visible() - && !syntax_grammar.supertype_symbols.contains(&child_symbol) - { - let child_variable_info = &result[child_symbol.index]; + if production.steps.len() > 1 { + variable_info.has_multi_step_production = true; + } - // If a hidden child can have multiple children, then this - // node can appear to have multiple children. - if child_variable_info.has_multi_step_production { - variable_info.has_multi_step_production = true; + for step in &production.steps { + let child_symbol = step.symbol; + let child_type = if let Some(alias) = &step.alias { + ChildType::Aliased(alias.clone()) + } else if let Some(alias) = default_aliases.get(&step.symbol) { + ChildType::Aliased(alias.clone()) + } else { + ChildType::Normal(child_symbol) + }; + + let child_is_hidden = !child_type_is_visible(&child_type) + && !syntax_grammar.supertype_symbols.contains(&child_symbol); + + // Maintain the set of all child types for this variable, and the quantity of + // visible children in this production. + did_change |= + extend_sorted(&mut variable_info.children.types, Some(&child_type)); + if !child_is_hidden { + production_children_quantity.append(ChildQuantity::one()); } - // Inherit fields from this hidden child - for (field_name, child_field_info) in &child_variable_info.fields { + // Maintain the set of child types associated with each field, and the quantity + // of children associated with each field in this production. 
+ if let Some(field_name) = &step.field_name { let field_info = variable_info .fields .entry(field_name.clone()) - .or_insert_with(|| { - done = false; - child_field_info.clone() - }); - if child_field_info.multiple && !field_info.multiple { - field_info.multiple = child_field_info.multiple; - done = false; + .or_insert(FieldInfo::default()); + did_change |= extend_sorted(&mut field_info.types, Some(&child_type)); + + let production_field_quantity = production_field_quantities + .entry(field_name) + .or_insert(ChildQuantity::zero()); + + // Inherit the types and quantities of hidden children associated with fields. + if child_is_hidden && child_symbol.is_non_terminal() { + let child_variable_info = &result[child_symbol.index]; + did_change |= extend_sorted( + &mut field_info.types, + &child_variable_info.children.types, + ); + production_field_quantity.append(child_variable_info.children.quantity); + } else { + production_field_quantity.append(ChildQuantity::one()); } - if !child_field_info.required && field_info.required { - field_info.required = child_field_info.required; - done = false; + } + // Maintain the set of named children without fields within this variable. + else if child_type_is_named(&child_type) { + production_children_without_fields_quantity.append(ChildQuantity::one()); + did_change |= extend_sorted( + &mut variable_info.children_without_fields.types, + Some(&child_type), + ); + } + + // Inherit all child information from hidden children. + if child_is_hidden && child_symbol.is_non_terminal() { + let child_variable_info = &result[child_symbol.index]; + + // If a hidden child can have multiple children, then its parent node can + // appear to have multiple children. 
+ if child_variable_info.has_multi_step_production { + variable_info.has_multi_step_production = true; } - for child_type in &child_field_info.types { - if let Err(i) = field_info.types.binary_search(&child_type) { - field_info.types.insert(i, child_type.clone()); - done = false; + + // If a hidden child has fields, then the parent node can appear to have + // those same fields. + for (field_name, child_field_info) in &child_variable_info.fields { + production_field_quantities + .entry(field_name) + .or_insert(ChildQuantity::zero()) + .append(child_field_info.quantity); + did_change |= extend_sorted( + &mut variable_info + .fields + .entry(field_name.clone()) + .or_insert(FieldInfo::default()) + .types, + &child_field_info.types, + ); + } + + // If a hidden child has children, then the parent node can appear to have + // those same children. + production_children_quantity.append(child_variable_info.children.quantity); + did_change |= extend_sorted( + &mut variable_info.children.types, + &child_variable_info.children.types, + ); + + // If a hidden child can have named children without fields, then the parent + // node can appear to have those same children. + if step.field_name.is_none() { + let grandchildren_info = &child_variable_info.children_without_fields; + if !grandchildren_info.types.is_empty() { + production_children_without_fields_quantity + .append(child_variable_info.children_without_fields.quantity); + did_change |= extend_sorted( + &mut variable_info.children_without_fields.types, + &child_variable_info.children_without_fields.types, + ); } } } - // Inherit child types from this hidden child - for child_type in &child_variable_info.child_types { - if let Err(i) = variable_info.child_types.binary_search(&child_type) { - variable_info.child_types.insert(i, child_type.clone()); - done = false; - } + // Note whether or not this production contains children whose summaries + // have not yet been computed. 
+ if child_symbol.index >= i && !all_initialized { + production_has_uninitialized_invisible_children = true; } + } - // If any field points to this hidden child, inherit child types - // for the field. - if let Some(field_name) = &step.field_name { - let field_info = variable_info.fields.get_mut(field_name).unwrap(); - for child_type in &child_variable_info.child_types { - if let Err(i) = field_info.types.binary_search(&child_type) { - field_info.types.insert(i, child_type.clone()); - done = false; - } - } - } else { - // Inherit child types without fields from this hidden child - // Inherit info about children w/o fields from this hidden child - let grandchildren_info = &child_variable_info.children_without_fields; - if grandchildren_info.multiple - && !variable_info.children_without_fields.multiple - { - variable_info.children_without_fields.multiple = true; - done = false; - } - // if !grandchildren_info.required - // && variable_info.children_without_fields.required - // { - // variable_info.children_without_fields.required = false; - // done = false; - // } - for child_type in &grandchildren_info.types { - if let Err(i) = variable_info - .children_without_fields - .types - .binary_search(&child_type) - { - variable_info - .children_without_fields - .types - .insert(i, child_type.clone()); - done = false; - } - } + // If this production's children all have had their summaries initialized, + // then expand the quantity information with all of the possibilities introduced + // by this production. 
+ if !production_has_uninitialized_invisible_children { + did_change |= variable_info + .children + .quantity + .union(production_children_quantity); + + did_change |= variable_info + .children_without_fields + .quantity + .union(production_children_without_fields_quantity); + + for (field_name, info) in variable_info.fields.iter_mut() { + did_change |= info.quantity.union( + production_field_quantities + .get(field_name) + .cloned() + .unwrap_or(ChildQuantity::zero()), + ); } } } - // Move this variable's info back into the vector. result[i] = variable_info; } + + all_initialized = true; } for supertype_symbol in &syntax_grammar.supertype_symbols { - let variable = &syntax_grammar.variables[supertype_symbol.index]; - if variable.kind != VariableType::Hidden { - return Err(Error::grammar(&format!( - "Supertype symbols must be hidden, but `{}` is not", - variable.name - ))); - } - if result[supertype_symbol.index].has_multi_step_production { + let variable = &syntax_grammar.variables[supertype_symbol.index]; return Err(Error::grammar(&format!( "Supertype symbols must always have a single visible child, but `{}` can have multiple", variable.name @@ -295,178 +334,31 @@ pub(crate) fn get_variable_info( } } - let child_type_is_visible = |t: &ChildType| { - variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous - }; - + // Update all of the node type lists to eliminate hidden nodes. for supertype_symbol in &syntax_grammar.supertype_symbols { result[supertype_symbol.index] - .child_types + .children + .types .retain(child_type_is_visible); } - - for i in 0..result.len() { - let mut variable_info = VariableInfo::default(); - mem::swap(&mut variable_info, &mut result[i]); - - // For each field, make the `types` list more concise by replacing sets of - // subtypes with a single supertype. 
+ for variable_info in result.iter_mut() { for (_, field_info) in variable_info.fields.iter_mut() { - for supertype_symbol in &syntax_grammar.supertype_symbols { - sorted_vec_replace( - &mut field_info.types, - &result[supertype_symbol.index].child_types, - ChildType::Normal(*supertype_symbol), - ); - } - field_info.types.retain(child_type_is_visible); } - - for supertype_symbol in &syntax_grammar.supertype_symbols { - sorted_vec_replace( - &mut variable_info.children_without_fields.types, - &result[supertype_symbol.index].child_types, - ChildType::Normal(*supertype_symbol), - ); - } - + variable_info.fields.retain(|_, v| !v.types.is_empty()); variable_info .children_without_fields .types .retain(child_type_is_visible); - - result[i] = variable_info; } Ok(result) } -// Summarize information about this variable's possible children by walking -// all of its productions. -fn get_all_child_steps( - variable: &SyntaxVariable, - inlines: &InlinedProductionMap, - output: &mut Vec, -) -> usize { - // For each of the given variable's productions, insert all of the reachable steps - // into the output vector, and return the longest possible production length. - return variable - .productions - .iter() - .map(|p| process_production(inlines, p, 0, output)) - .max() - .unwrap_or(0); - - // For the given production suffix, add all of the remaining steps into the output - // vector and return the longest possible production length. - fn process_production( - inlines: &InlinedProductionMap, - production: &Production, - step_index: usize, - output: &mut Vec, - ) -> usize { - let mut max_length = production.steps.len(); - - // Process each of the remaining steps of the production. - for (i, step) in production.steps.iter().enumerate().skip(step_index) { - // If this step is inlined, then process the corresponding suffixes of - // all of the inlined productions instead. 
- if let Some(inlined_productions) = inlines.inlined_productions(production, i as u32) { - for inlined_production in inlined_productions { - let length = process_production(inlines, inlined_production, i, output); - if length > max_length { - max_length = length; - } - } - break; - } - - // Otherwise, insert this step into the output vector unless it is already - // present. - if let Err(i) = output.binary_search(step) { - output.insert(i, step.clone()); - } - } - - return max_length; - } -} - -fn variable_type_for_child_type( - child_type: &ChildType, - syntax_grammar: &SyntaxGrammar, - lexical_grammar: &LexicalGrammar, -) -> VariableType { - match child_type { - ChildType::Aliased(alias) => { - if alias.is_named { - VariableType::Named - } else { - VariableType::Anonymous - } - } - ChildType::Normal(symbol) => { - if syntax_grammar.supertype_symbols.contains(&symbol) { - return VariableType::Named; - } else { - match symbol.kind { - SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind, - SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind, - SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind, - _ => VariableType::Hidden, - } - } - } - } -} - -fn sorted_vec_replace(left: &mut Vec, right: &Vec, value: T) -> bool -where - T: Eq + Ord, -{ - if left.len() == 0 { - return false; - } - - let mut i = 0; - for right_elem in right.iter() { - while left[i] < *right_elem { - i += 1; - if i == left.len() { - return false; - } - } - if left[i] != *right_elem { - return false; - } - } - - i = 0; - left.retain(|left_elem| { - if i == right.len() { - return true; - } - while right[i] < *left_elem { - i += 1; - if i == right.len() { - return true; - } - } - right[i] != *left_elem - }); - - if let Err(i) = left.binary_search(&value) { - left.insert(i, value); - } - - true -} - pub(crate) fn generate_node_types_json( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, - simple_aliases: &AliasMap, + 
default_aliases: &AliasMap, variable_info: &Vec, ) -> Vec { let mut node_types_json = BTreeMap::new(); @@ -477,7 +369,7 @@ pub(crate) fn generate_node_types_json( named: alias.is_named, }, ChildType::Normal(symbol) => { - if let Some(alias) = simple_aliases.get(&symbol) { + if let Some(alias) = default_aliases.get(&symbol) { NodeTypeJSON { kind: alias.value.clone(), named: alias.is_named, @@ -511,105 +403,303 @@ pub(crate) fn generate_node_types_json( } }; + let populate_field_info_json = |json: &mut FieldInfoJSON, info: &FieldInfo| { + if info.types.len() > 0 { + json.multiple |= info.quantity.multiple; + json.required &= info.quantity.required; + json.types + .extend(info.types.iter().map(child_type_to_node_type)); + json.types.sort_unstable(); + json.types.dedup(); + } else { + json.required = false; + } + }; + + let mut aliases_by_symbol = HashMap::new(); + for (symbol, alias) in default_aliases { + aliases_by_symbol.insert(*symbol, { + let mut aliases = HashSet::new(); + aliases.insert(Some(alias.clone())); + aliases + }); + } + for extra_symbol in &syntax_grammar.extra_symbols { + if !default_aliases.contains_key(extra_symbol) { + aliases_by_symbol + .entry(*extra_symbol) + .or_insert(HashSet::new()) + .insert(None); + } + } + for variable in &syntax_grammar.variables { + for production in &variable.productions { + for step in &production.steps { + aliases_by_symbol + .entry(step.symbol) + .or_insert(HashSet::new()) + .insert( + step.alias + .as_ref() + .or_else(|| default_aliases.get(&step.symbol)) + .cloned(), + ); + } + } + } + aliases_by_symbol.insert(Symbol::non_terminal(0), [None].iter().cloned().collect()); + + let mut subtype_map = HashMap::new(); for (i, info) in variable_info.iter().enumerate() { let symbol = Symbol::non_terminal(i); let variable = &syntax_grammar.variables[i]; - let name = simple_aliases - .get(&Symbol::non_terminal(i)) - .map_or(&variable.name, |alias| &alias.value); - if syntax_grammar.supertype_symbols.contains(&symbol) { let 
node_type_json = node_types_json - .entry(name.clone()) + .entry(variable.name.clone()) .or_insert_with(|| NodeInfoJSON { - kind: name.clone(), + kind: variable.name.clone(), named: true, fields: None, children: None, subtypes: None, }); let mut subtypes = info - .child_types - .iter() - .map(child_type_to_node_type) - .collect::>(); - subtypes.sort_unstable(); - subtypes.dedup(); - node_type_json.subtypes = Some(subtypes); - } else if variable.kind.is_visible() - && !syntax_grammar.variables_to_inline.contains(&symbol) - { - let node_type_json = - node_types_json - .entry(name.clone()) - .or_insert_with(|| NodeInfoJSON { - kind: name.clone(), - named: true, - fields: None, - children: None, - subtypes: None, - }); - let mut fields_json = BTreeMap::new(); - for (field, field_info) in info.fields.iter() { - let field_info_json = fields_json.entry(field.clone()).or_insert(FieldInfoJSON { - multiple: false, - required: true, - types: Vec::new(), - }); - - field_info_json.multiple |= field_info.multiple; - field_info_json.required &= field_info.required; - field_info_json - .types - .extend(field_info.types.iter().map(child_type_to_node_type)); - field_info_json.types.sort_unstable(); - field_info_json.types.dedup(); - } - node_type_json.fields = Some(fields_json); - let mut children_types = info - .children_without_fields + .children .types .iter() .map(child_type_to_node_type) .collect::>(); - if children_types.len() > 0 { - children_types.sort_unstable(); - children_types.dedup(); - node_type_json.children = Some(FieldInfoJSON { - multiple: info.children_without_fields.multiple, - required: info.children_without_fields.required, - types: children_types, + subtype_map.insert( + NodeTypeJSON { + kind: node_type_json.kind.clone(), + named: true, + }, + subtypes.clone(), + ); + subtypes.sort_unstable(); + subtypes.dedup(); + node_type_json.subtypes = Some(subtypes); + } else if !syntax_grammar.variables_to_inline.contains(&symbol) { + // If a rule is aliased under 
multiple names, then its information + // contributes to multiple entries in the final JSON. + for alias in aliases_by_symbol + .get(&Symbol::non_terminal(i)) + .unwrap_or(&HashSet::new()) + { + let kind; + let is_named; + if let Some(alias) = alias { + kind = &alias.value; + is_named = alias.is_named; + } else if variable.kind.is_visible() { + kind = &variable.name; + is_named = variable.kind == VariableType::Named; + } else { + continue; + } + + // There may already be an entry with this name, because multiple + // rules may be aliased with the same name. + let mut node_type_existed = true; + let node_type_json = node_types_json.entry(kind.clone()).or_insert_with(|| { + node_type_existed = false; + NodeInfoJSON { + kind: kind.clone(), + named: is_named, + fields: Some(BTreeMap::new()), + children: None, + subtypes: None, + } }); + + let fields_json = node_type_json.fields.as_mut().unwrap(); + for (new_field, field_info) in info.fields.iter() { + let field_json = fields_json.entry(new_field.clone()).or_insert_with(|| { + // If another rule is aliased with the same name, and does *not* have this field, + // then this field cannot be required. + let mut field_json = FieldInfoJSON::default(); + if node_type_existed { + field_json.required = false; + } + field_json + }); + populate_field_info_json(field_json, field_info); + } + + // If another rule is aliased with the same name, any fields that aren't present in this + // cannot be required. 
+ for (existing_field, field_json) in fields_json.iter_mut() { + if !info.fields.contains_key(existing_field) { + field_json.required = false; + } + } + + populate_field_info_json( + node_type_json + .children + .get_or_insert(FieldInfoJSON::default()), + &info.children_without_fields, + ); } } } - let mut result = node_types_json.into_iter().map(|e| e.1).collect::>(); + for (_, node_type_json) in node_types_json.iter_mut() { + if node_type_json + .children + .as_ref() + .map_or(false, |c| c.types.is_empty()) + { + node_type_json.children = None; + } - for variable in &lexical_grammar.variables { - if variable.kind == VariableType::Named { - result.push(NodeInfoJSON { - kind: variable.name.clone(), - named: true, - fields: None, - children: None, - subtypes: None, + if let Some(children) = &mut node_type_json.children { + process_supertypes(children, &subtype_map); + } + if let Some(fields) = &mut node_type_json.fields { + for (_, field_info) in fields.iter_mut() { + process_supertypes(field_info, &subtype_map); + } + } + } + + let mut anonymous_node_types = Vec::new(); + + let empty = HashSet::new(); + let regular_tokens = lexical_grammar + .variables + .iter() + .enumerate() + .flat_map(|(i, variable)| { + aliases_by_symbol + .get(&Symbol::terminal(i)) + .unwrap_or(&empty) + .iter() + .map(move |alias| { + if let Some(alias) = alias { + (&alias.value, alias.kind()) + } else { + (&variable.name, variable.kind) + } + }) + }); + let external_tokens = + syntax_grammar + .external_tokens + .iter() + .enumerate() + .flat_map(|(i, token)| { + aliases_by_symbol + .get(&Symbol::external(i)) + .unwrap_or(&empty) + .iter() + .map(move |alias| { + if let Some(alias) = alias { + (&alias.value, alias.kind()) + } else { + (&token.name, token.kind) + } + }) }); - } else if variable.kind == VariableType::Anonymous { - result.push(NodeInfoJSON { - kind: variable.name.clone(), + + for (name, kind) in regular_tokens.chain(external_tokens) { + match kind { + VariableType::Named => { 
+ let node_type_json = node_types_json.entry(name.clone()).or_insert(NodeInfoJSON { + kind: name.clone(), + named: true, + fields: None, + children: None, + subtypes: None, + }); + if let Some(children) = &mut node_type_json.children { + children.required = false; + } + if let Some(fields) = &mut node_type_json.fields { + for (_, field) in fields.iter_mut() { + field.required = false; + } + } + } + VariableType::Anonymous => anonymous_node_types.push(NodeInfoJSON { + kind: name.clone(), named: false, fields: None, children: None, subtypes: None, - }); + }), + _ => {} } } + let mut result = node_types_json.into_iter().map(|e| e.1).collect::>(); + result.extend(anonymous_node_types.into_iter()); + result.sort_unstable_by(|a, b| { + b.subtypes + .is_some() + .cmp(&a.subtypes.is_some()) + .then_with(|| { + let a_is_leaf = a.children.is_none() && a.fields.is_none(); + let b_is_leaf = b.children.is_none() && b.fields.is_none(); + a_is_leaf.cmp(&b_is_leaf) + }) + .then_with(|| a.kind.cmp(&b.kind)) + }); + result.dedup(); result } +fn process_supertypes( + info: &mut FieldInfoJSON, + subtype_map: &HashMap>, +) { + for (supertype, subtypes) in subtype_map { + if info.types.contains(supertype) { + info.types.retain(|t| !subtypes.contains(t)); + } + } +} + +fn variable_type_for_child_type( + child_type: &ChildType, + syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, +) -> VariableType { + match child_type { + ChildType::Aliased(alias) => alias.kind(), + ChildType::Normal(symbol) => { + if syntax_grammar.supertype_symbols.contains(&symbol) { + VariableType::Named + } else if syntax_grammar.variables_to_inline.contains(&symbol) { + VariableType::Hidden + } else { + match symbol.kind { + SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind, + SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind, + SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind, + _ => VariableType::Hidden, + } + } + } + } +} + +fn 
extend_sorted<'a, T>(vec: &mut Vec, values: impl IntoIterator) -> bool +where + T: Clone + Eq + Ord, + T: 'a, +{ + values.into_iter().any(|value| { + if let Err(i) = vec.binary_search(&value) { + vec.insert(i, value.clone()); + true + } else { + false + } + }) +} + #[cfg(test)] mod tests { use super::*; @@ -623,7 +713,7 @@ mod tests { fn test_node_types_simple() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), @@ -643,9 +733,18 @@ mod tests { kind: VariableType::Named, rule: Rule::string("x"), }, + // This rule is not reachable from the start symbol + // so it won't be present in the node_types + Variable { + name: "v3".to_string(), + kind: VariableType::Named, + rule: Rule::string("y"), + }, ], }); + assert_eq!(node_types.len(), 3); + assert_eq!( node_types[0], NodeInfoJSON { @@ -705,11 +804,117 @@ mod tests { ); } + #[test] + fn test_node_types_simple_extras() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_symbols: vec![Rule::named("v3")], + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![ + Variable { + name: "v1".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::field("f1".to_string(), Rule::named("v2")), + Rule::field("f2".to_string(), Rule::string(";")), + ]), + }, + Variable { + name: "v2".to_string(), + kind: VariableType::Named, + rule: Rule::string("x"), + }, + // This rule is not reachable from the start symbol, but + // it is reachable from the 'extra_symbols' so it + // should be present in the node_types + Variable { + name: "v3".to_string(), + kind: VariableType::Named, + rule: Rule::string("y"), + }, + ], + }); + + assert_eq!(node_types.len(), 4); + + assert_eq!( + node_types[0], + NodeInfoJSON { + kind: 
"v1".to_string(), + named: true, + subtypes: None, + children: None, + fields: Some( + vec![ + ( + "f1".to_string(), + FieldInfoJSON { + multiple: false, + required: true, + types: vec![NodeTypeJSON { + kind: "v2".to_string(), + named: true, + }] + } + ), + ( + "f2".to_string(), + FieldInfoJSON { + multiple: false, + required: true, + types: vec![NodeTypeJSON { + kind: ";".to_string(), + named: false, + }] + } + ), + ] + .into_iter() + .collect() + ) + } + ); + assert_eq!( + node_types[1], + NodeInfoJSON { + kind: ";".to_string(), + named: false, + subtypes: None, + children: None, + fields: None + } + ); + assert_eq!( + node_types[2], + NodeInfoJSON { + kind: "v2".to_string(), + named: true, + subtypes: None, + children: None, + fields: None + } + ); + assert_eq!( + node_types[3], + NodeInfoJSON { + kind: "v3".to_string(), + named: true, + subtypes: None, + children: None, + fields: None + } + ); + } + #[test] fn test_node_types_with_supertypes() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), @@ -796,7 +1001,7 @@ mod tests { fn test_node_types_for_children_without_fields() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), @@ -890,6 +1095,421 @@ mod tests { ); } + #[test] + fn test_node_types_with_inlined_rules() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + word_token: None, + extra_symbols: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: vec!["v2".to_string()], + supertype_symbols: vec![], + variables: vec![ + Variable { + name: "v1".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![Rule::named("v2"), Rule::named("v3")]), + }, + 
// v2 should not appear in the node types, since it is inlined + Variable { + name: "v2".to_string(), + kind: VariableType::Named, + rule: Rule::alias(Rule::string("a"), "x".to_string(), true), + }, + Variable { + name: "v3".to_string(), + kind: VariableType::Named, + rule: Rule::string("b"), + }, + ], + }); + + assert_eq!( + node_types[0], + NodeInfoJSON { + kind: "v1".to_string(), + named: true, + subtypes: None, + children: Some(FieldInfoJSON { + multiple: true, + required: true, + types: vec![ + NodeTypeJSON { + kind: "v3".to_string(), + named: true, + }, + NodeTypeJSON { + kind: "x".to_string(), + named: true, + }, + ] + }), + fields: Some(BTreeMap::new()), + } + ); + } + + #[test] + fn test_node_types_for_aliased_nodes() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_symbols: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![ + Variable { + name: "thing".to_string(), + kind: VariableType::Named, + rule: Rule::choice(vec![Rule::named("type"), Rule::named("expression")]), + }, + Variable { + name: "type".to_string(), + kind: VariableType::Named, + rule: Rule::choice(vec![ + Rule::alias( + Rule::named("identifier"), + "type_identifier".to_string(), + true, + ), + Rule::string("void"), + ]), + }, + Variable { + name: "expression".to_string(), + kind: VariableType::Named, + rule: Rule::choice(vec![ + Rule::named("identifier"), + Rule::alias( + Rule::named("foo_identifier"), + "identifier".to_string(), + true, + ), + ]), + }, + Variable { + name: "identifier".to_string(), + kind: VariableType::Named, + rule: Rule::pattern("\\w+"), + }, + Variable { + name: "foo_identifier".to_string(), + kind: VariableType::Named, + rule: Rule::pattern("[\\w-]+"), + }, + ], + }); + + assert_eq!(node_types.iter().find(|t| t.kind == "foo_identifier"), None); + assert_eq!( + node_types.iter().find(|t| t.kind == 
"identifier"), + Some(&NodeInfoJSON { + kind: "identifier".to_string(), + named: true, + subtypes: None, + children: None, + fields: None, + }) + ); + assert_eq!( + node_types.iter().find(|t| t.kind == "type_identifier"), + Some(&NodeInfoJSON { + kind: "type_identifier".to_string(), + named: true, + subtypes: None, + children: None, + fields: None, + }) + ); + } + + #[test] + fn test_node_types_with_multiple_valued_fields() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_symbols: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![ + Variable { + name: "a".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::choice(vec![ + Rule::Blank, + Rule::repeat(Rule::field("f1".to_string(), Rule::named("b"))), + ]), + Rule::repeat(Rule::named("c")), + ]), + }, + Variable { + name: "b".to_string(), + kind: VariableType::Named, + rule: Rule::string("b"), + }, + Variable { + name: "c".to_string(), + kind: VariableType::Named, + rule: Rule::string("c"), + }, + ], + }); + + assert_eq!( + node_types[0], + NodeInfoJSON { + kind: "a".to_string(), + named: true, + subtypes: None, + children: Some(FieldInfoJSON { + multiple: true, + required: true, + types: vec![NodeTypeJSON { + kind: "c".to_string(), + named: true, + },] + }), + fields: Some( + vec![( + "f1".to_string(), + FieldInfoJSON { + multiple: true, + required: false, + types: vec![NodeTypeJSON { + kind: "b".to_string(), + named: true, + }] + } + )] + .into_iter() + .collect() + ), + } + ); + } + + #[test] + fn test_node_types_with_fields_on_hidden_tokens() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_symbols: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![Variable { + name: 
"script".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::field("a".to_string(), Rule::pattern("hi")), + Rule::field("b".to_string(), Rule::pattern("bye")), + ]), + }], + }); + + assert_eq!( + node_types, + [NodeInfoJSON { + kind: "script".to_string(), + named: true, + fields: Some(BTreeMap::new()), + children: None, + subtypes: None + }] + ); + } + + #[test] + fn test_node_types_with_multiple_rules_same_alias_name() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_symbols: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![ + Variable { + name: "script".to_string(), + kind: VariableType::Named, + rule: Rule::choice(vec![ + Rule::named("a"), + // Rule `b` is aliased as rule `a` + Rule::alias(Rule::named("b"), "a".to_string(), true), + ]), + }, + Variable { + name: "a".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::field("f1".to_string(), Rule::string("1")), + Rule::field("f2".to_string(), Rule::string("2")), + ]), + }, + Variable { + name: "b".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::field("f2".to_string(), Rule::string("22")), + Rule::field("f2".to_string(), Rule::string("222")), + Rule::field("f3".to_string(), Rule::string("3")), + ]), + }, + ], + }); + + assert_eq!( + &node_types + .iter() + .map(|t| t.kind.as_str()) + .collect::>(), + &["a", "script", "1", "2", "22", "222", "3"] + ); + + assert_eq!( + &node_types[0..2], + &[ + // A combination of the types for `a` and `b`. 
+ NodeInfoJSON { + kind: "a".to_string(), + named: true, + subtypes: None, + children: None, + fields: Some( + vec![ + ( + "f1".to_string(), + FieldInfoJSON { + multiple: false, + required: false, + types: vec![NodeTypeJSON { + kind: "1".to_string(), + named: false, + }] + } + ), + ( + "f2".to_string(), + FieldInfoJSON { + multiple: true, + required: true, + types: vec![ + NodeTypeJSON { + kind: "2".to_string(), + named: false, + }, + NodeTypeJSON { + kind: "22".to_string(), + named: false, + }, + NodeTypeJSON { + kind: "222".to_string(), + named: false, + } + ] + }, + ), + ( + "f3".to_string(), + FieldInfoJSON { + multiple: false, + required: false, + types: vec![NodeTypeJSON { + kind: "3".to_string(), + named: false, + }] + } + ), + ] + .into_iter() + .collect() + ), + }, + NodeInfoJSON { + kind: "script".to_string(), + named: true, + subtypes: None, + // Only one node + children: Some(FieldInfoJSON { + multiple: false, + required: true, + types: vec![NodeTypeJSON { + kind: "a".to_string(), + named: true, + }] + }), + fields: Some(BTreeMap::new()), + } + ] + ); + } + + #[test] + fn test_node_types_with_tokens_aliased_to_match_rules() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_symbols: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![ + Variable { + name: "a".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![Rule::named("b"), Rule::named("c")]), + }, + // Ordinarily, `b` nodes have two named `c` children. + Variable { + name: "b".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![Rule::named("c"), Rule::string("B"), Rule::named("c")]), + }, + Variable { + name: "c".to_string(), + kind: VariableType::Named, + rule: Rule::choice(vec![ + Rule::string("C"), + // This token is aliased as a `b`, which will produce a `b` node + // with no children. 
+ Rule::alias(Rule::string("D"), "b".to_string(), true), + ]), + }, + ], + }); + + assert_eq!( + node_types.iter().map(|n| &n.kind).collect::>(), + &["a", "b", "c", "B", "C"] + ); + assert_eq!( + node_types[1], + NodeInfoJSON { + kind: "b".to_string(), + named: true, + subtypes: None, + children: Some(FieldInfoJSON { + multiple: true, + required: false, + types: vec![NodeTypeJSON { + kind: "c".to_string(), + named: true, + }] + }), + fields: Some(BTreeMap::new()), + } + ); + } + #[test] fn test_get_variable_info() { let variable_info = get_variable_info( @@ -948,7 +1568,7 @@ mod tests { vec![], ), &build_lexical_grammar(), - &InlinedProductionMap::default(), + &AliasMap::new(), ) .unwrap(); @@ -957,8 +1577,11 @@ mod tests { vec![( "field1".to_string(), FieldInfo { - required: true, - multiple: false, + quantity: ChildQuantity { + exists: true, + required: true, + multiple: false, + }, types: vec![ChildType::Normal(Symbol::terminal(1))], } )] @@ -971,8 +1594,11 @@ mod tests { vec![( "field2".to_string(), FieldInfo { - required: false, - multiple: false, + quantity: ChildQuantity { + exists: true, + required: false, + multiple: false, + }, types: vec![ ChildType::Normal(Symbol::terminal(2)), ChildType::Normal(Symbol::terminal(3)), @@ -984,6 +1610,71 @@ mod tests { ); } + #[test] + fn test_get_variable_info_with_repetitions_inside_fields() { + let variable_info = get_variable_info( + &build_syntax_grammar( + vec![ + // Field associated with a repetition. 
+ SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::non_terminal(1)) + .with_field_name("field1")], + }, + Production { + dynamic_precedence: 0, + steps: vec![], + }, + ], + }, + // Repetition node + SyntaxVariable { + name: "_rule0_repeat".to_string(), + kind: VariableType::Hidden, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(1))], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::non_terminal(1)), + ], + }, + ], + }, + ], + vec![], + ), + &build_lexical_grammar(), + &AliasMap::new(), + ) + .unwrap(); + + assert_eq!( + variable_info[0].fields, + vec![( + "field1".to_string(), + FieldInfo { + quantity: ChildQuantity { + exists: true, + required: false, + multiple: true, + }, + types: vec![ChildType::Normal(Symbol::terminal(1))], + } + )] + .into_iter() + .collect::>() + ); + } + #[test] fn test_get_variable_info_with_inherited_fields() { let variable_info = get_variable_info( @@ -992,14 +1683,20 @@ mod tests { SyntaxVariable { name: "rule0".to_string(), kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::non_terminal(1)), - ProductionStep::new(Symbol::terminal(1)), - ], - }], + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::terminal(1)), + ], + }, + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::non_terminal(1))], + }, + ], }, // Hidden node with fields SyntaxVariable { @@ -1008,7 +1705,7 @@ mod tests { productions: vec![Production { dynamic_precedence: 0, steps: vec![ - 
ProductionStep::new(Symbol::terminal(2)), + ProductionStep::new(Symbol::terminal(2)).with_alias(".", false), ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"), ], }], @@ -1017,7 +1714,7 @@ mod tests { vec![], ), &build_lexical_grammar(), - &InlinedProductionMap::default(), + &AliasMap::new(), ) .unwrap(); @@ -1026,14 +1723,32 @@ mod tests { vec![( "field1".to_string(), FieldInfo { - required: true, - multiple: false, + quantity: ChildQuantity { + exists: true, + required: true, + multiple: false, + }, types: vec![ChildType::Normal(Symbol::terminal(3))], } )] .into_iter() .collect::>() ); + + assert_eq!( + variable_info[0].children_without_fields, + FieldInfo { + quantity: ChildQuantity { + exists: true, + required: false, + multiple: true, + }, + types: vec![ + ChildType::Normal(Symbol::terminal(0)), + ChildType::Normal(Symbol::terminal(1)), + ], + } + ); } #[test] @@ -1073,7 +1788,7 @@ mod tests { vec![Symbol::non_terminal(1)], ), &build_lexical_grammar(), - &InlinedProductionMap::default(), + &AliasMap::new(), ) .unwrap(); @@ -1082,8 +1797,11 @@ mod tests { vec![( "field1".to_string(), FieldInfo { - required: true, - multiple: false, + quantity: ChildQuantity { + exists: true, + required: true, + multiple: false, + }, types: vec![ChildType::Normal(Symbol::non_terminal(1))], } )] @@ -1093,18 +1811,14 @@ mod tests { } fn get_node_types(grammar: InputGrammar) -> Vec { - let (syntax_grammar, lexical_grammar, _, simple_aliases) = + let (syntax_grammar, lexical_grammar, _, default_aliases) = prepare_grammar(&grammar).unwrap(); - let variable_info = get_variable_info( - &syntax_grammar, - &lexical_grammar, - &InlinedProductionMap::default(), - ) - .unwrap(); + let variable_info = + get_variable_info(&syntax_grammar, &lexical_grammar, &default_aliases).unwrap(); generate_node_types_json( &syntax_grammar, &lexical_grammar, - &simple_aliases, + &default_aliases, &variable_info, ) } diff --git a/cli/src/generate/parse_grammar.rs 
b/cli/src/generate/parse_grammar.rs index feb560a9..c01dbd99 100644 --- a/cli/src/generate/parse_grammar.rs +++ b/cli/src/generate/parse_grammar.rs @@ -87,7 +87,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result { }) } - let extra_tokens = grammar_json + let extra_symbols = grammar_json .extras .unwrap_or(Vec::new()) .into_iter() @@ -107,7 +107,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result { name: grammar_json.name, word_token: grammar_json.word, variables, - extra_tokens, + extra_symbols, expected_conflicts, external_tokens, supertype_symbols, diff --git a/cli/src/generate/prepare_grammar/expand_repeats.rs b/cli/src/generate/prepare_grammar/expand_repeats.rs index ccc83d97..0660f06e 100644 --- a/cli/src/generate/prepare_grammar/expand_repeats.rs +++ b/cli/src/generate/prepare_grammar/expand_repeats.rs @@ -283,7 +283,7 @@ mod tests { fn build_grammar(variables: Vec) -> ExtractedSyntaxGrammar { ExtractedSyntaxGrammar { variables, - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), diff --git a/cli/src/generate/prepare_grammar/extract_default_aliases.rs b/cli/src/generate/prepare_grammar/extract_default_aliases.rs new file mode 100644 index 00000000..3e08e3ad --- /dev/null +++ b/cli/src/generate/prepare_grammar/extract_default_aliases.rs @@ -0,0 +1,293 @@ +use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; +use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType}; + +#[derive(Clone, Default)] +struct SymbolStatus { + aliases: Vec<(Alias, usize)>, + appears_unaliased: bool, +} + +// Update the grammar by finding symbols that always are aliased, and for each such symbol, +// promoting one of its aliases to a "default alias", which is applied globally instead +// of in a context-specific way. +// +// This has two benefits: +// * It reduces the overhead of storing production-specific alias info in the parse table. 
+// * Within an `ERROR` node, no context-specific aliases will be applied. This transformation +// ensures that the children of an `ERROR` node have symbols that are consistent with the +// way that they would appear in a valid syntax tree. +pub(super) fn extract_default_aliases( + syntax_grammar: &mut SyntaxGrammar, + lexical_grammar: &LexicalGrammar, +) -> AliasMap { + let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()]; + let mut non_terminal_status_list = + vec![SymbolStatus::default(); syntax_grammar.variables.len()]; + let mut external_status_list = + vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()]; + + // For each grammar symbol, find all of the aliases under which the symbol appears, + // and determine whether or not the symbol ever appears *unaliased*. + for variable in syntax_grammar.variables.iter() { + for production in variable.productions.iter() { + for step in production.steps.iter() { + let mut status = match step.symbol.kind { + SymbolType::External => &mut external_status_list[step.symbol.index], + SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index], + SymbolType::Terminal => &mut terminal_status_list[step.symbol.index], + SymbolType::End => panic!("Unexpected end token"), + }; + + // Default aliases don't work for inlined variables. 
+ if syntax_grammar.variables_to_inline.contains(&step.symbol) { + continue; + } + + if let Some(alias) = &step.alias { + if let Some(count_for_alias) = status + .aliases + .iter_mut() + .find_map(|(a, count)| if a == alias { Some(count) } else { None }) + { + *count_for_alias += 1; + } else { + status.aliases.push((alias.clone(), 1)); + } + } else { + status.appears_unaliased = true; + } + } + } + } + + let symbols_with_statuses = (terminal_status_list + .iter_mut() + .enumerate() + .map(|(i, status)| (Symbol::terminal(i), status))) + .chain( + non_terminal_status_list + .iter_mut() + .enumerate() + .map(|(i, status)| (Symbol::non_terminal(i), status)), + ) + .chain( + external_status_list + .iter_mut() + .enumerate() + .map(|(i, status)| (Symbol::external(i), status)), + ); + + // For each symbol that always appears aliased, find the alias the occurs most often, + // and designate that alias as the symbol's "default alias". Store all of these + // default aliases in a map that will be returned. + let mut result = AliasMap::new(); + for (symbol, status) in symbols_with_statuses { + if status.appears_unaliased { + status.aliases.clear(); + } else { + if let Some(default_entry) = status + .aliases + .iter() + .enumerate() + .max_by_key(|(i, (_, count))| (count, -(*i as i64))) + .map(|(_, entry)| entry.clone()) + { + status.aliases.clear(); + status.aliases.push(default_entry.clone()); + result.insert(symbol, default_entry.0); + } + } + } + + // Wherever a symbol is aliased as its default alias, remove the usage of the alias, + // because it will now be redundant. 
+ let mut alias_positions_to_clear = Vec::new(); + for variable in syntax_grammar.variables.iter_mut() { + alias_positions_to_clear.clear(); + + for (i, production) in variable.productions.iter().enumerate() { + for (j, step) in production.steps.iter().enumerate() { + let status = match step.symbol.kind { + SymbolType::External => &mut external_status_list[step.symbol.index], + SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index], + SymbolType::Terminal => &mut terminal_status_list[step.symbol.index], + SymbolType::End => panic!("Unexpected end token"), + }; + + // If this step is aliased as the symbol's default alias, then remove that alias. + if step.alias.is_some() + && step.alias.as_ref() == status.aliases.get(0).map(|t| &t.0) + { + let mut other_productions_must_use_this_alias_at_this_index = false; + for (other_i, other_production) in variable.productions.iter().enumerate() { + if other_i != i + && other_production.steps.len() > j + && other_production.steps[j].alias == step.alias + && result.get(&other_production.steps[j].symbol) != step.alias.as_ref() + { + other_productions_must_use_this_alias_at_this_index = true; + break; + } + } + + if !other_productions_must_use_this_alias_at_this_index { + alias_positions_to_clear.push((i, j)); + } + } + } + } + + for (production_index, step_index) in &alias_positions_to_clear { + variable.productions[*production_index].steps[*step_index].alias = None; + } + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::generate::grammars::{ + LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType, + }; + use crate::generate::nfa::Nfa; + + #[test] + fn test_extract_simple_aliases() { + let mut syntax_grammar = SyntaxGrammar { + variables: vec![ + SyntaxVariable { + name: "v1".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), + 
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), + ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), + ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true), + ], + }], + }, + SyntaxVariable { + name: "v2".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + // Token 0 is always aliased as "a1". + ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), + // Token 1 is aliased within rule `v1` above, but not here. + ProductionStep::new(Symbol::terminal(1)), + // Token 2 is aliased differently here than in `v1`. The alias from + // `v1` should be promoted to the default alias, because `v1` appears + // first in the grammar. + ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true), + // Token 3 is also aliased differently here than in `v1`. In this case, + // this alias should be promoted to the default alias, because it is + // used a greater number of times (twice). + ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true), + ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true), + ], + }], + }, + ], + extra_symbols: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + supertype_symbols: Vec::new(), + external_tokens: Vec::new(), + word_token: None, + }; + + let lexical_grammar = LexicalGrammar { + nfa: Nfa::new(), + variables: vec![ + LexicalVariable { + name: "t0".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + LexicalVariable { + name: "t1".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + LexicalVariable { + name: "t2".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + LexicalVariable { + name: "t3".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + ], + }; + + let default_aliases = extract_default_aliases(&mut 
syntax_grammar, &lexical_grammar); + assert_eq!(default_aliases.len(), 3); + + assert_eq!( + default_aliases.get(&Symbol::terminal(0)), + Some(&Alias { + value: "a1".to_string(), + is_named: true, + }) + ); + assert_eq!( + default_aliases.get(&Symbol::terminal(2)), + Some(&Alias { + value: "a3".to_string(), + is_named: true, + }) + ); + assert_eq!( + default_aliases.get(&Symbol::terminal(3)), + Some(&Alias { + value: "a6".to_string(), + is_named: true, + }) + ); + assert_eq!(default_aliases.get(&Symbol::terminal(1)), None); + + assert_eq!( + syntax_grammar.variables, + vec![ + SyntaxVariable { + name: "v1".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), + ProductionStep::new(Symbol::terminal(2)), + ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true), + ], + },], + }, + SyntaxVariable { + name: "v2".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(1)), + ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true), + ProductionStep::new(Symbol::terminal(3)), + ProductionStep::new(Symbol::terminal(3)), + ], + },], + }, + ] + ); + } +} diff --git a/cli/src/generate/prepare_grammar/extract_simple_aliases.rs b/cli/src/generate/prepare_grammar/extract_simple_aliases.rs deleted file mode 100644 index 9a0b7fbb..00000000 --- a/cli/src/generate/prepare_grammar/extract_simple_aliases.rs +++ /dev/null @@ -1,223 +0,0 @@ -use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; -use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType}; - -#[derive(Clone, Default)] -struct SymbolStatus { - alias: Option, - conflicting: bool, -} - -pub(super) fn extract_simple_aliases( - syntax_grammar: &mut SyntaxGrammar, - lexical_grammar: 
&LexicalGrammar, -) -> AliasMap { - // Determine which symbols in the grammars are *always* aliased to a single name. - let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()]; - let mut non_terminal_status_list = - vec![SymbolStatus::default(); syntax_grammar.variables.len()]; - let mut external_status_list = - vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()]; - for variable in syntax_grammar.variables.iter() { - for production in variable.productions.iter() { - for step in production.steps.iter() { - let mut status = match step.symbol { - Symbol { - kind: SymbolType::External, - index, - } => &mut external_status_list[index], - Symbol { - kind: SymbolType::NonTerminal, - index, - } => &mut non_terminal_status_list[index], - Symbol { - kind: SymbolType::Terminal, - index, - } => &mut terminal_status_list[index], - Symbol { - kind: SymbolType::End, - .. - } => panic!("Unexpected end token"), - }; - - if step.alias.is_none() { - status.alias = None; - status.conflicting = true; - } - - if !status.conflicting { - if status.alias.is_none() { - status.alias = step.alias.clone(); - } else if status.alias != step.alias { - status.alias = None; - status.conflicting = true; - } - } - } - } - } - - // Remove the aliases for those symbols. - for variable in syntax_grammar.variables.iter_mut() { - for production in variable.productions.iter_mut() { - for step in production.steps.iter_mut() { - let status = match step.symbol { - Symbol { - kind: SymbolType::External, - index, - } => &external_status_list[index], - Symbol { - kind: SymbolType::NonTerminal, - index, - } => &non_terminal_status_list[index], - Symbol { - kind: SymbolType::Terminal, - index, - } => &terminal_status_list[index], - Symbol { - kind: SymbolType::End, - .. - } => panic!("Unexpected end token"), - }; - - if status.alias.is_some() { - step.alias = None; - } - } - } - } - - // Populate a map of the symbols to their aliases. 
- let mut result = AliasMap::new(); - for (i, status) in terminal_status_list.into_iter().enumerate() { - if let Some(alias) = status.alias { - result.insert(Symbol::terminal(i), alias); - } - } - for (i, status) in non_terminal_status_list.into_iter().enumerate() { - if let Some(alias) = status.alias { - result.insert(Symbol::non_terminal(i), alias); - } - } - for (i, status) in external_status_list.into_iter().enumerate() { - if let Some(alias) = status.alias { - result.insert(Symbol::external(i), alias); - } - } - result -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::generate::grammars::{ - LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType, - }; - use crate::generate::nfa::Nfa; - - #[test] - fn test_extract_simple_aliases() { - let mut syntax_grammar = SyntaxGrammar { - variables: vec![ - SyntaxVariable { - name: "v1".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), - ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), - ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), - ], - }], - }, - SyntaxVariable { - name: "v2".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - // Token 0 is always aliased as "a1". - ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), - // Token 1 is aliased above, but not here. - ProductionStep::new(Symbol::terminal(1)), - // Token 2 is aliased differently than above. 
- ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true), - ], - }], - }, - ], - extra_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), - supertype_symbols: Vec::new(), - external_tokens: Vec::new(), - word_token: None, - }; - - let lexical_grammar = LexicalGrammar { - nfa: Nfa::new(), - variables: vec![ - LexicalVariable { - name: "t1".to_string(), - kind: VariableType::Anonymous, - implicit_precedence: 0, - start_state: 0, - }, - LexicalVariable { - name: "t2".to_string(), - kind: VariableType::Anonymous, - implicit_precedence: 0, - start_state: 0, - }, - LexicalVariable { - name: "t3".to_string(), - kind: VariableType::Anonymous, - implicit_precedence: 0, - start_state: 0, - }, - ], - }; - - let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar); - assert_eq!(simple_aliases.len(), 1); - assert_eq!( - simple_aliases[&Symbol::terminal(0)], - Alias { - value: "a1".to_string(), - is_named: true, - } - ); - - assert_eq!( - syntax_grammar.variables, - vec![ - SyntaxVariable { - name: "v1".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - // 'Simple' alias removed - ProductionStep::new(Symbol::terminal(0)), - // Other aliases unchanged - ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), - ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), - ], - },], - }, - SyntaxVariable { - name: "v2".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::terminal(1)), - ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true), - ], - },], - }, - ] - ); - } -} diff --git a/cli/src/generate/prepare_grammar/extract_tokens.rs b/cli/src/generate/prepare_grammar/extract_tokens.rs index def35b97..ae6e7244 100644 --- a/cli/src/generate/prepare_grammar/extract_tokens.rs +++ 
b/cli/src/generate/prepare_grammar/extract_tokens.rs @@ -90,21 +90,13 @@ pub(super) fn extract_tokens( .collect(); let mut separators = Vec::new(); - let mut extra_tokens = Vec::new(); - for rule in grammar.extra_tokens { + let mut extra_symbols = Vec::new(); + for rule in grammar.extra_symbols { if let Rule::Symbol(symbol) = rule { - let new_symbol = symbol_replacer.replace_symbol(symbol); - if new_symbol.is_non_terminal() { - return Error::err(format!( - "Non-token symbol '{}' cannot be used as an extra token", - &variables[new_symbol.index].name - )); - } else { - extra_tokens.push(new_symbol); - } + extra_symbols.push(symbol_replacer.replace_symbol(symbol)); } else { if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) { - extra_tokens.push(Symbol::terminal(index)); + extra_symbols.push(Symbol::terminal(index)); } else { separators.push(rule); } @@ -158,7 +150,7 @@ pub(super) fn extract_tokens( ExtractedSyntaxGrammar { variables, expected_conflicts, - extra_tokens, + extra_symbols, variables_to_inline, supertype_symbols, external_tokens, @@ -415,15 +407,15 @@ mod test { } #[test] - fn test_extracting_extra_tokens() { + fn test_extracting_extra_symbols() { let mut grammar = build_grammar(vec![ Variable::named("rule_0", Rule::string("x")), Variable::named("comment", Rule::pattern("//.*")), ]); - grammar.extra_tokens = vec![Rule::string(" "), Rule::non_terminal(1)]; + grammar.extra_symbols = vec![Rule::string(" "), Rule::non_terminal(1)]; let (syntax_grammar, lexical_grammar) = extract_tokens(grammar).unwrap(); - assert_eq!(syntax_grammar.extra_tokens, vec![Symbol::terminal(1),]); + assert_eq!(syntax_grammar.extra_symbols, vec![Symbol::terminal(1),]); assert_eq!(lexical_grammar.separators, vec![Rule::string(" "),]); } @@ -472,28 +464,6 @@ mod test { ); } - #[test] - fn test_error_on_non_terminal_symbol_extras() { - let mut grammar = build_grammar(vec![ - Variable::named("rule_0", Rule::non_terminal(1)), - Variable::named("rule_1", 
Rule::non_terminal(2)), - Variable::named("rule_2", Rule::string("x")), - ]); - grammar.extra_tokens = vec![Rule::non_terminal(1)]; - - match extract_tokens(grammar) { - Err(e) => { - assert_eq!( - e.message(), - "Non-token symbol 'rule_1' cannot be used as an extra token" - ); - } - _ => { - panic!("Expected an error but got no error"); - } - } - } - #[test] fn test_error_on_external_with_same_name_as_non_terminal() { let mut grammar = build_grammar(vec![ @@ -522,7 +492,7 @@ mod test { fn build_grammar(variables: Vec) -> InternedGrammar { InternedGrammar { variables, - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), diff --git a/cli/src/generate/prepare_grammar/flatten_grammar.rs b/cli/src/generate/prepare_grammar/flatten_grammar.rs index e325776c..f2b43a04 100644 --- a/cli/src/generate/prepare_grammar/flatten_grammar.rs +++ b/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -199,7 +199,7 @@ unless they are used only as the grammar's start rule. 
} } Ok(SyntaxGrammar { - extra_tokens: grammar.extra_tokens, + extra_symbols: grammar.extra_symbols, expected_conflicts: grammar.expected_conflicts, variables_to_inline: grammar.variables_to_inline, external_tokens: grammar.external_tokens, diff --git a/cli/src/generate/prepare_grammar/intern_symbols.rs b/cli/src/generate/prepare_grammar/intern_symbols.rs index 4c0fc5c7..276f13ff 100644 --- a/cli/src/generate/prepare_grammar/intern_symbols.rs +++ b/cli/src/generate/prepare_grammar/intern_symbols.rs @@ -30,9 +30,9 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result external_tokens.push(Variable { name, kind, rule }); } - let mut extra_tokens = Vec::with_capacity(grammar.extra_tokens.len()); - for extra_token in grammar.extra_tokens.iter() { - extra_tokens.push(interner.intern_rule(extra_token)?); + let mut extra_symbols = Vec::with_capacity(grammar.extra_symbols.len()); + for extra_token in grammar.extra_symbols.iter() { + extra_symbols.push(interner.intern_rule(extra_token)?); } let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len()); @@ -73,10 +73,16 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result ); } + for (i, variable) in variables.iter_mut().enumerate() { + if supertype_symbols.contains(&Symbol::non_terminal(i)) { + variable.kind = VariableType::Hidden; + } + } + Ok(InternedGrammar { variables, external_tokens, - extra_tokens, + extra_symbols, expected_conflicts, variables_to_inline, supertype_symbols, @@ -236,7 +242,7 @@ mod tests { InputGrammar { variables, name: "the_language".to_string(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), diff --git a/cli/src/generate/prepare_grammar/mod.rs b/cli/src/generate/prepare_grammar/mod.rs index a574aefb..8b094c56 100644 --- a/cli/src/generate/prepare_grammar/mod.rs +++ b/cli/src/generate/prepare_grammar/mod.rs @@ -1,6 +1,6 @@ mod expand_repeats; mod 
expand_tokens; -mod extract_simple_aliases; +mod extract_default_aliases; mod extract_tokens; mod flatten_grammar; mod intern_symbols; @@ -8,7 +8,7 @@ mod process_inlines; use self::expand_repeats::expand_repeats; pub(crate) use self::expand_tokens::expand_tokens; -use self::extract_simple_aliases::extract_simple_aliases; +use self::extract_default_aliases::extract_default_aliases; use self::extract_tokens::extract_tokens; use self::flatten_grammar::flatten_grammar; use self::intern_symbols::intern_symbols; @@ -21,7 +21,7 @@ use crate::generate::rules::{AliasMap, Rule, Symbol}; pub(crate) struct IntermediateGrammar { variables: Vec, - extra_tokens: Vec, + extra_symbols: Vec, expected_conflicts: Vec>, external_tokens: Vec, variables_to_inline: Vec, @@ -52,7 +52,7 @@ pub(crate) fn prepare_grammar( let syntax_grammar = expand_repeats(syntax_grammar); let mut syntax_grammar = flatten_grammar(syntax_grammar)?; let lexical_grammar = expand_tokens(lexical_grammar)?; - let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar); + let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar); let inlines = process_inlines(&syntax_grammar); - Ok((syntax_grammar, lexical_grammar, inlines, simple_aliases)) + Ok((syntax_grammar, lexical_grammar, inlines, default_aliases)) } diff --git a/cli/src/generate/prepare_grammar/process_inlines.rs b/cli/src/generate/prepare_grammar/process_inlines.rs index 68568419..f83658b2 100644 --- a/cli/src/generate/prepare_grammar/process_inlines.rs +++ b/cli/src/generate/prepare_grammar/process_inlines.rs @@ -127,6 +127,9 @@ impl InlinedProductionMapBuilder { last_inserted_step.associativity = removed_step.associativity; } } + if p.dynamic_precedence.abs() > production.dynamic_precedence.abs() { + production.dynamic_precedence = p.dynamic_precedence; + } production }), ); @@ -196,7 +199,7 @@ mod tests { fn test_basic_inlining() { let grammar = SyntaxGrammar { expected_conflicts: Vec::new(), - 
extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), supertype_symbols: Vec::new(), word_token: None, @@ -226,7 +229,7 @@ mod tests { ], }, Production { - dynamic_precedence: 0, + dynamic_precedence: -2, steps: vec![ProductionStep::new(Symbol::terminal(14))], }, ], @@ -258,7 +261,7 @@ mod tests { ], }, Production { - dynamic_precedence: 0, + dynamic_precedence: -2, steps: vec![ ProductionStep::new(Symbol::terminal(10)), ProductionStep::new(Symbol::terminal(14)), @@ -327,7 +330,7 @@ mod tests { Symbol::non_terminal(3), ], expected_conflicts: Vec::new(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), supertype_symbols: Vec::new(), word_token: None, @@ -429,7 +432,7 @@ mod tests { }, ], expected_conflicts: Vec::new(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), supertype_symbols: Vec::new(), word_token: None, diff --git a/cli/src/generate/properties.rs b/cli/src/generate/properties.rs deleted file mode 100644 index 5091eafc..00000000 --- a/cli/src/generate/properties.rs +++ /dev/null @@ -1,1499 +0,0 @@ -use crate::error::{Error, Result}; -use crate::generate::dedup::split_state_id_groups; -use rsass; -use rsass::sass::Value; -use rsass::selectors::SelectorPart; -use serde_derive::Serialize; -use std::cmp::Ordering; -use std::collections::hash_map::Entry; -use std::collections::{btree_map, BTreeMap, HashMap, HashSet, VecDeque}; -use std::fmt::{self, Write}; -use std::hash::{Hash, Hasher}; -use std::mem; -use std::path::{Path, PathBuf}; -use tree_sitter::{self, PropertyStateJSON, PropertyTransitionJSON}; - -#[derive(Clone, Debug, PartialEq, Eq, Serialize)] -#[serde(untagged)] -pub(crate) enum PropertyValue { - Number(isize), - Boolean(bool), - String(String), - Object(PropertySet), - Array(Vec), -} - -type PropertySet = BTreeMap; -type PropertySheetJSON = tree_sitter::PropertySheetJSON; -type StateId = usize; -type PropertySetId = usize; - -#[derive(Clone, 
PartialEq, Eq, PartialOrd, Ord, Hash)] -struct SelectorStep { - kind: Option, - field: Option, - child_index: Option, - text_pattern: Option, - is_named: Option, - is_immediate: bool, -} - -#[derive(PartialEq, Eq, PartialOrd, Ord)] -struct Selector(Vec); - -#[derive(Debug, PartialEq, Eq)] -struct Rule { - selectors: Vec, - properties: PropertySet, -} - -#[derive(Clone, Copy, Debug)] -struct Item<'a> { - rule_id: u32, - selector: &'a Selector, - step_id: u32, -} - -#[derive(Clone, PartialEq, Eq)] -struct ItemSet<'a>(Vec>); - -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] -struct SelectorMatch { - specificity: u32, - rule_id: u32, -} - -struct Builder<'a> { - rules: &'a Vec, - output: PropertySheetJSON, - start_item_set: ItemSet<'a>, - token_names: &'a HashSet, - ids_by_item_set: HashMap, StateId>, - item_set_queue: VecDeque<(ItemSet<'a>, StateId)>, - item_set_list: Vec>, -} - -impl<'a> Item<'a> { - fn next_step(&self) -> Option<&SelectorStep> { - self.selector.0.get(self.step_id as usize) - } - - fn is_done(&self) -> bool { - self.step_id as usize == self.selector.0.len() - } -} - -impl<'a> Ord for Item<'a> { - fn cmp(&self, other: &Item) -> Ordering { - self.rule_id - .cmp(&other.rule_id) - .then_with(|| self.selector.0.len().cmp(&other.selector.0.len())) - .then_with(|| { - for (i, step) in self - .selector - .0 - .iter() - .enumerate() - .skip(self.step_id as usize) - { - let result = step.cmp(&other.selector.0[i]); - if result != Ordering::Equal { - return result; - } - } - Ordering::Equal - }) - } -} - -impl<'a> PartialOrd for Item<'a> { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl<'a> Hash for Item<'a> { - fn hash(&self, hasher: &mut H) { - hasher.write_u32(self.rule_id); - hasher.write_usize(self.selector.0.len()); - hasher.write_u32(self.step_id); - for step in &self.selector.0[self.step_id as usize..] 
{ - step.hash(hasher); - } - } -} - -impl<'a> PartialEq for Item<'a> { - fn eq(&self, other: &Self) -> bool { - if self.rule_id != other.rule_id - || self.selector.0.len() != other.selector.0.len() - || self.step_id != other.step_id - { - return false; - } - - for (i, step) in self - .selector - .0 - .iter() - .enumerate() - .skip(self.step_id as usize) - { - if *step != other.selector.0[i] { - return false; - } - } - - true - } -} - -impl<'a> Eq for Item<'a> {} - -impl<'a> Hash for ItemSet<'a> { - fn hash(&self, hasher: &mut H) { - hasher.write_usize(self.0.len()); - for item in &self.0 { - item.hash(hasher); - } - } -} - -impl<'a> ItemSet<'a> { - fn new() -> Self { - ItemSet(Vec::new()) - } - - fn insert(&mut self, item: Item<'a>) { - match self.0.binary_search(&item) { - Err(i) => self.0.insert(i, item), - _ => {} - } - } -} - -impl<'a> Builder<'a> { - fn new(rules: &'a Vec, token_names: &'a HashSet) -> Self { - Builder { - rules, - start_item_set: ItemSet::new(), - item_set_list: Vec::new(), - output: PropertySheetJSON { - states: Vec::new(), - property_sets: Vec::new(), - }, - token_names, - ids_by_item_set: HashMap::new(), - item_set_queue: VecDeque::new(), - } - } - - fn build(mut self) -> PropertySheetJSON { - for (i, rule) in self.rules.iter().enumerate() { - for selector in &rule.selectors { - self.start_item_set.insert(Item { - rule_id: i as u32, - selector, - step_id: 0, - }); - } - } - - self.add_state(ItemSet::new()); - self.output.states[0].id = Some(0); - while let Some((item_set, state_id)) = self.item_set_queue.pop_front() { - self.populate_state(item_set, state_id); - } - - self.remove_duplicate_states(); - - for (i, state) in self.output.states.iter_mut().enumerate() { - state.id = Some(i); - } - - self.output - } - - fn add_state(&mut self, item_set: ItemSet<'a>) -> StateId { - match self.ids_by_item_set.entry(item_set) { - Entry::Occupied(o) => *o.get(), - Entry::Vacant(v) => { - let state_id = self.output.states.len(); - 
self.output.states.push(PropertyStateJSON { - id: None, - transitions: Vec::new(), - property_set_id: 0, - default_next_state_id: 0, - }); - self.item_set_queue.push_back((v.key().clone(), state_id)); - v.insert(state_id); - state_id - } - } - } - - fn add_property_set(&mut self, properties: PropertySet) -> PropertySetId { - if let Some(index) = self - .output - .property_sets - .iter() - .position(|i| *i == properties) - { - index - } else { - self.output.property_sets.push(properties); - self.output.property_sets.len() - 1 - } - } - - fn populate_state(&mut self, item_set: ItemSet<'a>, state_id: StateId) { - let is_start_state = state_id == 0; - let mut transitions: HashMap = HashMap::new(); - let mut selector_matches = Vec::new(); - - // First, compute all of the possible state transition conditions for - // this state, and all of the rules that are currently matching. - for item in item_set.0.iter().chain(self.start_item_set.0.iter()) { - // If this item has more elements remaining in its selector, then - // add a state transition based on the next step. - if let Some(step) = item.next_step() { - transitions - .entry(PropertyTransitionJSON { - kind: step.kind.clone(), - field: step.field.clone(), - named: step.is_named, - index: step.child_index, - text: step.text_pattern.clone(), - state_id: 0, - }) - .and_modify(|rule_id| { - if item.rule_id > *rule_id { - *rule_id = item.rule_id; - } - }) - .or_insert(item.rule_id); - } - // If the item has matched its entire selector, then the item's - // properties are applicable to this state. - else { - selector_matches.push(SelectorMatch { - rule_id: item.rule_id, - specificity: selector_specificity(item.selector), - }); - } - } - - // Compute the merged properties that apply in the current state. - // Sort the matching property sets by ascending specificity and by - // their order in the sheet. This way, more specific selectors and later - // rules will override less specific selectors and earlier rules. 
- let mut properties = PropertySet::new(); - selector_matches.sort_unstable_by(|a, b| { - (a.specificity.cmp(&b.specificity)).then_with(|| a.rule_id.cmp(&b.rule_id)) - }); - selector_matches.dedup(); - for selector_match in selector_matches { - let rule = &self.rules[selector_match.rule_id as usize]; - for (property, value) in &rule.properties { - properties.insert(property.clone(), value.clone()); - } - } - self.output.states[state_id].property_set_id = self.add_property_set(properties); - - // If there are multiple transitions that could *both* match (e.g. one based on a - // a node type and one based on a field name), then create an additional transition - // for the intersection of the two. - let mut i = 0; - let mut transition_list = transitions.into_iter().collect::>(); - while i < transition_list.len() { - for j in 0..i { - if let Some(intersection) = - self.intersect_transitions(&transition_list[j].0, &transition_list[i].0) - { - transition_list.push(( - intersection, - u32::max(transition_list[i].1, transition_list[j].1), - )); - } - } - i += 1; - } - - // Ensure that for a given node type, more specific transitions are tried - // first, and in the event of a tie, transitions corresponding to later rules - // in the cascade are tried first. Also, sort the non-intersecting transitions - // by name to guarantee a deterministic order. - transition_list.sort_by(|a, b| { - (transition_specificity(&b.0).cmp(&transition_specificity(&a.0))) - .then_with(|| b.1.cmp(&a.1)) - .then_with(|| a.0.kind.cmp(&b.0.kind)) - .then_with(|| a.0.named.cmp(&b.0.named)) - .then_with(|| a.0.field.cmp(&b.0.field)) - }); - - // For eacy possible state transition, compute the set of items in that transition's - // destination state. 
- i = 0; - while i < transition_list.len() { - let transition = &mut transition_list[i].0; - let transition_is_leaf = transition.named == Some(false) - || transition - .kind - .as_ref() - .map_or(false, |kind| self.token_names.contains(kind)); - - let mut next_item_set = ItemSet::new(); - let mut transition_differs_from_start_state = false; - for item in item_set.0.iter().chain(self.start_item_set.0.iter()) { - if let Some(next_step) = item.next_step() { - // If the next step of the item's selector satisfies this transition, - // advance the item to the next part of its selector and add the - // resulting item to this transition's destination state. - if step_matches_transition(next_step, transition) { - let next_item = Item { - rule_id: item.rule_id, - selector: item.selector, - step_id: item.step_id + 1, - }; - if !transition_is_leaf || next_item.is_done() { - next_item_set.insert(next_item); - if item.step_id > 0 { - transition_differs_from_start_state = true; - } - } - } - - // If the next step of the item is not an immediate child, then - // include this item in this transition's destination state, because - // the next step of the item might match a descendant node. - if !transition_is_leaf && !next_step.is_immediate && item.step_id > 0 { - next_item_set.insert(*item); - transition_differs_from_start_state = true; - } - } - } - - if (is_start_state || transition_differs_from_start_state) - && !next_item_set.0.is_empty() - { - transition.state_id = self.add_state(next_item_set); - if is_start_state || !self.output.states[0].transitions.contains(&transition) { - i += 1; - continue; - } - } - transition_list.remove(i); - } - - self.output.states[state_id] - .transitions - .extend(transition_list.into_iter().map(|i| i.0)); - - // Compute the default successor item set - the item set that - // we should advance to if the next element doesn't match any - // of the next elements in the item set's selectors. 
- let mut default_next_item_set = ItemSet::new(); - for item in &item_set.0 { - let next_step = item.selector.0.get(item.step_id as usize); - if let Some(step) = next_step { - if !step.is_immediate { - default_next_item_set.insert(*item); - } - } - } - self.output.states[state_id].default_next_state_id = self.add_state(default_next_item_set); - - self.item_set_list.push(item_set); - } - - fn intersect_transitions( - &self, - left: &PropertyTransitionJSON, - right: &PropertyTransitionJSON, - ) -> Option { - let mut left_contributes = false; - let mut right_contributes = false; - let mut result = left.clone(); - - if let Some(left_kind) = &left.kind { - if let Some(right_kind) = &right.kind { - if left_kind != right_kind || left.named != right.named { - return None; - } - } else { - left_contributes = true; - } - } else if let Some(right_kind) = &right.kind { - result.kind = Some(right_kind.clone()); - result.named = right.named; - right_contributes = true; - } - - if let Some(left_field) = &left.field { - if let Some(right_field) = &right.field { - if left_field != right_field { - return None; - } - } else { - left_contributes = true; - } - } else if let Some(right_field) = &right.field { - result.field = Some(right_field.clone()); - right_contributes = true; - } - - if let Some(left_text) = &left.text { - if let Some(right_text) = &right.text { - if left_text != right_text { - return None; - } - } else { - left_contributes = true; - } - } else if let Some(right_text) = &right.text { - result.text = Some(right_text.clone()); - right_contributes = true; - } - - if let Some(left_index) = &left.index { - if let Some(right_index) = &right.index { - if left_index != right_index { - return None; - } - } else { - left_contributes = true; - } - } else if let Some(right_index) = &right.index { - result.index = Some(right_index.clone()); - right_contributes = true; - } - - if left_contributes && right_contributes { - Some(result) - } else { - None - } - } - - fn 
remove_duplicate_states(&mut self) { - let mut state_ids_by_properties = HashMap::new(); - for (i, state) in self.output.states.iter().enumerate() { - state_ids_by_properties - .entry(state.property_set_id) - .or_insert(Vec::new()) - .push(i); - } - let mut state_ids_by_group_id = state_ids_by_properties - .into_iter() - .map(|e| e.1) - .collect::>(); - state_ids_by_group_id.sort(); - let start_group_index = state_ids_by_group_id - .iter() - .position(|g| g.contains(&0)) - .unwrap(); - state_ids_by_group_id.swap(start_group_index, 0); - - let mut group_ids_by_state_id = vec![0; self.output.states.len()]; - for (group_id, state_ids) in state_ids_by_group_id.iter().enumerate() { - for state_id in state_ids { - group_ids_by_state_id[*state_id] = group_id; - } - } - - while split_state_id_groups( - &self.output.states, - &mut state_ids_by_group_id, - &mut group_ids_by_state_id, - 0, - property_states_differ, - ) { - continue; - } - - let mut new_states = Vec::with_capacity(state_ids_by_group_id.len()); - for state_ids in state_ids_by_group_id.iter() { - let mut new_state = PropertyStateJSON::default(); - mem::swap(&mut new_state, &mut self.output.states[state_ids[0]]); - for transition in new_state.transitions.iter_mut() { - transition.state_id = group_ids_by_state_id[transition.state_id]; - } - new_state.default_next_state_id = - group_ids_by_state_id[new_state.default_next_state_id]; - new_states.push(new_state); - } - self.output.states = new_states; - } -} - -fn property_states_differ( - left: &PropertyStateJSON, - right: &PropertyStateJSON, - group_ids_by_state_id: &Vec, -) -> bool { - if group_ids_by_state_id[left.default_next_state_id] - != group_ids_by_state_id[right.default_next_state_id] - { - return true; - } - - left.transitions - .iter() - .zip(right.transitions.iter()) - .any(|(left, right)| { - left.kind != right.kind - || left.named != right.named - || left.index != right.index - || left.field != right.field - || left.text != right.text - || 
group_ids_by_state_id[left.state_id] != group_ids_by_state_id[right.state_id] - }) -} - -fn selector_specificity(selector: &Selector) -> u32 { - let mut result = 0; - for step in &selector.0 { - if step.kind.is_some() { - result += 1; - } - if step.field.is_some() { - result += 1; - } - if step.child_index.is_some() { - result += 1; - } - if step.text_pattern.is_some() { - result += 1; - } - } - result -} - -fn transition_specificity(transition: &PropertyTransitionJSON) -> u32 { - let mut result = 0; - if transition.kind.is_some() { - result += 1; - } - if transition.field.is_some() { - result += 1; - } - if transition.index.is_some() { - result += 1; - } - if transition.text.is_some() { - result += 1; - } - result -} - -fn step_matches_transition(step: &SelectorStep, transition: &PropertyTransitionJSON) -> bool { - step.kind - .as_ref() - .map_or(true, |kind| transition.kind.as_ref() == Some(kind)) - && step - .is_named - .map_or(true, |named| transition.named == Some(named)) - && step - .field - .as_ref() - .map_or(true, |field| transition.field.as_ref() == Some(field)) - && step - .child_index - .map_or(true, |index| transition.index == Some(index)) - && step - .text_pattern - .as_ref() - .map_or(true, |text| transition.text.as_ref() == Some(text)) -} - -impl fmt::Debug for SelectorStep { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if self.is_immediate { - write!(f, "> ")?; - } - write!(f, "(")?; - if let Some(kind) = &self.kind { - if self.is_named.unwrap() { - write!(f, "{}", kind)?; - } else { - write!(f, "[token='{}']", kind)?; - } - } - if let Some(field) = &self.field { - write!(f, ".{}", field)?; - } - if let Some(n) = self.child_index { - write!(f, ":nth-child({})", n)?; - } - if let Some(t) = &self.text_pattern { - write!(f, "[text='{}']", t)?; - } - write!(f, ")")?; - Ok(()) - } -} - -impl fmt::Debug for Selector { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "[")?; - for (i, step) in self.0.iter().enumerate() { - 
if step.is_immediate { - write!(f, " > ")?; - } else if i > 0 { - write!(f, " ")?; - } - write!(f, "{:?}", step)?; - } - write!(f, " (specificity: {})]", selector_specificity(self))?; - Ok(()) - } -} - -pub(crate) fn generate_property_sheet( - path: impl AsRef, - css: &str, - token_names: &HashSet, -) -> Result { - let rules = parse_property_sheet(path.as_ref(), &css)?; - Ok(Builder::new(&rules, token_names).build()) -} - -fn parse_property_sheet(path: &Path, css: &str) -> Result> { - let mut schema_paths = Vec::new(); - let css = css.as_bytes(); - let mut items = rsass::parse_scss_data(css).map_err(|(pos, kind)| rsass::Error::ParseError { - file: path.to_string_lossy().into(), - pos: rsass::ErrPos::pos_of(pos, &css), - kind, - })?; - - process_at_rules(&mut items, &mut schema_paths, path)?; - let mut result = Vec::new(); - let selector_prefixes = vec![Vec::new()]; - parse_sass_items(items, &selector_prefixes, &mut result)?; - Ok(result) -} - -fn parse_sass_items( - items: Vec, - selector_prefixes: &Vec>, - result: &mut Vec, -) -> Result<()> { - let mut properties = PropertySet::new(); - for item in items { - match item { - rsass::Item::None | rsass::Item::Comment(_) => {} - rsass::Item::Property(name, value) => { - let value = parse_sass_value(&value)?; - match properties.entry(name.to_string()) { - btree_map::Entry::Vacant(v) => { - v.insert(value); - } - btree_map::Entry::Occupied(mut o) => { - let existing_value = o.get_mut(); - if let PropertyValue::Array(items) = existing_value { - items.push(value); - continue; - } else { - let v = existing_value.clone(); - *existing_value = PropertyValue::Array(vec![v, value]); - } - } - } - } - rsass::Item::Rule(selectors, items) => { - let mut full_selectors = Vec::new(); - for prefix in selector_prefixes { - for selector in &selectors.s { - let mut prefix = prefix.clone(); - let mut operator_was_immediate: Option = Some(false); - for part in &selector.0 { - match part { - SelectorPart::BackRef => { - 
operator_was_immediate = None; - } - SelectorPart::Simple(value) => { - if let Some(value) = value.single_raw() { - for (i, value) in value.split('.').enumerate() { - if value.is_empty() { - continue; - } - let value = value.to_string(); - check_node_kind(&value)?; - if i > 0 { - if let Some(immediate) = operator_was_immediate { - prefix.push(SelectorStep { - kind: None, - field: Some(value), - is_named: None, - child_index: None, - text_pattern: None, - is_immediate: immediate, - }) - } else { - prefix.last_mut().unwrap().field = Some(value); - } - } else { - if let Some(immediate) = operator_was_immediate { - prefix.push(SelectorStep { - kind: Some(value.to_string()), - field: None, - child_index: None, - text_pattern: None, - is_named: Some(true), - is_immediate: immediate, - }); - } else { - return Error::err(format!("Node type {} must be separated by whitespace or the `>` operator", value)); - } - } - operator_was_immediate = None; - } - } else { - return Err(interpolation_error()); - } - operator_was_immediate = None; - } - SelectorPart::Attribute { name, val, .. } => { - match name.single_raw() { - None => return Err(interpolation_error()), - Some("text") => { - if operator_was_immediate.is_some() { - return Error::err("The `text` attribute must be used in combination with a node type or field".to_string()); - } - if let Some(last_step) = prefix.last_mut() { - last_step.text_pattern = - Some(get_string_value(val.to_string())?) 
- } - } - Some("token") => { - if let Some(immediate) = operator_was_immediate { - prefix.push(SelectorStep { - kind: Some(get_string_value(val.to_string())?), - field: None, - is_named: Some(false), - child_index: None, - text_pattern: None, - is_immediate: immediate, - }); - operator_was_immediate = None; - } else { - return Error::err("The `token` attribute canot be used in combination with a node type".to_string()); - } - } - _ => { - return Error::err(format!( - "Unsupported attribute {}", - part - )); - } - } - } - SelectorPart::PseudoElement { .. } => { - return Error::err( - "Pseudo elements are not supported".to_string(), - ); - } - SelectorPart::Pseudo { name, arg } => match name.single_raw() { - None => return Err(interpolation_error()), - Some("nth-child") => { - if let Some(arg) = arg { - let mut arg_str = String::new(); - write!(&mut arg_str, "{}", arg).unwrap(); - if let Some(last_step) = prefix.last_mut() { - if let Ok(i) = usize::from_str_radix(&arg_str, 10) { - last_step.child_index = Some(i); - } else { - return Error::err(format!( - "Invalid child index {}", - arg - )); - } - } - } - } - _ => { - return Error::err(format!( - "Unsupported pseudo-class {}", - part - )); - } - }, - SelectorPart::Descendant => { - operator_was_immediate = Some(false); - } - SelectorPart::RelOp(operator) => { - let operator = *operator as char; - if operator == '>' { - operator_was_immediate = Some(true); - } else { - return Error::err(format!( - "Unsupported operator {}", - operator - )); - } - } - } - } - full_selectors.push(prefix); - } - } - parse_sass_items(items, &full_selectors, result)?; - } - _ => return Error::err(format!("Unsupported syntax type {:?}", item)), - } - } - - if !properties.is_empty() { - result.push(Rule { - selectors: selector_prefixes.iter().cloned().map(Selector).collect(), - properties, - }); - } - - Ok(()) -} - -fn process_at_rules( - items: &mut Vec, - schema_paths: &mut Vec, - path: &Path, -) -> Result<()> { - let mut i = 0; - while i 
< items.len() { - match &items[i] { - rsass::Item::Import(arg) => { - if let Some(s) = get_sass_string(arg) { - let import_path = resolve_path(path, s)?; - let mut imported_items = rsass::parse_scss_file(&import_path)?; - process_at_rules(&mut imported_items, schema_paths, &import_path)?; - items.splice(i..(i + 1), imported_items); - continue; - } else { - return Err(Error::new("@import arguments must be strings".to_string())); - } - } - rsass::Item::AtRule { name, args, .. } => match name.as_str() { - "schema" => { - if let Some(s) = get_sass_string(args) { - let schema_path = resolve_path(path, s)?; - schema_paths.push(schema_path); - items.remove(i); - continue; - } else { - return Error::err("@schema arguments must be strings".to_string()); - } - } - _ => return Error::err(format!("Unsupported at-rule '{}'", name)), - }, - _ => {} - } - i += 1; - } - Ok(()) -} - -fn parse_sass_value(value: &Value) -> Result { - match value { - Value::Literal(s) => { - if let Some(s) = s.single_raw() { - Ok(PropertyValue::String(s.to_string())) - } else { - Err(interpolation_error()) - } - } - Value::Call(name, raw_args) => { - if let Some(name) = name.single_raw() { - let mut args = Vec::new(); - for (_, arg) in raw_args.iter() { - args.push(parse_sass_value(arg)?); - } - let mut result = PropertySet::new(); - result.insert("name".to_string(), PropertyValue::String(name.to_string())); - result.insert("args".to_string(), PropertyValue::Array(args)); - Ok(PropertyValue::Object(result)) - } else { - Err(Error::new( - "String interpolation is not supported".to_string(), - )) - } - } - Value::List(elements, ..) 
=> { - let mut result = Vec::new(); - for element in elements { - result.push(parse_sass_value(element)?); - } - Ok(PropertyValue::Array(result)) - } - Value::Color(_, Some(name)) => Ok(PropertyValue::String(name.clone())), - Value::Numeric(n, _) => Ok(PropertyValue::Number(n.to_integer())), - Value::True => Ok(PropertyValue::Boolean(true)), - Value::False => Ok(PropertyValue::Boolean(false)), - _ => Err(Error::new(format!( - "Property values must be strings or function calls. Got {:?}", - value - ))), - } -} - -fn get_sass_string(value: &Value) -> Option<&str> { - if let Value::Literal(s) = value { - s.single_raw() - } else { - None - } -} - -fn resolve_path(base: &Path, p: &str) -> Result { - let path = Path::new(p); - let mut base = base.to_owned(); - base.pop(); - if path.starts_with(".") { - base.push(path); - if base.exists() { - return Ok(base); - } - } else { - loop { - let mut result = base.clone(); - result.push("node_modules"); - result.push(path); - if result.exists() { - return Ok(result); - } - if !base.pop() { - break; - } - } - } - Err(Error::new(format!("Could not resolve import path `{}`", p))) -} - -fn check_node_kind(name: &String) -> Result<()> { - for c in name.chars() { - if !c.is_alphanumeric() && c != '_' { - return Err(Error::new(format!("Invalid identifier '{}'", name))); - } - } - Ok(()) -} - -fn get_string_value(mut s: String) -> Result { - if s.starts_with("'") && s.ends_with("'") || s.starts_with('"') && s.ends_with('"') { - s.pop(); - s.remove(0); - Ok(s) - } else { - Err(Error::new(format!("Unsupported string literal {}", s))) - } -} - -fn interpolation_error() -> Error { - Error::new("String interpolation is not supported".to_string()) -} - -#[cfg(test)] -mod tests { - use super::*; - use regex::Regex; - use std::fs; - use tempfile::TempDir; - - #[test] - fn test_property_sheet_with_immediate_child_and_descendant_selectors() { - let sheet = generate_property_sheet( - "foo.css", - " - f1 { - color: red; - - & > f2 { - color: green; 
- } - - & f3 { - color: blue; - } - } - - f2 { - color: indigo; - height: 2; - } - - f3 { - color: violet; - height: 3; - } - ", - &HashSet::new(), - ) - .unwrap(); - - // f1 single-element selector - assert_eq!( - *query_simple(&sheet, vec!["f1"]), - props(&[("color", string("red"))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f2", "f1"]), - props(&[("color", string("red"))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f2", "f3", "f1"]), - props(&[("color", string("red"))]) - ); - - // f2 single-element selector - assert_eq!( - *query_simple(&sheet, vec!["f2"]), - props(&[("color", string("indigo")), ("height", num(2))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f2", "f2"]), - props(&[("color", string("indigo")), ("height", num(2))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f1", "f3", "f2"]), - props(&[("color", string("indigo")), ("height", num(2))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f1", "f6", "f2"]), - props(&[("color", string("indigo")), ("height", num(2))]) - ); - - // f3 single-element selector - assert_eq!( - *query_simple(&sheet, vec!["f3"]), - props(&[("color", string("violet")), ("height", num(3))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f2", "f3"]), - props(&[("color", string("violet")), ("height", num(3))]) - ); - - // f2 child selector - assert_eq!( - *query_simple(&sheet, vec!["f1", "f2"]), - props(&[("color", string("green")), ("height", num(2))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f2", "f1", "f2"]), - props(&[("color", string("green")), ("height", num(2))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f3", "f1", "f2"]), - props(&[("color", string("green")), ("height", num(2))]) - ); - - // f3 descendant selector - assert_eq!( - *query_simple(&sheet, vec!["f1", "f3"]), - props(&[("color", string("blue")), ("height", num(3))]) - ); - assert_eq!( - *query_simple(&sheet, vec!["f1", "f2", "f3"]), - props(&[("color", string("blue")), ("height", num(3))]) - ); - assert_eq!( - 
*query_simple(&sheet, vec!["f1", "f6", "f7", "f8", "f3"]), - props(&[("color", string("blue")), ("height", num(3))]) - ); - - // no match - assert_eq!(*query_simple(&sheet, vec!["f1", "f3", "f4"]), props(&[])); - assert_eq!(*query_simple(&sheet, vec!["f1", "f2", "f5"]), props(&[])); - } - - #[test] - fn test_property_sheet_with_text_attribute() { - let sheet = generate_property_sheet( - "foo.css", - " - f1 { - color: red; - - &[text='^[A-Z]'] { - color: green; - } - - &[text='^[A-Z_]+$'] { - color: blue; - } - } - - f2[text='^[A-Z_]+$'] { - color: purple; - } - ", - &HashSet::new(), - ) - .unwrap(); - - assert_eq!( - *query(&sheet, vec![("f1", None, true, 0)], "abc"), - props(&[("color", string("red"))]) - ); - assert_eq!( - *query(&sheet, vec![("f1", None, true, 0)], "Abc"), - props(&[("color", string("green"))]) - ); - assert_eq!( - *query(&sheet, vec![("f1", None, true, 0)], "AB_CD"), - props(&[("color", string("blue"))]) - ); - assert_eq!( - *query(&sheet, vec![("f2", None, true, 0)], "Abc"), - props(&[]) - ); - assert_eq!( - *query(&sheet, vec![("f2", None, true, 0)], "ABC"), - props(&[("color", string("purple"))]) - ); - } - - #[test] - fn test_property_sheet_with_fields() { - let sheet = generate_property_sheet( - "foo.css", - " - a { - color: red; - &.x { - color: green; - b { - color: blue; - &.y { color: yellow; } - } - } - b { color: orange; } - b.y { color: indigo; } - } - .x { color: violet; } - ", - &HashSet::new(), - ) - .unwrap(); - - assert_eq!( - *query(&sheet, vec![("a", None, true, 0)], ""), - props(&[("color", string("red"))]) - ); - assert_eq!( - *query(&sheet, vec![("a", Some("x"), true, 0)], ""), - props(&[("color", string("green"))]) - ); - assert_eq!( - *query( - &sheet, - vec![("a", Some("x"), true, 0), ("b", None, true, 0)], - "" - ), - props(&[("color", string("blue"))]) - ); - assert_eq!( - *query( - &sheet, - vec![("a", Some("x"), true, 0), ("b", Some("y"), true, 0)], - "" - ), - props(&[("color", string("yellow"))]) - ); - 
assert_eq!( - *query(&sheet, vec![("b", Some("x"), true, 0)], ""), - props(&[("color", string("violet"))]) - ); - assert_eq!( - *query(&sheet, vec![("a", None, true, 0), ("b", None, true, 0)], ""), - props(&[("color", string("orange"))]) - ); - assert_eq!( - *query( - &sheet, - vec![("a", None, true, 0), ("b", Some("y"), true, 0)], - "" - ), - props(&[("color", string("indigo"))]) - ); - } - - #[test] - fn test_property_sheet_with_cascade_ordering_as_tie_breaker() { - let sheet = generate_property_sheet( - "foo.css", - " - f1 f2:nth-child(1) { color: red; } - f1:nth-child(1) f2 { color: green; } - f1 f2[text='a'] { color: blue; } - f1 f2[text='b'] { color: violet; } - ", - &HashSet::new(), - ) - .unwrap(); - - assert_eq!( - *query( - &sheet, - vec![("f1", None, true, 0), ("f2", None, true, 0)], - "x" - ), - props(&[]) - ); - assert_eq!( - *query( - &sheet, - vec![("f1", None, true, 0), ("f2", None, true, 1)], - "x" - ), - props(&[("color", string("red"))]) - ); - assert_eq!( - *query( - &sheet, - vec![("f1", None, true, 1), ("f2", None, true, 1)], - "x" - ), - props(&[("color", string("green"))]) - ); - assert_eq!( - *query( - &sheet, - vec![("f1", None, true, 1), ("f2", None, true, 1)], - "a" - ), - props(&[("color", string("blue"))]) - ); - assert_eq!( - *query( - &sheet, - vec![("f1", None, true, 1), ("f2", None, true, 1)], - "ab" - ), - props(&[("color", string("violet"))]) - ); - } - - #[test] - fn test_property_sheet_with_css_function_calls() { - let sheet = generate_property_sheet( - "foo.css", - " - a { - b: f(); - c: f(g(h), i, \"j\", 10); - } - ", - &HashSet::new(), - ) - .unwrap(); - - let p = query_simple(&sheet, vec!["a"]); - - assert_eq!( - p["b"], - object(&[("name", string("f")), ("args", array(vec![])),]) - ); - - assert_eq!( - p["c"], - object(&[ - ("name", string("f")), - ( - "args", - array(vec![ - object(&[("name", string("g")), ("args", array(vec![string("h"),]))]), - string("i"), - string("j"), - num(10), - ]) - ), - ]) - ); - - // Handle 
differently-formatted calls - let sheet2 = generate_property_sheet( - "foo.css", - " - a { - b: f(); - c: f( - g(h), - i, - \"j\", - 10 - ); - } - ", - &HashSet::new(), - ) - .unwrap(); - - assert_eq!( - query_simple(&sheet2, vec!["a"])["c"], - query_simple(&sheet, vec!["a"])["c"] - ); - } - - #[test] - fn test_property_sheet_with_array_by_declaring_property_multiple_times() { - let sheet = generate_property_sheet( - "foo.css", - " - a { - b: 'foo'; - b: 'bar'; - b: 'baz'; - c: f(g()); - c: h(); - } - ", - &HashSet::new(), - ) - .unwrap(); - - let p = query_simple(&sheet, vec!["a"]); - - assert_eq!( - p["b"], - array(vec![string("foo"), string("bar"), string("baz"),]) - ); - - assert_eq!( - p["c"], - array(vec![ - object(&[ - ("name", string("f")), - ( - "args", - array(vec![object(&[ - ("name", string("g")), - ("args", array(vec![])), - ])]) - ) - ]), - object(&[("name", string("h")), ("args", array(vec![])),]), - ]), - ); - } - - #[test] - fn test_property_sheet_with_imports() { - let repo_dir = TempDir::new().unwrap(); - let properties_dir = repo_dir.path().join("properties"); - let dependency_properties_dir = repo_dir - .path() - .join("node_modules") - .join("the-dependency") - .join("properties"); - fs::create_dir_all(&properties_dir).unwrap(); - fs::create_dir_all(&dependency_properties_dir).unwrap(); - let sheet_path1 = properties_dir.join("sheet1.css"); - let sheet_path2 = properties_dir.join("sheet2.css"); - let dependency_sheet_path1 = dependency_properties_dir.join("dependency-sheet1.css"); - let dependency_sheet_path2 = dependency_properties_dir.join("dependency-sheet2.css"); - - fs::write( - sheet_path2, - r#" - a { x: '1'; } - "#, - ) - .unwrap(); - fs::write( - dependency_sheet_path1, - r#" - @import "./dependency-sheet2.css"; - a { y: '2'; } - "#, - ) - .unwrap(); - fs::write( - dependency_sheet_path2, - r#" - b { x: '3'; } - "#, - ) - .unwrap(); - let sheet = generate_property_sheet( - sheet_path1, - r#" - @import "./sheet2.css"; - @import 
"the-dependency/properties/dependency-sheet1.css"; - b { y: '4'; } - "#, - &HashSet::new(), - ) - .unwrap(); - - let a = query_simple(&sheet, vec!["a"]); - assert_eq!(a["x"], string("1"),); - assert_eq!(a["y"], string("2"),); - let b = query_simple(&sheet, vec!["b"]); - assert_eq!(b["x"], string("3"),); - assert_eq!(b["y"], string("4"),); - } - - fn query_simple<'a>( - sheet: &'a PropertySheetJSON, - node_stack: Vec<&'static str>, - ) -> &'a PropertySet { - query( - sheet, - node_stack.into_iter().map(|s| (s, None, true, 0)).collect(), - "", - ) - } - - fn query<'a>( - sheet: &'a PropertySheetJSON, - node_stack: Vec<(&'static str, Option<&'static str>, bool, usize)>, - leaf_text: &str, - ) -> &'a PropertySet { - let mut state_id = 0; - for (kind, field, is_named, child_index) in node_stack { - let state = &sheet.states[state_id]; - state_id = state - .transitions - .iter() - .chain(sheet.states[0].transitions.iter()) - .find(|transition| { - transition.kind.as_ref().map_or(true, |k| k == kind) - && transition.named.map_or(true, |n| n == is_named) - && transition.field.as_ref().map_or(true, |f| field == Some(f)) - && transition.index.map_or(true, |index| index == child_index) - && (transition - .text - .as_ref() - .map_or(true, |text| Regex::new(text).unwrap().is_match(leaf_text))) - }) - .map_or(state.default_next_state_id, |t| t.state_id); - } - &sheet.property_sets[sheet.states[state_id].property_set_id] - } - - fn array(s: Vec) -> PropertyValue { - PropertyValue::Array(s) - } - - fn object<'a>(s: &'a [(&'a str, PropertyValue)]) -> PropertyValue { - PropertyValue::Object( - s.into_iter() - .map(|(a, b)| (a.to_string(), b.clone())) - .collect(), - ) - } - - fn string(s: &str) -> PropertyValue { - PropertyValue::String(s.to_string()) - } - - fn num(n: isize) -> PropertyValue { - PropertyValue::Number(n) - } - - fn props<'a>(s: &'a [(&'a str, PropertyValue)]) -> PropertySet { - s.into_iter() - .map(|(a, b)| (a.to_string(), b.clone())) - .collect() - } -} diff --git 
a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 34d8f391..58d99cc4 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -2,11 +2,12 @@ use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType use super::nfa::CharacterSet; use super::rules::{Alias, AliasMap, Symbol, SymbolType}; use super::tables::{ - AdvanceAction, FieldLocation, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry, + AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable, + ParseTableEntry, }; use core::ops::Range; use std::cmp; -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{HashMap, HashSet}; use std::fmt::Write; use std::mem::swap; @@ -14,6 +15,8 @@ use std::mem::swap; // stabilized, and the parser generation does not use it by default. const STABLE_LANGUAGE_VERSION: usize = tree_sitter::LANGUAGE_VERSION - 1; +const LARGE_CHARACTER_RANGE_COUNT: usize = 8; + macro_rules! add { ($this: tt, $($arg: tt)*) => {{ $this.buffer.write_fmt(format_args!($($arg)*)).unwrap(); @@ -62,15 +65,29 @@ struct Generator { keyword_capture_token: Option, syntax_grammar: SyntaxGrammar, lexical_grammar: LexicalGrammar, - simple_aliases: AliasMap, + default_aliases: AliasMap, symbol_order: HashMap, symbol_ids: HashMap, alias_ids: HashMap, - alias_map: BTreeMap>, + unique_aliases: Vec, + symbol_map: HashMap, field_names: Vec, next_abi: bool, } +struct TransitionSummary { + is_included: bool, + ranges: Vec>, + call_id: Option, +} + +struct LargeCharacterSetInfo { + ranges: Vec>, + symbol: Symbol, + index: usize, + usage_count: usize, +} + impl Generator { fn generate(mut self) -> String { self.init(); @@ -79,6 +96,7 @@ impl Generator { self.add_stats(); self.add_symbol_enum(); self.add_symbol_names_list(); + self.add_unique_symbol_map(); self.add_symbol_metadata_list(); if !self.field_names.is_empty() { @@ -91,14 +109,18 @@ impl Generator { self.add_alias_sequences(); } + if self.next_abi { + 
self.add_non_terminal_alias_map(); + } + let mut main_lex_table = LexTable::default(); swap(&mut main_lex_table, &mut self.main_lex_table); - self.add_lex_function("ts_lex", main_lex_table); + self.add_lex_function("ts_lex", main_lex_table, true); if self.keyword_capture_token.is_some() { let mut keyword_lex_table = LexTable::default(); swap(&mut keyword_lex_table, &mut self.keyword_lex_table); - self.add_lex_function("ts_lex_keywords", keyword_lex_table); + self.add_lex_function("ts_lex_keywords", keyword_lex_table, false); } self.add_lex_modes_list(); @@ -121,59 +143,105 @@ impl Generator { self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers); } - let mut field_names = Vec::new(); + self.symbol_map = self + .parse_table + .symbols + .iter() + .map(|symbol| { + let mut mapping = symbol; + + // There can be multiple symbols in the grammar that have the same name and kind, + // due to simple aliases. When that happens, ensure that they map to the same + // public-facing symbol. If one of the symbols is not aliased, choose that one + // to be the public-facing symbol. Otherwise, pick the symbol with the lowest + // numeric value. + if let Some(alias) = self.default_aliases.get(symbol) { + let kind = alias.kind(); + for other_symbol in &self.parse_table.symbols { + if let Some(other_alias) = self.default_aliases.get(other_symbol) { + if other_symbol < mapping && other_alias == alias { + mapping = other_symbol; + } + } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) { + mapping = other_symbol; + break; + } + } + } + // Two anonymous tokens with different flags but the same string value + // should be represented with the same symbol in the public API. 
Examples: + // * "<" and token(prec(1, "<")) + // * "(" and token.immediate("(") + else if symbol.is_terminal() { + let metadata = self.metadata_for_symbol(*symbol); + for other_symbol in &self.parse_table.symbols { + let other_metadata = self.metadata_for_symbol(*other_symbol); + if other_metadata == metadata { + mapping = other_symbol; + break; + } + } + } + + (*symbol, *mapping) + }) + .collect(); + for production_info in &self.parse_table.production_infos { + // Build a list of all field names for field_name in production_info.field_map.keys() { - field_names.push(field_name); + if let Err(i) = self.field_names.binary_search(&field_name) { + self.field_names.insert(i, field_name.clone()); + } } for alias in &production_info.alias_sequence { + // Generate a mapping from aliases to C identifiers. if let Some(alias) = &alias { - let alias_kind = if alias.is_named { - VariableType::Named - } else { - VariableType::Anonymous - }; - let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { - let (name, kind) = self.metadata_for_symbol(*symbol); - name == alias.value && kind == alias_kind + let existing_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { + if let Some(default_alias) = self.default_aliases.get(symbol) { + default_alias == alias + } else { + let (name, kind) = self.metadata_for_symbol(*symbol); + name == alias.value && kind == alias.kind() + } }); - let alias_id = if let Some(symbol) = matching_symbol { - self.symbol_ids[&symbol].clone() - } else if alias.is_named { - format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) - } else { - format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) - }; + + // Some aliases match an existing symbol in the grammar. + let alias_id; + if let Some(existing_symbol) = existing_symbol { + alias_id = self.symbol_ids[&self.symbol_map[&existing_symbol]].clone(); + } + // Other aliases don't match any existing symbol, and need their own identifiers. 
+ else { + if let Err(i) = self.unique_aliases.binary_search(alias) { + self.unique_aliases.insert(i, alias.clone()); + } + + alias_id = if alias.is_named { + format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) + } else { + format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) + }; + } + self.alias_ids.entry(alias.clone()).or_insert(alias_id); - self.alias_map - .entry(alias.clone()) - .or_insert(matching_symbol); } } } - field_names.sort_unstable(); - field_names.dedup(); - self.field_names = field_names.into_iter().cloned().collect(); - - // If we are opting in to the new unstable language ABI, then use the concept of - // "small parse states". Otherwise, use the same representation for all parse - // states. - if self.next_abi { - let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2); - self.large_state_count = self - .parse_table - .states - .iter() - .enumerate() - .take_while(|(i, s)| { - *i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold - }) - .count(); - } else { - self.large_state_count = self.parse_table.states.len(); - } + // Determine which states should use the "small state" representation, and which should + // use the normal array representation. 
+ let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2); + self.large_state_count = self + .parse_table + .states + .iter() + .enumerate() + .take_while(|(i, s)| { + *i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold + }) + .count(); } fn add_includes(&mut self) { @@ -239,21 +307,14 @@ impl Generator { "#define STATE_COUNT {}", self.parse_table.states.len() ); - - if self.next_abi { - add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count); - } + add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count); add_line!( self, "#define SYMBOL_COUNT {}", self.parse_table.symbols.len() ); - add_line!( - self, - "#define ALIAS_COUNT {}", - self.alias_map.iter().filter(|e| e.1.is_none()).count() - ); + add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len(),); add_line!(self, "#define TOKEN_COUNT {}", token_count); add_line!( self, @@ -281,11 +342,9 @@ impl Generator { i += 1; } } - for (alias, symbol) in &self.alias_map { - if symbol.is_none() { - add_line!(self, "{} = {},", self.alias_ids[&alias], i); - i += 1; - } + for alias in &self.unique_aliases { + add_line!(self, "{} = {},", self.alias_ids[&alias], i); + i += 1; } dedent!(self); add_line!(self, "}};"); @@ -297,28 +356,52 @@ impl Generator { indent!(self); for symbol in self.parse_table.symbols.iter() { let name = self.sanitize_string( - self.simple_aliases + self.default_aliases .get(symbol) .map(|alias| alias.value.as_str()) .unwrap_or(self.metadata_for_symbol(*symbol).0), ); add_line!(self, "[{}] = \"{}\",", self.symbol_ids[&symbol], name); } - for (alias, symbol) in &self.alias_map { - if symbol.is_none() { - add_line!( - self, - "[{}] = \"{}\",", - self.alias_ids[&alias], - self.sanitize_string(&alias.value) - ); - } + for alias in &self.unique_aliases { + add_line!( + self, + "[{}] = \"{}\",", + self.alias_ids[&alias], + self.sanitize_string(&alias.value) + ); } dedent!(self); add_line!(self, "}};"); 
add_line!(self, ""); } + fn add_unique_symbol_map(&mut self) { + add_line!(self, "static TSSymbol ts_symbol_map[] = {{"); + indent!(self); + for symbol in &self.parse_table.symbols { + add_line!( + self, + "[{}] = {},", + self.symbol_ids[symbol], + self.symbol_ids[&self.symbol_map[symbol]], + ); + } + + for alias in &self.unique_aliases { + add_line!( + self, + "[{}] = {},", + self.alias_ids[&alias], + self.alias_ids[&alias], + ); + } + + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } + fn add_field_name_enum(&mut self) { add_line!(self, "enum {{"); indent!(self); @@ -356,7 +439,7 @@ impl Generator { for symbol in &self.parse_table.symbols { add_line!(self, "[{}] = {{", self.symbol_ids[&symbol]); indent!(self); - if let Some(Alias { is_named, .. }) = self.simple_aliases.get(symbol) { + if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) { add_line!(self, ".visible = true,"); add_line!(self, ".named = {},", is_named); } else { @@ -372,6 +455,9 @@ impl Generator { VariableType::Hidden => { add_line!(self, ".visible = false,"); add_line!(self, ".named = true,"); + if self.syntax_grammar.supertype_symbols.contains(symbol) { + add_line!(self, ".supertype = true,"); + } } VariableType::Auxiliary => { add_line!(self, ".visible = false,"); @@ -382,15 +468,13 @@ impl Generator { dedent!(self); add_line!(self, "}},"); } - for (alias, matching_symbol) in &self.alias_map { - if matching_symbol.is_none() { - add_line!(self, "[{}] = {{", self.alias_ids[&alias]); - indent!(self); - add_line!(self, ".visible = true,"); - add_line!(self, ".named = {},", alias.is_named); - dedent!(self); - add_line!(self, "}},"); - } + for alias in &self.unique_aliases { + add_line!(self, "[{}] = {{", self.alias_ids[&alias]); + indent!(self); + add_line!(self, ".visible = true,"); + add_line!(self, ".named = {},", alias.is_named); + dedent!(self); + add_line!(self, "}},"); } dedent!(self); add_line!(self, "}};"); @@ -429,6 +513,53 @@ impl Generator { 
add_line!(self, ""); } + fn add_non_terminal_alias_map(&mut self) { + let mut alias_ids_by_symbol = HashMap::new(); + for variable in &self.syntax_grammar.variables { + for production in &variable.productions { + for step in &production.steps { + if let Some(alias) = &step.alias { + if step.symbol.is_non_terminal() + && Some(alias) != self.default_aliases.get(&step.symbol) + { + if self.symbol_ids.contains_key(&step.symbol) { + if let Some(alias_id) = self.alias_ids.get(&alias) { + let alias_ids = alias_ids_by_symbol + .entry(step.symbol) + .or_insert(Vec::new()); + if let Err(i) = alias_ids.binary_search(&alias_id) { + alias_ids.insert(i, alias_id); + } + } + } + } + } + } + } + } + + let mut alias_ids_by_symbol = alias_ids_by_symbol.iter().collect::>(); + alias_ids_by_symbol.sort_unstable_by_key(|e| e.0); + + add_line!(self, "static uint16_t ts_non_terminal_alias_map[] = {{"); + indent!(self); + for (symbol, alias_ids) in alias_ids_by_symbol { + let symbol_id = &self.symbol_ids[symbol]; + let public_symbol_id = &self.symbol_ids[&self.symbol_map[&symbol]]; + add_line!(self, "{}, {},", symbol_id, 1 + alias_ids.len()); + indent!(self); + add_line!(self, "{},", public_symbol_id); + for alias_id in alias_ids { + add_line!(self, "{},", alias_id); + } + dedent!(self); + } + add_line!(self, "0,"); + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } + fn add_field_sequences(&mut self) { let mut flat_field_maps = vec![]; let mut next_flat_field_map_index = 0; @@ -462,7 +593,8 @@ impl Generator { add_line!( self, - "static const TSFieldMapSlice ts_field_map_slices[] = {{", + "static const TSFieldMapSlice ts_field_map_slices[{}] = {{", + self.parse_table.production_infos.len(), ); indent!(self); for (production_id, (row_id, length)) in field_map_ids.into_iter().enumerate() { @@ -504,21 +636,120 @@ impl Generator { add_line!(self, ""); } - fn add_lex_function(&mut self, name: &str, lex_table: LexTable) { + fn add_lex_function( + &mut self, + name: &str, + 
lex_table: LexTable, + extract_helper_functions: bool, + ) { + let mut ruled_out_chars = HashSet::new(); + let mut large_character_sets = Vec::::new(); + + // For each lex state, compute a summary of the code that needs to be + // generated. + let state_transition_summaries: Vec> = lex_table + .states + .iter() + .map(|state| { + ruled_out_chars.clear(); + + // For each state transition, compute the set of character ranges + // that need to be checked. + state + .advance_actions + .iter() + .map(|(chars, action)| { + let (chars, is_included) = match chars { + CharacterSet::Include(c) => (c, true), + CharacterSet::Exclude(c) => (c, false), + }; + let mut call_id = None; + let mut ranges = + CharacterSet::ranges(chars, &ruled_out_chars).collect::>(); + if is_included { + ruled_out_chars.extend(chars.iter().map(|c| *c as u32)); + } else { + ranges.insert(0, '\0'..'\0') + } + + // Record any large character sets so that they can be extracted + // into helper functions, reducing code duplication. + if extract_helper_functions && ranges.len() > LARGE_CHARACTER_RANGE_COUNT { + let char_set_symbol = self + .symbol_for_advance_action(action, &lex_table) + .expect("No symbol for lex state"); + let mut count_for_symbol = 0; + for (i, info) in large_character_sets.iter_mut().enumerate() { + if info.ranges == ranges { + call_id = Some(i); + info.usage_count += 1; + break; + } + if info.symbol == char_set_symbol { + count_for_symbol += 1; + } + } + if call_id.is_none() { + call_id = Some(large_character_sets.len()); + large_character_sets.push(LargeCharacterSetInfo { + symbol: char_set_symbol, + index: count_for_symbol + 1, + ranges: ranges.clone(), + usage_count: 1, + }); + } + } + + TransitionSummary { + is_included, + ranges, + call_id, + } + }) + .collect() + }) + .collect(); + + // Generate a helper function for each large character set. 
+ let mut sorted_large_char_sets: Vec<_> = large_character_sets.iter().map(|e| e).collect(); + sorted_large_char_sets.sort_unstable_by_key(|info| (info.symbol, info.index)); + for info in sorted_large_char_sets { + if info.usage_count > 1 { + add_line!( + self, + "static inline bool {}_character_set_{}(int32_t lookahead) {{", + self.symbol_ids[&info.symbol], + info.index + ); + indent!(self); + add_line!(self, "return"); + indent!(self); + add_whitespace!(self); + self.add_character_range_conditions(&info.ranges, true, 0); + add!(self, ";\n"); + dedent!(self); + dedent!(self); + add_line!(self, "}}"); + add_line!(self, ""); + } + } + add_line!( self, "static bool {}(TSLexer *lexer, TSStateId state) {{", name ); indent!(self); - add_line!(self, "START_LEXER();"); - add_line!(self, "switch (state) {{"); - indent!(self); + add_line!(self, "START_LEXER();"); + add_line!(self, "eof = lexer->eof(lexer);"); + add_line!(self, "switch (state) {{"); + + indent!(self); for (i, state) in lex_table.states.into_iter().enumerate() { add_line!(self, "case {}:", i); indent!(self); - self.add_lex_state(state); + self.add_lex_state(state, &state_transition_summaries[i], &large_character_sets); dedent!(self); } @@ -529,85 +760,102 @@ impl Generator { dedent!(self); add_line!(self, "}}"); + dedent!(self); add_line!(self, "}}"); add_line!(self, ""); } - fn add_lex_state(&mut self, state: LexState) { + fn symbol_for_advance_action( + &self, + action: &AdvanceAction, + lex_table: &LexTable, + ) -> Option { + let mut state_ids = vec![action.state]; + let mut i = 0; + while i < state_ids.len() { + let id = state_ids[i]; + let state = &lex_table.states[id]; + if let Some(accept) = state.accept_action { + return Some(accept); + } + for (_, action) in &state.advance_actions { + if !state_ids.contains(&action.state) { + state_ids.push(action.state); + } + } + i += 1; + } + return None; + } + + fn add_lex_state( + &mut self, + state: LexState, + transition_info: &Vec, + large_character_sets: 
&Vec, + ) { if let Some(accept_action) = state.accept_action { add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]); } - let mut ruled_out_characters = HashSet::new(); - for (characters, action) in state.advance_actions { - let previous_length = self.buffer.len(); + if let Some(eof_action) = state.eof_action { + add_line!(self, "if (eof) ADVANCE({});", eof_action.state); + } + for (i, (_, action)) in state.advance_actions.into_iter().enumerate() { + let transition = &transition_info[i]; add_whitespace!(self); - add!(self, "if ("); - if self.add_character_set_condition(&characters, &ruled_out_characters) { - add!(self, ") "); - self.add_advance_action(&action); - if let CharacterSet::Include(chars) = characters { - ruled_out_characters.extend(chars.iter().map(|c| *c as u32)); + + // If there is a helper function for this transition's character + // set, then generate a call to that helper function. + if let Some(call_id) = transition.call_id { + let info = &large_character_sets[call_id]; + if info.usage_count > 1 { + add!(self, "if ("); + if !transition.is_included { + add!(self, "!"); + } + add!( + self, + "{}_character_set_{}(lookahead)) ", + self.symbol_ids[&info.symbol], + info.index + ); + self.add_advance_action(&action); + add!(self, "\n"); + continue; } - } else { - self.buffer.truncate(previous_length); - self.add_advance_action(&action); } + + // Otherwise, generate code to compare the lookahead character + // with all of the character ranges. 
+ if transition.ranges.len() > 0 { + add!(self, "if ("); + self.add_character_range_conditions(&transition.ranges, transition.is_included, 2); + add!(self, ") "); + } + self.add_advance_action(&action); add!(self, "\n"); } add_line!(self, "END_STATE();"); } - fn add_character_set_condition( - &mut self, - characters: &CharacterSet, - ruled_out_characters: &HashSet, - ) -> bool { - match characters { - CharacterSet::Include(chars) => { - let ranges = Self::get_ranges(chars, ruled_out_characters); - self.add_character_range_conditions(ranges, false) - } - CharacterSet::Exclude(chars) => { - let ranges = Some('\0'..'\0') - .into_iter() - .chain(Self::get_ranges(chars, ruled_out_characters)); - self.add_character_range_conditions(ranges, true) - } - } - } - fn add_character_range_conditions( &mut self, - ranges: impl Iterator>, - is_negated: bool, + ranges: &[Range], + is_included: bool, + indent_count: usize, ) -> bool { - let line_break = "\n "; + let mut line_break = "\n".to_string(); + for _ in 0..self.indent_level + indent_count { + line_break.push_str(" "); + } + let mut did_add = false; for range in ranges { - if is_negated { - if did_add { - add!(self, " &&{}", line_break); - } - if range.end == range.start { - add!(self, "lookahead != "); - self.add_character(range.start); - } else if range.end as u32 == range.start as u32 + 1 { - add!(self, "lookahead != "); - self.add_character(range.start); - add!(self, " &&{}lookahead != ", line_break); - self.add_character(range.end); - } else { - add!(self, "(lookahead < "); - self.add_character(range.start); - add!(self, " || "); - self.add_character(range.end); - add!(self, " < lookahead)"); - } - } else { + if is_included { if did_add { add!(self, " ||{}", line_break); } @@ -626,46 +874,31 @@ impl Generator { self.add_character(range.end); add!(self, ")"); } + } else { + if did_add { + add!(self, " &&{}", line_break); + } + if range.end == range.start { + add!(self, "lookahead != "); + self.add_character(range.start); 
+ } else if range.end as u32 == range.start as u32 + 1 { + add!(self, "lookahead != "); + self.add_character(range.start); + add!(self, " &&{}lookahead != ", line_break); + self.add_character(range.end); + } else { + add!(self, "(lookahead < "); + self.add_character(range.start); + add!(self, " || "); + self.add_character(range.end); + add!(self, " < lookahead)"); + } } did_add = true; } did_add } - fn get_ranges<'a>( - chars: &'a Vec, - ruled_out_characters: &'a HashSet, - ) -> impl Iterator> + 'a { - let mut prev_range: Option> = None; - chars - .iter() - .map(|c| (*c, false)) - .chain(Some(('\0', true))) - .filter_map(move |(c, done)| { - if done { - return prev_range.clone(); - } - if ruled_out_characters.contains(&(c as u32)) { - return None; - } - if let Some(range) = prev_range.clone() { - let mut prev_range_successor = range.end as u32 + 1; - while prev_range_successor < c as u32 { - if !ruled_out_characters.contains(&prev_range_successor) { - prev_range = Some(c..c); - return Some(range); - } - prev_range_successor += 1; - } - prev_range = Some(range.start..c); - None - } else { - prev_range = Some(c..c); - None - } - }) - } - fn add_advance_action(&mut self, action: &AdvanceAction) { if action.in_main_token { add!(self, "ADVANCE({});", action.state); @@ -678,7 +911,12 @@ impl Generator { add_line!(self, "static TSLexMode ts_lex_modes[STATE_COUNT] = {{"); indent!(self); for (i, state) in self.parse_table.states.iter().enumerate() { - if state.external_lex_state_id > 0 { + if state.is_non_terminal_extra + && state.terminal_entries.len() == 1 + && *state.terminal_entries.iter().next().unwrap().0 == Symbol::end() + { + add_line!(self, "[{}] = {{(TSStateId)(-1)}},", i,); + } else if state.external_lex_state_id > 0 { add_line!( self, "[{}] = {{.lex_state = {}, .external_lex_state = {}}},", @@ -776,12 +1014,7 @@ impl Generator { add_line!( self, - "static uint16_t ts_parse_table[{}][SYMBOL_COUNT] = {{", - if self.next_abi { - "LARGE_STATE_COUNT" - } else { - 
"STATE_COUNT" - } + "static uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {{", ); indent!(self); @@ -807,12 +1040,15 @@ impl Generator { terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0)); nonterminal_entries.sort_unstable_by_key(|k| k.0); - for (symbol, state_id) in &nonterminal_entries { + for (symbol, action) in &nonterminal_entries { add_line!( self, "[{}] = STATE({}),", self.symbol_ids[symbol], - *state_id + match action { + GotoAction::Goto(state) => *state, + GotoAction::ShiftExtra => i, + } ); } @@ -865,9 +1101,15 @@ impl Generator { .or_default() .push(**symbol); } - for (symbol, state_id) in &state.nonterminal_entries { + for (symbol, action) in &state.nonterminal_entries { + let state_id = match action { + GotoAction::Goto(i) => *i, + GotoAction::ShiftExtra => { + self.large_state_count + small_state_indices.len() - 1 + } + }; symbols_by_value - .entry((*state_id, SymbolType::NonTerminal)) + .entry((state_id, SymbolType::NonTerminal)) .or_default() .push(*symbol); } @@ -931,7 +1173,7 @@ impl Generator { for (i, entry) in parse_table_entries { add!( self, - " [{}] = {{.count = {}, .reusable = {}}},", + " [{}] = {{.entry = {{.count = {}, .reusable = {}}}}},", i, entry.actions.len(), entry.reusable @@ -982,6 +1224,10 @@ impl Generator { let language_function_name = format!("tree_sitter_{}", self.language_name); let external_scanner_name = format!("{}_external_scanner", language_function_name); + add_line!(self, "#ifdef __cplusplus"); + add_line!(self, r#"extern "C" {{"#); + add_line!(self, "#endif"); + if !self.syntax_grammar.external_tokens.is_empty() { add_line!(self, "void *{}_create(void);", external_scanner_name); add_line!(self, "void {}_destroy(void *);", external_scanner_name); @@ -1020,31 +1266,12 @@ impl Generator { add_line!(self, ".symbol_count = SYMBOL_COUNT,"); add_line!(self, ".alias_count = ALIAS_COUNT,"); add_line!(self, ".token_count = TOKEN_COUNT,"); - - if self.next_abi { - add_line!(self, 
".large_state_count = LARGE_STATE_COUNT,"); - } - + add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,"); + add_line!(self, ".symbol_names = ts_symbol_names,"); add_line!(self, ".symbol_metadata = ts_symbol_metadata,"); - add_line!( - self, - ".parse_table = (const unsigned short *)ts_parse_table," - ); - - if self.large_state_count < self.parse_table.states.len() { - add_line!( - self, - ".small_parse_table = (const uint16_t *)ts_small_parse_table," - ); - add_line!( - self, - ".small_parse_table_map = (const uint32_t *)ts_small_parse_table_map," - ); - } - + add_line!(self, ".parse_table = (const uint16_t *)ts_parse_table,"); add_line!(self, ".parse_actions = ts_parse_actions,"); add_line!(self, ".lex_modes = ts_lex_modes,"); - add_line!(self, ".symbol_names = ts_symbol_names,"); if !self.parse_table.production_infos.is_empty() { add_line!( @@ -1052,27 +1279,12 @@ impl Generator { ".alias_sequences = (const TSSymbol *)ts_alias_sequences," ); } - - add_line!(self, ".field_count = FIELD_COUNT,"); - - if !self.field_names.is_empty() { - add_line!(self, ".field_names = ts_field_names,"); - add_line!( - self, - ".field_map_slices = (const TSFieldMapSlice *)ts_field_map_slices," - ); - add_line!( - self, - ".field_map_entries = (const TSFieldMapEntry *)ts_field_map_entries," - ); - } - add_line!( self, ".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH," ); - add_line!(self, ".lex_fn = ts_lex,"); + add_line!(self, ".lex_fn = ts_lex,"); if let Some(keyword_capture_token) = self.keyword_capture_token { add_line!(self, ".keyword_lex_fn = ts_lex_keywords,"); add_line!( @@ -1082,8 +1294,6 @@ impl Generator { ); } - add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,"); - if !self.syntax_grammar.external_tokens.is_empty() { add_line!(self, ".external_scanner = {{"); indent!(self); @@ -1097,12 +1307,47 @@ impl Generator { dedent!(self); add_line!(self, "}},"); } - dedent!(self); + add_line!(self, ".field_count = FIELD_COUNT,"); + if 
!self.field_names.is_empty() { + add_line!( + self, + ".field_map_slices = (const TSFieldMapSlice *)ts_field_map_slices," + ); + add_line!( + self, + ".field_map_entries = (const TSFieldMapEntry *)ts_field_map_entries," + ); + add_line!(self, ".field_names = ts_field_names,"); + } + + add_line!(self, ".large_state_count = LARGE_STATE_COUNT,"); + if self.large_state_count < self.parse_table.states.len() { + add_line!( + self, + ".small_parse_table = (const uint16_t *)ts_small_parse_table," + ); + add_line!( + self, + ".small_parse_table_map = (const uint32_t *)ts_small_parse_table_map," + ); + } + + add_line!(self, ".public_symbol_map = ts_symbol_map,"); + + if self.next_abi { + add_line!(self, ".alias_map = ts_non_terminal_alias_map,"); + add_line!(self, ".state_count = STATE_COUNT,"); + } + + dedent!(self); add_line!(self, "}};"); add_line!(self, "return &language;"); dedent!(self); add_line!(self, "}}"); + add_line!(self, "#ifdef __cplusplus"); + add_line!(self, "}}"); + add_line!(self, "#endif"); } fn get_parse_action_list_id( @@ -1255,10 +1500,12 @@ impl Generator { for c in name.chars() { match c { '\"' => result += "\\\"", + '?' 
=> result += "\\?", '\\' => result += "\\\\", - '\t' => result += "\\t", + '\u{000c}' => result += "\\f", '\n' => result += "\\n", '\r' => result += "\\r", + '\t' => result += "\\t", _ => result.push(c), } } @@ -1266,18 +1513,20 @@ impl Generator { } fn add_character(&mut self, c: char) { - if c.is_ascii() { - match c { - '\0' => add!(self, "0"), - '\'' => add!(self, "'\\''"), - '\\' => add!(self, "'\\\\'"), - '\t' => add!(self, "'\\t'"), - '\n' => add!(self, "'\\n'"), - '\r' => add!(self, "'\\r'"), - _ => add!(self, "'{}'", c), + match c { + '\'' => add!(self, "'\\''"), + '\\' => add!(self, "'\\\\'"), + '\u{000c}' => add!(self, "'\\f'"), + '\n' => add!(self, "'\\n'"), + '\t' => add!(self, "'\\t'"), + '\r' => add!(self, "'\\r'"), + _ => { + if c == ' ' || c.is_ascii_graphic() { + add!(self, "'{}'", c) + } else { + add!(self, "{}", c as u32) + } } - } else { - add!(self, "{}", c as u32) } } } @@ -1294,7 +1543,7 @@ impl Generator { /// for keyword capture, if any. /// * `syntax_grammar` - The syntax grammar extracted from the language's grammar /// * `lexical_grammar` - The lexical grammar extracted from the language's grammar -/// * `simple_aliases` - A map describing the global rename rules that should apply. +/// * `default_aliases` - A map describing the global rename rules that should apply. /// the keys are symbols that are *always* aliased in the same way, and the values /// are the aliases that are applied to those symbols. 
/// * `next_abi` - A boolean indicating whether to opt into the new, unstable parse @@ -1307,7 +1556,7 @@ pub(crate) fn render_c_code( keyword_capture_token: Option, syntax_grammar: SyntaxGrammar, lexical_grammar: LexicalGrammar, - simple_aliases: AliasMap, + default_aliases: AliasMap, next_abi: bool, ) -> String { Generator { @@ -1321,59 +1570,14 @@ pub(crate) fn render_c_code( keyword_capture_token, syntax_grammar, lexical_grammar, - simple_aliases, + default_aliases, symbol_ids: HashMap::new(), symbol_order: HashMap::new(), alias_ids: HashMap::new(), - alias_map: BTreeMap::new(), + symbol_map: HashMap::new(), + unique_aliases: Vec::new(), field_names: Vec::new(), next_abi, } .generate() } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_get_char_ranges() { - struct Row { - chars: Vec, - ruled_out_chars: Vec, - expected_ranges: Vec>, - } - - let table = [ - Row { - chars: vec!['a'], - ruled_out_chars: vec![], - expected_ranges: vec!['a'..'a'], - }, - Row { - chars: vec!['a', 'b', 'c', 'e', 'z'], - ruled_out_chars: vec![], - expected_ranges: vec!['a'..'c', 'e'..'e', 'z'..'z'], - }, - Row { - chars: vec!['a', 'b', 'c', 'e', 'h', 'z'], - ruled_out_chars: vec!['d', 'f', 'g'], - expected_ranges: vec!['a'..'h', 'z'..'z'], - }, - ]; - - for Row { - chars, - ruled_out_chars, - expected_ranges, - } in table.iter() - { - let ruled_out_chars = ruled_out_chars - .into_iter() - .map(|c: &char| *c as u32) - .collect(); - let ranges = Generator::get_ranges(chars, &ruled_out_chars).collect::>(); - assert_eq!(ranges, *expected_ranges); - } - } -} diff --git a/cli/src/generate/rules.rs b/cli/src/generate/rules.rs index f3e39ebf..7676d61d 100644 --- a/cli/src/generate/rules.rs +++ b/cli/src/generate/rules.rs @@ -1,3 +1,4 @@ +use super::grammars::VariableType; use smallbitvec::SmallBitVec; use std::collections::HashMap; use std::iter::FromIterator; @@ -139,6 +140,16 @@ impl Rule { } } +impl Alias { + pub fn kind(&self) -> VariableType { + if self.is_named { + 
VariableType::Named + } else { + VariableType::Anonymous + } + } +} + #[cfg(test)] impl Rule { pub fn terminal(index: usize) -> Self { @@ -366,7 +377,7 @@ impl FromIterator for TokenSet { fn add_metadata(input: Rule, f: T) -> Rule { match input { - Rule::Metadata { rule, mut params } => { + Rule::Metadata { rule, mut params } if !params.is_token => { f(&mut params); Rule::Metadata { rule, params } } diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index fb593953..15b18a97 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -24,6 +24,12 @@ pub(crate) enum ParseAction { }, } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(crate) enum GotoAction { + Goto(ParseStateId), + ShiftExtra, +} + #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct ParseTableEntry { pub actions: Vec, @@ -34,10 +40,11 @@ pub(crate) struct ParseTableEntry { pub(crate) struct ParseState { pub id: ParseStateId, pub terminal_entries: HashMap, - pub nonterminal_entries: HashMap, + pub nonterminal_entries: HashMap, pub lex_state_id: usize, pub external_lex_state_id: usize, pub core_id: usize, + pub is_non_terminal_extra: bool, } #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] @@ -70,6 +77,7 @@ pub(crate) struct AdvanceAction { #[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] pub(crate) struct LexState { pub accept_action: Option, + pub eof_action: Option, pub advance_actions: Vec<(CharacterSet, AdvanceAction)>, } @@ -103,7 +111,13 @@ impl ParseState { _ => None, }) }) - .chain(self.nonterminal_entries.iter().map(|(_, state)| *state)) + .chain(self.nonterminal_entries.iter().filter_map(|(_, action)| { + if let GotoAction::Goto(state) = action { + Some(*state) + } else { + None + } + })) } pub fn update_referenced_states(&mut self, mut f: F) @@ -121,15 +135,18 @@ impl ParseState { } } } - for (symbol, other_state) in &self.nonterminal_entries { - let result = f(*other_state, self); - if result != *other_state { - 
updates.push((*symbol, 0, result)); + for (symbol, action) in &self.nonterminal_entries { + if let GotoAction::Goto(other_state) = action { + let result = f(*other_state, self); + if result != *other_state { + updates.push((*symbol, 0, result)); + } } } for (symbol, action_index, new_state) in updates { if symbol.is_non_terminal() { - self.nonterminal_entries.insert(symbol, new_state); + self.nonterminal_entries + .insert(symbol, GotoAction::Goto(new_state)); } else { let entry = self.terminal_entries.get_mut(&symbol).unwrap(); if let ParseAction::Shift { is_repetition, .. } = entry.actions[action_index] { diff --git a/cli/src/highlight.rs b/cli/src/highlight.rs index dff8fd2c..330c9e57 100644 --- a/cli/src/highlight.rs +++ b/cli/src/highlight.rs @@ -1,26 +1,58 @@ +use super::util; use crate::error::Result; use crate::loader::Loader; -use ansi_term::{Color, Style}; +use ansi_term::Color; use lazy_static::lazy_static; use serde::ser::SerializeMap; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_json::{json, Value}; use std::collections::HashMap; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Arc; +use std::sync::atomic::AtomicUsize; use std::time::Instant; -use std::{fmt, fs, io, path, thread}; -use tree_sitter::{Language, PropertySheet}; -use tree_sitter_highlight::{highlight, highlight_html, Highlight, HighlightEvent, Properties}; +use std::{fs, io, path, str, usize}; +use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer}; + +pub const HTML_HEADER: &'static str = " + + + Tree-sitter Highlighting + + + +"; + +pub const HTML_FOOTER: &'static str = " + +"; lazy_static! 
{ static ref CSS_STYLES_BY_COLOR_ID: Vec = serde_json::from_str(include_str!("../vendor/xterm-colors.json")).unwrap(); } +#[derive(Debug, Default)] +pub struct Style { + pub ansi: ansi_term::Style, + pub css: Option, +} + +#[derive(Debug)] pub struct Theme { - ansi_styles: Vec>, - css_styles: Vec>, + pub styles: Vec - - -"; - -pub const HTML_FOOTER: &'static str = " - -"; - pub fn html( loader: &Loader, theme: &Theme, source: &[u8], - language: Language, - property_sheet: &PropertySheet, + config: &HighlightConfiguration, + quiet: bool, + print_time: bool, ) -> Result<()> { use std::io::Write; + let stdout = io::stdout(); let mut stdout = stdout.lock(); - write!(&mut stdout, "\n")?; + let time = Instant::now(); + let cancellation_flag = util::cancel_on_stdin(); + let mut highlighter = Highlighter::new(); - let cancellation_flag = cancel_on_stdin(); - let lines = highlight_html( - source, - language, - property_sheet, - Some(cancellation_flag.as_ref()), - |s| language_for_injection_string(loader, s), - |highlight| { - if let Some(css_style) = theme.css_style(highlight) { - css_style - } else { - "" - } - }, - ) - .map_err(|e| e.to_string())?; - for (i, line) in lines.into_iter().enumerate() { - write!( - &mut stdout, - "\n", - i + 1, - line - )?; + let events = highlighter.highlight(config, source, Some(&cancellation_flag), |string| { + loader.highlight_config_for_injection_string(string) + })?; + + let mut renderer = HtmlRenderer::new(); + renderer.render(events, source, &move |highlight| { + if let Some(css_style) = &theme.styles[highlight.0].css { + css_style.as_bytes() + } else { + "".as_bytes() + } + })?; + + if !quiet { + write!(&mut stdout, "
{}{}
\n")?; + for (i, line) in renderer.lines().enumerate() { + write!( + &mut stdout, + "\n", + i + 1, + line + )?; + } + + write!(&mut stdout, "
{}{}
\n")?; } - write!(&mut stdout, "\n")?; + + if print_time { + eprintln!("Time: {}ms", time.elapsed().as_millis()); + } + Ok(()) } - -fn language_for_injection_string<'a>( - loader: &'a Loader, - string: &str, -) -> Option<(Language, &'a PropertySheet)> { - match loader.language_configuration_for_injection_string(string) { - Err(e) => { - eprintln!( - "Failed to load language for injection string '{}': {}", - string, - e.message() - ); - None - } - Ok(None) => None, - Ok(Some((language, configuration))) => { - match configuration.highlight_property_sheet(language) { - Err(e) => { - eprintln!( - "Failed to load property sheet for injection string '{}': {}", - string, - e.message() - ); - None - } - Ok(None) => None, - Ok(Some(sheet)) => Some((language, sheet)), - } - } - } -} diff --git a/cli/src/lib.rs b/cli/src/lib.rs index 33a9904f..e00323b7 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -5,7 +5,11 @@ pub mod highlight; pub mod loader; pub mod logger; pub mod parse; +pub mod query; +pub mod query_testing; +pub mod tags; pub mod test; +pub mod test_highlight; pub mod util; pub mod wasm; pub mod web_ui; diff --git a/cli/src/loader.rs b/cli/src/loader.rs index 237718bb..3d5a9377 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -5,12 +5,15 @@ use regex::{Regex, RegexBuilder}; use serde_derive::Deserialize; use std::collections::HashMap; use std::io::BufReader; +use std::ops::Range; use std::path::{Path, PathBuf}; use std::process::Command; +use std::sync::Mutex; use std::time::SystemTime; use std::{fs, mem}; -use tree_sitter::{Language, PropertySheet}; -use tree_sitter_highlight::{load_property_sheet, Properties}; +use tree_sitter::{Language, QueryError}; +use tree_sitter_highlight::HighlightConfiguration; +use tree_sitter_tags::{Error as TagsError, TagsConfiguration}; #[cfg(unix)] const DYLIB_EXTENSION: &'static str = "so"; @@ -20,23 +23,31 @@ const DYLIB_EXTENSION: &'static str = "dll"; const BUILD_TARGET: &'static str = env!("BUILD_TARGET"); 
-#[derive(Default)] -pub struct LanguageConfiguration { +pub struct LanguageConfiguration<'a> { pub scope: Option, pub content_regex: Option, pub _first_line_regex: Option, pub injection_regex: Option, pub file_types: Vec, - pub highlight_property_sheet_path: Option, + pub root_path: PathBuf, + pub highlights_filenames: Option>, + pub injections_filenames: Option>, + pub locals_filenames: Option>, + pub tags_filenames: Option>, language_id: usize, - highlight_property_sheet: OnceCell>>, + highlight_config: OnceCell>, + tags_config: OnceCell>, + highlight_names: &'a Mutex>, + use_all_highlight_names: bool, } pub struct Loader { parser_lib_path: PathBuf, languages_by_id: Vec<(PathBuf, OnceCell)>, - language_configurations: Vec, + language_configurations: Vec>, language_configuration_ids_by_file_type: HashMap>, + highlight_names: Box>>, + use_all_highlight_names: bool, } unsafe impl Send for Loader {} @@ -49,9 +60,22 @@ impl Loader { languages_by_id: Vec::new(), language_configurations: Vec::new(), language_configuration_ids_by_file_type: HashMap::new(), + highlight_names: Box::new(Mutex::new(Vec::new())), + use_all_highlight_names: true, } } + pub fn configure_highlights(&mut self, names: &Vec) { + self.use_all_highlight_names = false; + let mut highlights = self.highlight_names.lock().unwrap(); + highlights.clear(); + highlights.extend(names.iter().cloned()); + } + + pub fn highlight_names(&self) -> Vec { + self.highlight_names.lock().unwrap().clone() + } + pub fn find_all_languages(&mut self, parser_src_paths: &Vec) -> Result<()> { for parser_container_dir in parser_src_paths.iter() { if let Ok(entries) = fs::read_dir(parser_container_dir) { @@ -134,11 +158,12 @@ impl Loader { if configuration_ids.len() == 1 { configuration = &self.language_configurations[configuration_ids[0]]; } - // If multiple language configurations match, then determine which // one to use by applying the configurations' content regexes. 
else { - let file_contents = fs::read_to_string(path)?; + let file_contents = fs::read(path) + .map_err(Error::wrap(|| format!("Failed to read path {:?}", path)))?; + let file_contents = String::from_utf8_lossy(&file_contents); let mut best_score = -2isize; let mut best_configuration_id = None; for configuration_id in configuration_ids { @@ -151,7 +176,6 @@ impl Loader { if let Some(mat) = content_regex.find(&file_contents) { score = (mat.end() - mat.start()) as isize; } - // If the content regex does not match, then *penalize* this // language configuration, so that language configurations // without content regexes are preferred over those with @@ -338,10 +362,63 @@ impl Loader { Ok(language) } - fn find_language_configurations_at_path<'a>( + pub fn highlight_config_for_injection_string<'a>( + &'a self, + string: &str, + ) -> Option<&'a HighlightConfiguration> { + match self.language_configuration_for_injection_string(string) { + Err(e) => { + eprintln!( + "Failed to load language for injection string '{}': {}", + string, + e.message() + ); + None + } + Ok(None) => None, + Ok(Some((language, configuration))) => match configuration.highlight_config(language) { + Err(e) => { + eprintln!( + "Failed to load property sheet for injection string '{}': {}", + string, + e.message() + ); + None + } + Ok(None) => None, + Ok(Some(config)) => Some(config), + }, + } + } + + pub fn find_language_configurations_at_path<'a>( &'a mut self, parser_path: &Path, ) -> Result<&[LanguageConfiguration]> { + #[derive(Deserialize)] + #[serde(untagged)] + enum PathsJSON { + Empty, + Single(String), + Multiple(Vec), + } + + impl Default for PathsJSON { + fn default() -> Self { + PathsJSON::Empty + } + } + + impl PathsJSON { + fn into_vec(self) -> Option> { + match self { + PathsJSON::Empty => None, + PathsJSON::Single(s) => Some(vec![s]), + PathsJSON::Multiple(s) => Some(s), + } + } + } + #[derive(Deserialize)] struct LanguageConfigurationJSON { #[serde(default)] @@ -355,7 +432,14 @@ impl 
Loader { first_line_regex: Option, #[serde(rename = "injection-regex")] injection_regex: Option, - highlights: Option, + #[serde(default)] + highlights: PathsJSON, + #[serde(default)] + injections: PathsJSON, + #[serde(default)] + locals: PathsJSON, + #[serde(default)] + tags: PathsJSON, } #[derive(Deserialize)] @@ -394,22 +478,21 @@ impl Loader { }); let configuration = LanguageConfiguration { + root_path: parser_path.to_path_buf(), scope: config_json.scope, language_id, file_types: config_json.file_types.unwrap_or(Vec::new()), - content_regex: config_json - .content_regex - .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), - _first_line_regex: config_json - .first_line_regex - .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), - injection_regex: config_json - .injection_regex - .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), - highlight_property_sheet_path: config_json - .highlights - .map(|h| parser_path.join(h)), - highlight_property_sheet: OnceCell::new(), + content_regex: Self::regex(config_json.content_regex), + _first_line_regex: Self::regex(config_json.first_line_regex), + injection_regex: Self::regex(config_json.injection_regex), + injections_filenames: config_json.injections.into_vec(), + locals_filenames: config_json.locals.into_vec(), + tags_filenames: config_json.tags.into_vec(), + highlights_filenames: config_json.highlights.into_vec(), + highlight_config: OnceCell::new(), + tags_config: OnceCell::new(), + highlight_names: &*self.highlight_names, + use_all_highlight_names: self.use_all_highlight_names, }; for file_type in &configuration.file_types { @@ -419,7 +502,8 @@ impl Loader { .push(self.language_configurations.len()); } - self.language_configurations.push(configuration); + self.language_configurations + .push(unsafe { mem::transmute(configuration) }); } } } @@ -427,52 +511,184 @@ impl Loader { if self.language_configurations.len() == initial_language_configuration_count && 
parser_path.join("src").join("grammar.json").exists() { - self.language_configurations.push(LanguageConfiguration { + let configuration = LanguageConfiguration { + root_path: parser_path.to_owned(), language_id: self.languages_by_id.len(), + file_types: Vec::new(), scope: None, content_regex: None, - injection_regex: None, - file_types: Vec::new(), _first_line_regex: None, - highlight_property_sheet_path: None, - highlight_property_sheet: OnceCell::new(), - }); + injection_regex: None, + injections_filenames: None, + locals_filenames: None, + highlights_filenames: None, + tags_filenames: None, + highlight_config: OnceCell::new(), + tags_config: OnceCell::new(), + highlight_names: &*self.highlight_names, + use_all_highlight_names: self.use_all_highlight_names, + }; + self.language_configurations + .push(unsafe { mem::transmute(configuration) }); self.languages_by_id .push((parser_path.to_owned(), OnceCell::new())); } Ok(&self.language_configurations[initial_language_configuration_count..]) } + + fn regex(pattern: Option) -> Option { + pattern.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()) + } } -impl LanguageConfiguration { - pub fn highlight_property_sheet( - &self, - language: Language, - ) -> Result>> { - self.highlight_property_sheet +impl<'a> LanguageConfiguration<'a> { + pub fn highlight_config(&self, language: Language) -> Result> { + return self + .highlight_config .get_or_try_init(|| { - if let Some(path) = &self.highlight_property_sheet_path { - let sheet_json = fs::read_to_string(path).map_err(Error::wrap(|| { - format!( - "Failed to read property sheet {:?}", - path.file_name().unwrap() - ) - }))?; - let sheet = - load_property_sheet(language, &sheet_json).map_err(Error::wrap(|| { - format!( - "Failed to parse property sheet {:?}", - path.file_name().unwrap() - ) - }))?; - Ok(Some(sheet)) - } else { + let (highlights_query, highlight_ranges) = + self.read_queries(&self.highlights_filenames, "highlights.scm")?; + let (injections_query, 
injection_ranges) = + self.read_queries(&self.injections_filenames, "injections.scm")?; + let (locals_query, locals_ranges) = + self.read_queries(&self.locals_filenames, "locals.scm")?; + + if highlights_query.is_empty() { Ok(None) + } else { + let mut result = HighlightConfiguration::new( + language, + &highlights_query, + &injections_query, + &locals_query, + ) + .map_err(|error| { + if error.offset < injections_query.len() { + Self::include_path_in_query_error( + error, + &injection_ranges, + &injections_query, + 0, + ) + } else if error.offset < injections_query.len() + locals_query.len() { + Self::include_path_in_query_error( + error, + &locals_ranges, + &locals_query, + injections_query.len(), + ) + } else { + Self::include_path_in_query_error( + error, + &highlight_ranges, + &highlights_query, + injections_query.len() + locals_query.len(), + ) + } + })?; + let mut all_highlight_names = self.highlight_names.lock().unwrap(); + if self.use_all_highlight_names { + for capture_name in result.query.capture_names() { + if !all_highlight_names.contains(capture_name) { + all_highlight_names.push(capture_name.clone()); + } + } + } + result.configure(&all_highlight_names); + Ok(Some(result)) + } + }) + .map(Option::as_ref); + } + + pub fn tags_config(&self, language: Language) -> Result> { + self.tags_config + .get_or_try_init(|| { + let (tags_query, tags_ranges) = + self.read_queries(&self.tags_filenames, "tags.scm")?; + let (locals_query, locals_ranges) = + self.read_queries(&self.locals_filenames, "locals.scm")?; + if tags_query.is_empty() { + Ok(None) + } else { + TagsConfiguration::new(language, &tags_query, &locals_query) + .map(Some) + .map_err(|error| { + if let TagsError::Query(error) = error { + if error.offset < locals_query.len() { + Self::include_path_in_query_error( + error, + &locals_ranges, + &locals_query, + 0, + ) + } else { + Self::include_path_in_query_error( + error, + &tags_ranges, + &tags_query, + locals_query.len(), + ) + } + .into() + } else { 
+ error.into() + } + }) } }) .map(Option::as_ref) } + + fn include_path_in_query_error<'b>( + mut error: QueryError, + ranges: &'b Vec<(String, Range)>, + source: &str, + start_offset: usize, + ) -> (&'b str, QueryError) { + let offset_within_section = error.offset - start_offset; + let (path, range) = ranges + .iter() + .find(|(_, range)| range.contains(&offset_within_section)) + .unwrap(); + error.offset = offset_within_section - range.start; + error.row = source[range.start..offset_within_section] + .chars() + .filter(|c| *c == '\n') + .count(); + (path.as_ref(), error) + } + + fn read_queries( + &self, + paths: &Option>, + default_path: &str, + ) -> Result<(String, Vec<(String, Range)>)> { + let mut query = String::new(); + let mut path_ranges = Vec::new(); + if let Some(paths) = paths.as_ref() { + for path in paths { + let abs_path = self.root_path.join(path); + let prev_query_len = query.len(); + query += &fs::read_to_string(&abs_path).map_err(Error::wrap(|| { + format!("Failed to read query file {:?}", path) + }))?; + path_ranges.push((path.clone(), prev_query_len..query.len())); + } + } else { + let queries_path = self.root_path.join("queries"); + let path = queries_path.join(default_path); + if path.exists() { + query = fs::read_to_string(&path).map_err(Error::wrap(|| { + format!("Failed to read query file {:?}", path) + }))?; + path_ranges.push((default_path.to_string(), 0..query.len())); + } + } + + Ok((query, path_ranges)) + } } fn needs_recompile( diff --git a/cli/src/main.rs b/cli/src/main.rs index 84b13da8..36ca5b77 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,10 +1,13 @@ use clap::{App, AppSettings, Arg, SubCommand}; use error::Error; +use glob::glob; use std::path::Path; use std::process::exit; use std::{env, fs, u64}; +use tree_sitter::Language; use tree_sitter_cli::{ - config, error, generate, highlight, loader, logger, parse, test, wasm, web_ui, + config, error, generate, highlight, loader, logger, parse, query, tags, test, 
test_highlight, + util, wasm, web_ui, }; const BUILD_VERSION: &'static str = env!("CARGO_PKG_VERSION"); @@ -38,8 +41,7 @@ fn run() -> error::Result<()> { .about("Generate a parser") .arg(Arg::with_name("grammar-path").index(1)) .arg(Arg::with_name("log").long("log")) - .arg(Arg::with_name("next-abi").long("next-abi")) - .arg(Arg::with_name("properties-only").long("properties")) + .arg(Arg::with_name("prev-abi").long("prev-abi")) .arg( Arg::with_name("report-states-for-rule") .long("report-states-for-rule") @@ -50,19 +52,20 @@ fn run() -> error::Result<()> { ) .subcommand( SubCommand::with_name("parse") - .about("Parse a file") + .about("Parse files") + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) .arg( - Arg::with_name("path") + Arg::with_name("paths") .index(1) .multiple(true) - .required(true), + .required(false), ) .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("debug").long("debug").short("d")) .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")) .arg(Arg::with_name("quiet").long("quiet").short("q")) + .arg(Arg::with_name("stat").long("stat").short("s")) .arg(Arg::with_name("time").long("time").short("t")) - .arg(Arg::with_name("allow-cancellation").long("cancel")) .arg(Arg::with_name("timeout").long("timeout").takes_value(true)) .arg( Arg::with_name("edits") @@ -73,6 +76,40 @@ fn run() -> error::Result<()> { .number_of_values(1), ), ) + .subcommand( + SubCommand::with_name("query") + .about("Search files using a syntax tree query") + .arg(Arg::with_name("query-path").index(1).required(true)) + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) + .arg( + Arg::with_name("paths") + .index(2) + .multiple(true) + .required(false), + ) + .arg( + Arg::with_name("byte-range") + .help("The range of byte offsets in which the query will be executed") + .long("byte-range") + .takes_value(true), + ) + .arg(Arg::with_name("scope").long("scope").takes_value(true)) + 
.arg(Arg::with_name("captures").long("captures").short("c")) + .arg(Arg::with_name("test").long("test")), + ) + .subcommand( + SubCommand::with_name("tags") + .arg(Arg::with_name("quiet").long("quiet").short("q")) + .arg(Arg::with_name("time").long("time").short("t")) + .arg(Arg::with_name("scope").long("scope").takes_value(true)) + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) + .arg( + Arg::with_name("paths") + .help("The source file to use") + .index(1) + .multiple(true), + ), + ) .subcommand( SubCommand::with_name("test") .about("Run a parser's tests") @@ -89,15 +126,17 @@ fn run() -> error::Result<()> { .subcommand( SubCommand::with_name("highlight") .about("Highlight a file") + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) .arg( - Arg::with_name("path") + Arg::with_name("paths") .index(1) .multiple(true) - .required(true), + .required(false), ) .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("html").long("html").short("h")) - .arg(Arg::with_name("time").long("time").short("t")), + .arg(Arg::with_name("time").long("time").short("t")) + .arg(Arg::with_name("quiet").long("quiet").short("q")), ) .subcommand( SubCommand::with_name("build-wasm") @@ -110,7 +149,14 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("path").index(1).multiple(true)), ) .subcommand( - SubCommand::with_name("web-ui").about("Test a parser interactively in the browser"), + SubCommand::with_name("web-ui") + .about("Test a parser interactively in the browser") + .arg( + Arg::with_name("quiet") + .long("quiet") + .short("q") + .help("open in default browser"), + ), ) .subcommand( SubCommand::with_name("dump-languages") @@ -128,7 +174,6 @@ fn run() -> error::Result<()> { config.save(&home_dir)?; } else if let Some(matches) = matches.subcommand_matches("generate") { let grammar_path = matches.value_of("grammar-path"); - let properties_only = matches.is_present("properties-only"); let report_symbol_name = 
matches.value_of("report-states-for-rule").or_else(|| { if matches.is_present("report-states") { Some("") @@ -139,24 +184,40 @@ fn run() -> error::Result<()> { if matches.is_present("log") { logger::init(); } - let next_abi = matches.is_present("next-abi"); + let prev_abi = matches.is_present("prev-abi"); generate::generate_parser_in_directory( ¤t_dir, grammar_path, - properties_only, - next_abi, + !prev_abi, report_symbol_name, )?; } else if let Some(matches) = matches.subcommand_matches("test") { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph"); - let filter = matches.value_of("filter"); let update = matches.is_present("update"); - let corpus_path = current_dir.join("corpus"); - if let Some(language) = loader.languages_at_path(¤t_dir)?.first() { - test::run_tests_at_path(*language, &corpus_path, debug, debug_graph, filter, update)?; - } else { - eprintln!("No language found"); + let filter = matches.value_of("filter"); + let languages = loader.languages_at_path(¤t_dir)?; + let language = languages + .first() + .ok_or_else(|| "No language found".to_string())?; + let test_dir = current_dir.join("test"); + + // Run the corpus tests. Look for them at two paths: `test/corpus` and `corpus`. + let mut test_corpus_dir = test_dir.join("corpus"); + if !test_corpus_dir.is_dir() { + test_corpus_dir = current_dir.join("corpus"); + } + if test_corpus_dir.is_dir() { + test::run_tests_at_path(*language, &test_corpus_dir, debug, debug_graph, filter, update)?; + } + + // Check that all of the queries are valid. + test::check_queries_at_path(*language, ¤t_dir.join("queries"))?; + + // Run the syntax highlighting tests. 
+ let test_highlight_dir = test_dir.join("highlight"); + if test_highlight_dir.is_dir() { + test_highlight::test_highlights(&loader, &test_highlight_dir)?; } } else if let Some(matches) = matches.subcommand_matches("parse") { let debug = matches.is_present("debug"); @@ -166,56 +227,27 @@ fn run() -> error::Result<()> { let edits = matches .values_of("edits") .map_or(Vec::new(), |e| e.collect()); - let allow_cancellation = matches.is_present("allow-cancellation"); + let cancellation_flag = util::cancel_on_stdin(); + let timeout = matches .value_of("timeout") .map_or(0, |t| u64::from_str_radix(t, 10).unwrap()); - loader.find_all_languages(&config.parser_directories)?; - let paths = matches - .values_of("path") - .unwrap() - .into_iter() - .collect::>(); + + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + let max_path_length = paths.iter().map(|p| p.chars().count()).max().unwrap(); let mut has_error = false; + loader.find_all_languages(&config.parser_directories)?; + + let should_track_stats = matches.is_present("stat"); + let mut stats = parse::Stats::default(); + for path in paths { - let path = Path::new(path); - let language = if let Some(scope) = matches.value_of("scope") { - if let Some(config) = - loader - .language_configuration_for_scope(scope) - .map_err(Error::wrap(|| { - format!("Failed to load language for scope '{}'", scope) - }))? - { - config.0 - } else { - return Error::err(format!("Unknown scope '{}'", scope)); - } - } else if let Some((lang, _)) = loader - .language_configuration_for_file_name(path) - .map_err(Error::wrap(|| { - format!( - "Failed to load language for file name {:?}", - path.file_name().unwrap() - ) - }))? - { - lang - } else if let Some(lang) = loader - .languages_at_path(¤t_dir) - .map_err(Error::wrap(|| { - "Failed to load language in current directory" - }))? 
- .first() - .cloned() - { - lang - } else { - eprintln!("No language found"); - return Ok(()); - }; - has_error |= parse::parse_file_at_path( + let path = Path::new(&path); + let language = + select_language(&mut loader, path, ¤t_dir, matches.value_of("scope"))?; + + let this_file_errored = parse::parse_file_at_path( language, path, &edits, @@ -225,36 +257,86 @@ fn run() -> error::Result<()> { timeout, debug, debug_graph, - allow_cancellation, + Some(&cancellation_flag), )?; + + if should_track_stats { + stats.total_parses += 1; + if !this_file_errored { + stats.successful_parses += 1; + } + } + + has_error |= this_file_errored; + } + + if should_track_stats { + println!("{}", stats) } if has_error { return Error::err(String::new()); } + } else if let Some(matches) = matches.subcommand_matches("query") { + let ordered_captures = matches.values_of("captures").is_some(); + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + loader.find_all_languages(&config.parser_directories)?; + let language = select_language( + &mut loader, + Path::new(&paths[0]), + ¤t_dir, + matches.value_of("scope"), + )?; + let query_path = Path::new(matches.value_of("query-path").unwrap()); + let range = matches.value_of("byte-range").map(|br| { + let r: Vec<&str> = br.split(":").collect(); + (r[0].parse().unwrap(), r[1].parse().unwrap()) + }); + let should_test = matches.is_present("test"); + query::query_files_at_paths( + language, + paths, + query_path, + ordered_captures, + range, + should_test, + )?; + } else if let Some(matches) = matches.subcommand_matches("tags") { + loader.find_all_languages(&config.parser_directories)?; + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + tags::generate_tags( + &loader, + matches.value_of("scope"), + &paths, + matches.is_present("quiet"), + matches.is_present("time"), + )?; } else if let Some(matches) = matches.subcommand_matches("highlight") { - let paths = 
matches.values_of("path").unwrap().into_iter(); - let html_mode = matches.is_present("html"); - let time = matches.is_present("time"); + loader.configure_highlights(&config.theme.highlight_names); loader.find_all_languages(&config.parser_directories)?; - if html_mode { + let time = matches.is_present("time"); + let quiet = matches.is_present("quiet"); + let html_mode = quiet || matches.is_present("html"); + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + + if html_mode && !quiet { println!("{}", highlight::HTML_HEADER); } - let language_config; + let cancellation_flag = util::cancel_on_stdin(); + + let mut lang = None; if let Some(scope) = matches.value_of("scope") { - language_config = loader.language_configuration_for_scope(scope)?; - if language_config.is_none() { + lang = loader.language_configuration_for_scope(scope)?; + if lang.is_none() { return Error::err(format!("Unknown scope '{}'", scope)); } - } else { - language_config = None; } for path in paths { - let path = Path::new(path); - let (language, language_config) = match language_config { + let path = Path::new(&path); + let (language, language_config) = match lang { Some(v) => v, None => match loader.language_configuration_for_file_name(path)? { Some(v) => v, @@ -265,30 +347,56 @@ fn run() -> error::Result<()> { }, }; - if let Some(sheet) = language_config.highlight_property_sheet(language)? { + if let Some(highlight_config) = language_config.highlight_config(language)? 
{ let source = fs::read(path)?; if html_mode { - highlight::html(&loader, &config.theme, &source, language, sheet)?; + highlight::html( + &loader, + &config.theme, + &source, + highlight_config, + quiet, + time, + )?; } else { - highlight::ansi(&loader, &config.theme, &source, language, sheet, time)?; + highlight::ansi( + &loader, + &config.theme, + &source, + highlight_config, + time, + Some(&cancellation_flag), + )?; } } else { - return Error::err(format!("No syntax highlighting property sheet specified")); + eprintln!("No syntax highlighting config found for path {:?}", path); } } + + if html_mode && !quiet { + println!("{}", highlight::HTML_FOOTER); + } } else if let Some(matches) = matches.subcommand_matches("build-wasm") { let grammar_path = current_dir.join(matches.value_of("path").unwrap_or("")); wasm::compile_language_to_wasm(&grammar_path, matches.is_present("docker"))?; - } else if matches.subcommand_matches("web-ui").is_some() { - web_ui::serve(¤t_dir); + } else if let Some(matches) = matches.subcommand_matches("web-ui") { + let open_in_browser = !matches.is_present("quiet"); + web_ui::serve(¤t_dir, open_in_browser); } else if matches.subcommand_matches("dump-languages").is_some() { loader.find_all_languages(&config.parser_directories)?; for (configuration, language_path) in loader.get_all_language_configurations() { println!( - "scope: {}\nparser: {:?}\nproperties: {:?}\nfile_types: {:?}\ncontent_regex: {:?}\ninjection_regex: {:?}\n", + concat!( + "scope: {}\n", + "parser: {:?}\n", + "highlights: {:?}\n", + "file_types: {:?}\n", + "content_regex: {:?}\n", + "injection_regex: {:?}\n", + ), configuration.scope.as_ref().unwrap_or(&String::new()), language_path, - configuration.highlight_property_sheet_path, + configuration.highlights_filenames, configuration.file_types, configuration.content_regex, configuration.injection_regex, @@ -298,3 +406,107 @@ fn run() -> error::Result<()> { Ok(()) } + +fn collect_paths<'a>( + paths_file: Option<&str>, + paths: 
Option>, +) -> error::Result> { + if let Some(paths_file) = paths_file { + return Ok(fs::read_to_string(paths_file) + .map_err(Error::wrap(|| { + format!("Failed to read paths file {}", paths_file) + }))? + .trim() + .split_ascii_whitespace() + .map(String::from) + .collect::>()); + } + + if let Some(paths) = paths { + let mut result = Vec::new(); + + let mut incorporate_path = |path: &str, positive| { + if positive { + result.push(path.to_string()); + } else { + if let Some(index) = result.iter().position(|p| p == path) { + result.remove(index); + } + } + }; + + for mut path in paths { + let mut positive = true; + if path.starts_with("!") { + positive = false; + path = path.trim_start_matches("!"); + } + + if Path::new(path).exists() { + incorporate_path(path, positive); + } else { + let paths = glob(path) + .map_err(Error::wrap(|| format!("Invalid glob pattern {:?}", path)))?; + for path in paths { + if let Some(path) = path?.to_str() { + incorporate_path(path, positive); + } + } + } + } + + if result.is_empty() { + Error::err( + "No files were found at or matched by the provided pathname/glob".to_string(), + )?; + } + + return Ok(result); + } + + Err(Error::new("Must provide one or more paths".to_string())) +} + +fn select_language( + loader: &mut loader::Loader, + path: &Path, + current_dir: &Path, + scope: Option<&str>, +) -> Result { + if let Some(scope) = scope { + if let Some(config) = + loader + .language_configuration_for_scope(scope) + .map_err(Error::wrap(|| { + format!("Failed to load language for scope '{}'", scope) + }))? + { + Ok(config.0) + } else { + return Error::err(format!("Unknown scope '{}'", scope)); + } + } else if let Some((lang, _)) = + loader + .language_configuration_for_file_name(path) + .map_err(Error::wrap(|| { + format!( + "Failed to load language for file name {:?}", + path.file_name().unwrap() + ) + }))? 
+ { + Ok(lang) + } else if let Some(lang) = loader + .languages_at_path(¤t_dir) + .map_err(Error::wrap(|| { + "Failed to load language in current directory" + }))? + .first() + .cloned() + { + Ok(lang) + } else { + eprintln!("No language found"); + Error::err("No language found".to_string()) + } +} diff --git a/cli/src/parse.rs b/cli/src/parse.rs index d1ddb499..4d66df1d 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -2,9 +2,9 @@ use super::error::{Error, Result}; use super::util; use std::io::{self, Write}; use std::path::Path; -use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::atomic::AtomicUsize; use std::time::Instant; -use std::{fs, thread, usize}; +use std::{fmt, fs, usize}; use tree_sitter::{InputEdit, Language, LogType, Parser, Point, Tree}; #[derive(Debug)] @@ -14,6 +14,22 @@ pub struct Edit { pub inserted_text: Vec, } +#[derive(Debug, Default)] +pub struct Stats { + pub successful_parses: usize, + pub total_parses: usize, +} + +impl fmt::Display for Stats { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + return writeln!(f, "Total parses: {}; successful parses: {}; failed parses: {}; success percentage: {:.2}%", + self.total_parses, + self.successful_parses, + self.total_parses - self.successful_parses, + (self.successful_parses as f64) / (self.total_parses as f64) * 100.0); + } +} + pub fn parse_file_at_path( language: Language, path: &Path, @@ -24,7 +40,7 @@ pub fn parse_file_at_path( timeout: u64, debug: bool, debug_graph: bool, - allow_cancellation: bool, + cancellation_flag: Option<&AtomicUsize>, ) -> Result { let mut _log_session = None; let mut parser = Parser::new(); @@ -35,16 +51,7 @@ pub fn parse_file_at_path( // If the `--cancel` flag was passed, then cancel the parse // when the user types a newline. 
- if allow_cancellation { - let flag = Box::new(AtomicUsize::new(0)); - unsafe { parser.set_cancellation_flag(Some(&flag)) }; - thread::spawn(move || { - let mut line = String::new(); - io::stdin().read_line(&mut line).unwrap(); - eprintln!("Cancelling"); - flag.store(1, Ordering::Relaxed); - }); - } + unsafe { parser.set_cancellation_flag(cancellation_flag) }; // Set a timeout based on the `--time` flag. parser.set_timeout_micros(timeout); @@ -70,10 +77,18 @@ pub fn parse_file_at_path( let mut stdout = stdout.lock(); if let Some(mut tree) = tree { - for edit in edits { + if debug_graph && !edits.is_empty() { + println!("BEFORE:\n{}", String::from_utf8_lossy(&source_code)); + } + + for (i, edit) in edits.iter().enumerate() { let edit = parse_edit_flag(&source_code, edit)?; perform_edit(&mut tree, &mut source_code, &edit); tree = parser.parse(&source_code, Some(&tree)).unwrap(); + + if debug_graph { + println!("AFTER {}:\n{}", i, String::from_utf8_lossy(&source_code)); + } } let duration = time.elapsed(); diff --git a/cli/src/query.rs b/cli/src/query.rs new file mode 100644 index 00000000..485fdb82 --- /dev/null +++ b/cli/src/query.rs @@ -0,0 +1,100 @@ +use super::error::{Error, Result}; +use crate::query_testing; +use std::fs; +use std::io::{self, Write}; +use std::path::Path; +use tree_sitter::{Language, Node, Parser, Query, QueryCursor}; + +pub fn query_files_at_paths( + language: Language, + paths: Vec, + query_path: &Path, + ordered_captures: bool, + range: Option<(usize, usize)>, + should_test: bool, +) -> Result<()> { + let stdout = io::stdout(); + let mut stdout = stdout.lock(); + + let query_source = fs::read_to_string(query_path).map_err(Error::wrap(|| { + format!("Error reading query file {:?}", query_path) + }))?; + let query = Query::new(language, &query_source) + .map_err(|e| Error::new(format!("Query compilation failed: {:?}", e)))?; + + let mut query_cursor = QueryCursor::new(); + if let Some((beg, end)) = range { + query_cursor.set_byte_range(beg, 
end); + } + + let mut parser = Parser::new(); + parser.set_language(language).map_err(|e| e.to_string())?; + + for path in paths { + let mut results = Vec::new(); + + writeln!(&mut stdout, "{}", path)?; + + let source_code = fs::read(&path).map_err(Error::wrap(|| { + format!("Error reading source file {:?}", path) + }))?; + let text_callback = |n: Node| &source_code[n.byte_range()]; + let tree = parser.parse(&source_code, None).unwrap(); + + if ordered_captures { + for (mat, capture_index) in + query_cursor.captures(&query, tree.root_node(), text_callback) + { + let capture = mat.captures[capture_index]; + let capture_name = &query.capture_names()[capture.index as usize]; + writeln!( + &mut stdout, + " pattern: {}, capture: {}, row: {}, text: {:?}", + mat.pattern_index, + capture_name, + capture.node.start_position().row, + capture.node.utf8_text(&source_code).unwrap_or("") + )?; + results.push(query_testing::CaptureInfo { + name: capture_name.to_string(), + start: capture.node.start_position(), + end: capture.node.end_position(), + }); + } + } else { + for m in query_cursor.matches(&query, tree.root_node(), text_callback) { + writeln!(&mut stdout, " pattern: {}", m.pattern_index)?; + for capture in m.captures { + let start = capture.node.start_position(); + let end = capture.node.end_position(); + let capture_name = &query.capture_names()[capture.index as usize]; + if end.row == start.row { + writeln!( + &mut stdout, + " capture: {}, start: {}, text: {:?}", + capture_name, + start, + capture.node.utf8_text(&source_code).unwrap_or("") + )?; + } else { + writeln!( + &mut stdout, + " capture: {}, start: {}, end: {}", + capture_name, start, end, + )?; + } + results.push(query_testing::CaptureInfo { + name: capture_name.to_string(), + start: capture.node.start_position(), + end: capture.node.end_position(), + }); + } + } + } + if should_test { + query_testing::assert_expected_captures(results, path, &mut parser, language)? 
+ } + } + + Ok(()) +} diff --git a/cli/src/query_testing.rs b/cli/src/query_testing.rs new file mode 100644 index 00000000..ef02ec69 --- /dev/null +++ b/cli/src/query_testing.rs @@ -0,0 +1,150 @@ +use crate::error; +use crate::error::Result; +use lazy_static::lazy_static; +use regex::Regex; +use std::fs; +use tree_sitter::{Language, Parser, Point}; + +lazy_static! { + static ref CAPTURE_NAME_REGEX: Regex = Regex::new("[\\w_\\-.]+").unwrap(); +} + +#[derive(Debug, Eq, PartialEq)] +pub struct CaptureInfo { + pub name: String, + pub start: Point, + pub end: Point, +} + +#[derive(Debug, PartialEq, Eq)] +pub struct Assertion { + pub position: Point, + pub expected_capture_name: String, +} + +/// Parse the given source code, finding all of the comments that contain +/// highlighting assertions. Return a vector of (position, expected highlight name) +/// pairs. +pub fn parse_position_comments( + parser: &mut Parser, + language: Language, + source: &[u8], +) -> Result> { + let mut result = Vec::new(); + let mut assertion_ranges = Vec::new(); + + // Parse the code. + parser.set_included_ranges(&[]).unwrap(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + + // Walk the tree, finding comment nodes that contain assertions. + let mut ascending = false; + let mut cursor = tree.root_node().walk(); + loop { + if ascending { + let node = cursor.node(); + + // Find every comment node. + if node.kind().contains("comment") { + if let Ok(text) = node.utf8_text(source) { + let mut position = node.start_position(); + if position.row == 0 { + continue; + } + + // Find the arrow character ("^" or '<-") in the comment. A left arrow + // refers to the column where the comment node starts. An up arrow refers + // to its own column. 
+ let mut has_left_caret = false; + let mut has_arrow = false; + let mut arrow_end = 0; + for (i, c) in text.char_indices() { + arrow_end = i + 1; + if c == '-' && has_left_caret { + has_arrow = true; + break; + } + if c == '^' { + has_arrow = true; + position.column += i; + break; + } + has_left_caret = c == '<'; + } + + // If the comment node contains an arrow and a highlight name, record the + // highlight name and the position. + if let (true, Some(mat)) = + (has_arrow, CAPTURE_NAME_REGEX.find(&text[arrow_end..])) + { + assertion_ranges.push((node.start_position(), node.end_position())); + result.push(Assertion { + position: position, + expected_capture_name: mat.as_str().to_string(), + }); + } + } + } + + // Continue walking the tree. + if cursor.goto_next_sibling() { + ascending = false; + } else if !cursor.goto_parent() { + break; + } + } else if !cursor.goto_first_child() { + ascending = true; + } + } + + // Adjust the row number in each assertion's position to refer to the line of + // code *above* the assertion. There can be multiple lines of assertion comments, + // so the positions may have to be decremented by more than one row. + let mut i = 0; + for assertion in result.iter_mut() { + loop { + let on_assertion_line = assertion_ranges[i..] + .iter() + .any(|(start, _)| start.row == assertion.position.row); + if on_assertion_line { + assertion.position.row -= 1; + } else { + while i < assertion_ranges.len() + && assertion_ranges[i].0.row < assertion.position.row + { + i += 1; + } + break; + } + } + } + + // The assertions can end up out of order due to the line adjustments. 
+ result.sort_unstable_by_key(|a| a.position); + + Ok(result) +} + +pub fn assert_expected_captures( + infos: Vec, + path: String, + parser: &mut Parser, + language: Language, +) -> Result<()> { + let contents = fs::read_to_string(path)?; + let pairs = parse_position_comments(parser, language, contents.as_bytes())?; + for info in &infos { + if let Some(found) = pairs.iter().find(|p| { + p.position.row == info.start.row && p.position >= info.start && p.position < info.end + }) { + if found.expected_capture_name != info.name && info.name != "name" { + Err(error::Error::new(format!( + "Assertion failed: at {}, found {}, expected {}", + info.start, found.expected_capture_name, info.name + )))? + } + } + } + Ok(()) +} diff --git a/cli/src/tags.rs b/cli/src/tags.rs new file mode 100644 index 00000000..802d8d06 --- /dev/null +++ b/cli/src/tags.rs @@ -0,0 +1,98 @@ +use super::loader::Loader; +use super::util; +use crate::error::{Error, Result}; +use std::io::{self, Write}; +use std::path::Path; +use std::time::Instant; +use std::{fs, str}; +use tree_sitter_tags::TagsContext; + +pub fn generate_tags( + loader: &Loader, + scope: Option<&str>, + paths: &[String], + quiet: bool, + time: bool, +) -> Result<()> { + let mut lang = None; + if let Some(scope) = scope { + lang = loader.language_configuration_for_scope(scope)?; + if lang.is_none() { + return Error::err(format!("Unknown scope '{}'", scope)); + } + } + + let mut context = TagsContext::new(); + let cancellation_flag = util::cancel_on_stdin(); + let stdout = io::stdout(); + let mut stdout = stdout.lock(); + + for path in paths { + let path = Path::new(&path); + let (language, language_config) = match lang { + Some(v) => v, + None => match loader.language_configuration_for_file_name(path)? { + Some(v) => v, + None => { + eprintln!("No language found for path {:?}", path); + continue; + } + }, + }; + + if let Some(tags_config) = language_config.tags_config(language)? 
{ + let indent; + if paths.len() > 1 { + if !quiet { + writeln!(&mut stdout, "{}", path.to_string_lossy())?; + } + indent = "\t" + } else { + indent = ""; + }; + + let source = fs::read(path)?; + let t0 = Instant::now(); + for tag in context + .generate_tags(tags_config, &source, Some(&cancellation_flag))? + .0 + { + let tag = tag?; + if !quiet { + write!( + &mut stdout, + "{}{:<10}\t | {:<8}\t{} {} - {} `{}`", + indent, + str::from_utf8(&source[tag.name_range]).unwrap_or(""), + &tags_config.syntax_type_name(tag.syntax_type_id), + if tag.is_definition { "def" } else { "ref" }, + tag.span.start, + tag.span.end, + str::from_utf8(&source[tag.line_range]).unwrap_or(""), + )?; + if let Some(docs) = tag.docs { + if docs.len() > 120 { + write!(&mut stdout, "\t{:?}...", docs.get(0..120).unwrap_or(""))?; + } else { + write!(&mut stdout, "\t{:?}", &docs)?; + } + } + writeln!(&mut stdout, "")?; + } + } + + if time { + writeln!( + &mut stdout, + "{}time: {}ms", + indent, + t0.elapsed().as_millis(), + )?; + } + } else { + eprintln!("No tags config found for path {:?}", path); + } + } + + Ok(()) +} diff --git a/cli/src/test.rs b/cli/src/test.rs index 544ec249..50c27220 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -11,7 +11,7 @@ use std::fs; use std::io::{self, Write}; use std::path::{Path, PathBuf}; use std::str; -use tree_sitter::{Language, LogType, Parser}; +use tree_sitter::{Language, LogType, Parser, Query}; lazy_static! { static ref HEADER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^===+\r?\n([^=]*)\r?\n===+\r?\n") @@ -112,6 +112,24 @@ pub fn run_tests_at_path( } } +pub fn check_queries_at_path(language: Language, path: &Path) -> Result<()> { + if path.exists() { + for entry in fs::read_dir(path)? 
{ + let entry = entry?; + let filepath = entry.file_name(); + let filepath = filepath.to_str().unwrap_or(""); + let hidden = filepath.starts_with("."); + if !hidden { + let content = fs::read_to_string(entry.path()).map_err(Error::wrap(|| { + format!("Error reading query file {:?}", entry.file_name()) + }))?; + Query::new(language, &content).map_err(|e| (filepath, e))?; + } + } + } + Ok(()) +} + pub fn print_diff_key() { println!( "\n{} / {}", diff --git a/cli/src/test_highlight.rs b/cli/src/test_highlight.rs new file mode 100644 index 00000000..df870bf6 --- /dev/null +++ b/cli/src/test_highlight.rs @@ -0,0 +1,275 @@ +use super::error::Result; +use crate::loader::Loader; +use crate::query_testing::{parse_position_comments, Assertion}; +use ansi_term::Colour; +use std::fs; +use std::path::Path; +use tree_sitter::Point; +use tree_sitter_highlight::{Highlight, HighlightConfiguration, HighlightEvent, Highlighter}; + +pub struct Failure { + row: usize, + column: usize, + expected_highlight: String, + actual_highlights: Vec, +} + +impl Failure { + pub fn message(&self) -> String { + let mut result = format!( + "Failure - row: {}, column: {}, expected highlight '{}', actual highlights: ", + self.row, self.column, self.expected_highlight + ); + if self.actual_highlights.is_empty() { + result += "none."; + } else { + for (i, actual_highlight) in self.actual_highlights.iter().enumerate() { + if i > 0 { + result += ", "; + } + result += "'"; + result += actual_highlight; + result += "'"; + } + } + result + } +} + +pub fn test_highlights(loader: &Loader, directory: &Path) -> Result<()> { + let mut failed = false; + let mut highlighter = Highlighter::new(); + + println!("syntax highlighting:"); + for highlight_test_file in fs::read_dir(directory)? 
{ + let highlight_test_file = highlight_test_file?; + let test_file_path = highlight_test_file.path(); + let test_file_name = highlight_test_file.file_name(); + let (language, language_config) = loader + .language_configuration_for_file_name(&test_file_path)? + .ok_or_else(|| format!("No language found for path {:?}", test_file_path))?; + let highlight_config = language_config + .highlight_config(language)? + .ok_or_else(|| format!("No highlighting config found for {:?}", test_file_path))?; + match test_highlight( + &loader, + &mut highlighter, + highlight_config, + fs::read(&test_file_path)?.as_slice(), + ) { + Ok(assertion_count) => { + println!( + " āœ“ {} ({} assertions)", + Colour::Green.paint(test_file_name.to_string_lossy().as_ref()), + assertion_count + ); + } + Err(e) => { + println!( + " āœ— {}", + Colour::Red.paint(test_file_name.to_string_lossy().as_ref()) + ); + println!(" {}", e.message()); + failed = true; + } + } + } + + if failed { + Err(String::new().into()) + } else { + Ok(()) + } +} +pub fn iterate_assertions( + assertions: &Vec, + highlights: &Vec<(Point, Point, Highlight)>, + highlight_names: &Vec, +) -> Result { + // Iterate through all of the highlighting assertions, checking each one against the + // actual highlights. + let mut i = 0; + let mut actual_highlights = Vec::<&String>::new(); + for Assertion { + position, + expected_capture_name: expected_highlight, + } in assertions + { + let mut passed = false; + actual_highlights.clear(); + + 'highlight_loop: loop { + // The assertions are ordered by position, so skip past all of the highlights that + // end at or before this assertion's position. + if let Some(highlight) = highlights.get(i) { + if highlight.1 <= *position { + i += 1; + continue; + } + + // Iterate through all of the highlights that start at or before this assertion's, + // position, looking for one that matches the assertion. 
+ let mut j = i; + while let (false, Some(highlight)) = (passed, highlights.get(j)) { + if highlight.0 > *position { + break 'highlight_loop; + } + + // If the highlight matches the assertion, this test passes. Otherwise, + // add this highlight to the list of actual highlights that span the + // assertion's position, in order to generate an error message in the event + // of a failure. + let highlight_name = &highlight_names[(highlight.2).0]; + if *highlight_name == *expected_highlight { + passed = true; + break 'highlight_loop; + } else { + actual_highlights.push(highlight_name); + } + + j += 1; + } + } else { + break; + } + } + + if !passed { + return Err(Failure { + row: position.row, + column: position.column, + expected_highlight: expected_highlight.clone(), + actual_highlights: actual_highlights.into_iter().cloned().collect(), + } + .into()); + } + } + + Ok(assertions.len()) +} + +pub fn test_highlight( + loader: &Loader, + highlighter: &mut Highlighter, + highlight_config: &HighlightConfiguration, + source: &[u8], +) -> Result { + // Highlight the file, and parse out all of the highlighting assertions. + let highlight_names = loader.highlight_names(); + let highlights = get_highlight_positions(loader, highlighter, highlight_config, source)?; + let assertions = + parse_position_comments(highlighter.parser(), highlight_config.language, source)?; + + iterate_assertions(&assertions, &highlights, &highlight_names)?; + + // Iterate through all of the highlighting assertions, checking each one against the + // actual highlights. + let mut i = 0; + let mut actual_highlights = Vec::<&String>::new(); + for Assertion { + position, + expected_capture_name: expected_highlight, + } in &assertions + { + let mut passed = false; + actual_highlights.clear(); + + 'highlight_loop: loop { + // The assertions are ordered by position, so skip past all of the highlights that + // end at or before this assertion's position. 
+ if let Some(highlight) = highlights.get(i) { + if highlight.1 <= *position { + i += 1; + continue; + } + + // Iterate through all of the highlights that start at or before this assertion's, + // position, looking for one that matches the assertion. + let mut j = i; + while let (false, Some(highlight)) = (passed, highlights.get(j)) { + if highlight.0 > *position { + break 'highlight_loop; + } + + // If the highlight matches the assertion, this test passes. Otherwise, + // add this highlight to the list of actual highlights that span the + // assertion's position, in order to generate an error message in the event + // of a failure. + let highlight_name = &highlight_names[(highlight.2).0]; + if *highlight_name == *expected_highlight { + passed = true; + break 'highlight_loop; + } else { + actual_highlights.push(highlight_name); + } + + j += 1; + } + } else { + break; + } + } + + if !passed { + return Err(Failure { + row: position.row, + column: position.column, + expected_highlight: expected_highlight.clone(), + actual_highlights: actual_highlights.into_iter().cloned().collect(), + } + .into()); + } + } + + Ok(assertions.len()) +} + +pub fn get_highlight_positions( + loader: &Loader, + highlighter: &mut Highlighter, + highlight_config: &HighlightConfiguration, + source: &[u8], +) -> Result> { + let mut row = 0; + let mut column = 0; + let mut byte_offset = 0; + let mut was_newline = false; + let mut result = Vec::new(); + let mut highlight_stack = Vec::new(); + let source = String::from_utf8_lossy(source); + let mut char_indices = source.char_indices(); + for event in highlighter.highlight(highlight_config, source.as_bytes(), None, |string| { + loader.highlight_config_for_injection_string(string) + })? { + match event? 
{ + HighlightEvent::HighlightStart(h) => highlight_stack.push(h), + HighlightEvent::HighlightEnd => { + highlight_stack.pop(); + } + HighlightEvent::Source { start, end } => { + let mut start_position = Point::new(row, column); + while byte_offset < end { + if byte_offset <= start { + start_position = Point::new(row, column); + } + if let Some((i, c)) = char_indices.next() { + if was_newline { + row += 1; + column = 0; + } else { + column += i - byte_offset; + } + was_newline = c == '\n'; + byte_offset = i; + } else { + break; + } + } + if let Some(highlight) = highlight_stack.last() { + result.push((start_position, Point::new(row, column), *highlight)) + } + } + } + } + Ok(result) +} diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index ed6226f2..202dcd70 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -21,7 +21,11 @@ const LANGUAGES: &'static [&'static str] = &[ "go", "html", "javascript", + "json", + "php", "python", + "ruby", + "rust", ]; lazy_static! 
{ @@ -57,7 +61,11 @@ fn test_real_language_corpus_files() { } let language = get_language(language_name); - let corpus_dir = grammars_dir.join(language_name).join("corpus"); + let mut corpus_dir = grammars_dir.join(language_name).join("corpus"); + if !corpus_dir.is_dir() { + corpus_dir = grammars_dir.join(language_name).join("test").join("corpus"); + } + let error_corpus_file = error_corpus_dir.join(&format!("{}_errors.txt", language_name)); let main_tests = parse_tests(&corpus_dir).unwrap(); let error_tests = parse_tests(&error_corpus_file).unwrap_or(TestEntry::default()); @@ -300,7 +308,8 @@ fn check_consistent_sizes(tree: &Tree, input: &Vec) { let mut last_child_end_point = start_point; let mut some_child_has_changes = false; let mut actual_named_child_count = 0; - for child in node.children() { + for i in 0..node.child_count() { + let child = node.child(i).unwrap(); assert!(child.start_byte() >= last_child_end_byte); assert!(child.start_position() >= last_child_end_point); check(child, line_offsets); diff --git a/cli/src/tests/helpers/allocations.rs b/cli/src/tests/helpers/allocations.rs index c64762bd..2f89c173 100644 --- a/cli/src/tests/helpers/allocations.rs +++ b/cli/src/tests/helpers/allocations.rs @@ -51,6 +51,12 @@ pub fn stop_recording() { } } +pub fn record(f: impl FnOnce()) { + start_recording(); + f(); + stop_recording(); +} + fn record_alloc(ptr: *mut c_void) { let mut recorder = RECORDER.lock(); if recorder.enabled { diff --git a/cli/src/tests/helpers/fixtures.rs b/cli/src/tests/helpers/fixtures.rs index 4389797e..fc459777 100644 --- a/cli/src/tests/helpers/fixtures.rs +++ b/cli/src/tests/helpers/fixtures.rs @@ -2,8 +2,8 @@ use crate::loader::Loader; use lazy_static::lazy_static; use std::fs; use std::path::{Path, PathBuf}; -use tree_sitter::{Language, PropertySheet}; -use tree_sitter_highlight::{load_property_sheet, Properties}; +use tree_sitter::Language; +use tree_sitter_highlight::HighlightConfiguration; include!("./dirs.rs"); @@ -11,6 +11,10 
@@ lazy_static! { static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone()); } +pub fn test_loader<'a>() -> &'a Loader { + &*TEST_LOADER +} + pub fn fixtures_dir<'a>() -> &'static Path { &FIXTURES_DIR } @@ -21,18 +25,33 @@ pub fn get_language(name: &str) -> Language { .unwrap() } -pub fn get_property_sheet_json(language_name: &str, sheet_name: &str) -> String { - let path = GRAMMARS_DIR - .join(language_name) - .join("src") - .join(sheet_name); - fs::read_to_string(path).unwrap() +pub fn get_language_queries_path(language_name: &str) -> PathBuf { + GRAMMARS_DIR.join(language_name).join("queries") } -pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet { - let json = get_property_sheet_json(language_name, sheet_name); +pub fn get_highlight_config( + language_name: &str, + injection_query_filename: Option<&str>, + highlight_names: &[String], +) -> HighlightConfiguration { let language = get_language(language_name); - load_property_sheet(language, &json).unwrap() + let queries_path = get_language_queries_path(language_name); + let highlights_query = fs::read_to_string(queries_path.join("highlights.scm")).unwrap(); + let injections_query = if let Some(injection_query_filename) = injection_query_filename { + fs::read_to_string(queries_path.join(injection_query_filename)).unwrap() + } else { + String::new() + }; + let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or(String::new()); + let mut result = HighlightConfiguration::new( + language, + &highlights_query, + &injections_query, + &locals_query, + ) + .unwrap(); + result.configure(highlight_names); + result } pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language { diff --git a/cli/src/tests/helpers/mod.rs b/cli/src/tests/helpers/mod.rs index 2d1ce574..88928d55 100644 --- a/cli/src/tests/helpers/mod.rs +++ b/cli/src/tests/helpers/mod.rs @@ -1,5 +1,5 @@ pub(super) mod allocations; +pub(super) mod edits; pub(super) mod 
fixtures; pub(super) mod random; pub(super) mod scope_sequence; -pub(super) mod edits; diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index 34b545ff..6b09d64c 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -1,32 +1,92 @@ -use super::helpers::fixtures::{get_language, get_property_sheet, get_property_sheet_json}; +use super::helpers::fixtures::{get_highlight_config, get_language, get_language_queries_path}; use lazy_static::lazy_static; use std::ffi::CString; - use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{ptr, slice, str}; -use tree_sitter::{Language, PropertySheet}; +use std::{fs, ptr, slice, str}; use tree_sitter_highlight::{ - c, highlight, highlight_html, Error, Highlight, HighlightEvent, Properties, + c, Error, Highlight, HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer, }; lazy_static! { - static ref JS_SHEET: PropertySheet = - get_property_sheet("javascript", "highlights.json"); - static ref HTML_SHEET: PropertySheet = - get_property_sheet("html", "highlights.json"); - static ref EJS_SHEET: PropertySheet = - get_property_sheet("embedded-template", "highlights-ejs.json"); - static ref RUST_SHEET: PropertySheet = - get_property_sheet("rust", "highlights.json"); - static ref SCOPE_CLASS_STRINGS: Vec = { - let mut result = Vec::new(); - let mut i = 0; - while let Some(highlight) = Highlight::from_usize(i) { - result.push(format!("class={:?}", highlight)); - i += 1; - } - result - }; + static ref JS_HIGHLIGHT: HighlightConfiguration = + get_highlight_config("javascript", Some("injections.scm"), &HIGHLIGHT_NAMES); + static ref JSDOC_HIGHLIGHT: HighlightConfiguration = + get_highlight_config("jsdoc", None, &HIGHLIGHT_NAMES); + static ref HTML_HIGHLIGHT: HighlightConfiguration = + get_highlight_config("html", Some("injections.scm"), &HIGHLIGHT_NAMES); + static ref EJS_HIGHLIGHT: HighlightConfiguration = get_highlight_config( + "embedded-template", + 
Some("injections-ejs.scm"), + &HIGHLIGHT_NAMES + ); + static ref RUST_HIGHLIGHT: HighlightConfiguration = + get_highlight_config("rust", Some("injections.scm"), &HIGHLIGHT_NAMES); + static ref HIGHLIGHT_NAMES: Vec = [ + "attribute", + "carriage-return", + "comment", + "constant", + "constructor", + "function.builtin", + "function", + "embedded", + "keyword", + "operator", + "property.builtin", + "property", + "punctuation", + "punctuation.bracket", + "punctuation.delimiter", + "punctuation.special", + "string", + "tag", + "type.builtin", + "type", + "variable.builtin", + "variable.parameter", + "variable", + ] + .iter() + .cloned() + .map(String::from) + .collect(); + static ref HTML_ATTRS: Vec = HIGHLIGHT_NAMES + .iter() + .map(|s| format!("class={}", s)) + .collect(); +} + +#[test] +fn test_highlighting_javascript() { + let source = "const a = function(b) { return b + c; }"; + assert_eq!( + &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), + &[vec![ + ("const", vec!["keyword"]), + (" ", vec![]), + ("a", vec!["function"]), + (" ", vec![]), + ("=", vec!["operator"]), + (" ", vec![]), + ("function", vec!["keyword"]), + ("(", vec!["punctuation.bracket"]), + ("b", vec!["variable.parameter"]), + (")", vec!["punctuation.bracket"]), + (" ", vec![]), + ("{", vec!["punctuation.bracket"]), + (" ", vec![]), + ("return", vec!["keyword"]), + (" ", vec![]), + ("b", vec!["variable.parameter"]), + (" ", vec![]), + ("+", vec!["operator"]), + (" ", vec![]), + ("c", vec!["variable"]), + (";", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("}", vec!["punctuation.bracket"]), + ]] + ); } #[test] @@ -34,57 +94,65 @@ fn test_highlighting_injected_html_in_javascript() { let source = vec!["const s = html `
${a < b}
`;"].join("\n"); assert_eq!( - &to_token_vector(&source, get_language("javascript"), &JS_SHEET).unwrap(), + &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), &[vec![ - ("const", vec![Highlight::Keyword]), + ("const", vec!["keyword"]), (" ", vec![]), - ("s", vec![Highlight::Variable]), + ("s", vec!["variable"]), (" ", vec![]), - ("=", vec![Highlight::Operator]), + ("=", vec!["operator"]), (" ", vec![]), - ("html", vec![Highlight::Function]), + ("html", vec!["function"]), (" ", vec![]), - ("`<", vec![Highlight::String]), - ("div", vec![Highlight::String, Highlight::Tag]), - (">", vec![Highlight::String]), - ( - "${", - vec![ - Highlight::String, - Highlight::Embedded, - Highlight::PunctuationSpecial - ] - ), - ( - "a", - vec![Highlight::String, Highlight::Embedded, Highlight::Variable] - ), - (" ", vec![Highlight::String, Highlight::Embedded]), - ( - "<", - vec![Highlight::String, Highlight::Embedded, Highlight::Operator] - ), - (" ", vec![Highlight::String, Highlight::Embedded]), - ( - "b", - vec![Highlight::String, Highlight::Embedded, Highlight::Variable] - ), - ( - "}", - vec![ - Highlight::String, - Highlight::Embedded, - Highlight::PunctuationSpecial - ] - ), - ("`", vec![Highlight::String]), - (";", vec![Highlight::PunctuationDelimiter]), + ("`", vec!["string"]), + ("<", vec!["string", "punctuation.bracket"]), + ("div", vec!["string", "tag"]), + (">", vec!["string", "punctuation.bracket"]), + ("${", vec!["string", "embedded", "punctuation.special"]), + ("a", vec!["string", "embedded", "variable"]), + (" ", vec!["string", "embedded"]), + ("<", vec!["string", "embedded", "operator"]), + (" ", vec!["string", "embedded"]), + ("b", vec!["string", "embedded", "variable"]), + ("}", vec!["string", "embedded", "punctuation.special"]), + ("", vec!["string", "punctuation.bracket"]), + ("`", vec!["string"]), + (";", vec!["punctuation.delimiter"]), ]] ); } +#[test] +fn test_highlighting_injected_javascript_in_html_mini() { + let source = ""; + + assert_eq!( + 
&to_token_vector(source, &HTML_HIGHLIGHT).unwrap(), + &[vec![ + ("<", vec!["punctuation.bracket"]), + ("script", vec!["tag"]), + (">", vec!["punctuation.bracket"]), + ("const", vec!["keyword"]), + (" ", vec![]), + ("x", vec!["variable"]), + (" ", vec![]), + ("=", vec!["operator"]), + (" ", vec![]), + ("new", vec!["keyword"]), + (" ", vec![]), + ("Thing", vec!["constructor"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), + (";", vec!["punctuation.delimiter"]), + ("", vec!["punctuation.bracket"]), + ],] + ); +} + #[test] fn test_highlighting_injected_javascript_in_html() { let source = vec![ @@ -97,38 +165,44 @@ fn test_highlighting_injected_javascript_in_html() { .join("\n"); assert_eq!( - &to_token_vector(&source, get_language("html"), &HTML_SHEET).unwrap(), + &to_token_vector(&source, &HTML_HIGHLIGHT).unwrap(), &[ - vec![("<", vec![]), ("body", vec![Highlight::Tag]), (">", vec![]),], vec![ - (" <", vec![]), - ("script", vec![Highlight::Tag]), - (">", vec![]), + ("<", vec!["punctuation.bracket"]), + ("body", vec!["tag"]), + (">", vec!["punctuation.bracket"]), + ], + vec![ + (" ", vec![]), + ("<", vec!["punctuation.bracket"]), + ("script", vec!["tag"]), + (">", vec!["punctuation.bracket"]), ], vec![ (" ", vec![]), - ("const", vec![Highlight::Keyword]), + ("const", vec!["keyword"]), (" ", vec![]), - ("x", vec![Highlight::Variable]), + ("x", vec!["variable"]), (" ", vec![]), - ("=", vec![Highlight::Operator]), + ("=", vec!["operator"]), (" ", vec![]), - ("new", vec![Highlight::Keyword]), + ("new", vec!["keyword"]), (" ", vec![]), - ("Thing", vec![Highlight::Constructor]), - ("(", vec![Highlight::PunctuationBracket]), - (")", vec![Highlight::PunctuationBracket]), - (";", vec![Highlight::PunctuationDelimiter]), + ("Thing", vec!["constructor"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), + (";", vec!["punctuation.delimiter"]), ], vec![ - (" ", vec![]), + (" ", vec![]), + ("", vec!["punctuation.bracket"]), ], 
vec![ - ("", vec![]), + ("", vec!["punctuation.bracket"]), ], ] ); @@ -147,13 +221,13 @@ fn test_highlighting_multiline_nodes_to_html() { .join("\n"); assert_eq!( - &to_html(&source, get_language("javascript"), &JS_SHEET,).unwrap(), + &to_html(&source, &JS_HIGHLIGHT).unwrap(), &[ - "const SOMETHING = `\n".to_string(), - " one ${\n".to_string(), - " two()\n".to_string(), - " } three\n".to_string(), - "`\n".to_string(), + "const SOMETHING = `\n".to_string(), + " one ${\n".to_string(), + " two()\n".to_string(), + " } three\n".to_string(), + "`\n".to_string(), ] ); } @@ -169,51 +243,51 @@ fn test_highlighting_with_local_variable_tracking() { .join("\n"); assert_eq!( - &to_token_vector(&source, get_language("javascript"), &JS_SHEET).unwrap(), + &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), &[ vec![ - ("module", vec![Highlight::VariableBuiltin]), - (".", vec![Highlight::PunctuationDelimiter]), - ("exports", vec![Highlight::Property]), + ("module", vec!["variable.builtin"]), + (".", vec!["punctuation.delimiter"]), + ("exports", vec!["function"]), (" ", vec![]), - ("=", vec![Highlight::Operator]), + ("=", vec!["operator"]), (" ", vec![]), - ("function", vec![Highlight::Keyword]), + ("function", vec!["keyword"]), (" ", vec![]), - ("a", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), - ("b", vec![Highlight::VariableParameter]), - (")", vec![Highlight::PunctuationBracket]), + ("a", vec!["function"]), + ("(", vec!["punctuation.bracket"]), + ("b", vec!["variable.parameter"]), + (")", vec!["punctuation.bracket"]), (" ", vec![]), - ("{", vec![Highlight::PunctuationBracket]) + ("{", vec!["punctuation.bracket"]) ], vec![ (" ", vec![]), - ("const", vec![Highlight::Keyword]), + ("const", vec!["keyword"]), (" ", vec![]), - ("module", vec![Highlight::Variable]), + ("module", vec!["variable"]), (" ", vec![]), - ("=", vec![Highlight::Operator]), + ("=", vec!["operator"]), (" ", vec![]), - ("c", vec![Highlight::Variable]), - (";", 
vec![Highlight::PunctuationDelimiter]) + ("c", vec!["variable"]), + (";", vec!["punctuation.delimiter"]) ], vec![ (" ", vec![]), - ("console", vec![Highlight::VariableBuiltin]), - (".", vec![Highlight::PunctuationDelimiter]), - ("log", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), + ("console", vec!["variable.builtin"]), + (".", vec!["punctuation.delimiter"]), + ("log", vec!["function"]), + ("(", vec!["punctuation.bracket"]), // Not a builtin, because `module` was defined as a variable above. - ("module", vec![Highlight::Variable]), - (",", vec![Highlight::PunctuationDelimiter]), + ("module", vec!["variable"]), + (",", vec!["punctuation.delimiter"]), (" ", vec![]), // A parameter, because `b` was defined as a parameter above. - ("b", vec![Highlight::VariableParameter]), - (")", vec![Highlight::PunctuationBracket]), - (";", vec![Highlight::PunctuationDelimiter]), + ("b", vec!["variable.parameter"]), + (")", vec!["punctuation.bracket"]), + (";", vec!["punctuation.delimiter"]), ], - vec![("}", vec![Highlight::PunctuationBracket])] + vec![("}", vec!["punctuation.bracket"])] ], ); } @@ -234,41 +308,95 @@ fn test_highlighting_empty_lines() { .join("\n"); assert_eq!( - &to_html(&source, get_language("javascript"), &JS_SHEET,).unwrap(), + &to_html(&source, &JS_HIGHLIGHT,).unwrap(), &[ - "class A {\n".to_string(), + "class A {\n".to_string(), "\n".to_string(), - " b(c) {\n".to_string(), + " b(c) {\n".to_string(), "\n".to_string(), - " d(e)\n".to_string(), + " d(e)\n".to_string(), "\n".to_string(), - " }\n".to_string(), + " }\n".to_string(), "\n".to_string(), - "}\n".to_string(), + "}\n".to_string(), ] ); } #[test] -fn test_highlighting_ejs() { - let source = vec!["
<% foo() %>
"].join("\n"); +fn test_highlighting_carriage_returns() { + let source = "a = \"a\rb\"\r\nb\r"; assert_eq!( - &to_token_vector(&source, get_language("embedded-template"), &EJS_SHEET).unwrap(), + &to_html(&source, &JS_HIGHLIGHT).unwrap(), + &[ + "a = "ab"\n", + "b\n", + ], + ); +} + +#[test] +fn test_highlighting_ejs_with_html_and_javascript() { + let source = vec!["
<% foo() %>
"].join("\n"); + + assert_eq!( + &to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(), &[[ - ("<", vec![]), - ("div", vec![Highlight::Tag]), - (">", vec![]), - ("<%", vec![Highlight::Keyword]), + ("<", vec!["punctuation.bracket"]), + ("div", vec!["tag"]), + (">", vec!["punctuation.bracket"]), + ("<%", vec!["keyword"]), (" ", vec![]), - ("foo", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), - (")", vec![Highlight::PunctuationBracket]), + ("foo", vec!["function"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), (" ", vec![]), - ("%>", vec![Highlight::Keyword]), - ("", vec![]) + ("%>", vec!["keyword"]), + ("", vec!["punctuation.bracket"]), + ("<", vec!["punctuation.bracket"]), + ("script", vec!["tag"]), + (">", vec!["punctuation.bracket"]), + (" ", vec![]), + ("bar", vec!["function"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), + (" ", vec![]), + ("", vec!["punctuation.bracket"]), + ]], + ); +} + +#[test] +fn test_highlighting_javascript_with_jsdoc() { + // Regression test: the middle comment has no highlights. This should not prevent + // later injections from highlighting properly. 
+ let source = vec!["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n"); + + assert_eq!( + &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), + &[[ + ("a", vec!["variable"]), + (" ", vec![]), + ("/* ", vec!["comment"]), + ("@see", vec!["comment", "keyword"]), + (" a */", vec!["comment"]), + (" ", vec![]), + ("b", vec!["variable"]), + (";", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("/* nothing */", vec!["comment"]), + (" ", vec![]), + ("c", vec!["variable"]), + (";", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("/* ", vec!["comment"]), + ("@see", vec!["comment", "keyword"]), + (" b */", vec!["comment"]) ]], ); } @@ -278,33 +406,36 @@ fn test_highlighting_with_content_children_included() { let source = vec!["assert!(", " a.b.c() < D::e::()", ");"].join("\n"); assert_eq!( - &to_token_vector(&source, get_language("rust"), &RUST_SHEET).unwrap(), + &to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(), &[ vec![ - ("assert", vec![Highlight::Function]), - ("!", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), + ("assert", vec!["function"]), + ("!", vec!["function"]), + ("(", vec!["punctuation.bracket"]), ], vec![ (" a", vec![]), - (".", vec![Highlight::PunctuationDelimiter]), - ("b", vec![Highlight::Property]), - (".", vec![Highlight::PunctuationDelimiter]), - ("c", vec![Highlight::Function]), - ("(", vec![Highlight::PunctuationBracket]), - (")", vec![Highlight::PunctuationBracket]), + (".", vec!["punctuation.delimiter"]), + ("b", vec!["property"]), + (".", vec!["punctuation.delimiter"]), + ("c", vec!["function"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), (" < ", vec![]), - ("D", vec![Highlight::Type]), - ("::", vec![Highlight::PunctuationDelimiter]), - ("e", vec![Highlight::Function]), - ("::", vec![Highlight::PunctuationDelimiter]), - ("<", vec![Highlight::PunctuationBracket]), - ("F", vec![Highlight::Type]), - (">", vec![Highlight::PunctuationBracket]), - ("(", 
vec![Highlight::PunctuationBracket]), - (")", vec![Highlight::PunctuationBracket]), + ("D", vec!["type"]), + ("::", vec!["punctuation.delimiter"]), + ("e", vec!["function"]), + ("::", vec!["punctuation.delimiter"]), + ("<", vec!["punctuation.bracket"]), + ("F", vec!["type"]), + (">", vec!["punctuation.bracket"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), ], - vec![(")", vec![Highlight::PunctuationBracket]), (";", vec![]),] + vec![ + (")", vec!["punctuation.bracket"]), + (";", vec!["punctuation.delimiter"]), + ] ], ); } @@ -325,73 +456,97 @@ fn test_highlighting_cancellation() { test_language_for_injection_string(name) }; - // Constructing the highlighter, which eagerly parses the outer document, - // should not fail. - let highlighter = highlight( - source.as_bytes(), - get_language("html"), - &HTML_SHEET, - Some(&cancellation_flag), - injection_callback, - ) - .unwrap(); + // The initial `highlight` call, which eagerly parses the outer document, should not fail. + let mut highlighter = Highlighter::new(); + let events = highlighter + .highlight( + &HTML_HIGHLIGHT, + source.as_bytes(), + Some(&cancellation_flag), + injection_callback, + ) + .unwrap(); - // Iterating the scopes should not panic. It should return an error - // once the cancellation is detected. - for event in highlighter { + // Iterating the scopes should not panic. It should return an error once the + // cancellation is detected. 
+ for event in events { if let Err(e) = event { assert_eq!(e, Error::Cancelled); return; } } + panic!("Expected an error while iterating highlighter"); } #[test] fn test_highlighting_via_c_api() { - let js_lang = get_language("javascript"); - let html_lang = get_language("html"); - let js_sheet = get_property_sheet_json("javascript", "highlights.json"); - let js_sheet = c_string(&js_sheet); - let html_sheet = get_property_sheet_json("html", "highlights.json"); - let html_sheet = c_string(&html_sheet); + let highlights = vec![ + "class=tag\0", + "class=function\0", + "class=string\0", + "class=keyword\0", + ]; + let highlight_names = highlights + .iter() + .map(|h| h["class=".len()..].as_ptr() as *const i8) + .collect::>(); + let highlight_attrs = highlights + .iter() + .map(|h| h.as_bytes().as_ptr() as *const i8) + .collect::>(); + let highlighter = c::ts_highlighter_new( + &highlight_names[0] as *const *const i8, + &highlight_attrs[0] as *const *const i8, + highlights.len() as u32, + ); - let class_tag = c_string("class=tag"); - let class_function = c_string("class=function"); - let class_string = c_string("class=string"); - let class_keyword = c_string("class=keyword"); - - let js_scope_name = c_string("source.js"); - let html_scope_name = c_string("text.html.basic"); - let injection_regex = c_string("^(javascript|js)$"); let source_code = c_string(""); - let attribute_strings = &mut [ptr::null(); Highlight::Unknown as usize + 1]; - attribute_strings[Highlight::Tag as usize] = class_tag.as_ptr(); - attribute_strings[Highlight::String as usize] = class_string.as_ptr(); - attribute_strings[Highlight::Keyword as usize] = class_keyword.as_ptr(); - attribute_strings[Highlight::Function as usize] = class_function.as_ptr(); + let js_scope = c_string("source.js"); + let js_injection_regex = c_string("^javascript"); + let language = get_language("javascript"); + let queries = get_language_queries_path("javascript"); + let highlights_query = 
fs::read_to_string(queries.join("highlights.scm")).unwrap(); + let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap(); + let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap(); + c::ts_highlighter_add_language( + highlighter, + js_scope.as_ptr(), + js_injection_regex.as_ptr(), + language, + highlights_query.as_ptr() as *const i8, + injections_query.as_ptr() as *const i8, + locals_query.as_ptr() as *const i8, + highlights_query.len() as u32, + injections_query.len() as u32, + locals_query.len() as u32, + ); + + let html_scope = c_string("text.html.basic"); + let html_injection_regex = c_string("^html"); + let language = get_language("html"); + let queries = get_language_queries_path("html"); + let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap(); + let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap(); + c::ts_highlighter_add_language( + highlighter, + html_scope.as_ptr(), + html_injection_regex.as_ptr(), + language, + highlights_query.as_ptr() as *const i8, + injections_query.as_ptr() as *const i8, + ptr::null(), + highlights_query.len() as u32, + injections_query.len() as u32, + 0, + ); - let highlighter = c::ts_highlighter_new(attribute_strings.as_ptr()); let buffer = c::ts_highlight_buffer_new(); - c::ts_highlighter_add_language( - highlighter, - html_scope_name.as_ptr(), - html_lang, - html_sheet.as_ptr(), - ptr::null_mut(), - ); - c::ts_highlighter_add_language( - highlighter, - js_scope_name.as_ptr(), - js_lang, - js_sheet.as_ptr(), - injection_regex.as_ptr(), - ); c::ts_highlighter_highlight( highlighter, - html_scope_name.as_ptr(), + html_scope.as_ptr(), source_code.as_ptr(), source_code.as_bytes().len() as u32, buffer, @@ -421,8 +576,8 @@ fn test_highlighting_via_c_api() { lines, vec![ "<script>\n", - "const a = b('c');\n", - "c.d();\n", + "const a = b('c');\n", + "c.d();\n", "</script>\n", ] ); @@ -433,7 +588,7 @@ fn test_highlighting_via_c_api() { 
#[test] fn test_decode_utf8_lossy() { - use tree_sitter_highlight::util::LossyUtf8; + use tree_sitter::LossyUtf8; let parts = LossyUtf8::new(b"hi").collect::>(); assert_eq!(parts, vec!["hi"]); @@ -452,50 +607,60 @@ fn c_string(s: &str) -> CString { CString::new(s.as_bytes().to_vec()).unwrap() } -fn test_language_for_injection_string<'a>( - string: &str, -) -> Option<(Language, &'a PropertySheet)> { +fn test_language_for_injection_string<'a>(string: &str) -> Option<&'a HighlightConfiguration> { match string { - "javascript" => Some((get_language("javascript"), &JS_SHEET)), - "html" => Some((get_language("html"), &HTML_SHEET)), - "rust" => Some((get_language("rust"), &RUST_SHEET)), + "javascript" => Some(&JS_HIGHLIGHT), + "html" => Some(&HTML_HIGHLIGHT), + "rust" => Some(&RUST_HIGHLIGHT), + "jsdoc" => Some(&JSDOC_HIGHLIGHT), _ => None, } } fn to_html<'a>( src: &'a str, - language: Language, - property_sheet: &'a PropertySheet, + language_config: &'a HighlightConfiguration, ) -> Result, Error> { - highlight_html( - src.as_bytes(), - language, - property_sheet, + let src = src.as_bytes(); + let mut renderer = HtmlRenderer::new(); + let mut highlighter = Highlighter::new(); + let events = highlighter.highlight( + language_config, + src, None, &test_language_for_injection_string, - &|highlight| SCOPE_CLASS_STRINGS[highlight as usize].as_str(), - ) + )?; + + renderer.set_carriage_return_highlight( + HIGHLIGHT_NAMES + .iter() + .position(|s| s == "carriage-return") + .map(Highlight), + ); + renderer + .render(events, src, &|highlight| HTML_ATTRS[highlight.0].as_bytes()) + .unwrap(); + Ok(renderer.lines().map(|s| s.to_string()).collect()) } fn to_token_vector<'a>( src: &'a str, - language: Language, - property_sheet: &'a PropertySheet, -) -> Result)>>, Error> { + language_config: &'a HighlightConfiguration, +) -> Result)>>, Error> { let src = src.as_bytes(); + let mut highlighter = Highlighter::new(); let mut lines = Vec::new(); let mut highlights = Vec::new(); let mut line 
= Vec::new(); - for event in highlight( + let events = highlighter.highlight( + language_config, src, - language, - property_sheet, None, &test_language_for_injection_string, - )? { + )?; + for event in events { match event? { - HighlightEvent::HighlightStart(s) => highlights.push(s), + HighlightEvent::HighlightStart(s) => highlights.push(HIGHLIGHT_NAMES[s.0].as_str()), HighlightEvent::HighlightEnd => { highlights.pop(); } diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index 143e8297..24e8160e 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -3,5 +3,8 @@ mod helpers; mod highlight_test; mod node_test; mod parser_test; -mod properties_test; +mod pathological_test; +mod query_test; +mod tags_test; +mod test_highlight_test; mod tree_test; diff --git a/cli/src/tests/node_refs.rs b/cli/src/tests/node_refs.rs deleted file mode 100644 index 143ae7f6..00000000 --- a/cli/src/tests/node_refs.rs +++ /dev/null @@ -1,62 +0,0 @@ -use super::helpers::fixtures::get_test_language; -use crate::generate::generate_parser_for_grammar; -use tree_sitter::Parser; - -#[test] -fn test_basic_node_refs() { - let (parser_name, parser_code) = generate_parser_for_grammar( - r#" - { - "name": "test_grammar_with_refs", - "extras": [ - {"type": "PATTERN", "value": "\\s+"} - ], - "rules": { - "rule_a": { - "type": "SEQ", - "members": [ - { - "type": "REF", - "value": "ref_1", - "content": { - "type": "STRING", - "value": "child-1" - } - }, - { - "type": "CHOICE", - "members": [ - { - "type": "STRING", - "value": "child-2" - }, - { - "type": "BLANK" - } - ] - }, - { - "type": "REF", - "value": "ref_2", - "content": { - "type": "STRING", - "value": "child-3" - } - } - ] - } - } - } - "#, - ) - .unwrap(); - - let mut parser = Parser::new(); - let language = get_test_language(&parser_name, &parser_code, None); - parser.set_language(language).unwrap(); - - let tree = parser.parse("child-1 child-2 child-3", None).unwrap(); - let root_node = tree.root_node(); - 
assert_eq!(root_node.child_by_ref("ref_1"), root_node.child(0)); - assert_eq!(root_node.child_by_ref("ref_2"), root_node.child(2)); -} diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs index 74e123f4..7e652cd5 100644 --- a/cli/src/tests/node_test.rs +++ b/cli/src/tests/node_test.rs @@ -167,6 +167,79 @@ fn test_node_child() { assert_eq!(tree.root_node().parent(), None); } +#[test] +fn test_node_children() { + let tree = parse_json_example(); + let mut cursor = tree.walk(); + let array_node = tree.root_node().child(0).unwrap(); + assert_eq!( + array_node + .children(&mut cursor) + .map(|n| n.kind()) + .collect::>(), + &["[", "number", ",", "false", ",", "object", "]",] + ); + assert_eq!( + array_node + .named_children(&mut cursor) + .map(|n| n.kind()) + .collect::>(), + &["number", "false", "object"] + ); + let object_node = array_node + .named_children(&mut cursor) + .find(|n| n.kind() == "object") + .unwrap(); + assert_eq!( + object_node + .children(&mut cursor) + .map(|n| n.kind()) + .collect::>(), + &["{", "pair", "}",] + ); +} + +#[test] +fn test_node_children_by_field_name() { + let mut parser = Parser::new(); + parser.set_language(get_language("python")).unwrap(); + let source = " + if one: + a() + elif two: + b() + elif three: + c() + elif four: + d() + "; + + let tree = parser.parse(source, None).unwrap(); + let node = tree.root_node().child(0).unwrap(); + assert_eq!(node.kind(), "if_statement"); + let mut cursor = tree.walk(); + let alternatives = node.children_by_field_name("alternative", &mut cursor); + let alternative_texts = + alternatives.map(|n| &source[n.child_by_field_name("condition").unwrap().byte_range()]); + assert_eq!( + alternative_texts.collect::>(), + &["two", "three", "four",] + ); +} + +#[test] +fn test_node_parent_of_child_by_field_name() { + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + let tree = parser.parse("foo(a().b[0].c.d.e())", None).unwrap(); + let call_node = 
tree.root_node().named_child(0).unwrap().named_child(0).unwrap(); + assert_eq!(call_node.kind(), "call_expression"); + + // Regression test - when a field points to a hidden node (in this case, `_expression`) + // the hidden node should not be added to the node parent cache. + assert_eq!(call_node.child_by_field_name("function").unwrap().parent(), Some(call_node)); +} + #[test] fn test_node_named_child() { let tree = parse_json_example(); @@ -627,6 +700,63 @@ fn test_node_is_named_but_aliased_as_anonymous() { assert_eq!(root_node.named_child(0).unwrap().kind(), "c"); } +#[test] +fn test_node_numeric_symbols_respect_simple_aliases() { + let mut parser = Parser::new(); + parser.set_language(get_language("python")).unwrap(); + + // Example 1: + // Python argument lists can contain "splat" arguments, which are not allowed within + // other expressions. This includes `parenthesized_list_splat` nodes like `(*b)`. These + // `parenthesized_list_splat` nodes are aliased as `parenthesized_expression`. Their numeric + // `symbol`, aka `kind_id` should match that of a normal `parenthesized_expression`. + let tree = parser.parse("(a((*b)))", None).unwrap(); + let root = tree.root_node(); + assert_eq!( + root.to_sexp(), + "(module (expression_statement (parenthesized_expression (call function: (identifier) arguments: (argument_list (parenthesized_expression (list_splat (identifier))))))))", + ); + + let outer_expr_node = root.child(0).unwrap().child(0).unwrap(); + assert_eq!(outer_expr_node.kind(), "parenthesized_expression"); + + let inner_expr_node = outer_expr_node + .named_child(0) + .unwrap() + .child_by_field_name("arguments") + .unwrap() + .named_child(0) + .unwrap(); + assert_eq!(inner_expr_node.kind(), "parenthesized_expression"); + assert_eq!(inner_expr_node.kind_id(), outer_expr_node.kind_id()); + + // Example 2: + // Ruby handles the unary (negative) and binary (minus) `-` operators using two different + // tokens. 
One or more of these is an external token that's aliased as `-`. Their numeric + // kind ids should match. + parser.set_language(get_language("ruby")).unwrap(); + let tree = parser.parse("-a - b", None).unwrap(); + let root = tree.root_node(); + assert_eq!( + root.to_sexp(), + "(program (binary left: (unary operand: (identifier)) right: (identifier)))", + ); + + let binary_node = root.child(0).unwrap(); + assert_eq!(binary_node.kind(), "binary"); + + let unary_minus_node = binary_node + .child_by_field_name("left") + .unwrap() + .child(0) + .unwrap(); + assert_eq!(unary_minus_node.kind(), "-"); + + let binary_minus_node = binary_node.child_by_field_name("operator").unwrap(); + assert_eq!(binary_minus_node.kind(), "-"); + assert_eq!(unary_minus_node.kind_id(), binary_minus_node.kind_id()); +} + fn get_all_nodes(tree: &Tree) -> Vec { let mut result = Vec::new(); let mut visited_children = false; diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 882f5963..b2b2560e 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -1,13 +1,14 @@ +use super::helpers::allocations; use super::helpers::edits::ReadRecorder; use super::helpers::fixtures::{get_language, get_test_language}; use crate::generate::generate_parser_for_grammar; use crate::parse::{perform_edit, Edit}; use std::sync::atomic::{AtomicUsize, Ordering}; use std::{thread, time}; -use tree_sitter::{InputEdit, LogType, Parser, Point, Range}; +use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range}; #[test] -fn test_basic_parsing() { +fn test_parsing_simple_string() { let mut parser = Parser::new(); parser.set_language(get_language("rust")).unwrap(); @@ -26,7 +27,11 @@ fn test_basic_parsing() { assert_eq!( root_node.to_sexp(), - "(source_file (struct_item (type_identifier) (field_declaration_list)) (function_item (identifier) (parameters) (block)))" + concat!( + "(source_file ", + "(struct_item name: (type_identifier) body: 
(field_declaration_list)) ", + "(function_item name: (identifier) parameters: (parameters) body: (block)))" + ) ); let struct_node = root_node.child(0).unwrap(); @@ -118,7 +123,17 @@ fn test_parsing_with_custom_utf8_input() { .unwrap(); let root = tree.root_node(); - assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); + assert_eq!( + root.to_sexp(), + concat!( + "(source_file ", + "(function_item ", + "(visibility_modifier) ", + "name: (identifier) ", + "parameters: (parameters) ", + "body: (block (integer_literal))))" + ) + ); assert_eq!(root.kind(), "source_file"); assert_eq!(root.has_error(), false); assert_eq!(root.child(0).unwrap().kind(), "function_item"); @@ -154,7 +169,10 @@ fn test_parsing_with_custom_utf16_input() { .unwrap(); let root = tree.root_node(); - assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); + assert_eq!( + root.to_sexp(), + "(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))" + ); assert_eq!(root.kind(), "source_file"); assert_eq!(root.has_error(), false); assert_eq!(root.child(0).unwrap().kind(), "function_item"); @@ -175,7 +193,10 @@ fn test_parsing_with_callback_returning_owned_strings() { .unwrap(); let root = tree.root_node(); - assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); + assert_eq!( + root.to_sexp(), + "(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))" + ); } #[test] @@ -192,7 +213,7 @@ fn test_parsing_text_with_byte_order_mark() { .unwrap(); assert_eq!( tree.root_node().to_sexp(), - "(source_file (function_item (identifier) (parameters) (block)))" + "(source_file (function_item name: (identifier) parameters: (parameters) 
body: (block)))" ); assert_eq!(tree.root_node().start_byte(), 2); @@ -200,7 +221,7 @@ fn test_parsing_text_with_byte_order_mark() { let mut tree = parser.parse("\u{FEFF}fn a() {}", None).unwrap(); assert_eq!( tree.root_node().to_sexp(), - "(source_file (function_item (identifier) (parameters) (block)))" + "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))" ); assert_eq!(tree.root_node().start_byte(), 3); @@ -216,7 +237,7 @@ fn test_parsing_text_with_byte_order_mark() { let mut tree = parser.parse(" \u{FEFF}fn a() {}", Some(&tree)).unwrap(); assert_eq!( tree.root_node().to_sexp(), - "(source_file (ERROR (UNEXPECTED 65279)) (function_item (identifier) (parameters) (block)))" + "(source_file (ERROR (UNEXPECTED 65279)) (function_item name: (identifier) parameters: (parameters) body: (block)))" ); assert_eq!(tree.root_node().start_byte(), 1); @@ -232,11 +253,52 @@ fn test_parsing_text_with_byte_order_mark() { let tree = parser.parse("\u{FEFF}fn a() {}", Some(&tree)).unwrap(); assert_eq!( tree.root_node().to_sexp(), - "(source_file (function_item (identifier) (parameters) (block)))" + "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))" ); assert_eq!(tree.root_node().start_byte(), 3); } +#[test] +fn test_parsing_invalid_chars_at_eof() { + let mut parser = Parser::new(); + parser.set_language(get_language("json")).unwrap(); + let tree = parser.parse(b"\xdf", None).unwrap(); + assert_eq!(tree.root_node().to_sexp(), "(ERROR (UNEXPECTED INVALID))"); +} + +#[test] +fn test_parsing_unexpected_null_characters_within_source() { + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + let tree = parser.parse(b"var \0 something;", None).unwrap(); + assert_eq!( + tree.root_node().to_sexp(), + "(program (variable_declaration (ERROR (UNEXPECTED '\\0')) (variable_declarator name: (identifier))))" + ); +} + +#[test] +fn test_parsing_ends_when_input_callback_returns_empty() 
{ + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + let mut i = 0; + let source = b"abcdefghijklmnoqrs"; + let tree = parser + .parse_with( + &mut |offset, _| { + i += 1; + if offset >= 6 { + b"" + } else { + &source[offset..usize::min(source.len(), offset + 3)] + } + }, + None, + ) + .unwrap(); + assert_eq!(tree.root_node().end_byte(), 6); +} + // Incremental parsing #[test] @@ -333,6 +395,18 @@ fn test_parsing_after_editing_end_of_code() { assert_eq!(recorder.strings_read(), vec![" * ", "abc.d)",]); } +#[test] +fn test_parsing_empty_file_with_reused_tree() { + let mut parser = Parser::new(); + parser.set_language(get_language("rust")).unwrap(); + + let tree = parser.parse("", None); + parser.parse("", tree.as_ref()); + + let tree = parser.parse("\n ", None); + parser.parse("\n ", tree.as_ref()); +} + // Thread safety #[test] @@ -388,7 +462,7 @@ fn test_parsing_on_multiple_threads() { #[test] fn test_parsing_cancelled_by_another_thread() { - let cancellation_flag = Box::new(AtomicUsize::new(0)); + let cancellation_flag = std::sync::Arc::new(AtomicUsize::new(0)); let mut parser = Parser::new(); parser.set_language(get_language("javascript")).unwrap(); @@ -409,9 +483,10 @@ fn test_parsing_cancelled_by_another_thread() { ); assert!(tree.is_some()); + let flag = cancellation_flag.clone(); let cancel_thread = thread::spawn(move || { thread::sleep(time::Duration::from_millis(100)); - cancellation_flag.store(1, Ordering::SeqCst); + flag.store(1, Ordering::SeqCst); }); // Infinite input @@ -547,6 +622,56 @@ fn test_parsing_with_a_timeout_and_a_reset() { ); } +#[test] +fn test_parsing_with_a_timeout_and_implicit_reset() { + allocations::record(|| { + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + + parser.set_timeout_micros(5); + let tree = parser.parse( + "[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", 
+ None, + ); + assert!(tree.is_none()); + + // Changing the parser's language implicitly resets, discarding + // the previous partial parse. + parser.set_language(get_language("json")).unwrap(); + parser.set_timeout_micros(0); + let tree = parser.parse( + "[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", + None, + ).unwrap(); + assert_eq!( + tree.root_node() + .named_child(0) + .unwrap() + .named_child(0) + .unwrap() + .kind(), + "null" + ); + }); +} + +#[test] +fn test_parsing_with_timeout_and_no_completion() { + allocations::record(|| { + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + + parser.set_timeout_micros(5); + let tree = parser.parse( + "[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", + None, + ); + assert!(tree.is_none()); + + // drop the parser when it has an unfinished parse + }); +} + // Included Ranges #[test] @@ -559,7 +684,9 @@ fn test_parsing_with_one_included_range() { let script_content_node = html_tree.root_node().child(1).unwrap().child(1).unwrap(); assert_eq!(script_content_node.kind(), "raw_text"); - parser.set_included_ranges(&[script_content_node.range()]); + parser + .set_included_ranges(&[script_content_node.range()]) + .unwrap(); parser.set_language(get_language("javascript")).unwrap(); let js_tree = parser.parse(source_code, None).unwrap(); @@ -599,26 +726,28 @@ fn test_parsing_with_multiple_included_ranges() { let close_quote_node = template_string_node.child(3).unwrap(); parser.set_language(get_language("html")).unwrap(); - parser.set_included_ranges(&[ - Range { - start_byte: open_quote_node.end_byte(), - start_point: open_quote_node.end_position(), - end_byte: interpolation_node1.start_byte(), - end_point: interpolation_node1.start_position(), - }, - Range { - start_byte: interpolation_node1.end_byte(), - start_point: 
interpolation_node1.end_position(), - end_byte: interpolation_node2.start_byte(), - end_point: interpolation_node2.start_position(), - }, - Range { - start_byte: interpolation_node2.end_byte(), - start_point: interpolation_node2.end_position(), - end_byte: close_quote_node.start_byte(), - end_point: close_quote_node.start_position(), - }, - ]); + parser + .set_included_ranges(&[ + Range { + start_byte: open_quote_node.end_byte(), + start_point: open_quote_node.end_position(), + end_byte: interpolation_node1.start_byte(), + end_point: interpolation_node1.start_position(), + }, + Range { + start_byte: interpolation_node1.end_byte(), + start_point: interpolation_node1.end_position(), + end_byte: interpolation_node2.start_byte(), + end_point: interpolation_node2.start_position(), + }, + Range { + start_byte: interpolation_node2.end_byte(), + start_point: interpolation_node2.end_position(), + end_byte: close_quote_node.start_byte(), + end_point: close_quote_node.start_position(), + }, + ]) + .unwrap(); let html_tree = parser.parse(source_code, None).unwrap(); assert_eq!( @@ -667,6 +796,47 @@ fn test_parsing_with_multiple_included_ranges() { ); } +#[test] +fn test_parsing_error_in_invalid_included_ranges() { + let mut parser = Parser::new(); + + // Ranges are not ordered + let error = parser + .set_included_ranges(&[ + Range { + start_byte: 23, + end_byte: 29, + start_point: Point::new(0, 23), + end_point: Point::new(0, 29), + }, + Range { + start_byte: 0, + end_byte: 5, + start_point: Point::new(0, 0), + end_point: Point::new(0, 5), + }, + Range { + start_byte: 50, + end_byte: 60, + start_point: Point::new(0, 50), + end_point: Point::new(0, 60), + }, + ]) + .unwrap_err(); + assert_eq!(error, IncludedRangesError(1)); + + // Range ends before it starts + let error = parser + .set_included_ranges(&[Range { + start_byte: 10, + end_byte: 5, + start_point: Point::new(0, 10), + end_point: Point::new(0, 5), + }]) + .unwrap_err(); + assert_eq!(error, IncludedRangesError(0)); +} 
+ #[test] fn test_parsing_utf16_code_with_errors_at_the_end_of_an_included_range() { let source_code = ""; @@ -677,12 +847,14 @@ fn test_parsing_utf16_code_with_errors_at_the_end_of_an_included_range() { let mut parser = Parser::new(); parser.set_language(get_language("javascript")).unwrap(); - parser.set_included_ranges(&[Range { - start_byte, - end_byte, - start_point: Point::new(0, start_byte), - end_point: Point::new(0, end_byte), - }]); + parser + .set_included_ranges(&[Range { + start_byte, + end_byte, + start_point: Point::new(0, start_byte), + end_point: Point::new(0, end_byte), + }]) + .unwrap(); let tree = parser.parse_utf16(&utf16_source_code, None).unwrap(); assert_eq!(tree.root_node().to_sexp(), "(program (ERROR (identifier)))"); } @@ -697,20 +869,22 @@ fn test_parsing_with_external_scanner_that_uses_included_range_boundaries() { let mut parser = Parser::new(); parser.set_language(get_language("javascript")).unwrap(); - parser.set_included_ranges(&[ - Range { - start_byte: range1_start_byte, - end_byte: range1_end_byte, - start_point: Point::new(0, range1_start_byte), - end_point: Point::new(0, range1_end_byte), - }, - Range { - start_byte: range2_start_byte, - end_byte: range2_end_byte, - start_point: Point::new(0, range2_start_byte), - end_point: Point::new(0, range2_end_byte), - }, - ]); + parser + .set_included_ranges(&[ + Range { + start_byte: range1_start_byte, + end_byte: range1_end_byte, + start_point: Point::new(0, range1_start_byte), + end_point: Point::new(0, range1_end_byte), + }, + Range { + start_byte: range2_start_byte, + end_byte: range2_end_byte, + start_point: Point::new(0, range2_start_byte), + end_point: Point::new(0, range2_end_byte), + }, + ]) + .unwrap(); let tree = parser.parse(source_code, None).unwrap(); let root = tree.root_node(); @@ -758,20 +932,22 @@ fn test_parsing_with_a_newly_excluded_range() { let directive_start = source_code.find("<%=").unwrap(); let directive_end = source_code.find("").unwrap(); let source_code_end 
= source_code.len(); - parser.set_included_ranges(&[ - Range { - start_byte: 0, - end_byte: directive_start, - start_point: Point::new(0, 0), - end_point: Point::new(0, directive_start), - }, - Range { - start_byte: directive_end, - end_byte: source_code_end, - start_point: Point::new(0, directive_end), - end_point: Point::new(0, source_code_end), - }, - ]); + parser + .set_included_ranges(&[ + Range { + start_byte: 0, + end_byte: directive_start, + start_point: Point::new(0, 0), + end_point: Point::new(0, directive_start), + }, + Range { + start_byte: directive_end, + end_byte: source_code_end, + start_point: Point::new(0, directive_end), + end_point: Point::new(0, source_code_end), + }, + ]) + .unwrap(); let tree = parser.parse(&source_code, Some(&first_tree)).unwrap(); assert_eq!( @@ -809,59 +985,73 @@ fn test_parsing_with_a_newly_excluded_range() { #[test] fn test_parsing_with_a_newly_included_range() { - let source_code = "
<%= foo() %>
<%= bar() %>"; - let first_code_start_index = source_code.find(" foo").unwrap(); - let first_code_end_index = first_code_start_index + 7; - let second_code_start_index = source_code.find(" bar").unwrap(); - let second_code_end_index = second_code_start_index + 7; - let ranges = [ - Range { - start_byte: first_code_start_index, - end_byte: first_code_end_index, - start_point: Point::new(0, first_code_start_index), - end_point: Point::new(0, first_code_end_index), - }, - Range { - start_byte: second_code_start_index, - end_byte: second_code_end_index, - start_point: Point::new(0, second_code_start_index), - end_point: Point::new(0, second_code_end_index), - }, - ]; + let source_code = "
<%= foo() %>
<%= bar() %><%= baz() %>"; + let range1_start = source_code.find(" foo").unwrap(); + let range2_start = source_code.find(" bar").unwrap(); + let range3_start = source_code.find(" baz").unwrap(); + let range1_end = range1_start + 7; + let range2_end = range2_start + 7; + let range3_end = range3_start + 7; // Parse only the first code directive as JavaScript let mut parser = Parser::new(); parser.set_language(get_language("javascript")).unwrap(); - parser.set_included_ranges(&ranges[0..1]); - let first_tree = parser.parse(source_code, None).unwrap(); + parser + .set_included_ranges(&[simple_range(range1_start, range1_end)]) + .unwrap(); + let tree = parser.parse(source_code, None).unwrap(); assert_eq!( - first_tree.root_node().to_sexp(), + tree.root_node().to_sexp(), concat!( "(program", " (expression_statement (call_expression function: (identifier) arguments: (arguments))))", ) ); - // Parse both the code directives as JavaScript, using the old tree as a reference. - parser.set_included_ranges(&ranges); - let tree = parser.parse(&source_code, Some(&first_tree)).unwrap(); + // Parse both the first and third code directives as JavaScript, using the old tree as a + // reference. 
+ parser + .set_included_ranges(&[ + simple_range(range1_start, range1_end), + simple_range(range3_start, range3_end), + ]) + .unwrap(); + let tree2 = parser.parse(&source_code, Some(&tree)).unwrap(); assert_eq!( - tree.root_node().to_sexp(), + tree2.root_node().to_sexp(), concat!( "(program", " (expression_statement (call_expression function: (identifier) arguments: (arguments)))", " (expression_statement (call_expression function: (identifier) arguments: (arguments))))", ) ); - assert_eq!( - tree.changed_ranges(&first_tree).collect::>(), - vec![Range { - start_byte: first_code_end_index + 1, - end_byte: second_code_end_index + 1, - start_point: Point::new(0, first_code_end_index + 1), - end_point: Point::new(0, second_code_end_index + 1), - }] + tree2.changed_ranges(&tree).collect::>(), + &[simple_range(range1_end, range3_end)] + ); + + // Parse all three code directives as JavaScript, using the old tree as a + // reference. + parser + .set_included_ranges(&[ + simple_range(range1_start, range1_end), + simple_range(range2_start, range2_end), + simple_range(range3_start, range3_end), + ]) + .unwrap(); + let tree3 = parser.parse(&source_code, Some(&tree)).unwrap(); + assert_eq!( + tree3.root_node().to_sexp(), + concat!( + "(program", + " (expression_statement (call_expression function: (identifier) arguments: (arguments)))", + " (expression_statement (call_expression function: (identifier) arguments: (arguments)))", + " (expression_statement (call_expression function: (identifier) arguments: (arguments))))", + ) + ); + assert_eq!( + tree3.changed_ranges(&tree2).collect::>(), + &[simple_range(range2_start + 1, range2_end - 1)] ); } @@ -899,20 +1089,22 @@ fn test_parsing_with_included_ranges_and_missing_tokens() { // There's a missing `a` token at the beginning of the code. It must be inserted // at the beginning of the first included range, not at {0, 0}. 
let source_code = "__bc__bc__"; - parser.set_included_ranges(&[ - Range { - start_byte: 2, - end_byte: 4, - start_point: Point::new(0, 2), - end_point: Point::new(0, 4), - }, - Range { - start_byte: 6, - end_byte: 8, - start_point: Point::new(0, 6), - end_point: Point::new(0, 8), - }, - ]); + parser + .set_included_ranges(&[ + Range { + start_byte: 2, + end_byte: 4, + start_point: Point::new(0, 2), + end_point: Point::new(0, 4), + }, + Range { + start_byte: 6, + end_byte: 8, + start_point: Point::new(0, 6), + end_point: Point::new(0, 8), + }, + ]) + .unwrap(); let tree = parser.parse(source_code, None).unwrap(); let root = tree.root_node(); @@ -923,3 +1115,12 @@ fn test_parsing_with_included_ranges_and_missing_tokens() { assert_eq!(root.start_byte(), 2); assert_eq!(root.child(3).unwrap().start_byte(), 4); } + +fn simple_range(start: usize, end: usize) -> Range { + Range { + start_byte: start, + end_byte: end, + start_point: Point::new(0, start), + end_point: Point::new(0, end), + } +} diff --git a/cli/src/tests/pathological_test.rs b/cli/src/tests/pathological_test.rs new file mode 100644 index 00000000..7ebd5439 --- /dev/null +++ b/cli/src/tests/pathological_test.rs @@ -0,0 +1,15 @@ +use super::helpers::allocations; +use super::helpers::fixtures::get_language; +use tree_sitter::Parser; + +#[test] +fn test_pathological_example_1() { + let language = "cpp"; + let source = r#"*ss(qqX, - b: Option, -} - -#[test] -fn test_walk_with_properties_with_nth_child() { - let language = get_language("javascript"); - let property_sheet = PropertySheet::::new( - language, - &generate_property_sheet_string( - "/some/path.css", - " - binary_expression > identifier:nth-child(2) { - a: x; - } - - binary_expression > identifier { - a: y; - } - - identifier { - a: z; - } - ", - ), - ) - .unwrap(); - - let source_code = "a = b || c;"; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source_code, None).unwrap(); - - let mut cursor = 
tree.walk_with_properties(&property_sheet, source_code.as_bytes()); - assert_eq!(cursor.node().kind(), "program"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "expression_statement"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "assignment_expression"); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("z".to_string()), - b: None - } - ); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "="); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "binary_expression"); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("y".to_string()), - b: None - } - ); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "||"); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("x".to_string()), - b: None - } - ); -} - -#[test] -fn test_walk_with_properties_with_regexes() { - let language = get_language("javascript"); - let property_sheet = PropertySheet::::new( - language, - &generate_property_sheet_string( - "/some/path.css", - " - identifier { - &[text='^[A-Z]'] { - a: y; - } - - &[text='^[A-Z_]+$'] { - a: z; - } - - a: x; - } - ", - ), - ) - .unwrap(); - - let source_code = "const ABC = Def(ghi);"; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source_code, None).unwrap(); - - let mut cursor = tree.walk_with_properties(&property_sheet, source_code.as_bytes()); - assert_eq!(cursor.node().kind(), "program"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "lexical_declaration"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), 
"const"); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "variable_declarator"); - - // The later selector with a text regex overrides the earlier one. - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("z".to_string()), - b: None - } - ); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "="); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "call_expression"); - - // The selectors with text regexes override the selector without one. - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("y".to_string()), - b: None - } - ); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "arguments"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "("); - - // This node doesn't match either of the regexes. 
- assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("x".to_string()), - b: None - } - ); -} - -#[test] -fn test_walk_with_properties_based_on_fields() { - let language = get_language("javascript"); - let property_sheet = PropertySheet::::new( - language, - &generate_property_sheet_string( - "/some/path.css", - " - arrow_function > .parameter { - a: x; - } - - function_declaration { - & > .parameters > identifier { - a: y; - } - - & > .name { - b: z; - } - } - - identifier { - a: w; - } - ", - ), - ) - .unwrap(); - - let source_code = "function a(b) { return c => c + b; }"; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source_code, None).unwrap(); - let mut cursor = tree.walk_with_properties(&property_sheet, source_code.as_bytes()); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "function_declaration"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "function"); - assert_eq!(*cursor.node_properties(), Properties::default()); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("w".to_string()), - b: Some("z".to_string()) - } - ); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "formal_parameters"); - assert_eq!(*cursor.node_properties(), Properties::default()); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "("); - assert_eq!(*cursor.node_properties(), Properties::default()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("y".to_string()), - b: None, - } - ); - - assert!(cursor.goto_parent()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "statement_block"); 
- assert!(cursor.goto_first_child()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "return_statement"); - assert!(cursor.goto_first_child()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "arrow_function"); - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - *cursor.node_properties(), - Properties { - a: Some("x".to_string()), - b: None, - } - ); -} - -fn generate_property_sheet_string(path: &str, css: &str) -> String { - serde_json::to_string(&properties::generate_property_sheet(path, css, &HashSet::new()).unwrap()) - .unwrap() -} diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs new file mode 100644 index 00000000..02f222bb --- /dev/null +++ b/cli/src/tests/query_test.rs @@ -0,0 +1,3027 @@ +use super::helpers::allocations; +use super::helpers::fixtures::get_language; +use lazy_static::lazy_static; +use std::env; +use std::fmt::Write; +use tree_sitter::{ + Language, Node, Parser, Query, QueryCapture, QueryCursor, QueryError, QueryErrorKind, + QueryMatch, QueryPredicate, QueryPredicateArg, QueryProperty, +}; + +lazy_static! { + static ref EXAMPLE_FILTER: Option = env::var("TREE_SITTER_TEST_EXAMPLE_FILTER").ok(); +} + +#[test] +fn test_query_errors_on_invalid_syntax() { + allocations::record(|| { + let language = get_language("javascript"); + + assert!(Query::new(language, "(if_statement)").is_ok()); + assert!(Query::new( + language, + "(if_statement condition:(parenthesized_expression (identifier)))" + ) + .is_ok()); + + // Mismatched parens + assert_eq!( + Query::new(language, "(if_statement").unwrap_err().message, + [ + "(if_statement", // + " ^", + ] + .join("\n") + ); + assert_eq!( + Query::new(language, "; comment 1\n; comment 2\n (if_statement))") + .unwrap_err() + .message, + [ + " (if_statement))", // + " ^", + ] + .join("\n") + ); + + // Return an error at the *beginning* of a bare identifier not followed a colon. 
+ // If there's a colon but no pattern, return an error at the end of the colon. + assert_eq!( + Query::new(language, "(if_statement identifier)") + .unwrap_err() + .message, + [ + "(if_statement identifier)", // + " ^", + ] + .join("\n") + ); + assert_eq!( + Query::new(language, "(if_statement condition:)") + .unwrap_err() + .message, + [ + "(if_statement condition:)", // + " ^", + ] + .join("\n") + ); + + // Return an error at the beginning of an unterminated string. + assert_eq!( + Query::new(language, r#"(identifier) "h "#) + .unwrap_err() + .message, + [ + r#"(identifier) "h "#, // + r#" ^"#, + ] + .join("\n") + ); + + assert_eq!( + Query::new(language, r#"((identifier) ()"#) + .unwrap_err() + .message, + [ + "((identifier) ()", // + " ^", + ] + .join("\n") + ); + assert_eq!( + Query::new(language, r#"((identifier) [])"#) + .unwrap_err() + .message, + [ + "((identifier) [])", // + " ^", + ] + .join("\n") + ); + assert_eq!( + Query::new(language, r#"((identifier) (#a)"#) + .unwrap_err() + .message, + [ + "((identifier) (#a)", // + " ^", + ] + .join("\n") + ); + assert_eq!( + Query::new(language, r#"((identifier) @x (#eq? @x a"#) + .unwrap_err() + .message, + [ + r#"((identifier) @x (#eq? 
@x a"#, + r#" ^"#, + ] + .join("\n") + ); + }); +} + +#[test] +fn test_query_errors_on_invalid_symbols() { + allocations::record(|| { + let language = get_language("javascript"); + + assert_eq!( + Query::new(language, "(clas)").unwrap_err(), + QueryError { + row: 0, + offset: 1, + column: 1, + kind: QueryErrorKind::NodeType, + message: "clas".to_string() + } + ); + assert_eq!( + Query::new(language, "(if_statement (arrayyyyy))").unwrap_err(), + QueryError { + row: 0, + offset: 15, + column: 15, + kind: QueryErrorKind::NodeType, + message: "arrayyyyy".to_string() + }, + ); + assert_eq!( + Query::new(language, "(if_statement condition: (non_existent3))").unwrap_err(), + QueryError { + row: 0, + offset: 26, + column: 26, + kind: QueryErrorKind::NodeType, + message: "non_existent3".to_string() + }, + ); + assert_eq!( + Query::new(language, "(if_statement condit: (identifier))").unwrap_err(), + QueryError { + row: 0, + offset: 14, + column: 14, + kind: QueryErrorKind::Field, + message: "condit".to_string() + }, + ); + assert_eq!( + Query::new(language, "(if_statement conditioning: (identifier))").unwrap_err(), + QueryError { + row: 0, + offset: 14, + column: 14, + kind: QueryErrorKind::Field, + message: "conditioning".to_string() + } + ); + }); +} + +#[test] +fn test_query_errors_on_invalid_predicates() { + allocations::record(|| { + let language = get_language("javascript"); + + assert_eq!( + Query::new(language, "((identifier) @id (@id))").unwrap_err(), + QueryError { + kind: QueryErrorKind::Syntax, + row: 0, + column: 19, + offset: 19, + message: [ + "((identifier) @id (@id))", // + " ^" + ] + .join("\n") + } + ); + assert_eq!( + Query::new(language, "((identifier) @id (#eq? @id))").unwrap_err(), + QueryError { + kind: QueryErrorKind::Predicate, + row: 0, + column: 0, + offset: 0, + message: "Wrong number of arguments to #eq? predicate. Expected 2, got 1." + .to_string() + } + ); + assert_eq!( + Query::new(language, "((identifier) @id (#eq? 
@id @ok))").unwrap_err(), + QueryError { + kind: QueryErrorKind::Capture, + row: 0, + column: 29, + offset: 29, + message: "ok".to_string(), + } + ); + }); +} + +#[test] +fn test_query_errors_on_impossible_patterns() { + let js_lang = get_language("javascript"); + let rb_lang = get_language("ruby"); + + allocations::record(|| { + assert_eq!( + Query::new( + js_lang, + "(binary_expression left: (identifier) left: (identifier))" + ), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 38, + column: 38, + message: [ + "(binary_expression left: (identifier) left: (identifier))", + " ^" + ] + .join("\n"), + }) + ); + + Query::new( + js_lang, + "(function_declaration name: (identifier) (statement_block))", + ) + .unwrap(); + assert_eq!( + Query::new(js_lang, "(function_declaration name: (statement_block))"), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 22, + column: 22, + message: [ + "(function_declaration name: (statement_block))", + " ^", + ] + .join("\n") + }) + ); + + Query::new(rb_lang, "(call receiver:(call))").unwrap(); + assert_eq!( + Query::new(rb_lang, "(call receiver:(binary))"), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 6, + column: 6, + message: [ + "(call receiver:(binary))", // + " ^", + ] + .join("\n") + }) + ); + + Query::new( + js_lang, + "[ + (function (identifier)) + (function_declaration (identifier)) + (generator_function_declaration (identifier)) + ]", + ) + .unwrap(); + assert_eq!( + Query::new( + js_lang, + "[ + (function (identifier)) + (function_declaration (object)) + (generator_function_declaration (identifier)) + ]", + ), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 2, + offset: 88, + column: 42, + message: [ + " (function_declaration (object))", // + " ^", + ] + .join("\n") + }) + ); + + assert_eq!( + Query::new(js_lang, "(identifier (identifier))",), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 12, + column: 12, 
+ message: [ + "(identifier (identifier))", // + " ^", + ] + .join("\n") + }) + ); + assert_eq!( + Query::new(js_lang, "(true (true))",), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 6, + column: 6, + message: [ + "(true (true))", // + " ^", + ] + .join("\n") + }) + ); + + Query::new( + js_lang, + "(if_statement + condition: (parenthesized_expression (_expression) @cond))", + ) + .unwrap(); + + assert_eq!( + Query::new(js_lang, "(if_statement condition: (_expression))",), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 14, + column: 14, + message: [ + "(if_statement condition: (_expression))", // + " ^", + ] + .join("\n") + }) + ); + }); +} + +#[test] +fn test_query_verifies_possible_patterns_with_aliased_parent_nodes() { + allocations::record(|| { + let ruby = get_language("ruby"); + + Query::new(ruby, "(destructured_parameter (identifier))").unwrap(); + + assert_eq!( + Query::new(ruby, "(destructured_parameter (string))",), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 24, + column: 24, + message: [ + "(destructured_parameter (string))", // + " ^", + ] + .join("\n") + }) + ); + }); +} + +#[test] +fn test_query_matches_with_simple_pattern() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + "(function_declaration name: (identifier) @fn-name)", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + "function one() { two(); function three() {} }", + &[ + (0, vec![("fn-name", "one")]), + (0, vec![("fn-name", "three")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_multiple_on_same_root() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + "(class_declaration + name: (identifier) @the-class-name + (class_body + (method_definition + name: (property_identifier) @the-method-name)))", + ) + .unwrap(); + + assert_query_matches( + language, + 
&query, + " + class Person { + // the constructor + constructor(name) { this.name = name; } + + // the getter + getFullName() { return this.name; } + } + ", + &[ + ( + 0, + vec![ + ("the-class-name", "Person"), + ("the-method-name", "constructor"), + ], + ), + ( + 0, + vec![ + ("the-class-name", "Person"), + ("the-method-name", "getFullName"), + ], + ), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_multiple_patterns_different_roots() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (function_declaration name:(identifier) @fn-def) + (call_expression function:(identifier) @fn-ref) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + function f1() { + f2(f3()); + } + ", + &[ + (0, vec![("fn-def", "f1")]), + (1, vec![("fn-ref", "f2")]), + (1, vec![("fn-ref", "f3")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_multiple_patterns_same_root() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (pair + key: (property_identifier) @method-def + value: (function)) + + (pair + key: (property_identifier) @method-def + value: (arrow_function)) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + a = { + b: () => { return c; }, + d: function() { return d; } + }; + ", + &[ + (1, vec![("method-def", "b")]), + (0, vec![("method-def", "d")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_nesting_and_no_fields() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (array + (array + (identifier) @x1 + (identifier) @x2)) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + [[a]]; + [[c, d], [e, f, g, h]]; + [[h], [i]]; + ", + &[ + (0, vec![("x1", "c"), ("x2", "d")]), + (0, vec![("x1", "e"), ("x2", "f")]), + (0, vec![("x1", "e"), ("x2", "g")]), + (0, vec![("x1", "f"), ("x2", "g")]), + 
(0, vec![("x1", "e"), ("x2", "h")]), + (0, vec![("x1", "f"), ("x2", "h")]), + (0, vec![("x1", "g"), ("x2", "h")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_many_results() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new(language, "(array (identifier) @element)").unwrap(); + + assert_query_matches( + language, + &query, + &"[hello];\n".repeat(50), + &vec![(0, vec![("element", "hello")]); 50], + ); + }); +} + +#[test] +fn test_query_matches_with_many_overlapping_results() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (call_expression + function: (member_expression + property: (property_identifier) @method)) + (call_expression + function: (identifier) @function) + ((identifier) @constant + (#match? @constant "[A-Z\\d_]+")) + "#, + ) + .unwrap(); + + let count = 1024; + + // Deeply nested chained function calls: + // a + // .foo(bar(BAZ)) + // .foo(bar(BAZ)) + // .foo(bar(BAZ)) + // ... 
+ let mut source = "a".to_string(); + source += &"\n .foo(bar(BAZ))".repeat(count); + + assert_query_matches( + language, + &query, + &source, + &[ + (0, vec![("method", "foo")]), + (1, vec![("function", "bar")]), + (2, vec![("constant", "BAZ")]), + ] + .iter() + .cloned() + .cycle() + .take(3 * count) + .collect::>(), + ); + }); +} + +#[test] +fn test_query_matches_capturing_error_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (ERROR (identifier) @the-error-identifier) @the-error + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + "function a(b,, c, d :e:) {}", + &[(0, vec![("the-error", ":e:"), ("the-error-identifier", "e")])], + ); + }); +} + +#[test] +fn test_query_matches_with_named_wildcard() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (return_statement (_) @the-return-value) + (binary_expression operator: _ @the-operator) + ", + ) + .unwrap(); + + let source = "return a + b - c;"; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + + assert_eq!( + collect_matches(matches, &query, source), + &[ + (0, vec![("the-return-value", "a + b - c")]), + (1, vec![("the-operator", "+")]), + (1, vec![("the-operator", "-")]), + ] + ); + }); +} + +#[test] +fn test_query_matches_with_wildcard_at_the_root() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (_ + (comment) @doc + . 
+ (function_declaration + name: (identifier) @name)) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + "/* one */ var x; /* two */ function y() {} /* three */ class Z {}", + &[(0, vec![("doc", "/* two */"), ("name", "y")])], + ); + + let query = Query::new( + language, + " + (_ (string) @a) + (_ (number) @b) + (_ (true) @c) + (_ (false) @d) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + "['hi', x(true), {y: false}]", + &[ + (0, vec![("a", "'hi'")]), + (2, vec![("c", "true")]), + (3, vec![("d", "false")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_immediate_siblings() { + allocations::record(|| { + let language = get_language("python"); + + // The immediate child operator '.' can be used in three similar ways: + // 1. Before the first child node in a pattern, it means that there cannot be any + // named siblings before that child node. + // 2. After the last child node in a pattern, it means that there cannot be any named + // sibling after that child node. + // 2. Between two child nodes in a pattern, it specifies that there cannot be any + // named siblings between those two child snodes. + let query = Query::new( + language, + " + (dotted_name + (identifier) @parent + . + (identifier) @child) + (dotted_name + (identifier) @last-child + .) + (list + . + (_) @first-element) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + "import a.b.c.d; return [w, [1, y], z]", + &[ + (0, vec![("parent", "a"), ("child", "b")]), + (0, vec![("parent", "b"), ("child", "c")]), + (0, vec![("parent", "c"), ("child", "d")]), + (1, vec![("last-child", "d")]), + (2, vec![("first-element", "w")]), + (2, vec![("first-element", "1")]), + ], + ); + + let query = Query::new( + language, + " + (block . (_) @first-stmt) + (block (_) @stmt) + (block (_) @last-stmt .) 
+ ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + if a: + b() + c() + if d(): e(); f() + g() + ", + &[ + (0, vec![("first-stmt", "b()")]), + (1, vec![("stmt", "b()")]), + (1, vec![("stmt", "c()")]), + (1, vec![("stmt", "if d(): e(); f()")]), + (0, vec![("first-stmt", "e()")]), + (1, vec![("stmt", "e()")]), + (1, vec![("stmt", "f()")]), + (2, vec![("last-stmt", "f()")]), + (1, vec![("stmt", "g()")]), + (2, vec![("last-stmt", "g()")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_repeated_leaf_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + + let query = Query::new( + language, + " + ( + (comment)+ @doc + . + (class_declaration + name: (identifier) @name) + ) + + ( + (comment)+ @doc + . + (function_declaration + name: (identifier) @name) + ) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + // one + // two + a(); + + // three + { + // four + // five + // six + class B {} + + // seven + c(); + + // eight + function d() {} + } + ", + &[ + ( + 0, + vec![ + ("doc", "// four"), + ("doc", "// five"), + ("doc", "// six"), + ("name", "B"), + ], + ), + (1, vec![("doc", "// eight"), ("name", "d")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_optional_nodes_inside_of_repetitions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new(language, r#"(array (","? 
(number) @num)+)"#).unwrap(); + + assert_query_matches( + language, + &query, + r#" + var a = [1, 2, 3, 4] + "#, + &[( + 0, + vec![("num", "1"), ("num", "2"), ("num", "3"), ("num", "4")], + )], + ); + }); +} + +#[test] +fn test_query_matches_with_top_level_repetitions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (comment)+ @doc + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#" + // a + // b + // c + + d() + + // e + "#, + &[ + (0, vec![("doc", "// a"), ("doc", "// b"), ("doc", "// c")]), + (0, vec![("doc", "// e")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_non_terminal_repetitions_within_root() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (_ + (expression_statement + (identifier) @id)+) + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#" + a; + b; + c; + "#, + &[(0, vec![("id", "a"), ("id", "b"), ("id", "c")])], + ); + }); +} + +#[test] +fn test_query_matches_with_nested_repetitions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (variable_declaration + (","? (variable_declarator name: (identifier) @x))+)+ + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#" + var a = b, c, d + var e, f + + // more + var g + "#, + &[ + ( + 0, + vec![("x", "a"), ("x", "c"), ("x", "d"), ("x", "e"), ("x", "f")], + ), + (0, vec![("x", "g")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_multiple_repetition_patterns_that_intersect_other_pattern() { + allocations::record(|| { + let language = get_language("javascript"); + + // When this query sees a comment, it must keep track of several potential + // matches: up to two for each pattern that begins with a comment. 
+ let query = Query::new( + language, + r#" + (call_expression + function: (member_expression + property: (property_identifier) @name)) @ref.method + + ((comment)* @doc (function_declaration)) + ((comment)* @doc (generator_function_declaration)) + ((comment)* @doc (class_declaration)) + ((comment)* @doc (lexical_declaration)) + ((comment)* @doc (variable_declaration)) + ((comment)* @doc (method_definition)) + + (comment) @comment + "#, + ) + .unwrap(); + + // Here, a series of comments occurs in the middle of a match of the first + // pattern. To avoid exceeding the storage limits and discarding that outer + // match, the comment-related matches need to be managed efficiently. + let source = format!( + "theObject\n{}\n.theMethod()", + " // the comment\n".repeat(64) + ); + + assert_query_matches( + language, + &query, + &source, + &vec![(7, vec![("comment", "// the comment")]); 64] + .into_iter() + .chain(vec![( + 0, + vec![("ref.method", source.as_str()), ("name", "theMethod")], + )]) + .collect::>(), + ); + }); +} + +#[test] +fn test_query_matches_with_leading_zero_or_more_repeated_leaf_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + + let query = Query::new( + language, + " + ( + (comment)* @doc + . + (function_declaration + name: (identifier) @name) + ) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + function a() { + // one + var b; + + function c() {} + + // two + // three + var d; + + // four + // five + function e() { + + } + } + + // six + ", + &[ + (0, vec![("name", "a")]), + (0, vec![("name", "c")]), + ( + 0, + vec![("doc", "// four"), ("doc", "// five"), ("name", "e")], + ), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_trailing_optional_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + + let query = Query::new( + language, + " + (class_declaration + name: (identifier) @class + (class_heritage + (identifier) @superclass)?) 
+ ", + ) + .unwrap(); + + assert_query_matches(language, &query, "class A {}", &[(0, vec![("class", "A")])]); + + assert_query_matches( + language, + &query, + " + class A {} + class B extends C {} + class D extends (E.F) {} + ", + &[ + (0, vec![("class", "A")]), + (0, vec![("class", "B"), ("superclass", "C")]), + (0, vec![("class", "D")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_nested_optional_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + + // A function call, optionally containing a function call, which optionally contains a number + let query = Query::new( + language, + " + (call_expression + function: (identifier) @outer-fn + arguments: (arguments + (call_expression + function: (identifier) @inner-fn + arguments: (arguments + (number)? @num))?)) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#" + a(b, c(), d(null, 1, 2)) + e() + f(g()) + "#, + &[ + (0, vec![("outer-fn", "a"), ("inner-fn", "c")]), + (0, vec![("outer-fn", "c")]), + (0, vec![("outer-fn", "a"), ("inner-fn", "d"), ("num", "1")]), + (0, vec![("outer-fn", "a"), ("inner-fn", "d"), ("num", "2")]), + (0, vec![("outer-fn", "d")]), + (0, vec![("outer-fn", "e")]), + (0, vec![("outer-fn", "f"), ("inner-fn", "g")]), + (0, vec![("outer-fn", "g")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_repeated_internal_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (_ + (method_definition + (decorator (identifier) @deco)+ + name: (property_identifier) @name)) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + class A { + @c + @d + e() {} + } + ", + &[(0, vec![("deco", "c"), ("deco", "d"), ("name", "e")])], + ); + }) +} + +#[test] +fn test_query_matches_with_simple_alternatives() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (pair + key: 
[(property_identifier) (string)] @key + value: [(function) @val1 (arrow_function) @val2]) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + a = { + b: c, + 'd': e => f, + g: { + h: function i() {}, + 'x': null, + j: _ => k + }, + 'l': function m() {}, + }; + ", + &[ + (0, vec![("key", "'d'"), ("val2", "e => f")]), + (0, vec![("key", "h"), ("val1", "function i() {}")]), + (0, vec![("key", "j"), ("val2", "_ => k")]), + (0, vec![("key", "'l'"), ("val1", "function m() {}")]), + ], + ); + }) +} + +#[test] +fn test_query_matches_with_alternatives_in_repetitions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (array + [(identifier) (string)] @el + . + ( + "," + . + [(identifier) (string)] @el + )*) + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + a = [b, 'c', d, 1, e, 'f', 'g', h]; + ", + &[ + (0, vec![("el", "b"), ("el", "'c'"), ("el", "d")]), + ( + 0, + vec![("el", "e"), ("el", "'f'"), ("el", "'g'"), ("el", "h")], + ), + ], + ); + }) +} + +#[test] +fn test_query_matches_with_alternatives_at_root() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + [ + "if" + "else" + "function" + "throw" + "return" + ] @keyword + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + function a(b, c, d) { + if (b) { + return c; + } else { + throw d; + } + } + ", + &[ + (0, vec![("keyword", "function")]), + (0, vec![("keyword", "if")]), + (0, vec![("keyword", "return")]), + (0, vec![("keyword", "else")]), + (0, vec![("keyword", "throw")]), + ], + ); + }) +} + +#[test] +fn test_query_matches_with_alternatives_under_fields() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (assignment_expression + left: [ + (identifier) @variable + (member_expression property: (property_identifier) @variable) + ]) + "#, + ) 
+ .unwrap(); + + assert_query_matches( + language, + &query, + " + a = b; + b = c.d; + e.f = g; + h.i = j.k; + ", + &[ + (0, vec![("variable", "a")]), + (0, vec![("variable", "b")]), + (0, vec![("variable", "f")]), + (0, vec![("variable", "i")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_in_language_with_simple_aliases() { + allocations::record(|| { + let language = get_language("html"); + + // HTML uses different tokens to track start tags names, end + // tag names, script tag names, and style tag names. All of + // these tokens are aliased to `tag_name`. + let query = Query::new(language, "(tag_name) @tag").unwrap(); + + assert_query_matches( + language, + &query, + " +
+ + +
+ ", + &[ + (0, vec![("tag", "div")]), + (0, vec![("tag", "script")]), + (0, vec![("tag", "script")]), + (0, vec![("tag", "style")]), + (0, vec![("tag", "style")]), + (0, vec![("tag", "div")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_different_tokens_with_the_same_string_value() { + allocations::record(|| { + // In Rust, there are two '<' tokens: one for the binary operator, + // and one with higher precedence for generics. + let language = get_language("rust"); + let query = Query::new( + language, + r#" + "<" @less + ">" @greater + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + "const A: B = d < e || f > g;", + &[ + (0, vec![("less", "<")]), + (1, vec![("greater", ">")]), + (0, vec![("less", "<")]), + (1, vec![("greater", ">")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_too_many_permutations_to_track() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (array (identifier) @pre (identifier) @post) + ", + ) + .unwrap(); + + let mut source = "hello, ".repeat(50); + source.insert(0, '['); + source.push_str("];"); + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(&source)); + + // For this pathological query, some match permutations will be dropped. + // Just check that a subset of the results are returned, and crash or + // leak occurs. 
+ assert_eq!( + collect_matches(matches, &query, source.as_str())[0], + (0, vec![("pre", "hello"), ("post", "hello")]), + ); + }); +} + +#[test] +fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + ( + (comment) @doc + ; not immediate + (class_declaration) @class + ) + + (call_expression + function: [ + (identifier) @function + (member_expression property: (property_identifier) @method) + ]) + ", + ) + .unwrap(); + + let source = "/* hi */ a.b(); ".repeat(50); + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(&source)); + + assert_eq!( + collect_matches(matches, &query, source.as_str()), + vec![(1, vec![("method", "b")]); 50], + ); + }); +} + +#[test] +fn test_query_matches_with_anonymous_tokens() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + ";" @punctuation + "&&" @operator + "\"" @quote + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#"foo(a && "b");"#, + &[ + (1, vec![("operator", "&&")]), + (2, vec![("quote", "\"")]), + (2, vec![("quote", "\"")]), + (0, vec![("punctuation", ";")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_supertypes() { + allocations::record(|| { + let language = get_language("python"); + let query = Query::new( + language, + r#" + (argument_list (expression) @arg) + + (keyword_argument + value: (expression) @kw_arg) + + (assignment + left: (identifier) @var_def) + + (primary_expression/identifier) @var_ref + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + a = b.c( + [d], + # a comment + e=f + ) + ", + &[ + (2, vec![("var_def", "a")]), + (3, vec![("var_ref", "b")]), + (0, 
vec![("arg", "[d]")]), + (3, vec![("var_ref", "d")]), + (1, vec![("kw_arg", "f")]), + (3, vec![("var_ref", "f")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_within_byte_range() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new(language, "(identifier) @element").unwrap(); + + let source = "[a, b, c, d, e, f, g]"; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let mut cursor = QueryCursor::new(); + let matches = + cursor + .set_byte_range(5, 15) + .matches(&query, tree.root_node(), to_callback(source)); + + assert_eq!( + collect_matches(matches, &query, source), + &[ + (0, vec![("element", "c")]), + (0, vec![("element", "d")]), + (0, vec![("element", "e")]), + ] + ); + }); +} + +#[test] +fn test_query_captures_within_byte_range() { + allocations::record(|| { + let language = get_language("c"); + let query = Query::new( + language, + " + (call_expression + function: (identifier) @function + arguments: (argument_list (string_literal) @string.arg)) + + (string_literal) @string + ", + ) + .unwrap(); + + let source = r#"DEFUN ("safe-length", Fsafe_length, Ssafe_length, 1, 1, 0)"#; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let mut cursor = QueryCursor::new(); + let captures = + cursor + .set_byte_range(3, 27) + .captures(&query, tree.root_node(), to_callback(source)); + + assert_eq!( + collect_captures(captures, &query, source), + &[ + ("function", "DEFUN"), + ("string.arg", "\"safe-length\""), + ("string", "\"safe-length\""), + ] + ); + }); +} + +#[test] +fn test_query_matches_different_queries_same_cursor() { + allocations::record(|| { + let language = get_language("javascript"); + let query1 = Query::new( + language, + " + (array (identifier) @id1) + ", + ) + .unwrap(); + let query2 = Query::new( + language, + " + (array (identifier) 
@id1) + (pair (identifier) @id2) + ", + ) + .unwrap(); + let query3 = Query::new( + language, + " + (array (identifier) @id1) + (pair (identifier) @id2) + (parenthesized_expression (identifier) @id3) + ", + ) + .unwrap(); + + let source = "[a, {b: b}, (c)];"; + + let mut parser = Parser::new(); + let mut cursor = QueryCursor::new(); + + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let matches = cursor.matches(&query1, tree.root_node(), to_callback(source)); + assert_eq!( + collect_matches(matches, &query1, source), + &[(0, vec![("id1", "a")]),] + ); + + let matches = cursor.matches(&query3, tree.root_node(), to_callback(source)); + assert_eq!( + collect_matches(matches, &query3, source), + &[ + (0, vec![("id1", "a")]), + (1, vec![("id2", "b")]), + (2, vec![("id3", "c")]), + ] + ); + + let matches = cursor.matches(&query2, tree.root_node(), to_callback(source)); + assert_eq!( + collect_matches(matches, &query2, source), + &[(0, vec![("id1", "a")]), (1, vec![("id2", "b")]),] + ); + }); +} + +#[test] +fn test_query_matches_with_multiple_captures_on_a_node() { + allocations::record(|| { + let language = get_language("javascript"); + let mut query = Query::new( + language, + "(function_declaration + (identifier) @name1 @name2 @name3 + (statement_block) @body1 @body2)", + ) + .unwrap(); + + let source = "function foo() { return 1; }"; + let mut parser = Parser::new(); + let mut cursor = QueryCursor::new(); + + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_matches(matches, &query, source), + &[( + 0, + vec![ + ("name1", "foo"), + ("name2", "foo"), + ("name3", "foo"), + ("body1", "{ return 1; }"), + ("body2", "{ return 1; }"), + ] + ),] + ); + + // disabling captures still works when there are multiple captures on a + // single node. 
+ query.disable_capture("name2"); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_matches(matches, &query, source), + &[( + 0, + vec![ + ("name1", "foo"), + ("name3", "foo"), + ("body1", "{ return 1; }"), + ("body2", "{ return 1; }"), + ] + ),] + ); + }); +} + +#[test] +fn test_query_matches_with_captured_wildcard_at_root() { + allocations::record(|| { + let language = get_language("python"); + let query = Query::new( + language, + " + ; captured wildcard at the root + (_ [ + (except_clause (block) @block) + (finally_clause (block) @block) + ]) @stmt + + [ + (while_statement (block) @block) + (if_statement (block) @block) + + ; captured wildcard at the root within an alternation + (_ [ + (else_clause (block) @block) + (elif_clause (block) @block) + ]) + + (try_statement (block) @block) + (for_statement (block) @block) + ] @stmt + ", + ) + .unwrap(); + + let source = " + for i in j: + while True: + if a: + print b + elif c: + print d + else: + try: + print f + except: + print g + finally: + print h + else: + print i + " + .trim(); + + let mut parser = Parser::new(); + let mut cursor = QueryCursor::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let match_capture_names_and_rows = cursor + .matches(&query, tree.root_node(), to_callback(source)) + .map(|m| { + m.captures + .iter() + .map(|c| { + ( + query.capture_names()[c.index as usize].as_str(), + c.node.kind(), + c.node.start_position().row, + ) + }) + .collect::>() + }) + .collect::>(); + + assert_eq!( + match_capture_names_and_rows, + &[ + vec![("stmt", "for_statement", 0), ("block", "block", 1)], + vec![("stmt", "while_statement", 1), ("block", "block", 2)], + vec![("stmt", "if_statement", 2), ("block", "block", 3)], + vec![("stmt", "if_statement", 2), ("block", "block", 5)], + vec![("stmt", "if_statement", 2), ("block", "block", 7)], + vec![("stmt", "try_statement", 7), ("block", "block", 8)], + 
vec![("stmt", "try_statement", 7), ("block", "block", 10)], + vec![("stmt", "try_statement", 7), ("block", "block", 12)], + vec![("stmt", "while_statement", 1), ("block", "block", 14)], + ] + ) + }); +} + +#[test] +fn test_query_matches_with_no_captures() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (identifier) + (string) @s + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + a = 'hi'; + b = 'bye'; + ", + &[ + (0, vec![]), + (1, vec![("s", "'hi'")]), + (0, vec![]), + (1, vec![("s", "'bye'")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_repeated_fields() { + allocations::record(|| { + let language = get_language("c"); + let query = Query::new( + language, + "(field_declaration declarator: (field_identifier) @field)", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + struct S { + int a, b, c; + } + ", + &[ + (0, vec![("field", "a")]), + (0, vec![("field", "b")]), + (0, vec![("field", "c")]), + ], + ); + }); +} + +#[test] +fn test_query_captures_basic() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (pair + key: _ @method.def + (function + name: (identifier) @method.alias)) + + (variable_declarator + name: _ @function.def + value: (function + name: (identifier) @function.alias)) + + ":" @delimiter + "=" @operator + "#, + ) + .unwrap(); + + let source = " + a({ + bc: function de() { + const fg = function hi() {} + }, + jk: function lm() { + const no = function pq() {} + }, + }); + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + + assert_eq!( + collect_matches(matches, &query, source), + &[ + (2, vec![("delimiter", ":")]), + (0, vec![("method.def", "bc"), 
("method.alias", "de")]), + (3, vec![("operator", "=")]), + (1, vec![("function.def", "fg"), ("function.alias", "hi")]), + (2, vec![("delimiter", ":")]), + (0, vec![("method.def", "jk"), ("method.alias", "lm")]), + (3, vec![("operator", "=")]), + (1, vec![("function.def", "no"), ("function.alias", "pq")]), + ], + ); + + let captures = cursor.captures(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_captures(captures, &query, source), + &[ + ("method.def", "bc"), + ("delimiter", ":"), + ("method.alias", "de"), + ("function.def", "fg"), + ("operator", "="), + ("function.alias", "hi"), + ("method.def", "jk"), + ("delimiter", ":"), + ("method.alias", "lm"), + ("function.def", "no"), + ("operator", "="), + ("function.alias", "pq"), + ] + ); + }); +} + +#[test] +fn test_query_captures_with_text_conditions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + ((identifier) @constant + (#match? @constant "^[A-Z]{2,}$")) + + ((identifier) @constructor + (#match? @constructor "^[A-Z]")) + + ((identifier) @function.builtin + (#eq? @function.builtin "require")) + + ((identifier) @variable + (#not-match? 
@variable "^(lambda|load)$")) + "#, + ) + .unwrap(); + + let source = " + toad + load + panda + lambda + const ab = require('./ab'); + new Cd(EF); + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + let captures = cursor.captures(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_captures(captures, &query, source), + &[ + ("variable", "toad"), + ("variable", "panda"), + ("variable", "ab"), + ("function.builtin", "require"), + ("variable", "require"), + ("constructor", "Cd"), + ("variable", "Cd"), + ("constant", "EF"), + ("constructor", "EF"), + ("variable", "EF"), + ], + ); + }); +} + +#[test] +fn test_query_captures_with_predicates() { + allocations::record(|| { + let language = get_language("javascript"); + + let query = Query::new( + language, + r#" + ((call_expression (identifier) @foo) + (#set! name something) + (#set! cool) + (#something! @foo omg)) + + ((property_identifier) @bar + (#is? cool) + (#is-not? 
name something))"#, + ) + .unwrap(); + + assert_eq!( + query.property_settings(0), + &[ + QueryProperty::new("name", Some("something"), None), + QueryProperty::new("cool", None, None), + ] + ); + assert_eq!( + query.general_predicates(0), + &[QueryPredicate { + operator: "something!".to_string().into_boxed_str(), + args: vec![ + QueryPredicateArg::Capture(0), + QueryPredicateArg::String("omg".to_string().into_boxed_str()), + ], + },] + ); + assert_eq!(query.property_settings(1), &[]); + assert_eq!(query.property_predicates(0), &[]); + assert_eq!( + query.property_predicates(1), + &[ + (QueryProperty::new("cool", None, None), true), + (QueryProperty::new("name", Some("something"), None), false), + ] + ); + }); +} + +#[test] +fn test_query_captures_with_quoted_predicate_args() { + allocations::record(|| { + let language = get_language("javascript"); + + // Double-quoted strings can contain: + // * special escape sequences like \n and \r + // * escaped double quotes with \* + // * literal backslashes with \\ + let query = Query::new( + language, + r#" + ((call_expression (identifier) @foo) + (#set! one "\"something\ngreat\"")) + + ((identifier) + (#set! two "\\s(\r?\n)*$")) + + ((function_declaration) + (#set! 
three "\"something\ngreat\"")) + "#, + ) + .unwrap(); + + assert_eq!( + query.property_settings(0), + &[QueryProperty::new( + "one", + Some("\"something\ngreat\""), + None + )] + ); + assert_eq!( + query.property_settings(1), + &[QueryProperty::new("two", Some("\\s(\r?\n)*$"), None)] + ); + assert_eq!( + query.property_settings(2), + &[QueryProperty::new( + "three", + Some("\"something\ngreat\""), + None + )] + ); + }); +} + +#[test] +fn test_query_captures_with_duplicates() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (variable_declarator + name: (identifier) @function + value: (function)) + + (identifier) @variable + "#, + ) + .unwrap(); + + let source = " + var x = function() {}; + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + let captures = cursor.captures(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_captures(captures, &query, source), + &[("function", "x"), ("variable", "x"),], + ); + }); +} + +#[test] +fn test_query_captures_with_many_nested_results_without_fields() { + allocations::record(|| { + let language = get_language("javascript"); + + // Search for key-value pairs whose values are anonymous functions. + let query = Query::new( + language, + r#" + (pair + key: _ @method-def + (arrow_function)) + + ":" @colon + "," @comma + "#, + ) + .unwrap(); + + // The `pair` node for key `y` does not match any pattern, but inside of + // its value, it contains many other `pair` nodes that do match the pattern. + // The match for the *outer* pair should be terminated *before* descending into + // the object value, so that we can avoid needing to buffer all of the inner + // matches. 
+ let method_count = 50; + let mut source = "x = { y: {\n".to_owned(); + for i in 0..method_count { + writeln!(&mut source, " method{}: $ => null,", i).unwrap(); + } + source.push_str("}};\n"); + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + let captures = cursor.captures(&query, tree.root_node(), to_callback(&source)); + let captures = collect_captures(captures, &query, &source); + + assert_eq!( + &captures[0..13], + &[ + ("colon", ":"), + ("method-def", "method0"), + ("colon", ":"), + ("comma", ","), + ("method-def", "method1"), + ("colon", ":"), + ("comma", ","), + ("method-def", "method2"), + ("colon", ":"), + ("comma", ","), + ("method-def", "method3"), + ("colon", ":"), + ("comma", ","), + ] + ); + + // Ensure that we don't drop matches because of needing to buffer too many. + assert_eq!(captures.len(), 1 + 3 * method_count); + }); +} + +#[test] +fn test_query_captures_with_many_nested_results_with_fields() { + allocations::record(|| { + let language = get_language("javascript"); + + // Search expressions like `a ? a.b : null` + let query = Query::new( + language, + r#" + ((ternary_expression + condition: (identifier) @left + consequence: (member_expression + object: (identifier) @right) + alternative: (null)) + (#eq? @left @right)) + "#, + ) + .unwrap(); + + // The outer expression does not match the pattern, but the consequence of the ternary + // is an object that *does* contain many occurences of the pattern. + let count = 50; + let mut source = "a ? {".to_owned(); + for i in 0..count { + writeln!(&mut source, " x: y{} ? 
y{}.z : null,", i, i).unwrap(); + } + source.push_str("} : null;\n"); + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + let captures = cursor.captures(&query, tree.root_node(), to_callback(&source)); + let captures = collect_captures(captures, &query, &source); + + assert_eq!( + &captures[0..20], + &[ + ("left", "y0"), + ("right", "y0"), + ("left", "y1"), + ("right", "y1"), + ("left", "y2"), + ("right", "y2"), + ("left", "y3"), + ("right", "y3"), + ("left", "y4"), + ("right", "y4"), + ("left", "y5"), + ("right", "y5"), + ("left", "y6"), + ("right", "y6"), + ("left", "y7"), + ("right", "y7"), + ("left", "y8"), + ("right", "y8"), + ("left", "y9"), + ("right", "y9"), + ] + ); + + // Ensure that we don't drop matches because of needing to buffer too many. + assert_eq!(captures.len(), 2 * count); + }); +} + +#[test] +fn test_query_captures_with_too_many_nested_results() { + allocations::record(|| { + let language = get_language("javascript"); + + // Search for method calls in general, and also method calls with a template string + // in place of an argument list (aka "tagged template strings") in particular. + // + // This second pattern, which looks for the tagged template strings, is expensive to + // use with the `captures()` method, because: + // 1. When calling `captures`, all of the captures must be returned in order of their + // appearance. + // 2. This pattern captures the root `call_expression`. + // 3. This pattern's result also depends on the final child (the template string). + // 4. In between the `call_expression` and the possible `template_string`, there can + // be an arbitrarily deep subtree. 
+ // + // This means that, if any patterns match *after* the initial `call_expression` is + // captured, but before the final `template_string` is found, those matches must + // be buffered, in order to prevent captures from being returned out-of-order. + let query = Query::new( + language, + r#" + ;; easy šŸ‘‡ + (call_expression + function: (member_expression + property: (property_identifier) @method-name)) + + ;; hard šŸ‘‡ + (call_expression + function: (member_expression + property: (property_identifier) @template-tag) + arguments: (template_string)) @template-call + "#, + ) + .unwrap(); + + // There are a *lot* of matches in between the beginning of the outer `call_expression` + // (the call to `a(...).f`), which starts at the beginning of the file, and the final + // template string, which occurs at the end of the file. The query algorithm imposes a + // limit on the total number of matches which can be buffered at a time. But we don't + // want to neglect the inner matches just because of the expensive outer match, so we + // abandon the outer match (which would have captured `f` as a `template-tag`). 
+ let source = " + a(b => { + b.c0().d0 `šŸ˜„`; + b.c1().d1 `šŸ˜„`; + b.c2().d2 `šŸ˜„`; + b.c3().d3 `šŸ˜„`; + b.c4().d4 `šŸ˜„`; + b.c5().d5 `šŸ˜„`; + b.c6().d6 `šŸ˜„`; + b.c7().d7 `šŸ˜„`; + b.c8().d8 `šŸ˜„`; + b.c9().d9 `šŸ˜„`; + }).e().f ``; + " + .trim(); + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let captures = cursor.captures(&query, tree.root_node(), to_callback(&source)); + let captures = collect_captures(captures, &query, &source); + + assert_eq!( + &captures[0..4], + &[ + ("template-call", "b.c0().d0 `šŸ˜„`"), + ("method-name", "c0"), + ("method-name", "d0"), + ("template-tag", "d0"), + ] + ); + assert_eq!( + &captures[36..40], + &[ + ("template-call", "b.c9().d9 `šŸ˜„`"), + ("method-name", "c9"), + ("method-name", "d9"), + ("template-tag", "d9"), + ] + ); + assert_eq!( + &captures[40..], + &[("method-name", "e"), ("method-name", "f"),] + ); + }); +} + +#[test] +fn test_query_captures_with_definite_pattern_containing_many_nested_matches() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (array + "[" @l-bracket + "]" @r-bracket) + + "." @dot + "#, + ) + .unwrap(); + + // The '[' node must be returned before all of the '.' nodes, + // even though its pattern does not finish until the ']' node + // at the end of the document. But because the '[' is definite, + // it can be returned before the pattern finishes matching. 
+ let source = " + [ + a.b.c.d.e.f.g.h.i, + a.b.c.d.e.f.g.h.i, + a.b.c.d.e.f.g.h.i, + a.b.c.d.e.f.g.h.i, + a.b.c.d.e.f.g.h.i, + ] + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + let captures = cursor.captures(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_captures(captures, &query, source), + [("l-bracket", "[")] + .iter() + .chain([("dot", "."); 40].iter()) + .chain([("r-bracket", "]")].iter()) + .cloned() + .collect::>(), + ); + }); +} + +#[test] +fn test_query_captures_ordered_by_both_start_and_end_positions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (call_expression) @call + (member_expression) @member + (identifier) @variable + "#, + ) + .unwrap(); + + let source = " + a.b(c.d().e).f; + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + let captures = cursor.captures(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_captures(captures, &query, source), + &[ + ("member", "a.b(c.d().e).f"), + ("call", "a.b(c.d().e)"), + ("member", "a.b"), + ("variable", "a"), + ("member", "c.d().e"), + ("call", "c.d()"), + ("member", "c.d"), + ("variable", "c"), + ], + ); + }); +} + +#[test] +fn test_query_captures_with_matches_removed() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (binary_expression + left: (identifier) @left + operator: _ @op + right: (identifier) @right) + "#, + ) + .unwrap(); + + let source = " + a === b && c > d && e < f; + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + let mut 
captured_strings = Vec::new(); + for (m, i) in cursor.captures(&query, tree.root_node(), to_callback(source)) { + let capture = m.captures[i]; + let text = capture.node.utf8_text(source.as_bytes()).unwrap(); + if text == "a" { + m.remove(); + continue; + } + captured_strings.push(text); + } + + assert_eq!(captured_strings, &["c", ">", "d", "e", "<", "f",]); + }); +} + +#[test] +fn test_query_captures_and_matches_iterators_are_fused() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (comment) @comment + "#, + ) + .unwrap(); + + let source = " + // one + // two + // three + /* unfinished + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let mut captures = cursor.captures(&query, tree.root_node(), to_callback(source)); + + assert_eq!(captures.next().unwrap().0.captures[0].index, 0); + assert_eq!(captures.next().unwrap().0.captures[0].index, 0); + assert_eq!(captures.next().unwrap().0.captures[0].index, 0); + assert!(captures.next().is_none()); + assert!(captures.next().is_none()); + assert!(captures.next().is_none()); + drop(captures); + + let mut matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + assert_eq!(matches.next().unwrap().captures[0].index, 0); + assert_eq!(matches.next().unwrap().captures[0].index, 0); + assert_eq!(matches.next().unwrap().captures[0].index, 0); + assert!(matches.next().is_none()); + assert!(matches.next().is_none()); + assert!(matches.next().is_none()); + }); +} + +#[test] +fn test_query_start_byte_for_pattern() { + let language = get_language("javascript"); + + let patterns_1 = r#" + "+" @operator + "-" @operator + "*" @operator + "=" @operator + "=>" @operator + "# + .trim_start(); + + let patterns_2 = " + (identifier) @a + (string) @b + " + .trim_start(); + + let patterns_3 = " + ((identifier) @b (#match? 
@b i)) + (function_declaration name: (identifier) @c) + (method_definition name: (property_identifier) @d) + " + .trim_start(); + + let mut source = String::new(); + source += patterns_1; + source += patterns_2; + source += patterns_3; + + let query = Query::new(language, &source).unwrap(); + + assert_eq!(query.start_byte_for_pattern(0), 0); + assert_eq!(query.start_byte_for_pattern(5), patterns_1.len()); + assert_eq!( + query.start_byte_for_pattern(7), + patterns_1.len() + patterns_2.len() + ); +} + +#[test] +fn test_query_capture_names() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (if_statement + condition: (parenthesized_expression (binary_expression + left: _ @left-operand + operator: "||" + right: _ @right-operand)) + consequence: (statement_block) @body) + + (while_statement + condition: _ @loop-condition) + "#, + ) + .unwrap(); + + assert_eq!( + query.capture_names(), + &[ + "left-operand".to_string(), + "right-operand".to_string(), + "body".to_string(), + "loop-condition".to_string(), + ] + ); + }); +} + +#[test] +fn test_query_with_no_patterns() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new(language, "").unwrap(); + assert!(query.capture_names().is_empty()); + assert_eq!(query.pattern_count(), 0); + }); +} + +#[test] +fn test_query_comments() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + ; this is my first comment + ; i have two comments here + (function_declaration + ; there is also a comment here + ; and here + name: (identifier) @fn-name)", + ) + .unwrap(); + + let source = "function one() { }"; + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + assert_eq!( + 
collect_matches(matches, &query, source), + &[(0, vec![("fn-name", "one")]),], + ); + }); +} + +#[test] +fn test_query_disable_pattern() { + allocations::record(|| { + let language = get_language("javascript"); + let mut query = Query::new( + language, + " + (function_declaration + name: (identifier) @name) + (function_declaration + body: (statement_block) @body) + (class_declaration + name: (identifier) @name) + (class_declaration + body: (class_body) @body) + ", + ) + .unwrap(); + + // disable the patterns that match names + query.disable_pattern(0); + query.disable_pattern(2); + + let source = "class A { constructor() {} } function b() { return 1; }"; + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_matches(matches, &query, source), + &[ + (3, vec![("body", "{ constructor() {} }")]), + (1, vec![("body", "{ return 1; }")]), + ], + ); + }); +} + +#[test] +fn test_query_alternative_predicate_prefix() { + allocations::record(|| { + let language = get_language("c"); + let query = Query::new( + language, + r#" + ((call_expression + function: (identifier) @keyword + arguments: (argument_list + (string_literal) @function)) + (.eq? @keyword "DEFUN")) + "#, + ) + .unwrap(); + let source = r#" + DEFUN ("identity", Fidentity, Sidentity, 1, 1, 0, + doc: /* Return the argument unchanged. 
*/ + attributes: const) + (Lisp_Object arg) + { + return arg; + } + "#; + assert_query_matches( + language, + &query, + source, + &[(0, vec![("keyword", "DEFUN"), ("function", "\"identity\"")])], + ); + }); +} + +#[test] +fn test_query_step_is_definite() { + struct Row { + language: Language, + description: &'static str, + pattern: &'static str, + results_by_substring: &'static [(&'static str, bool)], + } + + let rows = &[ + Row { + description: "no definite steps", + language: get_language("python"), + pattern: r#"(expression_statement (string))"#, + results_by_substring: &[("expression_statement", false), ("string", false)], + }, + Row { + description: "all definite steps", + language: get_language("javascript"), + pattern: r#"(object "{" "}")"#, + results_by_substring: &[("object", false), ("{", true), ("}", true)], + }, + Row { + description: "an indefinite step that is optional", + language: get_language("javascript"), + pattern: r#"(object "{" (identifier)? @foo "}")"#, + results_by_substring: &[ + ("object", false), + ("{", true), + ("(identifier)?", false), + ("}", true), + ], + }, + Row { + description: "multiple indefinite steps that are optional", + language: get_language("javascript"), + pattern: r#"(object "{" (identifier)? @id1 ("," (identifier) @id2)? "}")"#, + results_by_substring: &[ + ("object", false), + ("{", true), + ("(identifier)? @id1", false), + ("\",\"", false), + ("}", true), + ], + }, + Row { + description: "definite step after indefinite step", + language: get_language("javascript"), + pattern: r#"(pair (property_identifier) ":")"#, + results_by_substring: &[("pair", false), ("property_identifier", false), (":", true)], + }, + Row { + description: "indefinite step in between two definite steps", + language: get_language("javascript"), + pattern: r#"(ternary_expression + condition: (_) + "?" 
+ consequence: (call_expression) + ":" + alternative: (_))"#, + results_by_substring: &[ + ("condition:", false), + ("\"?\"", false), + ("consequence:", false), + ("\":\"", true), + ("alternative:", true), + ], + }, + Row { + description: "one definite step after a repetition", + language: get_language("javascript"), + pattern: r#"(object "{" (_) "}")"#, + results_by_substring: &[("object", false), ("{", false), ("(_)", false), ("}", true)], + }, + Row { + description: "definite steps after multiple repetitions", + language: get_language("json"), + pattern: r#"(object "{" (pair) "," (pair) "," (_) "}")"#, + results_by_substring: &[ + ("object", false), + ("{", false), + ("(pair) \",\" (pair)", false), + ("(pair) \",\" (_)", false), + ("\",\" (_)", false), + ("(_)", true), + ("}", true), + ], + }, + Row { + description: "a definite with a field", + language: get_language("javascript"), + pattern: r#"(binary_expression left: (identifier) right: (_))"#, + results_by_substring: &[ + ("binary_expression", false), + ("(identifier)", false), + ("(_)", true), + ], + }, + Row { + description: "multiple definite steps with fields", + language: get_language("javascript"), + pattern: r#"(function_declaration name: (identifier) body: (statement_block))"#, + results_by_substring: &[ + ("function_declaration", false), + ("identifier", true), + ("statement_block", true), + ], + }, + Row { + description: "nesting, one definite step", + language: get_language("javascript"), + pattern: r#" + (function_declaration + name: (identifier) + body: (statement_block "{" (expression_statement) "}"))"#, + results_by_substring: &[ + ("function_declaration", false), + ("identifier", false), + ("statement_block", false), + ("{", false), + ("expression_statement", false), + ("}", true), + ], + }, + Row { + description: "definite step after some deeply nested hidden nodes", + language: get_language("ruby"), + pattern: r#" + (singleton_class + value: (constant) + "end") + "#, + results_by_substring: 
&[ + ("singleton_class", false), + ("constant", false), + ("end", true), + ], + }, + Row { + description: "nesting, no definite steps", + language: get_language("javascript"), + pattern: r#" + (call_expression + function: (member_expression + property: (property_identifier) @template-tag) + arguments: (template_string)) @template-call + "#, + results_by_substring: &[("property_identifier", false), ("template_string", false)], + }, + Row { + description: "a definite step after a nested node", + language: get_language("javascript"), + pattern: r#" + (subscript_expression + object: (member_expression + object: (identifier) @obj + property: (property_identifier) @prop) + "[") + "#, + results_by_substring: &[ + ("identifier", false), + ("property_identifier", true), + ("[", true), + ], + }, + Row { + description: "a step that is indefinite due to a predicate", + language: get_language("javascript"), + pattern: r#" + (subscript_expression + object: (member_expression + object: (identifier) @obj + property: (property_identifier) @prop) + "[" + (#match? 
@prop "foo")) + "#, + results_by_substring: &[ + ("identifier", false), + ("property_identifier", false), + ("[", true), + ], + }, + Row { + description: "alternation where one branch has definite steps", + language: get_language("javascript"), + pattern: r#" + [ + (unary_expression (identifier)) + (call_expression + function: (_) + arguments: (_)) + (binary_expression right:(call_expression)) + ] + "#, + results_by_substring: &[ + ("identifier", false), + ("right:", false), + ("function:", true), + ("arguments:", true), + ], + }, + Row { + description: "aliased parent node", + language: get_language("ruby"), + pattern: r#" + (method_parameters "(" (identifier) @id")") + "#, + results_by_substring: &[("\"(\"", false), ("(identifier)", false), ("\")\"", true)], + }, + Row { + description: "long, but not too long to analyze", + language: get_language("javascript"), + pattern: r#" + (object "{" (pair) (pair) (pair) (pair) "}") + "#, + results_by_substring: &[ + ("\"{\"", false), + ("(pair)", false), + ("(pair) \"}\"", false), + ("\"}\"", true), + ], + }, + Row { + description: "too long to analyze", + language: get_language("javascript"), + pattern: r#" + (object "{" (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) "}") + "#, + results_by_substring: &[ + ("\"{\"", false), + ("(pair)", false), + ("(pair) \"}\"", false), + ("\"}\"", false), + ], + }, + Row { + description: "hidden nodes that have several fields", + language: get_language("java"), + pattern: r#" + (method_declaration name: (identifier)) + "#, + results_by_substring: &[("name:", true)], + }, + ]; + + allocations::record(|| { + eprintln!(""); + + for row in rows.iter() { + if let Some(filter) = EXAMPLE_FILTER.as_ref() { + if !row.description.contains(filter.as_str()) { + continue; + } + } + eprintln!(" query example: {:?}", row.description); + let query = Query::new(row.language, row.pattern).unwrap(); + for (substring, is_definite) in row.results_by_substring { + let 
offset = row.pattern.find(substring).unwrap(); + assert_eq!( + query.step_is_definite(offset), + *is_definite, + "Description: {}, Pattern: {:?}, substring: {:?}, expected is_definite to be {}", + row.description, + row.pattern + .split_ascii_whitespace() + .collect::>() + .join(" "), + substring, + is_definite, + ) + } + } + }); +} + +fn assert_query_matches( + language: Language, + query: &Query, + source: &str, + expected: &[(usize, Vec<(&str, &str)>)], +) { + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + assert_eq!(collect_matches(matches, &query, source), expected); +} + +fn collect_matches<'a>( + matches: impl Iterator>, + query: &'a Query, + source: &'a str, +) -> Vec<(usize, Vec<(&'a str, &'a str)>)> { + matches + .map(|m| { + ( + m.pattern_index, + format_captures(m.captures.iter().cloned(), query, source), + ) + }) + .collect() +} + +fn collect_captures<'a>( + captures: impl Iterator, usize)>, + query: &'a Query, + source: &'a str, +) -> Vec<(&'a str, &'a str)> { + format_captures(captures.map(|(m, i)| m.captures[i]), query, source) +} + +fn format_captures<'a>( + captures: impl Iterator>, + query: &'a Query, + source: &'a str, +) -> Vec<(&'a str, &'a str)> { + captures + .map(|capture| { + ( + query.capture_names()[capture.index as usize].as_str(), + capture.node.utf8_text(source.as_bytes()).unwrap(), + ) + }) + .collect() +} + +fn to_callback<'a>(source: &'a str) -> impl Fn(Node) -> &'a [u8] { + move |n| &source.as_bytes()[n.byte_range()] +} diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs new file mode 100644 index 00000000..2b058c0b --- /dev/null +++ b/cli/src/tests/tags_test.rs @@ -0,0 +1,437 @@ +use super::helpers::allocations; +use super::helpers::fixtures::{get_language, get_language_queries_path}; +use std::ffi::CStr; +use 
std::ffi::CString; +use std::{fs, ptr, slice, str}; +use tree_sitter::Point; +use tree_sitter_tags::c_lib as c; +use tree_sitter_tags::{Error, TagsConfiguration, TagsContext}; + +const PYTHON_TAG_QUERY: &'static str = r#" +( + (function_definition + name: (identifier) @name + body: (block . (expression_statement (string) @doc))) @definition.function + (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") +) + +(function_definition + name: (identifier) @name) @definition.function + +( + (class_definition + name: (identifier) @name + body: (block + . (expression_statement (string) @doc))) @definition.class + (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") +) + +(class_definition + name: (identifier) @name) @definition.class + +(call + function: (identifier) @name) @reference.call + +(call + function: (attribute + attribute: (identifier) @name)) @reference.call +"#; + +const JS_TAG_QUERY: &'static str = r#" +( + (comment)* @doc . + (class_declaration + name: (identifier) @name) @definition.class + (#select-adjacent! @doc @definition.class) + (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") +) + +( + (comment)* @doc . + (method_definition + name: (property_identifier) @name) @definition.method + (#select-adjacent! @doc @definition.method) + (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") +) + +( + (comment)* @doc . + (function_declaration + name: (identifier) @name) @definition.function + (#select-adjacent! @doc @definition.function) + (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") +) + +(call_expression + function: (identifier) @name) @reference.call +"#; + +const RUBY_TAG_QUERY: &'static str = r#" +(method + name: (_) @name) @definition.method + +(method_call + method: (identifier) @name) @reference.call + +(setter (identifier) @ignore) + +((identifier) @name @reference.call + (#is-not? 
local)) +"#; + +#[test] +fn test_tags_python() { + let language = get_language("python"); + let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap(); + let mut tag_context = TagsContext::new(); + + let source = br#" + class Customer: + """ + Data about a customer + """ + + def age(self): + ''' + Get the customer's age + ''' + compute_age(self.id) + } + "#; + + let tags = tag_context + .generate_tags(&tags_config, source, None) + .unwrap() + .0 + .collect::, _>>() + .unwrap(); + + assert_eq!( + tags.iter() + .map(|t| ( + substr(source, &t.name_range), + tags_config.syntax_type_name(t.syntax_type_id) + )) + .collect::>(), + &[ + ("Customer", "class"), + ("age", "function"), + ("compute_age", "call"), + ] + ); + + assert_eq!(substr(source, &tags[0].line_range), "class Customer:"); + assert_eq!(substr(source, &tags[1].line_range), "def age(self):"); + assert_eq!(tags[0].docs.as_ref().unwrap(), "Data about a customer"); + assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age"); +} + +#[test] +fn test_tags_javascript() { + let language = get_language("javascript"); + let tags_config = TagsConfiguration::new(language, JS_TAG_QUERY, "").unwrap(); + let source = br#" + // hi + + // Data about a customer. 
+ // bla bla bla + class Customer { + /* + * Get the customer's age + */ + getAge() { + } + } + + // ok + + class Agent { + + } + "#; + + let mut tag_context = TagsContext::new(); + let tags = tag_context + .generate_tags(&tags_config, source, None) + .unwrap() + .0 + .collect::, _>>() + .unwrap(); + + assert_eq!( + tags.iter() + .map(|t| ( + substr(source, &t.name_range), + t.span.clone(), + tags_config.syntax_type_name(t.syntax_type_id) + )) + .collect::>(), + &[ + ("Customer", Point::new(5, 10)..Point::new(5, 18), "class",), + ("getAge", Point::new(9, 8)..Point::new(9, 14), "method",), + ("Agent", Point::new(15, 10)..Point::new(15, 15), "class",) + ] + ); + assert_eq!( + tags[0].docs.as_ref().unwrap(), + "Data about a customer.\nbla bla bla" + ); + assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age"); + assert_eq!(tags[2].docs, None); +} + +#[test] +fn test_tags_columns_measured_in_utf16_code_units() { + let language = get_language("python"); + let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap(); + let mut tag_context = TagsContext::new(); + + let source = r#""ā¤ļøā¤ļøā¤ļø".hello_α_ω()"#.as_bytes(); + + let tag = tag_context + .generate_tags(&tags_config, source, None) + .unwrap() + .0 + .next() + .unwrap() + .unwrap(); + + assert_eq!(substr(source, &tag.name_range), "hello_α_ω"); + assert_eq!(tag.span, Point::new(0, 21)..Point::new(0, 32)); + assert_eq!(tag.utf16_column_range, 9..18); +} + +#[test] +fn test_tags_ruby() { + let language = get_language("ruby"); + let locals_query = + fs::read_to_string(get_language_queries_path("ruby").join("locals.scm")).unwrap(); + let tags_config = TagsConfiguration::new(language, RUBY_TAG_QUERY, &locals_query).unwrap(); + let source = strip_whitespace( + 8, + " + b = 1 + + def foo=() + c = 1 + + # a is a method because it is not in scope + # b is a method because `b` doesn't capture variables from its containing scope + bar a, b, c + + [1, 2, 3].each do |a| + # a is a 
parameter + # b is a method + # c is a variable, because the block captures variables from its containing scope. + baz a, b, c + end + end", + ); + + let mut tag_context = TagsContext::new(); + let tags = tag_context + .generate_tags(&tags_config, source.as_bytes(), None) + .unwrap() + .0 + .collect::, _>>() + .unwrap(); + + assert_eq!( + tags.iter() + .map(|t| ( + substr(source.as_bytes(), &t.name_range), + tags_config.syntax_type_name(t.syntax_type_id), + (t.span.start.row, t.span.start.column), + )) + .collect::>(), + &[ + ("foo=", "method", (2, 4)), + ("bar", "call", (7, 4)), + ("a", "call", (7, 8)), + ("b", "call", (7, 11)), + ("each", "call", (9, 14)), + ("baz", "call", (13, 8)), + ("b", "call", (13, 15),), + ] + ); +} + +#[test] +fn test_tags_cancellation() { + use std::sync::atomic::{AtomicUsize, Ordering}; + + allocations::record(|| { + // Large javascript document + let source = (0..500) + .map(|_| "/* hi */ class A { /* ok */ b() {} }\n") + .collect::(); + + let cancellation_flag = AtomicUsize::new(0); + let language = get_language("javascript"); + let tags_config = TagsConfiguration::new(language, JS_TAG_QUERY, "").unwrap(); + + let mut tag_context = TagsContext::new(); + let tags = tag_context + .generate_tags(&tags_config, source.as_bytes(), Some(&cancellation_flag)) + .unwrap(); + + for (i, tag) in tags.0.enumerate() { + if i == 150 { + cancellation_flag.store(1, Ordering::SeqCst); + } + if let Err(e) = tag { + assert_eq!(e, Error::Cancelled); + return; + } + } + + panic!("Expected to halt tagging with an error"); + }); +} + +#[test] +fn test_invalid_capture() { + let language = get_language("python"); + let e = TagsConfiguration::new(language, "(identifier) @method", "") + .expect_err("expected InvalidCapture error"); + assert_eq!(e, Error::InvalidCapture("method".to_string())); +} + +#[test] +fn test_tags_with_parse_error() { + let language = get_language("python"); + let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, 
"").unwrap(); + let mut tag_context = TagsContext::new(); + + let source = br#" + class Fine: pass + class Bad + "#; + + let (tags, failed) = tag_context + .generate_tags(&tags_config, source, None) + .unwrap(); + + let newtags = tags.collect::, _>>().unwrap(); + + assert!(failed, "syntax error should have been detected"); + + assert_eq!( + newtags.iter() + .map(|t| ( + substr(source, &t.name_range), + tags_config.syntax_type_name(t.syntax_type_id) + )) + .collect::>(), + &[ + ("Fine", "class"), + ] + ); +} + + +#[test] +fn test_tags_via_c_api() { + allocations::record(|| { + let tagger = c::ts_tagger_new(); + let buffer = c::ts_tags_buffer_new(); + let scope_name = "source.js"; + let language = get_language("javascript"); + + let source_code = strip_whitespace( + 12, + " + var a = 1; + + // one + // two + // three + function b() { + } + + // four + // five + class C extends D { + + } + + b(a);", + ); + + let c_scope_name = CString::new(scope_name).unwrap(); + let result = c::ts_tagger_add_language( + tagger, + c_scope_name.as_ptr(), + language, + JS_TAG_QUERY.as_ptr(), + ptr::null(), + JS_TAG_QUERY.len() as u32, + 0, + ); + assert_eq!(result, c::TSTagsError::Ok); + + let result = c::ts_tagger_tag( + tagger, + c_scope_name.as_ptr(), + source_code.as_ptr(), + source_code.len() as u32, + buffer, + ptr::null(), + ); + assert_eq!(result, c::TSTagsError::Ok); + let tags = unsafe { + slice::from_raw_parts( + c::ts_tags_buffer_tags(buffer), + c::ts_tags_buffer_tags_len(buffer) as usize, + ) + }; + let docs = str::from_utf8(unsafe { + slice::from_raw_parts( + c::ts_tags_buffer_docs(buffer) as *const u8, + c::ts_tags_buffer_docs_len(buffer) as usize, + ) + }) + .unwrap(); + + let syntax_types: Vec<&str> = unsafe { + let mut len: u32 = 0; + let ptr = + c::ts_tagger_syntax_kinds_for_scope_name(tagger, c_scope_name.as_ptr(), &mut len); + slice::from_raw_parts(ptr, len as usize) + .iter() + .map(|i| CStr::from_ptr(*i).to_str().unwrap()) + .collect() + }; + + assert_eq!( + 
tags.iter() + .map(|tag| ( + syntax_types[tag.syntax_type_id as usize], + &source_code[tag.name_start_byte as usize..tag.name_end_byte as usize], + &source_code[tag.line_start_byte as usize..tag.line_end_byte as usize], + &docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize], + )) + .collect::>(), + &[ + ("function", "b", "function b() {", "one\ntwo\nthree"), + ("class", "C", "class C extends D {", "four\nfive"), + ("call", "b", "b(a);", "") + ] + ); + + c::ts_tags_buffer_delete(buffer); + c::ts_tagger_delete(tagger); + }); +} + +fn substr<'a>(source: &'a [u8], range: &std::ops::Range) -> &'a str { + std::str::from_utf8(&source[range.clone()]).unwrap() +} + +fn strip_whitespace(indent: usize, s: &str) -> String { + s.lines() + .skip(1) + .map(|line| &line[line.len().min(indent)..]) + .collect::>() + .join("\n") +} diff --git a/cli/src/tests/test_highlight_test.rs b/cli/src/tests/test_highlight_test.rs new file mode 100644 index 00000000..1a658281 --- /dev/null +++ b/cli/src/tests/test_highlight_test.rs @@ -0,0 +1,64 @@ +use super::helpers::fixtures::{get_highlight_config, get_language, test_loader}; +use crate::query_testing::{parse_position_comments, Assertion}; +use crate::test_highlight::get_highlight_positions; +use tree_sitter::{Parser, Point}; +use tree_sitter_highlight::{Highlight, Highlighter}; + +#[test] +fn test_highlight_test_with_basic_test() { + let language = get_language("javascript"); + let config = get_highlight_config( + "javascript", + Some("injections.scm"), + &[ + "function".to_string(), + "variable.parameter".to_string(), + "keyword".to_string(), + ], + ); + let source = [ + "var abc = function(d) {", + " // ^ function", + " // ^ keyword", + " return d + e;", + " // ^ variable.parameter", + "};", + ] + .join("\n"); + + let assertions = + parse_position_comments(&mut Parser::new(), language, source.as_bytes()).unwrap(); + assert_eq!( + assertions, + &[ + Assertion { + position: Point::new(0, 5), + expected_capture_name: 
"function".to_string() + }, + Assertion { + position: Point::new(0, 11), + expected_capture_name: "keyword".to_string() + }, + Assertion { + position: Point::new(3, 9), + expected_capture_name: "variable.parameter".to_string() + }, + ] + ); + + let mut highlighter = Highlighter::new(); + let highlight_positions = + get_highlight_positions(test_loader(), &mut highlighter, &config, source.as_bytes()) + .unwrap(); + assert_eq!( + highlight_positions, + &[ + (Point::new(0, 0), Point::new(0, 3), Highlight(2)), // "var" + (Point::new(0, 4), Point::new(0, 7), Highlight(0)), // "abc" + (Point::new(0, 10), Point::new(0, 18), Highlight(2)), // "function" + (Point::new(0, 19), Point::new(0, 20), Highlight(1)), // "d" + (Point::new(3, 2), Point::new(3, 8), Highlight(2)), // "return" + (Point::new(3, 9), Point::new(3, 10), Highlight(1)), // "d" + ] + ); +} diff --git a/cli/src/util.rs b/cli/src/util.rs index e880bea1..acafa662 100644 --- a/cli/src/util.rs +++ b/cli/src/util.rs @@ -1,12 +1,32 @@ +use super::error::{Error, Result}; +use std::io; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use std::thread; +use tree_sitter::Parser; + #[cfg(unix)] use std::path::PathBuf; #[cfg(unix)] use std::process::{Child, ChildStdin, Command, Stdio}; -use tree_sitter::Parser; #[cfg(unix)] const HTML_HEADER: &[u8] = b"\n\n\n"; +pub fn cancel_on_stdin() -> Arc { + let result = Arc::new(AtomicUsize::new(0)); + if atty::is(atty::Stream::Stdin) { + thread::spawn({ + let flag = result.clone(); + move || { + let mut line = String::new(); + io::stdin().read_line(&mut line).unwrap(); + flag.store(1, Ordering::Relaxed); + } + }); + } + result +} #[cfg(windows)] pub struct LogSession(); @@ -14,12 +34,12 @@ pub struct LogSession(); pub struct LogSession(PathBuf, Option, Option); #[cfg(windows)] -pub fn log_graphs(_parser: &mut Parser, _path: &str) -> std::io::Result { +pub fn log_graphs(_parser: &mut Parser, _path: &str) -> Result { Ok(LogSession()) } #[cfg(unix)] -pub fn 
log_graphs(parser: &mut Parser, path: &str) -> std::io::Result { +pub fn log_graphs(parser: &mut Parser, path: &str) -> Result { use std::io::Write; let mut dot_file = std::fs::File::create(path)?; @@ -29,11 +49,13 @@ pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result Resu } // Run `emcc` in a container using the `emscripten-slim` image - command.args(&["trzeci/emscripten-slim", "emcc"]); + command.args(&["emscripten/emsdk", "emcc"]); } else { - return Error::err("You must have either emcc or docker on your PATH to run this command".to_string()); + return Error::err( + "You must have either emcc or docker on your PATH to run this command".to_string(), + ); } command.args(&[ @@ -81,31 +83,22 @@ pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Resu "src", ]); - // Find source files to pass to emscripten - let src_entries = fs::read_dir(&src_dir).map_err(Error::wrap(|| { - format!("Failed to read source directory {:?}", src_dir) - }))?; + let src = Path::new("src"); + let parser_c_path = src.join("parser.c"); + let scanner_c_path = src.join("scanner.c"); + let scanner_cc_path = src.join("scanner.cc"); + let scanner_cpp_path = src.join("scanner.cpp"); - for entry in src_entries { - let entry = entry?; - let file_name = entry.file_name(); - - // Do not compile the node.js binding file. 
- if file_name - .to_str() - .map_or(false, |s| s.starts_with("binding")) - { - continue; - } - - // Compile any .c, .cc, or .cpp files - if let Some(extension) = Path::new(&file_name).extension().and_then(|s| s.to_str()) { - if extension == "c" || extension == "cc" || extension == "cpp" { - command.arg(Path::new("src").join(entry.file_name())); - } - } + if language_dir.join(&scanner_cc_path).exists() { + command.arg("-xc++").arg(&scanner_cc_path); + } else if language_dir.join(&scanner_cpp_path).exists() { + command.arg("-xc++").arg(&scanner_cpp_path); + } else if language_dir.join(&scanner_c_path).exists() { + command.arg(&scanner_c_path); } + command.arg(&parser_c_path); + let output = command .output() .map_err(Error::wrap(|| "Failed to run emcc command"))?; diff --git a/cli/src/web_ui.html b/cli/src/web_ui.html index 2422a3d8..45dd5db0 100644 --- a/cli/src/web_ui.html +++ b/cli/src/web_ui.html @@ -1,4 +1,5 @@ + tree-sitter THE_LANGUAGE_NAME @@ -7,7 +8,7 @@ -
+