diff --git a/.appveyor.yml b/.appveyor.yml
deleted file mode 100644
index d463b7a2..00000000
--- a/.appveyor.yml
+++ /dev/null
@@ -1,50 +0,0 @@
-build: false
-install:
-  # Terminate early unless building either a tag or a PR.
-  - if "%APPVEYOR_REPO_TAG%" == "false" if not "%APPVEYOR_REPO_BRANCH%" == "master" appveyor exit
-
-  # Install rust
-  - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
-  - IF "%PLATFORM%" == "x86" rustup-init -y --default-toolchain stable --default-host i686-pc-windows-msvc
-  - IF "%PLATFORM%" == "x64" rustup-init -y --default-toolchain stable --default-host x86_64-pc-windows-msvc
-  - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin
-  - rustc -vV
-  - cargo -vV
-
-  # Install dependencies
-  - git submodule update --init
-
-platform:
-  - x64
-  - x86
-
-test_script:
-  # Fetch and regenerate the fixture parsers
-  - script\fetch-fixtures.cmd
-  - cargo build --release
-  - script\generate-fixtures.cmd
-
-  # Run tests
-  - script\test.cmd
-  - script\benchmark.cmd
-
-before_deploy:
-  - move target\release\tree-sitter.exe tree-sitter.exe
-  - 7z a -tgzip tree-sitter-windows-%PLATFORM%.gz tree-sitter.exe
-  - appveyor PushArtifact tree-sitter-windows-%PLATFORM%.gz
-
-deploy:
-  description: ''
-  provider: GitHub
-  auth_token:
-    secure: VC9ntV5+inKoNteZyLQksKzWMKXF46P+Jx3JHKVSfF+o1rWtZn2iIHAVsQv5LaUi
-  artifact: /tree-sitter-windows-.*/
-  draft: true
-  force_update: true
-  on:
-    APPVEYOR_REPO_TAG: true
-
-cache:
-  - target
-  - test\fixtures\grammars
-  - C:\Users\appveyor\.cargo
diff --git a/.gitattributes b/.gitattributes
index 4fcce330..44bf45c7 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +1,5 @@
 /lib/src/unicode/*.h linguist-vendored
 /lib/src/unicode/LICENSE linguist-vendored
+
+/cli/src/generate/prepare_grammar/*.json -diff
+Cargo.lock -diff
diff --git a/.github/scripts/cross.sh b/.github/scripts/cross.sh
new file mode 100755
index 00000000..a52f0873
--- /dev/null
+++ b/.github/scripts/cross.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+# set -x
+set -e
+
+if [ "$BUILD_CMD" != "cross" ]; then
+  echo "cross.sh is a helper for cross-compiling environments only" >&2
+  echo "To use this tool, set the BUILD_CMD env var to \"cross\"" >&2
+  exit 111
+fi
+
+if [ -z "$CROSS_IMAGE" ]; then
+  echo "The CROSS_IMAGE env var should be provided" >&2
+  exit 111
+fi
+
+docker run --rm -v /home/runner:/home/runner -w "$PWD" "$CROSS_IMAGE" "$@"
diff --git a/.github/scripts/make.sh b/.github/scripts/make.sh
new file mode 100755
index 00000000..79192541
--- /dev/null
+++ b/.github/scripts/make.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# set -x
+set -e
+
+if [ "$BUILD_CMD" == "cross" ]; then
+  if [ -z "$CC" ]; then
+    echo "make.sh: CC is not set" >&2
+    exit 111
+  fi
+  if [ -z "$AR" ]; then
+    echo "make.sh: AR is not set" >&2
+    exit 111
+  fi
+
+  cross.sh make CC=$CC AR=$AR "$@"
+else
+  make "$@"
+fi
diff --git a/.github/scripts/tree-sitter.sh b/.github/scripts/tree-sitter.sh
new file mode 100755
index 00000000..0cac9153
--- /dev/null
+++ b/.github/scripts/tree-sitter.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# set -x
+set -e
+
+if [ -z "$ROOT" ]; then
+  echo "The ROOT env var should be set to the absolute path of the repo root folder" >&2
+  exit 111
+fi
+
+if [ -z "$TARGET" ]; then
+  echo "The TARGET env var should be set to a \`cargo build --target\` value" >&2
+  exit 111
+fi
+
+tree_sitter="$ROOT"/target/"$TARGET"/release/tree-sitter
+
+if [ "$BUILD_CMD" == "cross" ]; then
+  if [ -z "$CROSS_RUNNER" ]; then
+    echo "The CROSS_RUNNER env var should be set to a
CARGO_TARGET_*_RUNNER env var value" >&2 + echo "that is available in a docker image used by the cross tool under the hood" >&2 + exit 111 + fi + + cross.sh $CROSS_RUNNER "$tree_sitter" "$@" +else + "$tree_sitter" "$@" +fi diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml new file mode 100644 index 00000000..fcdb6ea9 --- /dev/null +++ b/.github/workflows/CICD.yml @@ -0,0 +1,85 @@ +name: CICD + +on: + workflow_dispatch: + pull_request: + types: + - opened + - reopened + - synchronize + - ready_for_review + push: + branches-ignore: + - release/v* + +concurrency: + group: > + ${{ github.workflow }} @ ${{ + github.ref == 'refs/heads/master' && github.ref_name || '' + }}${{ + github.ref == 'refs/heads/master' && github.sha + || github.event.pull_request.head.label || github.head_ref || github.ref + }} + cancel-in-progress: true + +jobs: + init: + name: Init + runs-on: ubuntu-latest + steps: + - name: Get PR head ref + if: ${{ github.event_name == 'pull_request' }} + id: pr_head_ref + run: | + echo "ref=refs/pull/${{ github.event.pull_request.number }}/head" >> $GITHUB_OUTPUT + outputs: + ref: >- + ${{ + (github.event_name == 'pull_request' && startsWith(github.head_ref, 'release/v')) + && steps.pr_head_ref.outputs.ref + || github.ref + }} + + fast_checks: + name: Fast checks + uses: ./.github/workflows/fast_checks.yml + + full_checks: + name: Full Rust checks + needs: fast_checks + uses: ./.github/workflows/full_rust_checks.yml + + min_version: + name: Minimum supported rust version + needs: fast_checks + uses: ./.github/workflows/msrv.yml + with: + package: tree-sitter-cli + + sanitize: + name: Sanitize + needs: [init, fast_checks] + uses: ./.github/workflows/sanitize.yml + + build: + name: Build & Test + needs: [init, fast_checks] + uses: ./.github/workflows/build.yml + with: + ref: ${{ needs.init.outputs.ref }} + + release: + name: Release + needs: [init, fast_checks, full_checks, min_version, build, sanitize] + if: > + github.event_name == 'pull_request' && + startsWith(github.head_ref, 'release/v') && + !github.event.pull_request.draft + uses: ./.github/workflows/release.yml + with: + ref: ${{ needs.init.outputs.ref }} + + publish: + name: Publish + needs: release + uses: ./.github/workflows/publish.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..8087251d --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,267 @@ +name: Build & Test + +env: + CARGO_TERM_COLOR: always + RUSTFLAGS: "-D warnings" + CROSS_DEBUG: 1 + +on: + workflow_call: + inputs: + ref: + default: ${{ github.ref }} + type: string + run-tests: + default: true + type: boolean + workflow_dispatch: + inputs: + run-tests: + description: Run tests + default: true + type: boolean + rust-test-threads: + description: Number of Rust test threads + default: "" + type: string + +jobs: + build: + name: ${{ matrix.platform }} (${{ matrix.target }}) (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + platform: + - linux-arm64 # + - linux-arm # + - linux-armhf # + - linux-armv5te # + - linux-armv7l # + - linux-x64 # + - linux-x86 # + - linux-i586 # + - linux-mips # + - linux-mips64 # + - linux-mipsel # + - linux-mips64el # + - linux-powerpc # + - linux-powerpc64 # + - linux-powerpc64el # + # - linux-riscv64gc # #2712 + - linux-s390x # + - linux-sparc64 # + - linux-thumbv7neon # + - windows-arm64 # + - windows-x64 # <-- No C library build - requires an additional adapted Makefile for `cl.exe` compiler + - windows-x86 
# -- // -- + - macos-arm64 # <-- MacOS M1/M2 - no tests, only CLI build to be published on release artifacts + - macos-x64 # + + include: + # When adding a new `target`: + # 1. Define a new platform alias above + # 2. Add a new record to a matrix map in `cli/npm/install.js` + - { platform: linux-arm64 , target: aarch64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-arm , target: arm-unknown-linux-gnueabi , os: ubuntu-latest , use-cross: true } + - { platform: linux-armhf , target: arm-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true } + - { platform: linux-armv5te , target: armv5te-unknown-linux-gnueabi , os: ubuntu-latest , use-cross: true } + - { platform: linux-armv7l , target: armv7-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true } + - { platform: linux-x64 , target: x86_64-unknown-linux-gnu , os: ubuntu-20.04 } #2272 + - { platform: linux-x86 , target: i686-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-i586 , target: i586-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-mips , target: mips-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-mips64 , target: mips64-unknown-linux-gnuabi64 , os: ubuntu-latest , use-cross: true } + - { platform: linux-mipsel , target: mipsel-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-mips64el , target: mips64el-unknown-linux-gnuabi64 , os: ubuntu-latest , use-cross: true } + - { platform: linux-powerpc , target: powerpc-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-powerpc64 , target: powerpc64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-powerpc64el , target: powerpc64le-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + # - { platform: linux-riscv64gc , target: riscv64gc-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } #2712 + - { platform: linux-s390x , target: s390x-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-sparc64 , target: sparc64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-thumbv7neon , target: thumbv7neon-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true } + - { platform: windows-arm64 , target: aarch64-pc-windows-msvc , os: windows-latest } + - { platform: windows-x64 , target: x86_64-pc-windows-msvc , os: windows-latest } + - { platform: windows-x86 , target: i686-pc-windows-msvc , os: windows-latest } + - { platform: macos-arm64 , target: aarch64-apple-darwin , os: macos-latest } + - { platform: macos-x64 , target: x86_64-apple-darwin , os: macos-latest } + + # Cross compilers for C library + - { platform: linux-arm64 , cc: aarch64-linux-gnu-gcc , ar: aarch64-linux-gnu-ar } + - { platform: linux-arm , cc: arm-linux-gnueabi-gcc , ar: arm-linux-gnueabi-ar } + - { platform: linux-armhf , cc: arm-unknown-linux-gnueabihf-gcc , ar: arm-unknown-linux-gnueabihf-ar } + - { platform: linux-armv5te , cc: arm-linux-gnueabi-gcc , ar: arm-linux-gnueabi-ar } + - { platform: linux-armv7l , cc: arm-linux-gnueabihf-gcc , ar: arm-linux-gnueabihf-ar } + - { platform: linux-x86 , cc: i686-linux-gnu-gcc , ar: i686-linux-gnu-ar } + - { platform: linux-i586 , cc: i686-linux-gnu-gcc , ar: i686-linux-gnu-ar } + - { platform: linux-mips , cc: mips-linux-gnu-gcc , ar: mips-linux-gnu-ar } + - { platform: linux-mips64 , cc: mips64-linux-gnuabi64-gcc , ar: mips64-linux-gnuabi64-ar } + - { platform: linux-mipsel , cc: 
mipsel-linux-gnu-gcc , ar: mipsel-linux-gnu-ar } + - { platform: linux-mips64el , cc: mips64el-linux-gnuabi64-gcc , ar: mips64el-linux-gnuabi64-ar } + - { platform: linux-powerpc , cc: powerpc-linux-gnu-gcc , ar: powerpc-linux-gnu-ar } + - { platform: linux-powerpc64 , cc: powerpc64-linux-gnu-gcc , ar: powerpc64-linux-gnu-ar } + - { platform: linux-powerpc64el , cc: powerpc64le-linux-gnu-gcc , ar: powerpc64le-linux-gnu-ar } + # - { platform: linux-riscv64gc , cc: riscv64-linux-gnu-gcc , ar: riscv64-linux-gnu-ar } #2712 + - { platform: linux-s390x , cc: s390x-linux-gnu-gcc , ar: s390x-linux-gnu-ar } + - { platform: linux-sparc64 , cc: sparc64-linux-gnu-gcc , ar: sparc64-linux-gnu-ar } + - { platform: linux-thumbv7neon , cc: arm-linux-gnueabihf-gcc , ar: arm-linux-gnueabihf-ar } + + # Rust toolchains + - { platform: linux-mips , rust-toolchain: 1.71.1 } + - { platform: linux-mips64 , rust-toolchain: 1.71.1 } + - { platform: linux-mipsel , rust-toolchain: 1.71.1 } + - { platform: linux-mips64el , rust-toolchain: 1.71.1 } + + # See #2041 tree-sitter issue + - { platform: windows-x64 , rust-test-threads: 1 } + - { platform: windows-x86 , rust-test-threads: 1 } + + # CLI only build + - { platform: windows-arm64 , cli-only: true } + - { platform: macos-arm64 , cli-only: true } + + env: + BUILD_CMD: cargo + EMSCRIPTEN_VERSION: "" + EXE: ${{ contains(matrix.target, 'windows') && '.exe' || '' }} + + defaults: + run: + shell: bash + + steps: + - name: Checkout source code + uses: actions/checkout@v3 + with: + ref: ${{ inputs.ref }} + + - name: Read Emscripten version + run: | + echo "EMSCRIPTEN_VERSION=$(cat cli/loader/emscripten-version)" >> $GITHUB_ENV + + - name: Install Emscripten + if: ${{ !matrix.cli-only && !matrix.use-cross }} + uses: mymindstorm/setup-emsdk@v12 + with: + version: ${{ env.EMSCRIPTEN_VERSION }} + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + targets: ${{ matrix.target }} + toolchain: ${{ matrix.rust-toolchain || 'stable' }} + + - name: Install cross + if: ${{ matrix.use-cross }} + uses: taiki-e/install-action@v2 + with: + tool: cross + + - name: Build custom cross image + if: ${{ matrix.use-cross && matrix.os == 'ubuntu-latest' }} + run: | + cd .. + + target="${{ matrix.target }}" + image=ghcr.io/cross-rs/$target:custom + echo "CROSS_IMAGE=$image" >> $GITHUB_ENV + + echo "[target.$target]" >> Cross.toml + echo "image = \"$image\"" >> Cross.toml + echo "CROSS_CONFIG=$PWD/Cross.toml" >> $GITHUB_ENV + + echo "FROM ghcr.io/cross-rs/$target:edge" >> Dockerfile + echo "ENV DEBIAN_FRONTEND=noninteractive" >> Dockerfile + echo "RUN apt-get update && apt-get install -y nodejs" >> Dockerfile + docker build -t $image . 
+
+      - name: Setup env extras
+        env:
+          RUST_TEST_THREADS: ${{ matrix.rust-test-threads || inputs.rust-test-threads || '' }}
+          USE_CROSS: ${{ matrix.use-cross }}
+          TARGET: ${{ matrix.target }}
+          CC: ${{ matrix.cc }}
+          AR: ${{ matrix.ar }}
+          IS_WINDOWS: ${{ contains(matrix.os, 'windows') }}
+        run: |
+          PATH="$PWD/.github/scripts:$PATH"
+          echo "$PWD/.github/scripts" >> $GITHUB_PATH
+
+          echo "TREE_SITTER=tree-sitter.sh" >> $GITHUB_ENV
+          echo "TARGET=$TARGET" >> $GITHUB_ENV
+          echo "ROOT=$PWD" >> $GITHUB_ENV
+
+          [ -n "$RUST_TEST_THREADS" ] && \
+            echo "RUST_TEST_THREADS=$RUST_TEST_THREADS" >> $GITHUB_ENV
+
+          [ -n "$CC" ] && echo "CC=$CC" >> $GITHUB_ENV
+          [ -n "$AR" ] && echo "AR=$AR" >> $GITHUB_ENV
+
+          [ "$IS_WINDOWS" = "false" ] && echo "CFLAGS=-Werror" >> $GITHUB_ENV
+
+          if [ "$USE_CROSS" == "true" ]; then
+            echo "BUILD_CMD=cross" >> $GITHUB_ENV
+            runner=$(BUILD_CMD=cross cross.sh bash -c "env | sed -nr '/^CARGO_TARGET_.*_RUNNER=/s///p'")
+            [ -n "$runner" ] && echo "CROSS_RUNNER=$runner" >> $GITHUB_ENV
+          fi
+
+      - name: Build C library
+        if: ${{ !contains(matrix.os, 'windows') }} # Requires an additional Makefile adapted for the `cl.exe` compiler
+        run: make.sh -j
+
+      - name: Build wasm library
+        if: ${{ !matrix.cli-only && !matrix.use-cross }} # No point in building it repeatedly on the same GitHub runner hosts
+        run: script/build-wasm
+
+      - name: Build CLI
+        run: $BUILD_CMD build --release --target=${{ matrix.target }}
+
+      - name: Info about CLI
+        if: ${{ startsWith(matrix.platform, 'linux') }}
+        run: |
+          min_glibc=$(objdump -p target/$TARGET/release/tree-sitter${{ env.EXE }} | sed -nr 's/.*(GLIBC_.+).*/\1/p' | sort -uV | tail -n1)
+          echo "🔗 Minimum **glibc** version required for the CLI: ${min_glibc}" >> $GITHUB_STEP_SUMMARY
+
+      - name: Fetch fixtures
+        if: ${{ inputs.run-tests && !matrix.cli-only }} # Don't fetch fixtures for CLI-only build targets
+        run: script/fetch-fixtures
+
+      - name: Generate fixtures
+        if: ${{ inputs.run-tests && !matrix.cli-only }} # Can't run the CLI natively on the GitHub runner host
+        run: script/generate-fixtures
+
+      - name: Generate WASM fixtures
+        if: ${{ inputs.run-tests && !matrix.cli-only && !matrix.use-cross }} # See comment for the "Build wasm library" step
+        run: script/generate-fixtures-wasm
+
+      - name: Run main tests
+        if: ${{ inputs.run-tests && !matrix.cli-only }} # Can't run the CLI natively on the GitHub runner host
+        run: $BUILD_CMD test --target=${{ matrix.target }}
+
+      - name: Run wasm tests
+        if: ${{ inputs.run-tests && !matrix.cli-only && !matrix.use-cross }} # See comment for the "Build wasm library" step
+        run: script/test-wasm
+
+      - name: Run benchmarks
+        if: ${{ inputs.run-tests && !matrix.cli-only && !matrix.use-cross }} # Cross-compiled benchmarks make no sense
+        run: $BUILD_CMD bench benchmark -p tree-sitter-cli --target=${{ matrix.target }}
+
+      - name: Upload CLI artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: tree-sitter.${{ matrix.platform }}
+          path: target/${{ matrix.target }}/release/tree-sitter${{ env.EXE }}
+          if-no-files-found: error
+          retention-days: 7
+
+      - name: Upload WASM artifacts
+        if: ${{ matrix.platform == 'linux-x64' }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: tree-sitter.wasm
+          path: |
+            lib/binding_web/tree-sitter.js
+            lib/binding_web/tree-sitter.wasm
+          if-no-files-found: error
+          retention-days: 7
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
deleted file mode 100644
index 361b4b4c..00000000
--- a/.github/workflows/ci.yml
+++ /dev/null
@@ -1,149 +0,0 @@
-name: CI
-
-on:
-  push:
-    branches:
-      - master
tags: - - v* - pull_request: - branches: - - "**" - -env: - CARGO_TERM_COLOR: always - CARGO_INCREMENTAL: 0 - -jobs: - unix-tests: - name: Unix tests - runs-on: ${{ matrix.os }} - strategy: - fail-fast: true - matrix: - os: - - macos-latest - - ubuntu-latest - steps: - - name: Checkout repo - uses: actions/checkout@v2 - - # Work around https://github.com/actions/cache/issues/403. - - name: Use GNU tar - if: matrix.os == 'macos-latest' - run: | - echo PATH="/usr/local/opt/gnu-tar/libexec/gnubin:$PATH" >> $GITHUB_ENV - - - name: Read Emscripten version - run: | - printf 'EMSCRIPTEN_VERSION=%s\n' "$(cat cli/loader/emscripten-version)" >> $GITHUB_ENV - - - name: Cache artifacts - id: cache - uses: actions/cache@v2 - with: - path: | - ~/.cargo/registry - ~/.cargo/git - target - key: ${{ runner.os }}-cargo-${{ hashFiles('Cargo.lock') }}-emscripten-${{ env.EMSCRIPTEN_VERSION }} - - - name: Install rust - if: steps.cache.outputs.cache-hit != 'true' - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - profile: minimal - - - name: Check Rust code formatting - run: cargo fmt -- --check - - - name: Install emscripten - uses: mymindstorm/setup-emsdk@v10 - with: - version: ${{ env.EMSCRIPTEN_VERSION }} - - - name: Build C library - run: make - - - name: Build wasm library - run: script/build-wasm - - - name: Build CLI - run: | - RUSTFLAGS="-D warnings" - cargo build --release - - - name: Set up fixture parsers - run: | - script/fetch-fixtures - script/generate-fixtures - script/generate-fixtures-wasm - - - name: Run main tests - run: cargo test - - - name: Run wasm tests - run: script/test-wasm - - - name: Run benchmarks - run: script/benchmark - - - name: Compress CLI binary - if: startsWith(github.ref, 'refs/tags/v') - run: | - cp target/release/tree-sitter . 
-          export platform=$(echo ${{ runner.os }} | awk '{print tolower($0)}')
-          gzip --suffix "-${platform}-x64.gz" tree-sitter
-
-      - name: Release
-        uses: softprops/action-gh-release@v1
-        if: startsWith(github.ref, 'refs/tags/v')
-        with:
-          draft: true
-          files: |
-            tree-sitter-*.gz
-            lib/binding_web/tree-sitter.js
-            lib/binding_web/tree-sitter.wasm
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-  windows-tests:
-    name: Windows tests
-    runs-on: windows-latest
-    steps:
-      - name: Checkout repo
-        uses: actions/checkout@v2
-
-      - name: Cache artifacts
-        id: cache
-        uses: actions/cache@v2
-        with:
-          path: |
-            ~/.cargo/registry
-            ~/.cargo/git
-            target
-          key: ${{ runner.os }}-cargo-${{ hashFiles('Cargo.lock') }}
-
-      - name: Install rust
-        if: steps.cache.outputs.cache-hit != 'true'
-        uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          profile: minimal
-
-      - name: Check Rust code formatting
-        run: cargo fmt -- --check
-
-      - name: Build CLI
-        run: |
-          $env:RUSTFLAGS="-D warnings"
-          cargo build --release
-
-      - name: Set up fixture parsers
-        run: |
-          script\fetch-fixtures.cmd
-          script\generate-fixtures.cmd
-
-      - name: Run main tests
-        run: script/test
diff --git a/.github/workflows/fast_checks.yml b/.github/workflows/fast_checks.yml
new file mode 100644
index 00000000..ea474799
--- /dev/null
+++ b/.github/workflows/fast_checks.yml
@@ -0,0 +1,31 @@
+name: Fast checks to fail fast on any simple code issues
+
+env:
+  CARGO_TERM_COLOR: always
+  RUSTFLAGS: "-D warnings"
+
+on:
+  workflow_call:
+
+jobs:
+  check_rust_formatting:
+    name: Check Rust formatting
+    runs-on: ubuntu-latest
+    steps:
+
+      - name: Checkout source code
+        uses: actions/checkout@v3
+
+      - name: Run cargo fmt
+        run: cargo fmt -- --check
+
+  check_c_warnings:
+    name: Check C warnings
+    runs-on: ubuntu-latest
+    steps:
+
+      - name: Checkout source code
+        uses: actions/checkout@v3
+
+      - name: Build the C library to check that it compiles without warnings
+        run: make -j CFLAGS="-Werror"
diff --git a/.github/workflows/full_rust_checks.yml b/.github/workflows/full_rust_checks.yml
new file mode 100644
index 00000000..2cc5f77d
--- /dev/null
+++ b/.github/workflows/full_rust_checks.yml
@@ -0,0 +1,32 @@
+name: Full Rust codebase checks
+
+env:
+  CARGO_TERM_COLOR: always
+  RUSTFLAGS: "-D warnings"
+
+on:
+  workflow_call:
+
+jobs:
+  run:
+    name: Run checks
+    runs-on: ubuntu-latest
+    steps:
+
+      - name: Checkout source code
+        uses: actions/checkout@v3
+
+      - name: Install rust toolchain
+        uses: dtolnay/rust-toolchain@master
+        with:
+          toolchain: stable
+          components: clippy, rustfmt
+
+      - name: Run cargo fmt
+        run: cargo fmt -- --check
+
+      # - name: Run clippy
+      #   run: cargo clippy --all-targets
+
+      - name: Run cargo check
+        run: cargo check --workspace --examples --tests --benches --bins
diff --git a/.github/workflows/msrv.yml b/.github/workflows/msrv.yml
new file mode 100644
index 00000000..3697930e
--- /dev/null
+++ b/.github/workflows/msrv.yml
@@ -0,0 +1,42 @@
+name: Minimum supported rust version
+
+env:
+  CARGO_TERM_COLOR: always
+  RUSTFLAGS: "-D warnings"
+
+on:
+  workflow_call:
+    inputs:
+      package:
+        description: Target cargo package name
+        required: true
+        type: string
+
+
+jobs:
+  run:
+    name: Run checks
+    runs-on: ubuntu-latest
+    steps:
+
+      - name: Checkout source code
+        uses: actions/checkout@v3
+
+      - name: Get the MSRV from the package metadata
+        id: msrv
+        run: cargo metadata --no-deps --format-version 1 | jq -r '"version=" + (.packages[] | select(.name == "${{ inputs.package }}").rust_version)' >> $GITHUB_OUTPUT
+
+      - name: Install rust toolchain (v${{ steps.msrv.outputs.version }})
+        uses: dtolnay/rust-toolchain@master
+        with:
+          toolchain: ${{ steps.msrv.outputs.version }}
+          components: clippy, rustfmt
+
+      - name: Run cargo fmt
+        run: cargo fmt -- --check
+
+      # - name: Run clippy (on minimum supported rust version to prevent warnings we can't fix)
+      #   run: cargo clippy --all-targets
+
+      # - name: Run main tests
+      #   run: cargo test
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 00000000..e1ad3e05
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,21 @@
+name: Publish to registries
+
+on:
+  workflow_call:
+
+jobs:
+  crates_io:
+    name: Publish to Crates.io
+    runs-on: ubuntu-latest
+    steps:
+      - name: Publish packages
+        run: |
+          echo "::warning::TODO: add Crates.io publishing logic"
+
+  npm:
+    name: Publish to npmjs.com
+    runs-on: ubuntu-latest
+    steps:
+      - name: Publish packages
+        run: |
+          echo "::warning::TODO: add npmjs.com publishing logic"
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 00000000..adf1021b
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,113 @@
+name: Release
+
+on:
+  workflow_call:
+    inputs:
+      ref:
+        default: ${{ github.ref }}
+        type: string
+
+jobs:
+  permissions:
+    name: Check permissions
+    runs-on: ubuntu-latest
+    outputs:
+      release_allowed: >
+        ${{
+          github.repository_owner == 'tree-sitter' &&
+          steps.maintainer.outputs.is_maintainer == 'true' &&
+          steps.local_branch.outputs.is_local == 'true'
+        }}
+    steps:
+
+      - name: Initiated by a maintainer
+        id: maintainer
+        env:
+          GH_TOKEN: ${{ github.token }}
+          repo: ${{ github.repository }}
+          actor: ${{ github.actor }}
+        run: |
+          maintainer=$(
+            gh api "/repos/${repo}/collaborators" |
+            jq ".[] | {login, maintainer: .permissions | .maintain} | select(.login == \"${actor}\") | .maintainer"
+          );
+          if [ "$maintainer" == "true" ]; then
+            echo "@${actor} has maintainer level permissions :rocket:" >> $GITHUB_STEP_SUMMARY;
+            echo "is_maintainer=true" >> $GITHUB_OUTPUT
+          fi
+
+      - name: The ref branch is local
+        id: local_branch
+        env:
+          is_local: ${{ github.event.pull_request.head.repo.full_name == github.repository }}
+        run: |
+          echo "is_local=${is_local}" >> $GITHUB_OUTPUT
+
+  release:
+    name: Release
+    needs: permissions
+    if: needs.permissions.outputs.release_allowed
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+    steps:
+
+      - name: Checkout source code
+        uses: actions/checkout@v3
+        with:
+          ref: ${{ inputs.ref }}
+
+      - name: Download build artifacts
+        uses: actions/download-artifact@v3
+        with:
+          path: artifacts
+
+      - name: Display structure of downloaded files
+        run: ls -lR
+        working-directory: artifacts
+
+      - name: Prepare release artifacts
+        run: |
+          mkdir -p target
+          mv artifacts/tree-sitter.wasm/* target/
+          rm -r artifacts/tree-sitter.wasm
+          for platform in $(cd artifacts; ls | sed 's/^tree-sitter\.//'); do
+            exe=$(ls artifacts/tree-sitter.$platform/tree-sitter*)
+            gzip --stdout --name $exe > target/tree-sitter-$platform.gz
+          done
+          rm -rf artifacts
+          ls -l target/
+
+      - name: Get tag name from a release/v* branch name
+        id: tag_name
+        env:
+          tag: ${{ github.head_ref }}
+        run: echo "tag=${tag#release/}" >> $GITHUB_OUTPUT
+
+      - name: Add a release tag
+        env:
+          ref: ${{ inputs.ref }}
+          tag: ${{ steps.tag_name.outputs.tag }}
+          message: "Release ${{ steps.tag_name.outputs.tag }}"
+        run: |
+          git config user.name "$(git log -1 --pretty='%cn')"
+          git config user.email "$(git log -1 --pretty='%ce')"
+          git tag -a "$tag" HEAD -m
"$message" + git push origin "$tag" + + - name: Create release + uses: softprops/action-gh-release@v1 + with: + name: ${{ steps.tag_name.outputs.tag }} + tag_name: ${{ steps.tag_name.outputs.tag }} + fail_on_unmatched_files: true + files: | + target/tree-sitter-*.gz + target/tree-sitter.wasm + target/tree-sitter.js + + - name: Merge release PR + env: + GH_TOKEN: ${{ github.token }} + run: | + gh pr merge ${{ github.event.pull_request.html_url }} --match-head-commit $(git rev-parse HEAD) --merge --delete-branch diff --git a/.github/workflows/sanitize.yml b/.github/workflows/sanitize.yml new file mode 100644 index 00000000..2ece182b --- /dev/null +++ b/.github/workflows/sanitize.yml @@ -0,0 +1,50 @@ +name: Sanitize + +env: + CARGO_TERM_COLOR: always + RUSTFLAGS: "-D warnings" + +on: + workflow_call: + +jobs: + check_undefined_behaviour: + name: Sanitizer checks + runs-on: ubuntu-latest + env: + TREE_SITTER: ${{ github.workspace }}/target/release/tree-sitter + steps: + - name: Checkout source code + uses: actions/checkout@v3 + + - name: Install UBSAN library + run: sudo apt-get update -y && sudo apt-get install -y libubsan1 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - name: Build CLI + run: cargo build --release + + - name: Fetch fixtures + run: script/fetch-fixtures + + - name: Generate fixtures + run: script/generate-fixtures + + - name: Run main tests with undefined behaviour sanitizer (UBSAN) + env: + UBSAN_OPTIONS: halt_on_error=1 + CFLAGS: -fsanitize=undefined + RUSTFLAGS: ${{ env.RUSTFLAGS }} -lubsan + run: cargo test -- --test-threads 1 + + - name: Run main tests with address sanitizer (ASAN) + env: + ASAN_OPTIONS: halt_on_error=1 + CFLAGS: -fsanitize=address + RUSTFLAGS: ${{ env.RUSTFLAGS }} -Zsanitizer=address --cfg=sanitizing + run: | + rustup install nightly + rustup component add rust-src --toolchain nightly-x86_64-unknown-linux-gnu + cargo +nightly test -Z build-std --target x86_64-unknown-linux-gnu -- --test-threads 1 diff --git a/.gitignore b/.gitignore index 834fd20f..53550dd7 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ log*.html fuzz-results +/tree-sitter.pc test/fixtures/grammars/* !test/fixtures/grammars/.gitkeep package-lock.json @@ -24,4 +25,6 @@ docs/assets/js/tree-sitter.js *.obj *.exp *.lib -*.wasm \ No newline at end of file +*.wasm +.swiftpm +zig-* diff --git a/Cargo.lock b/Cargo.lock index 345c7a8c..fd1e06cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "ahash" -version = "0.7.6" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +checksum = "5a824f2aa7e75a0c98c5a504fceb80649e9c35265d44525b5f94de4771a395cd" dependencies = [ "getrandom", "once_cell", @@ -24,22 +24,13 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "0.7.19" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" dependencies = [ "memchr", ] -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - [[package]] name = "ansi_term" version = "0.12.1" @@ -51,15 +42,15 @@ dependencies = [ [[package]] name = "anyhow" 
-version = "1.0.66" +version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "216261ddc8289130e551ddcd5ce8a064710c0d064a4d2895c67151c92b5443f6" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" [[package]] name = "arrayvec" -version = "0.7.2" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "ascii" @@ -93,6 +84,29 @@ dependencies = [ "serde", ] +[[package]] +name = "bindgen" +version = "0.66.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b84e06fc203107bfbad243f4aba2af864eb7db3b1cf46ea0a023b0b433d2a7" +dependencies = [ + "bitflags 2.4.1", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.38", + "which", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -100,22 +114,52 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] -name = "bumpalo" -version = "3.11.1" +name = "bitflags" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" + +[[package]] +name = "bumpalo" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" [[package]] name = "byteorder" -version = "1.4.3" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" [[package]] name = "cc" -version = "1.0.73" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] + +[[package]] +name = "cesu8" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] [[package]] name = "cfg-if" @@ -124,22 +168,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] -name = "chrono" -version = "0.4.22" +name = "chunked_transfer" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfd4d1b31faaa3a89d7934dbded3111da0d2ef28e3ebccdb4f0179f5929d1ef1" -dependencies = [ - "iana-time-zone", - "num-integer", - "num-traits", - "winapi", -] +checksum = 
"cca491388666e04d7248af3f60f0c40cfb0991c72205595d7c396e3510207d1a" [[package]] -name = "chunked_transfer" -version = "1.4.0" +name = "clang-sys" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fff857943da45f546682664a79488be82e69e43c1a7a2307679ab9afb3a66d2e" +checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f" +dependencies = [ + "glob", + "libc", + "libloading", +] [[package]] name = "clap" @@ -149,7 +192,7 @@ checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" dependencies = [ "ansi_term", "atty", - "bitflags", + "bitflags 1.3.2", "strsim", "textwrap", "unicode-width", @@ -157,20 +200,30 @@ dependencies = [ ] [[package]] -name = "codespan-reporting" -version = "0.11.1" +name = "combine" +version = "4.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4" dependencies = [ - "termcolor", - "unicode-width", + "bytes", + "memchr", +] + +[[package]] +name = "core-foundation" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +dependencies = [ + "core-foundation-sys", + "libc", ] [[package]] name = "core-foundation-sys" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpp_demangle" @@ -229,8 +282,8 @@ source = "git+https://github.com/maxbrunsfeld/wasmtime?rev=669e0aaab1181b23fcc4e dependencies = [ "cranelift-entity", "fxhash", - "hashbrown", - "indexmap", + "hashbrown 0.12.3", + "indexmap 1.9.3", "log", "smallvec", ] @@ -295,56 +348,22 @@ dependencies = [ [[package]] name = "ctor" -version = "0.1.26" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" +checksum = "37e366bff8cd32dd8754b0991fb66b279dc48f598c3a18914852a6673deef583" dependencies = [ "quote", - "syn", + "syn 2.0.38", ] [[package]] -name = "cxx" -version = "1.0.80" +name = "ctrlc" +version = "3.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b7d4e43b25d3c994662706a1d4fcfc32aaa6afd287502c111b237093bb23f3a" +checksum = "82e95fbd621905b854affdc67943b043a0fbb6ed7385fd5a25650d19a8a6cfdf" dependencies = [ - "cc", - "cxxbridge-flags", - "cxxbridge-macro", - "link-cplusplus", -] - -[[package]] -name = "cxx-build" -version = "1.0.80" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84f8829ddc213e2c1368e51a2564c552b65a8cb6a28f31e576270ac81d5e5827" -dependencies = [ - "cc", - "codespan-reporting", - "once_cell", - "proc-macro2", - "quote", - "scratch", - "syn", -] - -[[package]] -name = "cxxbridge-flags" -version = "1.0.80" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e72537424b474af1460806647c41d4b6d35d09ef7fe031c5c2fa5766047cc56a" - -[[package]] -name = "cxxbridge-macro" -version = "1.0.80" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "309e4fb93eed90e1e14bea0da16b209f81813ba9fc7830c20ed151dd7bc0a4d7" -dependencies = [ - "proc-macro2", - "quote", - "syn", + "nix", + "windows-sys 0.48.0", ] 
[[package]] @@ -365,7 +384,16 @@ version = "3.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30baa043103c9d0c2a57cf537cc2f35623889dc0d405e6c3cccfadbc81c71309" dependencies = [ - "dirs-sys", + "dirs-sys 0.3.7", +] + +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys 0.4.1", ] [[package]] @@ -380,16 +408,28 @@ dependencies = [ ] [[package]] -name = "either" -version = "1.8.0" +name = "dirs-sys" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.48.0", +] + +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" [[package]] name = "env_logger" -version = "0.9.1" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c90bf5f19754d10198ccb95b70664fc925bd1fc090a0fd9a6ebc54acc8cd6272" +checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" dependencies = [ "atty", "humantime", @@ -398,6 +438,12 @@ dependencies = [ "termcolor", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "errno" version = "0.2.8" @@ -409,6 +455,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "errno" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" +dependencies = [ + "libc", + "windows-sys 0.48.0", +] + [[package]] name = "errno-dragonfly" version = "0.1.2" @@ -427,18 +483,15 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" [[package]] name = "fastrand" -version = "1.8.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499" -dependencies = [ - "instant", -] +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" [[package]] name = "form_urlencoded" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" dependencies = [ "percent-encoding", ] @@ -454,9 +507,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.8" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if", "libc", @@ -470,15 +523,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22030e2c5a68ec659fde1e949a745124b48e6fa8b045b7ed5bd1fe4ccc5c4e5d" dependencies = [ "fallible-iterator", - "indexmap", + "indexmap 1.9.3", "stable_deref_trait", ] [[package]] name = "glob" -version = "0.3.0" +version 
= "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "hashbrown" @@ -489,6 +542,12 @@ dependencies = [ "ahash", ] +[[package]] +name = "hashbrown" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156" + [[package]] name = "hermit-abi" version = "0.1.19" @@ -499,49 +558,40 @@ dependencies = [ ] [[package]] -name = "html-escape" -version = "0.2.11" +name = "home" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8e7479fa1ef38eb49fb6a42c426be515df2d063f06cb8efd3e50af073dbc26c" +checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +dependencies = [ + "windows-sys 0.48.0", +] + +[[package]] +name = "html-escape" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476" dependencies = [ "utf8-width", ] +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + [[package]] name = "humantime" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" -[[package]] -name = "iana-time-zone" -version = "0.1.51" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5a6ef98976b22b3b7f2f3a806f858cb862044cfa66805aa3ad84cb3d3b785ed" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "winapi", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" -dependencies = [ - "cxx", - "cxx-build", -] - [[package]] name = "idna" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -549,29 +599,36 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.9.1" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", - "hashbrown", + "hashbrown 0.12.3", "serde", ] [[package]] -name = "instant" -version = "0.1.12" +name = "indexmap" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897" dependencies = [ - "cfg-if", + "equivalent", + "hashbrown 0.14.2", ] [[package]] -name = "io-lifetimes" -version = "0.7.4" +name = "indoc" +version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6e481ccbe3dea62107216d0d1138bb8ad8e5e5c43009a098bd1990272c497b0" 
+checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8" + +[[package]] +name = "io-lifetimes" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ce5ef949d49ee85593fc4d3f3f95ad61657076395cbbce23e2121fc5542074" [[package]] name = "itertools" @@ -584,15 +641,37 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.4" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + +[[package]] +name = "jni" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" +dependencies = [ + "cesu8", + "cfg-if", + "combine", + "jni-sys", + "log", + "thiserror", + "walkdir", + "windows-sys 0.45.0", +] + +[[package]] +name = "jni-sys" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" [[package]] name = "js-sys" -version = "0.3.60" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" +checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" dependencies = [ "wasm-bindgen", ] @@ -604,30 +683,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] -name = "libc" -version = "0.2.136" +name = "lazycell" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55edcf6c0bb319052dea84732cf99db461780fd5e8d3eb46ab6ff312ab31f197" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" [[package]] name = "libloading" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efbc0f03f9a775e9f6aed295c6a1ba2253c5757a9e03d55c6caa46a681abcddd" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" dependencies = [ "cfg-if", "winapi", ] -[[package]] -name = "link-cplusplus" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9272ab7b96c9046fbc5bc56c06c117cb639fe2d509df0c421cad82d2915cf369" -dependencies = [ - "cc", -] - [[package]] name = "linux-raw-sys" version = "0.0.46" @@ -635,13 +711,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4d2456c373231a208ad294c33dc5bff30051eafd954cd4caae83a712b12854d" [[package]] -name = "log" -version = "0.4.17" +name = "linux-raw-sys" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] +checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] name = "mach" @@ -653,10 +732,19 @@ dependencies = [ ] 
[[package]] -name = "memchr" -version = "2.5.0" +name = "malloc_buf" +version = "0.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb" +dependencies = [ + "libc", +] + +[[package]] +name = "memchr" +version = "2.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" [[package]] name = "memoffset" @@ -668,22 +756,45 @@ dependencies = [ ] [[package]] -name = "num-integer" -version = "0.1.45" +name = "minimal-lexical" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "ndk-context" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27b02d87554356db9e9a873add8782d4ea6e3e58ea071a9adb9a2e8ddb884a8b" + +[[package]] +name = "nix" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" dependencies = [ - "autocfg", - "num-traits", + "bitflags 2.4.1", + "cfg-if", + "libc", ] [[package]] -name = "num-traits" -version = "0.2.15" +name = "nom" +version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" dependencies = [ - "autocfg", + "memchr", + "minimal-lexical", +] + +[[package]] +name = "objc" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1" +dependencies = [ + "malloc_buf", ] [[package]] @@ -693,61 +804,78 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "21158b2c33aa6d4561f1c0a6ea283ca92bc54802a93b263e910746d679a7eb53" dependencies = [ "crc32fast", - "hashbrown", - "indexmap", + "hashbrown 0.12.3", + "indexmap 1.9.3", "memchr", ] [[package]] name = "once_cell" -version = "1.15.0" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] -name = "output_vt100" -version = "0.1.3" +name = "option-ext" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66" -dependencies = [ - "winapi", -] +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] name = "paste" -version = "1.0.9" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1de2e551fb905ac83f73f7aedf2f0cb4a0da7e35efa24a202a936269f1f18e1" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + +[[package]] +name = "path-slash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e91099d4268b0e11973f036e885d652fb0b21fedcf69738c627f94db6a44f42" + +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" [[package]] name = "percent-encoding" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" [[package]] name = "ppv-lite86" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "pretty_assertions" -version = "0.7.2" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cab0e7c02cf376875e9335e0ba1da535775beb5450d21e1dffca068818ed98b" +checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" dependencies = [ - "ansi_term", - "ctor", "diff", - "output_vt100", + "yansi", +] + +[[package]] +name = "prettyplease" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" +dependencies = [ + "proc-macro2", + "syn 2.0.38", ] [[package]] name = "proc-macro2" -version = "1.0.47" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" +checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" dependencies = [ "unicode-ident", ] @@ -763,9 +891,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.21" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ "proc-macro2", ] @@ -800,13 +928,28 @@ dependencies = [ "getrandom", ] +[[package]] +name = "raw-window-handle" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2ff9a1f06a88b01621b7ae906ef0211290d1c8a168a15542486a8f61c0833b9" + [[package]] name = "redox_syscall" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ - "bitflags", + "bitflags 1.3.2", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", ] [[package]] @@ -816,15 +959,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" dependencies = [ "getrandom", - "redox_syscall", + "redox_syscall 0.2.16", "thiserror", ] [[package]] name = "regalloc2" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69025b4a161879ba90719837c06621c3d73cffa147a000aeacf458f6a9572485" +checksum = "91b2eab54204ea0117fe9a060537e0b07a4e72f7c7d182361ecc346cab2240e5" dependencies = [ "fxhash", "log", @@ -834,35 +977,44 @@ dependencies = [ [[package]] name = "regex" -version = "1.6.0" +version = "1.10.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-automata", + "regex-syntax 0.8.2", +] + +[[package]] +name = "regex-automata" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.8.2", ] [[package]] name = "regex-syntax" -version = "0.6.27" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" [[package]] -name = "remove_dir_all" -version = "0.5.3" +name = "regex-syntax" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" -dependencies = [ - "winapi", -] +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "rustc-demangle" -version = "0.1.21" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" [[package]] name = "rustc-hash" @@ -872,23 +1024,36 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version = "0.35.12" +version = "0.35.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "985947f9b6423159c4726323f373be0a21bdb514c5af06a849cb3d2dce2d01e8" +checksum = "5363f616a5244fd47fc1dd0a0b24c28a5c0154f5010c16332a7ad6f78f2e8b62" dependencies = [ - "bitflags", - "errno", + "bitflags 1.3.2", + "errno 0.2.8", "io-lifetimes", "libc", - "linux-raw-sys", - "windows-sys", + "linux-raw-sys 0.0.46", + "windows-sys 0.42.0", +] + +[[package]] +name = "rustix" +version = "0.38.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b426b0506e5d50a7d8dafcf2e81471400deb602392c7dd110815afb4eaf02a3" +dependencies = [ + "bitflags 2.4.1", + "errno 0.3.5", + "libc", + "linux-raw-sys 0.4.10", + "windows-sys 0.48.0", ] [[package]] name = "ryu" -version = "1.0.11" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" [[package]] name = "same-file" @@ -899,55 +1064,64 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "scratch" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8132065adcfd6e02db789d9285a0deb2f3fcb04002865ab67d5fb103533898" - [[package]] name = "semver" -version = "1.0.14" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e25dfac463d778e353db5be2449d1cce89bd6fd23c9f1ea21310ce6e5a1b29c4" +checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" [[package]] name = "serde" -version = "1.0.147" +version = "1.0.171" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965" +checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.147" +version = "1.0.171" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f1d362ca8fc9c3e3a7484440752472d68a6caa98f1ab81d99b5dfe517cec852" +checksum = "389894603bd18c46fa56231694f8d827779c0951a667087194cf9de94ed24682" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.38", ] [[package]] name = "serde_json" -version = "1.0.87" +version = "1.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce777b7b150d76b9cf60d28b55f5847135a003f7d7350c6be7a773508ce7d45" +checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" dependencies = [ - "indexmap", + "indexmap 2.0.2", "itoa", "ryu", "serde", ] [[package]] -name = "slice-group-by" -version = "0.3.0" +name = "serde_spanned" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03b634d87b960ab1a38c4fe143b508576f075e7c978bfad18217645ebfdfa2ec" +checksum = "12022b835073e5b11e90a14f86838ceb1c8fb0325b72416845c487ac0fa95e80" +dependencies = [ + "serde", +] + +[[package]] +name = "shlex" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380" + +[[package]] +name = "slice-group-by" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "826167069c09b99d56f31e9ae5c99049e932a98c9dc2dac47645b08dbbf76ba7" [[package]] name = "smallbitvec" @@ -957,9 +1131,9 @@ checksum = "75ce4f9dc4a41b4c3476cc925f1efb11b66df373a8fde5d4b8915fa91b5d995e" [[package]] name = "smallvec" -version = "1.10.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" +checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" [[package]] name = "stable_deref_trait" @@ -975,9 +1149,20 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" [[package]] name = "syn" -version = "1.0.103" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" dependencies = [ "proc-macro2", "quote", @@ -986,29 +1171,28 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.4" +version = "0.12.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c02424087780c9b71cc96799eaeddff35af2bc513278cda5c99fc1f5d026d3c1" +checksum = "14c39fd04924ca3a864207c66fc2cd7d22d7c016007f9ce846cbb9326331930a" [[package]] name = "tempfile" -version = "3.3.0" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" dependencies = [ "cfg-if", "fastrand", - "libc", - "redox_syscall", - "remove_dir_all", - 
"winapi", + "redox_syscall 0.4.1", + "rustix 0.38.21", + "windows-sys 0.48.0", ] [[package]] name = "termcolor" -version = "1.1.3" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +checksum = "6093bad37da69aab9d123a8091e4be0aa4a03e4d601ec641c327398315f62b64" dependencies = [ "winapi-util", ] @@ -1024,35 +1208,34 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.37" +version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10deb33631e3c9018b9baf9dcbbc4f737320d2b576bac10f6aefa048fa407e3e" +checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.37" +version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb" +checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.38", ] [[package]] name = "tiny_http" -version = "0.8.2" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce51b50006056f590c9b7c3808c3bd70f0d1101666629713866c227d6e58d39" +checksum = "389915df6413a2e74fb181895f933386023c71110878cd0825588928e64cdc82" dependencies = [ "ascii", - "chrono", "chunked_transfer", + "httpdate", "log", - "url", ] [[package]] @@ -1066,23 +1249,49 @@ dependencies = [ [[package]] name = "tinyvec_macros" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "toml" -version = "0.5.9" +version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7" +checksum = "dd79e69d3b627db300ff956027cc6c3798cef26d22526befdfcd12feeb6d2257" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" dependencies = [ "serde", ] [[package]] -name = "tree-sitter" -version = "0.20.9" +name = "toml_edit" +version = "0.19.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ + "indexmap 2.0.2", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + +[[package]] +name = "tree-sitter" +version = "0.20.10" +dependencies = [ + "bindgen", "cc", "regex", "wasmtime", @@ -1091,24 +1300,28 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.20.7" +version = "0.20.8" dependencies = [ "ansi_term", "anyhow", "atty", "clap", "ctor", + "ctrlc", "difference", - "dirs", + "dirs 5.0.1", "glob", "html-escape", - "indexmap", + "indexmap 2.0.2", + "indoc", "lazy_static", "log", + "memchr", + "path-slash", "pretty_assertions", "rand", "regex", - "regex-syntax", + "regex-syntax 0.7.5", "rustc-hash", "semver", "serde", @@ -1122,6 +1335,8 @@ dependencies = [ "tree-sitter-highlight", "tree-sitter-loader", "tree-sitter-tags", + "tree-sitter-tests-proc-macro", + 
"unindent", "walkdir", "webbrowser", "which", @@ -1132,15 +1347,16 @@ name = "tree-sitter-config" version = "0.19.0" dependencies = [ "anyhow", - "dirs", + "dirs 3.0.2", "serde", "serde_json", ] [[package]] name = "tree-sitter-highlight" -version = "0.20.1" +version = "0.20.2" dependencies = [ + "lazy_static", "regex", "thiserror", "tree-sitter", @@ -1152,7 +1368,7 @@ version = "0.20.0" dependencies = [ "anyhow", "cc", - "dirs", + "dirs 3.0.2", "libloading", "once_cell", "regex", @@ -1174,17 +1390,27 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "tree-sitter-tests-proc-macro" +version = "0.0.0" +dependencies = [ + "proc-macro2", + "quote", + "rand", + "syn 1.0.109", +] + [[package]] name = "unicode-bidi" -version = "0.3.8" +version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-ident" -version = "1.0.5" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-normalization" @@ -1197,15 +1423,21 @@ dependencies = [ [[package]] name = "unicode-width" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + +[[package]] +name = "unindent" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" [[package]] name = "url" -version = "2.3.1" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" +checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" dependencies = [ "form_urlencoded", "idna", @@ -1232,12 +1464,11 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "walkdir" -version = "2.3.2" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" dependencies = [ "same-file", - "winapi", "winapi-util", ] @@ -1249,9 +1480,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" +checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -1259,24 +1490,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn", + "syn 2.0.38", 
"wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1284,22 +1515,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.38", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" [[package]] name = "wasmparser" @@ -1307,7 +1538,7 @@ version = "0.92.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da34cec2a8c23db906cdf8b26e988d7a7f0d549eb5d51299129647af61a1b37" dependencies = [ - "indexmap", + "indexmap 1.9.3", ] [[package]] @@ -1318,7 +1549,7 @@ dependencies = [ "anyhow", "bincode", "cfg-if", - "indexmap", + "indexmap 1.9.3", "libc", "log", "object", @@ -1332,7 +1563,7 @@ dependencies = [ "wasmtime-environ", "wasmtime-jit", "wasmtime-runtime", - "windows-sys", + "windows-sys 0.36.1", ] [[package]] @@ -1392,7 +1623,7 @@ dependencies = [ "anyhow", "cranelift-entity", "gimli", - "indexmap", + "indexmap 1.9.3", "log", "object", "serde", @@ -1422,7 +1653,7 @@ dependencies = [ "wasmtime-environ", "wasmtime-jit-icache-coherence", "wasmtime-runtime", - "windows-sys", + "windows-sys 0.36.1", ] [[package]] @@ -1440,7 +1671,7 @@ source = "git+https://github.com/maxbrunsfeld/wasmtime?rev=669e0aaab1181b23fcc4e dependencies = [ "cfg-if", "libc", - "windows-sys", + "windows-sys 0.36.1", ] [[package]] @@ -1451,19 +1682,19 @@ dependencies = [ "anyhow", "cc", "cfg-if", - "indexmap", + "indexmap 1.9.3", "libc", "log", "mach", "memoffset", "paste", "rand", - "rustix", + "rustix 0.35.16", "thiserror", "wasmtime-asm-macros", "wasmtime-environ", "wasmtime-jit-debug", - "windows-sys", + "windows-sys 0.36.1", ] [[package]] @@ -1479,9 +1710,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.60" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f" +checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" dependencies = [ "js-sys", "wasm-bindgen", @@ -1489,32 +1720,33 @@ dependencies = [ [[package]] name = "webbrowser" -version = "0.5.5" +version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecad156490d6b620308ed411cfee90d280b3cbd13e189ea0d3fada8acc89158a" +checksum = "82b2391658b02c27719fc5a0a73d6e696285138e8b12fba9d4baa70451023c71" dependencies = [ + "core-foundation", + "home", + "jni", + "log", + "ndk-context", + "objc", + "raw-window-handle", + "url", "web-sys", - "widestring", - "winapi", ] [[package]] name = "which" -version = "4.3.0" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1c831fbbee9e129a8cf93e7747a82da9d95ba8e16621cae60ec2cdc849bacb7b" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" dependencies = [ "either", - "libc", + "home", "once_cell", + "rustix 0.38.21", ] -[[package]] -name = "widestring" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c168940144dd21fd8046987c16a46a33d5fc84eec29ef9dcddc2ac9e31526b7c" - [[package]] name = "winapi" version = "0.3.9" @@ -1533,9 +1765,9 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" dependencies = [ "winapi", ] @@ -1552,39 +1784,201 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", + "windows_aarch64_msvc 0.36.1", + "windows_i686_gnu 0.36.1", + "windows_i686_msvc 0.36.1", + "windows_x86_64_gnu 0.36.1", + "windows_x86_64_msvc 0.36.1", ] +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_i686_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_x86_64_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "winnow" +version = "0.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3b801d0e0a6726477cc207f60162da452f3a95adb368399bef20a946e06f65c" +dependencies = [ + "memchr", +] + +[[package]] +name = "yansi" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" diff --git a/Cargo.toml b/Cargo.toml index 38830584..bc2aedaa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,4 +1,21 @@ [workspace] default-members = ["cli"] - members = ["cli", "lib"] +resolver = "2" + +[workspace.package] +rust-version = "1.65" + +[profile.release] +strip = true # Automatically strip symbols from the binary. +lto = true # Link-time optimization. +opt-level = 3 # Optimization level 3. +codegen-units = 1 # Maximum size reduction optimizations. + +[profile.size] +inherits = "release" +opt-level = "s" # Optimize for size. + +[profile.profile] +inherits = "release" +strip = false diff --git a/LICENSE b/LICENSE index 4c220022..3f674119 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2018-2021 Max Brunsfeld +Copyright (c) 2018-2023 Max Brunsfeld Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Makefile b/Makefile index 15505db0..6030852e 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -VERSION := 0.6.3 +VERSION := 0.20.10 # install directory layout PREFIX ?= /usr/local @@ -18,8 +18,8 @@ endif OBJ := $(SRC:.c=.o) # define default flags, and override to append mandatory flags -CFLAGS ?= -O3 -Wall -Wextra -Werror -override CFLAGS += -std=gnu99 -fPIC -Ilib/src -Ilib/src/wasm -Ilib/include +override CFLAGS := -O3 -std=gnu99 -fPIC -fvisibility=hidden -Wall -Wextra -Wshadow $(CFLAGS) +override CFLAGS += -Ilib/src -Ilib/src/wasm -Ilib/include # ABI versioning SONAME_MAJOR := 0 @@ -50,20 +50,27 @@ libtree-sitter.$(SOEXTVER): $(OBJ) $(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@ ln -sf $@ libtree-sitter.$(SOEXT) ln -sf $@ libtree-sitter.$(SOEXTVER_MAJOR) +ifneq ($(STRIP),) + $(STRIP) $@ +endif install: all - install -d '$(DESTDIR)$(LIBDIR)' - install -m755 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a - install -m755 libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER) - ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR) - ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) - install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter - install -m644 lib/include/tree_sitter/*.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/ - install -d '$(DESTDIR)$(PCLIBDIR)' sed -e 's|@LIBDIR@|$(LIBDIR)|;s|@INCLUDEDIR@|$(INCLUDEDIR)|;s|@VERSION@|$(VERSION)|' \ -e 's|=$(PREFIX)|=$${prefix}|' \ -e 's|@PREFIX@|$(PREFIX)|' \ - tree-sitter.pc.in > '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc + tree-sitter.pc.in > tree-sitter.pc + + install -d '$(DESTDIR)$(LIBDIR)' + install -m644 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/ + install -m755 libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/ + ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR) + ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) + + install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter + install -m644 
lib/include/tree_sitter/api.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/ + + install -d '$(DESTDIR)$(PCLIBDIR)' + install -m644 tree-sitter.pc '$(DESTDIR)$(PCLIBDIR)'/ clean: rm -f lib/src/*.o libtree-sitter.a libtree-sitter.$(SOEXT) libtree-sitter.$(SOEXTVER_MAJOR) libtree-sitter.$(SOEXTVER) diff --git a/Package.swift b/Package.swift new file mode 100644 index 00000000..7966e529 --- /dev/null +++ b/Package.swift @@ -0,0 +1,39 @@ +// swift-tools-version: 5.8 +// The swift-tools-version declares the minimum version of Swift required to build this package. + +import PackageDescription + +let package = Package( + name: "TreeSitter", + products: [ + // Products define the executables and libraries a package produces, and make them visible to other packages. + .library( + name: "TreeSitter", + targets: ["TreeSitter"]), + ], + targets: [ + .target(name: "TreeSitter", + path: "lib", + exclude: [ + "binding_rust", + "binding_web", + "Cargo.toml", + "README.md", + "src/unicode/README.md", + "src/unicode/LICENSE", + "src/unicode/ICU_SHA", + "src/get_changed_ranges.c", + "src/tree_cursor.c", + "src/stack.c", + "src/node.c", + "src/lexer.c", + "src/parser.c", + "src/language.c", + "src/alloc.c", + "src/subtree.c", + "src/tree.c", + "src/query.c" + ], + sources: ["src/lib.c"]), + ] +) diff --git a/README.md b/README.md index 2d1e911d..e74c6e45 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,11 @@ # tree-sitter -[![Build Status](https://github.com/tree-sitter/tree-sitter/workflows/CI/badge.svg)](https://github.com/tree-sitter/tree-sitter/actions) -[![Build status](https://ci.appveyor.com/api/projects/status/vtmbd6i92e97l55w/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/tree-sitter/branch/master) +[![CICD badge]][CICD] [![DOI](https://zenodo.org/badge/14164618.svg)](https://zenodo.org/badge/latestdoi/14164618) +[CICD badge]: https://github.com/tree-sitter/tree-sitter/actions/workflows/CICD.yml/badge.svg +[CICD]: https://github.com/tree-sitter/tree-sitter/actions/workflows/CICD.yml + Tree-sitter is a parser generator tool and an incremental parsing library. It can build a concrete syntax tree for a source file and efficiently update the syntax tree as the source file is edited. 
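One practical consequence of the Makefile change above is worth a sketch: `make install` now renders `tree-sitter.pc.in` into a concrete `tree-sitter.pc` and installs it under `$(DESTDIR)$(PCLIBDIR)`, so downstream builds can discover the library through pkg-config instead of hard-coding paths. A minimal sketch, assuming a consumer crate's build script using the third-party `pkg-config` crate declared under `[build-dependencies]`; the warning messages are illustrative and not part of this patch:

```rust
// build.rs of a hypothetical consumer crate: locate the libtree-sitter
// that `make install` registered with pkg-config.
fn main() {
    match pkg_config::probe_library("tree-sitter") {
        // On success, probe_library has already emitted the needed
        // cargo:rustc-link-search / cargo:rustc-link-lib directives.
        Ok(lib) => println!("cargo:warning=using system tree-sitter {}", lib.version),
        // No tree-sitter.pc visible on this machine: surface the probe error.
        Err(err) => println!("cargo:warning=pkg-config probe failed: {err}"),
    }
}
```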
Tree-sitter aims to be: - **General** enough to parse any programming language diff --git a/build.zig b/build.zig new file mode 100644 index 00000000..8b6a7c33 --- /dev/null +++ b/build.zig @@ -0,0 +1,16 @@ +const std = @import("std"); + +pub fn build(b: *std.Build) void { + var lib = b.addStaticLibrary(.{ + .name = "tree-sitter", + .target = b.standardTargetOptions(.{}), + .optimize = b.standardOptimizeOption(.{}), + }); + + lib.linkLibC(); + lib.addCSourceFile(.{ .file = .{ .path = "lib/src/lib.c" }, .flags = &.{} }); + lib.addIncludePath(.{ .path = "lib/include" }); + lib.addIncludePath(.{ .path = "lib/src" }); + + b.installArtifact(lib); +} diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 10ee301a..0a877454 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,47 +1,53 @@ [package] name = "tree-sitter-cli" description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.20.7" +version = "0.20.8" authors = ["Max Brunsfeld "] -edition = "2018" +edition = "2021" license = "MIT" readme = "README.md" keywords = ["incremental", "parsing"] categories = ["command-line-utilities", "parsing"] repository = "https://github.com/tree-sitter/tree-sitter" +rust-version.workspace = true [[bin]] name = "tree-sitter" path = "src/main.rs" +doc = false [[bench]] name = "benchmark" harness = false [dependencies] -ansi_term = "0.12" -anyhow = "1.0" -atty = "0.2" +ansi_term = "0.12.1" +anyhow = "1.0.72" +atty = "0.2.14" clap = "2.32" -difference = "2.0" -dirs = "3.0" -glob = "0.3.0" -html-escape = "0.2.6" -indexmap = "1" -lazy_static = "1.2.0" -regex = "1" -regex-syntax = "0.6.4" -rustc-hash = "1" -semver = "1.0" -serde = { version = "1.0.130", features = ["derive"] } +ctrlc = { version = "3.4.0", features = ["termination"] } +difference = "2.0.0" +dirs = "5.0.1" +glob = "0.3.1" +html-escape = "0.2.13" +indexmap = "2.0.0" +lazy_static = "1.4.0" +memchr = "2.6.3" +path-slash = "0.2.1" +regex = "1.9.1" +regex-syntax = "0.7.4" +rustc-hash = "1.1.0" +semver = "1.0.18" +# Due to https://github.com/serde-rs/serde/issues/2538 +serde = { version = "1.0, < 1.0.172", features = ["derive"] } smallbitvec = "2.5.1" -tiny_http = "0.8" -walkdir = "2.3" -webbrowser = "0.5.1" -which = "4.1.0" +tiny_http = "0.12.0" +walkdir = "2.3.3" +webbrowser = "0.8.10" +which = "4.4.0" [dependencies.tree-sitter] -version = "0.20.3" +version = "0.20.10" path = "../lib" features = ["wasm"] @@ -67,14 +73,18 @@ version = "1.0" features = ["preserve_order"] [dependencies.log] -version = "0.4.6" +version = "0.4.19" features = ["std"] [dev-dependencies] -rand = "0.8" -tempfile = "3" -pretty_assertions = "0.7.2" -ctor = "0.1" +tree_sitter_proc_macro = { path = "src/tests/proc_macro", package = "tree-sitter-tests-proc-macro" } + +rand = "0.8.5" +tempfile = "3.6.0" +pretty_assertions = "1.4.0" +ctor = "0.2.4" +unindent = "0.2.2" +indoc = "2.0.3" [build-dependencies] -toml = "0.5" +toml = "0.7.6" diff --git a/cli/README.md b/cli/README.md index fe45b17b..eb93bcfa 100644 --- a/cli/README.md +++ b/cli/README.md @@ -1,9 +1,11 @@ -Tree-sitter CLI -=============== +# Tree-sitter CLI -[![Build Status](https://travis-ci.org/tree-sitter/tree-sitter.svg?branch=master)](https://travis-ci.org/tree-sitter/tree-sitter) -[![Build status](https://ci.appveyor.com/api/projects/status/vtmbd6i92e97l55w/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/tree-sitter/branch/master) -[![Crates.io](https://img.shields.io/crates/v/tree-sitter-cli.svg)](https://crates.io/crates/tree-sitter-cli) +[![crates.io 
badge]][crates.io] [![npmjs.com badge]][npmjs.com] + +[crates.io]: https://crates.io/crates/tree-sitter-cli +[crates.io badge]: https://img.shields.io/crates/v/tree-sitter-cli.svg?color=%23B48723 +[npmjs.com]: https://www.npmjs.org/package/tree-sitter-cli +[npmjs.com badge]: https://img.shields.io/npm/v/tree-sitter-cli.svg?color=%23BF4A4A The Tree-sitter CLI allows you to develop, test, and use Tree-sitter grammars from the command line. It works on MacOS, Linux, and Windows. @@ -21,7 +23,7 @@ or with `npm`: npm install tree-sitter-cli ``` -You can also download a pre-built binary for your platform from [the releases page](https://github.com/tree-sitter/tree-sitter/releases/latest). +You can also download a pre-built binary for your platform from [the releases page]. ### Dependencies @@ -32,8 +34,11 @@ The `tree-sitter` binary itself has no dependencies, but specific commands have ### Commands -* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. See [the documentation](http://tree-sitter.github.io/tree-sitter/creating-parsers) for more information. +* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. See [the documentation] for more information. -* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation](http://tree-sitter.github.io/tree-sitter/creating-parsers) for more information. +* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation] for more information. * `parse` - The `tree-sitter parse` command will parse a file (or list of files) using Tree-sitter parsers. + +[the documentation]: https://tree-sitter.github.io/tree-sitter/creating-parsers +[the releases page]: https://github.com/tree-sitter/tree-sitter/releases/latest diff --git a/cli/benches/benchmark.rs b/cli/benches/benchmark.rs index efb73f3a..ae82081a 100644 --- a/cli/benches/benchmark.rs +++ b/cli/benches/benchmark.rs @@ -104,6 +104,7 @@ fn main() { parse(&path, max_path_length, |source| { Query::new(language, str::from_utf8(source).unwrap()) + .with_context(|| format!("Query file path: {path:?}")) .expect("Failed to parse query"); }); } diff --git a/cli/build.rs b/cli/build.rs index 6f9b772b..53617cf0 100644 --- a/cli/build.rs +++ b/cli/build.rs @@ -1,3 +1,4 @@ +use std::ffi::OsStr; use std::path::{Path, PathBuf}; use std::{env, fs}; @@ -60,7 +61,39 @@ fn read_git_sha() -> Option { // If we're on a branch, read the SHA from the ref file. 
if head_content.starts_with("ref: ") { head_content.replace_range(0.."ref: ".len(), ""); - let ref_filename = git_dir_path.join(&head_content); + let ref_filename = { + // Go to real non-worktree gitdir + let git_dir_path = git_dir_path + .parent() + .map(|p| { + p.file_name() + .map(|n| n == OsStr::new("worktrees")) + .and_then(|x| x.then(|| p.parent())) + }) + .flatten() + .flatten() + .unwrap_or(&git_dir_path); + + let file = git_dir_path.join(&head_content); + if file.is_file() { + file + } else { + let packed_refs = git_dir_path.join("packed-refs"); + if let Ok(packed_refs_content) = fs::read_to_string(&packed_refs) { + for line in packed_refs_content.lines() { + if let Some((hash, r#ref)) = line.split_once(' ') { + if r#ref == head_content { + if let Some(path) = packed_refs.to_str() { + println!("cargo:rerun-if-changed={}", path); + } + return Some(hash.to_string()); + } + } + } + } + return None; + } + }; if let Some(path) = ref_filename.to_str() { println!("cargo:rerun-if-changed={}", path); } diff --git a/cli/config/Cargo.toml b/cli/config/Cargo.toml index ab3808ad..01847e93 100644 --- a/cli/config/Cargo.toml +++ b/cli/config/Cargo.toml @@ -9,12 +9,14 @@ readme = "README.md" keywords = ["incremental", "parsing"] categories = ["command-line-utilities", "parsing"] repository = "https://github.com/tree-sitter/tree-sitter" +rust-version.workspace = true [dependencies] anyhow = "1.0" dirs = "3.0" -serde = { version = "1.0.130", features = ["derive"] } +# Due to https://github.com/serde-rs/serde/issues/2538 +serde = { version = "1.0, < 1.0.172", features = ["derive"] } [dependencies.serde_json] -version = "1.0.45" +version = "1.0" features = ["preserve_order"] diff --git a/cli/config/README.md b/cli/config/README.md index 8cbfbcf4..e7d7b39b 100644 --- a/cli/config/README.md +++ b/cli/config/README.md @@ -1,5 +1,7 @@ -# `tree-sitter-config` +# Tree-sitter Config + +Manages Tree-sitter's configuration file. You can use a configuration file to control the behavior of the `tree-sitter` -command-line program. This crate implements the logic for finding and the +command-line program. This crate implements the logic for finding and parsing the contents of the configuration file. diff --git a/cli/config/src/lib.rs b/cli/config/src/lib.rs index 0a5eba54..83b85358 100644 --- a/cli/config/src/lib.rs +++ b/cli/config/src/lib.rs @@ -1,4 +1,4 @@ -//! Manages tree-sitter's configuration file. +#![doc = include_str!("../README.md")] use anyhow::{anyhow, Context, Result}; use serde::{Deserialize, Serialize}; @@ -25,6 +25,9 @@ impl Config { if let Ok(path) = env::var("TREE_SITTER_DIR") { let mut path = PathBuf::from(path); path.push("config.json"); + if !path.exists() { + return Ok(None); + } if path.is_file() { return Ok(Some(path)); } @@ -37,7 +40,8 @@ let legacy_path = dirs::home_dir() .ok_or(anyhow!("Cannot determine home directory"))? - .join(".tree-sitter/config.json"); + .join(".tree-sitter") + .join("config.json"); if legacy_path.is_file() { return Ok(Some(legacy_path)); } @@ -48,7 +52,8 @@ fn xdg_config_file() -> Result<PathBuf> { let xdg_path = dirs::config_dir() .ok_or(anyhow!("Cannot determine config directory"))? - .join("tree-sitter/config.json"); + .join("tree-sitter") + .join("config.json"); Ok(xdg_path) } @@ -79,7 +84,13 @@ /// /// (Note that this is typically only done by the `tree-sitter init-config` command.)
pub fn initial() -> Result { - let location = Self::xdg_config_file()?; + let location = if let Ok(path) = env::var("TREE_SITTER_DIR") { + let mut path = PathBuf::from(path); + path.push("config.json"); + path + } else { + Self::xdg_config_file()? + }; let config = serde_json::json!({}); Ok(Config { location, config }) } diff --git a/cli/loader/Cargo.toml b/cli/loader/Cargo.toml index ed6c9e8a..113e8399 100644 --- a/cli/loader/Cargo.toml +++ b/cli/loader/Cargo.toml @@ -9,6 +9,7 @@ readme = "README.md" keywords = ["incremental", "parsing"] categories = ["command-line-utilities", "parsing"] repository = "https://github.com/tree-sitter/tree-sitter" +rust-version.workspace = true [features] wasm = ["tree-sitter/wasm"] @@ -20,7 +21,8 @@ dirs = "3.0" libloading = "0.7" once_cell = "1.7" regex = "1" -serde = { version = "1.0.130", features = ["derive"] } +# Due to https://github.com/serde-rs/serde/issues/2538 +serde = { version = "1.0, < 1.0.172", features = ["derive"] } which = "4.1.0" [dependencies.serde_json] diff --git a/cli/loader/README.md b/cli/loader/README.md index 9889ec71..a3c18674 100644 --- a/cli/loader/README.md +++ b/cli/loader/README.md @@ -1,6 +1,6 @@ -# `tree-sitter-loader` +# Tree-sitter Loader The `tree-sitter` command-line program will dynamically find and build grammars at runtime, if you have cloned the grammars' repositories to your local -filesystem. This helper crate implements that logic, so that you can use it in +filesystem. This helper crate implements that logic, so that you can use it in your own program analysis tools, as well. diff --git a/cli/loader/emscripten-version b/cli/loader/emscripten-version index 199eda56..1f1a3970 100644 --- a/cli/loader/emscripten-version +++ b/cli/loader/emscripten-version @@ -1 +1 @@ -3.1.25 +3.1.37 diff --git a/cli/loader/src/lib.rs b/cli/loader/src/lib.rs index a0fb3249..619aa05e 100644 --- a/cli/loader/src/lib.rs +++ b/cli/loader/src/lib.rs @@ -1,3 +1,5 @@ +#![doc = include_str!("../README.md")] + use anyhow::{anyhow, Context, Error, Result}; use libloading::{Library, Symbol}; use once_cell::unsync::OnceCell; @@ -11,7 +13,7 @@ use std::path::{Path, PathBuf}; use std::process::Command; use std::sync::Mutex; use std::time::SystemTime; -use std::{fs, mem}; +use std::{env, fs, mem}; use tree_sitter::{Language, QueryError, QueryErrorKind}; use tree_sitter_highlight::HighlightConfiguration; use tree_sitter_tags::{Error as TagsError, TagsConfiguration}; @@ -72,12 +74,12 @@ impl Config { } #[cfg(unix)] -const DYLIB_EXTENSION: &'static str = "so"; +const DYLIB_EXTENSION: &str = "so"; #[cfg(windows)] const DYLIB_EXTENSION: &'static str = "dll"; -const BUILD_TARGET: &'static str = env!("BUILD_TARGET"); +const BUILD_TARGET: &str = env!("BUILD_TARGET"); pub struct LanguageConfiguration<'a> { pub scope: Option, @@ -90,6 +92,7 @@ pub struct LanguageConfiguration<'a> { pub injections_filenames: Option>, pub locals_filenames: Option>, pub tags_filenames: Option>, + pub language_name: String, language_id: usize, highlight_config: OnceCell>, tags_config: OnceCell>, @@ -102,6 +105,7 @@ pub struct Loader { languages_by_id: Vec<(PathBuf, OnceCell)>, language_configurations: Vec>, language_configuration_ids_by_file_type: HashMap>, + language_configuration_in_current_path: Option, highlight_names: Box>>, use_all_highlight_names: bool, debug_build: bool, @@ -115,9 +119,13 @@ unsafe impl Sync for Loader {} impl Loader { pub fn new() -> Result { - let parser_lib_path = dirs::cache_dir() - .ok_or(anyhow!("Cannot determine cache directory"))? 
- .join("tree-sitter/lib"); + let parser_lib_path = match env::var("TREE_SITTER_LIBDIR") { + Ok(path) => PathBuf::from(path), + _ => dirs::cache_dir() + .ok_or(anyhow!("Cannot determine cache directory"))? + .join("tree-sitter") + .join("lib"), + }; Ok(Self::with_parser_lib_path(parser_lib_path)) } @@ -127,6 +135,7 @@ impl Loader { languages_by_id: Vec::new(), language_configurations: Vec::new(), language_configuration_ids_by_file_type: HashMap::new(), + language_configuration_in_current_path: None, highlight_names: Box::new(Mutex::new(Vec::new())), use_all_highlight_names: true, debug_build: false, @@ -136,7 +145,7 @@ impl Loader { } } - pub fn configure_highlights(&mut self, names: &Vec) { + pub fn configure_highlights(&mut self, names: &[String]) { self.use_all_highlight_names = false; let mut highlights = self.highlight_names.lock().unwrap(); highlights.clear(); @@ -152,8 +161,7 @@ impl Loader { eprintln!("Warning: You have not configured any parser directories!"); eprintln!("Please run `tree-sitter init-config` and edit the resulting"); eprintln!("configuration file to indicate where we should look for"); - eprintln!("language grammars."); - eprintln!(""); + eprintln!("language grammars.\n"); } for parser_container_dir in &config.parser_directories { if let Ok(entries) = fs::read_dir(parser_container_dir) { @@ -163,6 +171,7 @@ impl Loader { if parser_dir_name.starts_with("tree-sitter-") { self.find_language_configurations_at_path( &parser_container_dir.join(parser_dir_name), + false, ) .ok(); } @@ -174,7 +183,7 @@ impl Loader { } pub fn languages_at_path(&mut self, path: &Path) -> Result> { - if let Ok(configurations) = self.find_language_configurations_at_path(path) { + if let Ok(configurations) = self.find_language_configurations_at_path(path, true) { let mut language_ids = configurations .iter() .map(|c| c.language_id) @@ -365,7 +374,7 @@ impl Loader { library_path.set_extension(DYLIB_EXTENSION); } - let recompile = needs_recompile(&library_path, &parser_path, &scanner_path) + let recompile = needs_recompile(&library_path, &parser_path, scanner_path.as_deref()) .with_context(|| "Failed to compare source and binary timestamps")?; if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() { @@ -411,7 +420,7 @@ impl Loader { header_path: &Path, parser_path: &Path, scanner_path: &Option, - output_path: &PathBuf, + library_path: &PathBuf, ) -> Result<(), Error> { let mut config = cc::Build::new(); config @@ -419,13 +428,15 @@ impl Loader { .opt_level(2) .cargo_metadata(false) .target(BUILD_TARGET) - .host(BUILD_TARGET); + .host(BUILD_TARGET) + .flag_if_supported("-Werror=implicit-function-declaration"); let compiler = config.get_compiler(); let mut command = Command::new(compiler.path()); for (key, value) in compiler.env() { command.env(key, value); } - if cfg!(windows) { + + if compiler.is_like_msvc() { command.args(&["/nologo", "/LD", "/I"]).arg(header_path); if self.debug_build { command.arg("/Od"); @@ -438,17 +449,20 @@ impl Loader { } command .arg("/link") - .arg(format!("/out:{}", output_path.to_str().unwrap())); + .arg(format!("/out:{}", library_path.to_str().unwrap())); } else { command .arg("-shared") - .arg("-fPIC") .arg("-fno-exceptions") .arg("-g") .arg("-I") .arg(header_path) .arg("-o") - .arg(output_path); + .arg(&library_path); + + if !cfg!(windows) { + command.arg("-fPIC"); + } if self.debug_build { command.arg("-O0"); @@ -469,6 +483,7 @@ impl Loader { } command.arg("-xc").arg(parser_path); } + let output = command .output() .with_context(|| "Failed to execute C 
compiler")?; @@ -479,6 +494,37 @@ impl Loader { String::from_utf8_lossy(&output.stderr) )); } + + #[cfg(any(target_os = "macos", target_os = "linux"))] + if scanner_path.is_some() { + let command = Command::new("nm") + .arg("-W") + .arg("-U") + .arg(&library_path) + .output(); + if let Ok(output) = command { + if output.status.success() { + let mut found_non_static = false; + for line in String::from_utf8_lossy(&output.stdout).lines() { + if line.contains(" T ") && !line.contains("tree_sitter_") { + if let Some(function_name) = + line.split_whitespace().collect::>().get(2) + { + if !found_non_static { + found_non_static = true; + eprintln!("Warning: Found non-static non-tree-sitter functions in external scannner"); + } + eprintln!(" `{function_name}`"); + } + } + } + if found_non_static { + eprintln!("Consider making these functions static, they can cause conflicts when another tree-sitter project uses the same function name"); + } + } + } + } + Ok(()) } @@ -581,6 +627,7 @@ impl Loader { pub fn highlight_config_for_injection_string<'a>( &'a self, string: &str, + apply_all_captures: bool, ) -> Option<&'a HighlightConfiguration> { match self.language_configuration_for_injection_string(string) { Err(e) => { @@ -591,38 +638,36 @@ impl Loader { None } Ok(None) => None, - Ok(Some((language, configuration))) => match configuration.highlight_config(language) { - Err(e) => { - eprintln!( - "Failed to load property sheet for injection string '{}': {}", - string, e - ); - None + Ok(Some((language, configuration))) => { + match configuration.highlight_config(language, apply_all_captures, None) { + Err(e) => { + eprintln!( + "Failed to load property sheet for injection string '{}': {}", + string, e + ); + None + } + Ok(None) => None, + Ok(Some(config)) => Some(config), } - Ok(None) => None, - Ok(Some(config)) => Some(config), - }, + } } } - pub fn find_language_configurations_at_path<'a>( - &'a mut self, + pub fn find_language_configurations_at_path( + &mut self, parser_path: &Path, + set_current_path_config: bool, ) -> Result<&[LanguageConfiguration]> { - #[derive(Deserialize)] + #[derive(Default, Deserialize)] #[serde(untagged)] enum PathsJSON { + #[default] Empty, Single(String), Multiple(Vec), } - impl Default for PathsJSON { - fn default() -> Self { - PathsJSON::Empty - } - } - impl PathsJSON { fn into_vec(self) -> Option> { match self { @@ -663,6 +708,11 @@ impl Loader { tree_sitter: Vec, } + #[derive(Deserialize)] + struct GrammarJSON { + name: String, + } + let initial_language_configuration_count = self.language_configurations.len(); if let Ok(package_json_contents) = fs::read_to_string(&parser_path.join("package.json")) { @@ -674,6 +724,13 @@ impl Loader { // the package.json, but defaults to the directory containing the package.json. let language_path = parser_path.join(config_json.path); + let grammar_path = language_path.join("src").join("grammar.json"); + let mut grammar_file = fs::File::open(grammar_path) + .with_context(|| "Failed to read grammar.json")?; + let grammar_json: GrammarJSON = + serde_json::from_reader(BufReader::new(&mut grammar_file)) + .with_context(|| "Failed to parse grammar.json")?; + // Determine if a previous language configuration in this package.json file // already uses the same language. 
let mut language_id = None; @@ -693,6 +750,7 @@ impl Loader { let configuration = LanguageConfiguration { root_path: parser_path.to_path_buf(), + language_name: grammar_json.name.clone(), scope: config_json.scope, language_id, file_types: config_json.file_types.unwrap_or(Vec::new()), @@ -705,19 +763,26 @@ impl Loader { highlights_filenames: config_json.highlights.into_vec(), highlight_config: OnceCell::new(), tags_config: OnceCell::new(), - highlight_names: &*self.highlight_names, + highlight_names: &self.highlight_names, use_all_highlight_names: self.use_all_highlight_names, }; for file_type in &configuration.file_types { self.language_configuration_ids_by_file_type .entry(file_type.to_string()) - .or_insert(Vec::new()) + .or_default() .push(self.language_configurations.len()); } self.language_configurations .push(unsafe { mem::transmute(configuration) }); + + if set_current_path_config + && self.language_configuration_in_current_path.is_none() + { + self.language_configuration_in_current_path = + Some(self.language_configurations.len() - 1); + } } } } @@ -725,8 +790,15 @@ impl Loader { if self.language_configurations.len() == initial_language_configuration_count && parser_path.join("src").join("grammar.json").exists() { + let grammar_path = parser_path.join("src").join("grammar.json"); + let mut grammar_file = + fs::File::open(grammar_path).with_context(|| "Failed to read grammar.json")?; + let grammar_json: GrammarJSON = + serde_json::from_reader(BufReader::new(&mut grammar_file)) + .with_context(|| "Failed to parse grammar.json")?; let configuration = LanguageConfiguration { root_path: parser_path.to_owned(), + language_name: grammar_json.name, language_id: self.languages_by_id.len(), file_types: Vec::new(), scope: None, @@ -739,7 +811,7 @@ impl Loader { tags_filenames: None, highlight_config: OnceCell::new(), tags_config: OnceCell::new(), - highlight_names: &*self.highlight_names, + highlight_names: &self.highlight_names, use_all_highlight_names: self.use_all_highlight_names, }; self.language_configurations @@ -764,11 +836,11 @@ impl Loader { if let Some(scope) = scope { if let Some(config) = self .language_configuration_for_scope(scope) - .with_context(|| format!("Failed to load language for scope '{}'", scope))? + .with_context(|| format!("Failed to load language for scope '{scope}'"))? { Ok(config.0) } else { - return Err(anyhow!("Unknown scope '{}'", scope)); + Err(anyhow!("Unknown scope '{scope}'")) } } else if let Some((lang, _)) = self .language_configuration_for_file_name(path) @@ -780,8 +852,10 @@ impl Loader { })? { Ok(lang) + } else if let Some(id) = self.language_configuration_in_current_path { + Ok(self.language_for_id(self.language_configurations[id].language_id)?) } else if let Some(lang) = self - .languages_at_path(¤t_dir) + .languages_at_path(current_dir) .with_context(|| "Failed to load language in current directory")? 
.first() .cloned() @@ -803,25 +877,76 @@ impl Loader { } impl<'a> LanguageConfiguration<'a> { - pub fn highlight_config(&self, language: Language) -> Result> { + pub fn highlight_config( + &self, + language: Language, + apply_all_captures: bool, + paths: Option<&[String]>, + ) -> Result> { + let (highlights_filenames, injections_filenames, locals_filenames) = match paths { + Some(paths) => ( + Some( + paths + .iter() + .filter(|p| p.ends_with("highlights.scm")) + .cloned() + .collect::>(), + ), + Some( + paths + .iter() + .filter(|p| p.ends_with("tags.scm")) + .cloned() + .collect::>(), + ), + Some( + paths + .iter() + .filter(|p| p.ends_with("locals.scm")) + .cloned() + .collect::>(), + ), + ), + None => (None, None, None), + }; return self .highlight_config .get_or_try_init(|| { - let (highlights_query, highlight_ranges) = - self.read_queries(&self.highlights_filenames, "highlights.scm")?; - let (injections_query, injection_ranges) = - self.read_queries(&self.injections_filenames, "injections.scm")?; - let (locals_query, locals_ranges) = - self.read_queries(&self.locals_filenames, "locals.scm")?; + let (highlights_query, highlight_ranges) = self.read_queries( + if highlights_filenames.is_some() { + highlights_filenames.as_deref() + } else { + self.highlights_filenames.as_deref() + }, + "highlights.scm", + )?; + let (injections_query, injection_ranges) = self.read_queries( + if injections_filenames.is_some() { + injections_filenames.as_deref() + } else { + self.injections_filenames.as_deref() + }, + "injections.scm", + )?; + let (locals_query, locals_ranges) = self.read_queries( + if locals_filenames.is_some() { + locals_filenames.as_deref() + } else { + self.locals_filenames.as_deref() + }, + "locals.scm", + )?; if highlights_query.is_empty() { Ok(None) } else { let mut result = HighlightConfiguration::new( language, + &self.language_name, &highlights_query, &injections_query, &locals_query, + apply_all_captures, ) .map_err(|error| match error.kind { QueryErrorKind::Language => Error::from(error), @@ -853,12 +978,12 @@ impl<'a> LanguageConfiguration<'a> { let mut all_highlight_names = self.highlight_names.lock().unwrap(); if self.use_all_highlight_names { for capture_name in result.query.capture_names() { - if !all_highlight_names.contains(capture_name) { - all_highlight_names.push(capture_name.clone()); + if !all_highlight_names.iter().any(|x| x == capture_name) { + all_highlight_names.push(capture_name.to_string()); } } } - result.configure(&all_highlight_names.as_slice()); + result.configure(all_highlight_names.as_slice()); Ok(Some(result)) } }) @@ -869,9 +994,9 @@ impl<'a> LanguageConfiguration<'a> { self.tags_config .get_or_try_init(|| { let (tags_query, tags_ranges) = - self.read_queries(&self.tags_filenames, "tags.scm")?; + self.read_queries(self.tags_filenames.as_deref(), "tags.scm")?; let (locals_query, locals_ranges) = - self.read_queries(&self.locals_filenames, "locals.scm")?; + self.read_queries(self.locals_filenames.as_deref(), "locals.scm")?; if tags_query.is_empty() { Ok(None) } else { @@ -894,7 +1019,6 @@ impl<'a> LanguageConfiguration<'a> { locals_query.len(), ) } - .into() } else { error.into() } @@ -904,9 +1028,9 @@ impl<'a> LanguageConfiguration<'a> { .map(Option::as_ref) } - fn include_path_in_query_error<'b>( + fn include_path_in_query_error( mut error: QueryError, - ranges: &'b Vec<(String, Range)>, + ranges: &[(String, Range)], source: &str, start_offset: usize, ) -> Error { @@ -914,7 +1038,7 @@ impl<'a> LanguageConfiguration<'a> { let (path, range) = ranges 
.iter() .find(|(_, range)| range.contains(&offset_within_section)) - .unwrap(); + .unwrap_or(ranges.last().unwrap()); error.offset = offset_within_section - range.start; error.row = source[range.start..offset_within_section] .chars() @@ -925,12 +1049,12 @@ impl<'a> LanguageConfiguration<'a> { fn read_queries( &self, - paths: &Option>, + paths: Option<&[String]>, default_path: &str, ) -> Result<(String, Vec<(String, Range)>)> { let mut query = String::new(); let mut path_ranges = Vec::new(); - if let Some(paths) = paths.as_ref() { + if let Some(paths) = paths { for path in paths { let abs_path = self.root_path.join(path); let prev_query_len = query.len(); @@ -955,7 +1079,7 @@ impl<'a> LanguageConfiguration<'a> { fn needs_recompile( lib_path: &Path, parser_c_path: &Path, - scanner_path: &Option, + scanner_path: Option<&Path>, ) -> Result { if !lib_path.exists() { return Ok(true); diff --git a/cli/npm/.gitignore b/cli/npm/.gitignore index 942b33a1..65e04cff 100644 --- a/cli/npm/.gitignore +++ b/cli/npm/.gitignore @@ -3,3 +3,4 @@ tree-sitter.exe *.gz *.tgz LICENSE +README.md diff --git a/cli/npm/dsl.d.ts b/cli/npm/dsl.d.ts index f2ee57f1..63f9ed49 100644 --- a/cli/npm/dsl.d.ts +++ b/cli/npm/dsl.d.ts @@ -1,19 +1,19 @@ -type AliasRule = {type: 'ALIAS'; named: boolean; content: Rule; value: string}; -type BlankRule = {type: 'BLANK'}; -type ChoiceRule = {type: 'CHOICE'; members: Rule[]}; -type FieldRule = {type: 'FIELD'; name: string; content: Rule}; -type ImmediateTokenRule = {type: 'IMMEDIATE_TOKEN'; content: Rule}; -type PatternRule = {type: 'PATTERN'; value: string}; -type PrecDynamicRule = {type: 'PREC_DYNAMIC'; content: Rule; value: number}; -type PrecLeftRule = {type: 'PREC_LEFT'; content: Rule; value: number}; -type PrecRightRule = {type: 'PREC_RIGHT'; content: Rule; value: number}; -type PrecRule = {type: 'PREC'; content: Rule; value: number}; -type Repeat1Rule = {type: 'REPEAT1'; content: Rule}; -type RepeatRule = {type: 'REPEAT'; content: Rule}; -type SeqRule = {type: 'SEQ'; members: Rule[]}; -type StringRule = {type: 'STRING'; value: string}; -type SymbolRule = {type: 'SYMBOL'; name: Name}; -type TokenRule = {type: 'TOKEN'; content: Rule}; +type AliasRule = { type: 'ALIAS'; named: boolean; content: Rule; value: string }; +type BlankRule = { type: 'BLANK' }; +type ChoiceRule = { type: 'CHOICE'; members: Rule[] }; +type FieldRule = { type: 'FIELD'; name: string; content: Rule }; +type ImmediateTokenRule = { type: 'IMMEDIATE_TOKEN'; content: Rule }; +type PatternRule = { type: 'PATTERN'; value: string }; +type PrecDynamicRule = { type: 'PREC_DYNAMIC'; content: Rule; value: number }; +type PrecLeftRule = { type: 'PREC_LEFT'; content: Rule; value: number }; +type PrecRightRule = { type: 'PREC_RIGHT'; content: Rule; value: number }; +type PrecRule = { type: 'PREC'; content: Rule; value: number }; +type Repeat1Rule = { type: 'REPEAT1'; content: Rule }; +type RepeatRule = { type: 'REPEAT'; content: Rule }; +type SeqRule = { type: 'SEQ'; members: Rule[] }; +type StringRule = { type: 'STRING'; value: string }; +type SymbolRule = { type: 'SYMBOL'; name: Name }; +type TokenRule = { type: 'TOKEN'; content: Rule }; type Rule = | AliasRule @@ -42,14 +42,15 @@ type GrammarSymbols = { type RuleBuilder = ( $: GrammarSymbols, + previous: Rule, ) => RuleOrLiteral; type RuleBuilders< RuleName extends string, BaseGrammarRuleName extends string > = { - [name in RuleName]: RuleBuilder; -}; + [name in RuleName]: RuleBuilder; + }; interface Grammar< RuleName extends string, @@ -68,11 +69,17 @@ interface 
Grammar< rules: Rules; /** - * An array of arrays of precedence names. Each inner array represents - * a *descending* ordering. Names listed earlier in one of these arrays - * have higher precedence than any names listed later in the same array. + * An array of arrays of precedence names or rules. Each inner array represents + * a *descending* ordering. Names/rules listed earlier in one of these arrays + * have higher precedence than any names/rules listed later in the same array. + * + * Using rules is just a shorthand way for using a name then calling prec() + * with that name. It is just a convenience. */ - precedences?: () => String[][], + precedences?: ( + $: GrammarSymbols, + previous: Rule[][], + ) => RuleOrLiteral[][], /** * An array of arrays of rule names. Each inner array represents a set of @@ -86,6 +93,7 @@ interface Grammar< */ conflicts?: ( $: GrammarSymbols, + previous: Rule[][], ) => RuleOrLiteral[][]; /** @@ -102,7 +110,7 @@ interface Grammar< externals?: ( $: Record>, previous: Rule[], - ) => SymbolRule[]; + ) => RuleOrLiteral[]; /** * An array of tokens that may appear anywhere in the language. This @@ -126,6 +134,7 @@ interface Grammar< */ inline?: ( $: GrammarSymbols, + previous: Rule[], ) => RuleOrLiteral[]; /** @@ -134,10 +143,11 @@ interface Grammar< * * @param $ grammar rules * - * @see http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types + * @see https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types */ supertypes?: ( $: GrammarSymbols, + previous: Rule[], ) => RuleOrLiteral[]; /** @@ -153,8 +163,8 @@ interface Grammar< type GrammarSchema = { [K in keyof Grammar]: K extends 'rules' - ? Record - : Grammar[K]; + ? Record + : Grammar[K]; }; /** diff --git a/cli/npm/install.js b/cli/npm/install.js index 2790b47d..b2db3d7d 100755 --- a/cli/npm/install.js +++ b/cli/npm/install.js @@ -6,25 +6,54 @@ const http = require('http'); const https = require('https'); const packageJSON = require('./package.json'); -// Determine the URL of the file. -const platformName = { - 'darwin': 'macos', - 'linux': 'linux', - 'win32': 'windows' -}[process.platform]; - -let archName = { - 'x64': 'x64', - 'x86': 'x86', - 'ia32': 'x86' -}[process.arch]; - -// ARM macs can run x64 binaries via Rosetta. Rely on that for now. -if (platformName === 'macos' && process.arch === 'arm64') { - archName = 'x64'; +// Look to a results table in https://github.com/tree-sitter/tree-sitter/issues/2196 +const matrix = { + platform: { + 'darwin': { + name: 'macos', + arch: { + 'arm64': { name: 'arm64' }, + 'x64': { name: 'x64' }, + } + }, + 'linux': { + name: 'linux', + arch: { + 'arm64': { name: 'arm64' }, + 'arm': { name: 'arm' }, + 'armv7l': { name: 'armv7l' }, + 'x64': { name: 'x64' }, + 'x86': { name: 'x86' }, + 'i586': { name: 'i586' }, + 'mips': { name: 'mips' }, + 'mips64': { name: 'mips64' }, + 'mipsel': { name: 'mipsel' }, + 'mips64el': { name: 'mips64el' }, + 'ppc': { name: 'powerpc' }, + 'ppc64': { name: 'powerpc64' }, + 'ppc64el': { name: 'powerpc64el' }, + 'riscv64gc': { name: 'riscv64gc' }, + 's390x': { name: 's390x' }, + 'sparc64': { name: 'sparc64' }, + } + }, + 'win32': { + name: 'windows', + arch: { + 'arm64': { name: 'arm64' }, + 'x64': { name: 'x64' }, + 'x86': { name: 'x86' }, + 'ia32': { name: 'x86' }, + } + }, + }, } -if (!platformName || !archName) { +// Determine the URL of the file. 
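// For example, on an Apple Silicon Mac (process.platform === 'darwin',
// process.arch === 'arm64') the lookup below yields platform.name === 'macos'
// and arch.name === 'arm64', so the asset fetched is
// `tree-sitter-macos-arm64.gz`, matching the per-target artifact names
// produced by the release workflow.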
+const platform = matrix.platform[process.platform]; +const arch = platform && platform.arch[process.arch]; + +if (!platform || !platform.name || !arch || !arch.name) { console.error( `Cannot install tree-sitter-cli for platform ${process.platform}, architecture ${process.arch}` ); @@ -32,7 +61,7 @@ if (!platformName || !archName) { } const releaseURL = `https://github.com/tree-sitter/tree-sitter/releases/download/v${packageJSON.version}`; -const assetName = `tree-sitter-${platformName}-${archName}.gz`; +const assetName = `tree-sitter-${platform.name}-${arch.name}.gz`; const assetURL = `${releaseURL}/${assetName}`; // Remove previously-downloaded files. diff --git a/cli/npm/package.json b/cli/npm/package.json index dfa53ab4..e60c3936 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,11 +1,11 @@ { "name": "tree-sitter-cli", - "version": "0.20.7", + "version": "0.20.8", "author": "Max Brunsfeld", "license": "MIT", "repository": { "type": "git", - "url": "http://github.com/tree-sitter/tree-sitter.git" + "url": "https://github.com/tree-sitter/tree-sitter.git" }, "description": "CLI for generating fast incremental parsers", "keywords": [ @@ -15,7 +15,8 @@ "main": "lib/api/index.js", "scripts": { "install": "node install.js", - "prepack": "cp ../../LICENSE ." + "prepack": "cp ../../LICENSE ../README.md .", + "postpack": "rm LICENSE README.md" }, "bin": { "tree-sitter": "cli.js" diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 10320263..d19d44de 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -57,6 +57,7 @@ struct ParseTableBuilder<'a> { parse_state_info_by_id: Vec>, parse_state_queue: VecDeque, non_terminal_extra_states: Vec<(Symbol, usize)>, + actual_conflicts: HashSet>, parse_table: ParseTable, } @@ -132,6 +133,20 @@ impl<'a> ParseTableBuilder<'a> { )?; } + if !self.actual_conflicts.is_empty() { + println!("Warning: unnecessary conflicts"); + for conflict in &self.actual_conflicts { + println!( + " {}", + conflict + .iter() + .map(|symbol| format!("`{}`", self.symbol_name(symbol))) + .collect::>() + .join(", ") + ); + } + } + Ok((self.parse_table, self.parse_state_info_by_id)) } @@ -582,6 +597,7 @@ impl<'a> ParseTableBuilder<'a> { .expected_conflicts .contains(&actual_conflict) { + self.actual_conflicts.remove(&actual_conflict); return Ok(()); } @@ -964,6 +980,7 @@ pub(crate) fn build_parse_table<'a>( inlines: &'a InlinedProductionMap, variable_info: &'a Vec, ) -> Result<(ParseTable, Vec, Vec>)> { + let actual_conflicts = syntax_grammar.expected_conflicts.iter().cloned().collect(); let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines); let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()]; populate_following_tokens( @@ -979,6 +996,7 @@ pub(crate) fn build_parse_table<'a>( item_set_builder, variable_info, non_terminal_extra_states: Vec::new(), + actual_conflicts, state_ids_by_item_set: IndexMap::default(), core_ids_by_core: HashMap::new(), parse_state_info_by_id: Vec::new(), diff --git a/cli/src/generate/build_tables/token_conflicts.rs b/cli/src/generate/build_tables/token_conflicts.rs index 223d3481..485fdc96 100644 --- a/cli/src/generate/build_tables/token_conflicts.rs +++ b/cli/src/generate/build_tables/token_conflicts.rs @@ -390,12 +390,12 @@ mod tests { Variable { name: "token_0".to_string(), kind: VariableType::Named, - rule: Rule::pattern("[a-f]1|0x\\d"), + 
rule: Rule::pattern("[a-f]1|0x\\d", ""), }, Variable { name: "token_1".to_string(), kind: VariableType::Named, - rule: Rule::pattern("d*ef"), + rule: Rule::pattern("d*ef", ""), }, ], }) @@ -426,7 +426,7 @@ mod tests { Variable { name: "identifier".to_string(), kind: VariableType::Named, - rule: Rule::pattern("\\w+"), + rule: Rule::pattern("\\w+", ""), }, Variable { name: "instanceof".to_string(), @@ -471,7 +471,7 @@ mod tests { #[test] fn test_token_conflicts_with_separators() { let grammar = expand_tokens(ExtractedLexicalGrammar { - separators: vec![Rule::pattern("\\s")], + separators: vec![Rule::pattern("\\s", "")], variables: vec![ Variable { name: "x".to_string(), @@ -498,7 +498,7 @@ mod tests { #[test] fn test_token_conflicts_with_open_ended_tokens() { let grammar = expand_tokens(ExtractedLexicalGrammar { - separators: vec![Rule::pattern("\\s")], + separators: vec![Rule::pattern("\\s", "")], variables: vec![ Variable { name: "x".to_string(), @@ -508,7 +508,7 @@ mod tests { Variable { name: "anything".to_string(), kind: VariableType::Named, - rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*")), + rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*", "")), }, ], }) diff --git a/cli/src/generate/dsl.js b/cli/src/generate/dsl.js index 4281cee1..4fdfbef1 100644 --- a/cli/src/generate/dsl.js +++ b/cli/src/generate/dsl.js @@ -181,7 +181,11 @@ function normalize(value) { value }; case RegExp: - return { + return value.flags ? { + type: 'PATTERN', + value: value.source, + flags: value.flags + } : { type: 'PATTERN', value: value.source }; diff --git a/cli/src/generate/grammar-schema.json b/cli/src/generate/grammar-schema.json index 5ca35370..952aac80 100644 --- a/cli/src/generate/grammar-schema.json +++ b/cli/src/generate/grammar-schema.json @@ -63,7 +63,7 @@ }, "supertypes": { - "description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.", + "description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.", "type": "array", "items": { "description": "the name of a rule in `rules` or `extras`", diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 4838828b..3c0aeda1 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -21,10 +21,10 @@ use anyhow::{anyhow, Context, Result}; use lazy_static::lazy_static; use regex::{Regex, RegexBuilder}; use semver::Version; -use std::fs; use std::io::Write; use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; +use std::{env, fs}; lazy_static! { static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*") @@ -44,25 +44,27 @@ pub fn generate_parser_in_directory( abi_version: usize, generate_bindings: bool, report_symbol_name: Option<&str>, + js_runtime: Option<&str>, ) -> Result<()> { let src_path = repo_path.join("src"); let header_path = src_path.join("tree_sitter"); + // Read the grammar.json. + let grammar_json = match grammar_path { + Some(path) => load_grammar_file(path.as_ref(), js_runtime)?, + None => { + let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into()); + load_grammar_file(&grammar_js_path, js_runtime)? + } + }; + // Ensure that the output directories exist. fs::create_dir_all(&src_path)?; fs::create_dir_all(&header_path)?; - // Read the grammar.json. 
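// The replacement code above loads the grammar before creating the output
// directories and threads the new `js_runtime` option through, so grammar.js
// can be evaluated by a runtime other than node. A sketch of the intended
// call; "deno" is only an illustrative value (any executable that accepts the
// DSL script on stdin should work):
//
//     let grammar_json = load_grammar_file(Path::new("grammar.js"), Some("deno"))?;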
- let grammar_json; - match grammar_path { - Some(path) => { - grammar_json = load_grammar_file(path.as_ref())?; - } - None => { - let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into()); - grammar_json = load_grammar_file(&grammar_js_path)?; - fs::write(&src_path.join("grammar.json"), &grammar_json)?; - } + if grammar_path.is_none() { + fs::write(&src_path.join("grammar.json"), &grammar_json) + .with_context(|| format!("Failed to write grammar.json to {:?}", src_path))?; } // Parse and preprocess the grammar. @@ -155,10 +157,18 @@ fn generate_parser_for_grammar_with_opts( }) } -pub fn load_grammar_file(grammar_path: &Path) -> Result { +pub fn load_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result { + if grammar_path.is_dir() { + return Err(anyhow!( + "Path to a grammar file with `.js` or `.json` extension is required" + )); + } match grammar_path.extension().and_then(|e| e.to_str()) { - Some("js") => Ok(load_js_grammar_file(grammar_path)?), - Some("json") => Ok(fs::read_to_string(grammar_path)?), + Some("js") => Ok(load_js_grammar_file(grammar_path, js_runtime) + .with_context(|| "Failed to load grammar.js")?), + Some("json") => { + Ok(fs::read_to_string(grammar_path).with_context(|| "Failed to load grammar.json")?) + } _ => Err(anyhow!( "Unknown grammar file extension: {:?}", grammar_path @@ -166,21 +176,24 @@ pub fn load_grammar_file(grammar_path: &Path) -> Result { } } -fn load_js_grammar_file(grammar_path: &Path) -> Result { +fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result { let grammar_path = fs::canonicalize(grammar_path)?; - let mut node_process = Command::new("node") + + let js_runtime = js_runtime.unwrap_or("node"); + + let mut node_process = Command::new(js_runtime) .env("TREE_SITTER_GRAMMAR_PATH", grammar_path) .stdin(Stdio::piped()) .stdout(Stdio::piped()) .spawn() - .expect("Failed to run `node`"); + .with_context(|| format!("Failed to run `{js_runtime}`"))?; let mut node_stdin = node_process .stdin .take() - .expect("Failed to open stdin for node"); + .with_context(|| "Failed to open stdin for node")?; let cli_version = Version::parse(env!("CARGO_PKG_VERSION")) - .expect("Could not parse this package's version as semver."); + .with_context(|| "Could not parse this package's version as semver.")?; write!( node_stdin, "global.TREE_SITTER_CLI_VERSION_MAJOR = {}; @@ -188,22 +201,22 @@ fn load_js_grammar_file(grammar_path: &Path) -> Result { global.TREE_SITTER_CLI_VERSION_PATCH = {};", cli_version.major, cli_version.minor, cli_version.patch, ) - .expect("Failed to write tree-sitter version to node's stdin"); + .with_context(|| "Failed to write tree-sitter version to node's stdin")?; let javascript_code = include_bytes!("./dsl.js"); node_stdin .write(javascript_code) - .expect("Failed to write grammar dsl to node's stdin"); + .with_context(|| "Failed to write grammar dsl to node's stdin")?; drop(node_stdin); let output = node_process .wait_with_output() - .expect("Failed to read output from node"); + .with_context(|| "Failed to read output from node")?; match output.status.code() { None => panic!("Node process was killed"), Some(0) => {} Some(code) => return Err(anyhow!("Node process exited with status {}", code)), } - - let mut result = String::from_utf8(output.stdout).expect("Got invalid UTF8 from node"); + let mut result = + String::from_utf8(output.stdout).with_context(|| "Got invalid UTF8 from node")?; result.push('\n'); Ok(result) } diff --git a/cli/src/generate/node_types.rs 
b/cli/src/generate/node_types.rs index 43918980..2b44cfd2 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -1172,12 +1172,12 @@ mod tests { Variable { name: "identifier".to_string(), kind: VariableType::Named, - rule: Rule::pattern("\\w+"), + rule: Rule::pattern("\\w+", ""), }, Variable { name: "foo_identifier".to_string(), kind: VariableType::Named, - rule: Rule::pattern("[\\w-]+"), + rule: Rule::pattern("[\\w-]+", ""), }, ], ..Default::default() @@ -1275,8 +1275,8 @@ mod tests { name: "script".to_string(), kind: VariableType::Named, rule: Rule::seq(vec![ - Rule::field("a".to_string(), Rule::pattern("hi")), - Rule::field("b".to_string(), Rule::pattern("bye")), + Rule::field("a".to_string(), Rule::pattern("hi", "")), + Rule::field("b".to_string(), Rule::pattern("bye", "")), ]), }], ..Default::default() diff --git a/cli/src/generate/parse_grammar.rs b/cli/src/generate/parse_grammar.rs index 7fda0b71..e8eca095 100644 --- a/cli/src/generate/parse_grammar.rs +++ b/cli/src/generate/parse_grammar.rs @@ -19,6 +19,7 @@ enum RuleJSON { }, PATTERN { value: String, + flags: Option, }, SYMBOL { name: String, @@ -143,7 +144,21 @@ fn parse_rule(json: RuleJSON) -> Rule { } => Rule::alias(parse_rule(*content), value, named), RuleJSON::BLANK => Rule::Blank, RuleJSON::STRING { value } => Rule::String(value), - RuleJSON::PATTERN { value } => Rule::Pattern(value), + RuleJSON::PATTERN { value, flags } => Rule::Pattern( + value, + flags.map_or(String::new(), |f| { + f.chars() + .filter(|c| { + if *c != 'i' { + eprintln!("Warning: unsupported flag {c}"); + false + } else { + *c != 'u' // silently ignore unicode flag + } + }) + .collect() + }), + ), RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name), RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()), RuleJSON::FIELD { content, name } => Rule::field(name, parse_rule(*content)), diff --git a/cli/src/generate/prepare_grammar/expand_tokens.rs b/cli/src/generate/prepare_grammar/expand_tokens.rs index d6c73d9a..fdbd004f 100644 --- a/cli/src/generate/prepare_grammar/expand_tokens.rs +++ b/cli/src/generate/prepare_grammar/expand_tokens.rs @@ -139,10 +139,10 @@ pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result Result { match rule { - Rule::Pattern(s) => { + Rule::Pattern(s, f) => { let s = preprocess_regex(s); let ast = parse::Parser::new().parse(&s)?; - self.expand_regex(&ast, next_state_id) + self.expand_regex(&ast, next_state_id, f.contains('i')) } Rule::String(s) => { for c in s.chars().rev() { @@ -210,12 +210,42 @@ impl NfaBuilder { } } - fn expand_regex(&mut self, ast: &Ast, mut next_state_id: u32) -> Result { + fn expand_regex( + &mut self, + ast: &Ast, + mut next_state_id: u32, + case_insensitive: bool, + ) -> Result { + fn inverse_char(c: char) -> char { + match c { + 'a'..='z' => (c as u8 - b'a' + b'A') as char, + 'A'..='Z' => (c as u8 - b'A' + b'a') as char, + c => c, + } + } + + fn with_inverse_char(mut chars: CharacterSet) -> CharacterSet { + for char in chars.clone().chars() { + let inverted = inverse_char(char); + if char != inverted { + chars = chars.add_char(inverted); + } + } + chars + } + match ast { Ast::Empty(_) => Ok(false), Ast::Flags(_) => Err(anyhow!("Regex error: Flags are not supported")), Ast::Literal(literal) => { - self.push_advance(CharacterSet::from_char(literal.c), next_state_id); + let mut char_set = CharacterSet::from_char(literal.c); + if case_insensitive { + let inverted = inverse_char(literal.c); + if literal.c != inverted { + 
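// With the "i" flag set, a literal advances on both cases, so the NFA built
// for Rule::pattern("ab", "i") accepts "ab", "AB", "aB", and "Ab". Only ASCII
// letters are folded; `inverse_char` leaves every other character unchanged.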
char_set = char_set.add_char(inverted); + } + } + self.push_advance(char_set, next_state_id); Ok(true) } Ast::Dot(_) => { @@ -229,6 +259,9 @@ impl NfaBuilder { if class.negated { chars = chars.negate(); } + if case_insensitive { + chars = with_inverse_char(chars); + } self.push_advance(chars, next_state_id); Ok(true) } @@ -237,6 +270,9 @@ impl NfaBuilder { if class.negated { chars = chars.negate(); } + if case_insensitive { + chars = with_inverse_char(chars); + } self.push_advance(chars, next_state_id); Ok(true) } @@ -245,48 +281,56 @@ impl NfaBuilder { if class.negated { chars = chars.negate(); } + if case_insensitive { + chars = with_inverse_char(chars); + } self.push_advance(chars, next_state_id); Ok(true) } }, Ast::Repetition(repetition) => match repetition.op.kind { RepetitionKind::ZeroOrOne => { - self.expand_zero_or_one(&repetition.ast, next_state_id) + self.expand_zero_or_one(&repetition.ast, next_state_id, case_insensitive) } RepetitionKind::OneOrMore => { - self.expand_one_or_more(&repetition.ast, next_state_id) + self.expand_one_or_more(&repetition.ast, next_state_id, case_insensitive) } RepetitionKind::ZeroOrMore => { - self.expand_zero_or_more(&repetition.ast, next_state_id) + self.expand_zero_or_more(&repetition.ast, next_state_id, case_insensitive) } RepetitionKind::Range(RepetitionRange::Exactly(count)) => { - self.expand_count(&repetition.ast, count, next_state_id) + self.expand_count(&repetition.ast, count, next_state_id, case_insensitive) } RepetitionKind::Range(RepetitionRange::AtLeast(min)) => { - if self.expand_zero_or_more(&repetition.ast, next_state_id)? { - self.expand_count(&repetition.ast, min, next_state_id) + if self.expand_zero_or_more(&repetition.ast, next_state_id, case_insensitive)? { + self.expand_count(&repetition.ast, min, next_state_id, case_insensitive) } else { Ok(false) } } RepetitionKind::Range(RepetitionRange::Bounded(min, max)) => { - let mut result = self.expand_count(&repetition.ast, min, next_state_id)?; + let mut result = + self.expand_count(&repetition.ast, min, next_state_id, case_insensitive)?; for _ in min..max { if result { next_state_id = self.nfa.last_state_id(); } - if self.expand_zero_or_one(&repetition.ast, next_state_id)? { + if self.expand_zero_or_one( + &repetition.ast, + next_state_id, + case_insensitive, + )? { result = true; } } Ok(result) } }, - Ast::Group(group) => self.expand_regex(&group.ast, next_state_id), + Ast::Group(group) => self.expand_regex(&group.ast, next_state_id, case_insensitive), Ast::Alternation(alternation) => { let mut alternative_state_ids = Vec::new(); for ast in alternation.asts.iter() { - if self.expand_regex(&ast, next_state_id)? { + if self.expand_regex(&ast, next_state_id, case_insensitive)? { alternative_state_ids.push(self.nfa.last_state_id()); } else { alternative_state_ids.push(next_state_id); @@ -304,7 +348,7 @@ impl NfaBuilder { Ast::Concat(concat) => { let mut result = false; for ast in concat.asts.iter().rev() { - if self.expand_regex(&ast, next_state_id)? { + if self.expand_regex(&ast, next_state_id, case_insensitive)? 
{ result = true; next_state_id = self.nfa.last_state_id(); } @@ -335,13 +379,18 @@ impl NfaBuilder { } } - fn expand_one_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result { + fn expand_one_or_more( + &mut self, + ast: &Ast, + next_state_id: u32, + case_insensitive: bool, + ) -> Result { self.nfa.states.push(NfaState::Accept { variable_index: 0, precedence: 0, }); // Placeholder for split let split_state_id = self.nfa.last_state_id(); - if self.expand_regex(&ast, split_state_id)? { + if self.expand_regex(&ast, split_state_id, case_insensitive)? { self.nfa.states[split_state_id as usize] = NfaState::Split(self.nfa.last_state_id(), next_state_id); Ok(true) @@ -351,8 +400,13 @@ impl NfaBuilder { } } - fn expand_zero_or_one(&mut self, ast: &Ast, next_state_id: u32) -> Result { - if self.expand_regex(ast, next_state_id)? { + fn expand_zero_or_one( + &mut self, + ast: &Ast, + next_state_id: u32, + case_insensitive: bool, + ) -> Result { + if self.expand_regex(ast, next_state_id, case_insensitive)? { self.push_split(next_state_id); Ok(true) } else { @@ -360,8 +414,13 @@ impl NfaBuilder { } } - fn expand_zero_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result { - if self.expand_one_or_more(&ast, next_state_id)? { + fn expand_zero_or_more( + &mut self, + ast: &Ast, + next_state_id: u32, + case_insensitive: bool, + ) -> Result { + if self.expand_one_or_more(&ast, next_state_id, case_insensitive)? { self.push_split(next_state_id); Ok(true) } else { @@ -369,10 +428,16 @@ impl NfaBuilder { } } - fn expand_count(&mut self, ast: &Ast, count: u32, mut next_state_id: u32) -> Result { + fn expand_count( + &mut self, + ast: &Ast, + count: u32, + mut next_state_id: u32, + case_insensitive: bool, + ) -> Result { let mut result = false; for _ in 0..count { - if self.expand_regex(ast, next_state_id)? { + if self.expand_regex(ast, next_state_id, case_insensitive)? 
{ result = true; next_state_id = self.nfa.last_state_id(); } @@ -475,7 +540,9 @@ impl NfaBuilder { .add_char(' ') .add_char('\t') .add_char('\r') - .add_char('\n'), + .add_char('\n') + .add_char('\x0B') + .add_char('\x0C'), ClassPerlKind::Word => CharacterSet::empty() .add_char('_') .add_range('A', 'Z') @@ -563,7 +630,7 @@ mod tests { let table = [ // regex with sequences and alternatives Row { - rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?")], + rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?", "")], separators: vec![], examples: vec![ ("ade1", Some((0, "ade"))), @@ -574,13 +641,13 @@ mod tests { }, // regex with repeats Row { - rules: vec![Rule::pattern("a*")], + rules: vec![Rule::pattern("a*", "")], separators: vec![], examples: vec![("aaa1", Some((0, "aaa"))), ("b", Some((0, "")))], }, // regex with repeats in sequences Row { - rules: vec![Rule::pattern("a((bc)+|(de)*)f")], + rules: vec![Rule::pattern("a((bc)+|(de)*)f", "")], separators: vec![], examples: vec![ ("af1", Some((0, "af"))), @@ -591,13 +658,13 @@ mod tests { }, // regex with character ranges Row { - rules: vec![Rule::pattern("[a-fA-F0-9]+")], + rules: vec![Rule::pattern("[a-fA-F0-9]+", "")], separators: vec![], examples: vec![("A1ff0.", Some((0, "A1ff0")))], }, // regex with perl character classes Row { - rules: vec![Rule::pattern("\\w\\d\\s")], + rules: vec![Rule::pattern("\\w\\d\\s", "")], separators: vec![], examples: vec![("_0 ", Some((0, "_0 ")))], }, @@ -611,7 +678,7 @@ mod tests { Row { rules: vec![Rule::repeat(Rule::seq(vec![ Rule::string("{"), - Rule::pattern("[a-f]+"), + Rule::pattern("[a-f]+", ""), Rule::string("}"), ]))], separators: vec![], @@ -624,9 +691,9 @@ mod tests { // longest match rule Row { rules: vec![ - Rule::pattern("a|bc"), - Rule::pattern("aa"), - Rule::pattern("bcd"), + Rule::pattern("a|bc", ""), + Rule::pattern("aa", ""), + Rule::pattern("bcd", ""), ], separators: vec![], examples: vec![ @@ -640,7 +707,7 @@ mod tests { }, // regex with an alternative including the empty string Row { - rules: vec![Rule::pattern("a(b|)+c")], + rules: vec![Rule::pattern("a(b|)+c", "")], separators: vec![], examples: vec![ ("ac.", Some((0, "ac"))), @@ -650,8 +717,8 @@ mod tests { }, // separators Row { - rules: vec![Rule::pattern("[a-f]+")], - separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")], + rules: vec![Rule::pattern("[a-f]+", "")], + separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")], examples: vec![ (" a", Some((0, "a"))), (" \nb", Some((0, "b"))), @@ -662,11 +729,11 @@ mod tests { // shorter tokens with higher precedence Row { rules: vec![ - Rule::prec(Precedence::Integer(2), Rule::pattern("abc")), - Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e")), - Rule::pattern("[a-e]+"), + Rule::prec(Precedence::Integer(2), Rule::pattern("abc", "")), + Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e", "")), + Rule::pattern("[a-e]+", ""), ], - separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")], + separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")], examples: vec![ ("abceef", Some((0, "abc"))), ("abdeef", Some((1, "abde"))), @@ -676,13 +743,13 @@ mod tests { // immediate tokens with higher precedence Row { rules: vec![ - Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+")), + Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+", "")), Rule::immediate_token(Rule::prec( Precedence::Integer(2), - Rule::pattern("[^ab]+"), + Rule::pattern("[^ab]+", ""), )), ], - separators: vec![Rule::pattern("\\s")], + separators: vec![Rule::pattern("\\s", "")], 
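// Note that the `\s` separators in these rows also cover \x0B (vertical tab)
// and \x0C (form feed) now that ClassPerlKind::Space includes them, bringing
// the generated lexer closer to what JavaScript's \s accepts.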
examples: vec![("cccb", Some((1, "ccc")))], }, Row { @@ -704,7 +771,7 @@ mod tests { // nested choices within sequences Row { rules: vec![Rule::seq(vec![ - Rule::pattern("[0-9]+"), + Rule::pattern("[0-9]+", ""), Rule::choice(vec![ Rule::Blank, Rule::choice(vec![Rule::seq(vec![ @@ -713,7 +780,7 @@ mod tests { Rule::Blank, Rule::choice(vec![Rule::string("+"), Rule::string("-")]), ]), - Rule::pattern("[0-9]+"), + Rule::pattern("[0-9]+", ""), ])]), ]), ])], @@ -730,7 +797,7 @@ mod tests { }, // nested groups Row { - rules: vec![Rule::seq(vec![Rule::pattern(r#"([^x\\]|\\(.|\n))+"#)])], + rules: vec![Rule::seq(vec![Rule::pattern(r#"([^x\\]|\\(.|\n))+"#, "")])], separators: vec![], examples: vec![("abcx", Some((0, "abc"))), ("abc\\0x", Some((0, "abc\\0")))], }, @@ -738,11 +805,11 @@ mod tests { Row { rules: vec![ // Escaped forward slash (used in JS because '/' is the regex delimiter) - Rule::pattern(r#"\/"#), + Rule::pattern(r#"\/"#, ""), // Escaped quotes - Rule::pattern(r#"\"\'"#), + Rule::pattern(r#"\"\'"#, ""), // Quote preceded by a literal backslash - Rule::pattern(r#"[\\']+"#), + Rule::pattern(r#"[\\']+"#, ""), ], separators: vec![], examples: vec![ @@ -754,8 +821,8 @@ mod tests { // unicode property escapes Row { rules: vec![ - Rule::pattern(r#"\p{L}+\P{L}+"#), - Rule::pattern(r#"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*"#), + Rule::pattern(r#"\p{L}+\P{L}+"#, ""), + Rule::pattern(r#"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*"#, ""), ], separators: vec![], examples: vec![ @@ -765,17 +832,17 @@ mod tests { }, // unicode property escapes in bracketed sets Row { - rules: vec![Rule::pattern(r#"[\p{L}\p{Nd}]+"#)], + rules: vec![Rule::pattern(r#"[\p{L}\p{Nd}]+"#, "")], separators: vec![], examples: vec![("abΨ12٣٣, ok", Some((0, "abΨ12٣٣")))], }, // unicode character escapes Row { rules: vec![ - Rule::pattern(r#"\u{00dc}"#), - Rule::pattern(r#"\U{000000dd}"#), - Rule::pattern(r#"\u00de"#), - Rule::pattern(r#"\U000000df"#), + Rule::pattern(r#"\u{00dc}"#, ""), + Rule::pattern(r#"\U{000000dd}"#, ""), + Rule::pattern(r#"\u00de"#, ""), + Rule::pattern(r#"\U000000df"#, ""), ], separators: vec![], examples: vec![ @@ -789,13 +856,13 @@ mod tests { Row { rules: vec![ // Un-escaped curly braces - Rule::pattern(r#"u{[0-9a-fA-F]+}"#), + Rule::pattern(r#"u{[0-9a-fA-F]+}"#, ""), // Already-escaped curly braces - Rule::pattern(r#"\{[ab]{3}\}"#), + Rule::pattern(r#"\{[ab]{3}\}"#, ""), // Unicode codepoints - Rule::pattern(r#"\u{1000A}"#), + Rule::pattern(r#"\u{1000A}"#, ""), // Unicode codepoints (lowercase) - Rule::pattern(r#"\u{1000b}"#), + Rule::pattern(r#"\u{1000b}"#, ""), ], separators: vec![], examples: vec![ @@ -807,7 +874,7 @@ mod tests { }, // Emojis Row { - rules: vec![Rule::pattern(r"\p{Emoji}+")], + rules: vec![Rule::pattern(r"\p{Emoji}+", "")], separators: vec![], examples: vec![ ("🐎", Some((0, "🐎"))), @@ -820,7 +887,7 @@ mod tests { }, // Intersection Row { - rules: vec![Rule::pattern(r"[[0-7]&&[4-9]]+")], + rules: vec![Rule::pattern(r"[[0-7]&&[4-9]]+", "")], separators: vec![], examples: vec![ ("456", Some((0, "456"))), @@ -833,7 +900,7 @@ mod tests { }, // Difference Row { - rules: vec![Rule::pattern(r"[[0-9]--[4-7]]+")], + rules: vec![Rule::pattern(r"[[0-9]--[4-7]]+", "")], separators: vec![], examples: vec![ ("123", Some((0, "123"))), @@ -846,7 +913,7 @@ mod tests { }, // Symmetric difference Row { - rules: vec![Rule::pattern(r"[[0-7]~~[4-9]]+")], + rules: vec![Rule::pattern(r"[[0-7]~~[4-9]]+", "")], separators: vec![], examples: vec![ ("123", Some((0, "123"))), @@ -867,7 
+934,7 @@ mod tests { // [6-7]: y y // [3-9]--[5-7]: y y y y y // final regex: y y y y y y - rules: vec![Rule::pattern(r"[[[0-5]--[2-4]]~~[[3-9]--[6-7]]]+")], + rules: vec![Rule::pattern(r"[[[0-5]--[2-4]]~~[[3-9]--[6-7]]]+", "")], separators: vec![], examples: vec![ ("01", Some((0, "01"))), diff --git a/cli/src/generate/prepare_grammar/extract_default_aliases.rs b/cli/src/generate/prepare_grammar/extract_default_aliases.rs index d39bf8dd..ee44f489 100644 --- a/cli/src/generate/prepare_grammar/extract_default_aliases.rs +++ b/cli/src/generate/prepare_grammar/extract_default_aliases.rs @@ -31,7 +31,7 @@ pub(super) fn extract_default_aliases( for variable in syntax_grammar.variables.iter() { for production in variable.productions.iter() { for step in production.steps.iter() { - let mut status = match step.symbol.kind { + let status = match step.symbol.kind { SymbolType::External => &mut external_status_list[step.symbol.index], SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index], SymbolType::Terminal => &mut terminal_status_list[step.symbol.index], @@ -63,7 +63,7 @@ pub(super) fn extract_default_aliases( } for symbol in syntax_grammar.extra_symbols.iter() { - let mut status = match symbol.kind { + let status = match symbol.kind { SymbolType::External => &mut external_status_list[symbol.index], SymbolType::NonTerminal => &mut non_terminal_status_list[symbol.index], SymbolType::Terminal => &mut terminal_status_list[symbol.index], diff --git a/cli/src/generate/prepare_grammar/extract_tokens.rs b/cli/src/generate/prepare_grammar/extract_tokens.rs index 928f914c..54991829 100644 --- a/cli/src/generate/prepare_grammar/extract_tokens.rs +++ b/cli/src/generate/prepare_grammar/extract_tokens.rs @@ -49,7 +49,7 @@ pub(super) fn extract_tokens( }) = variable.rule { if i > 0 && extractor.extracted_usage_counts[index] == 1 { - let mut lexical_variable = &mut lexical_variables[index]; + let lexical_variable = &mut lexical_variables[index]; lexical_variable.kind = variable.kind; lexical_variable.name = variable.name; symbol_replacer.replacements.insert(i, index); @@ -209,7 +209,7 @@ impl TokenExtractor { } else { Rule::Metadata { params: params.clone(), - rule: Box::new(self.extract_tokens_in_rule((&rule).clone())), + rule: Box::new(self.extract_tokens_in_rule(&rule)), } } } @@ -320,7 +320,7 @@ mod test { "rule_0", Rule::repeat(Rule::seq(vec![ Rule::string("a"), - Rule::pattern("b"), + Rule::pattern("b", ""), Rule::choice(vec![ Rule::non_terminal(1), Rule::non_terminal(2), @@ -331,8 +331,8 @@ mod test { ]), ])), ), - Variable::named("rule_1", Rule::pattern("e")), - Variable::named("rule_2", Rule::pattern("b")), + Variable::named("rule_1", Rule::pattern("e", "")), + Variable::named("rule_2", Rule::pattern("b", "")), Variable::named( "rule_3", Rule::seq(vec![Rule::non_terminal(2), Rule::Blank]), @@ -378,12 +378,12 @@ mod test { lexical_grammar.variables, vec![ Variable::anonymous("a", Rule::string("a")), - Variable::auxiliary("rule_0_token1", Rule::pattern("b")), + Variable::auxiliary("rule_0_token1", Rule::pattern("b", "")), Variable::auxiliary( "rule_0_token2", Rule::repeat(Rule::choice(vec![Rule::string("c"), Rule::string("d"),])) ), - Variable::named("rule_1", Rule::pattern("e")), + Variable::named("rule_1", Rule::pattern("e", "")), ] ); } @@ -411,7 +411,7 @@ mod test { fn test_extracting_extra_symbols() { let mut grammar = build_grammar(vec![ Variable::named("rule_0", Rule::string("x")), - Variable::named("comment", Rule::pattern("//.*")), + Variable::named("comment", 
Rule::pattern("//.*", "")), ]); grammar.extra_symbols = vec![Rule::string(" "), Rule::non_terminal(1)]; diff --git a/cli/src/generate/prepare_grammar/process_inlines.rs b/cli/src/generate/prepare_grammar/process_inlines.rs index 206ef8d3..9452e35a 100644 --- a/cli/src/generate/prepare_grammar/process_inlines.rs +++ b/cli/src/generate/prepare_grammar/process_inlines.rs @@ -203,6 +203,12 @@ pub(super) fn process_inlines( lexical_grammar.variables[symbol.index].name, )) } + SymbolType::NonTerminal if symbol.index == 0 => { + return Err(anyhow!( + "Rule `{}` cannot be inlined because it is the first rule", + grammar.variables[symbol.index].name, + )) + } _ => {} } } diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index cb9f6c72..69fa3c48 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -129,6 +129,7 @@ impl Generator { } self.add_lex_modes_list(); + self.add_parse_table(); if !self.syntax_grammar.external_tokens.is_empty() { self.add_external_token_enum(); @@ -136,7 +137,6 @@ impl Generator { self.add_external_scanner_states_list(); } - self.add_parse_table(); self.add_parser_export(); self.buffer @@ -152,49 +152,51 @@ impl Generator { self.symbol_ids[&Symbol::end()].clone(), ); - self.symbol_map = self - .parse_table - .symbols - .iter() - .map(|symbol| { - let mut mapping = symbol; + self.symbol_map = HashMap::new(); - // There can be multiple symbols in the grammar that have the same name and kind, - // due to simple aliases. When that happens, ensure that they map to the same - // public-facing symbol. If one of the symbols is not aliased, choose that one - // to be the public-facing symbol. Otherwise, pick the symbol with the lowest - // numeric value. - if let Some(alias) = self.default_aliases.get(symbol) { - let kind = alias.kind(); - for other_symbol in &self.parse_table.symbols { - if let Some(other_alias) = self.default_aliases.get(other_symbol) { - if other_symbol < mapping && other_alias == alias { - mapping = other_symbol; + for symbol in self.parse_table.symbols.iter() { + let mut mapping = symbol; + + // There can be multiple symbols in the grammar that have the same name and kind, + // due to simple aliases. When that happens, ensure that they map to the same + // public-facing symbol. If one of the symbols is not aliased, choose that one + // to be the public-facing symbol. Otherwise, pick the symbol with the lowest + // numeric value. + if let Some(alias) = self.default_aliases.get(symbol) { + let kind = alias.kind(); + for other_symbol in &self.parse_table.symbols { + if let Some(other_alias) = self.default_aliases.get(other_symbol) { + if other_symbol < mapping && other_alias == alias { + mapping = other_symbol; + } + } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) { + mapping = other_symbol; + break; + } + } + } + // Two anonymous tokens with different flags but the same string value + // should be represented with the same symbol in the public API. 
Examples: + // * "<" and token(prec(1, "<")) + // * "(" and token.immediate("(") + else if symbol.is_terminal() { + let metadata = self.metadata_for_symbol(*symbol); + for other_symbol in &self.parse_table.symbols { + let other_metadata = self.metadata_for_symbol(*other_symbol); + if other_metadata == metadata { + if let Some(mapped) = self.symbol_map.get(other_symbol) { + if mapped == symbol { + break; } - } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) { - mapping = other_symbol; - break; - } - } - } - // Two anonymous tokens with different flags but the same string value - // should be represented with the same symbol in the public API. Examples: - // * "<" and token(prec(1, "<")) - // * "(" and token.immediate("(") - else if symbol.is_terminal() { - let metadata = self.metadata_for_symbol(*symbol); - for other_symbol in &self.parse_table.symbols { - let other_metadata = self.metadata_for_symbol(*other_symbol); - if other_metadata == metadata { - mapping = other_symbol; - break; } + mapping = other_symbol; + break; } } + } - (*symbol, *mapping) - }) - .collect(); + self.symbol_map.insert(*symbol, *mapping); + } for production_info in &self.parse_table.production_infos { // Build a list of all field names @@ -254,7 +256,7 @@ impl Generator { } fn add_includes(&mut self) { - add_line!(self, "#include "); + add_line!(self, "#include \"tree_sitter/parser.h\""); add_line!(self, ""); } @@ -336,7 +338,7 @@ impl Generator { } fn add_symbol_enum(&mut self) { - add_line!(self, "enum {{"); + add_line!(self, "enum ts_symbol_identifiers {{"); indent!(self); self.symbol_order.insert(Symbol::end(), 0); let mut i = 1; @@ -408,7 +410,7 @@ impl Generator { } fn add_field_name_enum(&mut self) { - add_line!(self, "enum {{"); + add_line!(self, "enum ts_field_identifiers {{"); indent!(self); for (i, field_name) in self.field_names.iter().enumerate() { add_line!(self, "{} = {},", self.field_id(field_name), i + 1); @@ -764,7 +766,6 @@ impl Generator { indent!(self); add_line!(self, "START_LEXER();"); - add_line!(self, "eof = lexer->eof(lexer);"); add_line!(self, "switch (state) {{"); indent!(self); @@ -879,14 +880,23 @@ impl Generator { add!(self, " ||{}", line_break); } if range.end == range.start { + if range.start == '\0' { + add!(self, "!eof && "); + } add!(self, "lookahead == "); self.add_character(range.start); } else if range.end as u32 == range.start as u32 + 1 { + if range.start == '\0' { + add!(self, "!eof && "); + } add!(self, "lookahead == "); self.add_character(range.start); add!(self, " ||{}lookahead == ", line_break); self.add_character(range.end); } else { + if range.start == '\0' { + add!(self, "!eof && "); + } add!(self, "("); self.add_character(range.start); add!(self, " <= lookahead && lookahead <= "); @@ -1016,7 +1026,7 @@ impl Generator { } fn add_external_token_enum(&mut self) { - add_line!(self, "enum {{"); + add_line!(self, "enum ts_external_scanner_symbol_identifiers {{"); indent!(self); for i in 0..self.syntax_grammar.external_tokens.len() { add_line!( @@ -1525,54 +1535,93 @@ impl Generator { fn sanitize_identifier(&self, name: &str) -> String { let mut result = String::with_capacity(name.len()); for c in name.chars() { - if ('a' <= c && c <= 'z') - || ('A' <= c && c <= 'Z') - || ('0' <= c && c <= '9') - || c == '_' - { + if c.is_ascii_alphanumeric() || c == '_' { result.push(c); } else { - let replacement = match c { - '~' => "TILDE", - '`' => "BQUOTE", - '!' 
=> "BANG", - '@' => "AT", - '#' => "POUND", - '$' => "DOLLAR", - '%' => "PERCENT", - '^' => "CARET", - '&' => "AMP", - '*' => "STAR", - '(' => "LPAREN", - ')' => "RPAREN", - '-' => "DASH", - '+' => "PLUS", - '=' => "EQ", - '{' => "LBRACE", - '}' => "RBRACE", - '[' => "LBRACK", - ']' => "RBRACK", - '\\' => "BSLASH", - '|' => "PIPE", - ':' => "COLON", - ';' => "SEMI", - '"' => "DQUOTE", - '\'' => "SQUOTE", - '<' => "LT", - '>' => "GT", - ',' => "COMMA", - '.' => "DOT", - '?' => "QMARK", - '/' => "SLASH", - '\n' => "LF", - '\r' => "CR", - '\t' => "TAB", - _ => continue, - }; - if !result.is_empty() && !result.ends_with("_") { - result.push('_'); + 'special_chars: { + let replacement = match c { + ' ' if name.len() == 1 => "SPACE", + '~' => "TILDE", + '`' => "BQUOTE", + '!' => "BANG", + '@' => "AT", + '#' => "POUND", + '$' => "DOLLAR", + '%' => "PERCENT", + '^' => "CARET", + '&' => "AMP", + '*' => "STAR", + '(' => "LPAREN", + ')' => "RPAREN", + '-' => "DASH", + '+' => "PLUS", + '=' => "EQ", + '{' => "LBRACE", + '}' => "RBRACE", + '[' => "LBRACK", + ']' => "RBRACK", + '\\' => "BSLASH", + '|' => "PIPE", + ':' => "COLON", + ';' => "SEMI", + '"' => "DQUOTE", + '\'' => "SQUOTE", + '<' => "LT", + '>' => "GT", + ',' => "COMMA", + '.' => "DOT", + '?' => "QMARK", + '/' => "SLASH", + '\n' => "LF", + '\r' => "CR", + '\t' => "TAB", + '\0' => "NULL", + '\u{0001}' => "SOH", + '\u{0002}' => "STX", + '\u{0003}' => "ETX", + '\u{0004}' => "EOT", + '\u{0005}' => "ENQ", + '\u{0006}' => "ACK", + '\u{0007}' => "BEL", + '\u{0008}' => "BS", + '\u{000b}' => "VTAB", + '\u{000c}' => "FF", + '\u{000e}' => "SO", + '\u{000f}' => "SI", + '\u{0010}' => "DLE", + '\u{0011}' => "DC1", + '\u{0012}' => "DC2", + '\u{0013}' => "DC3", + '\u{0014}' => "DC4", + '\u{0015}' => "NAK", + '\u{0016}' => "SYN", + '\u{0017}' => "ETB", + '\u{0018}' => "CAN", + '\u{0019}' => "EM", + '\u{001a}' => "SUB", + '\u{001b}' => "ESC", + '\u{001c}' => "FS", + '\u{001d}' => "GS", + '\u{001e}' => "RS", + '\u{001f}' => "US", + '\u{007F}' => "DEL", + '\u{FEFF}' => "BOM", + '\u{0080}'..='\u{FFFF}' => { + result.push_str(&format!("u{:04x}", c as u32)); + break 'special_chars; + } + '\u{10000}'..='\u{10FFFF}' => { + result.push_str(&format!("U{:08x}", c as u32)); + break 'special_chars; + } + '0'..='9' | 'a'..='z' | 'A'..='Z' | '_' => unreachable!(), + ' ' => break 'special_chars, + }; + if !result.is_empty() && !result.ends_with("_") { + result.push('_'); + } + result += replacement; } - result += replacement; } } result @@ -1585,10 +1634,19 @@ impl Generator { '\"' => result += "\\\"", '?' 
=> result += "\\?", '\\' => result += "\\\\", + '\u{0007}' => result += "\\a", + '\u{0008}' => result += "\\b", + '\u{000b}' => result += "\\v", '\u{000c}' => result += "\\f", '\n' => result += "\\n", '\r' => result += "\\r", '\t' => result += "\\t", + '\0' => result += "\\0", + '\u{0001}'..='\u{001f}' => result += &format!("\\x{:02x}", c as u32), + '\u{007F}'..='\u{FFFF}' => result += &format!("\\u{:04x}", c as u32), + '\u{10000}'..='\u{10FFFF}' => { + result.push_str(&format!("\\U{:08x}", c as u32)); + } _ => result.push(c), } } diff --git a/cli/src/generate/rules.rs b/cli/src/generate/rules.rs index 0e3ff898..c6f0dd33 100644 --- a/cli/src/generate/rules.rs +++ b/cli/src/generate/rules.rs @@ -56,7 +56,7 @@ pub(crate) struct Symbol { pub(crate) enum Rule { Blank, String(String), - Pattern(String), + Pattern(String, String), NamedSymbol(String), Symbol(Symbol), Choice(Vec), @@ -187,8 +187,8 @@ impl Rule { Rule::String(value.to_string()) } - pub fn pattern(value: &'static str) -> Self { - Rule::Pattern(value.to_string()) + pub fn pattern(value: &'static str, flags: &'static str) -> Self { + Rule::Pattern(value.to_string(), flags.to_string()) } } diff --git a/cli/src/highlight.rs b/cli/src/highlight.rs index 6cf1580e..a7a98936 100644 --- a/cli/src/highlight.rs +++ b/cli/src/highlight.rs @@ -1,4 +1,3 @@ -use super::util; use ansi_term::Color; use anyhow::Result; use lazy_static::lazy_static; @@ -281,7 +280,7 @@ fn style_to_css(style: ansi_term::Style) -> String { fn write_color(buffer: &mut String, color: Color) { if let Color::RGB(r, g, b) = &color { - write!(buffer, "color: #{:x?}{:x?}{:x?}", r, g, b).unwrap() + write!(buffer, "color: #{r:02x}{g:02x}{b:02x}").unwrap() } else { write!( buffer, @@ -349,7 +348,7 @@ pub fn ansi( let mut highlighter = Highlighter::new(); let events = highlighter.highlight(config, source, cancellation_flag, |string| { - loader.highlight_config_for_injection_string(string) + loader.highlight_config_for_injection_string(string, config.apply_all_captures) })?; let mut style_stack = vec![theme.default_style().ansi]; @@ -385,17 +384,17 @@ pub fn html( config: &HighlightConfiguration, quiet: bool, print_time: bool, + cancellation_flag: Option<&AtomicUsize>, ) -> Result<()> { use std::io::Write; let stdout = io::stdout(); let mut stdout = stdout.lock(); let time = Instant::now(); - let cancellation_flag = util::cancel_on_stdin(); let mut highlighter = Highlighter::new(); - let events = highlighter.highlight(config, source, Some(&cancellation_flag), |string| { - loader.highlight_config_for_injection_string(string) + let events = highlighter.highlight(config, source, cancellation_flag, |string| { + loader.highlight_config_for_injection_string(string, config.apply_all_captures) })?; let mut renderer = HtmlRenderer::new(); @@ -448,7 +447,7 @@ mod tests { env::set_var("COLORTERM", ""); parse_style(&mut style, Value::String(DARK_CYAN.to_string())); assert_eq!(style.ansi.foreground, Some(Color::Fixed(36))); - assert_eq!(style.css, Some("style=\'color: #0af87\'".to_string())); + assert_eq!(style.css, Some("style=\'color: #00af87\'".to_string())); // junglegreen is not an ANSI color and is preserved when the terminal supports it env::set_var("COLORTERM", "truecolor"); diff --git a/cli/src/lib.rs b/cli/src/lib.rs index d36417c2..549db773 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -1,3 +1,5 @@ +#![doc = include_str!("../README.md")] + pub mod generate; pub mod highlight; pub mod logger; @@ -14,3 +16,7 @@ pub mod wasm; #[cfg(test)] mod tests; + +// To run compile fail 
tests +#[cfg(doctest)] +mod tests; diff --git a/cli/src/main.rs b/cli/src/main.rs index f6bb88a1..0e59a150 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,12 +1,14 @@ -use anyhow::{anyhow, Context, Result}; +use anyhow::{anyhow, Context, Error, Result}; use clap::{App, AppSettings, Arg, SubCommand}; use glob::glob; -use std::path::Path; +use std::collections::HashSet; +use std::path::{Path, PathBuf}; use std::{env, fs, u64}; -use tree_sitter::{Parser, WasmStore}; +use tree_sitter::{ffi, Parser, Point, WasmStore}; use tree_sitter_cli::{ - generate, highlight, logger, parse, playground, query, tags, test, test_highlight, test_tags, - util, wasm, + generate, highlight, logger, + parse::{self, ParseFileOptions, ParseOutput}, + playground, query, tags, test, test_highlight, test_tags, util, wasm, }; use tree_sitter_config::Config; use tree_sitter_highlight::Highlighter; @@ -82,6 +84,9 @@ fn run() -> Result<()> { let wasm_arg = Arg::with_name("wasm") .long("wasm") .help("compile parsers to wasm instead of native dynamic libraries"); + let apply_all_captures_arg = Arg::with_name("apply-all-captures") + .help("Apply all captures to highlights") + .long("apply-all-captures"); let matches = App::new("tree-sitter") .author("Max Brunsfeld ") @@ -113,13 +118,33 @@ fn run() -> Result<()> { )), ) .arg(Arg::with_name("no-bindings").long("no-bindings")) + .arg( + Arg::with_name("build") + .long("build") + .short("b") + .help("Compile all defined languages in the current dir"), + ) + .arg(&debug_build_arg) + .arg( + Arg::with_name("libdir") + .long("libdir") + .takes_value(true) + .value_name("path"), + ) .arg( Arg::with_name("report-states-for-rule") .long("report-states-for-rule") .value_name("rule-name") .takes_value(true), ) - .arg(Arg::with_name("no-minimize").long("no-minimize")), + .arg( + Arg::with_name("js-runtime") + .long("js-runtime") + .takes_value(true) + .value_name("executable") + .env("TREE_SITTER_JS_RUNTIME") + .help("Use a JavaScript runtime other than node"), + ), ) .subcommand( SubCommand::with_name("parse") @@ -132,7 +157,8 @@ fn run() -> Result<()> { .arg(&debug_build_arg) .arg(&debug_graph_arg) .arg(&wasm_arg) - .arg(Arg::with_name("debug-xml").long("xml").short("x")) + .arg(Arg::with_name("output-dot").long("dot")) + .arg(Arg::with_name("output-xml").long("xml").short("x")) .arg( Arg::with_name("stat") .help("Show parsing statistic") @@ -155,6 +181,12 @@ fn run() -> Result<()> { .takes_value(true) .multiple(true) .number_of_values(1), + ) + .arg( + Arg::with_name("encoding") + .help("The encoding of the input files") + .long("encoding") + .takes_value(true), ), ) .subcommand( @@ -167,6 +199,8 @@ fn run() -> Result<()> { .index(1) .required(true), ) + .arg(&time_arg) + .arg(&quiet_arg) .arg(&paths_file_arg) .arg(&paths_arg.clone().index(2)) .arg( @@ -175,6 +209,12 @@ fn run() -> Result<()> { .long("byte-range") .takes_value(true), ) + .arg( + Arg::with_name("row-range") + .help("The range of rows in which the query will be executed") + .long("row-range") + .takes_value(true), + ) .arg(&scope_arg) .arg(Arg::with_name("captures").long("captures").short("c")) .arg(Arg::with_name("test").long("test")), @@ -208,7 +248,8 @@ fn run() -> Result<()> { .arg(&debug_arg) .arg(&debug_build_arg) .arg(&debug_graph_arg) - .arg(&wasm_arg), + .arg(&wasm_arg) + .arg(&apply_all_captures_arg), ) .subcommand( SubCommand::with_name("highlight") @@ -219,11 +260,31 @@ fn run() -> Result<()> { .long("html") .short("H"), ) + .arg( + Arg::with_name("check") + .help("Check that highlighting captures 
conform strictly to standards") + .long("check"), + ) + .arg( + Arg::with_name("captures-path") + .help("Path to a file with captures") + .long("captures-path") + .takes_value(true), + ) + .arg( + Arg::with_name("query-paths") + .help("Paths to files with queries") + .long("query-paths") + .takes_value(true) + .multiple(true) + .number_of_values(1), + ) .arg(&scope_arg) .arg(&time_arg) .arg(&quiet_arg) .arg(&paths_file_arg) - .arg(&paths_arg), + .arg(&paths_arg) + .arg(&apply_all_captures_arg), ) .subcommand( SubCommand::with_name("build-wasm") @@ -279,6 +340,10 @@ fn run() -> Result<()> { ("generate", Some(matches)) => { let grammar_path = matches.value_of("grammar-path"); + let debug_build = matches.is_present("debug-build"); + let build = matches.is_present("build"); + let libdir = matches.value_of("libdir"); + let js_runtime = matches.value_of("js-runtime"); let report_symbol_name = matches.value_of("report-states-for-rule").or_else(|| { if matches.is_present("report-states") { Some("") @@ -289,16 +354,18 @@ fn run() -> Result<()> { if matches.is_present("log") { logger::init(); } - let abi_version = - matches - .value_of("abi-version") - .map_or(DEFAULT_GENERATE_ABI_VERSION, |version| { - if version == "latest" { - tree_sitter::LANGUAGE_VERSION - } else { - version.parse().expect("invalid abi version flag") - } - }); + let abi_version = matches.value_of("abi-version").map_or( + Ok::<_, Error>(DEFAULT_GENERATE_ABI_VERSION), + |version| { + Ok(if version == "latest" { + tree_sitter::LANGUAGE_VERSION + } else { + version + .parse() + .with_context(|| "invalid abi version flag")? + }) + }, + )?; let generate_bindings = !matches.is_present("no-bindings"); generate::generate_parser_in_directory( ¤t_dir, @@ -306,7 +373,15 @@ fn run() -> Result<()> { abi_version, generate_bindings, report_symbol_name, + js_runtime, )?; + if build { + if let Some(path) = libdir { + loader = loader::Loader::with_parser_lib_path(PathBuf::from(path)); + } + loader.use_debug_build(debug_build); + loader.languages_at_path(¤t_dir)?; + } } ("test", Some(matches)) => { @@ -317,6 +392,12 @@ fn run() -> Result<()> { let filter = matches.value_of("filter"); let wasm = matches.is_present("wasm"); let mut parser = Parser::new(); + let apply_all_captures = matches.is_present("apply-all-captures"); + + if debug { + // For augmenting debug logging in external scanners + env::set_var("TREE_SITTER_DEBUG", "1"); + } loader.use_debug_build(debug_build); @@ -364,7 +445,12 @@ fn run() -> Result<()> { if let Some(store) = store.take() { highlighter.parser().set_wasm_store(store).unwrap(); } - test_highlight::test_highlights(&loader, &mut highlighter, &test_highlight_dir)?; + test_highlight::test_highlights( + &loader, + &mut highlighter, + &test_highlight_dir, + apply_all_captures, + )?; store = highlighter.parser().take_wasm_store(); } @@ -382,14 +468,33 @@ fn run() -> Result<()> { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph"); let debug_build = matches.is_present("debug-build"); - let debug_xml = matches.is_present("debug-xml"); - let quiet = matches.is_present("quiet"); + + let output = if matches.is_present("output-dot") { + ParseOutput::Dot + } else if matches.is_present("output-xml") { + ParseOutput::Xml + } else if matches.is_present("quiet") { + ParseOutput::Quiet + } else { + ParseOutput::Normal + }; + + let encoding = + matches + .values_of("encoding") + .map_or(Ok(None), |mut e| match e.next() { + Some("utf16") => Ok(Some(ffi::TSInputEncodingUTF16)), + Some("utf8") => 
Ok(Some(ffi::TSInputEncodingUTF8)), + Some(_) => Err(anyhow!("Invalid encoding. Expected one of: utf8, utf16")), + None => Ok(None), + })?; + let time = matches.is_present("time"); let wasm = matches.is_present("wasm"); let edits = matches .values_of("edits") .map_or(Vec::new(), |e| e.collect()); - let cancellation_flag = util::cancel_on_stdin(); + let cancellation_flag = util::cancel_on_signal(); let mut parser = Parser::new(); if debug { @@ -430,19 +535,21 @@ fn run() -> Result<()> { .set_language(language) .context("incompatible language")?; - let this_file_errored = parse::parse_file_at_path( - &mut parser, + let opts = ParseFileOptions { + language, path, - &edits, + edits: &edits, max_path_length, - quiet, - time, + output, + print_time: time, timeout, debug, debug_graph, - debug_xml, - Some(&cancellation_flag), - )?; + cancellation_flag: Some(&cancellation_flag), + encoding, + }; + + let this_file_errored = parse::parse_file_at_path(&mut parser, opts)?; if should_track_stats { stats.total_parses += 1; @@ -465,6 +572,8 @@ fn run() -> Result<()> { ("query", Some(matches)) => { let ordered_captures = matches.values_of("captures").is_some(); + let quiet = matches.values_of("quiet").is_some(); + let time = matches.values_of("time").is_some(); let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; let loader_config = config.get()?; loader.find_all_languages(&loader_config)?; @@ -474,9 +583,17 @@ fn run() -> Result<()> { matches.value_of("scope"), )?; let query_path = Path::new(matches.value_of("query-path").unwrap()); - let range = matches.value_of("byte-range").map(|br| { - let r: Vec<&str> = br.split(":").collect(); - r[0].parse().unwrap()..r[1].parse().unwrap() + let byte_range = matches.value_of("byte-range").and_then(|arg| { + let mut parts = arg.split(":"); + let start = parts.next()?.parse().ok()?; + let end = parts.next().unwrap().parse().ok()?; + Some(start..end) + }); + let point_range = matches.value_of("row-range").and_then(|arg| { + let mut parts = arg.split(":"); + let start = parts.next()?.parse().ok()?; + let end = parts.next().unwrap().parse().ok()?; + Some(Point::new(start, 0)..Point::new(end, 0)) }); let should_test = matches.is_present("test"); query::query_files_at_paths( @@ -484,8 +601,11 @@ fn run() -> Result<()> { paths, query_path, ordered_captures, - range, + byte_range, + point_range, should_test, + quiet, + time, )?; } @@ -511,13 +631,15 @@ fn run() -> Result<()> { let time = matches.is_present("time"); let quiet = matches.is_present("quiet"); let html_mode = quiet || matches.is_present("html"); + let should_check = matches.is_present("check"); let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + let apply_all_captures = matches.is_present("apply-all-captures"); if html_mode && !quiet { println!("{}", highlight::HTML_HEADER); } - let cancellation_flag = util::cancel_on_stdin(); + let cancellation_flag = util::cancel_on_signal(); let mut lang = None; if let Some(scope) = matches.value_of("scope") { @@ -527,6 +649,15 @@ fn run() -> Result<()> { } } + let query_paths = matches.values_of("query-paths").map_or(None, |e| { + Some( + e.collect::>() + .into_iter() + .map(|s| s.to_string()) + .collect::>(), + ) + }); + for path in paths { let path = Path::new(&path); let (language, language_config) = match lang { @@ -540,7 +671,45 @@ fn run() -> Result<()> { }, }; - if let Some(highlight_config) = language_config.highlight_config(language)? 
{ + if let Some(highlight_config) = language_config.highlight_config( + language, + apply_all_captures, + query_paths.as_deref(), + )? { + if should_check { + let names = if let Some(path) = matches.value_of("captures-path") { + let path = Path::new(path); + let file = fs::read_to_string(path)?; + let capture_names = file + .lines() + .filter_map(|line| { + if line.trim().is_empty() || line.trim().starts_with(';') { + return None; + } + line.split(';').next().map(|s| s.trim().trim_matches('"')) + }) + .collect::>(); + highlight_config.nonconformant_capture_names(&capture_names) + } else { + highlight_config.nonconformant_capture_names(&HashSet::new()) + }; + if names.is_empty() { + eprintln!("All highlight captures conform to standards."); + } else { + eprintln!( + "Non-standard highlight {} detected:", + if names.len() > 1 { + "captures" + } else { + "capture" + } + ); + for name in names { + eprintln!("* {}", name); + } + } + } + let source = fs::read(path)?; if html_mode { highlight::html( @@ -550,6 +719,7 @@ fn run() -> Result<()> { highlight_config, quiet, time, + Some(&cancellation_flag), )?; } else { highlight::ansi( @@ -582,7 +752,7 @@ fn run() -> Result<()> { ("playground", Some(matches)) => { let open_in_browser = !matches.is_present("quiet"); - playground::serve(¤t_dir, open_in_browser); + playground::serve(¤t_dir, open_in_browser)?; } ("dump-languages", Some(_)) => { diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 4b2ca8fc..99d28a74 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -5,7 +5,7 @@ use std::path::Path; use std::sync::atomic::AtomicUsize; use std::time::Instant; use std::{fmt, fs, usize}; -use tree_sitter::{InputEdit, LogType, Parser, Point, Tree}; +use tree_sitter::{ffi, InputEdit, Language, LogType, Parser, Point, Tree}; #[derive(Debug)] pub struct Edit { @@ -30,36 +30,47 @@ impl fmt::Display for Stats { } } -pub fn parse_file_at_path( - parser: &mut Parser, - path: &Path, - edits: &Vec<&str>, - max_path_length: usize, - quiet: bool, - print_time: bool, - timeout: u64, - debug: bool, - debug_graph: bool, - debug_xml: bool, - cancellation_flag: Option<&AtomicUsize>, -) -> Result { +#[derive(Copy, Clone)] +pub enum ParseOutput { + Normal, + Quiet, + Xml, + Dot, +} + +pub struct ParseFileOptions<'a> { + pub language: Language, + pub path: &'a Path, + pub edits: &'a [&'a str], + pub max_path_length: usize, + pub output: ParseOutput, + pub print_time: bool, + pub timeout: u64, + pub debug: bool, + pub debug_graph: bool, + pub cancellation_flag: Option<&'a AtomicUsize>, + pub encoding: Option, +} + +pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result { let mut _log_session = None; - let mut source_code = - fs::read(path).with_context(|| format!("Error reading source file {:?}", path))?; + parser.set_language(opts.language)?; + let mut source_code = fs::read(opts.path) + .with_context(|| format!("Error reading source file {:?}", opts.path))?; // If the `--cancel` flag was passed, then cancel the parse // when the user types a newline. - unsafe { parser.set_cancellation_flag(cancellation_flag) }; + unsafe { parser.set_cancellation_flag(opts.cancellation_flag) }; // Set a timeout based on the `--time` flag. 
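// (The timeout value itself comes from `--timeout`; `--time` only reports
// timing.) Callers now hand everything to parse_file_at_path through the
// ParseFileOptions struct above; a sketch with illustrative values, assuming
// `language` and `path` are already in scope:
//
//     let opts = ParseFileOptions {
//         language, path, edits: &[], max_path_length: 0,
//         output: ParseOutput::Normal, print_time: false, timeout: 0,
//         debug: false, debug_graph: false,
//         cancellation_flag: None, encoding: None,
//     };
//     parse_file_at_path(&mut parser, opts)?;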
- parser.set_timeout_micros(timeout); + parser.set_timeout_micros(opts.timeout); // Render an HTML graph if `--debug-graph` was passed - if debug_graph { + if opts.debug_graph { _log_session = Some(util::log_graphs(parser, "log.html")?); } // Log to stderr if `--debug` was passed - else if debug { + else if opts.debug { parser.set_logger(Some(Box::new(|log_type, message| { if log_type == LogType::Lex { io::stderr().write(b" ").unwrap(); } @@ -69,22 +80,44 @@ pub fn parse_file_at_path( } let time = Instant::now(); - let tree = parser.parse(&source_code, None); + + #[inline(always)] + fn is_utf16_bom(bom_bytes: &[u8]) -> bool { + bom_bytes == [0xFF, 0xFE] || bom_bytes == [0xFE, 0xFF] + } + + let tree = match opts.encoding { + Some(encoding) if encoding == ffi::TSInputEncodingUTF16 => { + let source_code_utf16 = source_code + .chunks_exact(2) + .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]])) + .collect::<Vec<_>>(); + parser.parse_utf16(&source_code_utf16, None) + } + None if source_code.len() >= 2 && is_utf16_bom(&source_code[0..2]) => { + let source_code_utf16 = source_code + .chunks_exact(2) + .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]])) + .collect::<Vec<_>>(); + parser.parse_utf16(&source_code_utf16, None) + } + _ => parser.parse(&source_code, None), + }; let stdout = io::stdout(); let mut stdout = stdout.lock(); if let Some(mut tree) = tree { - if debug_graph && !edits.is_empty() { + if opts.debug_graph && !opts.edits.is_empty() { println!("BEFORE:\n{}", String::from_utf8_lossy(&source_code)); } - for (i, edit) in edits.iter().enumerate() { + for (i, edit) in opts.edits.iter().enumerate() { let edit = parse_edit_flag(&source_code, edit)?; - perform_edit(&mut tree, &mut source_code, &edit); + perform_edit(&mut tree, &mut source_code, &edit)?; tree = parser.parse(&source_code, Some(&tree)).unwrap(); - if debug_graph { + if opts.debug_graph { println!("AFTER {}:\n{}", i, String::from_utf8_lossy(&source_code)); } } @@ -93,7 +126,7 @@ pub fn parse_file_at_path( let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000; let mut cursor = tree.walk(); - if !quiet { + if matches!(opts.output, ParseOutput::Normal) { let mut needs_newline = false; let mut indent_level = 0; let mut did_visit_children = false; @@ -149,7 +182,7 @@ pub fn parse_file_at_path( println!(""); } - if debug_xml { + if matches!(opts.output, ParseOutput::Xml) { let mut needs_newline = false; let mut indent_level = 0; let mut did_visit_children = false; @@ -204,6 +237,10 @@ pub fn parse_file_at_path( println!(""); } + if matches!(opts.output, ParseOutput::Dot) { + util::print_tree_graph(&tree, "log.html").unwrap(); + } + let mut first_error = None; loop { let node = cursor.node(); @@ -221,13 +258,13 @@ pub fn parse_file_at_path( } } - if first_error.is_some() || print_time { + if first_error.is_some() || opts.print_time { write!( &mut stdout, "{:width$}\t{} ms", - path.to_str().unwrap(), + opts.path.to_str().unwrap(), duration_ms, - width = max_path_length + width = opts.max_path_length )?; if let Some(node) = first_error { let start = node.start_position(); @@ -256,29 +293,29 @@ pub fn parse_file_at_path( } return Ok(first_error.is_some()); - } else if print_time { + } else if opts.print_time { let duration = time.elapsed(); let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000; writeln!( &mut stdout, "{:width$}\t{} ms (timed out)", - path.to_str().unwrap(), + opts.path.to_str().unwrap(), duration_ms, - width = max_path_length + width = opts.max_path_length )?; }
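
// Aside (not part of the diff): a minimal standalone sketch of the UTF-16 fallback
// added to `parse_file_at_path` above. The helper and `main` below are illustrative,
// not CLI code; note that the CLI arms above decode little-endian regardless of which
// BOM was found, while this sketch honors the BOM's byte order.
fn decode_utf16_if_bom(bytes: &[u8]) -> Option<Vec<u16>> {
    match bytes {
        // UTF-16LE BOM
        [0xFF, 0xFE, rest @ ..] => Some(
            rest.chunks_exact(2)
                .map(|c| u16::from_le_bytes([c[0], c[1]]))
                .collect(),
        ),
        // UTF-16BE BOM
        [0xFE, 0xFF, rest @ ..] => Some(
            rest.chunks_exact(2)
                .map(|c| u16::from_be_bytes([c[0], c[1]]))
                .collect(),
        ),
        _ => None,
    }
}

fn main() {
    // "hi" encoded as UTF-16LE, preceded by a BOM.
    let bytes = [0xFF, 0xFE, b'h', 0x00, b'i', 0x00];
    let units = decode_utf16_if_bom(&bytes).unwrap();
    assert_eq!(String::from_utf16(&units).unwrap(), "hi");
}
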
Ok(false) } -pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> InputEdit { +pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> Result<InputEdit> { let start_byte = edit.position; let old_end_byte = edit.position + edit.deleted_length; let new_end_byte = edit.position + edit.inserted_text.len(); - let start_position = position_for_offset(input, start_byte); - let old_end_position = position_for_offset(input, old_end_byte); + let start_position = position_for_offset(input, start_byte)?; + let old_end_position = position_for_offset(input, old_end_byte)?; input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned()); - let new_end_position = position_for_offset(input, new_end_byte); + let new_end_position = position_for_offset(input, new_end_byte)?; let edit = InputEdit { start_byte, old_end_byte, @@ -288,7 +325,7 @@ pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> InputE new_end_position, }; tree.edit(&edit); - edit + Ok(edit) } fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> { @@ -317,7 +354,7 @@ fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> { let row = usize::from_str_radix(row, 10).map_err(|_| error())?; let column = parts.next().ok_or_else(error)?; let column = usize::from_str_radix(column, 10).map_err(|_| error())?; - offset_for_position(source_code, Point { row, column }) + offset_for_position(source_code, Point { row, column })? } else { usize::from_str_radix(position, 10).map_err(|_| error())? }; @@ -332,31 +369,48 @@ fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> { }) } -fn offset_for_position(input: &Vec<u8>, position: Point) -> usize { - let mut current_position = Point { row: 0, column: 0 }; - for (i, c) in input.iter().enumerate() { - if *c as char == '\n' { - current_position.row += 1; - current_position.column = 0; - } else { - current_position.column += 1; - } - if current_position > position { - return i; +pub fn offset_for_position(input: &[u8], position: Point) -> Result<usize> { + let mut row = 0; + let mut offset = 0; + let mut iter = memchr::memchr_iter(b'\n', input); + loop { + if let Some(pos) = iter.next() { + if row < position.row { + row += 1; + offset = pos; + continue; + } } + offset += 1; + break; } - return input.len(); + if position.row - row > 0 { + return Err(anyhow!("Failed to address a row: {}", position.row)); + } + if let Some(pos) = iter.next() { + if (pos - offset < position.column) || (input[offset] == b'\n' && position.column > 0) { + return Err(anyhow!("Failed to address a column: {}", position.column)); + }; + } else if input.len() - offset < position.column { + return Err(anyhow!("Failed to address a column over the end")); + } + Ok(offset + position.column) } -fn position_for_offset(input: &Vec<u8>, offset: usize) -> Point { - let mut result = Point { row: 0, column: 0 }; - for c in &input[0..offset] { - if *c as char == '\n' { - result.row += 1; - result.column = 0; - } else { - result.column += 1; - } +pub fn position_for_offset(input: &[u8], offset: usize) -> Result<Point> { + if offset > input.len() { + return Err(anyhow!("Failed to address an offset: {offset}")); } - result + let mut result = Point { row: 0, column: 0 }; + let mut last = 0; + for pos in memchr::memchr_iter(b'\n', &input[..offset]) { + result.row += 1; + last = pos; + } + result.column = if result.row > 0 { + offset - last - 1 + } else { + offset + }; + Ok(result) } diff --git a/cli/src/playground.html b/cli/src/playground.html index 22c874df..b69f9351 100644 --- a/cli/src/playground.html +++
b/cli/src/playground.html @@ -3,8 +3,8 @@ tree-sitter THE_LANGUAGE_NAME - - + + diff --git a/cli/src/playground.rs b/cli/src/playground.rs index 5650ee47..cff25509 100644 --- a/cli/src/playground.rs +++ b/cli/src/playground.rs @@ -1,4 +1,5 @@ use super::wasm; +use anyhow::{anyhow, Context, Result}; use std::{ borrow::Cow, env, fs, @@ -7,12 +8,11 @@ use std::{ str::{self, FromStr as _}, }; use tiny_http::{Header, Response, Server}; -use webbrowser; macro_rules! optional_resource { ($name: tt, $path: tt) => { #[cfg(TREE_SITTER_EMBED_WASM_BINDING)] - fn $name(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> { + fn $name(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> { if let Some(tree_sitter_dir) = tree_sitter_dir { Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap()) } else { @@ -21,7 +21,7 @@ macro_rules! optional_resource { } #[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))] - fn $name(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> { + fn $name(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> { if let Some(tree_sitter_dir) = tree_sitter_dir { Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap()) } else { @@ -35,7 +35,7 @@ optional_resource!(get_playground_js, "docs/assets/js/playground.js"); optional_resource!(get_lib_js, "lib/binding_web/tree-sitter.js"); optional_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm"); -fn get_main_html(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> { +fn get_main_html(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> { if let Some(tree_sitter_dir) = tree_sitter_dir { Cow::Owned(fs::read(tree_sitter_dir.join("cli/src/playground.html")).unwrap()) } else { @@ -43,23 +43,10 @@ fn get_main_html(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> { } } -pub fn serve(grammar_path: &Path, open_in_browser: bool) { - let port = env::var("TREE_SITTER_PLAYGROUND_PORT") - .map(|v| v.parse::<u16>().expect("Invalid port specification")) - .unwrap_or_else( - |_| get_available_port().expect( - "Couldn't find an available port, try providing a port number via the TREE_SITTER_PLAYGROUND_PORT \ environment variable" - ) - ); - let addr = format!( - "{}:{}", - env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or("127.0.0.1".to_owned()), - port - ); +pub fn serve(grammar_path: &Path, open_in_browser: bool) -> Result<()> { + let server = get_server()?; let (grammar_name, language_wasm) = wasm::load_language_wasm_file(&grammar_path).unwrap(); - let server = Server::http(&addr).expect("Failed to start web server"); - let url = format!("http://{}", addr); + let url = format!("http://{}", server.server_addr()); println!("Started playground on: {}", url); if open_in_browser { if let Err(_) = webbrowser::open(&url) { @@ -68,13 +55,13 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) { } let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok(); - let main_html = str::from_utf8(&get_main_html(&tree_sitter_dir)) + let main_html = str::from_utf8(&get_main_html(tree_sitter_dir.as_ref())) .unwrap() .replace("THE_LANGUAGE_NAME", &grammar_name) .into_bytes(); - let playground_js = get_playground_js(&tree_sitter_dir); - let lib_js = get_lib_js(&tree_sitter_dir); - let lib_wasm = get_lib_wasm(&tree_sitter_dir); + let playground_js = get_playground_js(tree_sitter_dir.as_ref()); + let lib_js = get_lib_js(tree_sitter_dir.as_ref()); + let lib_wasm = get_lib_wasm(tree_sitter_dir.as_ref()); let html_header = Header::from_str("Content-Type: text/html").unwrap(); let js_header = Header::from_str("Content-Type:
application/javascript").unwrap(); @@ -107,8 +94,12 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) { } _ => response(b"Not found", &html_header).with_status_code(404), }; - request.respond(res).expect("Failed to write HTTP response"); + request + .respond(res) + .with_context(|| "Failed to write HTTP response")?; } + + Ok(()) } fn redirect<'a>(url: &'a str) -> Response<&'a [u8]> { @@ -123,10 +114,30 @@ fn response<'a>(data: &'a [u8], header: &Header) -> Response<&'a [u8]> { .with_header(header.clone()) } -fn get_available_port() -> Option<u16> { - (8000..12000).find(port_is_available) +fn get_server() -> Result<Server> { + let addr = env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or("127.0.0.1".to_owned()); + let port = env::var("TREE_SITTER_PLAYGROUND_PORT") + .map(|v| { + v.parse::<u16>() + .with_context(|| "Invalid port specification") + }) + .ok(); + let listener = match port { + Some(port) => { + bind_to(&*addr, port?).with_context(|| "Failed to bind to the specified port")? + } + None => get_listener_on_available_port(&*addr) + .with_context(|| "Failed to find a free port to bind to it")?, + }; + let server = + Server::from_listener(listener, None).map_err(|_| anyhow!("Failed to start web server"))?; + Ok(server) } -fn port_is_available(port: &u16) -> bool { - TcpListener::bind(("127.0.0.1", *port)).is_ok() +fn get_listener_on_available_port(addr: &str) -> Option<TcpListener> { + (8000..12000).find_map(|port| bind_to(addr, port)) +} + +fn bind_to(addr: &str, port: u16) -> Option<TcpListener> { + TcpListener::bind(format!("{addr}:{port}")).ok() } diff --git a/cli/src/query.rs b/cli/src/query.rs index 73d6dd28..fc24cb05 100644 --- a/cli/src/query.rs +++ b/cli/src/query.rs @@ -5,16 +5,20 @@ use std::{ fs, io::{self, Write}, ops::Range, path::Path, + time::Instant, }; -use tree_sitter::{Language, Parser, Query, QueryCursor}; +use tree_sitter::{Language, Parser, Point, Query, QueryCursor}; pub fn query_files_at_paths( language: Language, paths: Vec<String>, query_path: &Path, ordered_captures: bool, - range: Option<Range<usize>>, + byte_range: Option<Range<usize>>, + point_range: Option<Range<Point>>, should_test: bool, + quiet: bool, + print_time: bool, ) -> Result<()> { let stdout = io::stdout(); let mut stdout = stdout.lock(); @@ -24,9 +28,12 @@ pub fn query_files_at_paths( let query = Query::new(language, &query_source).with_context(|| "Query compilation failed")?; let mut query_cursor = QueryCursor::new(); - if let Some(range) = range { + if let Some(range) = byte_range { query_cursor.set_byte_range(range); } + if let Some(range) = point_range { + query_cursor.set_point_range(range); + } let mut parser = Parser::new(); parser.set_language(language)?; @@ -40,22 +47,25 @@ pub fn query_files_at_paths( fs::read(&path).with_context(|| format!("Error reading source file {:?}", path))?; let tree = parser.parse(&source_code, None).unwrap(); + let start = Instant::now(); if ordered_captures { for (mat, capture_index) in query_cursor.captures(&query, tree.root_node(), source_code.as_slice()) { let capture = mat.captures[capture_index]; let capture_name = &query.capture_names()[capture.index as usize]; - writeln!( - &mut stdout, - " pattern: {:>2}, capture: {} - {}, start: {}, end: {}, text: `{}`", - mat.pattern_index, - capture.index, - capture_name, - capture.node.start_position(), - capture.node.end_position(), - capture.node.utf8_text(&source_code).unwrap_or("") - )?; + if !quiet { + writeln!( + &mut stdout, + " pattern: {:>2}, capture: {} - {}, start: {}, end: {}, text: `{}`", + mat.pattern_index, + capture.index, + capture_name, + capture.node.start_position(), +
capture.node.end_position(), + capture.node.utf8_text(&source_code).unwrap_or("") + )?; + } results.push(query_testing::CaptureInfo { name: capture_name.to_string(), start: capture.node.start_position(), @@ -64,27 +74,31 @@ pub fn query_files_at_paths( } } else { for m in query_cursor.matches(&query, tree.root_node(), source_code.as_slice()) { - writeln!(&mut stdout, " pattern: {}", m.pattern_index)?; + if !quiet { + writeln!(&mut stdout, " pattern: {}", m.pattern_index)?; + } for capture in m.captures { let start = capture.node.start_position(); let end = capture.node.end_position(); let capture_name = &query.capture_names()[capture.index as usize]; - if end.row == start.row { - writeln!( - &mut stdout, - " capture: {} - {}, start: {}, end: {}, text: `{}`", - capture.index, - capture_name, - start, - end, - capture.node.utf8_text(&source_code).unwrap_or("") - )?; - } else { - writeln!( - &mut stdout, - " capture: {}, start: {}, end: {}", - capture_name, start, end, - )?; + if !quiet { + if end.row == start.row { + writeln!( + &mut stdout, + " capture: {} - {}, start: {}, end: {}, text: `{}`", + capture.index, + capture_name, + start, + end, + capture.node.utf8_text(&source_code).unwrap_or("") + )?; + } else { + writeln!( + &mut stdout, + " capture: {}, start: {}, end: {}", + capture_name, start, end, + )?; + } } results.push(query_testing::CaptureInfo { name: capture_name.to_string(), @@ -103,6 +117,9 @@ pub fn query_files_at_paths( if should_test { query_testing::assert_expected_captures(results, path, &mut parser, language)? } + if print_time { + writeln!(&mut stdout, "{:?}", start.elapsed())?; + } } Ok(()) diff --git a/cli/src/query_testing.rs b/cli/src/query_testing.rs index 9950f12f..1f88d619 100644 --- a/cli/src/query_testing.rs +++ b/cli/src/query_testing.rs @@ -18,9 +18,20 @@ pub struct CaptureInfo { #[derive(Debug, PartialEq, Eq)] pub struct Assertion { pub position: Point, + pub negative: bool, pub expected_capture_name: String, } +impl Assertion { + pub fn new(row: usize, col: usize, negative: bool, expected_capture_name: String) -> Self { + Self { + position: Point::new(row, col), + negative, + expected_capture_name, + } + } +} + /// Parse the given source code, finding all of the comments that contain /// highlighting assertions. Return a vector of (position, expected highlight name) /// pairs. @@ -54,6 +65,7 @@ pub fn parse_position_comments( // to its own column. let mut has_left_caret = false; let mut has_arrow = false; + let mut negative = false; let mut arrow_end = 0; for (i, c) in text.char_indices() { arrow_end = i + 1; @@ -69,6 +81,19 @@ pub fn parse_position_comments( has_left_caret = c == '<'; } + // find any ! after arrows but before capture name + if has_arrow { + for (i, c) in text[arrow_end..].char_indices() { + if c == '!' { + negative = true; + arrow_end += i + 1; + break; + } else if !c.is_whitespace() { + break; + } + } + } + // If the comment node contains an arrow and a highlight name, record the // highlight name and the position. 
if let (true, Some(mat)) = @@ -76,7 +101,8 @@ pub fn parse_position_comments( { assertion_ranges.push((node.start_position(), node.end_position())); result.push(Assertion { - position: position, + position, + negative, expected_capture_name: mat.as_str().to_string(), }); } diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 457955dd..f9f788ab 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -23,7 +23,7 @@ pub fn generate_tags( } let mut context = TagsContext::new(); - let cancellation_flag = util::cancel_on_stdin(); + let cancellation_flag = util::cancel_on_signal(); let stdout = io::stdout(); let mut stdout = stdout.lock(); diff --git a/cli/src/test.rs b/cli/src/test.rs index 1ab57dcf..80643aef 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -16,11 +16,11 @@ use walkdir::WalkDir; lazy_static! { static ref HEADER_REGEX: ByteRegex = - ByteRegexBuilder::new(r"^===+(?P[^=\r\n][^\r\n]*)?\r?\n(?P([^=\r\n][^\r\n]*\r?\n)+)===+(?P[^=\r\n][^\r\n]*)?\r?\n") + ByteRegexBuilder::new(r"^(?P(?:=+){3,})(?P[^=\r\n][^\r\n]*)?\r?\n(?P([^=\r\n][^\r\n]*\r?\n)+)===+(?P[^=\r\n][^\r\n]*)?\r?\n") .multi_line(true) .build() .unwrap(); - static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^---+(?P[^-\r\n][^\r\n]*)?\r?\n") + static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^(?P(?:-+){3,})(?P[^-\r\n][^\r\n]*)?\r?\n") .multi_line(true) .build() .unwrap(); @@ -40,6 +40,8 @@ pub enum TestEntry { name: String, input: Vec, output: String, + header_delim_len: usize, + divider_delim_len: usize, has_fields: bool, }, } @@ -177,13 +179,15 @@ fn run_tests( mut indent_level: i32, failures: &mut Vec<(String, String, String)>, update: bool, - corrected_entries: &mut Vec<(String, String, String)>, + corrected_entries: &mut Vec<(String, String, String, usize, usize)>, ) -> Result<()> { match test_entry { TestEntry::Example { name, input, output, + header_delim_len, + divider_delim_len, has_fields, } => { if let Some(filter) = filter { @@ -191,7 +195,13 @@ fn run_tests( if update { let input = String::from_utf8(input).unwrap(); let output = format_sexp(&output); - corrected_entries.push((name, input, output)); + corrected_entries.push(( + name, + input, + output, + header_delim_len, + divider_delim_len, + )); } return Ok(()); } @@ -201,21 +211,31 @@ fn run_tests( if !has_fields { actual = strip_sexp_fields(actual); } - for _ in 0..indent_level { - print!(" "); - } + print!("{}", " ".repeat(indent_level as usize)); if actual == output { println!("✓ {}", Colour::Green.paint(&name)); if update { let input = String::from_utf8(input).unwrap(); let output = format_sexp(&output); - corrected_entries.push((name, input, output)); + corrected_entries.push(( + name, + input, + output, + header_delim_len, + divider_delim_len, + )); } } else { if update { let input = String::from_utf8(input).unwrap(); let output = format_sexp(&actual); - corrected_entries.push((name.clone(), input, output)); + corrected_entries.push(( + name.clone(), + input, + output, + header_delim_len, + divider_delim_len, + )); println!("✓ {}", Colour::Blue.paint(&name)); } else { println!("✗ {}", Colour::Red.paint(&name)); @@ -229,9 +249,7 @@ fn run_tests( file_path, } => { if indent_level > 0 { - for _ in 0..indent_level { - print!(" "); - } + print!("{}", " ".repeat(indent_level as usize)); println!("{}:", name); } @@ -312,27 +330,32 @@ fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String { formatted } -fn write_tests(file_path: &Path, corrected_entries: &Vec<(String, String, String)>) -> Result<()> { +fn 
write_tests( + file_path: &Path, + corrected_entries: &Vec<(String, String, String, usize, usize)>, +) -> Result<()> { let mut buffer = fs::File::create(file_path)?; write_tests_to_buffer(&mut buffer, corrected_entries) } fn write_tests_to_buffer( buffer: &mut impl Write, - corrected_entries: &Vec<(String, String, String)>, + corrected_entries: &Vec<(String, String, String, usize, usize)>, ) -> Result<()> { - for (i, (name, input, output)) in corrected_entries.iter().enumerate() { + for (i, (name, input, output, header_delim_len, divider_delim_len)) in + corrected_entries.iter().enumerate() + { if i > 0 { write!(buffer, "\n")?; } write!( buffer, "{}\n{}\n{}\n{}\n{}\n\n{}\n", - "=".repeat(80), + "=".repeat(*header_delim_len), name, - "=".repeat(80), + "=".repeat(*header_delim_len), input, - "-".repeat(80), + "-".repeat(*divider_delim_len), output.trim() )?; } @@ -351,9 +374,18 @@ pub fn parse_tests(path: &Path) -> io::Result { let entry = entry?; let hidden = entry.file_name().to_str().unwrap_or("").starts_with("."); if !hidden { - children.push(parse_tests(&entry.path())?); + children.push(entry.path()); } } + children.sort_by(|a, b| { + a.file_name() + .unwrap_or_default() + .cmp(&b.file_name().unwrap_or_default()) + }); + let children = children + .iter() + .map(|path| parse_tests(path)) + .collect::>>()?; Ok(TestEntry::Group { name, children, @@ -387,6 +419,7 @@ fn parse_test_content(name: String, content: String, file_path: Option) // Ignore any matches whose suffix does not match the first header // suffix in the file. let header_matches = HEADER_REGEX.captures_iter(&bytes).filter_map(|c| { + let header_delim_len = c.name("equals").map(|n| n.as_bytes().len()).unwrap_or(80); let suffix1 = c .name("suffix1") .map(|m| String::from_utf8_lossy(m.as_bytes())); @@ -398,13 +431,17 @@ fn parse_test_content(name: String, content: String, file_path: Option) let test_name = c .name("test_name") .map(|c| String::from_utf8_lossy(c.as_bytes()).trim_end().to_string()); - Some((header_range, test_name)) + let res = Some((header_delim_len, header_range, test_name)); + res } else { None } }); - for (header_range, test_name) in header_matches.chain(Some((bytes.len()..bytes.len(), None))) { + let mut prev_header_len = 80; + for (header_delim_len, header_range, test_name) in + header_matches.chain(Some((80, bytes.len()..bytes.len(), None))) + { // Find the longest line of dashes following each test description. That line // separates the input from the expected output. Ignore any matches whose suffix // does not match the first suffix in the file. 
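
// Aside (not part of the diff): the new `equals`/`hyphens` capture groups in
// HEADER_REGEX and DIVIDER_REGEX exist so that rewriting a corpus file (e.g. on
// `--update`) can reproduce each test's original delimiter width instead of the
// old hard-coded 80. A self-contained sketch of that idea, assuming the `regex`
// crate; the simplified pattern below is illustrative, not the CLI's exact regex.
use regex::Regex;

fn main() {
    let test_file = "===============\nThe first test\n===============\na b c\n---\n(a (b c))\n";
    let header = Regex::new(r"(?m)^(?P<equals>={3,})\r?\n").unwrap();
    let caps = header.captures(test_file).unwrap();
    // Fall back to 80 when no delimiter was captured, as the diff above does.
    let header_delim_len = caps.name("equals").map_or(80, |m| m.as_str().len());
    assert_eq!(header_delim_len, 15);
    // Writing the test back out reuses the measured width:
    assert_eq!("=".repeat(header_delim_len), "===============");
}
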
@@ -412,19 +449,25 @@ fn parse_test_content(name: String, content: String, file_path: Option) let divider_range = DIVIDER_REGEX .captures_iter(&bytes[prev_header_end..header_range.start]) .filter_map(|m| { + let divider_delim_len = + m.name("hyphens").map(|m| m.as_bytes().len()).unwrap_or(80); let suffix = m .name("suffix") .map(|m| String::from_utf8_lossy(m.as_bytes())); if suffix == first_suffix { let range = m.get(0).unwrap().range(); - Some((prev_header_end + range.start)..(prev_header_end + range.end)) + let res = Some(( + divider_delim_len, + (prev_header_end + range.start)..(prev_header_end + range.end), + )); + res } else { None } }) - .max_by_key(|range| range.len()); + .max_by_key(|(_, range)| range.len()); - if let Some(divider_range) = divider_range { + if let Some((divider_delim_len, divider_range)) = divider_range { if let Ok(output) = str::from_utf8(&bytes[divider_range.end..header_range.start]) { let mut input = bytes[prev_header_end..divider_range.start].to_vec(); @@ -449,12 +492,15 @@ fn parse_test_content(name: String, content: String, file_path: Option) name: prev_name, input, output, + header_delim_len: prev_header_len, + divider_delim_len, has_fields, }); } } } prev_name = test_name.unwrap_or(String::new()); + prev_header_len = header_delim_len; prev_header_end = header_range.end; } TestEntry::Group { @@ -505,12 +551,16 @@ d name: "The first test".to_string(), input: "\na b c\n".as_bytes().to_vec(), output: "(a (b c))".to_string(), + header_delim_len: 15, + divider_delim_len: 3, has_fields: false, }, TestEntry::Example { name: "The second test".to_string(), input: "d".as_bytes().to_vec(), output: "(d)".to_string(), + header_delim_len: 16, + divider_delim_len: 3, has_fields: false, }, ], @@ -559,12 +609,16 @@ abc name: "Code with dashes".to_string(), input: "abc\n---\ndefg\n----\nhijkl".as_bytes().to_vec(), output: "(a (b))".to_string(), + header_delim_len: 18, + divider_delim_len: 7, has_fields: false, }, TestEntry::Example { name: "Code ending with dashes".to_string(), input: "abc\n-----------".as_bytes().to_vec(), output: "(c (d))".to_string(), + header_delim_len: 25, + divider_delim_len: 19, has_fields: false, }, ], @@ -608,11 +662,15 @@ abc "title 1".to_string(), "input 1".to_string(), "output 1".to_string(), + 80, + 80, ), ( "title 2".to_string(), "input 2".to_string(), "output 2".to_string(), + 80, + 80, ), ]; write_tests_to_buffer(&mut buffer, &corrected_entries).unwrap(); @@ -689,18 +747,24 @@ code name: "sexp with comment".to_string(), input: "code".as_bytes().to_vec(), output: "(a (b))".to_string(), + header_delim_len: 18, + divider_delim_len: 3, has_fields: false, }, TestEntry::Example { name: "sexp with comment between".to_string(), input: "code".as_bytes().to_vec(), output: "(a (b))".to_string(), + header_delim_len: 18, + divider_delim_len: 3, has_fields: false, }, TestEntry::Example { name: "sexp with ';'".to_string(), input: "code".as_bytes().to_vec(), output: "(MISSING \";\")".to_string(), + header_delim_len: 25, + divider_delim_len: 3, has_fields: false, } ], @@ -773,18 +837,24 @@ NOT A TEST HEADER name: "First test".to_string(), input: expected_input.clone(), output: "(a)".to_string(), + header_delim_len: 18, + divider_delim_len: 3, has_fields: false, }, TestEntry::Example { name: "Second test".to_string(), input: expected_input.clone(), output: "(a)".to_string(), + header_delim_len: 18, + divider_delim_len: 3, has_fields: false, }, TestEntry::Example { name: "Test name with = symbol".to_string(), input: expected_input.clone(), output: 
"(a)".to_string(), + header_delim_len: 25, + divider_delim_len: 3, has_fields: false, } ], @@ -828,12 +898,16 @@ code with ---- name: "name\nwith\nnewlines".to_string(), input: b"a".to_vec(), output: "(b)".to_string(), + header_delim_len: 15, + divider_delim_len: 3, has_fields: false, }, TestEntry::Example { name: "name with === signs".to_string(), input: b"code with ----".to_vec(), output: "(d)".to_string(), + header_delim_len: 20, + divider_delim_len: 3, has_fields: false, } ] diff --git a/cli/src/test_highlight.rs b/cli/src/test_highlight.rs index 5411fcb1..7e1a2927 100644 --- a/cli/src/test_highlight.rs +++ b/cli/src/test_highlight.rs @@ -42,41 +42,74 @@ pub fn test_highlights( loader: &Loader, highlighter: &mut Highlighter, directory: &Path, + apply_all_captures: bool, +) -> Result<()> { + println!("syntax highlighting:"); + test_highlights_indented(loader, highlighter, directory, apply_all_captures, 2) +} + +fn test_highlights_indented( + loader: &Loader, + highlighter: &mut Highlighter, + directory: &Path, + apply_all_captures: bool, + indent_level: usize, ) -> Result<()> { let mut failed = false; - println!("syntax highlighting:"); for highlight_test_file in fs::read_dir(directory)? { let highlight_test_file = highlight_test_file?; let test_file_path = highlight_test_file.path(); let test_file_name = highlight_test_file.file_name(); - let (language, language_config) = loader - .language_configuration_for_file_name(&test_file_path)? - .ok_or_else(|| anyhow!("No language found for path {:?}", test_file_path))?; - let highlight_config = language_config - .highlight_config(language)? - .ok_or_else(|| anyhow!("No highlighting config found for {:?}", test_file_path))?; - match test_highlight( - &loader, - highlighter, - highlight_config, - fs::read(&test_file_path)?.as_slice(), - ) { - Ok(assertion_count) => { - println!( - " ✓ {} ({} assertions)", - Colour::Green.paint(test_file_name.to_string_lossy().as_ref()), - assertion_count - ); - } - Err(e) => { - println!( - " ✗ {}", - Colour::Red.paint(test_file_name.to_string_lossy().as_ref()) - ); - println!(" {}", e); + print!( + "{indent:indent_level$}", + indent = "", + indent_level = indent_level * 2 + ); + if test_file_path.is_dir() && !test_file_path.read_dir()?.next().is_none() { + println!("{}:", test_file_name.into_string().unwrap()); + if let Err(_) = test_highlights_indented( + loader, + highlighter, + &test_file_path, + apply_all_captures, + indent_level + 1, + ) { failed = true; } + } else { + let (language, language_config) = loader + .language_configuration_for_file_name(&test_file_path)? + .ok_or_else(|| anyhow!("No language found for path {:?}", test_file_path))?; + let highlight_config = language_config + .highlight_config(language, apply_all_captures, None)? + .ok_or_else(|| anyhow!("No highlighting config found for {:?}", test_file_path))?; + match test_highlight( + &loader, + highlighter, + highlight_config, + fs::read(&test_file_path)?.as_slice(), + ) { + Ok(assertion_count) => { + println!( + "✓ {} ({} assertions)", + Colour::Green.paint(test_file_name.to_string_lossy().as_ref()), + assertion_count + ); + } + Err(e) => { + println!( + "✗ {}", + Colour::Red.paint(test_file_name.to_string_lossy().as_ref()) + ); + println!( + "{indent:indent_level$} {e}", + indent = "", + indent_level = indent_level * 2 + ); + failed = true; + } + } } } @@ -94,9 +127,10 @@ pub fn iterate_assertions( // Iterate through all of the highlighting assertions, checking each one against the // actual highlights. 
let mut i = 0; - let mut actual_highlights = Vec::<&String>::new(); + let mut actual_highlights = Vec::new(); for Assertion { position, + negative, expected_capture_name: expected_highlight, } in assertions { @@ -120,12 +154,13 @@ pub fn iterate_assertions( break 'highlight_loop; } - // If the highlight matches the assertion, this test passes. Otherwise, + // If the highlight matches the assertion, or if the highlight doesn't + // match the assertion but it's negative, this test passes. Otherwise, // add this highlight to the list of actual highlights that span the // assertion's position, in order to generate an error message in the event // of a failure. let highlight_name = &highlight_names[(highlight.2).0]; - if *highlight_name == *expected_highlight { + if (*highlight_name == *expected_highlight) == !negative { passed = true; break 'highlight_loop; } else { @@ -165,68 +200,7 @@ pub fn test_highlight( let assertions = parse_position_comments(highlighter.parser(), highlight_config.language, source)?; - iterate_assertions(&assertions, &highlights, &highlight_names)?; - - // Iterate through all of the highlighting assertions, checking each one against the - // actual highlights. - let mut i = 0; - let mut actual_highlights = Vec::<&String>::new(); - for Assertion { - position, - expected_capture_name: expected_highlight, - } in &assertions - { - let mut passed = false; - actual_highlights.clear(); - - 'highlight_loop: loop { - // The assertions are ordered by position, so skip past all of the highlights that - // end at or before this assertion's position. - if let Some(highlight) = highlights.get(i) { - if highlight.1 <= *position { - i += 1; - continue; - } - - // Iterate through all of the highlights that start at or before this assertion's, - // position, looking for one that matches the assertion. - let mut j = i; - while let (false, Some(highlight)) = (passed, highlights.get(j)) { - if highlight.0 > *position { - break 'highlight_loop; - } - - // If the highlight matches the assertion, this test passes. Otherwise, - // add this highlight to the list of actual highlights that span the - // assertion's position, in order to generate an error message in the event - // of a failure. - let highlight_name = &highlight_names[(highlight.2).0]; - if *highlight_name == *expected_highlight { - passed = true; - break 'highlight_loop; - } else { - actual_highlights.push(highlight_name); - } - - j += 1; - } - } else { - break; - } - } - - if !passed { - return Err(Failure { - row: position.row, - column: position.column, - expected_highlight: expected_highlight.clone(), - actual_highlights: actual_highlights.into_iter().cloned().collect(), - } - .into()); - } - } - - Ok(assertions.len()) + iterate_assertions(&assertions, &highlights, &highlight_names) } pub fn get_highlight_positions( @@ -244,7 +218,7 @@ pub fn get_highlight_positions( let source = String::from_utf8_lossy(source); let mut char_indices = source.char_indices(); for event in highlighter.highlight(highlight_config, source.as_bytes(), None, |string| { - loader.highlight_config_for_injection_string(string) + loader.highlight_config_for_injection_string(string, highlight_config.apply_all_captures) })? { match event? 
{ HighlightEvent::HighlightStart(h) => highlight_stack.push(h), diff --git a/cli/src/test_tags.rs b/cli/src/test_tags.rs index 28694db9..0bf13fe2 100644 --- a/cli/src/test_tags.rs +++ b/cli/src/test_tags.rs @@ -95,6 +95,7 @@ pub fn test_tag( let mut actual_tags = Vec::<&String>::new(); for Assertion { position, + negative, expected_capture_name: expected_tag, } in &assertions { @@ -116,7 +117,7 @@ pub fn test_tag( } let tag_name = &tag.2; - if *tag_name == *expected_tag { + if (*tag_name == *expected_tag) == !negative { passed = true; break 'tag_loop; } else { @@ -124,6 +125,9 @@ pub fn test_tag( } j += 1; + if tag == tags.last().unwrap() { + break 'tag_loop; + } } } else { break; diff --git a/cli/src/tests/async_context_test.rs b/cli/src/tests/async_context_test.rs new file mode 100644 index 00000000..08226387 --- /dev/null +++ b/cli/src/tests/async_context_test.rs @@ -0,0 +1,279 @@ +use super::helpers::fixtures::get_language; +use std::future::Future; +use std::pin::{pin, Pin}; +use std::ptr; +use std::task::{self, Context, Poll, RawWaker, RawWakerVTable, Waker}; +use tree_sitter::Parser; + +#[test] +fn test_node_in_fut() { + let (ret, pended) = tokio_like_spawn(async { + let mut parser = Parser::new(); + let language = get_language("bash"); + parser.set_language(language).unwrap(); + + let tree = parser.parse("#", None).unwrap(); + + let root = tree.root_node(); + let root_ref = &root; + + let fut_val_fn = || async { + // eprintln!("fut_val_fn: {}", root.child(0).unwrap().kind()); + yield_now().await; + root.child(0).unwrap().kind() + }; + + yield_now().await; + + let fut_ref_fn = || async { + // eprintln!("fut_ref_fn: {}", root_ref.child(0).unwrap().kind()); + yield_now().await; + root_ref.child(0).unwrap().kind() + }; + + let f1 = fut_val_fn().await; + let f2 = fut_ref_fn().await; + assert_eq!(f1, f2); + + let fut_val = async { + // eprintln!("fut_val: {}", root.child(0).unwrap().kind()); + yield_now().await; + root.child(0).unwrap().kind() + }; + + let fut_ref = async { + // eprintln!("fut_ref: {}", root_ref.child(0).unwrap().kind()); + yield_now().await; + root_ref.child(0).unwrap().kind() + }; + + let f1 = fut_val.await; + let f2 = fut_ref.await; + assert_eq!(f1, f2); + + f1 + }) + .join(); + // eprintln!("pended: {pended:?}"); + assert_eq!(ret, "comment"); + assert_eq!(pended, 5); +} + +#[test] +fn test_node_and_cursor_ref_in_fut() { + let (_, pended) = tokio_like_spawn(async { + let mut parser = Parser::new(); + let language = get_language("bash"); + parser.set_language(language).unwrap(); + + let tree = parser.parse("#", None).unwrap(); + + let root = tree.root_node(); + let root_ref = &root; + + let mut cursor = tree.walk(); + let cursor_ref = &mut cursor; + + cursor_ref.goto_first_child(); + + let fut_val = async { + yield_now().await; + root.to_sexp(); + }; + + yield_now().await; + + let fut_ref = async { + yield_now().await; + root_ref.to_sexp(); + cursor_ref.goto_first_child(); + }; + + fut_val.await; + fut_ref.await; + + cursor_ref.goto_first_child(); + }) + .join(); + assert_eq!(pended, 3); +} + +#[test] +fn test_node_and_cursor_ref_in_fut_with_fut_fabrics() { + let (_, pended) = tokio_like_spawn(async { + let mut parser = Parser::new(); + let language = get_language("bash"); + parser.set_language(language).unwrap(); + + let tree = parser.parse("#", None).unwrap(); + + let root = tree.root_node(); + let root_ref = &root; + + let mut cursor = tree.walk(); + let cursor_ref = &mut cursor; + + cursor_ref.goto_first_child(); + + let fut_val = || async { + yield_now().await; + 
root.to_sexp(); + }; + + yield_now().await; + + let fut_ref = || async move { + yield_now().await; + root_ref.to_sexp(); + cursor_ref.goto_first_child(); + }; + + fut_val().await; + fut_val().await; + fut_ref().await; + }) + .join(); + assert_eq!(pended, 4); +} + +#[test] +fn test_node_and_cursor_ref_in_fut_with_inner_spawns() { + let (ret, pended) = tokio_like_spawn(async { + let mut parser = Parser::new(); + let language = get_language("bash"); + parser.set_language(language).unwrap(); + + let tree = parser.parse("#", None).unwrap(); + + let mut cursor = tree.walk(); + let cursor_ref = &mut cursor; + + cursor_ref.goto_first_child(); + + let fut_val = || { + let tree = tree.clone(); + async move { + let root = tree.root_node(); + let mut cursor = tree.walk(); + let cursor_ref = &mut cursor; + yield_now().await; + root.to_sexp(); + cursor_ref.goto_first_child(); + } + }; + + yield_now().await; + + let fut_ref = || { + let tree = tree.clone(); + async move { + let root = tree.root_node(); + let root_ref = &root; + let mut cursor = tree.walk(); + let cursor_ref = &mut cursor; + yield_now().await; + root_ref.to_sexp(); + cursor_ref.goto_first_child(); + } + }; + + let (_, p1) = tokio_like_spawn(fut_val()).await.unwrap(); + let (_, p2) = tokio_like_spawn(fut_ref()).await.unwrap(); + + cursor_ref.goto_first_child(); + + fut_val().await; + fut_val().await; + fut_ref().await; + + cursor_ref.goto_first_child(); + + p1 + p2 + }) + .join(); + assert_eq!(pended, 4); + assert_eq!(ret, 2); +} + +fn tokio_like_spawn(future: T) -> JoinHandle<(T::Output, usize)> +where + T: Future + Send + 'static, + T::Output: Send + 'static, +{ + // No runtime, just noop waker + + let waker = noop_waker(); + let mut cx = task::Context::from_waker(&waker); + + let mut pending = 0; + let mut future = pin!(future); + let ret = loop { + match future.as_mut().poll(&mut cx) { + Poll::Pending => pending += 1, + Poll::Ready(r) => { + // eprintln!("ready, pended: {pending}"); + break r; + } + } + }; + JoinHandle::new((ret, pending)) +} + +async fn yield_now() { + struct SimpleYieldNow { + yielded: bool, + } + + impl Future for SimpleYieldNow { + type Output = (); + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> { + cx.waker().clone().wake(); + if self.yielded { + return Poll::Ready(()); + } + self.yielded = true; + Poll::Pending + } + } + + SimpleYieldNow { yielded: false }.await +} + +pub fn noop_waker() -> Waker { + const VTABLE: RawWakerVTable = RawWakerVTable::new( + // Cloning just returns a new no-op raw waker + |_| RAW, + // `wake` does nothing + |_| {}, + // `wake_by_ref` does nothing + |_| {}, + // Dropping does nothing as we don't allocate anything + |_| {}, + ); + const RAW: RawWaker = RawWaker::new(ptr::null(), &VTABLE); + unsafe { Waker::from_raw(RAW) } +} + +struct JoinHandle { + data: Option, +} + +impl JoinHandle { + fn new(data: T) -> Self { + Self { data: Some(data) } + } + + fn join(&mut self) -> T { + self.data.take().unwrap() + } +} + +impl Future for JoinHandle { + type Output = std::result::Result; + + fn poll(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll { + let data = self.get_mut().data.take().unwrap(); + Poll::Ready(Ok(data)) + } +} diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 401a99a5..589b1839 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -1,7 +1,8 @@ use super::helpers::{ allocations, edits::{get_random_edit, invert_edit}, - fixtures::{fixtures_dir, get_language, get_test_language}, + 
fixtures::{fixtures_dir, get_language, get_test_language, SCRATCH_BASE_DIR}, + new_seed, random::Rand, scope_sequence::ScopeSequence, EDIT_COUNT, EXAMPLE_FILTER, ITERATION_COUNT, LANGUAGE_FILTER, LOG_ENABLED, LOG_GRAPH_ENABLED, @@ -13,70 +14,81 @@ use crate::{ test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry}, util, }; -use std::fs; +use std::{collections::HashMap, env, fs}; use tree_sitter::{LogType, Node, Parser, Point, Range, Tree}; +use tree_sitter_proc_macro::test_with_seed; -#[test] -fn test_bash_corpus() { - test_language_corpus("bash"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_bash(seed: usize) { + test_language_corpus( + "bash", + seed, + Some(&[ + // Fragile tests where edit customization changes + // lead to significant parse tree structure changes. + "bash - corpus - commands - Nested Heredocs", + "bash - corpus - commands - Quoted Heredocs", + "bash - corpus - commands - Heredocs with weird characters", + ]), + ); } -#[test] -fn test_c_corpus() { - test_language_corpus("c"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_c(seed: usize) { + test_language_corpus("c", seed, None); } -#[test] -fn test_cpp_corpus() { - test_language_corpus("cpp"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_cpp(seed: usize) { + test_language_corpus("cpp", seed, None); } -#[test] -fn test_embedded_template_corpus() { - test_language_corpus("embedded-template"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_embedded_template(seed: usize) { + test_language_corpus("embedded-template", seed, None); } -#[test] -fn test_go_corpus() { - test_language_corpus("go"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_go(seed: usize) { + test_language_corpus("go", seed, None); } -#[test] -fn test_html_corpus() { - test_language_corpus("html"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_html(seed: usize) { + test_language_corpus("html", seed, None); } -#[test] -fn test_javascript_corpus() { - test_language_corpus("javascript"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_javascript(seed: usize) { + test_language_corpus("javascript", seed, None); } -#[test] -fn test_json_corpus() { - test_language_corpus("json"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_json(seed: usize) { + test_language_corpus("json", seed, None); } -#[test] -fn test_php_corpus() { - test_language_corpus("php"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_php(seed: usize) { + test_language_corpus("php", seed, None); } -#[test] -fn test_python_corpus() { - test_language_corpus("python"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_python(seed: usize) { + test_language_corpus("python", seed, None); } -#[test] -fn test_ruby_corpus() { - test_language_corpus("ruby"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_ruby(seed: usize) { + test_language_corpus("ruby", seed, None); } -#[test] -fn test_rust_corpus() { - test_language_corpus("rust"); +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_rust(seed: usize) { + test_language_corpus("rust", seed, None); } -fn test_language_corpus(language_name: &str) { +fn test_language_corpus(language_name: &str, 
start_seed: usize, skipped: Option<&[&str]>) { let grammars_dir = fixtures_dir().join("grammars"); let error_corpus_dir = fixtures_dir().join("error_corpus"); let template_corpus_dir = fixtures_dir().join("template_corpus"); @@ -98,10 +110,30 @@ fn test_language_corpus(language_name: &str) { t })); + let mut skipped = skipped.map(|x| HashMap::<&str, usize>::from_iter(x.iter().map(|x| (*x, 0)))); + let language = get_language(language_name); let mut failure_count = 0; - for test in tests { - println!(" {} example - {}", language_name, test.name); + + let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok(); + let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok(); + + if log_seed { + println!(" start seed: {}", start_seed); + } + + println!(); + for (test_index, test) in tests.iter().enumerate() { + let test_name = format!("{language_name} - {}", test.name); + if let Some(skipped) = skipped.as_mut() { + if let Some(counter) = skipped.get_mut(test_name.as_str()) { + println!(" {test_index}. {test_name} - SKIPPED"); + *counter += 1; + continue; + } + } + + println!(" {test_index}. {test_name}"); let passed = allocations::record(|| { let mut log_session = None; @@ -116,10 +148,7 @@ fn test_language_corpus(language_name: &str) { } if actual_output != test.output { - println!( - "Incorrect initial parse for {} - {}", - language_name, test.name, - ); + println!("Incorrect initial parse for {test_name}"); print_diff_key(); print_diff(&actual_output, &test.output); println!(""); @@ -140,7 +169,7 @@ fn test_language_corpus(language_name: &str) { drop(parser); for trial in 0..*ITERATION_COUNT { - let seed = *START_SEED + trial; + let seed = start_seed + trial; let passed = allocations::record(|| { let mut rand = Rand::new(seed); let mut log_session = None; @@ -158,10 +187,21 @@ fn test_language_corpus(language_name: &str) { for _ in 0..1 + rand.unsigned(*EDIT_COUNT) { let edit = get_random_edit(&mut rand, &input); undo_stack.push(invert_edit(&input, &edit)); - perform_edit(&mut tree, &mut input, &edit); + perform_edit(&mut tree, &mut input, &edit).unwrap(); } - // println!(" seed: {}", seed); + if log_seed { + println!(" {test_index}.{trial:<2} seed: {}", seed); + } + + if dump_edits { + fs::write( + SCRATCH_BASE_DIR + .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")), + &input, + ) + .unwrap(); + } if *LOG_GRAPH_ENABLED { eprintln!("{}\n", String::from_utf8_lossy(&input)); @@ -173,16 +213,13 @@ fn test_language_corpus(language_name: &str) { // Check that the new tree is consistent. check_consistent_sizes(&tree2, &input); if let Err(message) = check_changed_ranges(&tree, &tree2, &input) { - println!( - "\nUnexpected scope change in seed {}\n{}\n\n", - seed, message - ); + println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",); return false; } // Undo all of the edits and re-parse again. while let Some(edit) = undo_stack.pop() { - perform_edit(&mut tree2, &mut input, &edit); + perform_edit(&mut tree2, &mut input, &edit).unwrap(); } if *LOG_GRAPH_ENABLED { eprintln!("{}\n", String::from_utf8_lossy(&input)); @@ -198,10 +235,7 @@ fn test_language_corpus(language_name: &str) { } if actual_output != test.output { - println!( - "Incorrect parse for {} - {} - seed {}", - language_name, test.name, seed - ); + println!("Incorrect parse for {test_name} - seed {seed}"); print_diff_key(); print_diff(&actual_output, &test.output); println!(""); @@ -211,7 +245,7 @@ fn test_language_corpus(language_name: &str) { // Check that the edited tree is consistent. 
check_consistent_sizes(&tree3, &input); if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) { - eprintln!("Unexpected scope change in seed {}\n{}\n\n", seed, message); + println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n"); return false; } @@ -228,6 +262,18 @@ fn test_language_corpus(language_name: &str) { if failure_count > 0 { panic!("{} {} corpus tests failed", failure_count, language_name); } + + if let Some(skipped) = skipped.as_mut() { + skipped.retain(|_, v| *v == 0); + + if skipped.len() > 0 { + println!("Non matchable skip definitions:"); + for k in skipped.keys() { + println!(" {k}"); + } + panic!("Non matchable skip definitions needs to be removed"); + } + } } #[test] @@ -255,7 +301,7 @@ fn test_feature_corpus_files() { grammar_path = test_path.join("grammar.json"); } let error_message_path = test_path.join("expected_error.txt"); - let grammar_json = generate::load_grammar_file(&grammar_path).unwrap(); + let grammar_json = generate::load_grammar_file(&grammar_path, None).unwrap(); let generate_result = generate::generate_parser_for_grammar(&grammar_json); if error_message_path.exists() { @@ -424,7 +470,12 @@ fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&s let mut ranges = Vec::new(); let mut ix = 0; while ix < input.len() { - let Some(mut start_ix) = input[ix..].windows(2).position(|win| win == start.as_bytes()) else { break }; + let Some(mut start_ix) = input[ix..] + .windows(2) + .position(|win| win == start.as_bytes()) + else { + break; + }; start_ix += ix + start.len(); let end_ix = input[start_ix..] .windows(2) @@ -492,6 +543,7 @@ fn flatten_tests(test: TestEntry) -> Vec { input, output, has_fields, + .. } => { if !prefix.is_empty() { name.insert_str(0, " - "); diff --git a/cli/src/tests/github_issue_test.rs b/cli/src/tests/github_issue_test.rs new file mode 100644 index 00000000..bfc135ca --- /dev/null +++ b/cli/src/tests/github_issue_test.rs @@ -0,0 +1,42 @@ +// Tests in this mod need be executed with enabled UBSAN library: +// ``` +// UBSAN_OPTIONS="halt_on_error=1" \ +// CFLAGS="-fsanitize=undefined" \ +// RUSTFLAGS="-lubsan" \ +// cargo test --target $(rustc -vV | sed -nr 's/^host: //p') -- --test-threads 1 +// ``` + +use super::helpers::query_helpers::assert_query_matches; +use crate::tests::helpers::fixtures::get_language; +use indoc::indoc; +use tree_sitter::Query; + +#[test] +fn issue_2162_out_of_bound() { + let language = get_language("java"); + assert!(Query::new(language, "(package_declaration _ (_) @name _)").is_ok()); +} + +#[test] +fn issue_2107_first_child_group_anchor_had_no_effect() { + let language = get_language("c"); + let source_code = indoc! {r#" + void fun(int a, char b, int c) { }; + "#}; + let query = indoc! {r#" + (parameter_list + . + ( + (parameter_declaration) @constant + (#match? 
@constant "^int") + ) + ) + "#}; + let query = Query::new(language, query).unwrap(); + assert_query_matches( + language, + &query, + source_code, + &[(0, vec![("constant", "int a")])], + ); +} diff --git a/cli/src/tests/helpers/allocations.rs b/cli/src/tests/helpers/allocations.rs index 9a514014..43537633 100644 --- a/cli/src/tests/helpers/allocations.rs +++ b/cli/src/tests/helpers/allocations.rs @@ -2,7 +2,7 @@ use std::{ collections::HashMap, os::raw::c_void, sync::{ - atomic::{AtomicBool, AtomicU64, Ordering::SeqCst}, + atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst}, Mutex, }, }; @@ -25,8 +25,8 @@ unsafe impl Sync for Allocation {} #[derive(Default)] struct AllocationRecorder { enabled: AtomicBool, - allocation_count: AtomicU64, - outstanding_allocations: Mutex>, + allocation_count: AtomicUsize, + outstanding_allocations: Mutex>, } thread_local! { @@ -83,6 +83,9 @@ fn record_alloc(ptr: *mut c_void) { } fn record_dealloc(ptr: *mut c_void) { + if ptr.is_null() { + panic!("Zero pointer deallocation!"); + } RECORDER.with(|recorder| { if recorder.enabled.load(SeqCst) { recorder @@ -107,9 +110,13 @@ unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void } unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void { - record_dealloc(ptr); let result = realloc(ptr, size); - record_alloc(result); + if ptr.is_null() { + record_alloc(result); + } else if ptr != result { + record_dealloc(ptr); + record_alloc(result); + } result } diff --git a/cli/src/tests/helpers/dirs.rs b/cli/src/tests/helpers/dirs.rs index 4bf345d8..7ff48ab7 100644 --- a/cli/src/tests/helpers/dirs.rs +++ b/cli/src/tests/helpers/dirs.rs @@ -1,11 +1,46 @@ lazy_static! { - static ref ROOT_DIR: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).parent().unwrap().to_owned(); - static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures"); - static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include"); - static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars"); - static ref SCRATCH_DIR: PathBuf = { + pub static ref ROOT_DIR: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).parent().unwrap().to_owned(); + pub static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures"); + pub static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include"); + pub static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars"); + pub static ref SCRATCH_BASE_DIR: PathBuf = { let result = ROOT_DIR.join("target").join("scratch"); fs::create_dir_all(&result).unwrap(); result }; + pub static ref SCRATCH_DIR: PathBuf = { + // https://doc.rust-lang.org/reference/conditional-compilation.html + let vendor = if cfg!(target_vendor = "apple") { + "apple" + } else if cfg!(target_vendor = "fortanix") { + "fortanix" + } else if cfg!(target_vendor = "pc") { + "pc" + } else { + "unknown" + }; + let env = if cfg!(target_env = "gnu") { + "gnu" + } else if cfg!(target_env = "msvc") { + "msvc" + } else if cfg!(target_env = "musl") { + "musl" + } else if cfg!(target_env = "sgx") { + "sgx" + } else { + "unknown" + }; + let endian = if cfg!(target_endian = "little") { + "little" + } else if cfg!(target_endian = "big") { + "big" + } else { + "unknown" + }; + + let machine = format!("{}-{}-{}-{}-{}", std::env::consts::ARCH, std::env::consts::OS, vendor, env, endian); + let result = SCRATCH_BASE_DIR.join(machine); + fs::create_dir_all(&result).unwrap(); + result + }; } diff --git a/cli/src/tests/helpers/fixtures.rs 
b/cli/src/tests/helpers/fixtures.rs index 672695ac..7cb981ae 100644 --- a/cli/src/tests/helpers/fixtures.rs +++ b/cli/src/tests/helpers/fixtures.rs @@ -1,6 +1,6 @@ use lazy_static::lazy_static; -use std::fs; use std::path::{Path, PathBuf}; +use std::{env, fs}; use tree_sitter::Language; use tree_sitter_highlight::HighlightConfiguration; use tree_sitter_loader::Loader; @@ -9,7 +9,13 @@ use tree_sitter_tags::TagsConfiguration; include!("./dirs.rs"); lazy_static! { - static ref TEST_LOADER: Loader = Loader::with_parser_lib_path(SCRATCH_DIR.join("lib")); + static ref TEST_LOADER: Loader = { + let mut loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone()); + if env::var("TREE_SITTER_GRAMMAR_DEBUG").is_ok() { + loader.use_debug_build(true); + } + loader + }; } pub fn test_loader<'a>() -> &'a Loader { @@ -46,9 +52,11 @@ pub fn get_highlight_config( let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or(String::new()); let mut result = HighlightConfiguration::new( language, + language_name, &highlights_query, &injections_query, &locals_query, + false, ) .unwrap(); result.configure(&highlight_names); @@ -63,11 +71,7 @@ pub fn get_tags_config(language_name: &str) -> TagsConfiguration { TagsConfiguration::new(language, &tags_query, &locals_query).unwrap() } -pub fn get_test_language( - name: &str, - parser_code: &str, - scanner_src_path: Option<&Path>, -) -> Language { +pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language { let src_dir = SCRATCH_DIR.join("src").join(name); fs::create_dir_all(&src_dir).unwrap(); @@ -76,11 +80,16 @@ pub fn get_test_language( fs::write(&parser_path, parser_code).unwrap(); } - if let Some(scanner_src_path) = scanner_src_path { - let scanner_code = fs::read_to_string(&scanner_src_path).unwrap(); - let scanner_path = src_dir.join("scanner.c"); - if !fs::read_to_string(&scanner_path).map_or(false, |content| content == scanner_code) { - fs::write(&scanner_path, scanner_code).unwrap(); + if let Some(path) = path { + let scanner_path = path.join("scanner.c"); + if scanner_path.exists() { + let scanner_code = fs::read_to_string(&scanner_path).unwrap(); + let scanner_copy_path = src_dir.join("scanner.c"); + if !fs::read_to_string(&scanner_copy_path) + .map_or(false, |content| content == scanner_code) + { + fs::write(&scanner_copy_path, scanner_code).unwrap(); + } } } diff --git a/cli/src/tests/helpers/mod.rs b/cli/src/tests/helpers/mod.rs index def0ea3e..54df8809 100644 --- a/cli/src/tests/helpers/mod.rs +++ b/cli/src/tests/helpers/mod.rs @@ -6,7 +6,8 @@ pub(super) mod random; pub(super) mod scope_sequence; use lazy_static::lazy_static; -use std::{env, time, usize}; +use rand::Rng; +use std::env; lazy_static! { pub static ref LOG_ENABLED: bool = env::var("TREE_SITTER_LOG").is_ok(); @@ -16,11 +17,7 @@ lazy_static! { } lazy_static! { - pub static ref START_SEED: usize = - int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| time::SystemTime::now() - .duration_since(time::UNIX_EPOCH) - .unwrap() - .as_secs() as usize,); + pub static ref START_SEED: usize = new_seed(); pub static ref EDIT_COUNT: usize = int_env_var("TREE_SITTER_EDITS").unwrap_or(3); pub static ref ITERATION_COUNT: usize = int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10); } @@ -28,3 +25,10 @@ lazy_static! 
{ fn int_env_var(name: &'static str) -> Option { env::var(name).ok().and_then(|e| e.parse().ok()) } + +pub(crate) fn new_seed() -> usize { + int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| { + let mut rng = rand::thread_rng(); + rng.gen::() + }) +} diff --git a/cli/src/tests/helpers/query_helpers.rs b/cli/src/tests/helpers/query_helpers.rs index 78ae559c..4d71dfd0 100644 --- a/cli/src/tests/helpers/query_helpers.rs +++ b/cli/src/tests/helpers/query_helpers.rs @@ -1,6 +1,8 @@ use rand::prelude::Rng; use std::{cmp::Ordering, fmt::Write, ops::Range}; -use tree_sitter::{Node, Point, Tree, TreeCursor}; +use tree_sitter::{ + Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryMatch, Tree, TreeCursor, +}; #[derive(Debug)] pub struct Pattern { @@ -304,3 +306,56 @@ fn compare_depth_first(a: Node, b: Node) -> Ordering { let b = b.byte_range(); a.start.cmp(&b.start).then_with(|| b.end.cmp(&a.end)) } + +pub fn assert_query_matches( + language: Language, + query: &Query, + source: &str, + expected: &[(usize, Vec<(&str, &str)>)], +) { + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); + pretty_assertions::assert_eq!(collect_matches(matches, &query, source), expected); + pretty_assertions::assert_eq!(cursor.did_exceed_match_limit(), false); +} + +pub fn collect_matches<'a>( + matches: impl Iterator>, + query: &'a Query, + source: &'a str, +) -> Vec<(usize, Vec<(&'a str, &'a str)>)> { + matches + .map(|m| { + ( + m.pattern_index, + format_captures(m.captures.iter().cloned(), query, source), + ) + }) + .collect() +} + +pub fn collect_captures<'a>( + captures: impl Iterator, usize)>, + query: &'a Query, + source: &'a str, +) -> Vec<(&'a str, &'a str)> { + format_captures(captures.map(|(m, i)| m.captures[i]), query, source) +} + +fn format_captures<'a>( + captures: impl Iterator>, + query: &'a Query, + source: &'a str, +) -> Vec<(&'a str, &'a str)> { + captures + .map(|capture| { + ( + query.capture_names()[capture.index as usize], + capture.node.utf8_text(source.as_bytes()).unwrap(), + ) + }) + .collect() +} diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index e0b356d2..12c120ab 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -24,6 +24,7 @@ lazy_static! { get_highlight_config("rust", Some("injections.scm"), &HIGHLIGHT_NAMES); static ref HIGHLIGHT_NAMES: Vec = [ "attribute", + "boolean", "carriage-return", "comment", "constant", @@ -61,7 +62,7 @@ lazy_static! { fn test_highlighting_javascript() { let source = "const a = function(b) { return b + c; }"; assert_eq!( - &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), + &to_token_vector(source, &JS_HIGHLIGHT).unwrap(), &[vec![ ("const", vec!["keyword"]), (" ", vec![]), @@ -71,14 +72,14 @@ fn test_highlighting_javascript() { (" ", vec![]), ("function", vec!["keyword"]), ("(", vec!["punctuation.bracket"]), - ("b", vec!["variable.parameter"]), + ("b", vec!["variable"]), (")", vec!["punctuation.bracket"]), (" ", vec![]), ("{", vec!["punctuation.bracket"]), (" ", vec![]), ("return", vec!["keyword"]), (" ", vec![]), - ("b", vec!["variable.parameter"]), + ("b", vec!["variable"]), (" ", vec![]), ("+", vec!["operator"]), (" ", vec![]), @@ -92,7 +93,7 @@ fn test_highlighting_javascript() { #[test] fn test_highlighting_injected_html_in_javascript() { - let source = vec!["const s = html `
${a < b}
`;"].join("\n"); + let source = ["const s = html `
${a < b}
`;"].join("\n"); assert_eq!( &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), @@ -156,7 +157,7 @@ fn test_highlighting_injected_javascript_in_html_mini() { #[test] fn test_highlighting_injected_javascript_in_html() { - let source = vec![ + let source = [ "", " "].join("\n"); + let source = ["
<% foo() %>
"].join("\n"); assert_eq!( &to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(), @@ -376,7 +377,7 @@ fn test_highlighting_ejs_with_html_and_javascript() { fn test_highlighting_javascript_with_jsdoc() { // Regression test: the middle comment has no highlights. This should not prevent // later injections from highlighting properly. - let source = vec!["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n"); + let source = ["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n"); assert_eq!( &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), @@ -404,7 +405,7 @@ fn test_highlighting_javascript_with_jsdoc() { #[test] fn test_highlighting_with_content_children_included() { - let source = vec!["assert!(", " a.b.c() < D::e::()", ");"].join("\n"); + let source = ["assert!(", " a.b.c() < D::e::()", ");"].join("\n"); assert_eq!( &to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(), @@ -482,7 +483,7 @@ fn test_highlighting_cancellation() { #[test] fn test_highlighting_via_c_api() { - let highlights = vec![ + let highlights = [ "class=tag\0", "class=function\0", "class=string\0", @@ -496,68 +497,82 @@ fn test_highlighting_via_c_api() { .iter() .map(|h| h.as_bytes().as_ptr() as *const c_char) .collect::>(); - let highlighter = c::ts_highlighter_new( - &highlight_names[0] as *const *const c_char, - &highlight_attrs[0] as *const *const c_char, - highlights.len() as u32, - ); + let highlighter = unsafe { + c::ts_highlighter_new( + &highlight_names[0] as *const *const c_char, + &highlight_attrs[0] as *const *const c_char, + highlights.len() as u32, + ) + }; let source_code = c_string(""); let js_scope = c_string("source.js"); let js_injection_regex = c_string("^javascript"); let language = get_language("javascript"); + let lang_name = c_string("javascript"); let queries = get_language_queries_path("javascript"); let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap(); let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap(); let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap(); - c::ts_highlighter_add_language( - highlighter, - js_scope.as_ptr(), - js_injection_regex.as_ptr(), - language, - highlights_query.as_ptr() as *const c_char, - injections_query.as_ptr() as *const c_char, - locals_query.as_ptr() as *const c_char, - highlights_query.len() as u32, - injections_query.len() as u32, - locals_query.len() as u32, - ); + unsafe { + c::ts_highlighter_add_language( + highlighter, + lang_name.as_ptr(), + js_scope.as_ptr(), + js_injection_regex.as_ptr(), + language, + highlights_query.as_ptr() as *const c_char, + injections_query.as_ptr() as *const c_char, + locals_query.as_ptr() as *const c_char, + highlights_query.len() as u32, + injections_query.len() as u32, + locals_query.len() as u32, + false, + ); + } let html_scope = c_string("text.html.basic"); let html_injection_regex = c_string("^html"); let language = get_language("html"); + let lang_name = c_string("html"); let queries = get_language_queries_path("html"); let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap(); let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap(); - c::ts_highlighter_add_language( - highlighter, - html_scope.as_ptr(), - html_injection_regex.as_ptr(), - language, - highlights_query.as_ptr() as *const c_char, - injections_query.as_ptr() as *const c_char, - ptr::null(), - highlights_query.len() as u32, - injections_query.len() as u32, - 0, - ); + unsafe { + c::ts_highlighter_add_language( + 
highlighter, + lang_name.as_ptr(), + html_scope.as_ptr(), + html_injection_regex.as_ptr(), + language, + highlights_query.as_ptr() as *const c_char, + injections_query.as_ptr() as *const c_char, + ptr::null(), + highlights_query.len() as u32, + injections_query.len() as u32, + 0, + false, + ); + } let buffer = c::ts_highlight_buffer_new(); - c::ts_highlighter_highlight( - highlighter, - html_scope.as_ptr(), - source_code.as_ptr(), - source_code.as_bytes().len() as u32, - buffer, - ptr::null_mut(), - ); + unsafe { + c::ts_highlighter_highlight( + highlighter, + html_scope.as_ptr(), + source_code.as_ptr(), + source_code.as_bytes().len() as u32, + buffer, + ptr::null_mut(), + ); + } - let output_bytes = c::ts_highlight_buffer_content(buffer); - let output_line_offsets = c::ts_highlight_buffer_line_offsets(buffer); - let output_len = c::ts_highlight_buffer_len(buffer); - let output_line_count = c::ts_highlight_buffer_line_count(buffer); + let output_bytes = unsafe { c::ts_highlight_buffer_content(buffer) }; + let output_line_offsets = unsafe { c::ts_highlight_buffer_line_offsets(buffer) }; + let output_len = unsafe { c::ts_highlight_buffer_len(buffer) }; + let output_line_count = unsafe { c::ts_highlight_buffer_line_count(buffer) }; let output_bytes = unsafe { slice::from_raw_parts(output_bytes, output_len as usize) }; let output_line_offsets = @@ -583,8 +598,69 @@ fn test_highlighting_via_c_api() { ] ); - c::ts_highlighter_delete(highlighter); - c::ts_highlight_buffer_delete(buffer); + unsafe { + c::ts_highlighter_delete(highlighter); + c::ts_highlight_buffer_delete(buffer); + } +} + +#[test] +fn test_highlighting_with_all_captures_applied() { + let source = "fn main(a: u32, b: u32) -> { let c = a + b; }"; + let language = get_language("rust"); + let highlights_query = indoc::indoc! 
{" + [ + \"fn\" + \"let\" + ] @keyword + (identifier) @variable + (function_item name: (identifier) @function) + (parameter pattern: (identifier) @variable.parameter) + (primitive_type) @type.builtin + \"=\" @operator + [ \"->\" \":\" \";\" ] @punctuation.delimiter + [ \"{\" \"}\" \"(\" \")\" ] @punctuation.bracket + "}; + let mut rust_highlight_reverse = + HighlightConfiguration::new(language, "rust", highlights_query, "", "", true).unwrap(); + rust_highlight_reverse.configure(&HIGHLIGHT_NAMES); + + assert_eq!( + &to_token_vector(source, &rust_highlight_reverse).unwrap(), + &[[ + ("fn", vec!["keyword"]), + (" ", vec![]), + ("main", vec!["function"]), + ("(", vec!["punctuation.bracket"]), + ("a", vec!["variable.parameter"]), + (":", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("u32", vec!["type.builtin"]), + (", ", vec![]), + ("b", vec!["variable.parameter"]), + (":", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("u32", vec!["type.builtin"]), + (")", vec!["punctuation.bracket"]), + (" ", vec![]), + ("->", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("{", vec!["punctuation.bracket"]), + (" ", vec![]), + ("let", vec!["keyword"]), + (" ", vec![]), + ("c", vec!["variable"]), + (" ", vec![]), + ("=", vec!["operator"]), + (" ", vec![]), + ("a", vec!["variable"]), + (" + ", vec![]), + ("b", vec!["variable"]), + (";", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("}", vec!["punctuation.bracket"]) + ]], + ); } #[test] @@ -667,20 +743,20 @@ fn to_token_vector<'a>( } HighlightEvent::Source { start, end } => { let s = str::from_utf8(&src[start..end]).unwrap(); - for (i, l) in s.split("\n").enumerate() { + for (i, l) in s.split('\n').enumerate() { let l = l.trim_end_matches('\r'); if i > 0 { lines.push(line); line = Vec::new(); } - if l.len() > 0 { + if !l.is_empty() { line.push((l, highlights.clone())); } } } } } - if line.len() > 0 { + if !line.is_empty() { lines.push(line); } Ok(lines) diff --git a/cli/src/tests/language_test.rs b/cli/src/tests/language_test.rs new file mode 100644 index 00000000..5dd04c46 --- /dev/null +++ b/cli/src/tests/language_test.rs @@ -0,0 +1,95 @@ +use super::helpers::fixtures::get_language; +use tree_sitter::Parser; + +#[test] +fn test_lookahead_iterator() { + let mut parser = Parser::new(); + let language = get_language("rust"); + parser.set_language(language).unwrap(); + + let tree = parser.parse("struct Stuff {}", None).unwrap(); + + let mut cursor = tree.walk(); + + assert!(cursor.goto_first_child()); // struct + assert!(cursor.goto_first_child()); // struct keyword + + let next_state = cursor.node().next_parse_state(); + assert_ne!(next_state, 0); + assert_eq!( + next_state, + language.next_state(cursor.node().parse_state(), cursor.node().grammar_id()) + ); + assert!((next_state as usize) < language.parse_state_count()); + assert!(cursor.goto_next_sibling()); // type_identifier + assert_eq!(next_state, cursor.node().parse_state()); + assert_eq!(cursor.node().grammar_name(), "identifier"); + assert_ne!(cursor.node().grammar_id(), cursor.node().kind_id()); + + let expected_symbols = ["identifier", "block_comment", "line_comment"]; + let mut lookahead = language.lookahead_iterator(next_state).unwrap(); + assert_eq!(lookahead.language(), language); + assert!(lookahead.iter_names().eq(expected_symbols)); + + lookahead.reset_state(next_state); + assert!(lookahead.iter_names().eq(expected_symbols)); + + lookahead.reset(language, next_state); + assert!(lookahead + .map(|s| language.node_kind_for_id(s).unwrap()) + .eq(expected_symbols)); +} + +#[test] 
+fn test_lookahead_iterator_modifiable_only_by_mut() { + let mut parser = Parser::new(); + let language = get_language("rust"); + parser.set_language(language).unwrap(); + + let tree = parser.parse("struct Stuff {}", None).unwrap(); + + let mut cursor = tree.walk(); + + assert!(cursor.goto_first_child()); // struct + assert!(cursor.goto_first_child()); // struct keyword + + let next_state = cursor.node().next_parse_state(); + assert_ne!(next_state, 0); + + let mut lookahead = language.lookahead_iterator(next_state).unwrap(); + let _ = lookahead.next(); + + let mut names = lookahead.iter_names(); + let _ = names.next(); +} + +/// It isn't allowed to use a lookahead iterator through a shared reference: +/// error[E0596]: cannot borrow `lookahead` as mutable, as it is not declared as mutable +/// ```compile_fail +/// use tree_sitter::{Parser, Language}; +/// let mut parser = Parser::new(); +/// let language = unsafe { Language::from_raw(std::ptr::null()) }; +/// let tree = parser.parse("", None).unwrap(); +/// let mut cursor = tree.walk(); +/// let next_state = cursor.node().next_parse_state(); +/// let lookahead = language.lookahead_iterator(next_state).unwrap(); +/// let _ = lookahead.next(); +/// ``` + +/// It isn't allowed to use a lookahead names iterator through a shared reference: +/// error[E0596]: cannot borrow `names` as mutable, as it is not declared as mutable +/// ```compile_fail +/// use tree_sitter::{Parser, Language}; +/// let mut parser = Parser::new(); +/// let language = unsafe { Language::from_raw(std::ptr::null()) }; +/// let tree = parser.parse("", None).unwrap(); +/// let mut cursor = tree.walk(); +/// let next_state = cursor.node().next_parse_state(); +/// if let Some(mut lookahead) = language.lookahead_iterator(next_state) { +/// let _ = lookahead.next(); +/// let names = lookahead.iter_names(); +/// let _ = names.next(); +/// } +/// ``` + +fn _dummy() {} diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index 1b804450..308fc2c5 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -1,11 +1,16 @@ +mod async_context_test; mod corpus_test; +mod github_issue_test; mod helpers; mod highlight_test; +mod language_test; mod node_test; +mod parser_hang_test; mod parser_test; mod pathological_test; mod query_test; mod tags_test; mod test_highlight_test; mod test_tags_test; +mod text_provider_test; mod tree_test; diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs index 6d5ed61d..c4548d3e 100644 --- a/cli/src/tests/node_test.rs +++ b/cli/src/tests/node_test.rs @@ -252,12 +252,14 @@ fn test_node_parent_of_child_by_field_name() { fn test_node_field_name_for_child() { let mut parser = Parser::new(); parser.set_language(get_language("c")).unwrap(); - let tree = parser.parse("x + y;", None).unwrap(); + let tree = parser.parse("int w = x + y;", None).unwrap(); let translation_unit_node = tree.root_node(); - let binary_expression_node = translation_unit_node - .named_child(0) + let declaration_node = translation_unit_node.named_child(0).unwrap(); + + let binary_expression_node = declaration_node + .child_by_field_name("declarator") .unwrap() - .named_child(0) + .child_by_field_name("value") .unwrap(); assert_eq!(binary_expression_node.field_name_for_child(0), Some("left")); @@ -385,10 +387,52 @@ fn test_node_named_child_with_aliases_and_extras() { assert_eq!(root.named_child(4).unwrap().kind(), "C"); } +#[test] +fn test_node_descendant_count() { + let tree = parse_json_example(); + let value_node = tree.root_node(); + let all_nodes = get_all_nodes(&tree); +
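An editor's note on the new descendant APIs exercised here (not part of the original change): `descendant_count` counts the node itself plus every named and anonymous descendant, and `TreeCursor::goto_descendant(i)` jumps to the i-th node in pre-order, which is exactly the order `get_all_nodes` collects nodes in. A minimal sketch, assuming the bundled JSON grammar:

    let mut parser = Parser::new();
    parser.set_language(get_language("json")).unwrap();
    let tree = parser.parse("[1]", None).unwrap();
    // Pre-order: document, array, "[", number, "]" -- five nodes in total,
    // counting the root itself and the anonymous bracket tokens.
    assert_eq!(tree.root_node().descendant_count(), 5);
    let mut cursor = tree.walk();
    cursor.goto_descendant(3); // fourth node in pre-order
    assert_eq!(cursor.node().kind(), "number");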
assert_eq!(value_node.descendant_count(), all_nodes.len()); + + let mut cursor = value_node.walk(); + for (i, node) in all_nodes.iter().enumerate() { + cursor.goto_descendant(i); + assert_eq!(cursor.node(), *node, "index {i}"); + } + + for (i, node) in all_nodes.iter().enumerate().rev() { + cursor.goto_descendant(i); + assert_eq!(cursor.node(), *node, "rev index {i}"); + } +} + +#[test] +fn test_descendant_count_single_node_tree() { + let mut parser = Parser::new(); + parser + .set_language(get_language("embedded-template")) + .unwrap(); + let tree = parser.parse("hello", None).unwrap(); + + let nodes = get_all_nodes(&tree); + assert_eq!(nodes.len(), 2); + assert_eq!(tree.root_node().descendant_count(), 2); + + let mut cursor = tree.root_node().walk(); + + cursor.goto_descendant(0); + assert_eq!(cursor.depth(), 0); + assert_eq!(cursor.node(), nodes[0]); + cursor.goto_descendant(1); + assert_eq!(cursor.depth(), 1); + assert_eq!(cursor.node(), nodes[1]); +} + #[test] fn test_node_descendant_for_range() { let tree = parse_json_example(); - let array_node = tree.root_node().child(0).unwrap(); + let array_node = tree.root_node(); // Leaf node exactly matches the given bounds - byte query let colon_index = JSON_EXAMPLE.find(":").unwrap(); @@ -508,7 +552,7 @@ fn test_node_edit() { let edit = get_random_edit(&mut rand, &mut code); let mut tree2 = tree.clone(); - let edit = perform_edit(&mut tree2, &mut code, &edit); + let edit = perform_edit(&mut tree2, &mut code, &edit).unwrap(); for node in nodes_before.iter_mut() { node.edit(&edit); } @@ -841,15 +885,17 @@ fn get_all_nodes(tree: &Tree) -> Vec<Node> { let mut visited_children = false; let mut cursor = tree.walk(); loop { - result.push(cursor.node()); - if !visited_children && cursor.goto_first_child() { - continue; - } else if cursor.goto_next_sibling() { - visited_children = false; - } else if cursor.goto_parent() { - visited_children = true; + if !visited_children { + result.push(cursor.node()); + if !cursor.goto_first_child() { + visited_children = true; + } } else { - break; + if cursor.goto_next_sibling() { + visited_children = false; + } else if !cursor.goto_parent() { + break; + } } } return result; diff --git a/cli/src/tests/parser_hang_test.rs b/cli/src/tests/parser_hang_test.rs new file mode 100644 index 00000000..02a2689c --- /dev/null +++ b/cli/src/tests/parser_hang_test.rs @@ -0,0 +1,104 @@ +// For some reason `Command::spawn` doesn't work in the CI env for many exotic arches. +#![cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))] + +use crate::{ + generate::{generate_parser_for_grammar, load_grammar_file}, + tests::helpers::fixtures::{fixtures_dir, get_test_language}, +}; +use std::{ + env::VarError, + process::{Command, Stdio}, +}; +use tree_sitter::Parser; + +// The `sanitizing` cfg is required to avoid running tests under specific sanitizers, +// because they don't seem to work well with subprocesses _(an assumption)_. +// +// Below are two alternative examples of how to disable tests for some arches, +// in case excluding the whole mod from compilation doesn't work well. 
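The test below relies on a self-spawning pattern: the test binary re-executes itself with a marker environment variable set, so the child process runs the code path that is expected to hang while the parent merely watches and then kills it. A stripped-down sketch of the pattern (an editor's illustration; `run_the_hanging_parse` is hypothetical):

    use std::process::Command;

    const MARKER: &str = "CARGO_HANG_TEST"; // same variable the test uses
    if std::env::var(MARKER).is_ok() {
        run_the_hanging_parse(); // hypothetical: only the child reaches this
    } else {
        let exe = std::env::current_exe().unwrap();
        let mut child = Command::new(exe).env(MARKER, "1").spawn().unwrap();
        std::thread::sleep(std::time::Duration::from_millis(1000));
        // If the child is still running, it hung as expected; clean it up.
        assert!(child.try_wait().unwrap().is_none(), "child exited early");
        child.kill().unwrap();
    }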
+// +// XXX: Also, it may make sense to keep such tests ignored by default +// to avoid surprises, and enable them on CI by passing an extra option explicitly: +// +// > cargo test -- --include-ignored +// +// #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))] +// #[cfg_attr(not(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing))), ignore)] +// +#[test] +fn test_grammar_that_should_hang_and_not_segfault() { + let parent_sleep_millis = 1000; + let test_name = "test_grammar_that_should_hang_and_not_segfault"; + let test_var = "CARGO_HANG_TEST"; + + eprintln!(" {test_name}"); + + let tests_exec_path = std::env::args() + .nth(0) + .expect("Failed to get tests executable path"); + + match std::env::var(test_var) { + Ok(v) if v == test_name => { + eprintln!(" child process id {}", std::process::id()); + hang_test(); + } + + Err(VarError::NotPresent) => { + eprintln!(" parent process id {}", std::process::id()); + if true { + let mut command = Command::new(tests_exec_path); + command.arg(test_name).env(test_var, test_name); + if std::env::args().any(|x| x == "--nocapture") { + command.arg("--nocapture"); + } else { + command.stdout(Stdio::null()).stderr(Stdio::null()); + } + match command.spawn() { + Ok(mut child) => { + std::thread::sleep(std::time::Duration::from_millis(parent_sleep_millis)); + match child.try_wait() { + Ok(Some(status)) if status.success() => { + panic!("Child didn't hang and exited successfully") + } + Ok(Some(status)) => panic!( + "Child didn't hang and exited with status code: {:?}", + status.code() + ), + _ => (), + } + if let Err(e) = child.kill() { + eprintln!( + "Failed to kill hang test sub process id: {}, error: {e}", + child.id() + ); + } + } + Err(e) => panic!("{e}"), + } + } + } + + Err(e) => panic!("Env var error: {e}"), + _ => unreachable!(), + } + + fn hang_test() { + let test_grammar_dir = fixtures_dir() + .join("test_grammars") + .join("get_col_should_hang_not_crash"); + + let grammar_json = load_grammar_file(&test_grammar_dir.join("grammar.js"), None).unwrap(); + let (parser_name, parser_code) = + generate_parser_for_grammar(grammar_json.as_str()).unwrap(); + + let language = + get_test_language(&parser_name, &parser_code, Some(test_grammar_dir.as_path())); + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + + let code_that_should_hang = "\nHello"; + + parser.parse(code_that_should_hang, None).unwrap(); + } +} diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 9e914a2f..5f6febee 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -15,6 +15,7 @@ use std::{ thread, time, }; use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range}; +use tree_sitter_proc_macro::retry; #[test] fn test_parsing_simple_string() { @@ -149,7 +150,7 @@ fn test_parsing_with_custom_utf8_input() { ) ); assert_eq!(root.kind(), "source_file"); - assert_eq!(root.has_error(), false); + assert!(!root.has_error()); assert_eq!(root.child(0).unwrap().kind(), "function_item"); } @@ -188,7 +189,7 @@ fn test_parsing_with_custom_utf16_input() { "(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))" ); assert_eq!(root.kind(), "source_file"); - assert_eq!(root.has_error(), false); + assert!(!root.has_error()); assert_eq!(root.child(0).unwrap().kind(), "function_item"); } @@ -277,7 +278,10 @@ fn test_parsing_invalid_chars_at_eof() { let mut parser = Parser::new(); 
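The adjusted expectation just below reflects that a lone UTF-8 lead byte at end of input is now reported inside the grammar's `document` root rather than as a bare top-level ERROR. Roughly, as an editor's paraphrase of the assertion that follows:

    let tree = parser.parse(b"\xdf", None).unwrap(); // truncated UTF-8 sequence
    assert!(tree.root_node().has_error());
    assert_eq!(
        tree.root_node().to_sexp(),
        "(document (ERROR (UNEXPECTED INVALID)))",
    );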
parser.set_language(get_language("json")).unwrap(); let tree = parser.parse(b"\xdf", None).unwrap(); - assert_eq!(tree.root_node().to_sexp(), "(ERROR (UNEXPECTED INVALID))"); + assert_eq!( + tree.root_node().to_sexp(), + "(document (ERROR (UNEXPECTED INVALID)))" + ); } #[test] @@ -340,7 +344,8 @@ fn test_parsing_after_editing_beginning_of_code() { deleted_length: 0, inserted_text: b" || 5".to_vec(), }, - ); + ) + .unwrap(); let mut recorder = ReadRecorder::new(&code); let tree = parser @@ -387,7 +392,8 @@ fn test_parsing_after_editing_end_of_code() { deleted_length: 0, inserted_text: b".d".to_vec(), }, - ); + ) + .unwrap(); let mut recorder = ReadRecorder::new(&code); let tree = parser @@ -466,7 +472,8 @@ h + i deleted_length: 0, inserted_text: b"1234".to_vec(), }, - ); + ) + .unwrap(); assert_eq!( code, @@ -511,7 +518,7 @@ fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() { let tree = parser.parse(&source, None).unwrap(); assert_eq!( tree.root_node().to_sexp(), - "(module (expression_statement (assignment left: (identifier) right: (expression_list (identifier) (string)))))" + "(module (expression_statement (assignment left: (identifier) right: (expression_list (identifier) (string (string_start) (string_content) (string_end))))))" ); // Delete a suffix of the source code, starting in the middle of the string @@ -530,12 +537,12 @@ fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() { let undo = invert_edit(&source, &edit); let mut tree2 = tree.clone(); - perform_edit(&mut tree2, &mut source, &edit); + perform_edit(&mut tree2, &mut source, &edit).unwrap(); tree2 = parser.parse(&source, Some(&tree2)).unwrap(); assert!(tree2.root_node().has_error()); let mut tree3 = tree2.clone(); - perform_edit(&mut tree3, &mut source, &undo); + perform_edit(&mut tree3, &mut source, &undo).unwrap(); tree3 = parser.parse(&source, Some(&tree3)).unwrap(); assert_eq!(tree3.root_node().to_sexp(), tree.root_node().to_sexp(),); } @@ -644,6 +651,7 @@ fn test_parsing_cancelled_by_another_thread() { // Timeouts #[test] +#[retry(10)] fn test_parsing_with_a_timeout() { let mut parser = Parser::new(); parser.set_language(get_language("json")).unwrap(); @@ -662,8 +670,12 @@ fn test_parsing_with_a_timeout() { None, ); assert!(tree.is_none()); + #[cfg(not(target_arch = "sparc64"))] assert!(start_time.elapsed().as_micros() < 2000); + #[cfg(target_arch = "sparc64")] + assert!(start_time.elapsed().as_micros() < 8000); + // Continue parsing, but pause after 1 ms of processing. 
parser.set_timeout_micros(5000); let start_time = time::Instant::now(); @@ -701,6 +713,7 @@ fn test_parsing_with_a_timeout() { } #[test] +#[retry(10)] fn test_parsing_with_a_timeout_and_a_reset() { let mut parser = Parser::new(); parser.set_language(get_language("json")).unwrap(); @@ -756,6 +769,7 @@ fn test_parsing_with_a_timeout_and_a_reset() { } #[test] +#[retry(10)] fn test_parsing_with_a_timeout_and_implicit_reset() { allocations::record(|| { let mut parser = Parser::new(); @@ -789,6 +803,7 @@ fn test_parsing_with_a_timeout_and_implicit_reset() { } #[test] +#[retry(10)] fn test_parsing_with_timeout_and_no_completion() { allocations::record(|| { let mut parser = Parser::new(); @@ -828,7 +843,7 @@ fn test_parsing_with_one_included_range() { concat!( "(program (expression_statement (call_expression ", "function: (member_expression object: (identifier) property: (property_identifier)) ", - "arguments: (arguments (string)))))", + "arguments: (arguments (string (string_fragment))))))", ) ); assert_eq!( @@ -1177,7 +1192,7 @@ fn test_parsing_with_a_newly_included_range() { .set_included_ranges(&[simple_range(range1_start, range1_end)]) .unwrap(); let tree = parser - .parse_with(&mut chunked_input(&source_code, 3), None) + .parse_with(&mut chunked_input(source_code, 3), None) .unwrap(); assert_eq!( tree.root_node().to_sexp(), @@ -1196,7 +1211,7 @@ fn test_parsing_with_a_newly_included_range() { ]) .unwrap(); let tree2 = parser - .parse_with(&mut chunked_input(&source_code, 3), Some(&tree)) + .parse_with(&mut chunked_input(source_code, 3), Some(&tree)) .unwrap(); assert_eq!( tree2.root_node().to_sexp(), @@ -1220,7 +1235,7 @@ fn test_parsing_with_a_newly_included_range() { simple_range(range3_start, range3_end), ]) .unwrap(); - let tree3 = parser.parse(&source_code, Some(&tree)).unwrap(); + let tree3 = parser.parse(source_code, Some(&tree)).unwrap(); assert_eq!( tree3.root_node().to_sexp(), concat!( @@ -1297,6 +1312,85 @@ fn test_parsing_with_included_ranges_and_missing_tokens() { assert_eq!(root.child(3).unwrap().start_byte(), 4); } +#[test] +fn test_grammars_that_can_hang_on_eof() { + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test_single_null_char_regex", + "rules": { + "source_file": { + "type": "SEQ", + "members": [ + { "type": "STRING", "value": "\"" }, + { "type": "PATTERN", "value": "[\\x00]*" }, + { "type": "STRING", "value": "\"" } + ] + } + }, + "extras": [ { "type": "PATTERN", "value": "\\s" } ] + } + "#, + ) + .unwrap(); + + let mut parser = Parser::new(); + parser + .set_language(get_test_language(&parser_name, &parser_code, None)) + .unwrap(); + parser.parse("\"", None).unwrap(); + + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test_null_char_with_next_char_regex", + "rules": { + "source_file": { + "type": "SEQ", + "members": [ + { "type": "STRING", "value": "\"" }, + { "type": "PATTERN", "value": "[\\x00-\\x01]*" }, + { "type": "STRING", "value": "\"" } + ] + } + }, + "extras": [ { "type": "PATTERN", "value": "\\s" } ] + } + "#, + ) + .unwrap(); + + parser + .set_language(get_test_language(&parser_name, &parser_code, None)) + .unwrap(); + parser.parse("\"", None).unwrap(); + + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test_null_char_with_range_regex", + "rules": { + "source_file": { + "type": "SEQ", + "members": [ + { "type": "STRING", "value": "\"" }, + { "type": "PATTERN", "value": "[\\x00-\\x7F]*" }, + { "type": "STRING", "value": "\"" } + ] + } + }, + 
"extras": [ { "type": "PATTERN", "value": "\\s" } ] + } + "#, + ) + .unwrap(); + + parser + .set_language(get_test_language(&parser_name, &parser_code, None)) + .unwrap(); + parser.parse("\"", None).unwrap(); +} + fn simple_range(start: usize, end: usize) -> Range { Range { start_byte: start, diff --git a/cli/src/tests/proc_macro/Cargo.toml b/cli/src/tests/proc_macro/Cargo.toml new file mode 100644 index 00000000..1ca1be39 --- /dev/null +++ b/cli/src/tests/proc_macro/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "tree-sitter-tests-proc-macro" +version = "0.0.0" +edition = "2021" +publish = false +rust-version.workspace = true + +[lib] +proc-macro = true + +[dependencies] +proc-macro2 = "1.0.63" +quote = "1" +rand = "0.8.5" +syn = { version = "1", features = ["full"] } diff --git a/cli/src/tests/proc_macro/src/lib.rs b/cli/src/tests/proc_macro/src/lib.rs new file mode 100644 index 00000000..4d92479f --- /dev/null +++ b/cli/src/tests/proc_macro/src/lib.rs @@ -0,0 +1,137 @@ +use proc_macro::TokenStream; +use proc_macro2::Span; +use quote::quote; +use syn::{ + parse::{Parse, ParseStream}, + parse_macro_input, Error, Expr, Ident, ItemFn, LitInt, Token, +}; + +#[proc_macro_attribute] +pub fn retry(args: TokenStream, input: TokenStream) -> TokenStream { + let count = parse_macro_input!(args as LitInt); + let input = parse_macro_input!(input as ItemFn); + let attrs = input.attrs.clone(); + let name = input.sig.ident.clone(); + + TokenStream::from(quote! { + #(#attrs),* + fn #name() { + #input + + for i in 0..=#count { + let result = std::panic::catch_unwind(|| { + #name(); + }); + + if result.is_ok() { + return; + } + + if i == #count { + std::panic::resume_unwind(result.unwrap_err()); + } + } + } + }) +} + +#[proc_macro_attribute] +pub fn test_with_seed(args: TokenStream, input: TokenStream) -> TokenStream { + struct Args { + retry: LitInt, + seed: Expr, + seed_fn: Option, + } + + impl Parse for Args { + fn parse(input: ParseStream) -> syn::Result { + let mut retry = None; + let mut seed = None; + let mut seed_fn = None; + + while !input.is_empty() { + let name = input.parse::()?; + match name.to_string().as_str() { + "retry" => { + input.parse::()?; + retry.replace(input.parse()?); + } + "seed" => { + input.parse::()?; + seed.replace(input.parse()?); + } + "seed_fn" => { + input.parse::()?; + seed_fn.replace(input.parse()?); + } + x => { + return Err(Error::new( + name.span(), + format!("Unsupported parameter `{x}`"), + )) + } + } + + if !input.is_empty() { + input.parse::()?; + } + } + + if retry.is_none() { + retry.replace(LitInt::new("0", Span::mixed_site())); + } + + Ok(Args { + retry: retry.expect("`retry` parameter is required"), + seed: seed.expect("`seed` parameter is required"), + seed_fn, + }) + } + } + + let Args { + retry, + seed, + seed_fn, + } = parse_macro_input!(args as Args); + + let seed_fn = seed_fn.iter(); + + let func = parse_macro_input!(input as ItemFn); + let attrs = func.attrs.clone(); + let name = func.sig.ident.clone(); + + // dbg!(quote::ToTokens::into_token_stream(&func)); + + TokenStream::from(quote! 
{ + #[test] + #(#attrs),* + fn #name() { + #func + + let mut seed = #seed; + + for i in 0..=#retry { + let result = std::panic::catch_unwind(|| { + #name(seed); + }); + + if result.is_ok() { + return; + } + + if i == #retry { + std::panic::resume_unwind(result.unwrap_err()); + } + + #( + seed = #seed_fn(); + )* + + if i < #retry { + println!("\nRetry {}/{} with a new seed {}", i + 1, #retry, seed); + } + } + } + }) +} diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 31cb8035..13e4f8d0 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1,15 +1,22 @@ use super::helpers::{ allocations, - fixtures::get_language, - query_helpers::{Match, Pattern}, + fixtures::{get_language, get_test_language}, + query_helpers::{assert_query_matches, Match, Pattern}, + ITERATION_COUNT, }; +use crate::{ + generate::generate_parser_for_grammar, + tests::helpers::query_helpers::{collect_captures, collect_matches}, +}; +use indoc::indoc; use lazy_static::lazy_static; use rand::{prelude::StdRng, SeedableRng}; use std::{env, fmt::Write}; use tree_sitter::{ - CaptureQuantifier, Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryError, - QueryErrorKind, QueryMatch, QueryPredicate, QueryPredicateArg, QueryProperty, + CaptureQuantifier, Language, Node, Parser, Point, Query, QueryCursor, QueryError, + QueryErrorKind, QueryPredicate, QueryPredicateArg, QueryProperty, }; +use unindent::Unindent; lazy_static! { static ref EXAMPLE_FILTER: Option = env::var("TREE_SITTER_TEST_EXAMPLE_FILTER").ok(); @@ -316,16 +323,16 @@ fn test_query_errors_on_impossible_patterns() { assert_eq!( Query::new( js_lang, - "(binary_expression left: (identifier) left: (identifier))" + "(binary_expression left: (expression (identifier)) left: (expression (identifier)))" ), Err(QueryError { kind: QueryErrorKind::Structure, row: 0, - offset: 38, - column: 38, + offset: 51, + column: 51, message: [ - "(binary_expression left: (identifier) left: (identifier))", - " ^" + "(binary_expression left: (expression (identifier)) left: (expression (identifier)))", + " ^", ] .join("\n"), }) @@ -430,19 +437,19 @@ fn test_query_errors_on_impossible_patterns() { Query::new( js_lang, "(if_statement - condition: (parenthesized_expression (_expression) @cond))", + condition: (parenthesized_expression (expression) @cond))", ) .unwrap(); assert_eq!( - Query::new(js_lang, "(if_statement condition: (_expression))",), + Query::new(js_lang, "(if_statement condition: (expression))",), Err(QueryError { kind: QueryErrorKind::Structure, row: 0, offset: 14, column: 14, message: [ - "(if_statement condition: (_expression))", // + "(if_statement condition: (expression))", // " ^", ] .join("\n") @@ -849,6 +856,33 @@ fn test_query_matches_with_wildcard_at_the_root() { }); } +#[test] +fn test_query_matches_with_wildcard_within_wildcard() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (_ (_) @child) @parent + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + "/* a */ b; c;", + &[ + (0, vec![("parent", "/* a */ b; c;"), ("child", "/* a */")]), + (0, vec![("parent", "/* a */ b; c;"), ("child", "b;")]), + (0, vec![("parent", "b;"), ("child", "b")]), + (0, vec![("parent", "/* a */ b; c;"), ("child", "c;")]), + (0, vec![("parent", "c;"), ("child", "c")]), + ], + ); + }); +} + #[test] fn test_query_matches_with_immediate_siblings() { allocations::record(|| { @@ -1162,11 +1196,20 @@ fn 
test_query_matches_with_non_terminal_repetitions_within_root() { language, &query, r#" + function f() { + d; + e; + f; + g; + } a; b; c; "#, - &[(0, vec![("id", "a"), ("id", "b"), ("id", "c")])], + &[ + (0, vec![("id", "d"), ("id", "e"), ("id", "f"), ("id", "g")]), + (0, vec![("id", "a"), ("id", "b"), ("id", "c")]), + ], ); }); } @@ -1683,7 +1726,7 @@ fn test_query_matches_with_too_many_permutations_to_track() { collect_matches(matches, &query, source.as_str())[0], (0, vec![("pre", "hello"), ("post", "hello")]), ); - assert_eq!(cursor.did_exceed_match_limit(), true); + assert!(cursor.did_exceed_match_limit()); }); } @@ -1732,7 +1775,7 @@ fn test_query_sibling_patterns_dont_match_children_of_an_error() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); assert_eq!( @@ -1782,7 +1825,54 @@ fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() { collect_matches(matches, &query, source.as_str()), vec![(1, vec![("method", "b")]); 50], ); - assert_eq!(cursor.did_exceed_match_limit(), true); + assert!(cursor.did_exceed_match_limit()); + }); +} + +#[test] +fn test_repetitions_before_with_alternatives() { + allocations::record(|| { + let language = get_language("rust"); + let query = Query::new( + language, + r#" + ( + (line_comment)* @comment + . + [ + (struct_item name: (_) @name) + (function_item name: (_) @name) + (enum_item name: (_) @name) + (impl_item type: (_) @name) + ] + ) + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#" + // a + // b + fn c() {} + + // d + // e + impl F {} + "#, + &[ + ( + 0, + vec![("comment", "// a"), ("comment", "// b"), ("name", "c")], + ), + ( + 0, + vec![("comment", "// d"), ("comment", "// e"), ("name", "F")], + ), + ], + ); }); } @@ -1866,7 +1956,7 @@ fn test_query_matches_within_byte_range() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); @@ -1874,7 +1964,6 @@ fn test_query_matches_within_byte_range() { cursor .set_byte_range(0..8) .matches(&query, tree.root_node(), source.as_bytes()); - assert_eq!( collect_matches(matches, &query, source), &[ @@ -1888,7 +1977,6 @@ fn test_query_matches_within_byte_range() { cursor .set_byte_range(5..15) .matches(&query, tree.root_node(), source.as_bytes()); - assert_eq!( collect_matches(matches, &query, source), &[ @@ -1902,7 +1990,6 @@ fn test_query_matches_within_byte_range() { cursor .set_byte_range(12..0) .matches(&query, tree.root_node(), source.as_bytes()); - assert_eq!( collect_matches(matches, &query, source), &[ @@ -1920,20 +2007,28 @@ fn test_query_matches_within_point_range() { let language = get_language("javascript"); let query = Query::new(language, "(identifier) @element").unwrap(); - let source = "[a, b,\n c, d,\n e, f,\n g]"; + let source = " + [ + a, b, + c, d, + e, f, + g, h, + i, j, + k, l, + ] + " + .unindent(); let mut parser = Parser::new(); parser.set_language(language).unwrap(); let tree = parser.parse(&source, None).unwrap(); - let mut cursor = QueryCursor::new(); let matches = cursor - .set_point_range(Point::new(0, 0)..Point::new(1, 3)) + .set_point_range(Point::new(1, 0)..Point::new(2, 3)) .matches(&query, tree.root_node(), source.as_bytes()); - assert_eq!( - 
collect_matches(matches, &query, source), + collect_matches(matches, &query, &source), &[ (0, vec![("element", "a")]), (0, vec![("element", "b")]), @@ -1942,11 +2037,10 @@ fn test_query_matches_within_point_range() { ); let matches = cursor - .set_point_range(Point::new(1, 0)..Point::new(2, 3)) + .set_point_range(Point::new(2, 0)..Point::new(3, 3)) .matches(&query, tree.root_node(), source.as_bytes()); - assert_eq!( - collect_matches(matches, &query, source), + collect_matches(matches, &query, &source), &[ (0, vec![("element", "c")]), (0, vec![("element", "d")]), @@ -1954,16 +2048,19 @@ fn test_query_matches_within_point_range() { ] ); + // Zero end point is treated like no end point. let matches = cursor - .set_point_range(Point::new(2, 1)..Point::new(0, 0)) + .set_point_range(Point::new(4, 1)..Point::new(0, 0)) .matches(&query, tree.root_node(), source.as_bytes()); - assert_eq!( - collect_matches(matches, &query, source), + collect_matches(matches, &query, &source), &[ - (0, vec![("element", "e")]), - (0, vec![("element", "f")]), (0, vec![("element", "g")]), + (0, vec![("element", "h")]), + (0, vec![("element", "i")]), + (0, vec![("element", "j")]), + (0, vec![("element", "k")]), + (0, vec![("element", "l")]), ] ); }); @@ -1989,7 +2086,7 @@ fn test_query_captures_within_byte_range() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = @@ -2025,7 +2122,7 @@ fn test_query_matches_with_unrooted_patterns_intersecting_byte_range() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); // within the type parameter list @@ -2163,16 +2260,16 @@ fn test_query_captures_within_byte_range_assigned_after_iterating() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let mut captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); // Retrieve some captures let mut results = Vec::new(); for (mat, capture_ix) in captures.by_ref().take(5) { - let capture = mat.captures[capture_ix as usize]; + let capture = mat.captures[capture_ix]; results.push(( - query.capture_names()[capture.index as usize].as_str(), + query.capture_names()[capture.index as usize], &source[capture.node.byte_range()], )); } @@ -2193,9 +2290,9 @@ fn test_query_captures_within_byte_range_assigned_after_iterating() { results.clear(); captures.set_byte_range(source.find("Ok").unwrap()..source.len()); for (mat, capture_ix) in captures { - let capture = mat.captures[capture_ix as usize]; + let capture = mat.captures[capture_ix]; results.push(( - query.capture_names()[capture.index as usize].as_str(), + query.capture_names()[capture.index as usize], &source[capture.node.byte_range()], )); } @@ -2210,6 +2307,57 @@ fn test_query_captures_within_byte_range_assigned_after_iterating() { }); } +#[test] +fn test_query_matches_within_range_of_long_repetition() { + allocations::record(|| { + let language = get_language("rust"); + let query = Query::new( + language, + " + (function_item name: (identifier) @fn-name) + ", + ) + .unwrap(); + + let source = " + fn zero() {} + fn one() {} + fn two() {} + fn three() {} + fn four() {} + fn five() {} + fn six() 
{} + fn seven() {} + fn eight() {} + fn nine() {} + fn ten() {} + fn eleven() {} + fn twelve() {} + " + .unindent(); + + let mut parser = Parser::new(); + let mut cursor = QueryCursor::new(); + + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let matches = cursor + .set_point_range(Point::new(8, 0)..Point::new(20, 0)) + .matches(&query, tree.root_node(), source.as_bytes()); + assert_eq!( + collect_matches(matches, &query, &source), + &[ + (0, vec![("fn-name", "eight")]), + (0, vec![("fn-name", "nine")]), + (0, vec![("fn-name", "ten")]), + (0, vec![("fn-name", "eleven")]), + (0, vec![("fn-name", "twelve")]), + ] + ); + }); +} + #[test] fn test_query_matches_different_queries_same_cursor() { allocations::record(|| { @@ -2245,7 +2393,7 @@ fn test_query_matches_different_queries_same_cursor() { let mut cursor = QueryCursor::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let matches = cursor.matches(&query1, tree.root_node(), source.as_bytes()); assert_eq!( @@ -2288,7 +2436,7 @@ fn test_query_matches_with_multiple_captures_on_a_node() { let mut cursor = QueryCursor::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); assert_eq!( @@ -2376,7 +2524,7 @@ fn test_query_matches_with_captured_wildcard_at_root() { let mut parser = Parser::new(); let mut cursor = QueryCursor::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let match_capture_names_and_rows = cursor .matches(&query, tree.root_node(), source.as_bytes()) @@ -2385,7 +2533,7 @@ fn test_query_matches_with_captured_wildcard_at_root() { .iter() .map(|c| { ( - query.capture_names()[c.index as usize].as_str(), + query.capture_names()[c.index as usize], c.node.kind(), c.node.start_position().row, ) @@ -2642,7 +2790,7 @@ fn test_query_captures_basic() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); @@ -2697,6 +2845,14 @@ fn test_query_captures_with_text_conditions() { ((identifier) @function.builtin (#eq? @function.builtin "require")) + ((identifier) @variable.builtin + (#any-of? @variable.builtin + "arguments" + "module" + "console" + "window" + "document")) + ((identifier) @variable (#not-match? 
@variable "^(lambda|load)$")) "#, @@ -2710,11 +2866,14 @@ fn test_query_captures_with_text_conditions() { lambda const ab = require('./ab'); new Cd(EF); + document; + module; + console; "; let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); @@ -2731,6 +2890,12 @@ fn test_query_captures_with_text_conditions() { ("constant", "EF"), ("constructor", "EF"), ("variable", "EF"), + ("variable.builtin", "document"), + ("variable", "document"), + ("variable.builtin", "module"), + ("variable", "module"), + ("variable.builtin", "console"), + ("variable", "console"), ], ); }); @@ -2769,7 +2934,8 @@ fn test_query_captures_with_predicates() { args: vec![ QueryPredicateArg::Capture(0), QueryPredicateArg::String("omg".to_string().into_boxed_str()), - ], + ] + .into_boxed_slice(), },] ); assert_eq!(query.property_settings(1), &[]); @@ -2853,7 +3019,7 @@ fn test_query_captures_with_duplicates() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); @@ -3055,11 +3221,11 @@ fn test_query_captures_with_too_many_nested_results() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); cursor.set_match_limit(32); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); - let captures = collect_captures(captures, &query, &source); + let captures = collect_captures(captures, &query, source); assert_eq!( &captures[0..4], @@ -3118,7 +3284,7 @@ fn test_query_captures_with_definite_pattern_containing_many_nested_matches() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); @@ -3154,7 +3320,7 @@ fn test_query_captures_ordered_by_both_start_and_end_positions() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); @@ -3195,7 +3361,7 @@ fn test_query_captures_with_matches_removed() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let mut captured_strings = Vec::new(); @@ -3239,7 +3405,7 @@ fn test_query_captures_with_matches_removed_before_they_finish() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let mut captured_strings = Vec::new(); @@ -3281,7 +3447,7 @@ fn test_query_captures_and_matches_iterators_are_fused() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = 
parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let mut captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); @@ -3355,7 +3521,7 @@ fn test_query_text_callback_returns_chunks() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), |node: Node| { chunks_in_range(node.byte_range()) @@ -3439,12 +3605,7 @@ fn test_query_capture_names() { assert_eq!( query.capture_names(), - &[ - "left-operand".to_string(), - "right-operand".to_string(), - "body".to_string(), - "loop-condition".to_string(), - ] + ["left-operand", "right-operand", "body", "loop-condition"] ); }); } @@ -3458,7 +3619,7 @@ fn test_query_lifetime_is_separate_from_nodes_lifetime() { let language = get_language("javascript"); let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + let tree = parser.parse(source, None).unwrap(); fn take_first_node_from_captures<'tree>( source: &str, @@ -3634,17 +3795,22 @@ fn test_query_random() { .parse(include_str!("helpers/query_helpers.rs"), None) .unwrap(); - // let start_seed = *SEED; let start_seed = 0; + let end_seed = start_seed + *ITERATION_COUNT; - for i in 0..100 { - let seed = (start_seed + i) as u64; + for seed in start_seed..(start_seed + end_seed) { + let seed = seed as u64; let mut rand = StdRng::seed_from_u64(seed); let (pattern_ast, _) = Pattern::random_pattern_in_tree(&pattern_tree, &mut rand); let pattern = pattern_ast.to_string(); let expected_matches = pattern_ast.matches_in_tree(&test_tree); - let query = Query::new(language, &pattern).unwrap(); + let query = match Query::new(language, &pattern) { + Ok(query) => query, + Err(e) => { + panic!("failed to build query for pattern {pattern} - {e}. 
seed: {seed}"); + } + }; let mut actual_matches = cursor .matches( &query, @@ -3656,7 +3822,7 @@ fn test_query_random() { captures: mat .captures .iter() - .map(|c| (query.capture_names()[c.index as usize].as_str(), c.node)) + .map(|c| (query.capture_names()[c.index as usize], c.node)) .collect::>(), }) .collect::>(); @@ -3766,10 +3932,10 @@ fn test_query_is_pattern_guaranteed_at_step() { Row { description: "a guaranteed step with a field", language: get_language("javascript"), - pattern: r#"(binary_expression left: (identifier) right: (_))"#, + pattern: r#"(binary_expression left: (expression) right: (_))"#, results_by_substring: &[ ("binary_expression", false), - ("(identifier)", false), + ("(expression)", false), ("(_)", true), ], }, @@ -3836,7 +4002,7 @@ fn test_query_is_pattern_guaranteed_at_step() { "#, results_by_substring: &[ ("identifier", false), - ("property_identifier", true), + ("property_identifier", false), ("[", true), ], }, @@ -4070,6 +4236,103 @@ fn test_query_is_pattern_rooted() { }); } +#[test] +fn test_query_is_pattern_non_local() { + struct Row { + description: &'static str, + pattern: &'static str, + language: Language, + is_non_local: bool, + } + + let rows = [ + Row { + description: "simple token", + pattern: r#"(identifier)"#, + language: get_language("python"), + is_non_local: false, + }, + Row { + description: "siblings that can occur in an argument list", + pattern: r#"((identifier) (identifier))"#, + language: get_language("python"), + is_non_local: true, + }, + Row { + description: "siblings that can occur in a statement block", + pattern: r#"((return_statement) (return_statement))"#, + language: get_language("python"), + is_non_local: true, + }, + Row { + description: "siblings that can occur in a source file", + pattern: r#"((function_definition) (class_definition))"#, + language: get_language("python"), + is_non_local: true, + }, + Row { + description: "siblings that can't occur in any repetition", + pattern: r#"("{" "}")"#, + language: get_language("python"), + is_non_local: false, + }, + Row { + description: "siblings that can't occur in any repetition, wildcard root", + pattern: r#"(_ "{" "}") @foo"#, + language: get_language("javascript"), + is_non_local: false, + }, + Row { + description: "siblings that can occur in a class body, wildcard root", + pattern: r#"(_ (method_definition) (method_definition)) @foo"#, + language: get_language("javascript"), + is_non_local: true, + }, + Row { + description: "top-level repetitions that can occur in a class body", + pattern: r#"(method_definition)+ @foo"#, + language: get_language("javascript"), + is_non_local: true, + }, + Row { + description: "top-level repetitions that can occur in a statement block", + pattern: r#"(return_statement)+ @foo"#, + language: get_language("javascript"), + is_non_local: true, + }, + Row { + description: "rooted pattern that can occur in a statement block", + pattern: r#"(return_statement) @foo"#, + language: get_language("javascript"), + is_non_local: false, + }, + ]; + + allocations::record(|| { + eprintln!(""); + + for row in &rows { + if let Some(filter) = EXAMPLE_FILTER.as_ref() { + if !row.description.contains(filter.as_str()) { + continue; + } + } + eprintln!(" query example: {:?}", row.description); + let query = Query::new(row.language, row.pattern).unwrap(); + assert_eq!( + query.is_pattern_non_local(0), + row.is_non_local, + "Description: {}, Pattern: {:?}", + row.description, + row.pattern + .split_ascii_whitespace() + .collect::>() + .join(" "), + ) + } + }); +} + 
#[test] fn test_capture_quantifiers() { struct Row { @@ -4307,55 +4570,410 @@ fn test_capture_quantifiers() { }); } -fn assert_query_matches( - language: Language, - query: &Query, - source: &str, - expected: &[(usize, Vec<(&str, &str)>)], -) { +#[test] +fn test_query_quantified_captures() { + struct Row { + description: &'static str, + language: Language, + code: &'static str, + pattern: &'static str, + captures: &'static [(&'static str, &'static str)], + } + + // #[rustfmt::skip] + let rows = &[ + Row { + description: "doc comments where all must match the prefix", + language: get_language("c"), + code: indoc! {" + /// foo + /// bar + /// baz + + void main() {} + + /// qux + /// quux + // quuz + "}, + pattern: r#" + ((comment)+ @comment.documentation + (#match? @comment.documentation "^///")) + "#, + captures: &[ + ("comment.documentation", "/// foo"), + ("comment.documentation", "/// bar"), + ("comment.documentation", "/// baz"), + ], + }, + Row { + description: "doc comments where one must match the prefix", + language: get_language("c"), + code: indoc! {" + /// foo + /// bar + /// baz + + void main() {} + + /// qux + /// quux + // quuz + "}, + pattern: r#" + ((comment)+ @comment.documentation + (#any-match? @comment.documentation "^///")) + "#, + captures: &[ + ("comment.documentation", "/// foo"), + ("comment.documentation", "/// bar"), + ("comment.documentation", "/// baz"), + ("comment.documentation", "/// qux"), + ("comment.documentation", "/// quux"), + ("comment.documentation", "// quuz"), + ], + }, + ]; + + allocations::record(|| { + for row in rows { + eprintln!(" quantified query example: {:?}", row.description); + + let mut parser = Parser::new(); + parser.set_language(row.language).unwrap(); + let tree = parser.parse(row.code, None).unwrap(); + + let query = Query::new(row.language, row.pattern).unwrap(); + + let mut cursor = QueryCursor::new(); + let matches = cursor.captures(&query, tree.root_node(), row.code.as_bytes()); + + assert_eq!(collect_captures(matches, &query, row.code), row.captures); + } + }); +} + +#[test] +fn test_query_max_start_depth() { + struct Row { + description: &'static str, + pattern: &'static str, + depth: u32, + matches: &'static [(usize, &'static [(&'static str, &'static str)])], + } + + let source = indoc! 
{" + if (a1 && a2) { + if (b1 && b2) { } + if (c) { } + } + if (d) { + if (e1 && e2) { } + if (f) { } + } + "}; + + #[rustfmt::skip] + let rows = &[ + Row { + description: "depth 0: match translation unit", + depth: 0, + pattern: r#" + (translation_unit) @capture + "#, + matches: &[ + (0, &[("capture", "if (a1 && a2) {\n if (b1 && b2) { }\n if (c) { }\n}\nif (d) {\n if (e1 && e2) { }\n if (f) { }\n}\n")]), + ] + }, + Row { + description: "depth 0: match none", + depth: 0, + pattern: r#" + (if_statement) @capture + "#, + matches: &[] + }, + Row { + description: "depth 1: match 2 if statements at the top level", + depth: 1, + pattern: r#" + (if_statement) @capture + "#, + matches : &[ + (0, &[("capture", "if (a1 && a2) {\n if (b1 && b2) { }\n if (c) { }\n}")]), + (0, &[("capture", "if (d) {\n if (e1 && e2) { }\n if (f) { }\n}")]), + ] + }, + Row { + description: "depth 1 with deep pattern: match the only the first if statement", + depth: 1, + pattern: r#" + (if_statement + condition: (parenthesized_expression + (binary_expression) + ) + ) @capture + "#, + matches: &[ + (0, &[("capture", "if (a1 && a2) {\n if (b1 && b2) { }\n if (c) { }\n}")]), + ] + }, + Row { + description: "depth 3 with deep pattern: match all if statements with a binexpr condition", + depth: 3, + pattern: r#" + (if_statement + condition: (parenthesized_expression + (binary_expression) + ) + ) @capture + "#, + matches: &[ + (0, &[("capture", "if (a1 && a2) {\n if (b1 && b2) { }\n if (c) { }\n}")]), + (0, &[("capture", "if (b1 && b2) { }")]), + (0, &[("capture", "if (e1 && e2) { }")]), + ] + }, + ]; + + allocations::record(|| { + let language = get_language("c"); + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + for row in rows.iter() { + eprintln!(" query example: {:?}", row.description); + + let query = Query::new(language, row.pattern).unwrap(); + cursor.set_max_start_depth(Some(row.depth)); + + let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); + let expected = row + .matches + .iter() + .map(|x| (x.0, x.1.to_vec())) + .collect::>(); + + assert_eq!(collect_matches(matches, &query, source), expected); + } + }); +} + +#[test] +fn test_query_error_does_not_oob() { + let language = get_language("javascript"); + + assert_eq!( + Query::new(language, "(clas").unwrap_err(), + QueryError { + row: 0, + offset: 1, + column: 1, + kind: QueryErrorKind::NodeType, + message: "clas".to_string() + } + ); +} + +#[test] +fn test_consecutive_zero_or_modifiers() { + let language = get_language("javascript"); let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); - assert_eq!(collect_matches(matches, &query, source), expected); - assert_eq!(cursor.did_exceed_match_limit(), false); + + let zero_source = ""; + let three_source = "/**/ /**/ /**/"; + + let zero_tree = parser.parse(zero_source, None).unwrap(); + let three_tree = parser.parse(three_source, None).unwrap(); + + let tests = [ + "(comment)*** @capture", + "(comment)??? @capture", + "(comment)*?* @capture", + "(comment)?*? 
@capture", + ]; + + for test in tests { + let query = Query::new(language, test).unwrap(); + + let mut cursor = QueryCursor::new(); + let mut matches = cursor.matches(&query, zero_tree.root_node(), zero_source.as_bytes()); + assert!(matches.next().is_some()); + + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, three_tree.root_node(), three_source.as_bytes()); + + let mut len_3 = false; + let mut len_1 = false; + + for m in matches { + if m.captures.len() == 3 { + len_3 = true; + } + if m.captures.len() == 1 { + len_1 = true; + } + } + + assert_eq!(len_3, test.contains('*')); + assert_eq!(len_1, test.contains("???")); + } } -fn collect_matches<'a>( - matches: impl Iterator>, - query: &'a Query, - source: &'a str, -) -> Vec<(usize, Vec<(&'a str, &'a str)>)> { - matches - .map(|m| { - ( - m.pattern_index, - format_captures(m.captures.iter().cloned(), query, source), - ) - }) - .collect() +#[test] +fn test_query_max_start_depth_more() { + struct Row { + depth: u32, + matches: &'static [(usize, &'static [(&'static str, &'static str)])], + } + + let source = indoc! {" + { + { } + { + { } + } + } + "}; + + #[rustfmt::skip] + let rows = &[ + Row { + depth: 0, + matches: &[ + (0, &[("capture", "{\n { }\n {\n { }\n }\n}")]) + ] + }, + Row { + depth: 1, + matches: &[ + (0, &[("capture", "{\n { }\n {\n { }\n }\n}")]), + (0, &[("capture", "{ }")]), + (0, &[("capture", "{\n { }\n }")]) + ] + }, + Row { + depth: 2, + matches: &[ + (0, &[("capture", "{\n { }\n {\n { }\n }\n}")]), + (0, &[("capture", "{ }")]), + (0, &[("capture", "{\n { }\n }")]), + (0, &[("capture", "{ }")]), + ] + }, + ]; + + allocations::record(|| { + let language = get_language("c"); + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let query = Query::new(language, "(compound_statement) @capture").unwrap(); + + let mut matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); + let node = matches.next().unwrap().captures[0].node; + assert_eq!(node.kind(), "compound_statement"); + + for row in rows.iter() { + eprintln!(" depth: {}", row.depth); + + cursor.set_max_start_depth(Some(row.depth)); + + let matches = cursor.matches(&query, node, source.as_bytes()); + let expected = row + .matches + .iter() + .map(|x| (x.0, x.1.to_vec())) + .collect::>(); + + assert_eq!(collect_matches(matches, &query, source), expected); + } + }); } -fn collect_captures<'a>( - captures: impl Iterator, usize)>, - query: &'a Query, - source: &'a str, -) -> Vec<(&'a str, &'a str)> { - format_captures(captures.map(|(m, i)| m.captures[i]), query, source) -} +#[test] +fn test_grammar_with_aliased_literal_query() { + // module.exports = grammar({ + // name: 'test', + // + // rules: { + // source: $ => repeat(choice($.compound_statement, $.expansion)), + // + // compound_statement: $ => seq(alias(token(prec(-1, '}')), '}')), + // + // expansion: $ => seq('}'), + // }, + // }); + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test", + "rules": { + "source": { + "type": "REPEAT", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "compound_statement" + }, + { + "type": "SYMBOL", + "name": "expansion" + } + ] + } + }, + "compound_statement": { + "type": "SEQ", + "members": [ + { + "type": "ALIAS", + "content": { + "type": "TOKEN", + "content": { + "type": "PREC", + "value": -1, + "content": { + "type": "STRING", + "value": "}" + } + } + }, + 
"named": false, + "value": "}" + } + ] + }, + "expansion": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "}" + } + ] + } + } + } + "#, + ) + .unwrap(); -fn format_captures<'a>( - captures: impl Iterator>, - query: &'a Query, - source: &'a str, -) -> Vec<(&'a str, &'a str)> { - captures - .map(|capture| { - ( - query.capture_names()[capture.index as usize].as_str(), - capture.node.utf8_text(source.as_bytes()).unwrap(), - ) - }) - .collect() + let language = get_test_language(&parser_name, &parser_code, None); + + let query = Query::new( + language, + r#" + (compound_statement "}" @bracket1) + (expansion "}" @bracket2) + "#, + ); + + assert!(query.is_ok()); } diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index 07e5d1de..20392749 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -9,7 +9,7 @@ use std::{ use tree_sitter::Point; use tree_sitter_tags::{c_lib as c, Error, TagsConfiguration, TagsContext}; -const PYTHON_TAG_QUERY: &'static str = r#" +const PYTHON_TAG_QUERY: &str = r#" ( (function_definition name: (identifier) @name @@ -39,7 +39,7 @@ const PYTHON_TAG_QUERY: &'static str = r#" attribute: (identifier) @name)) @reference.call "#; -const JS_TAG_QUERY: &'static str = r#" +const JS_TAG_QUERY: &str = r#" ( (comment)* @doc . (class_declaration @@ -68,7 +68,7 @@ const JS_TAG_QUERY: &'static str = r#" function: (identifier) @name) @reference.call "#; -const RUBY_TAG_QUERY: &'static str = r#" +const RUBY_TAG_QUERY: &str = r#" (method name: (_) @name) @definition.method @@ -359,25 +359,29 @@ fn test_tags_via_c_api() { ); let c_scope_name = CString::new(scope_name).unwrap(); - let result = c::ts_tagger_add_language( - tagger, - c_scope_name.as_ptr(), - language, - JS_TAG_QUERY.as_ptr(), - ptr::null(), - JS_TAG_QUERY.len() as u32, - 0, - ); + let result = unsafe { + c::ts_tagger_add_language( + tagger, + c_scope_name.as_ptr(), + language, + JS_TAG_QUERY.as_ptr(), + ptr::null(), + JS_TAG_QUERY.len() as u32, + 0, + ) + }; assert_eq!(result, c::TSTagsError::Ok); - let result = c::ts_tagger_tag( - tagger, - c_scope_name.as_ptr(), - source_code.as_ptr(), - source_code.len() as u32, - buffer, - ptr::null(), - ); + let result = unsafe { + c::ts_tagger_tag( + tagger, + c_scope_name.as_ptr(), + source_code.as_ptr(), + source_code.len() as u32, + buffer, + ptr::null(), + ) + }; assert_eq!(result, c::TSTagsError::Ok); let tags = unsafe { slice::from_raw_parts( @@ -419,8 +423,10 @@ fn test_tags_via_c_api() { ] ); - c::ts_tags_buffer_delete(buffer); - c::ts_tagger_delete(tagger); + unsafe { + c::ts_tags_buffer_delete(buffer); + c::ts_tagger_delete(tagger); + } }); } diff --git a/cli/src/tests/test_highlight_test.rs b/cli/src/tests/test_highlight_test.rs index af2c15c5..06ad7d59 100644 --- a/cli/src/tests/test_highlight_test.rs +++ b/cli/src/tests/test_highlight_test.rs @@ -12,7 +12,7 @@ fn test_highlight_test_with_basic_test() { Some("injections.scm"), &[ "function".to_string(), - "variable.parameter".to_string(), + "variable".to_string(), "keyword".to_string(), ], ); @@ -22,7 +22,8 @@ fn test_highlight_test_with_basic_test() { " // ^ function", " // ^ keyword", " return d + e;", - " // ^ variable.parameter", + " // ^ variable", + " // ^ !variable", "};", ] .join("\n"); @@ -32,18 +33,10 @@ fn test_highlight_test_with_basic_test() { assert_eq!( assertions, &[ - Assertion { - position: Point::new(1, 5), - expected_capture_name: "function".to_string() - }, - Assertion { - position: Point::new(1, 11), - expected_capture_name: 
"keyword".to_string() - }, - Assertion { - position: Point::new(4, 9), - expected_capture_name: "variable.parameter".to_string() - }, + Assertion::new(1, 5, false, String::from("function")), + Assertion::new(1, 11, false, String::from("keyword")), + Assertion::new(4, 9, false, String::from("variable")), + Assertion::new(4, 11, true, String::from("variable")), ] ); @@ -60,6 +53,7 @@ fn test_highlight_test_with_basic_test() { (Point::new(1, 19), Point::new(1, 20), Highlight(1)), // "d" (Point::new(4, 2), Point::new(4, 8), Highlight(2)), // "return" (Point::new(4, 9), Point::new(4, 10), Highlight(1)), // "d" + (Point::new(4, 13), Point::new(4, 14), Highlight(1)), // "e" ] ); } diff --git a/cli/src/tests/test_tags_test.rs b/cli/src/tests/test_tags_test.rs index 61f98abd..e682434e 100644 --- a/cli/src/tests/test_tags_test.rs +++ b/cli/src/tests/test_tags_test.rs @@ -16,6 +16,7 @@ fn test_tags_test_with_basic_test() { " # ^ reference.call", " return d(e)", " # ^ reference.call", + " # ^ !variable.parameter", "", ] .join("\n"); @@ -26,18 +27,10 @@ fn test_tags_test_with_basic_test() { assert_eq!( assertions, &[ - Assertion { - position: Point::new(1, 4), - expected_capture_name: "definition.function".to_string(), - }, - Assertion { - position: Point::new(3, 9), - expected_capture_name: "reference.call".to_string(), - }, - Assertion { - position: Point::new(5, 11), - expected_capture_name: "reference.call".to_string(), - }, + Assertion::new(1, 4, false, String::from("definition.function")), + Assertion::new(3, 9, false, String::from("reference.call")), + Assertion::new(5, 11, false, String::from("reference.call")), + Assertion::new(5, 13, true, String::from("variable.parameter")), ] ); diff --git a/cli/src/tests/text_provider_test.rs b/cli/src/tests/text_provider_test.rs new file mode 100644 index 00000000..cb0b38f6 --- /dev/null +++ b/cli/src/tests/text_provider_test.rs @@ -0,0 +1,173 @@ +use std::{iter, sync::Arc}; + +use crate::tests::helpers::fixtures::get_language; +use tree_sitter::{Language, Node, Parser, Point, Query, QueryCursor, TextProvider, Tree}; + +fn parse_text(text: impl AsRef<[u8]>) -> (Tree, Language) { + let language = get_language("c"); + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + (parser.parse(text, None).unwrap(), language) +} + +fn parse_text_with(callback: &mut F) -> (Tree, Language) +where + T: AsRef<[u8]>, + F: FnMut(usize, Point) -> T, +{ + let language = get_language("c"); + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse_with(callback, None).unwrap(); + // eprintln!("{}", tree.clone().root_node().to_sexp()); + assert_eq!("comment", tree.clone().root_node().child(0).unwrap().kind()); + (tree, language) +} + +fn tree_query>(tree: &Tree, text: impl TextProvider, language: Language) { + let query = Query::new(language, "((comment) @c (#eq? 
@c \"// comment\"))").unwrap(); + let mut cursor = QueryCursor::new(); + let mut captures = cursor.captures(&query, tree.root_node(), text); + let (match_, idx) = captures.next().unwrap(); + let capture = match_.captures[idx]; + assert_eq!(capture.index as usize, idx); + assert_eq!("comment", capture.node.kind()); +} + +fn check_parsing>( + parser_text: impl AsRef<[u8]>, + text_provider: impl TextProvider, +) { + let (tree, language) = parse_text(parser_text); + tree_query(&tree, text_provider, language); +} + +fn check_parsing_callback>( + parser_callback: &mut F, + text_provider: impl TextProvider, +) where + T: AsRef<[u8]>, + F: FnMut(usize, Point) -> T, +{ + let (tree, language) = parse_text_with(parser_callback); + tree_query(&tree, text_provider, language); +} + +#[test] +fn test_text_provider_for_str_slice() { + let text: &str = "// comment"; + + check_parsing(text, text.as_bytes()); + check_parsing(text.as_bytes(), text.as_bytes()); +} + +#[test] +fn test_text_provider_for_string() { + let text: String = "// comment".to_owned(); + + check_parsing(text.clone(), text.as_bytes()); + check_parsing(text.as_bytes(), text.as_bytes()); + check_parsing(<_ as AsRef<[u8]>>::as_ref(&text), text.as_bytes()); +} + +#[test] +fn test_text_provider_for_box_of_str_slice() { + let text: Box = "// comment".to_owned().into_boxed_str(); + + check_parsing(text.as_bytes(), text.as_bytes()); + check_parsing(<_ as AsRef>::as_ref(&text), text.as_bytes()); + check_parsing(text.as_ref(), text.as_ref().as_bytes()); + check_parsing(text.as_ref(), text.as_bytes()); +} + +#[test] +fn test_text_provider_for_box_of_bytes_slice() { + let text: Box<[u8]> = "// comment".to_owned().into_boxed_str().into_boxed_bytes(); + + check_parsing(text.as_ref(), text.as_ref()); + check_parsing(text.as_ref(), &*text); + check_parsing(&*text, &*text); +} + +#[test] +fn test_text_provider_for_vec_of_bytes() { + let text: Vec = "// comment".to_owned().into_bytes(); + + check_parsing(&*text, &*text); +} + +#[test] +fn test_text_provider_for_arc_of_bytes_slice() { + let text: Vec = "// comment".to_owned().into_bytes(); + let text: Arc<[u8]> = Arc::from(text); + + check_parsing(&*text, &*text); + check_parsing(text.as_ref(), text.as_ref()); + check_parsing(text.clone(), text.as_ref()); +} + +#[test] +fn test_text_provider_callback_with_str_slice() { + let text: &str = "// comment"; + + check_parsing(text, |_node: Node<'_>| iter::once(text)); + check_parsing_callback( + &mut |offset, _point| { + (offset < text.len()) + .then(|| text.as_bytes()) + .unwrap_or_default() + }, + |_node: Node<'_>| iter::once(text), + ); +} + +#[test] +fn test_text_provider_callback_with_owned_string_slice() { + let text: &str = "// comment"; + + check_parsing_callback( + &mut |offset, _point| { + (offset < text.len()) + .then(|| text.as_bytes()) + .unwrap_or_default() + }, + |_node: Node<'_>| { + let slice: String = text.to_owned(); + iter::once(slice) + }, + ); +} + +#[test] +fn test_text_provider_callback_with_owned_bytes_vec_slice() { + let text: &str = "// comment"; + + check_parsing_callback( + &mut |offset, _point| { + (offset < text.len()) + .then(|| text.as_bytes()) + .unwrap_or_default() + }, + |_node: Node<'_>| { + let slice: Vec = text.to_owned().into_bytes(); + iter::once(slice) + }, + ); +} + +#[test] +fn test_text_provider_callback_with_owned_arc_of_bytes_slice() { + let text: &str = "// comment"; + + check_parsing_callback( + &mut |offset, _point| { + (offset < text.len()) + .then(|| text.as_bytes()) + .unwrap_or_default() + }, + |_node: Node<'_>| 
{ + let slice: Arc<[u8]> = text.to_owned().into_bytes().into(); + iter::once(slice) + }, + ); +} diff --git a/cli/src/tests/tree_test.rs b/cli/src/tests/tree_test.rs index be0c4ff1..f3792138 100644 --- a/cli/src/tests/tree_test.rs +++ b/cli/src/tests/tree_test.rs @@ -306,7 +306,7 @@ fn test_tree_cursor() { .parse( " struct Stuff { - a: A; + a: A, b: Option<B>, } ", @@ -331,6 +331,88 @@ fn test_tree_cursor() { assert!(cursor.goto_next_sibling()); assert_eq!(cursor.node().kind(), "field_declaration_list"); assert_eq!(cursor.node().is_named(), true); + + assert!(cursor.goto_last_child()); + assert_eq!(cursor.node().kind(), "}"); + assert_eq!(cursor.node().is_named(), false); + assert_eq!(cursor.node().start_position(), Point { row: 4, column: 16 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), ","); + assert_eq!(cursor.node().is_named(), false); + assert_eq!(cursor.node().start_position(), Point { row: 3, column: 32 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "field_declaration"); + assert_eq!(cursor.node().is_named(), true); + assert_eq!(cursor.node().start_position(), Point { row: 3, column: 20 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), ","); + assert_eq!(cursor.node().is_named(), false); + assert_eq!(cursor.node().start_position(), Point { row: 2, column: 24 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "field_declaration"); + assert_eq!(cursor.node().is_named(), true); + assert_eq!(cursor.node().start_position(), Point { row: 2, column: 20 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "{"); + assert_eq!(cursor.node().is_named(), false); + assert_eq!(cursor.node().start_position(), Point { row: 1, column: 29 }); + + let mut copy = tree.walk(); + copy.reset_to(cursor); + + assert_eq!(copy.node().kind(), "{"); + assert_eq!(copy.node().is_named(), false); + + assert!(copy.goto_parent()); + assert_eq!(copy.node().kind(), "field_declaration_list"); + assert_eq!(copy.node().is_named(), true); + + assert!(copy.goto_parent()); + assert_eq!(copy.node().kind(), "struct_item"); +} + +#[test] +fn test_tree_cursor_previous_sibling() { + let mut parser = Parser::new(); + parser.set_language(get_language("rust")).unwrap(); + + let text = " + // Hi there + // This is fun! + // Another one! +"; + let tree = parser.parse(text, None).unwrap(); + + let mut cursor = tree.walk(); + assert_eq!(cursor.node().kind(), "source_file"); + + assert!(cursor.goto_last_child()); + assert_eq!(cursor.node().kind(), "line_comment"); + assert_eq!( + cursor.node().utf8_text(text.as_bytes()).unwrap(), + "// Another one!" + ); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "line_comment"); + assert_eq!( + cursor.node().utf8_text(text.as_bytes()).unwrap(), + "// This is fun!" 
+ ); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "line_comment"); + assert_eq!( + cursor.node().utf8_text(text.as_bytes()).unwrap(), + "// Hi there" + ); + + assert!(!cursor.goto_previous_sibling()); } #[test] @@ -620,7 +702,7 @@ fn get_changed_ranges( source_code: &mut Vec<u8>, edit: Edit, ) -> Vec<Range> { - perform_edit(tree, source_code, &edit); + perform_edit(tree, source_code, &edit).unwrap(); let new_tree = parser.parse(&source_code, Some(tree)).unwrap(); let result = tree.changed_ranges(&new_tree).collect(); *tree = new_tree; diff --git a/cli/src/util.rs b/cli/src/util.rs index acd8acbf..0793e525 100644 --- a/cli/src/util.rs +++ b/cli/src/util.rs @@ -1,9 +1,7 @@ use anyhow::Result; -use std::io; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; -use std::thread; -use tree_sitter::Parser; +use tree_sitter::{Parser, Tree}; #[cfg(unix)] use anyhow::{anyhow, Context}; @@ -13,55 +11,86 @@ use std::path::PathBuf; use std::process::{Child, ChildStdin, Command, Stdio}; #[cfg(unix)] -const HTML_HEADER: &[u8] = b"<!DOCTYPE html>\n<style>svg { width: 100%; }</style>\n\n"; +const HTML_HEADER: &[u8] = b" +<!DOCTYPE html> -pub fn cancel_on_stdin() -> Arc<AtomicUsize> { +<style> +svg { width: 100%; } +</style> + +"; + +pub fn cancel_on_signal() -> Arc<AtomicUsize> { let result = Arc::new(AtomicUsize::new(0)); - if atty::is(atty::Stream::Stdin) { - thread::spawn({ - let flag = result.clone(); - move || { - let mut line = String::new(); - io::stdin().read_line(&mut line).unwrap(); - flag.store(1, Ordering::Relaxed); - } - }); - } + ctrlc::set_handler({ + let flag = result.clone(); + move || { + flag.store(1, Ordering::Relaxed); + } + }) + .expect("Error setting Ctrl-C handler"); result } + #[cfg(windows)] -pub struct LogSession(); +pub struct LogSession; #[cfg(unix)] -pub struct LogSession(PathBuf, Option<Child>, Option<ChildStdin>); +pub struct LogSession { + path: PathBuf, + dot_process: Option<Child>, + dot_process_stdin: Option<ChildStdin>, +} + +#[cfg(windows)] +pub fn print_tree_graph(_tree: &Tree, _path: &str) -> Result<()> { + Ok(()) +} #[cfg(windows)] pub fn log_graphs(_parser: &mut Parser, _path: &str) -> Result<LogSession> { - Ok(LogSession()) + Ok(LogSession) +} + +#[cfg(unix)] +pub fn print_tree_graph(tree: &Tree, path: &str) -> Result<()> { + let session = LogSession::new(path)?; + tree.print_dot_graph(session.dot_process_stdin.as_ref().unwrap()); + Ok(()) } #[cfg(unix)] pub fn log_graphs(parser: &mut Parser, path: &str) -> Result<LogSession> { - use std::io::Write; + let session = LogSession::new(path)?; + parser.print_dot_graphs(session.dot_process_stdin.as_ref().unwrap()); + Ok(session) } - let mut dot_file = std::fs::File::create(path)?; - dot_file.write(HTML_HEADER)?; - let mut dot_process = Command::new("dot") - .arg("-Tsvg") - .stdin(Stdio::piped()) - .stdout(dot_file) - .spawn() - .with_context(|| "Failed to run the `dot` command. Check that graphviz is installed.")?; - let dot_stdin = dot_process - .stdin - .take() - .ok_or_else(|| anyhow!("Failed to open stdin for `dot` process."))?; - parser.print_dot_graphs(&dot_stdin); - Ok(LogSession( - PathBuf::from(path), - Some(dot_process), - Some(dot_stdin), - )) +#[cfg(unix)] +impl LogSession { + fn new(path: &str) -> Result<Self> { + use std::io::Write; + + let mut dot_file = std::fs::File::create(path)?; + dot_file.write(HTML_HEADER)?; + let mut dot_process = Command::new("dot") + .arg("-Tsvg") + .stdin(Stdio::piped()) + .stdout(dot_file) + .spawn() + .with_context(|| { + "Failed to run the `dot` command. Check that graphviz is installed." 
+ })?; + let dot_stdin = dot_process + .stdin + .take() + .ok_or_else(|| anyhow!("Failed to open stdin for `dot` process."))?; + Ok(Self { + path: PathBuf::from(path), + dot_process: Some(dot_process), + dot_process_stdin: Some(dot_stdin), + }) + } } #[cfg(unix)] @@ -69,13 +98,13 @@ impl Drop for LogSession { fn drop(&mut self) { use std::fs; - drop(self.2.take().unwrap()); - let output = self.1.take().unwrap().wait_with_output().unwrap(); + drop(self.dot_process_stdin.take().unwrap()); + let output = self.dot_process.take().unwrap().wait_with_output().unwrap(); if output.status.success() { if cfg!(target_os = "macos") - && fs::metadata(&self.0).unwrap().len() > HTML_HEADER.len() as u64 + && fs::metadata(&self.path).unwrap().len() > HTML_HEADER.len() as u64 { - Command::new("open").arg(&self.0).output().unwrap(); + Command::new("open").arg(&self.path).output().unwrap(); } } else { eprintln!( diff --git a/cli/src/wasm.rs b/cli/src/wasm.rs index d63ec922..794ba353 100644 --- a/cli/src/wasm.rs +++ b/cli/src/wasm.rs @@ -1,5 +1,6 @@ use super::generate::parse_grammar::GrammarJSON; use anyhow::{anyhow, Context, Result}; +use path_slash::PathExt as _; use std::{ ffi::{OsStr, OsString}, fs, @@ -60,7 +61,7 @@ pub fn compile_language_to_wasm( volume_string = OsString::from(parent); volume_string.push(":/src:Z"); command.arg("--workdir"); - command.arg(&Path::new("/src").join(filename)); + command.arg(Path::new("/src").join(filename).to_slash_lossy().as_ref()); } else { volume_string = OsString::from(language_dir); volume_string.push(":/src:Z"); @@ -84,6 +85,11 @@ pub fn compile_language_to_wasm( // Run `emcc` in a container using the `emscripten-slim` image command.args(&[EMSCRIPTEN_TAG, "emcc"]); } else { + if force_docker { + return Err(anyhow!( + "You must have docker on your PATH to run this command with --docker" + )); + } return Err(anyhow!( "You must have either emcc or docker on your PATH to run this command" )); @@ -116,14 +122,18 @@ pub fn compile_language_to_wasm( let scanner_cpp_path = src.join("scanner.cpp"); if language_dir.join(&scanner_cc_path).exists() { - command.arg("-xc++").arg(&scanner_cc_path); + command + .arg("-xc++") + .arg(scanner_cc_path.to_slash_lossy().as_ref()); } else if language_dir.join(&scanner_cpp_path).exists() { - command.arg("-xc++").arg(&scanner_cpp_path); + command + .arg("-xc++") + .arg(scanner_cpp_path.to_slash_lossy().as_ref()); } else if language_dir.join(&scanner_c_path).exists() { - command.arg(&scanner_c_path); + command.arg(scanner_c_path.to_slash_lossy().as_ref()); } - command.arg(&parser_c_path); + command.arg(parser_c_path.to_slash_lossy().as_ref()); let output = command .output() diff --git a/docs/Gemfile b/docs/Gemfile index 91ceacd3..ee114290 100644 --- a/docs/Gemfile +++ b/docs/Gemfile @@ -1,2 +1,3 @@ source 'https://rubygems.org' -gem 'github-pages', group: :jekyll_plugins \ No newline at end of file +gem 'github-pages', group: :jekyll_plugins +gem "webrick" diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock index 44aff756..ce19edac 100644 --- a/docs/Gemfile.lock +++ b/docs/Gemfile.lock @@ -1,258 +1,260 @@ GEM remote: https://rubygems.org/ specs: - activesupport (4.2.9) - i18n (~> 0.7) - minitest (~> 5.1) - thread_safe (~> 0.3, >= 0.3.4) - tzinfo (~> 1.1) - addressable (2.8.0) - public_suffix (>= 2.0.2, < 5.0) + activesupport (7.0.4.3) + concurrent-ruby (~> 1.0, >= 1.0.2) + i18n (>= 1.6, < 2) + minitest (>= 5.1) + tzinfo (~> 2.0) + addressable (2.8.1) + public_suffix (>= 2.0.2, < 6.0) coffee-script (2.4.1) coffee-script-source execjs 
coffee-script-source (1.11.1) colorator (1.1.0) - commonmarker (0.17.8) - ruby-enum (~> 0.5) - concurrent-ruby (1.0.5) - ethon (0.14.0) + commonmarker (0.23.10) + concurrent-ruby (1.2.2) + dnsruby (1.61.9) + simpleidn (~> 0.1) + em-websocket (0.5.3) + eventmachine (>= 0.12.9) + http_parser.rb (~> 0) + ethon (0.16.0) ffi (>= 1.15.0) - execjs (2.7.0) - faraday (1.5.1) - faraday-em_http (~> 1.0) - faraday-em_synchrony (~> 1.0) - faraday-excon (~> 1.1) - faraday-httpclient (~> 1.0.1) - faraday-net_http (~> 1.0) - faraday-net_http_persistent (~> 1.1) - faraday-patron (~> 1.0) - multipart-post (>= 1.2, < 3) + eventmachine (1.2.7) + execjs (2.8.1) + faraday (2.7.4) + faraday-net_http (>= 2.0, < 3.1) ruby2_keywords (>= 0.0.4) - faraday-em_http (1.0.0) - faraday-em_synchrony (1.0.0) - faraday-excon (1.1.0) - faraday-httpclient (1.0.1) - faraday-net_http (1.0.1) - faraday-net_http_persistent (1.2.0) - faraday-patron (1.0.0) - ffi (1.15.3) + faraday-net_http (3.0.2) + ffi (1.15.5) forwardable-extended (2.6.0) - gemoji (3.0.0) - github-pages (177) - activesupport (= 4.2.9) - github-pages-health-check (= 1.3.5) - jekyll (= 3.6.2) - jekyll-avatar (= 0.5.0) - jekyll-coffeescript (= 1.0.2) - jekyll-commonmark-ghpages (= 0.1.5) + gemoji (3.0.1) + github-pages (228) + github-pages-health-check (= 1.17.9) + jekyll (= 3.9.3) + jekyll-avatar (= 0.7.0) + jekyll-coffeescript (= 1.1.1) + jekyll-commonmark-ghpages (= 0.4.0) jekyll-default-layout (= 0.1.4) - jekyll-feed (= 0.9.2) - jekyll-gist (= 1.4.1) - jekyll-github-metadata (= 2.9.3) - jekyll-mentions (= 1.2.0) - jekyll-optional-front-matter (= 0.3.0) + jekyll-feed (= 0.15.1) + jekyll-gist (= 1.5.0) + jekyll-github-metadata (= 2.13.0) + jekyll-include-cache (= 0.2.1) + jekyll-mentions (= 1.6.0) + jekyll-optional-front-matter (= 0.3.2) jekyll-paginate (= 1.1.0) - jekyll-readme-index (= 0.2.0) - jekyll-redirect-from (= 0.12.1) - jekyll-relative-links (= 0.5.2) - jekyll-remote-theme (= 0.2.3) - jekyll-sass-converter (= 1.5.0) - jekyll-seo-tag (= 2.3.0) - jekyll-sitemap (= 1.1.1) - jekyll-swiss (= 0.4.0) - jekyll-theme-architect (= 0.1.0) - jekyll-theme-cayman (= 0.1.0) - jekyll-theme-dinky (= 0.1.0) - jekyll-theme-hacker (= 0.1.0) - jekyll-theme-leap-day (= 0.1.0) - jekyll-theme-merlot (= 0.1.0) - jekyll-theme-midnight (= 0.1.0) - jekyll-theme-minimal (= 0.1.0) - jekyll-theme-modernist (= 0.1.0) - jekyll-theme-primer (= 0.5.2) - jekyll-theme-slate (= 0.1.0) - jekyll-theme-tactile (= 0.1.0) - jekyll-theme-time-machine (= 0.1.0) - jekyll-titles-from-headings (= 0.5.0) - jemoji (= 0.8.1) - kramdown (= 1.16.2) - liquid (= 4.0.0) - listen (= 3.0.6) + jekyll-readme-index (= 0.3.0) + jekyll-redirect-from (= 0.16.0) + jekyll-relative-links (= 0.6.1) + jekyll-remote-theme (= 0.4.3) + jekyll-sass-converter (= 1.5.2) + jekyll-seo-tag (= 2.8.0) + jekyll-sitemap (= 1.4.0) + jekyll-swiss (= 1.0.0) + jekyll-theme-architect (= 0.2.0) + jekyll-theme-cayman (= 0.2.0) + jekyll-theme-dinky (= 0.2.0) + jekyll-theme-hacker (= 0.2.0) + jekyll-theme-leap-day (= 0.2.0) + jekyll-theme-merlot (= 0.2.0) + jekyll-theme-midnight (= 0.2.0) + jekyll-theme-minimal (= 0.2.0) + jekyll-theme-modernist (= 0.2.0) + jekyll-theme-primer (= 0.6.0) + jekyll-theme-slate (= 0.2.0) + jekyll-theme-tactile (= 0.2.0) + jekyll-theme-time-machine (= 0.2.0) + jekyll-titles-from-headings (= 0.5.3) + jemoji (= 0.12.0) + kramdown (= 2.3.2) + kramdown-parser-gfm (= 1.1.0) + liquid (= 4.0.4) mercenary (~> 0.3) - minima (= 2.1.1) - nokogiri (>= 1.8.1, < 2.0) - rouge (= 2.2.1) + minima (= 2.5.1) + nokogiri (>= 1.13.6, < 
2.0) + rouge (= 3.26.0) terminal-table (~> 1.4) - github-pages-health-check (1.3.5) + github-pages-health-check (1.17.9) addressable (~> 2.3) - net-dns (~> 0.8) + dnsruby (~> 1.60) octokit (~> 4.0) - public_suffix (~> 2.0) - typhoeus (~> 0.7) - html-pipeline (2.7.1) + public_suffix (>= 3.0, < 5.0) + typhoeus (~> 1.3) + html-pipeline (2.14.3) activesupport (>= 2) nokogiri (>= 1.4) - i18n (0.9.5) + http_parser.rb (0.8.0) + i18n (1.12.0) concurrent-ruby (~> 1.0) - jekyll (3.6.2) + jekyll (3.9.3) addressable (~> 2.4) colorator (~> 1.0) + em-websocket (~> 0.5) + i18n (>= 0.7, < 2) jekyll-sass-converter (~> 1.0) - jekyll-watch (~> 1.1) - kramdown (~> 1.14) + jekyll-watch (~> 2.0) + kramdown (>= 1.17, < 3) liquid (~> 4.0) mercenary (~> 0.3.3) pathutil (~> 0.9) - rouge (>= 1.7, < 3) + rouge (>= 1.7, < 4) safe_yaml (~> 1.0) - jekyll-avatar (0.5.0) - jekyll (~> 3.0) - jekyll-coffeescript (1.0.2) + jekyll-avatar (0.7.0) + jekyll (>= 3.0, < 5.0) + jekyll-coffeescript (1.1.1) coffee-script (~> 2.2) coffee-script-source (~> 1.11.1) - jekyll-commonmark (1.1.0) - commonmarker (~> 0.14) - jekyll (>= 3.0, < 4.0) - jekyll-commonmark-ghpages (0.1.5) - commonmarker (~> 0.17.6) - jekyll-commonmark (~> 1) - rouge (~> 2) + jekyll-commonmark (1.4.0) + commonmarker (~> 0.22) + jekyll-commonmark-ghpages (0.4.0) + commonmarker (~> 0.23.7) + jekyll (~> 3.9.0) + jekyll-commonmark (~> 1.4.0) + rouge (>= 2.0, < 5.0) jekyll-default-layout (0.1.4) jekyll (~> 3.0) - jekyll-feed (0.9.2) - jekyll (~> 3.3) - jekyll-gist (1.4.1) + jekyll-feed (0.15.1) + jekyll (>= 3.7, < 5.0) + jekyll-gist (1.5.0) octokit (~> 4.2) - jekyll-github-metadata (2.9.3) - jekyll (~> 3.1) + jekyll-github-metadata (2.13.0) + jekyll (>= 3.4, < 5.0) octokit (~> 4.0, != 4.4.0) - jekyll-mentions (1.2.0) - activesupport (~> 4.0) + jekyll-include-cache (0.2.1) + jekyll (>= 3.7, < 5.0) + jekyll-mentions (1.6.0) html-pipeline (~> 2.3) - jekyll (~> 3.0) - jekyll-optional-front-matter (0.3.0) - jekyll (~> 3.0) + jekyll (>= 3.7, < 5.0) + jekyll-optional-front-matter (0.3.2) + jekyll (>= 3.0, < 5.0) jekyll-paginate (1.1.0) - jekyll-readme-index (0.2.0) - jekyll (~> 3.0) - jekyll-redirect-from (0.12.1) - jekyll (~> 3.3) - jekyll-relative-links (0.5.2) - jekyll (~> 3.3) - jekyll-remote-theme (0.2.3) - jekyll (~> 3.5) - rubyzip (>= 1.2.1, < 3.0) - typhoeus (>= 0.7, < 2.0) - jekyll-sass-converter (1.5.0) + jekyll-readme-index (0.3.0) + jekyll (>= 3.0, < 5.0) + jekyll-redirect-from (0.16.0) + jekyll (>= 3.3, < 5.0) + jekyll-relative-links (0.6.1) + jekyll (>= 3.3, < 5.0) + jekyll-remote-theme (0.4.3) + addressable (~> 2.0) + jekyll (>= 3.5, < 5.0) + jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0) + rubyzip (>= 1.3.0, < 3.0) + jekyll-sass-converter (1.5.2) sass (~> 3.4) - jekyll-seo-tag (2.3.0) - jekyll (~> 3.3) - jekyll-sitemap (1.1.1) - jekyll (~> 3.3) - jekyll-swiss (0.4.0) - jekyll-theme-architect (0.1.0) - jekyll (~> 3.5) + jekyll-seo-tag (2.8.0) + jekyll (>= 3.8, < 5.0) + jekyll-sitemap (1.4.0) + jekyll (>= 3.7, < 5.0) + jekyll-swiss (1.0.0) + jekyll-theme-architect (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-cayman (0.1.0) - jekyll (~> 3.5) + jekyll-theme-cayman (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-dinky (0.1.0) - jekyll (~> 3.5) + jekyll-theme-dinky (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-hacker (0.1.0) - jekyll (~> 3.5) + jekyll-theme-hacker (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-leap-day (0.1.0) - jekyll (~> 3.5) + jekyll-theme-leap-day 
(0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-merlot (0.1.0) - jekyll (~> 3.5) + jekyll-theme-merlot (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-midnight (0.1.0) - jekyll (~> 3.5) + jekyll-theme-midnight (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-minimal (0.1.0) - jekyll (~> 3.5) + jekyll-theme-minimal (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-modernist (0.1.0) - jekyll (~> 3.5) + jekyll-theme-modernist (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-primer (0.5.2) - jekyll (~> 3.5) + jekyll-theme-primer (0.6.0) + jekyll (> 3.5, < 5.0) jekyll-github-metadata (~> 2.9) - jekyll-seo-tag (~> 2.2) - jekyll-theme-slate (0.1.0) - jekyll (~> 3.5) jekyll-seo-tag (~> 2.0) - jekyll-theme-tactile (0.1.0) - jekyll (~> 3.5) + jekyll-theme-slate (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-theme-time-machine (0.1.0) - jekyll (~> 3.5) + jekyll-theme-tactile (0.2.0) + jekyll (> 3.5, < 5.0) jekyll-seo-tag (~> 2.0) - jekyll-titles-from-headings (0.5.0) - jekyll (~> 3.3) - jekyll-watch (1.5.1) + jekyll-theme-time-machine (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-titles-from-headings (0.5.3) + jekyll (>= 3.3, < 5.0) + jekyll-watch (2.2.1) listen (~> 3.0) - jemoji (0.8.1) - activesupport (~> 4.0, >= 4.2.9) + jemoji (0.12.0) gemoji (~> 3.0) html-pipeline (~> 2.2) - jekyll (>= 3.0) - kramdown (1.16.2) - liquid (4.0.0) - listen (3.0.6) - rb-fsevent (>= 0.9.3) - rb-inotify (>= 0.9.7) + jekyll (>= 3.0, < 5.0) + kramdown (2.3.2) + rexml + kramdown-parser-gfm (1.1.0) + kramdown (~> 2.0) + liquid (4.0.4) + listen (3.8.0) + rb-fsevent (~> 0.10, >= 0.10.3) + rb-inotify (~> 0.9, >= 0.9.10) mercenary (0.3.6) - mini_portile2 (2.8.0) - minima (2.1.1) - jekyll (~> 3.3) - minitest (5.11.3) - multipart-post (2.1.1) - net-dns (0.9.0) - nokogiri (1.13.3) - mini_portile2 (~> 2.8.0) + minima (2.5.1) + jekyll (>= 3.5, < 5.0) + jekyll-feed (~> 0.9) + jekyll-seo-tag (~> 2.1) + minitest (5.18.0) + nokogiri (1.14.3-x86_64-linux) racc (~> 1.4) - octokit (4.21.0) - faraday (>= 0.9) - sawyer (~> 0.8.0, >= 0.5.3) + octokit (4.25.1) + faraday (>= 1, < 3) + sawyer (~> 0.9) pathutil (0.16.2) forwardable-extended (~> 2.6) - public_suffix (2.0.5) - racc (1.6.0) - rb-fsevent (0.11.0) + public_suffix (4.0.7) + racc (1.6.2) + rb-fsevent (0.11.2) rb-inotify (0.10.1) ffi (~> 1.0) - rouge (2.2.1) - ruby-enum (0.7.2) - i18n - ruby2_keywords (0.0.4) - rubyzip (2.0.0) + rexml (3.2.5) + rouge (3.26.0) + ruby2_keywords (0.0.5) + rubyzip (2.3.2) safe_yaml (1.0.5) sass (3.7.4) sass-listen (~> 4.0.0) sass-listen (4.0.0) rb-fsevent (~> 0.9, >= 0.9.4) rb-inotify (~> 0.9, >= 0.9.7) - sawyer (0.8.2) + sawyer (0.9.2) addressable (>= 2.3.5) - faraday (> 0.8, < 2.0) + faraday (>= 0.17.3, < 3) + simpleidn (0.2.1) + unf (~> 0.1.4) terminal-table (1.8.0) unicode-display_width (~> 1.1, >= 1.1.1) - thread_safe (0.3.6) - typhoeus (0.8.0) - ethon (>= 0.8.0) - tzinfo (1.2.5) - thread_safe (~> 0.1) - unicode-display_width (1.3.0) + typhoeus (1.4.0) + ethon (>= 0.9.0) + tzinfo (2.0.6) + concurrent-ruby (~> 1.0) + unf (0.1.4) + unf_ext + unf_ext (0.0.8.2) + unicode-display_width (1.8.0) + webrick (1.8.1) PLATFORMS - ruby + x86_64-linux DEPENDENCIES github-pages + webrick BUNDLED WITH - 1.16.1 + 2.4.8 diff --git a/docs/index.md b/docs/index.md index 1c4f958a..86f83de7 100644 --- a/docs/index.md +++ b/docs/index.md @@ -9,95 +9,142 @@ Tree-sitter is a parser generator tool and an incremental parsing 
library. It ca * **General** enough to parse any programming language * **Fast** enough to parse on every keystroke in a text editor * **Robust** enough to provide useful results even in the presence of syntax errors -* **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application +* **Dependency-free** so that the runtime library (which is written in pure [C](https://github.com/tree-sitter/tree-sitter/tree/master/lib)) can be embedded in any application ### Language Bindings There are currently bindings that allow Tree-sitter to be used from the following languages: +* [Go](https://github.com/smacker/go-tree-sitter) * [Haskell](https://github.com/tree-sitter/haskell-tree-sitter) +* [Java](https://github.com/serenadeai/java-tree-sitter) * [JavaScript (Node.js)](https://github.com/tree-sitter/node-tree-sitter) * [JavaScript (Wasm)](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) +* [Kotlin](https://github.com/oxisto/kotlintree) * [Lua](https://github.com/euclidianAce/ltreesitter) * [OCaml](https://github.com/returntocorp/ocaml-tree-sitter-core) +* [Perl](https://metacpan.org/pod/Text::Treesitter) * [Python](https://github.com/tree-sitter/py-tree-sitter) * [Ruby](https://github.com/tree-sitter/ruby-tree-sitter) +* [Ruby](https://github.com/calicoday/ruby-tree-sitter-ffi) * [Rust](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) * [Swift](https://github.com/ChimeHQ/SwiftTreeSitter) -* [Kotlin](https://github.com/oxisto/kotlintree) -* [Java](https://github.com/serenadeai/java-tree-sitter) -### Available Parsers - -Parsers for these languages are fairly complete: +### Parsers +* [Ada](https://github.com/briot/tree-sitter-ada) +* [Agda](https://github.com/tree-sitter/tree-sitter-agda) +* [Apex](https://github.com/aheber/tree-sitter-sfapex) * [Bash](https://github.com/tree-sitter/tree-sitter-bash) +* [Beancount](https://github.com/zwpaper/tree-sitter-beancount) +* [Cap'n Proto](https://github.com/amaanq/tree-sitter-capnp) * [C](https://github.com/tree-sitter/tree-sitter-c) -* [C#](https://github.com/tree-sitter/tree-sitter-c-sharp) * [C++](https://github.com/tree-sitter/tree-sitter-cpp) +* [C#](https://github.com/tree-sitter/tree-sitter-c-sharp) +* [Clojure](https://github.com/sogaiu/tree-sitter-clojure) +* [CMake](https://github.com/uyha/tree-sitter-cmake) +* [Comment](https://github.com/stsewd/tree-sitter-comment) * [Common Lisp](https://github.com/theHamsta/tree-sitter-commonlisp) * [CSS](https://github.com/tree-sitter/tree-sitter-css) * [CUDA](https://github.com/theHamsta/tree-sitter-cuda) +* [Dart](https://github.com/UserNobody14/tree-sitter-dart) * [D](https://github.com/gdamore/tree-sitter-d) +* [Dockerfile](https://github.com/camdencheek/tree-sitter-dockerfile) * [DOT](https://github.com/rydesun/tree-sitter-dot) +* [Elixir](https://github.com/elixir-lang/tree-sitter-elixir) * [Elm](https://github.com/elm-tooling/tree-sitter-elm) * [Emacs Lisp](https://github.com/Wilfred/tree-sitter-elisp) * [Eno](https://github.com/eno-lang/tree-sitter-eno) * [ERB / EJS](https://github.com/tree-sitter/tree-sitter-embedded-template) * [Erlang](https://github.com/WhatsApp/tree-sitter-erlang/) * [Fennel](https://github.com/travonted/tree-sitter-fennel) +* [Fish](https://github.com/ram02z/tree-sitter-fish) +* [Formula](https://github.com/siraben/tree-sitter-formula) +* [Fortran](https://github.com/stadelmanma/tree-sitter-fortran) +* [gitattributes](https://github.com/ObserverOfTime/tree-sitter-gitattributes) +* 
[gitignore](https://github.com/shunsambongi/tree-sitter-gitignore) +* [Gleam](https://github.com/gleam-lang/tree-sitter-gleam) * [GLSL (OpenGL Shading Language)](https://github.com/theHamsta/tree-sitter-glsl) * [Go](https://github.com/tree-sitter/tree-sitter-go) +* [Go mod](https://github.com/camdencheek/tree-sitter-go-mod) +* [Go work](https://github.com/omertuc/tree-sitter-go-work) +* [Graphql](https://github.com/bkegley/tree-sitter-graphql) +* [Hack](https://github.com/slackhq/tree-sitter-hack) +* [Haskell](https://github.com/tree-sitter/tree-sitter-haskell) * [HCL](https://github.com/MichaHoffmann/tree-sitter-hcl) * [HTML](https://github.com/tree-sitter/tree-sitter-html) * [Java](https://github.com/tree-sitter/tree-sitter-java) * [JavaScript](https://github.com/tree-sitter/tree-sitter-javascript) +* [jq](https://github.com/flurie/tree-sitter-jq) +* [JSON5](https://github.com/Joakker/tree-sitter-json5) * [JSON](https://github.com/tree-sitter/tree-sitter-json) +* [Julia](https://github.com/tree-sitter/tree-sitter-julia) +* [Kotlin](https://github.com/fwcd/tree-sitter-kotlin) +* [LALRPOP](https://github.com/traxys/tree-sitter-lalrpop) +* [Latex](https://github.com/latex-lsp/tree-sitter-latex) +* [Lean](https://github.com/Julian/tree-sitter-lean) +* [LLVM](https://github.com/benwilliamgraham/tree-sitter-llvm) +* [LLVM MachineIR](https://github.com/Flakebi/tree-sitter-llvm-mir) +* [LLVM TableGen](https://github.com/Flakebi/tree-sitter-tablegen) * [Lua](https://github.com/Azganoth/tree-sitter-lua) * [Make](https://github.com/alemuller/tree-sitter-make) * [Markdown](https://github.com/ikatyang/tree-sitter-markdown) +* [Markdown](https://github.com/MDeiml/tree-sitter-markdown) +* [Meson](https://github.com/Decodetalkers/tree-sitter-meson) +* [Meson](https://github.com/staysail/tree-sitter-meson) +* [Motorola 68000 Assembly](https://github.com/grahambates/tree-sitter-m68k) +* [Nix](https://github.com/cstrahan/tree-sitter-nix) +* [Objective-C](https://github.com/jiyee/tree-sitter-objc) * [OCaml](https://github.com/tree-sitter/tree-sitter-ocaml) +* [Org](https://github.com/milisims/tree-sitter-org) +* [Pascal](https://github.com/Isopod/tree-sitter-pascal) +* [Perl](https://github.com/ganezdragon/tree-sitter-perl) +* [Perl](https://github.com/tree-sitter-perl/tree-sitter-perl) +* [Perl POD](https://github.com/tree-sitter-perl/tree-sitter-pod) * [PHP](https://github.com/tree-sitter/tree-sitter-php) +* [Portable Game Notation](https://github.com/rolandwalker/tree-sitter-pgn) +* [PowerShell](https://github.com/PowerShell/tree-sitter-PowerShell) +* [Protocol Buffers](https://github.com/mitchellh/tree-sitter-proto) * [Python](https://github.com/tree-sitter/tree-sitter-python) +* [QML](https://github.com/yuja/tree-sitter-qmljs) +* [Racket](https://github.com/6cdh/tree-sitter-racket) +* [Rasi](https://github.com/Fymyte/tree-sitter-rasi) +* [re2c](https://github.com/alemuller/tree-sitter-re2c) +* [Regex](https://github.com/tree-sitter/tree-sitter-regex) +* [Rego](https://github.com/FallenAngel97/tree-sitter-rego) +* [reStructuredText](https://github.com/stsewd/tree-sitter-rst) +* [R](https://github.com/r-lib/tree-sitter-r) * [Ruby](https://github.com/tree-sitter/tree-sitter-ruby) * [Rust](https://github.com/tree-sitter/tree-sitter-rust) -* [R](https://github.com/r-lib/tree-sitter-r) +* [Scala](https://github.com/tree-sitter/tree-sitter-scala) +* [Scheme](https://github.com/6cdh/tree-sitter-scheme) +* [Scss](https://github.com/serenadeai/tree-sitter-scss) * 
[S-expressions](https://github.com/AbstractMachinesLab/tree-sitter-sexp) +* [Smali](https://github.com/amaanq/tree-sitter-smali) +* [Smali](https://git.sr.ht/~yotam/tree-sitter-smali) +* [Sourcepawn](https://github.com/nilshelmig/tree-sitter-sourcepawn) * [SPARQL](https://github.com/BonaBeavis/tree-sitter-sparql) +* [SQL - BigQuery](https://github.com/takegue/tree-sitter-sql-bigquery) +* [SQL - PostgreSQL](https://github.com/m-novikov/tree-sitter-sql) +* [SQL - SQLite](https://github.com/dhcmrlchtdj/tree-sitter-sqlite) +* [SSH](https://github.com/metio/tree-sitter-ssh-client-config) * [Svelte](https://github.com/Himujjal/tree-sitter-svelte) * [Swift](https://github.com/alex-pinkus/tree-sitter-swift) * [SystemRDL](https://github.com/SystemRDL/tree-sitter-systemrdl) +* [Thrift](https://github.com/duskmoon314/tree-sitter-thrift) * [TOML](https://github.com/ikatyang/tree-sitter-toml) +* [Tree-sitter Query](https://github.com/nvim-treesitter/tree-sitter-query) * [Turtle](https://github.com/BonaBeavis/tree-sitter-turtle) * [Twig](https://github.com/gbprod/tree-sitter-twig) * [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript) * [Verilog](https://github.com/tree-sitter/tree-sitter-verilog) * [VHDL](https://github.com/alemuller/tree-sitter-vhdl) * [Vue](https://github.com/ikatyang/tree-sitter-vue) -* [YAML](https://github.com/ikatyang/tree-sitter-yaml) * [WASM](https://github.com/wasm-lsp/tree-sitter-wasm) * [WGSL WebGPU Shading Language](https://github.com/mehmetoguzderin/tree-sitter-wgsl) - -Parsers for these languages are in development: - -* [Agda](https://github.com/tree-sitter/tree-sitter-agda) -* [Elixir](https://github.com/elixir-lang/tree-sitter-elixir) -* [Dockerfile](https://github.com/camdencheek/tree-sitter-dockerfile) -* [Go mod](https://github.com/camdencheek/tree-sitter-go-mod) -* [Hack](https://github.com/slackhq/tree-sitter-hack) -* [Haskell](https://github.com/tree-sitter/tree-sitter-haskell) -* [Julia](https://github.com/tree-sitter/tree-sitter-julia) -* [Kotlin](https://github.com/fwcd/tree-sitter-kotlin) -* [Nix](https://github.com/cstrahan/tree-sitter-nix) -* [Objective-C](https://github.com/jiyee/tree-sitter-objc) -* [Org](https://github.com/milisims/tree-sitter-org) -* [Perl](https://github.com/ganezdragon/tree-sitter-perl) -* [Protocol Buffers](https://github.com/mitchellh/tree-sitter-proto) -* [Racket](https://github.com/6cdh/tree-sitter-racket) -* [Scala](https://github.com/tree-sitter/tree-sitter-scala) -* [Sourcepawn](https://github.com/nilshelmig/tree-sitter-sourcepawn) -* [SQL](https://github.com/m-novikov/tree-sitter-sql) - +* [YAML](https://github.com/ikatyang/tree-sitter-yaml) +* [YANG](https://github.com/Hubro/tree-sitter-yang) +* [Zig](https://github.com/maxxnino/tree-sitter-zig) ### Talks on Tree-sitter @@ -109,9 +156,9 @@ Parsers for these languages are in development: The design of Tree-sitter was greatly influenced by the following research papers: -- [Practical Algorithms for Incremental Software Development Environments](https://www2.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-946.pdf) -- [Context Aware Scanning for Parsing Extensible Languages](https://www-users.cse.umn.edu/~evw/pubs/vanwyk07gpce/vanwyk07gpce.pdf) -- [Efficient and Flexible Incremental Parsing](http://harmonia.cs.berkeley.edu/papers/twagner-parsing.pdf) -- [Incremental Analysis of Real Programming Languages](http://harmonia.cs.berkeley.edu/papers/twagner-glr.pdf) -- [Error Detection and Recovery in LR 
Parsers](http://what-when-how.com/compiler-writing/bottom-up-parsing-compiler-writing-part-13) -- [Error Recovery for LR Parsers](https://apps.dtic.mil/sti/pdfs/ADA043470.pdf) +* [Practical Algorithms for Incremental Software Development Environments](https://www2.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-946.pdf) +* [Context Aware Scanning for Parsing Extensible Languages](https://www-users.cse.umn.edu/~evw/pubs/vanwyk07gpce/vanwyk07gpce.pdf) +* [Efficient and Flexible Incremental Parsing](https://harmonia.cs.berkeley.edu/papers/twagner-parsing.pdf) +* [Incremental Analysis of Real Programming Languages](https://harmonia.cs.berkeley.edu/papers/twagner-glr.pdf) +* [Error Detection and Recovery in LR Parsers](https://what-when-how.com/compiler-writing/bottom-up-parsing-compiler-writing-part-13) +* [Error Recovery for LR Parsers](https://apps.dtic.mil/sti/pdfs/ADA043470.pdf) diff --git a/docs/section-2-using-parsers.md b/docs/section-2-using-parsers.md index ac3f968f..5106a49c 100644 --- a/docs/section-2-using-parsers.md +++ b/docs/section-2-using-parsers.md @@ -290,7 +290,7 @@ This `ts_node_edit` function is _only_ needed in the case where you have retriev ### Multi-language Documents -Sometimes, different parts of a file may be written in different languages. For example, templating languages like [EJS](http://ejs.co) and [ERB](https://ruby-doc.org/stdlib-2.5.1/libdoc/erb/rdoc/ERB.html) allow you to generate HTML by writing a mixture of HTML and another language like JavaScript or Ruby. +Sometimes, different parts of a file may be written in different languages. For example, templating languages like [EJS](https://ejs.co) and [ERB](https://ruby-doc.org/stdlib-2.5.1/libdoc/erb/rdoc/ERB.html) allow you to generate HTML by writing a mixture of HTML and another language like JavaScript or Ruby. Tree-sitter handles these types of documents by allowing you to create a syntax tree based on the text in certain _ranges_ of a file. @@ -442,13 +442,13 @@ Many code analysis tasks involve searching for patterns in syntax trees. Tree-si A _query_ consists of one or more _patterns_, where each pattern is an [S-expression](https://en.wikipedia.org/wiki/S-expression) that matches a certain set of nodes in a syntax tree. The expression to match a given node consists of a pair of parentheses containing two things: the node's type, and optionally, a series of other S-expressions that match the node's children. For example, this pattern would match any `binary_expression` node whose children are both `number_literal` nodes: -``` +```scheme (binary_expression (number_literal) (number_literal)) ``` Children can also be omitted. For example, this would match any `binary_expression` where at least _one_ of its children is a `string_literal` node: -``` +```scheme (binary_expression (string_literal)) ``` @@ -456,7 +456,7 @@ Children can also be omitted. For example, this would match any `binary_expressi In general, it's a good idea to make patterns more specific by specifying [field names](#node-field-names) associated with child nodes. You do this by prefixing a child pattern with a field name followed by a colon. For example, this pattern would match an `assignment_expression` node where the `left` child is a `member_expression` whose `object` is a `call_expression`. 
-``` +```scheme (assignment_expression left: (member_expression object: (call_expression))) @@ -464,9 +464,9 @@ In general, it's a good idea to make patterns more specific by specifying [field #### Negated Fields -You can also constrain a pattern so that it only matches nodes that *lack* a certain field. To do this, add a field name prefixed by a `!` within the parent pattern. For example, this pattern would match a class declaration with no type parameters: +You can also constrain a pattern so that it only matches nodes that _lack_ a certain field. To do this, add a field name prefixed by a `!` within the parent pattern. For example, this pattern would match a class declaration with no type parameters: -``` +```scheme (class_declaration name: (identifier) @class_name !type_parameters) @@ -476,7 +476,7 @@ You can also constrain a pattern so that it only matches nodes that *lack* a cer The parenthesized syntax for writing nodes only applies to [named nodes](#named-vs-anonymous-nodes). To match specific anonymous nodes, you write their name between double quotes. For example, this pattern would match any `binary_expression` where the operator is `!=` and the right side is `null`: -``` +```scheme (binary_expression operator: "!=" right: (null)) @@ -488,7 +488,7 @@ When matching patterns, you may want to process specific nodes within the patter For example, this pattern would match any assignment of a `function` to an `identifier`, and it would associate the name `the-function-name` with the identifier: -``` +```scheme (assignment_expression left: (identifier) @the-function-name right: (function)) @@ -496,7 +496,7 @@ For example, this pattern would match any assignment of a `function` to an `iden And this pattern would match all method definitions, associating the name `the-method-name` with the method name, `the-class-name` with the containing class name: -``` +```scheme (class_declaration name: (identifier) @the-class-name body: (class_body @@ -510,13 +510,13 @@ You can match a repeating sequence of sibling nodes using the postfix `+` and `* For example, this pattern would match a sequence of one or more comments: -``` +```scheme (comment)+ ``` This pattern would match a class declaration, capturing all of the decorators if any were present: -``` +```scheme (class_declaration (decorator)* @the-decorator name: (identifier) @the-name) @@ -524,7 +524,7 @@ This pattern would match a class declaration, capturing all of the decorators if You can also mark a node as optional using the `?` operator. For example, this pattern would match all function calls, capturing a string argument if one was present: -``` +```scheme (call_expression function: (identifier) @the-function arguments: (arguments (string)? @the-string-arg)) @@ -534,7 +534,7 @@ You can also mark a node as optional using the `?` operator. For example, this p You can also use parentheses for grouping a sequence of _sibling_ nodes. For example, this pattern would match a comment followed by a function declaration: -``` +```scheme ( (comment) (function_declaration) @@ -543,7 +543,7 @@ You can also use parentheses for grouping a sequence of _sibling_ nodes. For exa Any of the quantification operators mentioned above (`+`, `*`, and `?`) can also be applied to groups. 
For example, this pattern would match a comma-separated series of numbers: -``` +```scheme ( (number) ("," (number))* ) @@ -558,7 +558,7 @@ This is similar to _character classes_ from regular expressions (`[abc]` matches For example, this pattern would match a call to either a variable or an object property. In the case of a variable, capture it as `@function`, and in the case of a property, capture it as `@method`: -``` +```scheme (call_expression function: [ (identifier) @function @@ -569,7 +569,7 @@ In the case of a variable, capture it as `@function`, and in the case of a prope This pattern would match a set of possible keyword tokens, capturing them as `@keyword`: -``` +```scheme [ "break" "delete" @@ -592,7 +592,7 @@ and `_` will match any named or anonymous node. For example, this pattern would match any node inside a call: -``` +```scheme (call (_) @call.inner) ``` @@ -602,7 +602,7 @@ The anchor operator, `.`, is used to constrain the ways in which child patterns When `.` is placed before the _first_ child within a parent pattern, the child will only match when it is the first named node in the parent. For example, the below pattern matches a given `array` node at most once, assigning the `@the-element` capture to the first `identifier` node in the parent `array`: -``` +```scheme (array . (identifier) @the-element) ``` @@ -610,13 +610,13 @@ Without this anchor, the pattern would match once for every identifier in the ar Similarly, an anchor placed after a pattern's _last_ child will cause that child pattern to only match nodes that are the last named child of their parent. The below pattern matches only nodes that are the last named child within a `block`. -``` +```scheme (block (_) @last-expression .) ``` Finally, an anchor _between_ two child patterns will cause the patterns to only match nodes that are immediate siblings. The pattern below, given a long dotted name like `a.b.c.d`, will only match pairs of consecutive identifiers: `a, b`, `b, c`, and `c, d`. -``` +```scheme (dotted_name (identifier) @prev-id . @@ -629,20 +629,38 @@ The restrictions placed on a pattern by an anchor operator ignore anonymous node #### Predicates -You can also specify arbitrary metadata and conditions associated with a pattern by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions start with a _predicate name_ beginning with a `#` character. After that, they can contain an arbitrary number of `@`-prefixed capture names or strings. +You can also specify arbitrary metadata and conditions associated with a pattern +by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions +start with a _predicate name_ beginning with a `#` character. After that, they can +contain an arbitrary number of `@`-prefixed capture names or strings. -For example, this pattern would match identifier whose names is written in `SCREAMING_SNAKE_CASE`: +Tree-sitter's CLI supports the following predicates by default: -``` -( - (identifier) @constant - (#match? @constant "^[A-Z][A-Z_]+") -) +##### eq?, not-eq?, any-eq?, any-not-eq? + +This family of predicates allows you to match against a single capture or string +value. + +The first argument must be a capture, but the second can be either a capture to +compare the two captures' text, or a string to compare the first capture's text +against. + +The base predicate is `#eq?`, but its complement `#not-eq?` can be used to _not_ +match a value. 
+
+Consider the following example targeting C:
+
+```scheme
+((identifier) @variable.builtin
+  (#eq? @variable.builtin "self"))
```

-And this pattern would match key-value pairs where the `value` is an identifier with the same name as the key:
+This pattern would match any identifier that is `self`.

-```
+And this pattern would match key-value pairs where the `value` is an identifier
+with the same name as the key:
+
+```scheme
(
  (pair
    key: (property_identifier) @key-name
@@ -651,7 +669,87 @@ And this pattern would match key-value pairs where the `value` is an identifier
)
```

-_Note_ - Predicates are not handled directly by the Tree-sitter C library. They are just exposed in a structured form so that higher-level code can perform the filtering. However, higher-level bindings to Tree-sitter like [the Rust crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) implement a few common predicates like `#eq?` and `#match?`.
+The prefix `any-` is meant for use with quantified captures. Here's
+an example finding a segment of empty comments:
+
+```scheme
+((comment)+ @comment.empty
+  (#any-eq? @comment.empty "//"))
+```
+
+Note that `#any-eq?` will match a quantified capture if
+_any_ of the nodes match the predicate, while by default a quantified capture
+will only match if _all_ the nodes match the predicate.
+
+##### match?, not-match?, any-match?, any-not-match?
+
+These predicates are similar to the `eq?` predicates, but they use regular expressions
+to match against the capture's text.
+
+The first argument must be a capture, and the second must be a string containing
+a regular expression.
+
+For example, this pattern would match an identifier whose name is written in `SCREAMING_SNAKE_CASE`:
+
+```scheme
+((identifier) @constant
+  (#match? @constant "^[A-Z][A-Z_]+"))
+```
+
+Here's an example finding potential documentation comments in C:
+
+```scheme
+((comment)+ @comment.documentation
+  (#match? @comment.documentation "^///\s+.*"))
+```
+
+Here's another example finding Cgo comments to potentially inject with C:
+
+```scheme
+((comment)+ @injection.content
+  .
+  (import_declaration
+    (import_spec path: (interpreted_string_literal) @_import_c))
+  (#eq? @_import_c "\"C\"")
+  (#match? @injection.content "^//"))
+```
+
+##### any-of?, not-any-of?
+
+The `#any-of?` predicate allows you to match a capture against multiple strings,
+and will match if the capture's text is equal to any of the strings.
+
+Consider this example that targets JavaScript:
+
+```scheme
+((identifier) @variable.builtin
+  (#any-of? @variable.builtin
+    "arguments"
+    "module"
+    "console"
+    "window"
+    "document"))
+```
+
+This will match any of the builtin variables in JavaScript.
+
+_Note_ — Predicates are not handled directly by the Tree-sitter C library.
+They are just exposed in a structured form so that higher-level code can perform
+the filtering. However, higher-level bindings to Tree-sitter like
+[the Rust crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust)
+or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web)
+do implement a few common predicates, like the `#eq?`, `#match?`, and `#any-of?`
+predicates explained above.
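+
+For completeness, here is one way the negated forms might be combined in a single
+pattern (a sketch only; the node and capture names are illustrative rather than
+taken from any particular grammar):
+
+```scheme
+((identifier) @variable.local
+  (#not-match? @variable.local "^[A-Z][A-Z_]+$")
+  (#not-any-of? @variable.local "arguments" "module"))
+```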
+
+To recap, these are the predicates that Tree-sitter's bindings support:
+
+- `#eq?` checks for a direct match against a capture or string
+- `#match?` checks for a match against a regular expression
+- `#any-of?` checks for a match against a list of strings
+- Adding `not-` to the beginning of any of these predicates will negate the match
+- By default, a quantified capture will only match if _all_ of the nodes match the predicate
+- Adding `any-` before the `eq` or `match` predicates will instead match if any of the nodes match the predicate
+
### The Query API
diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md
index 07f9d865..0ecef257 100644
--- a/docs/section-3-creating-parsers.md
+++ b/docs/section-3-creating-parsers.md
@@ -46,7 +46,7 @@ npm install --save nan
npm install --save-dev tree-sitter-cli
```

-The last command will install the CLI into the `node_modules` folder in your working directory. An executable program called `tree-sitter` will be created inside of `node_modules/.bin/`. You may want to follow the Node.js convention of adding that folder to your your `PATH` so that you can easily run this program when working in this directory.
+The last command will install the CLI into the `node_modules` folder in your working directory. An executable program called `tree-sitter` will be created inside of `node_modules/.bin/`. You may want to follow the Node.js convention of adding that folder to your `PATH` so that you can easily run this program when working in this directory.

```sh
# In your shell profile script
@@ -81,9 +81,16 @@ echo 'hello' > example-file
tree-sitter parse example-file
```

+Alternatively, in Windows PowerShell:
+
+```pwsh
+"hello" | Out-File example-file -Encoding utf8
+tree-sitter parse example-file
+```
+
This should print the following:

-```
+```text
(source_file [0, 0] - [1, 0])
```

@@ -116,7 +123,7 @@ For each rule that you add to the grammar, you should first create a *test* that

For example, you might have a file called `test/corpus/statements.txt` that contains a series of entries like this:

-```
+```text
==================
Return statements
==================
@@ -142,7 +149,7 @@ func x() int {

The expected output section can also *optionally* show the [*field names*][field-names-section] associated with each child node. To include field names in your tests, you write a node's field name followed by a colon, before the node itself in the S-expression:

-```
+```text
(source_file
  (function_definition
    name: (identifier)
@@ -152,6 +159,26 @@ func x() int {
      (return_statement (number)))))
```

+* If your language's syntax conflicts with the `===` and `---` test separators, you can optionally add an arbitrary identical suffix (in the below example, `|||`) to disambiguate them:
+
+```text
+==================|||
+Basic module
+==================|||
+
+---- MODULE Test ----
+increment(n) == n + 1
+====
+
+---|||
+
+(source_file
+  (module (identifier)
+    (operator (identifier)
+      (parameter_list (identifier))
+      (plus (identifier_ref) (number)))))
+```
+
These tests are important. They serve as the parser's API documentation, and they can be run every time you change the grammar to verify that everything still parses correctly. By default, the `tree-sitter test` command runs all of the tests in your `corpus` or `test/corpus/` folder.
To run a particular test, you can use the `-f` flag:
@@ -174,7 +201,7 @@ The `tree-sitter test` command will *also* run any syntax highlighting tests in

You can run your parser on an arbitrary file using `tree-sitter parse`. This will print the resulting syntax tree, including nodes' ranges and field names, like this:

-```
+```text
(source_file [0, 0] - [3, 0]
  (function_declaration [0, 0] - [2, 1]
    name: (identifier [0, 5] - [0, 9])
@@ -222,10 +249,10 @@ In addition to the `name` and `rules` fields, grammars have a few other optional

* **`inline`** - an array of rule names that should be automatically *removed* from the grammar by replacing all of their usages with a copy of their definition. This is useful for rules that are used in multiple places but for which you *don't* want to create syntax tree nodes at runtime.
* **`conflicts`** - an array of arrays of rule names. Each inner array represents a set of rules that's involved in an *LR(1) conflict* that is *intended to exist* in the grammar. When these conflicts occur at runtime, Tree-sitter will use the GLR algorithm to explore all of the possible interpretations. If *multiple* parses end up succeeding, Tree-sitter will pick the subtree whose corresponding rule has the highest total *dynamic precedence*.
* **`externals`** - an array of token names which can be returned by an [*external scanner*](#external-scanners). External scanners allow you to write custom C code which runs during the lexing process in order to handle lexical rules (e.g. Python's indentation tokens) that cannot be described by regular expressions.
+* **`precedences`** - an array of arrays of strings, where each array of strings defines named precedence levels in descending order. These names can be used in the `prec` functions to define precedence relative only to other names in the array, rather than globally. Can only be used with parse precedence, not lexical precedence.
* **`word`** - the name of a token that will match keywords for the purpose of the [keyword extraction](#keyword-extraction) optimization.
* **`supertypes`** - an array of hidden rule names which should be considered to be 'supertypes' in the generated [*node types* file][static-node-types].
-
## Writing the Grammar

Writing a grammar requires creativity. There are an infinite number of CFGs (context-free grammars) that can be used to describe any given language. In order to produce a good Tree-sitter parser, you need to create a grammar with two important properties:
@@ -349,7 +376,7 @@ return x + y;

According to the specification, this line is a `ReturnStatement`, the fragment `x + y` is an `AdditiveExpression`, and `x` and `y` are both `IdentifierReferences`. The relationship between these constructs is captured by a complex series of production rules:

-```
+```text
ReturnStatement -> 'return' Expression
Expression -> AssignmentExpression
AssignmentExpression -> ConditionalExpression
@@ -406,7 +433,7 @@ To produce a readable syntax tree, we'd like to model JavaScript expressions usi

Of course, this flat structure is highly ambiguous.
If we try to generate a parser, Tree-sitter gives us an error message: -``` +```text Error: Unresolved conflict for symbol sequence: '-' _expression • '*' … @@ -442,7 +469,7 @@ For an expression like `-a * b`, it's not clear whether the `-` operator applies Applying a higher precedence in `unary_expression` fixes that conflict, but there is still another conflict: -``` +```text Error: Unresolved conflict for symbol sequence: _expression '*' _expression • '*' … @@ -498,11 +525,11 @@ Tree-sitter's parsing process is divided into two phases: parsing (which is desc ### Conflicting Tokens -Grammars often contain multiple tokens that can match the same characters. For example, a grammar might contain the tokens (`"if"` and `/[a-z]+/`). Tree-sitter differentiates between these conflicting tokens in a few ways: +Grammars often contain multiple tokens that can match the same characters. For example, a grammar might contain the tokens (`"if"` and `/[a-z]+/`). Tree-sitter differentiates between these conflicting tokens in a few ways. 1. **Context-aware Lexing** - Tree-sitter performs lexing on-demand, during the parsing process. At any given position in a source document, the lexer only tries to recognize tokens that are *valid* at that position in the document. -2. **Lexical Precedence** - When the precedence functions described [above](#the-grammar-dsl) are used within the `token` function, the given precedence values serve as instructions to the lexer. If there are two valid tokens that match the characters at a given position in the document, Tree-sitter will select the one with the higher precedence. +2. **Lexical Precedence** - When the precedence functions described [above](#the-grammar-dsl) are used *within* the `token` function, the given explicit precedence values serve as instructions to the lexer. If there are two valid tokens that match the characters at a given position in the document, Tree-sitter will select the one with the higher precedence. 3. **Match Length** - If multiple valid tokens with the same precedence match the characters at a given position in a document, Tree-sitter will select the token that matches the [longest sequence of characters][longest-match]. @@ -510,6 +537,12 @@ Grammars often contain multiple tokens that can match the same characters. For e 5. **Rule Order** - If none of the above criteria can be used to select one token over another, Tree-sitter will prefer the token that appears earlier in the grammar. +If there is an external scanner it may have [an additional impact](#other-external-scanner-details) over regular tokens defined in the grammar. + +### Lexical Precedence vs. Parse Precedence + +One common mistake involves not distinguishing *lexical precedence* from *parse precedence*. Parse precedence determines which rule is chosen to interpret a given sequence of tokens. *Lexical precedence* determines which token is chosen to interpret at a given position of text and it is a lower-level operation that is done first. The above list fully captures Tree-sitter's lexical precedence rules, and you will probably refer back to this section of the documentation more often than any other. Most of the time when you really get stuck, you're dealing with a lexical precedence problem. Pay particular attention to the difference in meaning between using `prec` inside of the `token` function versus outside of it. The *lexical precedence* syntax is `token(prec(N, ...))`. + ### Keywords Many languages have a set of *keyword* tokens (e.g. 
`if`, `for`, `return`), as well as a more general token (e.g. `identifier`) that matches any word, including many of the keyword strings. For example, JavaScript has a keyword `instanceof`, which is used as a binary operator, like this:
@@ -568,6 +601,7 @@ Aside from improving error detection, keyword extraction also has performance be

### External Scanners

Many languages have some tokens whose structure is impossible or inconvenient to describe with a regular expression. Some examples:
+
* [Indent and dedent][indent-tokens] tokens in Python
* [Heredocs][heredoc] in Bash and Ruby
* [Percent strings][percent-string] in Ruby
@@ -592,7 +626,7 @@ grammar({

Then, add another C or C++ source file to your project. Currently, its path must be `src/scanner.c` or `src/scanner.cc` for the CLI to recognize it. Be sure to add this file to the `sources` section of your `binding.gyp` file so that it will be included when your project is compiled by Node.js and uncomment the appropriate block in your `bindings/rust/build.rs` file so that it will be included in your Rust crate.

-In this new source file, define an [`enum`][enum] type containing the names of all of your external tokens. The ordering of this enum must match the order in your grammar's `externals` array.
+In this new source file, define an [`enum`][enum] type containing the names of all of your external tokens. The ordering of this enum must match the order in your grammar's `externals` array; the actual names do not matter.

```c
#include <tree_sitter/parser.h>
@@ -616,7 +650,6 @@ void * tree_sitter_my_language_external_scanner_create() {

This function should create your scanner object. It will only be called once, whenever your language is set on a parser. Often, you will want to allocate memory on the heap and return a pointer to it. If your external scanner doesn't need to maintain any state, it's ok to return `NULL`.

-
#### Destroy

```c
@@ -655,6 +688,7 @@ void tree_sitter_my_language_external_scanner_deserialize(
```

This function should *restore* the state of your scanner based on the bytes that were previously written by the `serialize` function. It is called with a pointer to your scanner, a pointer to the buffer of bytes, and the number of bytes that should be read.
+It is good practice to explicitly erase your scanner state variables at the start of this function, before restoring their values from the byte buffer.

#### Scan

@@ -672,15 +706,16 @@ This function is responsible for recognizing external tokens. It should return `

* **`int32_t lookahead`** - The next character in the input stream, represented as a 32-bit unicode code point.
* **`TSSymbol result_symbol`** - The symbol that was recognized. Your scan function should *assign* to this field one of the values from the `TokenType` enum, described above.
-* **`void (*advance)(TSLexer *, bool skip)`** - A function for advancing to the next character. If you pass `true` for the second argument, the current character will be treated as whitespace.
+* **`void (*advance)(TSLexer *, bool skip)`** - A function for advancing to the next character. If you pass `true` for the second argument, the current character will be treated as whitespace; whitespace won't be included in the text range associated with tokens emitted by the external scanner.
* **`void (*mark_end)(TSLexer *)`** - A function for marking the end of the recognized token. This allows matching tokens that require multiple characters of lookahead.
By default (if you don't call `mark_end`), any character that you moved past using the `advance` function will be included in the size of the token. But once you call `mark_end`, then any later calls to `advance` will *not* increase the size of the returned token. You can call `mark_end` multiple times to increase the size of the token.
* **`uint32_t (*get_column)(TSLexer *)`** - A function for querying the current column position of the lexer. It returns the number of codepoints since the start of the current line. The codepoint position is recalculated on every call to this function by reading from the start of the line.
-* **`bool (*is_at_included_range_start)(TSLexer *)`** - A function for checking if the parser has just skipped some characters in the document. When parsing an embedded document using the `ts_parser_set_included_ranges` function (described in the [multi-language document section][multi-language-section]), your scanner may want to apply some special behavior when moving to a disjoint part of the document. For example, in [EJS documents][ejs], the JavaScript parser uses this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`.
+* **`bool (*is_at_included_range_start)(const TSLexer *)`** - A function for checking whether the parser has just skipped some characters in the document. When parsing an embedded document using the `ts_parser_set_included_ranges` function (described in the [multi-language document section][multi-language-section]), the scanner may want to apply some special behavior when moving to a disjoint part of the document. For example, in [EJS documents][ejs], the JavaScript parser uses this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`.
+* **`bool (*eof)(const TSLexer *)`** - A function for determining whether the lexer is at the end of the file. The value of `lookahead` will be `0` at the end of a file, but this function should be used instead of checking for that value because the `0` or "NUL" value is also a valid character that could be present in the file being parsed.

The third argument to the `scan` function is an array of booleans that indicates which of your external tokens are currently expected by the parser. You should only look for a given token if it is valid according to this array. At the same time, you cannot backtrack, so you may need to combine certain pieces of logic.

```c
-if (valid_symbols[INDENT] || valid_symbol[DEDENT]) {
+if (valid_symbols[INDENT] || valid_symbols[DEDENT]) {

  // ... logic that is common to both `INDENT` and `DEDENT`

@@ -694,8 +729,23 @@ if (valid_symbols[INDENT] || valid_symbol[DEDENT]) {
}
```

+#### Other External Scanner Details
+
+If a token in the `externals` array is valid at a given position in the parse, the external scanner will be called first, before anything else is done. This means the external scanner functions as a powerful override of Tree-sitter's lexing behavior, and can be used to solve problems that can't be cracked with ordinary lexical, parse, or dynamic precedence.
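+
+As a minimal sketch of what such a scan function can look like, assuming a
+hypothetical backtick-delimited `RAW_STRING` external token, note how it consults
+`valid_symbols` before doing anything and uses `eof` to bound its loop:
+
+```c
+#include <tree_sitter/parser.h>
+
+// Hypothetical token; the order must match the grammar's `externals` array.
+enum TokenType { RAW_STRING };
+
+bool tree_sitter_my_language_external_scanner_scan(
+  void *payload,
+  TSLexer *lexer,
+  const bool *valid_symbols
+) {
+  // Only attempt recognition when the parser actually expects this token.
+  if (!valid_symbols[RAW_STRING]) return false;
+  if (lexer->lookahead != '`') return false;
+  lexer->advance(lexer, false); // consume the opening backtick
+
+  // Bound the loop with `eof`; consuming characters in an unbounded loop
+  // is a classic way for an external scanner to hang the parser.
+  while (!lexer->eof(lexer) && lexer->lookahead != '`') {
+    lexer->advance(lexer, false);
+  }
+  if (lexer->eof(lexer)) return false; // unterminated; let error recovery run
+
+  lexer->advance(lexer, false); // consume the closing backtick
+  lexer->result_symbol = RAW_STRING;
+  return true;
+}
+```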
+
+If a syntax error is encountered during regular parsing, Tree-sitter's first action during error recovery will be to call the external scanner's `scan` function with all tokens marked valid. The scanner should detect this case and handle it appropriately. One simple method of detection is to add an unused token to the end of the `externals` array, for example `externals: $ => [$.token1, $.token2, $.error_sentinel]`, then check whether that token is marked valid to determine whether Tree-sitter is in error correction mode.
+
+If you put terminal keywords in the `externals` array, for example `externals: $ => ['if', 'then', 'else']`, then any time those terminals are present in the grammar they will be tokenized by the external scanner. It is similar to writing `externals: $ => [$.if_keyword, $.then_keyword, $.else_keyword]` and then using `alias($.if_keyword, 'if')` in the grammar.
+
+If you use literal keywords in the `externals` array, lexing works in two steps: the external scanner is called first, and if it sets a resulting token and returns `true`, the token is considered recognized and Tree-sitter moves on to the next token. But the external scanner may return `false`, in which case Tree-sitter falls back to the internal lexing mechanism.
+
+If a keyword is defined in the `externals` array in rule-referencing form, like `$.if_keyword`, and there is no additional definition of that rule in the grammar rules (e.g. `if_keyword: $ => 'if'`), then falling back to the internal lexer isn't possible, because Tree-sitter doesn't know the actual keyword text; it is entirely the external scanner's responsibility to recognize such tokens.
+
+External scanners are a common cause of infinite loops.
+Be very careful when emitting zero-width tokens from your external scanner, and if you consume characters in a loop be sure to use the `eof` function to check whether you are at the end of the file.
+
[ambiguous-grammar]: https://en.wikipedia.org/wiki/Ambiguous_grammar
-[antlr]: http://www.antlr.org/
+[antlr]: https://www.antlr.org
[bison-dprec]: https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
[bison]: https://en.wikipedia.org/wiki/GNU_bison
[c-linkage]: https://en.cppreference.com/w/cpp/language/language_linkage
diff --git a/docs/section-4-syntax-highlighting.md b/docs/section-4-syntax-highlighting.md
index a6e5d74c..8fd73cf6 100644
--- a/docs/section-4-syntax-highlighting.md
+++ b/docs/section-4-syntax-highlighting.md
@@ -9,8 +9,6 @@ Syntax highlighting is a very common feature in applications that deal with code

This document explains how the Tree-sitter syntax highlighting system works, using the command line interface. If you are using the `tree-sitter-highlight` library (either from C or from Rust), all of these concepts are still applicable, but the configuration data is provided using in-memory objects, rather than files.

-**Note - If you are working on syntax highlighting in the [Atom](https://atom.io/) text editor, you should consult [the grammar-creation page](https://flight-manual.atom.io/hacking-atom/sections/creating-a-grammar/) of the Atom Flight Manual, *not* this document.
Atom currently uses a different syntax highlighting system that is also based on Tree-sitter, but is older than the one described here.** - ## Overview All of the files needed to highlight a given language are normally included in the same git repository as the Tree-sitter grammar for that language (for example, [`tree-sitter-javascript`](https://github.com/tree-sitter/tree-sitter-javascript), [`tree-sitter-ruby`](https://github.com/tree-sitter/tree-sitter-ruby)). In order to run syntax highlighting from the command-line, three types of files are needed: @@ -27,9 +25,9 @@ The Tree-sitter CLI automatically creates two directories in your home folder. These directories are created in the "normal" place for your platform: -- On Linux, `~/.config/tree-sitter` and `~/.cache/tree-sitter` -- On Mac, `~/Library/Application Support/tree-sitter` and `~/Library/Caches/tree-sitter` -- On Windows, `C:\Users\[username]\AppData\Roaming\tree-sitter` and `C:\Users\[username]\AppData\Local\tree-sitter` +* On Linux, `~/.config/tree-sitter` and `~/.cache/tree-sitter` +* On Mac, `~/Library/Application Support/tree-sitter` and `~/Library/Caches/tree-sitter` +* On Windows, `C:\Users\[username]\AppData\Roaming\tree-sitter` and `C:\Users\[username]\AppData\Local\tree-sitter` The CLI will work if there's no config file present, falling back on default values for each configuration option. To create a config file that you can edit, run this command: @@ -63,6 +61,7 @@ In your config file, the `"theme"` value is an object whose keys are dot-separat #### Highlight Names A theme can contain multiple keys that share a common subsequence. Examples: + * `variable` and `variable.parameter` * `function`, `function.builtin`, and `function.method` @@ -160,7 +159,7 @@ func increment(a int) int { With this syntax tree: -``` +```scheme (source_file (function_declaration name: (identifier) @@ -180,6 +179,7 @@ With this syntax tree: #### Example Query Suppose we wanted to render this code with the following colors: + * keywords `func` and `return` in purple * function `increment` in blue * type `int` in green @@ -187,7 +187,7 @@ Suppose we wanted to render this code with the following colors: We can assign each of these categories a *highlight name* using a query like this: -``` +```scheme ; highlights.scm "func" @keyword @@ -254,7 +254,7 @@ list = [item] With this syntax tree: -``` +```scheme (program (method name: (identifier) @@ -297,7 +297,7 @@ There are several different types of names within this method: Let's write some queries that let us clearly distinguish between these types of names. First, set up the highlighting query, as described in the previous section. We'll assign distinct colors to method calls, method definitions, and formal parameters: -``` +```scheme ; highlights.scm (call method: (identifier) @function.method) @@ -314,7 +314,7 @@ Let's write some queries that let us clearly distinguish between these types of Then, we'll set up a local variable query to keep track of the variables and scopes. Here, we're indicating that methods and blocks create local *scopes*, parameters and assignments create *definitions*, and other identifiers should be considered *references*: -``` +```scheme ; locals.scm (method) @local.scope @@ -347,6 +347,7 @@ Running `tree-sitter highlight` on this ruby file would produce output like this ### Language Injection Some source files contain code written in multiple different languages. Examples include: + * HTML files, which can contain JavaScript inside of `