Merge branch 'master' into wasm-language

Max Brunsfeld · 2023-10-27 11:57:04 +01:00 · commit f4e2f68f14
161 changed files with 10293 additions and 4253 deletions

appveyor.yml (deleted)

@ -1,50 +0,0 @@
build: false
install:
# Terminate early unless building either a tag or a PR.
- if "%APPVEYOR_REPO_TAG%" == "false" if not "%APPVEYOR_REPO_BRANCH%" == "master" appveyor exit
# Install rust
- appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
- IF "%PLATFORM%" == "x86" rustup-init -y --default-toolchain stable --default-host i686-pc-windows-msvc
- IF "%PLATFORM%" == "x64" rustup-init -y --default-toolchain stable --default-host x86_64-pc-windows-msvc
- set PATH=%PATH%;C:\Users\appveyor\.cargo\bin
- rustc -vV
- cargo -vV
# Install dependencies
- git submodule update --init
platform:
- x64
- x86
test_script:
# Fetch and regenerate the fixture parsers
- script\fetch-fixtures.cmd
- cargo build --release
- script\generate-fixtures.cmd
# Run tests
- script\test.cmd
- script\benchmark.cmd
before_deploy:
- move target\release\tree-sitter.exe tree-sitter.exe
- 7z a -tgzip tree-sitter-windows-%PLATFORM%.gz tree-sitter.exe
- appveyor PushArtifact tree-sitter-windows-%PLATFORM%.gz
deploy:
description: ''
provider: GitHub
auth_token:
secure: VC9ntV5+inKoNteZyLQksKzWMKXF46P+Jx3JHKVSfF+o1rWtZn2iIHAVsQv5LaUi
artifact: /tree-sitter-windows-.*/
draft: true
force_update: true
on:
APPVEYOR_REPO_TAG: true
cache:
- target
- test\fixtures\grammars
- C:\Users\appveyor\.cargo

3
.gitattributes vendored

@ -1,2 +1,5 @@
/lib/src/unicode/*.h linguist-vendored
/lib/src/unicode/LICENSE linguist-vendored
/cli/src/generate/prepare_grammar/*.json -diff
Cargo.lock -diff

17
.github/scripts/cross.sh vendored Executable file

@ -0,0 +1,17 @@
#!/bin/bash
# set -x
set -e
if [ "$BUILD_CMD" != "cross" ]; then
echo "cross.sh - is a helper to assist only in cross compiling environments" >&2
echo "To use this tool set the BUILD_CMD env var to the \"cross\" value" >&2
exit 111
fi
if [ -z "$CROSS_IMAGE" ]; then
echo "The CROSS_IMAGE env var should be provided" >&2
exit 111
fi
docker run --rm -v /home/runner:/home/runner -w "$PWD" "$CROSS_IMAGE" "$@"
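For illustration, a minimal sketch of driving this helper outside of CI (the image tag here is illustrative; in the workflow a custom image is built and exported as CROSS_IMAGE):

```
export BUILD_CMD=cross
export CROSS_IMAGE=ghcr.io/cross-rs/aarch64-unknown-linux-gnu:edge
# Runs the given command inside the image, with the working directory set to $PWD
cross.sh env | grep '^CARGO_TARGET_'
```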

19
.github/scripts/make.sh vendored Executable file

@ -0,0 +1,19 @@
#!/bin/bash
# set -x
set -e
if [ "$BUILD_CMD" == "cross" ]; then
if [ -z "$CC" ]; then
echo "make.sh: CC is not set" >&2
exit 111
fi
if [ -z "$AR" ]; then
echo "make.sh: AR is not set" >&2
exit 111
fi
cross.sh make CC=$CC AR=$AR "$@"
else
make "$@"
fi
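A usage sketch; the CC/AR values mirror the workflow's linux-arm64 matrix entry, and CROSS_IMAGE must also be set for cross.sh to work:

```
# Native build
make.sh -j
# Cross build: compile the C library inside the cross image
BUILD_CMD=cross CC=aarch64-linux-gnu-gcc AR=aarch64-linux-gnu-ar make.sh -j
```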

28
.github/scripts/tree-sitter.sh vendored Executable file

@ -0,0 +1,28 @@
#!/bin/bash
# set -x
set -e
if [ -z "$ROOT" ]; then
echo "The ROOT env var should be set to absolute path of a repo root folder" >&2
exit 111
fi
if [ -z "$TARGET" ]; then
echo "The TARGET env var should be equal to a \`cargo build --target <TARGET>\` command value" >&2
exit 111
fi
tree_sitter="$ROOT"/target/"$TARGET"/release/tree-sitter
if [ "$BUILD_CMD" == "cross" ]; then
if [ -z "$CROSS_RUNNER" ]; then
echo "The CROSS_RUNNER env var should be set to a CARGO_TARGET_*_RUNNER env var value" >&2
echo "that is available in a docker image used by the cross tool under the hood" >&2
exit 111
fi
cross.sh $CROSS_RUNNER "$tree_sitter" "$@"
else
"$tree_sitter" "$@"
fi
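A minimal sketch of calling this wrapper after a release build:

```
export ROOT="$PWD"
export TARGET=x86_64-unknown-linux-gnu
tree-sitter.sh --version   # runs "$ROOT"/target/"$TARGET"/release/tree-sitter --version
```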

85
.github/workflows/CICD.yml vendored Normal file

@ -0,0 +1,85 @@
name: CICD
on:
workflow_dispatch:
pull_request:
types:
- opened
- reopened
- synchronize
- ready_for_review
push:
branches-ignore:
- release/v*
concurrency:
group: >
${{ github.workflow }} @ ${{
github.ref == 'refs/heads/master' && github.ref_name || ''
}}${{
github.ref == 'refs/heads/master' && github.sha
|| github.event.pull_request.head.label || github.head_ref || github.ref
}}
cancel-in-progress: true
jobs:
init:
name: Init
runs-on: ubuntu-latest
steps:
- name: Get PR head ref
if: ${{ github.event_name == 'pull_request' }}
id: pr_head_ref
run: |
echo "ref=refs/pull/${{ github.event.pull_request.number }}/head" >> $GITHUB_OUTPUT
outputs:
ref: >-
${{
(github.event_name == 'pull_request' && startsWith(github.head_ref, 'release/v'))
&& steps.pr_head_ref.outputs.ref
|| github.ref
}}
fast_checks:
name: Fast checks
uses: ./.github/workflows/fast_checks.yml
full_checks:
name: Full Rust checks
needs: fast_checks
uses: ./.github/workflows/full_rust_checks.yml
min_version:
name: Minimum supported rust version
needs: fast_checks
uses: ./.github/workflows/msrv.yml
with:
package: tree-sitter-cli
sanitize:
name: Sanitize
needs: [init, fast_checks]
uses: ./.github/workflows/sanitize.yml
build:
name: Build & Test
needs: [init, fast_checks]
uses: ./.github/workflows/build.yml
with:
ref: ${{ needs.init.outputs.ref }}
release:
name: Release
needs: [init, fast_checks, full_checks, min_version, build, sanitize]
if: >
github.event_name == 'pull_request' &&
startsWith(github.head_ref, 'release/v') &&
!github.event.pull_request.draft
uses: ./.github/workflows/release.yml
with:
ref: ${{ needs.init.outputs.ref }}
publish:
name: Publish
needs: release
uses: ./.github/workflows/publish.yml
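Because the workflow declares a workflow_dispatch trigger, it can also be started by hand, for example with the GitHub CLI:

```
gh workflow run CICD.yml
```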

267
.github/workflows/build.yml vendored Normal file

@ -0,0 +1,267 @@
name: Build & Test
env:
CARGO_TERM_COLOR: always
RUSTFLAGS: "-D warnings"
CROSS_DEBUG: 1
on:
workflow_call:
inputs:
ref:
default: ${{ github.ref }}
type: string
run-tests:
default: true
type: boolean
workflow_dispatch:
inputs:
run-tests:
description: Run tests
default: true
type: boolean
rust-test-threads:
description: Number of Rust test threads
default: ""
type: string
jobs:
build:
name: ${{ matrix.platform }} (${{ matrix.target }}) (${{ matrix.os }})
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
platform:
- linux-arm64 #
- linux-arm #
- linux-armhf #
- linux-armv5te #
- linux-armv7l #
- linux-x64 #
- linux-x86 #
- linux-i586 #
- linux-mips #
- linux-mips64 #
- linux-mipsel #
- linux-mips64el #
- linux-powerpc #
- linux-powerpc64 #
- linux-powerpc64el #
# - linux-riscv64gc # #2712
- linux-s390x #
- linux-sparc64 #
- linux-thumbv7neon #
- windows-arm64 #
- windows-x64 # <-- No C library build - requires an additional adapted Makefile for `cl.exe` compiler
- windows-x86 # -- // --
- macos-arm64 # <-- macOS M1/M2 - no tests, only a CLI build to be published in release artifacts
- macos-x64 #
include:
# When adding a new `target`:
# 1. Define a new platform alias above
# 2. Add a new record to a matrix map in `cli/npm/install.js`
- { platform: linux-arm64 , target: aarch64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
- { platform: linux-arm , target: arm-unknown-linux-gnueabi , os: ubuntu-latest , use-cross: true }
- { platform: linux-armhf , target: arm-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true }
- { platform: linux-armv5te , target: armv5te-unknown-linux-gnueabi , os: ubuntu-latest , use-cross: true }
- { platform: linux-armv7l , target: armv7-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true }
- { platform: linux-x64 , target: x86_64-unknown-linux-gnu , os: ubuntu-20.04 } #2272
- { platform: linux-x86 , target: i686-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
- { platform: linux-i586 , target: i586-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
- { platform: linux-mips , target: mips-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
- { platform: linux-mips64 , target: mips64-unknown-linux-gnuabi64 , os: ubuntu-latest , use-cross: true }
- { platform: linux-mipsel , target: mipsel-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
- { platform: linux-mips64el , target: mips64el-unknown-linux-gnuabi64 , os: ubuntu-latest , use-cross: true }
- { platform: linux-powerpc , target: powerpc-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
- { platform: linux-powerpc64 , target: powerpc64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
- { platform: linux-powerpc64el , target: powerpc64le-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
# - { platform: linux-riscv64gc , target: riscv64gc-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } #2712
- { platform: linux-s390x , target: s390x-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
- { platform: linux-sparc64 , target: sparc64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
- { platform: linux-thumbv7neon , target: thumbv7neon-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true }
- { platform: windows-arm64 , target: aarch64-pc-windows-msvc , os: windows-latest }
- { platform: windows-x64 , target: x86_64-pc-windows-msvc , os: windows-latest }
- { platform: windows-x86 , target: i686-pc-windows-msvc , os: windows-latest }
- { platform: macos-arm64 , target: aarch64-apple-darwin , os: macos-latest }
- { platform: macos-x64 , target: x86_64-apple-darwin , os: macos-latest }
# Cross compilers for C library
- { platform: linux-arm64 , cc: aarch64-linux-gnu-gcc , ar: aarch64-linux-gnu-ar }
- { platform: linux-arm , cc: arm-linux-gnueabi-gcc , ar: arm-linux-gnueabi-ar }
- { platform: linux-armhf , cc: arm-unknown-linux-gnueabihf-gcc , ar: arm-unknown-linux-gnueabihf-ar }
- { platform: linux-armv5te , cc: arm-linux-gnueabi-gcc , ar: arm-linux-gnueabi-ar }
- { platform: linux-armv7l , cc: arm-linux-gnueabihf-gcc , ar: arm-linux-gnueabihf-ar }
- { platform: linux-x86 , cc: i686-linux-gnu-gcc , ar: i686-linux-gnu-ar }
- { platform: linux-i586 , cc: i686-linux-gnu-gcc , ar: i686-linux-gnu-ar }
- { platform: linux-mips , cc: mips-linux-gnu-gcc , ar: mips-linux-gnu-ar }
- { platform: linux-mips64 , cc: mips64-linux-gnuabi64-gcc , ar: mips64-linux-gnuabi64-ar }
- { platform: linux-mipsel , cc: mipsel-linux-gnu-gcc , ar: mipsel-linux-gnu-ar }
- { platform: linux-mips64el , cc: mips64el-linux-gnuabi64-gcc , ar: mips64el-linux-gnuabi64-ar }
- { platform: linux-powerpc , cc: powerpc-linux-gnu-gcc , ar: powerpc-linux-gnu-ar }
- { platform: linux-powerpc64 , cc: powerpc64-linux-gnu-gcc , ar: powerpc64-linux-gnu-ar }
- { platform: linux-powerpc64el , cc: powerpc64le-linux-gnu-gcc , ar: powerpc64le-linux-gnu-ar }
# - { platform: linux-riscv64gc , cc: riscv64-linux-gnu-gcc , ar: riscv64-linux-gnu-ar } #2712
- { platform: linux-s390x , cc: s390x-linux-gnu-gcc , ar: s390x-linux-gnu-ar }
- { platform: linux-sparc64 , cc: sparc64-linux-gnu-gcc , ar: sparc64-linux-gnu-ar }
- { platform: linux-thumbv7neon , cc: arm-linux-gnueabihf-gcc , ar: arm-linux-gnueabihf-ar }
# Rust toolchains
- { platform: linux-mips , rust-toolchain: 1.71.1 }
- { platform: linux-mips64 , rust-toolchain: 1.71.1 }
- { platform: linux-mipsel , rust-toolchain: 1.71.1 }
- { platform: linux-mips64el , rust-toolchain: 1.71.1 }
# See #2041 tree-sitter issue
- { platform: windows-x64 , rust-test-threads: 1 }
- { platform: windows-x86 , rust-test-threads: 1 }
# CLI only build
- { platform: windows-arm64 , cli-only: true }
- { platform: macos-arm64 , cli-only: true }
env:
BUILD_CMD: cargo
EMSCRIPTEN_VERSION: ""
EXE: ${{ contains(matrix.target, 'windows') && '.exe' || '' }}
defaults:
run:
shell: bash
steps:
- name: Checkout source code
uses: actions/checkout@v3
with:
ref: ${{ inputs.ref }}
- name: Read Emscripten version
run: |
echo "EMSCRIPTEN_VERSION=$(cat cli/loader/emscripten-version)" >> $GITHUB_ENV
- name: Install Emscripten
if: ${{ !matrix.cli-only && !matrix.use-cross }}
uses: mymindstorm/setup-emsdk@v12
with:
version: ${{ env.EMSCRIPTEN_VERSION }}
- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable
with:
targets: ${{ matrix.target }}
toolchain: ${{ matrix.rust-toolchain || 'stable' }}
- name: Install cross
if: ${{ matrix.use-cross }}
uses: taiki-e/install-action@v2
with:
tool: cross
- name: Build custom cross image
if: ${{ matrix.use-cross && matrix.os == 'ubuntu-latest' }}
run: |
cd ..
target="${{ matrix.target }}"
image=ghcr.io/cross-rs/$target:custom
echo "CROSS_IMAGE=$image" >> $GITHUB_ENV
echo "[target.$target]" >> Cross.toml
echo "image = \"$image\"" >> Cross.toml
echo "CROSS_CONFIG=$PWD/Cross.toml" >> $GITHUB_ENV
echo "FROM ghcr.io/cross-rs/$target:edge" >> Dockerfile
echo "ENV DEBIAN_FRONTEND=noninteractive" >> Dockerfile
echo "RUN apt-get update && apt-get install -y nodejs" >> Dockerfile
docker build -t $image .
- name: Setup env extras
env:
RUST_TEST_THREADS: ${{ matrix.rust-test-threads || inputs.rust-test-threads || '' }}
USE_CROSS: ${{ matrix.use-cross }}
TARGET: ${{ matrix.target }}
CC: ${{ matrix.cc }}
AR: ${{ matrix.ar }}
IS_WINDOWS: ${{ contains(matrix.os, 'windows') }}
run: |
PATH="$PWD/.github/scripts:$PATH"
echo "$PWD/.github/scripts" >> $GITHUB_PATH
echo "TREE_SITTER=tree-sitter.sh" >> $GITHUB_ENV
echo "TARGET=$TARGET" >> $GITHUB_ENV
echo "ROOT=$PWD" >> $GITHUB_ENV
[ -n "$RUST_TEST_THREADS" ] && \
echo "RUST_TEST_THREADS=$RUST_TEST_THREADS" >> $GITHUB_ENV
[ -n "$CC" ] && echo "CC=$CC" >> $GITHUB_ENV
[ -n "$AR" ] && echo "AR=$AR" >> $GITHUB_ENV
[ "$IS_WINDOWS" = "false" ] && echo "CFLAGS=-Werror" >> $GITHUB_ENV
if [ "$USE_CROSS" == "true" ]; then
echo "BUILD_CMD=cross" >> $GITHUB_ENV
runner=$(BUILD_CMD=cross cross.sh bash -c "env | sed -nr '/^CARGO_TARGET_.*_RUNNER=/s///p'")
[ -n "$runner" ] && echo "CROSS_RUNNER=$runner" >> $GITHUB_ENV
fi
- name: Build C library
if: ${{ !contains(matrix.os, 'windows') }} # Requires an additional adapted Makefile for `cl.exe` compiler
run: make.sh -j
- name: Build wasm library
if: ${{ !matrix.cli-only && !matrix.use-cross }} # No point in building many times on the same GitHub runner host
run: script/build-wasm
- name: Build CLI
run: $BUILD_CMD build --release --target=${{ matrix.target }}
- name: Info about CLI
if: ${{ startsWith(matrix.platform, 'linux') }}
run: |
min_glibc=$(objdump -p target/$TARGET/release/tree-sitter${{ env.EXE }} | sed -nr 's/.*(GLIBC_.+).*/\1/p' | sort -uV | tail -n1)
echo "🔗 Minimal **glibc** version required for CLI: ${min_glibc}">> $GITHUB_STEP_SUMMARY
- name: Fetch fixtures
if: ${{ inputs.run-tests && !matrix.cli-only }} # Don't fetch fixtures for CLI-only build targets
run: script/fetch-fixtures
- name: Generate fixtures
if: ${{ inputs.run-tests && !matrix.cli-only }} # Can't natively run the CLI on the GitHub runner host
run: script/generate-fixtures
- name: Generate WASM fixtures
if: ${{ inputs.run-tests && !matrix.cli-only && !matrix.use-cross }} # See comment for the "Build wasm library" step
run: script/generate-fixtures-wasm
- name: Run main tests
if: ${{ inputs.run-tests && !matrix.cli-only }} # Can't natively run the CLI on the GitHub runner host
run: $BUILD_CMD test --target=${{ matrix.target }}
- name: Run wasm tests
if: ${{ inputs.run-tests && !matrix.cli-only && !matrix.use-cross }} # See comment for the "Build wasm library" step
run: script/test-wasm
- name: Run benchmarks
if: ${{ inputs.run-tests && !matrix.cli-only && !matrix.use-cross }} # Cross-compiled benchmarks make no sense
run: $BUILD_CMD bench benchmark -p tree-sitter-cli --target=${{ matrix.target }}
- name: Upload CLI artifact
uses: actions/upload-artifact@v3
with:
name: tree-sitter.${{ matrix.platform }}
path: target/${{ matrix.target }}/release/tree-sitter${{ env.EXE }}
if-no-files-found: error
retention-days: 7
- name: Upload WASM artifacts
if: ${{ matrix.platform == 'linux-x64' }}
uses: actions/upload-artifact@v3
with:
name: tree-sitter.wasm
path: |
lib/binding_web/tree-sitter.js
lib/binding_web/tree-sitter.wasm
if-no-files-found: error
retention-days: 7
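The "Info about CLI" step above reduces to a one-liner that can also be run locally against a Linux build:

```
objdump -p target/x86_64-unknown-linux-gnu/release/tree-sitter \
  | sed -nr 's/.*(GLIBC_.+).*/\1/p' | sort -uV | tail -n1
```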

.github/workflows/ci.yml (deleted)

@ -1,149 +0,0 @@
name: CI
on:
push:
branches:
- master
tags:
- v*
pull_request:
branches:
- "**"
env:
CARGO_TERM_COLOR: always
CARGO_INCREMENTAL: 0
jobs:
unix-tests:
name: Unix tests
runs-on: ${{ matrix.os }}
strategy:
fail-fast: true
matrix:
os:
- macos-latest
- ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v2
# Work around https://github.com/actions/cache/issues/403.
- name: Use GNU tar
if: matrix.os == 'macos-latest'
run: |
echo PATH="/usr/local/opt/gnu-tar/libexec/gnubin:$PATH" >> $GITHUB_ENV
- name: Read Emscripten version
run: |
printf 'EMSCRIPTEN_VERSION=%s\n' "$(cat cli/loader/emscripten-version)" >> $GITHUB_ENV
- name: Cache artifacts
id: cache
uses: actions/cache@v2
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-${{ hashFiles('Cargo.lock') }}-emscripten-${{ env.EMSCRIPTEN_VERSION }}
- name: Install rust
if: steps.cache.outputs.cache-hit != 'true'
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: minimal
- name: Check Rust code formatting
run: cargo fmt -- --check
- name: Install emscripten
uses: mymindstorm/setup-emsdk@v10
with:
version: ${{ env.EMSCRIPTEN_VERSION }}
- name: Build C library
run: make
- name: Build wasm library
run: script/build-wasm
- name: Build CLI
run: |
RUSTFLAGS="-D warnings"
cargo build --release
- name: Set up fixture parsers
run: |
script/fetch-fixtures
script/generate-fixtures
script/generate-fixtures-wasm
- name: Run main tests
run: cargo test
- name: Run wasm tests
run: script/test-wasm
- name: Run benchmarks
run: script/benchmark
- name: Compress CLI binary
if: startsWith(github.ref, 'refs/tags/v')
run: |
cp target/release/tree-sitter .
export platform=$(echo ${{ runner.os }} | awk '{print tolower($0)}')
gzip --suffix "-${platform}-x64.gz" tree-sitter
- name: Release
uses: softprops/action-gh-release@v1
if: startsWith(github.ref, 'refs/tags/v')
with:
draft: true
files: |
tree-sitter-*.gz
lib/binding_web/tree-sitter.js
lib/binding_web/tree-sitter.wasm
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
windows-tests:
name: Windows tests
runs-on: windows-latest
steps:
- name: Checkout repo
uses: actions/checkout@v2
- name: Cache artifacts
id: cache
uses: actions/cache@v2
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-${{ hashFiles('Cargo.lock') }}
- name: Install rust
if: steps.cache.outputs.cache-hit != 'true'
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: minimal
- name: Check Rust code formatting
run: cargo fmt -- --check
- name: Build CLI
run: |
$env:RUSTFLAGS="-D warnings"
cargo build --release
- name: Set up fixture parsers
run: |
script/fetch-fixtures.cmd
script/generate-fixtures.cmd
- name: Run main tests
run: script/test

31
.github/workflows/fast_checks.yml vendored Normal file

@ -0,0 +1,31 @@
name: Fast checks to fail fast on any simple code issues
env:
CARGO_TERM_COLOR: always
RUSTFLAGS: "-D warnings"
on:
workflow_call:
jobs:
check_rust_formatting:
name: Check Rust formatting
runs-on: ubuntu-latest
steps:
- name: Checkout source code
uses: actions/checkout@v3
- name: Run cargo fmt
run: cargo fmt -- --check
check_c_warnings:
name: Check C warnings
runs-on: ubuntu-latest
steps:
- name: Checkout source code
uses: actions/checkout@v3
- name: Build the C library to check that it compiles without warnings
run: make -j CFLAGS="-Werror"
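Both jobs can be reproduced locally with the same two commands:

```
cargo fmt -- --check
make -j CFLAGS="-Werror"
```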

32
.github/workflows/full_rust_checks.yml vendored Normal file

@ -0,0 +1,32 @@
name: Full Rust codebase checks
env:
CARGO_TERM_COLOR: always
RUSTFLAGS: "-D warnings"
on:
workflow_call:
jobs:
run:
name: Run checks
runs-on: ubuntu-latest
steps:
- name: Checkout source code
uses: actions/checkout@v3
- name: Install rust toolchain
uses: dtolnay/rust-toolchain@master
with:
toolchain: stable
components: clippy, rustfmt
- name: Run cargo fmt
run: cargo fmt -- --check
# - name: Run clippy
# run: cargo clippy --all-targets
- name: Run cargo check
run: cargo check --workspace --examples --tests --benches --bins

42
.github/workflows/msrv.yml vendored Normal file

@ -0,0 +1,42 @@
name: Minimum supported rust version
env:
CARGO_TERM_COLOR: always
RUSTFLAGS: "-D warnings"
on:
workflow_call:
inputs:
package:
description: Target cargo package name
required: true
type: string
jobs:
run:
name: Run checks
runs-on: ubuntu-latest
steps:
- name: Checkout source code
uses: actions/checkout@v3
- name: Get the MSRV from the package metadata
id: msrv
run: cargo metadata --no-deps --format-version 1 | jq -r '"version=" + (.packages[] | select(.name == "${{ inputs.package }}").rust_version)' >> $GITHUB_OUTPUT
- name: Install rust toolchain (v${{ steps.msrv.outputs.version }})
uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ steps.msrv.outputs.version }}
components: clippy, rustfmt
- name: Run cargo fmt
run: cargo fmt -- --check
# - name: Run clippy (on minimum supported rust version to prevent warnings we can't fix)
# run: cargo clippy --all-targets
# - name: Run main tests
# run: cargo test
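With the package input that CICD passes (tree-sitter-cli), the MSRV extraction step boils down to:

```
cargo metadata --no-deps --format-version 1 \
  | jq -r '.packages[] | select(.name == "tree-sitter-cli").rust_version'
```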

21
.github/workflows/publish.yml vendored Normal file

@ -0,0 +1,21 @@
name: Publish to registries
on:
workflow_call:
jobs:
crates_io:
name: Publish to Crates.io
runs-on: ubuntu-latest
steps:
- name: Publish packages
run: |
echo "::warning::TODO: add a Crates.io publish logic"
npm:
name: Publish to npmjs.com
runs-on: ubuntu-latest
steps:
- name: Publish packages
run: |
echo "::warning::TODO: add a npmjs.com publish logic"

113
.github/workflows/release.yml vendored Normal file

@ -0,0 +1,113 @@
name: Release
on:
workflow_call:
inputs:
ref:
default: ${{ github.ref }}
type: string
jobs:
permissions:
name: Check permissions
runs-on: ubuntu-latest
outputs:
release_allowed: >
${{
github.repository_owner == 'tree-sitter' &&
steps.maintainer.outputs.is_maintainer == 'true' &&
steps.local_branch.outputs.is_local == 'true'
}}
steps:
- name: Initiated by a maintainer
id: maintainer
env:
GH_TOKEN: ${{ github.token }}
repo: ${{ github.repository }}
actor: ${{ github.actor }}
run: |
maintainer=$(
gh api "/repos/${repo}/collaborators" |
jq ".[] | {login, maintainer: .permissions | .maintain} | select(.login == \"${actor}\") | .maintainer"
);
if [ "$maintainer" == "true" ]; then
echo "@${actor} has maintainer level permissions :rocket:" >> $GITHUB_STEP_SUMMARY;
echo "is_maintainer=true" >> $GITHUB_OUTPUT
fi
- name: The ref branch is local
id: local_branch
env:
is_local: ${{ github.event.pull_request.head.repo.full_name == github.repository }}
run: |
echo "is_local=${is_local}" >> $GITHUB_OUTPUT
release:
name: Release
needs: permissions
if: needs.permissions.outputs.release_allowed
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Checkout source code
uses: actions/checkout@v3
with:
ref: ${{ inputs.ref }}
- name: Download build artifacts
uses: actions/download-artifact@v3
with:
path: artifacts
- name: Display structure of downloaded files
run: ls -lR
working-directory: artifacts
- name: Prepare release artifacts
run: |
mkdir -p target
mv artifacts/tree-sitter.wasm/* target/
rm -r artifacts/tree-sitter.wasm
for platform in $(cd artifacts; ls | sed 's/^tree-sitter\.//'); do
exe=$(ls artifacts/tree-sitter.$platform/tree-sitter*)
gzip --stdout --name $exe > target/tree-sitter-$platform.gz
done
rm -rf artifacts
ls -l target/
- name: Get tag name from a release/v* branch name
id: tag_name
env:
tag: ${{ github.head_ref }}
run: echo "tag=${tag#release/}" >> $GITHUB_OUTPUT
- name: Add a release tag
env:
ref: ${{ inputs.ref }}
tag: ${{ steps.tag_name.outputs.tag }}
message: "Release ${{ steps.tag_name.outputs.tag }}"
run: |
git config user.name "$(git log -1 --pretty='%cn')"
git config user.email "$(git log -1 --pretty='%ce')"
git tag -a "$tag" HEAD -m "$message"
git push origin "$tag"
- name: Create release
uses: softprops/action-gh-release@v1
with:
name: ${{ steps.tag_name.outputs.tag }}
tag_name: ${{ steps.tag_name.outputs.tag }}
fail_on_unmatched_files: true
files: |
target/tree-sitter-*.gz
target/tree-sitter.wasm
target/tree-sitter.js
- name: Merge release PR
env:
GH_TOKEN: ${{ github.token }}
run: |
gh pr merge ${{ github.event.pull_request.html_url }} --match-head-commit $(git rev-parse HEAD) --merge --delete-branch
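As a worked example of the tag derivation above (branch name illustrative):

```
tag="release/v0.20.9"
echo "${tag#release/}"   # -> v0.20.9
```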

50
.github/workflows/sanitize.yml vendored Normal file

@ -0,0 +1,50 @@
name: Sanitize
env:
CARGO_TERM_COLOR: always
RUSTFLAGS: "-D warnings"
on:
workflow_call:
jobs:
check_undefined_behaviour:
name: Sanitizer checks
runs-on: ubuntu-latest
env:
TREE_SITTER: ${{ github.workspace }}/target/release/tree-sitter
steps:
- name: Checkout source code
uses: actions/checkout@v3
- name: Install UBSAN library
run: sudo apt-get update -y && sudo apt-get install -y libubsan1
- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable
- name: Build CLI
run: cargo build --release
- name: Fetch fixtures
run: script/fetch-fixtures
- name: Generate fixtures
run: script/generate-fixtures
- name: Run main tests with undefined behaviour sanitizer (UBSAN)
env:
UBSAN_OPTIONS: halt_on_error=1
CFLAGS: -fsanitize=undefined
RUSTFLAGS: ${{ env.RUSTFLAGS }} -lubsan
run: cargo test -- --test-threads 1
- name: Run main tests with address sanitizer (ASAN)
env:
ASAN_OPTIONS: halt_on_error=1
CFLAGS: -fsanitize=address
RUSTFLAGS: ${{ env.RUSTFLAGS }} -Zsanitizer=address --cfg=sanitizing
run: |
rustup install nightly
rustup component add rust-src --toolchain nightly-x86_64-unknown-linux-gnu
cargo +nightly test -Z build-std --target x86_64-unknown-linux-gnu -- --test-threads 1
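The UBSAN run can be reproduced locally with the same environment the job sets:

```
UBSAN_OPTIONS=halt_on_error=1 \
CFLAGS=-fsanitize=undefined \
RUSTFLAGS="-D warnings -lubsan" \
cargo test -- --test-threads 1
```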

5
.gitignore vendored

@ -7,6 +7,7 @@ log*.html
fuzz-results
/tree-sitter.pc
test/fixtures/grammars/*
!test/fixtures/grammars/.gitkeep
package-lock.json
@ -24,4 +25,6 @@ docs/assets/js/tree-sitter.js
*.obj
*.exp
*.lib
*.wasm
*.wasm
.swiftpm
zig-*

1074
Cargo.lock generated

File diff suppressed because it is too large

Cargo.toml

@ -1,4 +1,21 @@
[workspace]
default-members = ["cli"]
members = ["cli", "lib"]
resolver = "2"
[workspace.package]
rust-version = "1.65"
[profile.release]
strip = true # Automatically strip symbols from the binary.
lto = true # Link-time optimization.
opt-level = 3 # Optimization level 3.
codegen-units = 1 # Maximum size reduction optimizations.
[profile.size]
inherits = "release"
opt-level = "s" # Optimize for size.
[profile.profile]
inherits = "release"
strip = false

LICENSE

@ -1,6 +1,6 @@
The MIT License (MIT)
Copyright (c) 2018-2021 Max Brunsfeld
Copyright (c) 2018-2023 Max Brunsfeld
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

Makefile

@ -1,4 +1,4 @@
VERSION := 0.6.3
VERSION := 0.20.10
# install directory layout
PREFIX ?= /usr/local
@ -18,8 +18,8 @@ endif
OBJ := $(SRC:.c=.o)
# define default flags, and override to append mandatory flags
CFLAGS ?= -O3 -Wall -Wextra -Werror
override CFLAGS += -std=gnu99 -fPIC -Ilib/src -Ilib/src/wasm -Ilib/include
override CFLAGS := -O3 -std=gnu99 -fPIC -fvisibility=hidden -Wall -Wextra -Wshadow $(CFLAGS)
override CFLAGS += -Ilib/src -Ilib/src/wasm -Ilib/include
# ABI versioning
SONAME_MAJOR := 0
@ -50,20 +50,27 @@ libtree-sitter.$(SOEXTVER): $(OBJ)
$(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@
ln -sf $@ libtree-sitter.$(SOEXT)
ln -sf $@ libtree-sitter.$(SOEXTVER_MAJOR)
ifneq ($(STRIP),)
$(STRIP) $@
endif
install: all
install -d '$(DESTDIR)$(LIBDIR)'
install -m755 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a
install -m755 libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER)
ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR)
ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT)
install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter
install -m644 lib/include/tree_sitter/*.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/
install -d '$(DESTDIR)$(PCLIBDIR)'
sed -e 's|@LIBDIR@|$(LIBDIR)|;s|@INCLUDEDIR@|$(INCLUDEDIR)|;s|@VERSION@|$(VERSION)|' \
-e 's|=$(PREFIX)|=$${prefix}|' \
-e 's|@PREFIX@|$(PREFIX)|' \
tree-sitter.pc.in > '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc
tree-sitter.pc.in > tree-sitter.pc
install -d '$(DESTDIR)$(LIBDIR)'
install -m644 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/
install -m755 libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/
ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR)
ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT)
install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter
install -m644 lib/include/tree_sitter/api.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/
install -d '$(DESTDIR)$(PCLIBDIR)'
install -m644 tree-sitter.pc '$(DESTDIR)$(PCLIBDIR)'/
clean:
rm -f lib/src/*.o libtree-sitter.a libtree-sitter.$(SOEXT) libtree-sitter.$(SOEXTVER_MAJOR) libtree-sitter.$(SOEXTVER)
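A typical build-and-install sketch against this Makefile:

```
make -j                                # CFLAGS defaults to -O3 -Wall -Wextra -Werror
sudo make install PREFIX=/usr/local    # PREFIX defaults to /usr/local
```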

39
Package.swift Normal file

@ -0,0 +1,39 @@
// swift-tools-version: 5.8
// The swift-tools-version declares the minimum version of Swift required to build this package.
import PackageDescription
let package = Package(
name: "TreeSitter",
products: [
// Products define the executables and libraries a package produces, and make them visible to other packages.
.library(
name: "TreeSitter",
targets: ["TreeSitter"]),
],
targets: [
.target(name: "TreeSitter",
path: "lib",
exclude: [
"binding_rust",
"binding_web",
"Cargo.toml",
"README.md",
"src/unicode/README.md",
"src/unicode/LICENSE",
"src/unicode/ICU_SHA",
"src/get_changed_ranges.c",
"src/tree_cursor.c",
"src/stack.c",
"src/node.c",
"src/lexer.c",
"src/parser.c",
"src/language.c",
"src/alloc.c",
"src/subtree.c",
"src/tree.c",
"src/query.c"
],
sources: ["src/lib.c"]),
]
)
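A quick sanity check of the package, assuming a Swift 5.8+ toolchain is installed:

```
swift build   # compiles the TreeSitter library target from lib/src/lib.c
```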

README.md

@ -1,9 +1,11 @@
# tree-sitter
[![Build Status](https://github.com/tree-sitter/tree-sitter/workflows/CI/badge.svg)](https://github.com/tree-sitter/tree-sitter/actions)
[![Build status](https://ci.appveyor.com/api/projects/status/vtmbd6i92e97l55w/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/tree-sitter/branch/master)
[![CICD badge]][CICD]
[![DOI](https://zenodo.org/badge/14164618.svg)](https://zenodo.org/badge/latestdoi/14164618)
[CICD badge]: https://github.com/tree-sitter/tree-sitter/actions/workflows/CICD.yml/badge.svg
[CICD]: https://github.com/tree-sitter/tree-sitter/actions/workflows/CICD.yml
Tree-sitter is a parser generator tool and an incremental parsing library. It can build a concrete syntax tree for a source file and efficiently update the syntax tree as the source file is edited. Tree-sitter aims to be:
- **General** enough to parse any programming language

16
build.zig Normal file

@ -0,0 +1,16 @@
const std = @import("std");
pub fn build(b: *std.Build) void {
var lib = b.addStaticLibrary(.{
.name = "tree-sitter",
.target = b.standardTargetOptions(.{}),
.optimize = b.standardOptimizeOption(.{}),
});
lib.linkLibC();
lib.addCSourceFile(.{ .file = .{ .path = "lib/src/lib.c" }, .flags = &.{} });
lib.addIncludePath(.{ .path = "lib/include" });
lib.addIncludePath(.{ .path = "lib/src" });
b.installArtifact(lib);
}
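And likewise for the Zig build script, assuming a Zig toolchain whose std.Build API matches the code above:

```
zig build   # produces the static tree-sitter library via installArtifact
```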

cli/Cargo.toml

@ -1,47 +1,53 @@
[package]
name = "tree-sitter-cli"
description = "CLI tool for developing, testing, and using Tree-sitter parsers"
version = "0.20.7"
version = "0.20.8"
authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
edition = "2018"
edition = "2021"
license = "MIT"
readme = "README.md"
keywords = ["incremental", "parsing"]
categories = ["command-line-utilities", "parsing"]
repository = "https://github.com/tree-sitter/tree-sitter"
rust-version.workspace = true
[[bin]]
name = "tree-sitter"
path = "src/main.rs"
doc = false
[[bench]]
name = "benchmark"
harness = false
[dependencies]
ansi_term = "0.12"
anyhow = "1.0"
atty = "0.2"
ansi_term = "0.12.1"
anyhow = "1.0.72"
atty = "0.2.14"
clap = "2.32"
difference = "2.0"
dirs = "3.0"
glob = "0.3.0"
html-escape = "0.2.6"
indexmap = "1"
lazy_static = "1.2.0"
regex = "1"
regex-syntax = "0.6.4"
rustc-hash = "1"
semver = "1.0"
serde = { version = "1.0.130", features = ["derive"] }
ctrlc = { version = "3.4.0", features = ["termination"] }
difference = "2.0.0"
dirs = "5.0.1"
glob = "0.3.1"
html-escape = "0.2.13"
indexmap = "2.0.0"
lazy_static = "1.4.0"
memchr = "2.6.3"
path-slash = "0.2.1"
regex = "1.9.1"
regex-syntax = "0.7.4"
rustc-hash = "1.1.0"
semver = "1.0.18"
# Due to https://github.com/serde-rs/serde/issues/2538
serde = { version = "1.0, < 1.0.172", features = ["derive"] }
smallbitvec = "2.5.1"
tiny_http = "0.8"
walkdir = "2.3"
webbrowser = "0.5.1"
which = "4.1.0"
tiny_http = "0.12.0"
walkdir = "2.3.3"
webbrowser = "0.8.10"
which = "4.4.0"
[dependencies.tree-sitter]
version = "0.20.3"
version = "0.20.10"
path = "../lib"
features = ["wasm"]
@ -67,14 +73,18 @@ version = "1.0"
features = ["preserve_order"]
[dependencies.log]
version = "0.4.6"
version = "0.4.19"
features = ["std"]
[dev-dependencies]
rand = "0.8"
tempfile = "3"
pretty_assertions = "0.7.2"
ctor = "0.1"
tree_sitter_proc_macro = { path = "src/tests/proc_macro", package = "tree-sitter-tests-proc-macro" }
rand = "0.8.5"
tempfile = "3.6.0"
pretty_assertions = "1.4.0"
ctor = "0.2.4"
unindent = "0.2.2"
indoc = "2.0.3"
[build-dependencies]
toml = "0.5"
toml = "0.7.6"

cli/README.md

@ -1,9 +1,11 @@
Tree-sitter CLI
===============
# Tree-sitter CLI
[![Build Status](https://travis-ci.org/tree-sitter/tree-sitter.svg?branch=master)](https://travis-ci.org/tree-sitter/tree-sitter)
[![Build status](https://ci.appveyor.com/api/projects/status/vtmbd6i92e97l55w/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/tree-sitter/branch/master)
[![Crates.io](https://img.shields.io/crates/v/tree-sitter-cli.svg)](https://crates.io/crates/tree-sitter-cli)
[![crates.io badge]][crates.io] [![npmjs.com badge]][npmjs.com]
[crates.io]: https://crates.io/crates/tree-sitter-cli
[crates.io badge]: https://img.shields.io/crates/v/tree-sitter-cli.svg?color=%23B48723
[npmjs.com]: https://www.npmjs.org/package/tree-sitter-cli
[npmjs.com badge]: https://img.shields.io/npm/v/tree-sitter-cli.svg?color=%23BF4A4A
The Tree-sitter CLI allows you to develop, test, and use Tree-sitter grammars from the command line. It works on macOS, Linux, and Windows.
@ -21,7 +23,7 @@ or with `npm`:
npm install tree-sitter-cli
```
You can also download a pre-built binary for your platform from [the releases page](https://github.com/tree-sitter/tree-sitter/releases/latest).
You can also download a pre-built binary for your platform from [the releases page].
### Dependencies
@ -32,8 +34,11 @@ The `tree-sitter` binary itself has no dependencies, but specific commands have
### Commands
* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. See [the documentation](http://tree-sitter.github.io/tree-sitter/creating-parsers) for more information.
* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. See [the documentation] for more information.
* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation](http://tree-sitter.github.io/tree-sitter/creating-parsers) for more information.
* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation] for more information.
* `parse` - The `tree-sitter parse` command will parse a file (or list of files) using Tree-sitter parsers.
[the documentation]: https://tree-sitter.github.io/tree-sitter/creating-parsers
[the releases page]: https://github.com/tree-sitter/tree-sitter/releases/latest
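Putting the commands above together, a typical session inside a grammar repository might look like this (file path illustrative):

```
tree-sitter generate
tree-sitter test
tree-sitter parse examples/hello.c
```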


@ -104,6 +104,7 @@ fn main() {
parse(&path, max_path_length, |source| {
Query::new(language, str::from_utf8(source).unwrap())
.with_context(|| format!("Query file path: {path:?}"))
.expect("Failed to parse query");
});
}

cli/build.rs

@ -1,3 +1,4 @@
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
use std::{env, fs};
@ -60,7 +61,39 @@ fn read_git_sha() -> Option<String> {
// If we're on a branch, read the SHA from the ref file.
if head_content.starts_with("ref: ") {
head_content.replace_range(0.."ref: ".len(), "");
let ref_filename = git_dir_path.join(&head_content);
let ref_filename = {
// Go to real non-worktree gitdir
let git_dir_path = git_dir_path
.parent()
.map(|p| {
p.file_name()
.map(|n| n == OsStr::new("worktrees"))
.and_then(|x| x.then(|| p.parent()))
})
.flatten()
.flatten()
.unwrap_or(&git_dir_path);
let file = git_dir_path.join(&head_content);
if file.is_file() {
file
} else {
let packed_refs = git_dir_path.join("packed-refs");
if let Ok(packed_refs_content) = fs::read_to_string(&packed_refs) {
for line in packed_refs_content.lines() {
if let Some((hash, r#ref)) = line.split_once(' ') {
if r#ref == head_content {
if let Some(path) = packed_refs.to_str() {
println!("cargo:rerun-if-changed={}", path);
}
return Some(hash.to_string());
}
}
}
}
return None;
}
};
if let Some(path) = ref_filename.to_str() {
println!("cargo:rerun-if-changed={}", path);
}
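For context on the worktree handling above: in a linked worktree, .git is a plain file that points into the main repository's gitdir, which is the path the new code walks back out of:

```
cat .git   # in a worktree prints: gitdir: /path/to/main/.git/worktrees/<name>
```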

cli/config/Cargo.toml

@ -9,12 +9,14 @@ readme = "README.md"
keywords = ["incremental", "parsing"]
categories = ["command-line-utilities", "parsing"]
repository = "https://github.com/tree-sitter/tree-sitter"
rust-version.workspace = true
[dependencies]
anyhow = "1.0"
dirs = "3.0"
serde = { version = "1.0.130", features = ["derive"] }
# Due to https://github.com/serde-rs/serde/issues/2538
serde = { version = "1.0, < 1.0.172", features = ["derive"] }
[dependencies.serde_json]
version = "1.0.45"
version = "1.0"
features = ["preserve_order"]

cli/config/README.md

@ -1,5 +1,7 @@
# `tree-sitter-config`
# Tree-sitter Config
Manages Tree-sitter's configuration file.
You can use a configuration file to control the behavior of the `tree-sitter`
command-line program. This crate implements the logic for finding and then
parsing the contents of the configuration file.

cli/config/src/lib.rs

@ -1,4 +1,4 @@
//! Manages tree-sitter's configuration file.
#![doc = include_str!("../README.md")]
use anyhow::{anyhow, Context, Result};
use serde::{Deserialize, Serialize};
@ -25,6 +25,9 @@ impl Config {
if let Ok(path) = env::var("TREE_SITTER_DIR") {
let mut path = PathBuf::from(path);
path.push("config.json");
if !path.exists() {
return Ok(None);
}
if path.is_file() {
return Ok(Some(path));
}
@ -37,7 +40,8 @@ impl Config {
let legacy_path = dirs::home_dir()
.ok_or(anyhow!("Cannot determine home directory"))?
.join(".tree-sitter/config.json");
.join(".tree-sitter")
.join("config.json");
if legacy_path.is_file() {
return Ok(Some(legacy_path));
}
@ -48,7 +52,8 @@ impl Config {
fn xdg_config_file() -> Result<PathBuf> {
let xdg_path = dirs::config_dir()
.ok_or(anyhow!("Cannot determine config directory"))?
.join("tree-sitter/config.json");
.join("tree-sitter")
.join("config.json");
Ok(xdg_path)
}
@ -79,7 +84,13 @@ impl Config {
///
/// (Note that this is typically only done by the `tree-sitter init-config` command.)
pub fn initial() -> Result<Config> {
let location = Self::xdg_config_file()?;
let location = if let Ok(path) = env::var("TREE_SITTER_DIR") {
let mut path = PathBuf::from(path);
path.push("config.json");
path
} else {
Self::xdg_config_file()?
};
let config = serde_json::json!({});
Ok(Config { location, config })
}
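A sketch of the TREE_SITTER_DIR override that both the config lookup and initial() now honor (directory name illustrative):

```
TREE_SITTER_DIR="$PWD/.tree-sitter-test" tree-sitter init-config
# config.json is created under $PWD/.tree-sitter-test instead of the XDG config path
```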

cli/loader/Cargo.toml

@ -9,6 +9,7 @@ readme = "README.md"
keywords = ["incremental", "parsing"]
categories = ["command-line-utilities", "parsing"]
repository = "https://github.com/tree-sitter/tree-sitter"
rust-version.workspace = true
[features]
wasm = ["tree-sitter/wasm"]
@ -20,7 +21,8 @@ dirs = "3.0"
libloading = "0.7"
once_cell = "1.7"
regex = "1"
serde = { version = "1.0.130", features = ["derive"] }
# Due to https://github.com/serde-rs/serde/issues/2538
serde = { version = "1.0, < 1.0.172", features = ["derive"] }
which = "4.1.0"
[dependencies.serde_json]

cli/loader/README.md

@ -1,6 +1,6 @@
# `tree-sitter-loader`
# Tree-sitter Loader
The `tree-sitter` command-line program will dynamically find and build grammars
at runtime, if you have cloned the grammars' repositories to your local
filesystem. This helper crate implements that logic, so that you can use it in
your own program analysis tools, as well.

cli/loader/emscripten-version

@ -1 +1 @@
3.1.25
3.1.37

cli/loader/src/lib.rs

@ -1,3 +1,5 @@
#![doc = include_str!("../README.md")]
use anyhow::{anyhow, Context, Error, Result};
use libloading::{Library, Symbol};
use once_cell::unsync::OnceCell;
@ -11,7 +13,7 @@ use std::path::{Path, PathBuf};
use std::process::Command;
use std::sync::Mutex;
use std::time::SystemTime;
use std::{fs, mem};
use std::{env, fs, mem};
use tree_sitter::{Language, QueryError, QueryErrorKind};
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
@ -72,12 +74,12 @@ impl Config {
}
#[cfg(unix)]
const DYLIB_EXTENSION: &'static str = "so";
const DYLIB_EXTENSION: &str = "so";
#[cfg(windows)]
const DYLIB_EXTENSION: &'static str = "dll";
const BUILD_TARGET: &'static str = env!("BUILD_TARGET");
const BUILD_TARGET: &str = env!("BUILD_TARGET");
pub struct LanguageConfiguration<'a> {
pub scope: Option<String>,
@ -90,6 +92,7 @@ pub struct LanguageConfiguration<'a> {
pub injections_filenames: Option<Vec<String>>,
pub locals_filenames: Option<Vec<String>>,
pub tags_filenames: Option<Vec<String>>,
pub language_name: String,
language_id: usize,
highlight_config: OnceCell<Option<HighlightConfiguration>>,
tags_config: OnceCell<Option<TagsConfiguration>>,
@ -102,6 +105,7 @@ pub struct Loader {
languages_by_id: Vec<(PathBuf, OnceCell<Language>)>,
language_configurations: Vec<LanguageConfiguration<'static>>,
language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
language_configuration_in_current_path: Option<usize>,
highlight_names: Box<Mutex<Vec<String>>>,
use_all_highlight_names: bool,
debug_build: bool,
@ -115,9 +119,13 @@ unsafe impl Sync for Loader {}
impl Loader {
pub fn new() -> Result<Self> {
let parser_lib_path = dirs::cache_dir()
.ok_or(anyhow!("Cannot determine cache directory"))?
.join("tree-sitter/lib");
let parser_lib_path = match env::var("TREE_SITTER_LIBDIR") {
Ok(path) => PathBuf::from(path),
_ => dirs::cache_dir()
.ok_or(anyhow!("Cannot determine cache directory"))?
.join("tree-sitter")
.join("lib"),
};
Ok(Self::with_parser_lib_path(parser_lib_path))
}
@ -127,6 +135,7 @@ impl Loader {
languages_by_id: Vec::new(),
language_configurations: Vec::new(),
language_configuration_ids_by_file_type: HashMap::new(),
language_configuration_in_current_path: None,
highlight_names: Box::new(Mutex::new(Vec::new())),
use_all_highlight_names: true,
debug_build: false,
@ -136,7 +145,7 @@ impl Loader {
}
}
pub fn configure_highlights(&mut self, names: &Vec<String>) {
pub fn configure_highlights(&mut self, names: &[String]) {
self.use_all_highlight_names = false;
let mut highlights = self.highlight_names.lock().unwrap();
highlights.clear();
@ -152,8 +161,7 @@ impl Loader {
eprintln!("Warning: You have not configured any parser directories!");
eprintln!("Please run `tree-sitter init-config` and edit the resulting");
eprintln!("configuration file to indicate where we should look for");
eprintln!("language grammars.");
eprintln!("");
eprintln!("language grammars.\n");
}
for parser_container_dir in &config.parser_directories {
if let Ok(entries) = fs::read_dir(parser_container_dir) {
@ -163,6 +171,7 @@ impl Loader {
if parser_dir_name.starts_with("tree-sitter-") {
self.find_language_configurations_at_path(
&parser_container_dir.join(parser_dir_name),
false,
)
.ok();
}
@ -174,7 +183,7 @@ impl Loader {
}
pub fn languages_at_path(&mut self, path: &Path) -> Result<Vec<Language>> {
if let Ok(configurations) = self.find_language_configurations_at_path(path) {
if let Ok(configurations) = self.find_language_configurations_at_path(path, true) {
let mut language_ids = configurations
.iter()
.map(|c| c.language_id)
@ -365,7 +374,7 @@ impl Loader {
library_path.set_extension(DYLIB_EXTENSION);
}
let recompile = needs_recompile(&library_path, &parser_path, &scanner_path)
let recompile = needs_recompile(&library_path, &parser_path, scanner_path.as_deref())
.with_context(|| "Failed to compare source and binary timestamps")?;
if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() {
@ -411,7 +420,7 @@ impl Loader {
header_path: &Path,
parser_path: &Path,
scanner_path: &Option<PathBuf>,
output_path: &PathBuf,
library_path: &PathBuf,
) -> Result<(), Error> {
let mut config = cc::Build::new();
config
@ -419,13 +428,15 @@ impl Loader {
.opt_level(2)
.cargo_metadata(false)
.target(BUILD_TARGET)
.host(BUILD_TARGET);
.host(BUILD_TARGET)
.flag_if_supported("-Werror=implicit-function-declaration");
let compiler = config.get_compiler();
let mut command = Command::new(compiler.path());
for (key, value) in compiler.env() {
command.env(key, value);
}
if cfg!(windows) {
if compiler.is_like_msvc() {
command.args(&["/nologo", "/LD", "/I"]).arg(header_path);
if self.debug_build {
command.arg("/Od");
@ -438,17 +449,20 @@ impl Loader {
}
command
.arg("/link")
.arg(format!("/out:{}", output_path.to_str().unwrap()));
.arg(format!("/out:{}", library_path.to_str().unwrap()));
} else {
command
.arg("-shared")
.arg("-fPIC")
.arg("-fno-exceptions")
.arg("-g")
.arg("-I")
.arg(header_path)
.arg("-o")
.arg(output_path);
.arg(&library_path);
if !cfg!(windows) {
command.arg("-fPIC");
}
if self.debug_build {
command.arg("-O0");
@ -469,6 +483,7 @@ impl Loader {
}
command.arg("-xc").arg(parser_path);
}
let output = command
.output()
.with_context(|| "Failed to execute C compiler")?;
@ -479,6 +494,37 @@ impl Loader {
String::from_utf8_lossy(&output.stderr)
));
}
#[cfg(any(target_os = "macos", target_os = "linux"))]
if scanner_path.is_some() {
let command = Command::new("nm")
.arg("-W")
.arg("-U")
.arg(&library_path)
.output();
if let Ok(output) = command {
if output.status.success() {
let mut found_non_static = false;
for line in String::from_utf8_lossy(&output.stdout).lines() {
if line.contains(" T ") && !line.contains("tree_sitter_") {
if let Some(function_name) =
line.split_whitespace().collect::<Vec<_>>().get(2)
{
if !found_non_static {
found_non_static = true;
eprintln!("Warning: Found non-static non-tree-sitter functions in external scannner");
}
eprintln!(" `{function_name}`");
}
}
}
if found_non_static {
eprintln!("Consider making these functions static, they can cause conflicts when another tree-sitter project uses the same function name");
}
}
}
}
Ok(())
}
@ -581,6 +627,7 @@ impl Loader {
pub fn highlight_config_for_injection_string<'a>(
&'a self,
string: &str,
apply_all_captures: bool,
) -> Option<&'a HighlightConfiguration> {
match self.language_configuration_for_injection_string(string) {
Err(e) => {
@ -591,38 +638,36 @@ impl Loader {
None
}
Ok(None) => None,
Ok(Some((language, configuration))) => match configuration.highlight_config(language) {
Err(e) => {
eprintln!(
"Failed to load property sheet for injection string '{}': {}",
string, e
);
None
Ok(Some((language, configuration))) => {
match configuration.highlight_config(language, apply_all_captures, None) {
Err(e) => {
eprintln!(
"Failed to load property sheet for injection string '{}': {}",
string, e
);
None
}
Ok(None) => None,
Ok(Some(config)) => Some(config),
}
Ok(None) => None,
Ok(Some(config)) => Some(config),
},
}
}
}
pub fn find_language_configurations_at_path<'a>(
&'a mut self,
pub fn find_language_configurations_at_path(
&mut self,
parser_path: &Path,
set_current_path_config: bool,
) -> Result<&[LanguageConfiguration]> {
#[derive(Deserialize)]
#[derive(Default, Deserialize)]
#[serde(untagged)]
enum PathsJSON {
#[default]
Empty,
Single(String),
Multiple(Vec<String>),
}
impl Default for PathsJSON {
fn default() -> Self {
PathsJSON::Empty
}
}
impl PathsJSON {
fn into_vec(self) -> Option<Vec<String>> {
match self {
@ -663,6 +708,11 @@ impl Loader {
tree_sitter: Vec<LanguageConfigurationJSON>,
}
#[derive(Deserialize)]
struct GrammarJSON {
name: String,
}
let initial_language_configuration_count = self.language_configurations.len();
if let Ok(package_json_contents) = fs::read_to_string(&parser_path.join("package.json")) {
@ -674,6 +724,13 @@ impl Loader {
// the package.json, but defaults to the directory containing the package.json.
let language_path = parser_path.join(config_json.path);
let grammar_path = language_path.join("src").join("grammar.json");
let mut grammar_file = fs::File::open(grammar_path)
.with_context(|| "Failed to read grammar.json")?;
let grammar_json: GrammarJSON =
serde_json::from_reader(BufReader::new(&mut grammar_file))
.with_context(|| "Failed to parse grammar.json")?;
// Determine if a previous language configuration in this package.json file
// already uses the same language.
let mut language_id = None;
@ -693,6 +750,7 @@ impl Loader {
let configuration = LanguageConfiguration {
root_path: parser_path.to_path_buf(),
language_name: grammar_json.name.clone(),
scope: config_json.scope,
language_id,
file_types: config_json.file_types.unwrap_or(Vec::new()),
@ -705,19 +763,26 @@ impl Loader {
highlights_filenames: config_json.highlights.into_vec(),
highlight_config: OnceCell::new(),
tags_config: OnceCell::new(),
highlight_names: &*self.highlight_names,
highlight_names: &self.highlight_names,
use_all_highlight_names: self.use_all_highlight_names,
};
for file_type in &configuration.file_types {
self.language_configuration_ids_by_file_type
.entry(file_type.to_string())
.or_insert(Vec::new())
.or_default()
.push(self.language_configurations.len());
}
self.language_configurations
.push(unsafe { mem::transmute(configuration) });
if set_current_path_config
&& self.language_configuration_in_current_path.is_none()
{
self.language_configuration_in_current_path =
Some(self.language_configurations.len() - 1);
}
}
}
}
@ -725,8 +790,15 @@ impl Loader {
if self.language_configurations.len() == initial_language_configuration_count
&& parser_path.join("src").join("grammar.json").exists()
{
let grammar_path = parser_path.join("src").join("grammar.json");
let mut grammar_file =
fs::File::open(grammar_path).with_context(|| "Failed to read grammar.json")?;
let grammar_json: GrammarJSON =
serde_json::from_reader(BufReader::new(&mut grammar_file))
.with_context(|| "Failed to parse grammar.json")?;
let configuration = LanguageConfiguration {
root_path: parser_path.to_owned(),
language_name: grammar_json.name,
language_id: self.languages_by_id.len(),
file_types: Vec::new(),
scope: None,
@ -739,7 +811,7 @@ impl Loader {
tags_filenames: None,
highlight_config: OnceCell::new(),
tags_config: OnceCell::new(),
highlight_names: &*self.highlight_names,
highlight_names: &self.highlight_names,
use_all_highlight_names: self.use_all_highlight_names,
};
self.language_configurations
@ -764,11 +836,11 @@ impl Loader {
if let Some(scope) = scope {
if let Some(config) = self
.language_configuration_for_scope(scope)
.with_context(|| format!("Failed to load language for scope '{}'", scope))?
.with_context(|| format!("Failed to load language for scope '{scope}'"))?
{
Ok(config.0)
} else {
return Err(anyhow!("Unknown scope '{}'", scope));
Err(anyhow!("Unknown scope '{scope}'"))
}
} else if let Some((lang, _)) = self
.language_configuration_for_file_name(path)
@ -780,8 +852,10 @@ impl Loader {
})?
{
Ok(lang)
} else if let Some(id) = self.language_configuration_in_current_path {
Ok(self.language_for_id(self.language_configurations[id].language_id)?)
} else if let Some(lang) = self
.languages_at_path(&current_dir)
.languages_at_path(current_dir)
.with_context(|| "Failed to load language in current directory")?
.first()
.cloned()
@ -803,25 +877,76 @@ impl Loader {
}
impl<'a> LanguageConfiguration<'a> {
pub fn highlight_config(&self, language: Language) -> Result<Option<&HighlightConfiguration>> {
pub fn highlight_config(
&self,
language: Language,
apply_all_captures: bool,
paths: Option<&[String]>,
) -> Result<Option<&HighlightConfiguration>> {
let (highlights_filenames, injections_filenames, locals_filenames) = match paths {
Some(paths) => (
Some(
paths
.iter()
.filter(|p| p.ends_with("highlights.scm"))
.cloned()
.collect::<Vec<_>>(),
),
Some(
paths
.iter()
.filter(|p| p.ends_with("tags.scm"))
.cloned()
.collect::<Vec<_>>(),
),
Some(
paths
.iter()
.filter(|p| p.ends_with("locals.scm"))
.cloned()
.collect::<Vec<_>>(),
),
),
None => (None, None, None),
};
return self
.highlight_config
.get_or_try_init(|| {
let (highlights_query, highlight_ranges) =
self.read_queries(&self.highlights_filenames, "highlights.scm")?;
let (injections_query, injection_ranges) =
self.read_queries(&self.injections_filenames, "injections.scm")?;
let (locals_query, locals_ranges) =
self.read_queries(&self.locals_filenames, "locals.scm")?;
let (highlights_query, highlight_ranges) = self.read_queries(
if highlights_filenames.is_some() {
highlights_filenames.as_deref()
} else {
self.highlights_filenames.as_deref()
},
"highlights.scm",
)?;
let (injections_query, injection_ranges) = self.read_queries(
if injections_filenames.is_some() {
injections_filenames.as_deref()
} else {
self.injections_filenames.as_deref()
},
"injections.scm",
)?;
let (locals_query, locals_ranges) = self.read_queries(
if locals_filenames.is_some() {
locals_filenames.as_deref()
} else {
self.locals_filenames.as_deref()
},
"locals.scm",
)?;
if highlights_query.is_empty() {
Ok(None)
} else {
let mut result = HighlightConfiguration::new(
language,
&self.language_name,
&highlights_query,
&injections_query,
&locals_query,
apply_all_captures,
)
.map_err(|error| match error.kind {
QueryErrorKind::Language => Error::from(error),
@ -853,12 +978,12 @@ impl<'a> LanguageConfiguration<'a> {
let mut all_highlight_names = self.highlight_names.lock().unwrap();
if self.use_all_highlight_names {
for capture_name in result.query.capture_names() {
if !all_highlight_names.contains(capture_name) {
all_highlight_names.push(capture_name.clone());
if !all_highlight_names.iter().any(|x| x == capture_name) {
all_highlight_names.push(capture_name.to_string());
}
}
}
result.configure(&all_highlight_names.as_slice());
result.configure(all_highlight_names.as_slice());
Ok(Some(result))
}
})
@ -869,9 +994,9 @@ impl<'a> LanguageConfiguration<'a> {
self.tags_config
.get_or_try_init(|| {
let (tags_query, tags_ranges) =
self.read_queries(&self.tags_filenames, "tags.scm")?;
self.read_queries(self.tags_filenames.as_deref(), "tags.scm")?;
let (locals_query, locals_ranges) =
self.read_queries(&self.locals_filenames, "locals.scm")?;
self.read_queries(self.locals_filenames.as_deref(), "locals.scm")?;
if tags_query.is_empty() {
Ok(None)
} else {
@ -894,7 +1019,6 @@ impl<'a> LanguageConfiguration<'a> {
locals_query.len(),
)
}
.into()
} else {
error.into()
}
@ -904,9 +1028,9 @@ impl<'a> LanguageConfiguration<'a> {
.map(Option::as_ref)
}
fn include_path_in_query_error<'b>(
fn include_path_in_query_error(
mut error: QueryError,
ranges: &'b Vec<(String, Range<usize>)>,
ranges: &[(String, Range<usize>)],
source: &str,
start_offset: usize,
) -> Error {
@ -914,7 +1038,7 @@ impl<'a> LanguageConfiguration<'a> {
let (path, range) = ranges
.iter()
.find(|(_, range)| range.contains(&offset_within_section))
.unwrap();
.unwrap_or(ranges.last().unwrap());
error.offset = offset_within_section - range.start;
error.row = source[range.start..offset_within_section]
.chars()
@ -925,12 +1049,12 @@ impl<'a> LanguageConfiguration<'a> {
fn read_queries(
&self,
paths: &Option<Vec<String>>,
paths: Option<&[String]>,
default_path: &str,
) -> Result<(String, Vec<(String, Range<usize>)>)> {
let mut query = String::new();
let mut path_ranges = Vec::new();
if let Some(paths) = paths.as_ref() {
if let Some(paths) = paths {
for path in paths {
let abs_path = self.root_path.join(path);
let prev_query_len = query.len();
@ -955,7 +1079,7 @@ impl<'a> LanguageConfiguration<'a> {
fn needs_recompile(
lib_path: &Path,
parser_c_path: &Path,
scanner_path: &Option<PathBuf>,
scanner_path: Option<&Path>,
) -> Result<bool> {
if !lib_path.exists() {
return Ok(true);
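Two of the loader changes above are easy to exercise from a shell: the TREE_SITTER_LIBDIR override for where compiled parsers are cached, and the nm scan for non-static symbols in external scanners (paths illustrative):

```
export TREE_SITTER_LIBDIR="$HOME/.cache/my-parsers"   # instead of ~/.cache/tree-sitter/lib
nm -W -U "$TREE_SITTER_LIBDIR/c.so" | grep ' T '      # list exported text symbols
```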

1
cli/npm/.gitignore vendored

@ -3,3 +3,4 @@ tree-sitter.exe
*.gz
*.tgz
LICENSE
README.md

62
cli/npm/dsl.d.ts vendored

@ -1,19 +1,19 @@
type AliasRule = {type: 'ALIAS'; named: boolean; content: Rule; value: string};
type BlankRule = {type: 'BLANK'};
type ChoiceRule = {type: 'CHOICE'; members: Rule[]};
type FieldRule = {type: 'FIELD'; name: string; content: Rule};
type ImmediateTokenRule = {type: 'IMMEDIATE_TOKEN'; content: Rule};
type PatternRule = {type: 'PATTERN'; value: string};
type PrecDynamicRule = {type: 'PREC_DYNAMIC'; content: Rule; value: number};
type PrecLeftRule = {type: 'PREC_LEFT'; content: Rule; value: number};
type PrecRightRule = {type: 'PREC_RIGHT'; content: Rule; value: number};
type PrecRule = {type: 'PREC'; content: Rule; value: number};
type Repeat1Rule = {type: 'REPEAT1'; content: Rule};
type RepeatRule = {type: 'REPEAT'; content: Rule};
type SeqRule = {type: 'SEQ'; members: Rule[]};
type StringRule = {type: 'STRING'; value: string};
type SymbolRule<Name extends string> = {type: 'SYMBOL'; name: Name};
type TokenRule = {type: 'TOKEN'; content: Rule};
type AliasRule = { type: 'ALIAS'; named: boolean; content: Rule; value: string };
type BlankRule = { type: 'BLANK' };
type ChoiceRule = { type: 'CHOICE'; members: Rule[] };
type FieldRule = { type: 'FIELD'; name: string; content: Rule };
type ImmediateTokenRule = { type: 'IMMEDIATE_TOKEN'; content: Rule };
type PatternRule = { type: 'PATTERN'; value: string };
type PrecDynamicRule = { type: 'PREC_DYNAMIC'; content: Rule; value: number };
type PrecLeftRule = { type: 'PREC_LEFT'; content: Rule; value: number };
type PrecRightRule = { type: 'PREC_RIGHT'; content: Rule; value: number };
type PrecRule = { type: 'PREC'; content: Rule; value: number };
type Repeat1Rule = { type: 'REPEAT1'; content: Rule };
type RepeatRule = { type: 'REPEAT'; content: Rule };
type SeqRule = { type: 'SEQ'; members: Rule[] };
type StringRule = { type: 'STRING'; value: string };
type SymbolRule<Name extends string> = { type: 'SYMBOL'; name: Name };
type TokenRule = { type: 'TOKEN'; content: Rule };
type Rule =
| AliasRule
@ -42,14 +42,15 @@ type GrammarSymbols<RuleName extends string> = {
type RuleBuilder<RuleName extends string> = (
$: GrammarSymbols<RuleName>,
previous: Rule,
) => RuleOrLiteral;
type RuleBuilders<
RuleName extends string,
BaseGrammarRuleName extends string
> = {
[name in RuleName]: RuleBuilder<RuleName | BaseGrammarRuleName>;
};
[name in RuleName]: RuleBuilder<RuleName | BaseGrammarRuleName>;
};
interface Grammar<
RuleName extends string,
@ -68,11 +69,17 @@ interface Grammar<
rules: Rules;
/**
* An array of arrays of precedence names. Each inner array represents
* a *descending* ordering. Names listed earlier in one of these arrays
* have higher precedence than any names listed later in the same array.
* An array of arrays of precedence names or rules. Each inner array represents
* a *descending* ordering. Names/rules listed earlier in one of these arrays
* have higher precedence than any names/rules listed later in the same array.
*
* Using a rule here is shorthand for declaring a name and then calling
* prec() with that name; it is purely a convenience.
*/
precedences?: () => String[][],
precedences?: (
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
previous: Rule[][],
) => RuleOrLiteral[][],
/**
* An array of arrays of rule names. Each inner array represents a set of
@ -86,6 +93,7 @@ interface Grammar<
*/
conflicts?: (
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
previous: Rule[][],
) => RuleOrLiteral[][];
/**
@ -102,7 +110,7 @@ interface Grammar<
externals?: (
$: Record<string, SymbolRule<string>>,
previous: Rule[],
) => SymbolRule<string>[];
) => RuleOrLiteral[];
/**
* An array of tokens that may appear anywhere in the language. This
@ -126,6 +134,7 @@ interface Grammar<
*/
inline?: (
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
previous: Rule[],
) => RuleOrLiteral[];
/**
@ -134,10 +143,11 @@ interface Grammar<
*
* @param $ grammar rules
*
* @see http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
* @see https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
*/
supertypes?: (
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
previous: Rule[],
) => RuleOrLiteral[];
/**
@ -153,8 +163,8 @@ interface Grammar<
type GrammarSchema<RuleName extends string> = {
[K in keyof Grammar<RuleName>]: K extends 'rules'
? Record<RuleName, Rule>
: Grammar<RuleName>[K];
? Record<RuleName, Rule>
: Grammar<RuleName>[K];
};
/**

View file

@ -6,25 +6,54 @@ const http = require('http');
const https = require('https');
const packageJSON = require('./package.json');
// Determine the URL of the file.
const platformName = {
'darwin': 'macos',
'linux': 'linux',
'win32': 'windows'
}[process.platform];
let archName = {
'x64': 'x64',
'x86': 'x86',
'ia32': 'x86'
}[process.arch];
// ARM macs can run x64 binaries via Rosetta. Rely on that for now.
if (platformName === 'macos' && process.arch === 'arm64') {
archName = 'x64';
// See the results table in https://github.com/tree-sitter/tree-sitter/issues/2196
const matrix = {
platform: {
'darwin': {
name: 'macos',
arch: {
'arm64': { name: 'arm64' },
'x64': { name: 'x64' },
}
},
'linux': {
name: 'linux',
arch: {
'arm64': { name: 'arm64' },
'arm': { name: 'arm' },
'armv7l': { name: 'armv7l' },
'x64': { name: 'x64' },
'x86': { name: 'x86' },
'i586': { name: 'i586' },
'mips': { name: 'mips' },
'mips64': { name: 'mips64' },
'mipsel': { name: 'mipsel' },
'mips64el': { name: 'mips64el' },
'ppc': { name: 'powerpc' },
'ppc64': { name: 'powerpc64' },
'ppc64el': { name: 'powerpc64el' },
'riscv64gc': { name: 'riscv64gc' },
's390x': { name: 's390x' },
'sparc64': { name: 'sparc64' },
}
},
'win32': {
name: 'windows',
arch: {
'arm64': { name: 'arm64' },
'x64': { name: 'x64' },
'x86': { name: 'x86' },
'ia32': { name: 'x86' },
}
},
},
}
if (!platformName || !archName) {
// Determine the URL of the file.
const platform = matrix.platform[process.platform];
const arch = platform && platform.arch[process.arch];
if (!platform || !platform.name || !arch || !arch.name) {
console.error(
`Cannot install tree-sitter-cli for platform ${process.platform}, architecture ${process.arch}`
);
@ -32,7 +61,7 @@ if (!platformName || !archName) {
}
const releaseURL = `https://github.com/tree-sitter/tree-sitter/releases/download/v${packageJSON.version}`;
const assetName = `tree-sitter-${platformName}-${archName}.gz`;
const assetName = `tree-sitter-${platform.name}-${arch.name}.gz`;
const assetURL = `${releaseURL}/${assetName}`;
// Remove previously-downloaded files.

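The nested platform/arch matrix replaces the old flat lookups. A condensed Rust rendering of the same idea, with only two platforms and a few arch entries carried over from the table above:

use std::collections::HashMap;

fn asset_name(platform: &str, arch: &str) -> Option<String> {
    let matrix: HashMap<&str, (&str, HashMap<&str, &str>)> = HashMap::from([
        ("darwin", ("macos", HashMap::from([("arm64", "arm64"), ("x64", "x64")]))),
        ("win32", ("windows", HashMap::from([("ia32", "x86"), ("x64", "x64")]))),
    ]);
    let (name, archs) = matrix.get(platform)?;
    Some(format!("tree-sitter-{}-{}.gz", name, archs.get(arch)?))
}

fn main() {
    // win32/ia32 normalizes to the windows/x86 release asset.
    assert_eq!(
        asset_name("win32", "ia32").as_deref(),
        Some("tree-sitter-windows-x86.gz")
    );
}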
View file

@ -1,11 +1,11 @@
{
"name": "tree-sitter-cli",
"version": "0.20.7",
"version": "0.20.8",
"author": "Max Brunsfeld",
"license": "MIT",
"repository": {
"type": "git",
"url": "http://github.com/tree-sitter/tree-sitter.git"
"url": "https://github.com/tree-sitter/tree-sitter.git"
},
"description": "CLI for generating fast incremental parsers",
"keywords": [
@ -15,7 +15,8 @@
"main": "lib/api/index.js",
"scripts": {
"install": "node install.js",
"prepack": "cp ../../LICENSE ."
"prepack": "cp ../../LICENSE ../README.md .",
"postpack": "rm LICENSE README.md"
},
"bin": {
"tree-sitter": "cli.js"

View file

@ -57,6 +57,7 @@ struct ParseTableBuilder<'a> {
parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
parse_state_queue: VecDeque<ParseStateQueueEntry>,
non_terminal_extra_states: Vec<(Symbol, usize)>,
actual_conflicts: HashSet<Vec<Symbol>>,
parse_table: ParseTable,
}
@ -132,6 +133,20 @@ impl<'a> ParseTableBuilder<'a> {
)?;
}
if !self.actual_conflicts.is_empty() {
println!("Warning: unnecessary conflicts");
for conflict in &self.actual_conflicts {
println!(
" {}",
conflict
.iter()
.map(|symbol| format!("`{}`", self.symbol_name(symbol)))
.collect::<Vec<_>>()
.join(", ")
);
}
}
Ok((self.parse_table, self.parse_state_info_by_id))
}
@ -582,6 +597,7 @@ impl<'a> ParseTableBuilder<'a> {
.expected_conflicts
.contains(&actual_conflict)
{
self.actual_conflicts.remove(&actual_conflict);
return Ok(());
}
@ -964,6 +980,7 @@ pub(crate) fn build_parse_table<'a>(
inlines: &'a InlinedProductionMap,
variable_info: &'a Vec<VariableInfo>,
) -> Result<(ParseTable, Vec<TokenSet>, Vec<ParseStateInfo<'a>>)> {
let actual_conflicts = syntax_grammar.expected_conflicts.iter().cloned().collect();
let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()];
populate_following_tokens(
@ -979,6 +996,7 @@ pub(crate) fn build_parse_table<'a>(
item_set_builder,
variable_info,
non_terminal_extra_states: Vec::new(),
actual_conflicts,
state_ids_by_item_set: IndexMap::default(),
core_ids_by_core: HashMap::new(),
parse_state_info_by_id: Vec::new(),

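The new `actual_conflicts` bookkeeping is a set difference: seed a set with every conflict declared in the grammar, remove each one the table builder actually encounters, and warn about whatever is left. A toy version with `u32` standing in for `Symbol`:

use std::collections::HashSet;

fn unnecessary_conflicts(expected: &[Vec<u32>], used: &[Vec<u32>]) -> HashSet<Vec<u32>> {
    let mut remaining: HashSet<Vec<u32>> = expected.iter().cloned().collect();
    for conflict in used {
        remaining.remove(conflict); // this declared conflict earned its keep
    }
    remaining // everything left here triggers the warning above
}

fn main() {
    let expected = vec![vec![1, 2], vec![3, 4]];
    let used = vec![vec![1, 2]];
    assert!(unnecessary_conflicts(&expected, &used).contains(&vec![3, 4]));
}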
View file

@ -390,12 +390,12 @@ mod tests {
Variable {
name: "token_0".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("[a-f]1|0x\\d"),
rule: Rule::pattern("[a-f]1|0x\\d", ""),
},
Variable {
name: "token_1".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("d*ef"),
rule: Rule::pattern("d*ef", ""),
},
],
})
@ -426,7 +426,7 @@ mod tests {
Variable {
name: "identifier".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("\\w+"),
rule: Rule::pattern("\\w+", ""),
},
Variable {
name: "instanceof".to_string(),
@ -471,7 +471,7 @@ mod tests {
#[test]
fn test_token_conflicts_with_separators() {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: vec![Rule::pattern("\\s")],
separators: vec![Rule::pattern("\\s", "")],
variables: vec![
Variable {
name: "x".to_string(),
@ -498,7 +498,7 @@ mod tests {
#[test]
fn test_token_conflicts_with_open_ended_tokens() {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: vec![Rule::pattern("\\s")],
separators: vec![Rule::pattern("\\s", "")],
variables: vec![
Variable {
name: "x".to_string(),
@ -508,7 +508,7 @@ mod tests {
Variable {
name: "anything".to_string(),
kind: VariableType::Named,
rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*")),
rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*", "")),
},
],
})

View file

@ -181,7 +181,11 @@ function normalize(value) {
value
};
case RegExp:
return {
return value.flags ? {
type: 'PATTERN',
value: value.source,
flags: value.flags
} : {
type: 'PATTERN',
value: value.source
};

View file

@ -63,7 +63,7 @@
},
"supertypes": {
"description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.",
"description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.",
"type": "array",
"items": {
"description": "the name of a rule in `rules` or `extras`",

View file

@ -21,10 +21,10 @@ use anyhow::{anyhow, Context, Result};
use lazy_static::lazy_static;
use regex::{Regex, RegexBuilder};
use semver::Version;
use std::fs;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::{env, fs};
lazy_static! {
static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*")
@ -44,25 +44,27 @@ pub fn generate_parser_in_directory(
abi_version: usize,
generate_bindings: bool,
report_symbol_name: Option<&str>,
js_runtime: Option<&str>,
) -> Result<()> {
let src_path = repo_path.join("src");
let header_path = src_path.join("tree_sitter");
// Read the grammar.json.
let grammar_json = match grammar_path {
Some(path) => load_grammar_file(path.as_ref(), js_runtime)?,
None => {
let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into());
load_grammar_file(&grammar_js_path, js_runtime)?
}
};
// Ensure that the output directories exist.
fs::create_dir_all(&src_path)?;
fs::create_dir_all(&header_path)?;
// Read the grammar.json.
let grammar_json;
match grammar_path {
Some(path) => {
grammar_json = load_grammar_file(path.as_ref())?;
}
None => {
let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into());
grammar_json = load_grammar_file(&grammar_js_path)?;
fs::write(&src_path.join("grammar.json"), &grammar_json)?;
}
if grammar_path.is_none() {
fs::write(&src_path.join("grammar.json"), &grammar_json)
.with_context(|| format!("Failed to write grammar.json to {:?}", src_path))?;
}
// Parse and preprocess the grammar.
@ -155,10 +157,18 @@ fn generate_parser_for_grammar_with_opts(
})
}
pub fn load_grammar_file(grammar_path: &Path) -> Result<String> {
pub fn load_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
if grammar_path.is_dir() {
return Err(anyhow!(
"Path to a grammar file with `.js` or `.json` extension is required"
));
}
match grammar_path.extension().and_then(|e| e.to_str()) {
Some("js") => Ok(load_js_grammar_file(grammar_path)?),
Some("json") => Ok(fs::read_to_string(grammar_path)?),
Some("js") => Ok(load_js_grammar_file(grammar_path, js_runtime)
.with_context(|| "Failed to load grammar.js")?),
Some("json") => {
Ok(fs::read_to_string(grammar_path).with_context(|| "Failed to load grammar.json")?)
}
_ => Err(anyhow!(
"Unknown grammar file extension: {:?}",
grammar_path
@ -166,21 +176,24 @@ pub fn load_grammar_file(grammar_path: &Path) -> Result<String> {
}
}
fn load_js_grammar_file(grammar_path: &Path) -> Result<String> {
fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
let grammar_path = fs::canonicalize(grammar_path)?;
let mut node_process = Command::new("node")
let js_runtime = js_runtime.unwrap_or("node");
let mut node_process = Command::new(js_runtime)
.env("TREE_SITTER_GRAMMAR_PATH", grammar_path)
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.expect("Failed to run `node`");
.with_context(|| format!("Failed to run `{js_runtime}`"))?;
let mut node_stdin = node_process
.stdin
.take()
.expect("Failed to open stdin for node");
.with_context(|| "Failed to open stdin for node")?;
let cli_version = Version::parse(env!("CARGO_PKG_VERSION"))
.expect("Could not parse this package's version as semver.");
.with_context(|| "Could not parse this package's version as semver.")?;
write!(
node_stdin,
"global.TREE_SITTER_CLI_VERSION_MAJOR = {};
@ -188,22 +201,22 @@ fn load_js_grammar_file(grammar_path: &Path) -> Result<String> {
global.TREE_SITTER_CLI_VERSION_PATCH = {};",
cli_version.major, cli_version.minor, cli_version.patch,
)
.expect("Failed to write tree-sitter version to node's stdin");
.with_context(|| "Failed to write tree-sitter version to node's stdin")?;
let javascript_code = include_bytes!("./dsl.js");
node_stdin
.write(javascript_code)
.expect("Failed to write grammar dsl to node's stdin");
.with_context(|| "Failed to write grammar dsl to node's stdin")?;
drop(node_stdin);
let output = node_process
.wait_with_output()
.expect("Failed to read output from node");
.with_context(|| "Failed to read output from node")?;
match output.status.code() {
None => panic!("Node process was killed"),
Some(0) => {}
Some(code) => return Err(anyhow!("Node process exited with status {}", code)),
}
let mut result = String::from_utf8(output.stdout).expect("Got invalid UTF8 from node");
let mut result =
String::from_utf8(output.stdout).with_context(|| "Got invalid UTF8 from node")?;
result.push('\n');
Ok(result)
}
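The `js-runtime` option threads an alternative executable through to the spawn call. A self-contained sketch of the pattern (version preamble and error contexts omitted), assuming a `node` binary on PATH when no override is given:

use std::io::Write;
use std::process::{Command, Stdio};

fn run_js(js_runtime: Option<&str>, script: &str) -> std::io::Result<String> {
    let runtime = js_runtime.unwrap_or("node");
    let mut child = Command::new(runtime)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()?;
    // Pipe the script in, close stdin, then collect stdout.
    child.stdin.take().unwrap().write_all(script.as_bytes())?;
    let output = child.wait_with_output()?;
    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
}

fn main() -> std::io::Result<()> {
    println!("{}", run_js(None, "console.log(1 + 1)")?);
    Ok(())
}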

View file

@ -1172,12 +1172,12 @@ mod tests {
Variable {
name: "identifier".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("\\w+"),
rule: Rule::pattern("\\w+", ""),
},
Variable {
name: "foo_identifier".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("[\\w-]+"),
rule: Rule::pattern("[\\w-]+", ""),
},
],
..Default::default()
@ -1275,8 +1275,8 @@ mod tests {
name: "script".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::field("a".to_string(), Rule::pattern("hi")),
Rule::field("b".to_string(), Rule::pattern("bye")),
Rule::field("a".to_string(), Rule::pattern("hi", "")),
Rule::field("b".to_string(), Rule::pattern("bye", "")),
]),
}],
..Default::default()

View file

@ -19,6 +19,7 @@ enum RuleJSON {
},
PATTERN {
value: String,
flags: Option<String>,
},
SYMBOL {
name: String,
@ -143,7 +144,21 @@ fn parse_rule(json: RuleJSON) -> Rule {
} => Rule::alias(parse_rule(*content), value, named),
RuleJSON::BLANK => Rule::Blank,
RuleJSON::STRING { value } => Rule::String(value),
RuleJSON::PATTERN { value } => Rule::Pattern(value),
RuleJSON::PATTERN { value, flags } => Rule::Pattern(
value,
flags.map_or(String::new(), |f| {
f.chars()
.filter(|c| match c {
    'i' => true,
    'u' => false, // silently ignore the unicode flag
    _ => {
        eprintln!("Warning: unsupported flag {c}");
        false
    }
})
.collect()
}),
),
RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),
RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()),
RuleJSON::FIELD { content, name } => Rule::field(name, parse_rule(*content)),

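A standalone version of the flag filtering above: `i` is kept, `u` is dropped silently, and any other flag is dropped with a warning.

fn filter_flags(flags: &str) -> String {
    flags
        .chars()
        .filter(|c| match c {
            'i' => true,
            'u' => false, // silently ignore the unicode flag
            _ => {
                eprintln!("Warning: unsupported flag {c}");
                false
            }
        })
        .collect()
}

fn main() {
    assert_eq!(filter_flags("iu"), "i");
    assert_eq!(filter_flags("g"), ""); // prints a warning for `g`
}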
View file

@ -139,10 +139,10 @@ pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<Lexi
impl NfaBuilder {
fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> Result<bool> {
match rule {
Rule::Pattern(s) => {
Rule::Pattern(s, f) => {
let s = preprocess_regex(s);
let ast = parse::Parser::new().parse(&s)?;
self.expand_regex(&ast, next_state_id)
self.expand_regex(&ast, next_state_id, f.contains('i'))
}
Rule::String(s) => {
for c in s.chars().rev() {
@ -210,12 +210,42 @@ impl NfaBuilder {
}
}
fn expand_regex(&mut self, ast: &Ast, mut next_state_id: u32) -> Result<bool> {
fn expand_regex(
&mut self,
ast: &Ast,
mut next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
fn inverse_char(c: char) -> char {
match c {
'a'..='z' => (c as u8 - b'a' + b'A') as char,
'A'..='Z' => (c as u8 - b'A' + b'a') as char,
c => c,
}
}
fn with_inverse_char(mut chars: CharacterSet) -> CharacterSet {
for char in chars.clone().chars() {
let inverted = inverse_char(char);
if char != inverted {
chars = chars.add_char(inverted);
}
}
chars
}
match ast {
Ast::Empty(_) => Ok(false),
Ast::Flags(_) => Err(anyhow!("Regex error: Flags are not supported")),
Ast::Literal(literal) => {
self.push_advance(CharacterSet::from_char(literal.c), next_state_id);
let mut char_set = CharacterSet::from_char(literal.c);
if case_insensitive {
let inverted = inverse_char(literal.c);
if literal.c != inverted {
char_set = char_set.add_char(inverted);
}
}
self.push_advance(char_set, next_state_id);
Ok(true)
}
Ast::Dot(_) => {
@ -229,6 +259,9 @@ impl NfaBuilder {
if class.negated {
chars = chars.negate();
}
if case_insensitive {
chars = with_inverse_char(chars);
}
self.push_advance(chars, next_state_id);
Ok(true)
}
@ -237,6 +270,9 @@ impl NfaBuilder {
if class.negated {
chars = chars.negate();
}
if case_insensitive {
chars = with_inverse_char(chars);
}
self.push_advance(chars, next_state_id);
Ok(true)
}
@ -245,48 +281,56 @@ impl NfaBuilder {
if class.negated {
chars = chars.negate();
}
if case_insensitive {
chars = with_inverse_char(chars);
}
self.push_advance(chars, next_state_id);
Ok(true)
}
},
Ast::Repetition(repetition) => match repetition.op.kind {
RepetitionKind::ZeroOrOne => {
self.expand_zero_or_one(&repetition.ast, next_state_id)
self.expand_zero_or_one(&repetition.ast, next_state_id, case_insensitive)
}
RepetitionKind::OneOrMore => {
self.expand_one_or_more(&repetition.ast, next_state_id)
self.expand_one_or_more(&repetition.ast, next_state_id, case_insensitive)
}
RepetitionKind::ZeroOrMore => {
self.expand_zero_or_more(&repetition.ast, next_state_id)
self.expand_zero_or_more(&repetition.ast, next_state_id, case_insensitive)
}
RepetitionKind::Range(RepetitionRange::Exactly(count)) => {
self.expand_count(&repetition.ast, count, next_state_id)
self.expand_count(&repetition.ast, count, next_state_id, case_insensitive)
}
RepetitionKind::Range(RepetitionRange::AtLeast(min)) => {
if self.expand_zero_or_more(&repetition.ast, next_state_id)? {
self.expand_count(&repetition.ast, min, next_state_id)
if self.expand_zero_or_more(&repetition.ast, next_state_id, case_insensitive)? {
self.expand_count(&repetition.ast, min, next_state_id, case_insensitive)
} else {
Ok(false)
}
}
RepetitionKind::Range(RepetitionRange::Bounded(min, max)) => {
let mut result = self.expand_count(&repetition.ast, min, next_state_id)?;
let mut result =
self.expand_count(&repetition.ast, min, next_state_id, case_insensitive)?;
for _ in min..max {
if result {
next_state_id = self.nfa.last_state_id();
}
if self.expand_zero_or_one(&repetition.ast, next_state_id)? {
if self.expand_zero_or_one(
&repetition.ast,
next_state_id,
case_insensitive,
)? {
result = true;
}
}
Ok(result)
}
},
Ast::Group(group) => self.expand_regex(&group.ast, next_state_id),
Ast::Group(group) => self.expand_regex(&group.ast, next_state_id, case_insensitive),
Ast::Alternation(alternation) => {
let mut alternative_state_ids = Vec::new();
for ast in alternation.asts.iter() {
if self.expand_regex(&ast, next_state_id)? {
if self.expand_regex(&ast, next_state_id, case_insensitive)? {
alternative_state_ids.push(self.nfa.last_state_id());
} else {
alternative_state_ids.push(next_state_id);
@ -304,7 +348,7 @@ impl NfaBuilder {
Ast::Concat(concat) => {
let mut result = false;
for ast in concat.asts.iter().rev() {
if self.expand_regex(&ast, next_state_id)? {
if self.expand_regex(&ast, next_state_id, case_insensitive)? {
result = true;
next_state_id = self.nfa.last_state_id();
}
@ -335,13 +379,18 @@ impl NfaBuilder {
}
}
fn expand_one_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
fn expand_one_or_more(
&mut self,
ast: &Ast,
next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
self.nfa.states.push(NfaState::Accept {
variable_index: 0,
precedence: 0,
}); // Placeholder for split
let split_state_id = self.nfa.last_state_id();
if self.expand_regex(&ast, split_state_id)? {
if self.expand_regex(&ast, split_state_id, case_insensitive)? {
self.nfa.states[split_state_id as usize] =
NfaState::Split(self.nfa.last_state_id(), next_state_id);
Ok(true)
@ -351,8 +400,13 @@ impl NfaBuilder {
}
}
fn expand_zero_or_one(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
if self.expand_regex(ast, next_state_id)? {
fn expand_zero_or_one(
&mut self,
ast: &Ast,
next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
if self.expand_regex(ast, next_state_id, case_insensitive)? {
self.push_split(next_state_id);
Ok(true)
} else {
@ -360,8 +414,13 @@ impl NfaBuilder {
}
}
fn expand_zero_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
if self.expand_one_or_more(&ast, next_state_id)? {
fn expand_zero_or_more(
&mut self,
ast: &Ast,
next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
if self.expand_one_or_more(&ast, next_state_id, case_insensitive)? {
self.push_split(next_state_id);
Ok(true)
} else {
@ -369,10 +428,16 @@ impl NfaBuilder {
}
}
fn expand_count(&mut self, ast: &Ast, count: u32, mut next_state_id: u32) -> Result<bool> {
fn expand_count(
&mut self,
ast: &Ast,
count: u32,
mut next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
let mut result = false;
for _ in 0..count {
if self.expand_regex(ast, next_state_id)? {
if self.expand_regex(ast, next_state_id, case_insensitive)? {
result = true;
next_state_id = self.nfa.last_state_id();
}
@ -475,7 +540,9 @@ impl NfaBuilder {
.add_char(' ')
.add_char('\t')
.add_char('\r')
.add_char('\n'),
.add_char('\n')
.add_char('\x0B')
.add_char('\x0C'),
ClassPerlKind::Word => CharacterSet::empty()
.add_char('_')
.add_range('A', 'Z')
@ -563,7 +630,7 @@ mod tests {
let table = [
// regex with sequences and alternatives
Row {
rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?")],
rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?", "")],
separators: vec![],
examples: vec![
("ade1", Some((0, "ade"))),
@ -574,13 +641,13 @@ mod tests {
},
// regex with repeats
Row {
rules: vec![Rule::pattern("a*")],
rules: vec![Rule::pattern("a*", "")],
separators: vec![],
examples: vec![("aaa1", Some((0, "aaa"))), ("b", Some((0, "")))],
},
// regex with repeats in sequences
Row {
rules: vec![Rule::pattern("a((bc)+|(de)*)f")],
rules: vec![Rule::pattern("a((bc)+|(de)*)f", "")],
separators: vec![],
examples: vec![
("af1", Some((0, "af"))),
@ -591,13 +658,13 @@ mod tests {
},
// regex with character ranges
Row {
rules: vec![Rule::pattern("[a-fA-F0-9]+")],
rules: vec![Rule::pattern("[a-fA-F0-9]+", "")],
separators: vec![],
examples: vec![("A1ff0.", Some((0, "A1ff0")))],
},
// regex with perl character classes
Row {
rules: vec![Rule::pattern("\\w\\d\\s")],
rules: vec![Rule::pattern("\\w\\d\\s", "")],
separators: vec![],
examples: vec![("_0 ", Some((0, "_0 ")))],
},
@ -611,7 +678,7 @@ mod tests {
Row {
rules: vec![Rule::repeat(Rule::seq(vec![
Rule::string("{"),
Rule::pattern("[a-f]+"),
Rule::pattern("[a-f]+", ""),
Rule::string("}"),
]))],
separators: vec![],
@ -624,9 +691,9 @@ mod tests {
// longest match rule
Row {
rules: vec![
Rule::pattern("a|bc"),
Rule::pattern("aa"),
Rule::pattern("bcd"),
Rule::pattern("a|bc", ""),
Rule::pattern("aa", ""),
Rule::pattern("bcd", ""),
],
separators: vec![],
examples: vec![
@ -640,7 +707,7 @@ mod tests {
},
// regex with an alternative including the empty string
Row {
rules: vec![Rule::pattern("a(b|)+c")],
rules: vec![Rule::pattern("a(b|)+c", "")],
separators: vec![],
examples: vec![
("ac.", Some((0, "ac"))),
@ -650,8 +717,8 @@ mod tests {
},
// separators
Row {
rules: vec![Rule::pattern("[a-f]+")],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")],
rules: vec![Rule::pattern("[a-f]+", "")],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")],
examples: vec![
(" a", Some((0, "a"))),
(" \nb", Some((0, "b"))),
@ -662,11 +729,11 @@ mod tests {
// shorter tokens with higher precedence
Row {
rules: vec![
Rule::prec(Precedence::Integer(2), Rule::pattern("abc")),
Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e")),
Rule::pattern("[a-e]+"),
Rule::prec(Precedence::Integer(2), Rule::pattern("abc", "")),
Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e", "")),
Rule::pattern("[a-e]+", ""),
],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")],
examples: vec![
("abceef", Some((0, "abc"))),
("abdeef", Some((1, "abde"))),
@ -676,13 +743,13 @@ mod tests {
// immediate tokens with higher precedence
Row {
rules: vec![
Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+")),
Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+", "")),
Rule::immediate_token(Rule::prec(
Precedence::Integer(2),
Rule::pattern("[^ab]+"),
Rule::pattern("[^ab]+", ""),
)),
],
separators: vec![Rule::pattern("\\s")],
separators: vec![Rule::pattern("\\s", "")],
examples: vec![("cccb", Some((1, "ccc")))],
},
Row {
@ -704,7 +771,7 @@ mod tests {
// nested choices within sequences
Row {
rules: vec![Rule::seq(vec![
Rule::pattern("[0-9]+"),
Rule::pattern("[0-9]+", ""),
Rule::choice(vec![
Rule::Blank,
Rule::choice(vec![Rule::seq(vec![
@ -713,7 +780,7 @@ mod tests {
Rule::Blank,
Rule::choice(vec![Rule::string("+"), Rule::string("-")]),
]),
Rule::pattern("[0-9]+"),
Rule::pattern("[0-9]+", ""),
])]),
]),
])],
@ -730,7 +797,7 @@ mod tests {
},
// nested groups
Row {
rules: vec![Rule::seq(vec![Rule::pattern(r#"([^x\\]|\\(.|\n))+"#)])],
rules: vec![Rule::seq(vec![Rule::pattern(r#"([^x\\]|\\(.|\n))+"#, "")])],
separators: vec![],
examples: vec![("abcx", Some((0, "abc"))), ("abc\\0x", Some((0, "abc\\0")))],
},
@ -738,11 +805,11 @@ mod tests {
Row {
rules: vec![
// Escaped forward slash (used in JS because '/' is the regex delimiter)
Rule::pattern(r#"\/"#),
Rule::pattern(r#"\/"#, ""),
// Escaped quotes
Rule::pattern(r#"\"\'"#),
Rule::pattern(r#"\"\'"#, ""),
// Quote preceded by a literal backslash
Rule::pattern(r#"[\\']+"#),
Rule::pattern(r#"[\\']+"#, ""),
],
separators: vec![],
examples: vec![
@ -754,8 +821,8 @@ mod tests {
// unicode property escapes
Row {
rules: vec![
Rule::pattern(r#"\p{L}+\P{L}+"#),
Rule::pattern(r#"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*"#),
Rule::pattern(r#"\p{L}+\P{L}+"#, ""),
Rule::pattern(r#"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*"#, ""),
],
separators: vec![],
examples: vec![
@ -765,17 +832,17 @@ mod tests {
},
// unicode property escapes in bracketed sets
Row {
rules: vec![Rule::pattern(r#"[\p{L}\p{Nd}]+"#)],
rules: vec![Rule::pattern(r#"[\p{L}\p{Nd}]+"#, "")],
separators: vec![],
examples: vec![("abΨ12٣٣, ok", Some((0, "abΨ12٣٣")))],
},
// unicode character escapes
Row {
rules: vec![
Rule::pattern(r#"\u{00dc}"#),
Rule::pattern(r#"\U{000000dd}"#),
Rule::pattern(r#"\u00de"#),
Rule::pattern(r#"\U000000df"#),
Rule::pattern(r#"\u{00dc}"#, ""),
Rule::pattern(r#"\U{000000dd}"#, ""),
Rule::pattern(r#"\u00de"#, ""),
Rule::pattern(r#"\U000000df"#, ""),
],
separators: vec![],
examples: vec![
@ -789,13 +856,13 @@ mod tests {
Row {
rules: vec![
// Un-escaped curly braces
Rule::pattern(r#"u{[0-9a-fA-F]+}"#),
Rule::pattern(r#"u{[0-9a-fA-F]+}"#, ""),
// Already-escaped curly braces
Rule::pattern(r#"\{[ab]{3}\}"#),
Rule::pattern(r#"\{[ab]{3}\}"#, ""),
// Unicode codepoints
Rule::pattern(r#"\u{1000A}"#),
Rule::pattern(r#"\u{1000A}"#, ""),
// Unicode codepoints (lowercase)
Rule::pattern(r#"\u{1000b}"#),
Rule::pattern(r#"\u{1000b}"#, ""),
],
separators: vec![],
examples: vec![
@ -807,7 +874,7 @@ mod tests {
},
// Emojis
Row {
rules: vec![Rule::pattern(r"\p{Emoji}+")],
rules: vec![Rule::pattern(r"\p{Emoji}+", "")],
separators: vec![],
examples: vec![
("🐎", Some((0, "🐎"))),
@ -820,7 +887,7 @@ mod tests {
},
// Intersection
Row {
rules: vec![Rule::pattern(r"[[0-7]&&[4-9]]+")],
rules: vec![Rule::pattern(r"[[0-7]&&[4-9]]+", "")],
separators: vec![],
examples: vec![
("456", Some((0, "456"))),
@ -833,7 +900,7 @@ mod tests {
},
// Difference
Row {
rules: vec![Rule::pattern(r"[[0-9]--[4-7]]+")],
rules: vec![Rule::pattern(r"[[0-9]--[4-7]]+", "")],
separators: vec![],
examples: vec![
("123", Some((0, "123"))),
@ -846,7 +913,7 @@ mod tests {
},
// Symmetric difference
Row {
rules: vec![Rule::pattern(r"[[0-7]~~[4-9]]+")],
rules: vec![Rule::pattern(r"[[0-7]~~[4-9]]+", "")],
separators: vec![],
examples: vec![
("123", Some((0, "123"))),
@ -867,7 +934,7 @@ mod tests {
// [6-7]: y y
// [3-9]--[5-7]: y y y y y
// final regex: y y y y y y
rules: vec![Rule::pattern(r"[[[0-5]--[2-4]]~~[[3-9]--[6-7]]]+")],
rules: vec![Rule::pattern(r"[[[0-5]--[2-4]]~~[[3-9]--[6-7]]]+", "")],
separators: vec![],
examples: vec![
("01", Some((0, "01"))),

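The case-insensitive expansion rests on the ASCII fold in `inverse_char`: only `a-z` and `A-Z` are folded, so a pattern with the `i` flag matches both cases of each ASCII letter and leaves every other character alone. Extracted as a runnable snippet:

fn inverse_char(c: char) -> char {
    match c {
        'a'..='z' => (c as u8 - b'a' + b'A') as char,
        'A'..='Z' => (c as u8 - b'A' + b'a') as char,
        c => c,
    }
}

fn main() {
    assert_eq!(inverse_char('a'), 'A');
    assert_eq!(inverse_char('Z'), 'z');
    assert_eq!(inverse_char('1'), '1'); // non-letters are unchanged
}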
View file

@ -31,7 +31,7 @@ pub(super) fn extract_default_aliases(
for variable in syntax_grammar.variables.iter() {
for production in variable.productions.iter() {
for step in production.steps.iter() {
let mut status = match step.symbol.kind {
let status = match step.symbol.kind {
SymbolType::External => &mut external_status_list[step.symbol.index],
SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
@ -63,7 +63,7 @@ pub(super) fn extract_default_aliases(
}
for symbol in syntax_grammar.extra_symbols.iter() {
let mut status = match symbol.kind {
let status = match symbol.kind {
SymbolType::External => &mut external_status_list[symbol.index],
SymbolType::NonTerminal => &mut non_terminal_status_list[symbol.index],
SymbolType::Terminal => &mut terminal_status_list[symbol.index],

View file

@ -49,7 +49,7 @@ pub(super) fn extract_tokens(
}) = variable.rule
{
if i > 0 && extractor.extracted_usage_counts[index] == 1 {
let mut lexical_variable = &mut lexical_variables[index];
let lexical_variable = &mut lexical_variables[index];
lexical_variable.kind = variable.kind;
lexical_variable.name = variable.name;
symbol_replacer.replacements.insert(i, index);
@ -209,7 +209,7 @@ impl TokenExtractor {
} else {
Rule::Metadata {
params: params.clone(),
rule: Box::new(self.extract_tokens_in_rule((&rule).clone())),
rule: Box::new(self.extract_tokens_in_rule(&rule)),
}
}
}
@ -320,7 +320,7 @@ mod test {
"rule_0",
Rule::repeat(Rule::seq(vec![
Rule::string("a"),
Rule::pattern("b"),
Rule::pattern("b", ""),
Rule::choice(vec![
Rule::non_terminal(1),
Rule::non_terminal(2),
@ -331,8 +331,8 @@ mod test {
]),
])),
),
Variable::named("rule_1", Rule::pattern("e")),
Variable::named("rule_2", Rule::pattern("b")),
Variable::named("rule_1", Rule::pattern("e", "")),
Variable::named("rule_2", Rule::pattern("b", "")),
Variable::named(
"rule_3",
Rule::seq(vec![Rule::non_terminal(2), Rule::Blank]),
@ -378,12 +378,12 @@ mod test {
lexical_grammar.variables,
vec![
Variable::anonymous("a", Rule::string("a")),
Variable::auxiliary("rule_0_token1", Rule::pattern("b")),
Variable::auxiliary("rule_0_token1", Rule::pattern("b", "")),
Variable::auxiliary(
"rule_0_token2",
Rule::repeat(Rule::choice(vec![Rule::string("c"), Rule::string("d"),]))
),
Variable::named("rule_1", Rule::pattern("e")),
Variable::named("rule_1", Rule::pattern("e", "")),
]
);
}
@ -411,7 +411,7 @@ mod test {
fn test_extracting_extra_symbols() {
let mut grammar = build_grammar(vec![
Variable::named("rule_0", Rule::string("x")),
Variable::named("comment", Rule::pattern("//.*")),
Variable::named("comment", Rule::pattern("//.*", "")),
]);
grammar.extra_symbols = vec![Rule::string(" "), Rule::non_terminal(1)];

View file

@ -203,6 +203,12 @@ pub(super) fn process_inlines(
lexical_grammar.variables[symbol.index].name,
))
}
SymbolType::NonTerminal if symbol.index == 0 => {
return Err(anyhow!(
"Rule `{}` cannot be inlined because it is the first rule",
grammar.variables[symbol.index].name,
))
}
_ => {}
}
}

View file

@ -129,6 +129,7 @@ impl Generator {
}
self.add_lex_modes_list();
self.add_parse_table();
if !self.syntax_grammar.external_tokens.is_empty() {
self.add_external_token_enum();
@ -136,7 +137,6 @@ impl Generator {
self.add_external_scanner_states_list();
}
self.add_parse_table();
self.add_parser_export();
self.buffer
@ -152,49 +152,51 @@ impl Generator {
self.symbol_ids[&Symbol::end()].clone(),
);
self.symbol_map = self
.parse_table
.symbols
.iter()
.map(|symbol| {
let mut mapping = symbol;
self.symbol_map = HashMap::new();
// There can be multiple symbols in the grammar that have the same name and kind,
// due to simple aliases. When that happens, ensure that they map to the same
// public-facing symbol. If one of the symbols is not aliased, choose that one
// to be the public-facing symbol. Otherwise, pick the symbol with the lowest
// numeric value.
if let Some(alias) = self.default_aliases.get(symbol) {
let kind = alias.kind();
for other_symbol in &self.parse_table.symbols {
if let Some(other_alias) = self.default_aliases.get(other_symbol) {
if other_symbol < mapping && other_alias == alias {
mapping = other_symbol;
for symbol in self.parse_table.symbols.iter() {
let mut mapping = symbol;
// There can be multiple symbols in the grammar that have the same name and kind,
// due to simple aliases. When that happens, ensure that they map to the same
// public-facing symbol. If one of the symbols is not aliased, choose that one
// to be the public-facing symbol. Otherwise, pick the symbol with the lowest
// numeric value.
if let Some(alias) = self.default_aliases.get(symbol) {
let kind = alias.kind();
for other_symbol in &self.parse_table.symbols {
if let Some(other_alias) = self.default_aliases.get(other_symbol) {
if other_symbol < mapping && other_alias == alias {
mapping = other_symbol;
}
} else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) {
mapping = other_symbol;
break;
}
}
}
// Two anonymous tokens with different flags but the same string value
// should be represented with the same symbol in the public API. Examples:
// * "<" and token(prec(1, "<"))
// * "(" and token.immediate("(")
else if symbol.is_terminal() {
let metadata = self.metadata_for_symbol(*symbol);
for other_symbol in &self.parse_table.symbols {
let other_metadata = self.metadata_for_symbol(*other_symbol);
if other_metadata == metadata {
if let Some(mapped) = self.symbol_map.get(other_symbol) {
if mapped == symbol {
break;
}
} else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) {
mapping = other_symbol;
break;
}
}
}
// Two anonymous tokens with different flags but the same string value
// should be represented with the same symbol in the public API. Examples:
// * "<" and token(prec(1, "<"))
// * "(" and token.immediate("(")
else if symbol.is_terminal() {
let metadata = self.metadata_for_symbol(*symbol);
for other_symbol in &self.parse_table.symbols {
let other_metadata = self.metadata_for_symbol(*other_symbol);
if other_metadata == metadata {
mapping = other_symbol;
break;
}
mapping = other_symbol;
break;
}
}
}
(*symbol, *mapping)
})
.collect();
self.symbol_map.insert(*symbol, *mapping);
}
for production_info in &self.parse_table.production_infos {
// Build a list of all field names
@ -254,7 +256,7 @@ impl Generator {
}
fn add_includes(&mut self) {
add_line!(self, "#include <tree_sitter/parser.h>");
add_line!(self, "#include \"tree_sitter/parser.h\"");
add_line!(self, "");
}
@ -336,7 +338,7 @@ impl Generator {
}
fn add_symbol_enum(&mut self) {
add_line!(self, "enum {{");
add_line!(self, "enum ts_symbol_identifiers {{");
indent!(self);
self.symbol_order.insert(Symbol::end(), 0);
let mut i = 1;
@ -408,7 +410,7 @@ impl Generator {
}
fn add_field_name_enum(&mut self) {
add_line!(self, "enum {{");
add_line!(self, "enum ts_field_identifiers {{");
indent!(self);
for (i, field_name) in self.field_names.iter().enumerate() {
add_line!(self, "{} = {},", self.field_id(field_name), i + 1);
@ -764,7 +766,6 @@ impl Generator {
indent!(self);
add_line!(self, "START_LEXER();");
add_line!(self, "eof = lexer->eof(lexer);");
add_line!(self, "switch (state) {{");
indent!(self);
@ -879,14 +880,23 @@ impl Generator {
add!(self, " ||{}", line_break);
}
if range.end == range.start {
if range.start == '\0' {
add!(self, "!eof && ");
}
add!(self, "lookahead == ");
self.add_character(range.start);
} else if range.end as u32 == range.start as u32 + 1 {
if range.start == '\0' {
add!(self, "!eof && ");
}
add!(self, "lookahead == ");
self.add_character(range.start);
add!(self, " ||{}lookahead == ", line_break);
self.add_character(range.end);
} else {
if range.start == '\0' {
add!(self, "!eof && ");
}
add!(self, "(");
self.add_character(range.start);
add!(self, " <= lookahead && lookahead <= ");
@ -1016,7 +1026,7 @@ impl Generator {
}
fn add_external_token_enum(&mut self) {
add_line!(self, "enum {{");
add_line!(self, "enum ts_external_scanner_symbol_identifiers {{");
indent!(self);
for i in 0..self.syntax_grammar.external_tokens.len() {
add_line!(
@ -1525,54 +1535,93 @@ impl Generator {
fn sanitize_identifier(&self, name: &str) -> String {
let mut result = String::with_capacity(name.len());
for c in name.chars() {
if ('a' <= c && c <= 'z')
|| ('A' <= c && c <= 'Z')
|| ('0' <= c && c <= '9')
|| c == '_'
{
if c.is_ascii_alphanumeric() || c == '_' {
result.push(c);
} else {
let replacement = match c {
'~' => "TILDE",
'`' => "BQUOTE",
'!' => "BANG",
'@' => "AT",
'#' => "POUND",
'$' => "DOLLAR",
'%' => "PERCENT",
'^' => "CARET",
'&' => "AMP",
'*' => "STAR",
'(' => "LPAREN",
')' => "RPAREN",
'-' => "DASH",
'+' => "PLUS",
'=' => "EQ",
'{' => "LBRACE",
'}' => "RBRACE",
'[' => "LBRACK",
']' => "RBRACK",
'\\' => "BSLASH",
'|' => "PIPE",
':' => "COLON",
';' => "SEMI",
'"' => "DQUOTE",
'\'' => "SQUOTE",
'<' => "LT",
'>' => "GT",
',' => "COMMA",
'.' => "DOT",
'?' => "QMARK",
'/' => "SLASH",
'\n' => "LF",
'\r' => "CR",
'\t' => "TAB",
_ => continue,
};
if !result.is_empty() && !result.ends_with("_") {
result.push('_');
'special_chars: {
let replacement = match c {
' ' if name.len() == 1 => "SPACE",
'~' => "TILDE",
'`' => "BQUOTE",
'!' => "BANG",
'@' => "AT",
'#' => "POUND",
'$' => "DOLLAR",
'%' => "PERCENT",
'^' => "CARET",
'&' => "AMP",
'*' => "STAR",
'(' => "LPAREN",
')' => "RPAREN",
'-' => "DASH",
'+' => "PLUS",
'=' => "EQ",
'{' => "LBRACE",
'}' => "RBRACE",
'[' => "LBRACK",
']' => "RBRACK",
'\\' => "BSLASH",
'|' => "PIPE",
':' => "COLON",
';' => "SEMI",
'"' => "DQUOTE",
'\'' => "SQUOTE",
'<' => "LT",
'>' => "GT",
',' => "COMMA",
'.' => "DOT",
'?' => "QMARK",
'/' => "SLASH",
'\n' => "LF",
'\r' => "CR",
'\t' => "TAB",
'\0' => "NULL",
'\u{0001}' => "SOH",
'\u{0002}' => "STX",
'\u{0003}' => "ETX",
'\u{0004}' => "EOT",
'\u{0005}' => "ENQ",
'\u{0006}' => "ACK",
'\u{0007}' => "BEL",
'\u{0008}' => "BS",
'\u{000b}' => "VTAB",
'\u{000c}' => "FF",
'\u{000e}' => "SO",
'\u{000f}' => "SI",
'\u{0010}' => "DLE",
'\u{0011}' => "DC1",
'\u{0012}' => "DC2",
'\u{0013}' => "DC3",
'\u{0014}' => "DC4",
'\u{0015}' => "NAK",
'\u{0016}' => "SYN",
'\u{0017}' => "ETB",
'\u{0018}' => "CAN",
'\u{0019}' => "EM",
'\u{001a}' => "SUB",
'\u{001b}' => "ESC",
'\u{001c}' => "FS",
'\u{001d}' => "GS",
'\u{001e}' => "RS",
'\u{001f}' => "US",
'\u{007F}' => "DEL",
'\u{FEFF}' => "BOM",
'\u{0080}'..='\u{FFFF}' => {
result.push_str(&format!("u{:04x}", c as u32));
break 'special_chars;
}
'\u{10000}'..='\u{10FFFF}' => {
result.push_str(&format!("U{:08x}", c as u32));
break 'special_chars;
}
'0'..='9' | 'a'..='z' | 'A'..='Z' | '_' => unreachable!(),
' ' => break 'special_chars,
};
if !result.is_empty() && !result.ends_with("_") {
result.push('_');
}
result += replacement;
}
result += replacement;
}
}
result
@ -1585,10 +1634,19 @@ impl Generator {
'\"' => result += "\\\"",
'?' => result += "\\?",
'\\' => result += "\\\\",
'\u{0007}' => result += "\\a",
'\u{0008}' => result += "\\b",
'\u{000b}' => result += "\\v",
'\u{000c}' => result += "\\f",
'\n' => result += "\\n",
'\r' => result += "\\r",
'\t' => result += "\\t",
'\0' => result += "\\0",
'\u{0001}'..='\u{001f}' => result += &format!("\\x{:02x}", c as u32),
'\u{007F}'..='\u{FFFF}' => result += &format!("\\u{:04x}", c as u32),
'\u{10000}'..='\u{10FFFF}' => {
result.push_str(&format!("\\U{:08x}", c as u32));
}
_ => result.push(c),
}
}
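A heavily trimmed sketch of the extended `sanitize_identifier` behavior: ASCII alphanumerics pass through, punctuation and control characters get named replacements, and non-ASCII code points become `uXXXX` or `UXXXXXXXX` escapes. The underscore-separator logic and most replacement arms are omitted:

fn sanitize(name: &str) -> String {
    let mut out = String::new();
    for c in name.chars() {
        match c {
            'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => out.push(c),
            '+' => out.push_str("PLUS"),
            '\u{0080}'..='\u{FFFF}' => out.push_str(&format!("u{:04x}", c as u32)),
            '\u{10000}'..='\u{10FFFF}' => out.push_str(&format!("U{:08x}", c as u32)),
            _ => {} // remaining replacement arms elided in this sketch
        }
    }
    out
}

fn main() {
    assert_eq!(sanitize("+"), "PLUS");
    assert_eq!(sanitize("€"), "u20ac"); // U+20AC falls in the BMP escape range
}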

View file

@ -56,7 +56,7 @@ pub(crate) struct Symbol {
pub(crate) enum Rule {
Blank,
String(String),
Pattern(String),
Pattern(String, String),
NamedSymbol(String),
Symbol(Symbol),
Choice(Vec<Rule>),
@ -187,8 +187,8 @@ impl Rule {
Rule::String(value.to_string())
}
pub fn pattern(value: &'static str) -> Self {
Rule::Pattern(value.to_string())
pub fn pattern(value: &'static str, flags: &'static str) -> Self {
Rule::Pattern(value.to_string(), flags.to_string())
}
}

View file

@ -1,4 +1,3 @@
use super::util;
use ansi_term::Color;
use anyhow::Result;
use lazy_static::lazy_static;
@ -281,7 +280,7 @@ fn style_to_css(style: ansi_term::Style) -> String {
fn write_color(buffer: &mut String, color: Color) {
if let Color::RGB(r, g, b) = &color {
write!(buffer, "color: #{:x?}{:x?}{:x?}", r, g, b).unwrap()
write!(buffer, "color: #{r:02x}{g:02x}{b:02x}").unwrap()
} else {
write!(
buffer,
@ -349,7 +348,7 @@ pub fn ansi(
let mut highlighter = Highlighter::new();
let events = highlighter.highlight(config, source, cancellation_flag, |string| {
loader.highlight_config_for_injection_string(string)
loader.highlight_config_for_injection_string(string, config.apply_all_captures)
})?;
let mut style_stack = vec![theme.default_style().ansi];
@ -385,17 +384,17 @@ pub fn html(
config: &HighlightConfiguration,
quiet: bool,
print_time: bool,
cancellation_flag: Option<&AtomicUsize>,
) -> Result<()> {
use std::io::Write;
let stdout = io::stdout();
let mut stdout = stdout.lock();
let time = Instant::now();
let cancellation_flag = util::cancel_on_stdin();
let mut highlighter = Highlighter::new();
let events = highlighter.highlight(config, source, Some(&cancellation_flag), |string| {
loader.highlight_config_for_injection_string(string)
let events = highlighter.highlight(config, source, cancellation_flag, |string| {
loader.highlight_config_for_injection_string(string, config.apply_all_captures)
})?;
let mut renderer = HtmlRenderer::new();
@ -448,7 +447,7 @@ mod tests {
env::set_var("COLORTERM", "");
parse_style(&mut style, Value::String(DARK_CYAN.to_string()));
assert_eq!(style.ansi.foreground, Some(Color::Fixed(36)));
assert_eq!(style.css, Some("style=\'color: #0af87\'".to_string()));
assert_eq!(style.css, Some("style=\'color: #00af87\'".to_string()));
// junglegreen is not an ANSI color and is preserved when the terminal supports it
env::set_var("COLORTERM", "truecolor");

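The `write_color` fix and the corrected test belong together: `{:x?}` (debug hex) does not zero-pad, so `0x00` renders as a single `0` and the CSS color loses digits.

fn main() {
    let (r, g, b) = (0x00u8, 0xafu8, 0x87u8);
    assert_eq!(format!("#{:x?}{:x?}{:x?}", r, g, b), "#0af87"); // broken, 5 digits
    assert_eq!(format!("#{r:02x}{g:02x}{b:02x}"), "#00af87"); // zero-padded, valid CSS
}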
View file

@ -1,3 +1,5 @@
#![doc = include_str!("../README.md")]
pub mod generate;
pub mod highlight;
pub mod logger;
@ -14,3 +16,7 @@ pub mod wasm;
#[cfg(test)]
mod tests;
// To run compile fail tests
#[cfg(doctest)]
mod tests;

View file

@ -1,12 +1,14 @@
use anyhow::{anyhow, Context, Result};
use anyhow::{anyhow, Context, Error, Result};
use clap::{App, AppSettings, Arg, SubCommand};
use glob::glob;
use std::path::Path;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::{env, fs, u64};
use tree_sitter::{Parser, WasmStore};
use tree_sitter::{ffi, Parser, Point, WasmStore};
use tree_sitter_cli::{
generate, highlight, logger, parse, playground, query, tags, test, test_highlight, test_tags,
util, wasm,
generate, highlight, logger,
parse::{self, ParseFileOptions, ParseOutput},
playground, query, tags, test, test_highlight, test_tags, util, wasm,
};
use tree_sitter_config::Config;
use tree_sitter_highlight::Highlighter;
@ -82,6 +84,9 @@ fn run() -> Result<()> {
let wasm_arg = Arg::with_name("wasm")
.long("wasm")
.help("compile parsers to wasm instead of native dynamic libraries");
let apply_all_captures_arg = Arg::with_name("apply-all-captures")
.help("Apply all captures to highlights")
.long("apply-all-captures");
let matches = App::new("tree-sitter")
.author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
@ -113,13 +118,33 @@ fn run() -> Result<()> {
)),
)
.arg(Arg::with_name("no-bindings").long("no-bindings"))
.arg(
Arg::with_name("build")
.long("build")
.short("b")
.help("Compile all defined languages in the current dir"),
)
.arg(&debug_build_arg)
.arg(
Arg::with_name("libdir")
.long("libdir")
.takes_value(true)
.value_name("path"),
)
.arg(
Arg::with_name("report-states-for-rule")
.long("report-states-for-rule")
.value_name("rule-name")
.takes_value(true),
)
.arg(Arg::with_name("no-minimize").long("no-minimize")),
.arg(
Arg::with_name("js-runtime")
.long("js-runtime")
.takes_value(true)
.value_name("executable")
.env("TREE_SITTER_JS_RUNTIME")
.help("Use a JavaScript runtime other than node"),
),
)
.subcommand(
SubCommand::with_name("parse")
@ -132,7 +157,8 @@ fn run() -> Result<()> {
.arg(&debug_build_arg)
.arg(&debug_graph_arg)
.arg(&wasm_arg)
.arg(Arg::with_name("debug-xml").long("xml").short("x"))
.arg(Arg::with_name("output-dot").long("dot"))
.arg(Arg::with_name("output-xml").long("xml").short("x"))
.arg(
Arg::with_name("stat")
.help("Show parsing statistic")
@ -155,6 +181,12 @@ fn run() -> Result<()> {
.takes_value(true)
.multiple(true)
.number_of_values(1),
)
.arg(
Arg::with_name("encoding")
.help("The encoding of the input files")
.long("encoding")
.takes_value(true),
),
)
.subcommand(
@ -167,6 +199,8 @@ fn run() -> Result<()> {
.index(1)
.required(true),
)
.arg(&time_arg)
.arg(&quiet_arg)
.arg(&paths_file_arg)
.arg(&paths_arg.clone().index(2))
.arg(
@ -175,6 +209,12 @@ fn run() -> Result<()> {
.long("byte-range")
.takes_value(true),
)
.arg(
Arg::with_name("row-range")
.help("The range of rows in which the query will be executed")
.long("row-range")
.takes_value(true),
)
.arg(&scope_arg)
.arg(Arg::with_name("captures").long("captures").short("c"))
.arg(Arg::with_name("test").long("test")),
@ -208,7 +248,8 @@ fn run() -> Result<()> {
.arg(&debug_arg)
.arg(&debug_build_arg)
.arg(&debug_graph_arg)
.arg(&wasm_arg),
.arg(&wasm_arg)
.arg(&apply_all_captures_arg),
)
.subcommand(
SubCommand::with_name("highlight")
@ -219,11 +260,31 @@ fn run() -> Result<()> {
.long("html")
.short("H"),
)
.arg(
Arg::with_name("check")
.help("Check that highlighting captures conform strictly to standards")
.long("check"),
)
.arg(
Arg::with_name("captures-path")
.help("Path to a file with captures")
.long("captures-path")
.takes_value(true),
)
.arg(
Arg::with_name("query-paths")
.help("Paths to files with queries")
.long("query-paths")
.takes_value(true)
.multiple(true)
.number_of_values(1),
)
.arg(&scope_arg)
.arg(&time_arg)
.arg(&quiet_arg)
.arg(&paths_file_arg)
.arg(&paths_arg),
.arg(&paths_arg)
.arg(&apply_all_captures_arg),
)
.subcommand(
SubCommand::with_name("build-wasm")
@ -279,6 +340,10 @@ fn run() -> Result<()> {
("generate", Some(matches)) => {
let grammar_path = matches.value_of("grammar-path");
let debug_build = matches.is_present("debug-build");
let build = matches.is_present("build");
let libdir = matches.value_of("libdir");
let js_runtime = matches.value_of("js-runtime");
let report_symbol_name = matches.value_of("report-states-for-rule").or_else(|| {
if matches.is_present("report-states") {
Some("")
@ -289,16 +354,18 @@ fn run() -> Result<()> {
if matches.is_present("log") {
logger::init();
}
let abi_version =
matches
.value_of("abi-version")
.map_or(DEFAULT_GENERATE_ABI_VERSION, |version| {
if version == "latest" {
tree_sitter::LANGUAGE_VERSION
} else {
version.parse().expect("invalid abi version flag")
}
});
let abi_version = matches.value_of("abi-version").map_or(
Ok::<_, Error>(DEFAULT_GENERATE_ABI_VERSION),
|version| {
Ok(if version == "latest" {
tree_sitter::LANGUAGE_VERSION
} else {
version
.parse()
.with_context(|| "invalid abi version flag")?
})
},
)?;
let generate_bindings = !matches.is_present("no-bindings");
generate::generate_parser_in_directory(
&current_dir,
@ -306,7 +373,15 @@ fn run() -> Result<()> {
abi_version,
generate_bindings,
report_symbol_name,
js_runtime,
)?;
if build {
if let Some(path) = libdir {
loader = loader::Loader::with_parser_lib_path(PathBuf::from(path));
}
loader.use_debug_build(debug_build);
loader.languages_at_path(&current_dir)?;
}
}
("test", Some(matches)) => {
@ -317,6 +392,12 @@ fn run() -> Result<()> {
let filter = matches.value_of("filter");
let wasm = matches.is_present("wasm");
let mut parser = Parser::new();
let apply_all_captures = matches.is_present("apply-all-captures");
if debug {
// For augmenting debug logging in external scanners
env::set_var("TREE_SITTER_DEBUG", "1");
}
loader.use_debug_build(debug_build);
@ -364,7 +445,12 @@ fn run() -> Result<()> {
if let Some(store) = store.take() {
highlighter.parser().set_wasm_store(store).unwrap();
}
test_highlight::test_highlights(&loader, &mut highlighter, &test_highlight_dir)?;
test_highlight::test_highlights(
&loader,
&mut highlighter,
&test_highlight_dir,
apply_all_captures,
)?;
store = highlighter.parser().take_wasm_store();
}
@ -382,14 +468,33 @@ fn run() -> Result<()> {
let debug = matches.is_present("debug");
let debug_graph = matches.is_present("debug-graph");
let debug_build = matches.is_present("debug-build");
let debug_xml = matches.is_present("debug-xml");
let quiet = matches.is_present("quiet");
let output = if matches.is_present("output-dot") {
ParseOutput::Dot
} else if matches.is_present("output-xml") {
ParseOutput::Xml
} else if matches.is_present("quiet") {
ParseOutput::Quiet
} else {
ParseOutput::Normal
};
let encoding =
matches
.values_of("encoding")
.map_or(Ok(None), |mut e| match e.next() {
Some("utf16") => Ok(Some(ffi::TSInputEncodingUTF16)),
Some("utf8") => Ok(Some(ffi::TSInputEncodingUTF8)),
Some(_) => Err(anyhow!("Invalid encoding. Expected one of: utf8, utf16")),
None => Ok(None),
})?;
let time = matches.is_present("time");
let wasm = matches.is_present("wasm");
let edits = matches
.values_of("edits")
.map_or(Vec::new(), |e| e.collect());
let cancellation_flag = util::cancel_on_stdin();
let cancellation_flag = util::cancel_on_signal();
let mut parser = Parser::new();
if debug {
@ -430,19 +535,21 @@ fn run() -> Result<()> {
.set_language(language)
.context("incompatible language")?;
let this_file_errored = parse::parse_file_at_path(
&mut parser,
let opts = ParseFileOptions {
language,
path,
&edits,
edits: &edits,
max_path_length,
quiet,
time,
output,
print_time: time,
timeout,
debug,
debug_graph,
debug_xml,
Some(&cancellation_flag),
)?;
cancellation_flag: Some(&cancellation_flag),
encoding,
};
let this_file_errored = parse::parse_file_at_path(&mut parser, opts)?;
if should_track_stats {
stats.total_parses += 1;
@ -465,6 +572,8 @@ fn run() -> Result<()> {
("query", Some(matches)) => {
let ordered_captures = matches.values_of("captures").is_some();
let quiet = matches.values_of("quiet").is_some();
let time = matches.values_of("time").is_some();
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
let loader_config = config.get()?;
loader.find_all_languages(&loader_config)?;
@ -474,9 +583,17 @@ fn run() -> Result<()> {
matches.value_of("scope"),
)?;
let query_path = Path::new(matches.value_of("query-path").unwrap());
let range = matches.value_of("byte-range").map(|br| {
let r: Vec<&str> = br.split(":").collect();
r[0].parse().unwrap()..r[1].parse().unwrap()
let byte_range = matches.value_of("byte-range").and_then(|arg| {
let mut parts = arg.split(":");
let start = parts.next()?.parse().ok()?;
let end = parts.next()?.parse().ok()?;
Some(start..end)
});
let point_range = matches.value_of("row-range").and_then(|arg| {
let mut parts = arg.split(":");
let start = parts.next()?.parse().ok()?;
let end = parts.next()?.parse().ok()?;
Some(Point::new(start, 0)..Point::new(end, 0))
});
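Both range flags parse a `start:end` argument, and the `?`-based chain returns None on malformed input rather than panicking. As a standalone helper:

fn parse_range(arg: &str) -> Option<std::ops::Range<usize>> {
    let mut parts = arg.split(':');
    let start = parts.next()?.parse().ok()?;
    let end = parts.next()?.parse().ok()?;
    Some(start..end)
}

fn main() {
    assert_eq!(parse_range("10:20"), Some(10..20));
    assert_eq!(parse_range("10"), None); // missing end: None, not a panic
}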
let should_test = matches.is_present("test");
query::query_files_at_paths(
@ -484,8 +601,11 @@ fn run() -> Result<()> {
paths,
query_path,
ordered_captures,
range,
byte_range,
point_range,
should_test,
quiet,
time,
)?;
}
@ -511,13 +631,15 @@ fn run() -> Result<()> {
let time = matches.is_present("time");
let quiet = matches.is_present("quiet");
let html_mode = quiet || matches.is_present("html");
let should_check = matches.is_present("check");
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
let apply_all_captures = matches.is_present("apply-all-captures");
if html_mode && !quiet {
println!("{}", highlight::HTML_HEADER);
}
let cancellation_flag = util::cancel_on_stdin();
let cancellation_flag = util::cancel_on_signal();
let mut lang = None;
if let Some(scope) = matches.value_of("scope") {
@ -527,6 +649,15 @@ fn run() -> Result<()> {
}
}
let query_paths = matches
    .values_of("query-paths")
    .map(|e| e.map(String::from).collect::<Vec<_>>());
for path in paths {
let path = Path::new(&path);
let (language, language_config) = match lang {
@ -540,7 +671,45 @@ fn run() -> Result<()> {
},
};
if let Some(highlight_config) = language_config.highlight_config(language)? {
if let Some(highlight_config) = language_config.highlight_config(
language,
apply_all_captures,
query_paths.as_deref(),
)? {
if should_check {
let names = if let Some(path) = matches.value_of("captures-path") {
let path = Path::new(path);
let file = fs::read_to_string(path)?;
let capture_names = file
.lines()
.filter_map(|line| {
if line.trim().is_empty() || line.trim().starts_with(';') {
return None;
}
line.split(';').next().map(|s| s.trim().trim_matches('"'))
})
.collect::<HashSet<_>>();
highlight_config.nonconformant_capture_names(&capture_names)
} else {
highlight_config.nonconformant_capture_names(&HashSet::new())
};
if names.is_empty() {
eprintln!("All highlight captures conform to standards.");
} else {
eprintln!(
"Non-standard highlight {} detected:",
if names.len() > 1 {
"captures"
} else {
"capture"
}
);
for name in names {
eprintln!("* {}", name);
}
}
}
let source = fs::read(path)?;
if html_mode {
highlight::html(
@ -550,6 +719,7 @@ fn run() -> Result<()> {
highlight_config,
quiet,
time,
Some(&cancellation_flag),
)?;
} else {
highlight::ansi(
@ -582,7 +752,7 @@ fn run() -> Result<()> {
("playground", Some(matches)) => {
let open_in_browser = !matches.is_present("quiet");
playground::serve(&current_dir, open_in_browser);
playground::serve(&current_dir, open_in_browser)?;
}
("dump-languages", Some(_)) => {

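For reference, the captures-file parsing in the highlight subcommand above reduces to: skip blank lines and `;` comments, then take the first `;`-delimited field, trimmed of whitespace and quotes, as the capture name.

fn capture_names(file: &str) -> Vec<&str> {
    file.lines()
        .filter_map(|line| {
            let line = line.trim();
            if line.is_empty() || line.starts_with(';') {
                return None;
            }
            line.split(';').next().map(|s| s.trim().trim_matches('"'))
        })
        .collect()
}

fn main() {
    let file = "; comment\n\"keyword\" ; builtin\nvariable.builtin\n";
    assert_eq!(capture_names(file), vec!["keyword", "variable.builtin"]);
}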
View file

@ -5,7 +5,7 @@ use std::path::Path;
use std::sync::atomic::AtomicUsize;
use std::time::Instant;
use std::{fmt, fs, usize};
use tree_sitter::{InputEdit, LogType, Parser, Point, Tree};
use tree_sitter::{ffi, InputEdit, Language, LogType, Parser, Point, Tree};
#[derive(Debug)]
pub struct Edit {
@ -30,36 +30,47 @@ impl fmt::Display for Stats {
}
}
pub fn parse_file_at_path(
parser: &mut Parser,
path: &Path,
edits: &Vec<&str>,
max_path_length: usize,
quiet: bool,
print_time: bool,
timeout: u64,
debug: bool,
debug_graph: bool,
debug_xml: bool,
cancellation_flag: Option<&AtomicUsize>,
) -> Result<bool> {
#[derive(Copy, Clone)]
pub enum ParseOutput {
Normal,
Quiet,
Xml,
Dot,
}
pub struct ParseFileOptions<'a> {
pub language: Language,
pub path: &'a Path,
pub edits: &'a [&'a str],
pub max_path_length: usize,
pub output: ParseOutput,
pub print_time: bool,
pub timeout: u64,
pub debug: bool,
pub debug_graph: bool,
pub cancellation_flag: Option<&'a AtomicUsize>,
pub encoding: Option<u32>,
}
pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result<bool> {
let mut _log_session = None;
let mut source_code =
fs::read(path).with_context(|| format!("Error reading source file {:?}", path))?;
parser.set_language(opts.language)?;
let mut source_code = fs::read(opts.path)
.with_context(|| format!("Error reading source file {:?}", opts.path))?;
// If the `--cancel` flag was passed, then cancel the parse
// when the user types a newline.
unsafe { parser.set_cancellation_flag(cancellation_flag) };
unsafe { parser.set_cancellation_flag(opts.cancellation_flag) };
// Set a timeout based on the `--time` flag.
parser.set_timeout_micros(timeout);
parser.set_timeout_micros(opts.timeout);
// Render an HTML graph if `--debug-graph` was passed
if debug_graph {
if opts.debug_graph {
_log_session = Some(util::log_graphs(parser, "log.html")?);
}
// Log to stderr if `--debug` was passed
else if debug {
else if opts.debug {
parser.set_logger(Some(Box::new(|log_type, message| {
if log_type == LogType::Lex {
io::stderr().write(b" ").unwrap();
@ -69,22 +80,44 @@ pub fn parse_file_at_path(
}
let time = Instant::now();
let tree = parser.parse(&source_code, None);
#[inline(always)]
fn is_utf16_bom(bom_bytes: &[u8]) -> bool {
bom_bytes == [0xFF, 0xFE] || bom_bytes == [0xFE, 0xFF]
}
let tree = match opts.encoding {
Some(encoding) if encoding == ffi::TSInputEncodingUTF16 => {
let source_code_utf16 = source_code
.chunks_exact(2)
.map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
.collect::<Vec<_>>();
parser.parse_utf16(&source_code_utf16, None)
}
None if source_code.len() >= 2 && is_utf16_bom(&source_code[0..2]) => {
let source_code_utf16 = source_code
.chunks_exact(2)
.map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
.collect::<Vec<_>>();
parser.parse_utf16(&source_code_utf16, None)
}
_ => parser.parse(&source_code, None),
};
let stdout = io::stdout();
let mut stdout = stdout.lock();
if let Some(mut tree) = tree {
if debug_graph && !edits.is_empty() {
if opts.debug_graph && !opts.edits.is_empty() {
println!("BEFORE:\n{}", String::from_utf8_lossy(&source_code));
}
for (i, edit) in edits.iter().enumerate() {
for (i, edit) in opts.edits.iter().enumerate() {
let edit = parse_edit_flag(&source_code, edit)?;
perform_edit(&mut tree, &mut source_code, &edit);
perform_edit(&mut tree, &mut source_code, &edit)?;
tree = parser.parse(&source_code, Some(&tree)).unwrap();
if debug_graph {
if opts.debug_graph {
println!("AFTER {}:\n{}", i, String::from_utf8_lossy(&source_code));
}
}
@ -93,7 +126,7 @@ pub fn parse_file_at_path(
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
let mut cursor = tree.walk();
if !quiet {
if matches!(opts.output, ParseOutput::Normal) {
let mut needs_newline = false;
let mut indent_level = 0;
let mut did_visit_children = false;
@ -149,7 +182,7 @@ pub fn parse_file_at_path(
println!("");
}
if debug_xml {
if matches!(opts.output, ParseOutput::Xml) {
let mut needs_newline = false;
let mut indent_level = 0;
let mut did_visit_children = false;
@ -204,6 +237,10 @@ pub fn parse_file_at_path(
println!("");
}
if matches!(opts.output, ParseOutput::Dot) {
util::print_tree_graph(&tree, "log.html").unwrap();
}
let mut first_error = None;
loop {
let node = cursor.node();
@ -221,13 +258,13 @@ pub fn parse_file_at_path(
}
}
if first_error.is_some() || print_time {
if first_error.is_some() || opts.print_time {
write!(
&mut stdout,
"{:width$}\t{} ms",
path.to_str().unwrap(),
opts.path.to_str().unwrap(),
duration_ms,
width = max_path_length
width = opts.max_path_length
)?;
if let Some(node) = first_error {
let start = node.start_position();
@ -256,29 +293,29 @@ pub fn parse_file_at_path(
}
return Ok(first_error.is_some());
} else if print_time {
} else if opts.print_time {
let duration = time.elapsed();
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
writeln!(
&mut stdout,
"{:width$}\t{} ms (timed out)",
path.to_str().unwrap(),
opts.path.to_str().unwrap(),
duration_ms,
width = max_path_length
width = opts.max_path_length
)?;
}
Ok(false)
}
pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> InputEdit {
pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> Result<InputEdit> {
let start_byte = edit.position;
let old_end_byte = edit.position + edit.deleted_length;
let new_end_byte = edit.position + edit.inserted_text.len();
let start_position = position_for_offset(input, start_byte);
let old_end_position = position_for_offset(input, old_end_byte);
let start_position = position_for_offset(input, start_byte)?;
let old_end_position = position_for_offset(input, old_end_byte)?;
input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned());
let new_end_position = position_for_offset(input, new_end_byte);
let new_end_position = position_for_offset(input, new_end_byte)?;
let edit = InputEdit {
start_byte,
old_end_byte,
@ -288,7 +325,7 @@ pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> InputE
new_end_position,
};
tree.edit(&edit);
edit
Ok(edit)
}
fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
@ -317,7 +354,7 @@ fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
let row = usize::from_str_radix(row, 10).map_err(|_| error())?;
let column = parts.next().ok_or_else(error)?;
let column = usize::from_str_radix(column, 10).map_err(|_| error())?;
offset_for_position(source_code, Point { row, column })
offset_for_position(source_code, Point { row, column })?
} else {
usize::from_str_radix(position, 10).map_err(|_| error())?
};
@ -332,31 +369,48 @@ fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
})
}
fn offset_for_position(input: &Vec<u8>, position: Point) -> usize {
let mut current_position = Point { row: 0, column: 0 };
for (i, c) in input.iter().enumerate() {
if *c as char == '\n' {
current_position.row += 1;
current_position.column = 0;
} else {
current_position.column += 1;
}
if current_position > position {
return i;
pub fn offset_for_position(input: &[u8], position: Point) -> Result<usize> {
let mut row = 0;
let mut offset = 0;
let mut iter = memchr::memchr_iter(b'\n', input);
    // Advance `offset` to the first byte of the target row: each newline
    // consumed below ends one row, so the next row begins one byte past it.
    // Row 0 has no preceding newline, so `offset` stays 0.
    loop {
        if let Some(pos) = iter.next() {
            if row < position.row {
                row += 1;
                offset = pos + 1;
                continue;
            }
        }
        break;
    }
return input.len();
if position.row - row > 0 {
return Err(anyhow!("Failed to address a row: {}", position.row));
}
if let Some(pos) = iter.next() {
if (pos - offset < position.column) || (input[offset] == b'\n' && position.column > 0) {
return Err(anyhow!("Failed to address a column: {}", position.column));
};
} else if input.len() - offset < position.column {
return Err(anyhow!("Failed to address a column over the end"));
}
Ok(offset + position.column)
}
fn position_for_offset(input: &Vec<u8>, offset: usize) -> Point {
let mut result = Point { row: 0, column: 0 };
for c in &input[0..offset] {
if *c as char == '\n' {
result.row += 1;
result.column = 0;
} else {
result.column += 1;
}
pub fn position_for_offset(input: &[u8], offset: usize) -> Result<Point> {
if offset > input.len() {
return Err(anyhow!("Failed to address an offset: {offset}"));
}
result
let mut result = Point { row: 0, column: 0 };
let mut last = 0;
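    // Count the newlines before `offset`; each one starts a new row, and the
    // last newline found anchors the column computation below.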
for pos in memchr::memchr_iter(b'\n', &input[..offset]) {
result.row += 1;
last = pos;
}
result.column = if result.row > 0 {
offset - last - 1
} else {
offset
};
Ok(result)
}

View file

@ -3,8 +3,8 @@
<title>tree-sitter THE_LANGUAGE_NAME</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/codemirror/5.45.0/codemirror.min.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/clusterize.js/0.18.0/clusterize.min.css">
<link rel="icon" type="image/png" href="http://tree-sitter.github.io/tree-sitter/assets/images/favicon-32x32.png" sizes="32x32" />
<link rel="icon" type="image/png" href="http://tree-sitter.github.io/tree-sitter/assets/images/favicon-16x16.png" sizes="16x16" />
<link rel="icon" type="image/png" href="https://tree-sitter.github.io/tree-sitter/assets/images/favicon-32x32.png" sizes="32x32" />
<link rel="icon" type="image/png" href="https://tree-sitter.github.io/tree-sitter/assets/images/favicon-16x16.png" sizes="16x16" />
</head>
<body>

View file

@ -1,4 +1,5 @@
use super::wasm;
use anyhow::{anyhow, Context, Result};
use std::{
borrow::Cow,
env, fs,
@ -7,12 +8,11 @@ use std::{
str::{self, FromStr as _},
};
use tiny_http::{Header, Response, Server};
use webbrowser;
macro_rules! optional_resource {
($name: tt, $path: tt) => {
#[cfg(TREE_SITTER_EMBED_WASM_BINDING)]
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> {
fn $name(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap())
} else {
@ -21,7 +21,7 @@ macro_rules! optional_resource {
}
#[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))]
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> {
fn $name(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap())
} else {
@ -35,7 +35,7 @@ optional_resource!(get_playground_js, "docs/assets/js/playground.js");
optional_resource!(get_lib_js, "lib/binding_web/tree-sitter.js");
optional_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm");
fn get_main_html(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> {
fn get_main_html(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
Cow::Owned(fs::read(tree_sitter_dir.join("cli/src/playground.html")).unwrap())
} else {
@ -43,23 +43,10 @@ fn get_main_html(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> {
}
}
pub fn serve(grammar_path: &Path, open_in_browser: bool) {
let port = env::var("TREE_SITTER_PLAYGROUND_PORT")
.map(|v| v.parse::<u16>().expect("Invalid port specification"))
.unwrap_or_else(
|_| get_available_port().expect(
"Couldn't find an available port, try providing a port number via the TREE_SITTER_PLAYGROUND_PORT \
environment variable"
)
);
let addr = format!(
"{}:{}",
env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or("127.0.0.1".to_owned()),
port
);
pub fn serve(grammar_path: &Path, open_in_browser: bool) -> Result<()> {
let server = get_server()?;
let (grammar_name, language_wasm) = wasm::load_language_wasm_file(&grammar_path).unwrap();
let server = Server::http(&addr).expect("Failed to start web server");
let url = format!("http://{}", addr);
let url = format!("http://{}", server.server_addr());
println!("Started playground on: {}", url);
if open_in_browser {
if let Err(_) = webbrowser::open(&url) {
@ -68,13 +55,13 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) {
}
let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok();
let main_html = str::from_utf8(&get_main_html(&tree_sitter_dir))
let main_html = str::from_utf8(&get_main_html(tree_sitter_dir.as_ref()))
.unwrap()
.replace("THE_LANGUAGE_NAME", &grammar_name)
.into_bytes();
let playground_js = get_playground_js(&tree_sitter_dir);
let lib_js = get_lib_js(&tree_sitter_dir);
let lib_wasm = get_lib_wasm(&tree_sitter_dir);
let playground_js = get_playground_js(tree_sitter_dir.as_ref());
let lib_js = get_lib_js(tree_sitter_dir.as_ref());
let lib_wasm = get_lib_wasm(tree_sitter_dir.as_ref());
let html_header = Header::from_str("Content-Type: text/html").unwrap();
let js_header = Header::from_str("Content-Type: application/javascript").unwrap();
@ -107,8 +94,12 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) {
}
_ => response(b"Not found", &html_header).with_status_code(404),
};
request.respond(res).expect("Failed to write HTTP response");
request
.respond(res)
.with_context(|| "Failed to write HTTP response")?;
}
Ok(())
}
fn redirect<'a>(url: &'a str) -> Response<&'a [u8]> {
@ -123,10 +114,30 @@ fn response<'a>(data: &'a [u8], header: &Header) -> Response<&'a [u8]> {
.with_header(header.clone())
}
fn get_available_port() -> Option<u16> {
(8000..12000).find(port_is_available)
fn get_server() -> Result<Server> {
let addr = env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or("127.0.0.1".to_owned());
let port = env::var("TREE_SITTER_PLAYGROUND_PORT")
.map(|v| {
v.parse::<u16>()
.with_context(|| "Invalid port specification")
})
.ok();
let listener = match port {
Some(port) => {
bind_to(&*addr, port?).with_context(|| "Failed to bind to the specified port")?
}
None => get_listener_on_available_port(&*addr)
.with_context(|| "Failed to find a free port to bind to it")?,
};
let server =
Server::from_listener(listener, None).map_err(|_| anyhow!("Failed to start web server"))?;
Ok(server)
}
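// For example (hypothetical invocation), the playground can be pinned to a
// specific interface and port before launch:
//
//   TREE_SITTER_PLAYGROUND_ADDR=0.0.0.0 TREE_SITTER_PLAYGROUND_PORT=9000 tree-sitter playground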
fn port_is_available(port: &u16) -> bool {
TcpListener::bind(("127.0.0.1", *port)).is_ok()
fn get_listener_on_available_port(addr: &str) -> Option<TcpListener> {
(8000..12000).find_map(|port| bind_to(addr, port))
}
fn bind_to(addr: &str, port: u16) -> Option<TcpListener> {
TcpListener::bind(format!("{addr}:{port}")).ok()
}

View file

@ -5,16 +5,20 @@ use std::{
io::{self, Write},
ops::Range,
path::Path,
time::Instant,
};
use tree_sitter::{Language, Parser, Query, QueryCursor};
use tree_sitter::{Language, Parser, Point, Query, QueryCursor};
pub fn query_files_at_paths(
language: Language,
paths: Vec<String>,
query_path: &Path,
ordered_captures: bool,
range: Option<Range<usize>>,
byte_range: Option<Range<usize>>,
point_range: Option<Range<Point>>,
should_test: bool,
quiet: bool,
print_time: bool,
) -> Result<()> {
let stdout = io::stdout();
let mut stdout = stdout.lock();
@ -24,9 +28,12 @@ pub fn query_files_at_paths(
let query = Query::new(language, &query_source).with_context(|| "Query compilation failed")?;
let mut query_cursor = QueryCursor::new();
if let Some(range) = range {
if let Some(range) = byte_range {
query_cursor.set_byte_range(range);
}
if let Some(range) = point_range {
query_cursor.set_point_range(range);
}
let mut parser = Parser::new();
parser.set_language(language)?;
@ -40,22 +47,25 @@ pub fn query_files_at_paths(
fs::read(&path).with_context(|| format!("Error reading source file {:?}", path))?;
let tree = parser.parse(&source_code, None).unwrap();
let start = Instant::now();
if ordered_captures {
for (mat, capture_index) in
query_cursor.captures(&query, tree.root_node(), source_code.as_slice())
{
let capture = mat.captures[capture_index];
let capture_name = &query.capture_names()[capture.index as usize];
writeln!(
&mut stdout,
" pattern: {:>2}, capture: {} - {}, start: {}, end: {}, text: `{}`",
mat.pattern_index,
capture.index,
capture_name,
capture.node.start_position(),
capture.node.end_position(),
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
if !quiet {
writeln!(
&mut stdout,
" pattern: {:>2}, capture: {} - {}, start: {}, end: {}, text: `{}`",
mat.pattern_index,
capture.index,
capture_name,
capture.node.start_position(),
capture.node.end_position(),
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
}
results.push(query_testing::CaptureInfo {
name: capture_name.to_string(),
start: capture.node.start_position(),
@ -64,27 +74,31 @@ pub fn query_files_at_paths(
}
} else {
for m in query_cursor.matches(&query, tree.root_node(), source_code.as_slice()) {
writeln!(&mut stdout, " pattern: {}", m.pattern_index)?;
if !quiet {
writeln!(&mut stdout, " pattern: {}", m.pattern_index)?;
}
for capture in m.captures {
let start = capture.node.start_position();
let end = capture.node.end_position();
let capture_name = &query.capture_names()[capture.index as usize];
if end.row == start.row {
writeln!(
&mut stdout,
" capture: {} - {}, start: {}, end: {}, text: `{}`",
capture.index,
capture_name,
start,
end,
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
} else {
writeln!(
&mut stdout,
" capture: {}, start: {}, end: {}",
capture_name, start, end,
)?;
if !quiet {
if end.row == start.row {
writeln!(
&mut stdout,
" capture: {} - {}, start: {}, end: {}, text: `{}`",
capture.index,
capture_name,
start,
end,
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
} else {
writeln!(
&mut stdout,
" capture: {}, start: {}, end: {}",
capture_name, start, end,
)?;
}
}
results.push(query_testing::CaptureInfo {
name: capture_name.to_string(),
@ -103,6 +117,9 @@ pub fn query_files_at_paths(
if should_test {
query_testing::assert_expected_captures(results, path, &mut parser, language)?
}
if print_time {
writeln!(&mut stdout, "{:?}", start.elapsed())?;
}
}
Ok(())

View file

@ -18,9 +18,20 @@ pub struct CaptureInfo {
#[derive(Debug, PartialEq, Eq)]
pub struct Assertion {
pub position: Point,
pub negative: bool,
pub expected_capture_name: String,
}
impl Assertion {
pub fn new(row: usize, col: usize, negative: bool, expected_capture_name: String) -> Self {
Self {
position: Point::new(row, col),
negative,
expected_capture_name,
}
}
}
/// Parse the given source code, finding all of the comments that contain
/// highlighting assertions. Return a vector of `Assertion` values, each
/// carrying a position, a negation flag, and an expected capture name.
@ -54,6 +65,7 @@ pub fn parse_position_comments(
// to its own column.
let mut has_left_caret = false;
let mut has_arrow = false;
let mut negative = false;
let mut arrow_end = 0;
for (i, c) in text.char_indices() {
arrow_end = i + 1;
@ -69,6 +81,19 @@ pub fn parse_position_comments(
has_left_caret = c == '<';
}
// find any ! after arrows but before capture name
if has_arrow {
for (i, c) in text[arrow_end..].char_indices() {
if c == '!' {
negative = true;
arrow_end += i + 1;
break;
} else if !c.is_whitespace() {
break;
}
}
}
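        // e.g. an arrow assertion like `<- keyword` expects the capture to be
        // present, while `<- !keyword` asserts that it is absent there.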
// If the comment node contains an arrow and a highlight name, record the
// highlight name and the position.
if let (true, Some(mat)) =
@ -76,7 +101,8 @@ pub fn parse_position_comments(
{
assertion_ranges.push((node.start_position(), node.end_position()));
result.push(Assertion {
position: position,
position,
negative,
expected_capture_name: mat.as_str().to_string(),
});
}

View file

@ -23,7 +23,7 @@ pub fn generate_tags(
}
let mut context = TagsContext::new();
let cancellation_flag = util::cancel_on_stdin();
let cancellation_flag = util::cancel_on_signal();
let stdout = io::stdout();
let mut stdout = stdout.lock();

View file

@ -16,11 +16,11 @@ use walkdir::WalkDir;
lazy_static! {
static ref HEADER_REGEX: ByteRegex =
ByteRegexBuilder::new(r"^===+(?P<suffix1>[^=\r\n][^\r\n]*)?\r?\n(?P<test_name>([^=\r\n][^\r\n]*\r?\n)+)===+(?P<suffix2>[^=\r\n][^\r\n]*)?\r?\n")
ByteRegexBuilder::new(r"^(?P<equals>(?:=+){3,})(?P<suffix1>[^=\r\n][^\r\n]*)?\r?\n(?P<test_name>([^=\r\n][^\r\n]*\r?\n)+)===+(?P<suffix2>[^=\r\n][^\r\n]*)?\r?\n")
.multi_line(true)
.build()
.unwrap();
static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^---+(?P<suffix>[^-\r\n][^\r\n]*)?\r?\n")
static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^(?P<hyphens>(?:-+){3,})(?P<suffix>[^-\r\n][^\r\n]*)?\r?\n")
.multi_line(true)
.build()
.unwrap();
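// A corpus test entry looks like the following, with `=` header delimiters
// and a `-` divider between the input and the expected output:
//
//   ===============
//   The first test
//   ===============
//
//   a b c
//
//   ---
//
//   (a (b c))
//
// The named `equals` and `hyphens` groups record each entry's delimiter
// lengths, so updated test files keep their original delimiters instead of
// a fixed 80 characters.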
@ -40,6 +40,8 @@ pub enum TestEntry {
name: String,
input: Vec<u8>,
output: String,
header_delim_len: usize,
divider_delim_len: usize,
has_fields: bool,
},
}
@ -177,13 +179,15 @@ fn run_tests(
mut indent_level: i32,
failures: &mut Vec<(String, String, String)>,
update: bool,
corrected_entries: &mut Vec<(String, String, String)>,
corrected_entries: &mut Vec<(String, String, String, usize, usize)>,
) -> Result<()> {
match test_entry {
TestEntry::Example {
name,
input,
output,
header_delim_len,
divider_delim_len,
has_fields,
} => {
if let Some(filter) = filter {
@ -191,7 +195,13 @@ fn run_tests(
if update {
let input = String::from_utf8(input).unwrap();
let output = format_sexp(&output);
corrected_entries.push((name, input, output));
corrected_entries.push((
name,
input,
output,
header_delim_len,
divider_delim_len,
));
}
return Ok(());
}
@ -201,21 +211,31 @@ fn run_tests(
if !has_fields {
actual = strip_sexp_fields(actual);
}
for _ in 0..indent_level {
print!(" ");
}
print!("{}", " ".repeat(indent_level as usize));
if actual == output {
println!("{}", Colour::Green.paint(&name));
if update {
let input = String::from_utf8(input).unwrap();
let output = format_sexp(&output);
corrected_entries.push((name, input, output));
corrected_entries.push((
name,
input,
output,
header_delim_len,
divider_delim_len,
));
}
} else {
if update {
let input = String::from_utf8(input).unwrap();
let output = format_sexp(&actual);
corrected_entries.push((name.clone(), input, output));
corrected_entries.push((
name.clone(),
input,
output,
header_delim_len,
divider_delim_len,
));
println!("{}", Colour::Blue.paint(&name));
} else {
println!("{}", Colour::Red.paint(&name));
@ -229,9 +249,7 @@ fn run_tests(
file_path,
} => {
if indent_level > 0 {
for _ in 0..indent_level {
print!(" ");
}
print!("{}", " ".repeat(indent_level as usize));
println!("{}:", name);
}
@ -312,27 +330,32 @@ fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String {
formatted
}
fn write_tests(file_path: &Path, corrected_entries: &Vec<(String, String, String)>) -> Result<()> {
fn write_tests(
file_path: &Path,
corrected_entries: &Vec<(String, String, String, usize, usize)>,
) -> Result<()> {
let mut buffer = fs::File::create(file_path)?;
write_tests_to_buffer(&mut buffer, corrected_entries)
}
fn write_tests_to_buffer(
buffer: &mut impl Write,
corrected_entries: &Vec<(String, String, String)>,
corrected_entries: &Vec<(String, String, String, usize, usize)>,
) -> Result<()> {
for (i, (name, input, output)) in corrected_entries.iter().enumerate() {
for (i, (name, input, output, header_delim_len, divider_delim_len)) in
corrected_entries.iter().enumerate()
{
if i > 0 {
write!(buffer, "\n")?;
}
write!(
buffer,
"{}\n{}\n{}\n{}\n{}\n\n{}\n",
"=".repeat(80),
"=".repeat(*header_delim_len),
name,
"=".repeat(80),
"=".repeat(*header_delim_len),
input,
"-".repeat(80),
"-".repeat(*divider_delim_len),
output.trim()
)?;
}
@ -351,9 +374,18 @@ pub fn parse_tests(path: &Path) -> io::Result<TestEntry> {
let entry = entry?;
let hidden = entry.file_name().to_str().unwrap_or("").starts_with(".");
if !hidden {
children.push(parse_tests(&entry.path())?);
children.push(entry.path());
}
}
children.sort_by(|a, b| {
a.file_name()
.unwrap_or_default()
.cmp(&b.file_name().unwrap_or_default())
});
let children = children
.iter()
.map(|path| parse_tests(path))
.collect::<io::Result<Vec<TestEntry>>>()?;
Ok(TestEntry::Group {
name,
children,
@ -387,6 +419,7 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
// Ignore any matches whose suffix does not match the first header
// suffix in the file.
let header_matches = HEADER_REGEX.captures_iter(&bytes).filter_map(|c| {
let header_delim_len = c.name("equals").map(|n| n.as_bytes().len()).unwrap_or(80);
let suffix1 = c
.name("suffix1")
.map(|m| String::from_utf8_lossy(m.as_bytes()));
@ -398,13 +431,17 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
let test_name = c
.name("test_name")
.map(|c| String::from_utf8_lossy(c.as_bytes()).trim_end().to_string());
Some((header_range, test_name))
            Some((header_delim_len, header_range, test_name))
} else {
None
}
});
for (header_range, test_name) in header_matches.chain(Some((bytes.len()..bytes.len(), None))) {
let mut prev_header_len = 80;
for (header_delim_len, header_range, test_name) in
header_matches.chain(Some((80, bytes.len()..bytes.len(), None)))
{
// Find the longest line of dashes following each test description. That line
// separates the input from the expected output. Ignore any matches whose suffix
// does not match the first suffix in the file.
@ -412,19 +449,25 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
let divider_range = DIVIDER_REGEX
.captures_iter(&bytes[prev_header_end..header_range.start])
.filter_map(|m| {
let divider_delim_len =
m.name("hyphens").map(|m| m.as_bytes().len()).unwrap_or(80);
let suffix = m
.name("suffix")
.map(|m| String::from_utf8_lossy(m.as_bytes()));
if suffix == first_suffix {
let range = m.get(0).unwrap().range();
Some((prev_header_end + range.start)..(prev_header_end + range.end))
                        Some((
                            divider_delim_len,
                            (prev_header_end + range.start)..(prev_header_end + range.end),
                        ))
} else {
None
}
})
.max_by_key(|range| range.len());
.max_by_key(|(_, range)| range.len());
if let Some(divider_range) = divider_range {
if let Some((divider_delim_len, divider_range)) = divider_range {
if let Ok(output) = str::from_utf8(&bytes[divider_range.end..header_range.start]) {
let mut input = bytes[prev_header_end..divider_range.start].to_vec();
@ -449,12 +492,15 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
name: prev_name,
input,
output,
header_delim_len: prev_header_len,
divider_delim_len,
has_fields,
});
}
}
}
prev_name = test_name.unwrap_or(String::new());
prev_header_len = header_delim_len;
prev_header_end = header_range.end;
}
TestEntry::Group {
@ -505,12 +551,16 @@ d
name: "The first test".to_string(),
input: "\na b c\n".as_bytes().to_vec(),
output: "(a (b c))".to_string(),
header_delim_len: 15,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "The second test".to_string(),
input: "d".as_bytes().to_vec(),
output: "(d)".to_string(),
header_delim_len: 16,
divider_delim_len: 3,
has_fields: false,
},
],
@ -559,12 +609,16 @@ abc
name: "Code with dashes".to_string(),
input: "abc\n---\ndefg\n----\nhijkl".as_bytes().to_vec(),
output: "(a (b))".to_string(),
header_delim_len: 18,
divider_delim_len: 7,
has_fields: false,
},
TestEntry::Example {
name: "Code ending with dashes".to_string(),
input: "abc\n-----------".as_bytes().to_vec(),
output: "(c (d))".to_string(),
header_delim_len: 25,
divider_delim_len: 19,
has_fields: false,
},
],
@ -608,11 +662,15 @@ abc
"title 1".to_string(),
"input 1".to_string(),
"output 1".to_string(),
80,
80,
),
(
"title 2".to_string(),
"input 2".to_string(),
"output 2".to_string(),
80,
80,
),
];
write_tests_to_buffer(&mut buffer, &corrected_entries).unwrap();
@ -689,18 +747,24 @@ code
name: "sexp with comment".to_string(),
input: "code".as_bytes().to_vec(),
output: "(a (b))".to_string(),
header_delim_len: 18,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "sexp with comment between".to_string(),
input: "code".as_bytes().to_vec(),
output: "(a (b))".to_string(),
header_delim_len: 18,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "sexp with ';'".to_string(),
input: "code".as_bytes().to_vec(),
output: "(MISSING \";\")".to_string(),
header_delim_len: 25,
divider_delim_len: 3,
has_fields: false,
}
],
@ -773,18 +837,24 @@ NOT A TEST HEADER
name: "First test".to_string(),
input: expected_input.clone(),
output: "(a)".to_string(),
header_delim_len: 18,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "Second test".to_string(),
input: expected_input.clone(),
output: "(a)".to_string(),
header_delim_len: 18,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "Test name with = symbol".to_string(),
input: expected_input.clone(),
output: "(a)".to_string(),
header_delim_len: 25,
divider_delim_len: 3,
has_fields: false,
}
],
@ -828,12 +898,16 @@ code with ----
name: "name\nwith\nnewlines".to_string(),
input: b"a".to_vec(),
output: "(b)".to_string(),
header_delim_len: 15,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "name with === signs".to_string(),
input: b"code with ----".to_vec(),
output: "(d)".to_string(),
header_delim_len: 20,
divider_delim_len: 3,
has_fields: false,
}
]

View file

@ -42,41 +42,74 @@ pub fn test_highlights(
loader: &Loader,
highlighter: &mut Highlighter,
directory: &Path,
apply_all_captures: bool,
) -> Result<()> {
println!("syntax highlighting:");
test_highlights_indented(loader, highlighter, directory, apply_all_captures, 2)
}
fn test_highlights_indented(
loader: &Loader,
highlighter: &mut Highlighter,
directory: &Path,
apply_all_captures: bool,
indent_level: usize,
) -> Result<()> {
let mut failed = false;
println!("syntax highlighting:");
for highlight_test_file in fs::read_dir(directory)? {
let highlight_test_file = highlight_test_file?;
let test_file_path = highlight_test_file.path();
let test_file_name = highlight_test_file.file_name();
let (language, language_config) = loader
.language_configuration_for_file_name(&test_file_path)?
.ok_or_else(|| anyhow!("No language found for path {:?}", test_file_path))?;
let highlight_config = language_config
.highlight_config(language)?
.ok_or_else(|| anyhow!("No highlighting config found for {:?}", test_file_path))?;
match test_highlight(
&loader,
highlighter,
highlight_config,
fs::read(&test_file_path)?.as_slice(),
) {
Ok(assertion_count) => {
println!(
" ✓ {} ({} assertions)",
Colour::Green.paint(test_file_name.to_string_lossy().as_ref()),
assertion_count
);
}
Err(e) => {
println!(
" ✗ {}",
Colour::Red.paint(test_file_name.to_string_lossy().as_ref())
);
println!(" {}", e);
print!(
"{indent:indent_level$}",
indent = "",
indent_level = indent_level * 2
);
        if test_file_path.is_dir() && test_file_path.read_dir()?.next().is_some() {
println!("{}:", test_file_name.into_string().unwrap());
if let Err(_) = test_highlights_indented(
loader,
highlighter,
&test_file_path,
apply_all_captures,
indent_level + 1,
) {
failed = true;
}
} else {
let (language, language_config) = loader
.language_configuration_for_file_name(&test_file_path)?
.ok_or_else(|| anyhow!("No language found for path {:?}", test_file_path))?;
let highlight_config = language_config
.highlight_config(language, apply_all_captures, None)?
.ok_or_else(|| anyhow!("No highlighting config found for {:?}", test_file_path))?;
match test_highlight(
&loader,
highlighter,
highlight_config,
fs::read(&test_file_path)?.as_slice(),
) {
Ok(assertion_count) => {
println!(
"✓ {} ({} assertions)",
Colour::Green.paint(test_file_name.to_string_lossy().as_ref()),
assertion_count
);
}
Err(e) => {
println!(
"✗ {}",
Colour::Red.paint(test_file_name.to_string_lossy().as_ref())
);
println!(
"{indent:indent_level$} {e}",
indent = "",
indent_level = indent_level * 2
);
failed = true;
}
}
}
}
@ -94,9 +127,10 @@ pub fn iterate_assertions(
// Iterate through all of the highlighting assertions, checking each one against the
// actual highlights.
let mut i = 0;
let mut actual_highlights = Vec::<&String>::new();
let mut actual_highlights = Vec::new();
for Assertion {
position,
negative,
expected_capture_name: expected_highlight,
} in assertions
{
@ -120,12 +154,13 @@ pub fn iterate_assertions(
break 'highlight_loop;
}
// If the highlight matches the assertion, this test passes. Otherwise,
                    // If the highlight matches a positive assertion, or differs
                    // from a negative assertion's name, this test passes. Otherwise,
// add this highlight to the list of actual highlights that span the
// assertion's position, in order to generate an error message in the event
// of a failure.
let highlight_name = &highlight_names[(highlight.2).0];
if *highlight_name == *expected_highlight {
if (*highlight_name == *expected_highlight) == !negative {
passed = true;
break 'highlight_loop;
} else {
@ -165,68 +200,7 @@ pub fn test_highlight(
let assertions =
parse_position_comments(highlighter.parser(), highlight_config.language, source)?;
iterate_assertions(&assertions, &highlights, &highlight_names)?;
// Iterate through all of the highlighting assertions, checking each one against the
// actual highlights.
let mut i = 0;
let mut actual_highlights = Vec::<&String>::new();
for Assertion {
position,
expected_capture_name: expected_highlight,
} in &assertions
{
let mut passed = false;
actual_highlights.clear();
'highlight_loop: loop {
// The assertions are ordered by position, so skip past all of the highlights that
// end at or before this assertion's position.
if let Some(highlight) = highlights.get(i) {
if highlight.1 <= *position {
i += 1;
continue;
}
// Iterate through all of the highlights that start at or before this assertion's,
// position, looking for one that matches the assertion.
let mut j = i;
while let (false, Some(highlight)) = (passed, highlights.get(j)) {
if highlight.0 > *position {
break 'highlight_loop;
}
// If the highlight matches the assertion, this test passes. Otherwise,
// add this highlight to the list of actual highlights that span the
// assertion's position, in order to generate an error message in the event
// of a failure.
let highlight_name = &highlight_names[(highlight.2).0];
if *highlight_name == *expected_highlight {
passed = true;
break 'highlight_loop;
} else {
actual_highlights.push(highlight_name);
}
j += 1;
}
} else {
break;
}
}
if !passed {
return Err(Failure {
row: position.row,
column: position.column,
expected_highlight: expected_highlight.clone(),
actual_highlights: actual_highlights.into_iter().cloned().collect(),
}
.into());
}
}
Ok(assertions.len())
iterate_assertions(&assertions, &highlights, &highlight_names)
}
pub fn get_highlight_positions(
@ -244,7 +218,7 @@ pub fn get_highlight_positions(
let source = String::from_utf8_lossy(source);
let mut char_indices = source.char_indices();
for event in highlighter.highlight(highlight_config, source.as_bytes(), None, |string| {
loader.highlight_config_for_injection_string(string)
loader.highlight_config_for_injection_string(string, highlight_config.apply_all_captures)
})? {
match event? {
HighlightEvent::HighlightStart(h) => highlight_stack.push(h),

View file

@ -95,6 +95,7 @@ pub fn test_tag(
let mut actual_tags = Vec::<&String>::new();
for Assertion {
position,
negative,
expected_capture_name: expected_tag,
} in &assertions
{
@ -116,7 +117,7 @@ pub fn test_tag(
}
let tag_name = &tag.2;
if *tag_name == *expected_tag {
if (*tag_name == *expected_tag) == !negative {
passed = true;
break 'tag_loop;
} else {
@ -124,6 +125,9 @@ pub fn test_tag(
}
j += 1;
if tag == tags.last().unwrap() {
break 'tag_loop;
}
}
} else {
break;

View file

@ -0,0 +1,279 @@
use super::helpers::fixtures::get_language;
use std::future::Future;
use std::pin::{pin, Pin};
use std::ptr;
use std::task::{self, Context, Poll, RawWaker, RawWakerVTable, Waker};
use tree_sitter::Parser;
#[test]
fn test_node_in_fut() {
let (ret, pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("bash");
parser.set_language(language).unwrap();
let tree = parser.parse("#", None).unwrap();
let root = tree.root_node();
let root_ref = &root;
let fut_val_fn = || async {
// eprintln!("fut_val_fn: {}", root.child(0).unwrap().kind());
yield_now().await;
root.child(0).unwrap().kind()
};
yield_now().await;
let fut_ref_fn = || async {
// eprintln!("fut_ref_fn: {}", root_ref.child(0).unwrap().kind());
yield_now().await;
root_ref.child(0).unwrap().kind()
};
let f1 = fut_val_fn().await;
let f2 = fut_ref_fn().await;
assert_eq!(f1, f2);
let fut_val = async {
// eprintln!("fut_val: {}", root.child(0).unwrap().kind());
yield_now().await;
root.child(0).unwrap().kind()
};
let fut_ref = async {
// eprintln!("fut_ref: {}", root_ref.child(0).unwrap().kind());
yield_now().await;
root_ref.child(0).unwrap().kind()
};
let f1 = fut_val.await;
let f2 = fut_ref.await;
assert_eq!(f1, f2);
f1
})
.join();
// eprintln!("pended: {pended:?}");
assert_eq!(ret, "comment");
assert_eq!(pended, 5);
}
#[test]
fn test_node_and_cursor_ref_in_fut() {
let (_, pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("bash");
parser.set_language(language).unwrap();
let tree = parser.parse("#", None).unwrap();
let root = tree.root_node();
let root_ref = &root;
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
cursor_ref.goto_first_child();
let fut_val = async {
yield_now().await;
root.to_sexp();
};
yield_now().await;
let fut_ref = async {
yield_now().await;
root_ref.to_sexp();
cursor_ref.goto_first_child();
};
fut_val.await;
fut_ref.await;
cursor_ref.goto_first_child();
})
.join();
assert_eq!(pended, 3);
}
#[test]
fn test_node_and_cursor_ref_in_fut_with_fut_fabrics() {
let (_, pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("bash");
parser.set_language(language).unwrap();
let tree = parser.parse("#", None).unwrap();
let root = tree.root_node();
let root_ref = &root;
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
cursor_ref.goto_first_child();
let fut_val = || async {
yield_now().await;
root.to_sexp();
};
yield_now().await;
let fut_ref = || async move {
yield_now().await;
root_ref.to_sexp();
cursor_ref.goto_first_child();
};
fut_val().await;
fut_val().await;
fut_ref().await;
})
.join();
assert_eq!(pended, 4);
}
#[test]
fn test_node_and_cursor_ref_in_fut_with_inner_spawns() {
let (ret, pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("bash");
parser.set_language(language).unwrap();
let tree = parser.parse("#", None).unwrap();
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
cursor_ref.goto_first_child();
let fut_val = || {
let tree = tree.clone();
async move {
let root = tree.root_node();
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
yield_now().await;
root.to_sexp();
cursor_ref.goto_first_child();
}
};
yield_now().await;
let fut_ref = || {
let tree = tree.clone();
async move {
let root = tree.root_node();
let root_ref = &root;
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
yield_now().await;
root_ref.to_sexp();
cursor_ref.goto_first_child();
}
};
let (_, p1) = tokio_like_spawn(fut_val()).await.unwrap();
let (_, p2) = tokio_like_spawn(fut_ref()).await.unwrap();
cursor_ref.goto_first_child();
fut_val().await;
fut_val().await;
fut_ref().await;
cursor_ref.goto_first_child();
p1 + p2
})
.join();
assert_eq!(pended, 4);
assert_eq!(ret, 2);
}
fn tokio_like_spawn<T>(future: T) -> JoinHandle<(T::Output, usize)>
where
T: Future + Send + 'static,
T::Output: Send + 'static,
{
// No runtime, just noop waker
let waker = noop_waker();
let mut cx = task::Context::from_waker(&waker);
let mut pending = 0;
let mut future = pin!(future);
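    // Poll the pinned future to completion using the no-op waker, counting
    // how many times it returns `Poll::Pending` along the way.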
let ret = loop {
match future.as_mut().poll(&mut cx) {
Poll::Pending => pending += 1,
Poll::Ready(r) => {
// eprintln!("ready, pended: {pending}");
break r;
}
}
};
JoinHandle::new((ret, pending))
}
async fn yield_now() {
struct SimpleYieldNow {
yielded: bool,
}
impl Future for SimpleYieldNow {
type Output = ();
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> {
cx.waker().clone().wake();
if self.yielded {
return Poll::Ready(());
}
self.yielded = true;
Poll::Pending
}
}
SimpleYieldNow { yielded: false }.await
}
pub fn noop_waker() -> Waker {
const VTABLE: RawWakerVTable = RawWakerVTable::new(
// Cloning just returns a new no-op raw waker
|_| RAW,
// `wake` does nothing
|_| {},
// `wake_by_ref` does nothing
|_| {},
// Dropping does nothing as we don't allocate anything
|_| {},
);
const RAW: RawWaker = RawWaker::new(ptr::null(), &VTABLE);
unsafe { Waker::from_raw(RAW) }
}
struct JoinHandle<T> {
data: Option<T>,
}
impl<T> JoinHandle<T> {
fn new(data: T) -> Self {
Self { data: Some(data) }
}
fn join(&mut self) -> T {
self.data.take().unwrap()
}
}
impl<T: Unpin> Future for JoinHandle<T> {
type Output = std::result::Result<T, ()>;
fn poll(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Self::Output> {
let data = self.get_mut().data.take().unwrap();
Poll::Ready(Ok(data))
}
}

View file

@ -1,7 +1,8 @@
use super::helpers::{
allocations,
edits::{get_random_edit, invert_edit},
fixtures::{fixtures_dir, get_language, get_test_language},
fixtures::{fixtures_dir, get_language, get_test_language, SCRATCH_BASE_DIR},
new_seed,
random::Rand,
scope_sequence::ScopeSequence,
EDIT_COUNT, EXAMPLE_FILTER, ITERATION_COUNT, LANGUAGE_FILTER, LOG_ENABLED, LOG_GRAPH_ENABLED,
@ -13,70 +14,81 @@ use crate::{
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
util,
};
use std::fs;
use std::{collections::HashMap, env, fs};
use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
use tree_sitter_proc_macro::test_with_seed;
#[test]
fn test_bash_corpus() {
test_language_corpus("bash");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_bash(seed: usize) {
test_language_corpus(
"bash",
seed,
Some(&[
            // Fragile tests where changes to the edit customization
            // lead to significant parse tree structure changes.
"bash - corpus - commands - Nested Heredocs",
"bash - corpus - commands - Quoted Heredocs",
"bash - corpus - commands - Heredocs with weird characters",
]),
);
}
#[test]
fn test_c_corpus() {
test_language_corpus("c");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_c(seed: usize) {
test_language_corpus("c", seed, None);
}
#[test]
fn test_cpp_corpus() {
test_language_corpus("cpp");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_cpp(seed: usize) {
test_language_corpus("cpp", seed, None);
}
#[test]
fn test_embedded_template_corpus() {
test_language_corpus("embedded-template");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_embedded_template(seed: usize) {
test_language_corpus("embedded-template", seed, None);
}
#[test]
fn test_go_corpus() {
test_language_corpus("go");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_go(seed: usize) {
test_language_corpus("go", seed, None);
}
#[test]
fn test_html_corpus() {
test_language_corpus("html");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_html(seed: usize) {
test_language_corpus("html", seed, None);
}
#[test]
fn test_javascript_corpus() {
test_language_corpus("javascript");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_javascript(seed: usize) {
test_language_corpus("javascript", seed, None);
}
#[test]
fn test_json_corpus() {
test_language_corpus("json");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_json(seed: usize) {
test_language_corpus("json", seed, None);
}
#[test]
fn test_php_corpus() {
test_language_corpus("php");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_php(seed: usize) {
test_language_corpus("php", seed, None);
}
#[test]
fn test_python_corpus() {
test_language_corpus("python");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_python(seed: usize) {
test_language_corpus("python", seed, None);
}
#[test]
fn test_ruby_corpus() {
test_language_corpus("ruby");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_ruby(seed: usize) {
test_language_corpus("ruby", seed, None);
}
#[test]
fn test_rust_corpus() {
test_language_corpus("rust");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_rust(seed: usize) {
test_language_corpus("rust", seed, None);
}
fn test_language_corpus(language_name: &str) {
fn test_language_corpus(language_name: &str, start_seed: usize, skipped: Option<&[&str]>) {
let grammars_dir = fixtures_dir().join("grammars");
let error_corpus_dir = fixtures_dir().join("error_corpus");
let template_corpus_dir = fixtures_dir().join("template_corpus");
@ -98,10 +110,30 @@ fn test_language_corpus(language_name: &str) {
t
}));
let mut skipped = skipped.map(|x| HashMap::<&str, usize>::from_iter(x.iter().map(|x| (*x, 0))));
let language = get_language(language_name);
let mut failure_count = 0;
for test in tests {
println!(" {} example - {}", language_name, test.name);
let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
if log_seed {
println!(" start seed: {}", start_seed);
}
println!();
for (test_index, test) in tests.iter().enumerate() {
let test_name = format!("{language_name} - {}", test.name);
if let Some(skipped) = skipped.as_mut() {
if let Some(counter) = skipped.get_mut(test_name.as_str()) {
println!(" {test_index}. {test_name} - SKIPPED");
*counter += 1;
continue;
}
}
println!(" {test_index}. {test_name}");
let passed = allocations::record(|| {
let mut log_session = None;
@ -116,10 +148,7 @@ fn test_language_corpus(language_name: &str) {
}
if actual_output != test.output {
println!(
"Incorrect initial parse for {} - {}",
language_name, test.name,
);
println!("Incorrect initial parse for {test_name}");
print_diff_key();
print_diff(&actual_output, &test.output);
println!("");
@ -140,7 +169,7 @@ fn test_language_corpus(language_name: &str) {
drop(parser);
for trial in 0..*ITERATION_COUNT {
let seed = *START_SEED + trial;
let seed = start_seed + trial;
let passed = allocations::record(|| {
let mut rand = Rand::new(seed);
let mut log_session = None;
@ -158,10 +187,21 @@ fn test_language_corpus(language_name: &str) {
for _ in 0..1 + rand.unsigned(*EDIT_COUNT) {
let edit = get_random_edit(&mut rand, &input);
undo_stack.push(invert_edit(&input, &edit));
perform_edit(&mut tree, &mut input, &edit);
perform_edit(&mut tree, &mut input, &edit).unwrap();
}
// println!(" seed: {}", seed);
if log_seed {
println!(" {test_index}.{trial:<2} seed: {}", seed);
}
if dump_edits {
fs::write(
SCRATCH_BASE_DIR
.join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
&input,
)
.unwrap();
}
if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));
@ -173,16 +213,13 @@ fn test_language_corpus(language_name: &str) {
// Check that the new tree is consistent.
check_consistent_sizes(&tree2, &input);
if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
println!(
"\nUnexpected scope change in seed {}\n{}\n\n",
seed, message
);
println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
return false;
}
// Undo all of the edits and re-parse again.
while let Some(edit) = undo_stack.pop() {
perform_edit(&mut tree2, &mut input, &edit);
perform_edit(&mut tree2, &mut input, &edit).unwrap();
}
if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));
@ -198,10 +235,7 @@ fn test_language_corpus(language_name: &str) {
}
if actual_output != test.output {
println!(
"Incorrect parse for {} - {} - seed {}",
language_name, test.name, seed
);
println!("Incorrect parse for {test_name} - seed {seed}");
print_diff_key();
print_diff(&actual_output, &test.output);
println!("");
@ -211,7 +245,7 @@ fn test_language_corpus(language_name: &str) {
// Check that the edited tree is consistent.
check_consistent_sizes(&tree3, &input);
if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
eprintln!("Unexpected scope change in seed {}\n{}\n\n", seed, message);
println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
return false;
}
@ -228,6 +262,18 @@ fn test_language_corpus(language_name: &str) {
if failure_count > 0 {
panic!("{} {} corpus tests failed", failure_count, language_name);
}
if let Some(skipped) = skipped.as_mut() {
skipped.retain(|_, v| *v == 0);
        if !skipped.is_empty() {
            println!("Non-matchable skip definitions:");
            for k in skipped.keys() {
                println!(" {k}");
            }
            panic!("Non-matchable skip definitions need to be removed");
}
}
}
#[test]
@ -255,7 +301,7 @@ fn test_feature_corpus_files() {
grammar_path = test_path.join("grammar.json");
}
let error_message_path = test_path.join("expected_error.txt");
let grammar_json = generate::load_grammar_file(&grammar_path).unwrap();
let grammar_json = generate::load_grammar_file(&grammar_path, None).unwrap();
let generate_result = generate::generate_parser_for_grammar(&grammar_json);
if error_message_path.exists() {
@ -424,7 +470,12 @@ fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&s
let mut ranges = Vec::new();
let mut ix = 0;
while ix < input.len() {
let Some(mut start_ix) = input[ix..].windows(2).position(|win| win == start.as_bytes()) else { break };
let Some(mut start_ix) = input[ix..]
.windows(2)
.position(|win| win == start.as_bytes())
else {
break;
};
start_ix += ix + start.len();
let end_ix = input[start_ix..]
.windows(2)
@ -492,6 +543,7 @@ fn flatten_tests(test: TestEntry) -> Vec<FlattenedTest> {
input,
output,
has_fields,
..
} => {
if !prefix.is_empty() {
name.insert_str(0, " - ");

View file

@ -0,0 +1,42 @@
// Tests in this mod need to be executed with the UBSAN library enabled:
// ```
// UBSAN_OPTIONS="halt_on_error=1" \
// CFLAGS="-fsanitize=undefined" \
// RUSTFLAGS="-lubsan" \
// cargo test --target $(rustc -vV | sed -nr 's/^host: //p') -- --test-threads 1
// ```
use super::helpers::query_helpers::assert_query_matches;
use crate::tests::helpers::fixtures::get_language;
use indoc::indoc;
use tree_sitter::Query;
#[test]
fn issue_2162_out_of_bound() {
let language = get_language("java");
assert!(Query::new(language, "(package_declaration _ (_) @name _)").is_ok());
}
#[test]
fn issue_2107_first_child_group_anchor_had_no_effect() {
let language = get_language("c");
let source_code = indoc! {r#"
void fun(int a, char b, int c) { };
"#};
let query = indoc! {r#"
(parameter_list
.
(
(parameter_declaration) @constant
(#match? @constant "^int")
)
)
"#};
let query = Query::new(language, query).unwrap();
assert_query_matches(
language,
&query,
source_code,
&[(0, vec![("constant", "int a")])],
);
}

View file

@ -2,7 +2,7 @@ use std::{
collections::HashMap,
os::raw::c_void,
sync::{
atomic::{AtomicBool, AtomicU64, Ordering::SeqCst},
atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst},
Mutex,
},
};
@ -25,8 +25,8 @@ unsafe impl Sync for Allocation {}
#[derive(Default)]
struct AllocationRecorder {
enabled: AtomicBool,
allocation_count: AtomicU64,
outstanding_allocations: Mutex<HashMap<Allocation, u64>>,
allocation_count: AtomicUsize,
outstanding_allocations: Mutex<HashMap<Allocation, usize>>,
}
thread_local! {
@ -83,6 +83,9 @@ fn record_alloc(ptr: *mut c_void) {
}
fn record_dealloc(ptr: *mut c_void) {
if ptr.is_null() {
panic!("Zero pointer deallocation!");
}
RECORDER.with(|recorder| {
if recorder.enabled.load(SeqCst) {
recorder
@ -107,9 +110,13 @@ unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void
}
unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void {
record_dealloc(ptr);
let result = realloc(ptr, size);
record_alloc(result);
if ptr.is_null() {
record_alloc(result);
} else if ptr != result {
record_dealloc(ptr);
record_alloc(result);
}
result
}

View file

@ -1,11 +1,46 @@
lazy_static! {
static ref ROOT_DIR: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).parent().unwrap().to_owned();
static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures");
static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include");
static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars");
static ref SCRATCH_DIR: PathBuf = {
pub static ref ROOT_DIR: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).parent().unwrap().to_owned();
pub static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures");
pub static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include");
pub static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars");
pub static ref SCRATCH_BASE_DIR: PathBuf = {
let result = ROOT_DIR.join("target").join("scratch");
fs::create_dir_all(&result).unwrap();
result
};
pub static ref SCRATCH_DIR: PathBuf = {
// https://doc.rust-lang.org/reference/conditional-compilation.html
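        // Build a target-specific directory name (arch-os-vendor-env-endian)
        // so that scratch artifacts for different build targets don't collide.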
let vendor = if cfg!(target_vendor = "apple") {
"apple"
} else if cfg!(target_vendor = "fortanix") {
"fortanix"
} else if cfg!(target_vendor = "pc") {
"pc"
} else {
"unknown"
};
let env = if cfg!(target_env = "gnu") {
"gnu"
} else if cfg!(target_env = "msvc") {
"msvc"
} else if cfg!(target_env = "musl") {
"musl"
} else if cfg!(target_env = "sgx") {
"sgx"
} else {
"unknown"
};
let endian = if cfg!(target_endian = "little") {
"little"
} else if cfg!(target_endian = "big") {
"big"
} else {
"unknown"
};
let machine = format!("{}-{}-{}-{}-{}", std::env::consts::ARCH, std::env::consts::OS, vendor, env, endian);
let result = SCRATCH_BASE_DIR.join(machine);
fs::create_dir_all(&result).unwrap();
result
};
}

View file

@ -1,6 +1,6 @@
use lazy_static::lazy_static;
use std::fs;
use std::path::{Path, PathBuf};
use std::{env, fs};
use tree_sitter::Language;
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_loader::Loader;
@ -9,7 +9,13 @@ use tree_sitter_tags::TagsConfiguration;
include!("./dirs.rs");
lazy_static! {
static ref TEST_LOADER: Loader = Loader::with_parser_lib_path(SCRATCH_DIR.join("lib"));
static ref TEST_LOADER: Loader = {
let mut loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone());
if env::var("TREE_SITTER_GRAMMAR_DEBUG").is_ok() {
loader.use_debug_build(true);
}
loader
};
}
pub fn test_loader<'a>() -> &'a Loader {
@ -46,9 +52,11 @@ pub fn get_highlight_config(
let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or(String::new());
let mut result = HighlightConfiguration::new(
language,
language_name,
&highlights_query,
&injections_query,
&locals_query,
false,
)
.unwrap();
result.configure(&highlight_names);
@ -63,11 +71,7 @@ pub fn get_tags_config(language_name: &str) -> TagsConfiguration {
TagsConfiguration::new(language, &tags_query, &locals_query).unwrap()
}
pub fn get_test_language(
name: &str,
parser_code: &str,
scanner_src_path: Option<&Path>,
) -> Language {
pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language {
let src_dir = SCRATCH_DIR.join("src").join(name);
fs::create_dir_all(&src_dir).unwrap();
@ -76,11 +80,16 @@ pub fn get_test_language(
fs::write(&parser_path, parser_code).unwrap();
}
if let Some(scanner_src_path) = scanner_src_path {
let scanner_code = fs::read_to_string(&scanner_src_path).unwrap();
let scanner_path = src_dir.join("scanner.c");
if !fs::read_to_string(&scanner_path).map_or(false, |content| content == scanner_code) {
fs::write(&scanner_path, scanner_code).unwrap();
if let Some(path) = path {
let scanner_path = path.join("scanner.c");
if scanner_path.exists() {
let scanner_code = fs::read_to_string(&scanner_path).unwrap();
let scanner_copy_path = src_dir.join("scanner.c");
if !fs::read_to_string(&scanner_copy_path)
.map_or(false, |content| content == scanner_code)
{
fs::write(&scanner_copy_path, scanner_code).unwrap();
}
}
}

View file

@ -6,7 +6,8 @@ pub(super) mod random;
pub(super) mod scope_sequence;
use lazy_static::lazy_static;
use std::{env, time, usize};
use rand::Rng;
use std::env;
lazy_static! {
pub static ref LOG_ENABLED: bool = env::var("TREE_SITTER_LOG").is_ok();
@ -16,11 +17,7 @@ lazy_static! {
}
lazy_static! {
pub static ref START_SEED: usize =
int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| time::SystemTime::now()
.duration_since(time::UNIX_EPOCH)
.unwrap()
.as_secs() as usize,);
pub static ref START_SEED: usize = new_seed();
pub static ref EDIT_COUNT: usize = int_env_var("TREE_SITTER_EDITS").unwrap_or(3);
pub static ref ITERATION_COUNT: usize = int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10);
}
@ -28,3 +25,10 @@ lazy_static! {
fn int_env_var(name: &'static str) -> Option<usize> {
env::var(name).ok().and_then(|e| e.parse().ok())
}
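/// Seed for randomized corpus tests: honors `TREE_SITTER_SEED` when set,
/// otherwise draws a fresh random value (so retries get new seeds).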
pub(crate) fn new_seed() -> usize {
int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
let mut rng = rand::thread_rng();
rng.gen::<usize>()
})
}

View file

@ -1,6 +1,8 @@
use rand::prelude::Rng;
use std::{cmp::Ordering, fmt::Write, ops::Range};
use tree_sitter::{Node, Point, Tree, TreeCursor};
use tree_sitter::{
Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryMatch, Tree, TreeCursor,
};
#[derive(Debug)]
pub struct Pattern {
@ -304,3 +306,56 @@ fn compare_depth_first(a: Node, b: Node) -> Ordering {
let b = b.byte_range();
a.start.cmp(&b.start).then_with(|| b.end.cmp(&a.end))
}
pub fn assert_query_matches(
language: Language,
query: &Query,
source: &str,
expected: &[(usize, Vec<(&str, &str)>)],
) {
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), source.as_bytes());
pretty_assertions::assert_eq!(collect_matches(matches, &query, source), expected);
pretty_assertions::assert_eq!(cursor.did_exceed_match_limit(), false);
}
pub fn collect_matches<'a>(
matches: impl Iterator<Item = QueryMatch<'a, 'a>>,
query: &'a Query,
source: &'a str,
) -> Vec<(usize, Vec<(&'a str, &'a str)>)> {
matches
.map(|m| {
(
m.pattern_index,
format_captures(m.captures.iter().cloned(), query, source),
)
})
.collect()
}
pub fn collect_captures<'a>(
captures: impl Iterator<Item = (QueryMatch<'a, 'a>, usize)>,
query: &'a Query,
source: &'a str,
) -> Vec<(&'a str, &'a str)> {
format_captures(captures.map(|(m, i)| m.captures[i]), query, source)
}
fn format_captures<'a>(
captures: impl Iterator<Item = QueryCapture<'a>>,
query: &'a Query,
source: &'a str,
) -> Vec<(&'a str, &'a str)> {
captures
.map(|capture| {
(
query.capture_names()[capture.index as usize],
capture.node.utf8_text(source.as_bytes()).unwrap(),
)
})
.collect()
}

View file

@ -24,6 +24,7 @@ lazy_static! {
get_highlight_config("rust", Some("injections.scm"), &HIGHLIGHT_NAMES);
static ref HIGHLIGHT_NAMES: Vec<String> = [
"attribute",
"boolean",
"carriage-return",
"comment",
"constant",
@ -61,7 +62,7 @@ lazy_static! {
fn test_highlighting_javascript() {
let source = "const a = function(b) { return b + c; }";
assert_eq!(
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
&to_token_vector(source, &JS_HIGHLIGHT).unwrap(),
&[vec![
("const", vec!["keyword"]),
(" ", vec![]),
@ -71,14 +72,14 @@ fn test_highlighting_javascript() {
(" ", vec![]),
("function", vec!["keyword"]),
("(", vec!["punctuation.bracket"]),
("b", vec!["variable.parameter"]),
("b", vec!["variable"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("{", vec!["punctuation.bracket"]),
(" ", vec![]),
("return", vec!["keyword"]),
(" ", vec![]),
("b", vec!["variable.parameter"]),
("b", vec!["variable"]),
(" ", vec![]),
("+", vec!["operator"]),
(" ", vec![]),
@ -92,7 +93,7 @@ fn test_highlighting_javascript() {
#[test]
fn test_highlighting_injected_html_in_javascript() {
let source = vec!["const s = html `<div>${a < b}</div>`;"].join("\n");
let source = ["const s = html `<div>${a < b}</div>`;"].join("\n");
assert_eq!(
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
@ -156,7 +157,7 @@ fn test_highlighting_injected_javascript_in_html_mini() {
#[test]
fn test_highlighting_injected_javascript_in_html() {
let source = vec![
let source = [
"<body>",
" <script>",
" const x = new Thing();",
@ -211,7 +212,7 @@ fn test_highlighting_injected_javascript_in_html() {
#[test]
fn test_highlighting_multiline_nodes_to_html() {
let source = vec![
let source = [
"const SOMETHING = `",
" one ${",
" two()",
@ -235,7 +236,7 @@ fn test_highlighting_multiline_nodes_to_html() {
#[test]
fn test_highlighting_with_local_variable_tracking() {
let source = vec![
let source = [
"module.exports = function a(b) {",
" const module = c;",
" console.log(module, b);",
@ -257,7 +258,7 @@ fn test_highlighting_with_local_variable_tracking() {
(" ", vec![]),
("a", vec!["function"]),
("(", vec!["punctuation.bracket"]),
("b", vec!["variable.parameter"]),
("b", vec!["variable"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("{", vec!["punctuation.bracket"])
@ -284,7 +285,7 @@ fn test_highlighting_with_local_variable_tracking() {
(",", vec!["punctuation.delimiter"]),
(" ", vec![]),
// A parameter, because `b` was defined as a parameter above.
("b", vec!["variable.parameter"]),
("b", vec!["variable"]),
(")", vec!["punctuation.bracket"]),
(";", vec!["punctuation.delimiter"]),
],
@ -295,7 +296,7 @@ fn test_highlighting_with_local_variable_tracking() {
#[test]
fn test_highlighting_empty_lines() {
let source = vec![
let source = [
"class A {",
"",
" b(c) {",
@ -313,7 +314,7 @@ fn test_highlighting_empty_lines() {
&[
"<span class=keyword>class</span> <span class=constructor>A</span> <span class=punctuation.bracket>{</span>\n".to_string(),
"\n".to_string(),
" <span class=function>b</span><span class=punctuation.bracket>(</span><span class=variable.parameter>c</span><span class=punctuation.bracket>)</span> <span class=punctuation.bracket>{</span>\n".to_string(),
" <span class=function>b</span><span class=punctuation.bracket>(</span><span class=variable>c</span><span class=punctuation.bracket>)</span> <span class=punctuation.bracket>{</span>\n".to_string(),
"\n".to_string(),
" <span class=function>d</span><span class=punctuation.bracket>(</span><span class=variable>e</span><span class=punctuation.bracket>)</span>\n".to_string(),
"\n".to_string(),
@ -329,7 +330,7 @@ fn test_highlighting_carriage_returns() {
let source = "a = \"a\rb\"\r\nb\r";
assert_eq!(
&to_html(&source, &JS_HIGHLIGHT).unwrap(),
&to_html(source, &JS_HIGHLIGHT).unwrap(),
&[
"<span class=variable>a</span> <span class=operator>=</span> <span class=string>&quot;a<span class=carriage-return></span>b&quot;</span>\n",
"<span class=variable>b</span>\n",
@ -339,7 +340,7 @@ fn test_highlighting_carriage_returns() {
#[test]
fn test_highlighting_ejs_with_html_and_javascript() {
let source = vec!["<div><% foo() %></div><script> bar() </script>"].join("\n");
let source = ["<div><% foo() %></div><script> bar() </script>"].join("\n");
assert_eq!(
&to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(),
@ -376,7 +377,7 @@ fn test_highlighting_ejs_with_html_and_javascript() {
fn test_highlighting_javascript_with_jsdoc() {
// Regression test: the middle comment has no highlights. This should not prevent
// later injections from highlighting properly.
let source = vec!["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n");
let source = ["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n");
assert_eq!(
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
@ -404,7 +405,7 @@ fn test_highlighting_javascript_with_jsdoc() {
#[test]
fn test_highlighting_with_content_children_included() {
let source = vec!["assert!(", " a.b.c() < D::e::<F>()", ");"].join("\n");
let source = ["assert!(", " a.b.c() < D::e::<F>()", ");"].join("\n");
assert_eq!(
&to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(),
@ -482,7 +483,7 @@ fn test_highlighting_cancellation() {
#[test]
fn test_highlighting_via_c_api() {
let highlights = vec![
let highlights = [
"class=tag\0",
"class=function\0",
"class=string\0",
@ -496,68 +497,82 @@ fn test_highlighting_via_c_api() {
.iter()
.map(|h| h.as_bytes().as_ptr() as *const c_char)
.collect::<Vec<_>>();
let highlighter = c::ts_highlighter_new(
&highlight_names[0] as *const *const c_char,
&highlight_attrs[0] as *const *const c_char,
highlights.len() as u32,
);
let highlighter = unsafe {
c::ts_highlighter_new(
&highlight_names[0] as *const *const c_char,
&highlight_attrs[0] as *const *const c_char,
highlights.len() as u32,
)
};
let source_code = c_string("<script>\nconst a = b('c');\nc.d();\n</script>");
let js_scope = c_string("source.js");
let js_injection_regex = c_string("^javascript");
let language = get_language("javascript");
let lang_name = c_string("javascript");
let queries = get_language_queries_path("javascript");
let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap();
c::ts_highlighter_add_language(
highlighter,
js_scope.as_ptr(),
js_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const c_char,
injections_query.as_ptr() as *const c_char,
locals_query.as_ptr() as *const c_char,
highlights_query.len() as u32,
injections_query.len() as u32,
locals_query.len() as u32,
);
unsafe {
c::ts_highlighter_add_language(
highlighter,
lang_name.as_ptr(),
js_scope.as_ptr(),
js_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const c_char,
injections_query.as_ptr() as *const c_char,
locals_query.as_ptr() as *const c_char,
highlights_query.len() as u32,
injections_query.len() as u32,
locals_query.len() as u32,
false,
);
}
let html_scope = c_string("text.html.basic");
let html_injection_regex = c_string("^html");
let language = get_language("html");
let lang_name = c_string("html");
let queries = get_language_queries_path("html");
let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
c::ts_highlighter_add_language(
highlighter,
html_scope.as_ptr(),
html_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const c_char,
injections_query.as_ptr() as *const c_char,
ptr::null(),
highlights_query.len() as u32,
injections_query.len() as u32,
0,
);
unsafe {
c::ts_highlighter_add_language(
highlighter,
lang_name.as_ptr(),
html_scope.as_ptr(),
html_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const c_char,
injections_query.as_ptr() as *const c_char,
ptr::null(),
highlights_query.len() as u32,
injections_query.len() as u32,
0,
false,
);
}
let buffer = c::ts_highlight_buffer_new();
c::ts_highlighter_highlight(
highlighter,
html_scope.as_ptr(),
source_code.as_ptr(),
source_code.as_bytes().len() as u32,
buffer,
ptr::null_mut(),
);
unsafe {
c::ts_highlighter_highlight(
highlighter,
html_scope.as_ptr(),
source_code.as_ptr(),
source_code.as_bytes().len() as u32,
buffer,
ptr::null_mut(),
);
}
let output_bytes = c::ts_highlight_buffer_content(buffer);
let output_line_offsets = c::ts_highlight_buffer_line_offsets(buffer);
let output_len = c::ts_highlight_buffer_len(buffer);
let output_line_count = c::ts_highlight_buffer_line_count(buffer);
let output_bytes = unsafe { c::ts_highlight_buffer_content(buffer) };
let output_line_offsets = unsafe { c::ts_highlight_buffer_line_offsets(buffer) };
let output_len = unsafe { c::ts_highlight_buffer_len(buffer) };
let output_line_count = unsafe { c::ts_highlight_buffer_line_count(buffer) };
let output_bytes = unsafe { slice::from_raw_parts(output_bytes, output_len as usize) };
let output_line_offsets =
@ -583,8 +598,69 @@ fn test_highlighting_via_c_api() {
]
);
c::ts_highlighter_delete(highlighter);
c::ts_highlight_buffer_delete(buffer);
unsafe {
c::ts_highlighter_delete(highlighter);
c::ts_highlight_buffer_delete(buffer);
}
}
#[test]
fn test_highlighting_with_all_captures_applied() {
let source = "fn main(a: u32, b: u32) -> { let c = a + b; }";
let language = get_language("rust");
let highlights_query = indoc::indoc! {"
[
\"fn\"
\"let\"
] @keyword
(identifier) @variable
(function_item name: (identifier) @function)
(parameter pattern: (identifier) @variable.parameter)
(primitive_type) @type.builtin
\"=\" @operator
[ \"->\" \":\" \";\" ] @punctuation.delimiter
[ \"{\" \"}\" \"(\" \")\" ] @punctuation.bracket
"};
let mut rust_highlight_reverse =
HighlightConfiguration::new(language, "rust", highlights_query, "", "", true).unwrap();
rust_highlight_reverse.configure(&HIGHLIGHT_NAMES);
assert_eq!(
&to_token_vector(source, &rust_highlight_reverse).unwrap(),
&[[
("fn", vec!["keyword"]),
(" ", vec![]),
("main", vec!["function"]),
("(", vec!["punctuation.bracket"]),
("a", vec!["variable.parameter"]),
(":", vec!["punctuation.delimiter"]),
(" ", vec![]),
("u32", vec!["type.builtin"]),
(", ", vec![]),
("b", vec!["variable.parameter"]),
(":", vec!["punctuation.delimiter"]),
(" ", vec![]),
("u32", vec!["type.builtin"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("->", vec!["punctuation.delimiter"]),
(" ", vec![]),
("{", vec!["punctuation.bracket"]),
(" ", vec![]),
("let", vec!["keyword"]),
(" ", vec![]),
("c", vec!["variable"]),
(" ", vec![]),
("=", vec!["operator"]),
(" ", vec![]),
("a", vec!["variable"]),
(" + ", vec![]),
("b", vec!["variable"]),
(";", vec!["punctuation.delimiter"]),
(" ", vec![]),
("}", vec!["punctuation.bracket"])
]],
);
}
#[test]
@ -667,20 +743,20 @@ fn to_token_vector<'a>(
}
HighlightEvent::Source { start, end } => {
let s = str::from_utf8(&src[start..end]).unwrap();
for (i, l) in s.split("\n").enumerate() {
for (i, l) in s.split('\n').enumerate() {
let l = l.trim_end_matches('\r');
if i > 0 {
lines.push(line);
line = Vec::new();
}
if l.len() > 0 {
if !l.is_empty() {
line.push((l, highlights.clone()));
}
}
}
}
}
if line.len() > 0 {
if !line.is_empty() {
lines.push(line);
}
Ok(lines)

View file

@ -0,0 +1,95 @@
use super::helpers::fixtures::get_language;
use tree_sitter::Parser;
#[test]
fn test_lookahead_iterator() {
let mut parser = Parser::new();
let language = get_language("rust");
parser.set_language(language).unwrap();
let tree = parser.parse("struct Stuff {}", None).unwrap();
let mut cursor = tree.walk();
assert!(cursor.goto_first_child()); // struct
assert!(cursor.goto_first_child()); // struct keyword
let next_state = cursor.node().next_parse_state();
assert_ne!(next_state, 0);
assert_eq!(
next_state,
language.next_state(cursor.node().parse_state(), cursor.node().grammar_id())
);
assert!((next_state as usize) < language.parse_state_count());
assert!(cursor.goto_next_sibling()); // type_identifier
assert_eq!(next_state, cursor.node().parse_state());
assert_eq!(cursor.node().grammar_name(), "identifier");
assert_ne!(cursor.node().grammar_id(), cursor.node().kind_id());
let expected_symbols = ["identifier", "block_comment", "line_comment"];
let mut lookahead = language.lookahead_iterator(next_state).unwrap();
assert_eq!(lookahead.language(), language);
assert!(lookahead.iter_names().eq(expected_symbols));
lookahead.reset_state(next_state);
assert!(lookahead.iter_names().eq(expected_symbols));
lookahead.reset(language, next_state);
assert!(lookahead
.map(|s| language.node_kind_for_id(s).unwrap())
.eq(expected_symbols));
}
#[test]
fn test_lookahead_iterator_modifiable_only_by_mut() {
let mut parser = Parser::new();
let language = get_language("rust");
parser.set_language(language).unwrap();
let tree = parser.parse("struct Stuff {}", None).unwrap();
let mut cursor = tree.walk();
assert!(cursor.goto_first_child()); // struct
assert!(cursor.goto_first_child()); // struct keyword
let next_state = cursor.node().next_parse_state();
assert_ne!(next_state, 0);
let mut lookahead = language.lookahead_iterator(next_state).unwrap();
let _ = lookahead.next();
let mut names = lookahead.iter_names();
let _ = names.next();
}
/// Using a lookahead iterator via a shared ref is not allowed:
/// error[E0596]: cannot borrow `lookahead` as mutable, as it is not declared as mutable
/// ```compile_fail
/// use tree_sitter::{Parser, Language};
/// let mut parser = Parser::new();
/// let language = unsafe { Language::from_raw(std::ptr::null()) };
/// let tree = parser.parse("", None).unwrap();
/// let mut cursor = tree.walk();
/// let next_state = cursor.node().next_parse_state();
/// let lookahead = language.lookahead_iterator(next_state).unwrap();
/// let _ = lookahead.next();
/// ```
/// Using a lookahead names iterator via a shared ref is not allowed:
/// error[E0596]: cannot borrow `names` as mutable, as it is not declared as mutable
/// ```compile_fail
/// use tree_sitter::{Parser, Language};
/// let mut parser = Parser::new();
/// let language = unsafe { Language::from_raw(std::ptr::null()) };
/// let tree = parser.parse("", None).unwrap();
/// let mut cursor = tree.walk();
/// let next_state = cursor.node().next_parse_state();
/// if let Some(mut lookahead) = language.lookahead_iterator(next_state) {
/// let _ = lookahead.next();
/// let names = lookahead.iter_names();
/// let _ = names.next();
/// }
/// ```
fn _dummy() {}
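
As an aside, a minimal sketch of applying this lookahead API outside the tests, e.g. to list which terminals a parser would accept in a given state; the helper name is hypothetical:

// Hypothetical helper: collect the names of all symbols that are valid
// lookaheads in `state`; returns an empty list if the state is invalid.
fn expected_symbol_names(language: &tree_sitter::Language, state: u16) -> Vec<String> {
    language
        .lookahead_iterator(state)
        .map(|mut lookahead| lookahead.iter_names().map(str::to_string).collect())
        .unwrap_or_default()
}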

View file

@ -1,11 +1,16 @@
mod async_context_test;
mod corpus_test;
mod github_issue_test;
mod helpers;
mod highlight_test;
mod language_test;
mod node_test;
mod parser_hang_test;
mod parser_test;
mod pathological_test;
mod query_test;
mod tags_test;
mod test_highlight_test;
mod test_tags_test;
mod text_provider_test;
mod tree_test;

View file

@ -252,12 +252,14 @@ fn test_node_parent_of_child_by_field_name() {
fn test_node_field_name_for_child() {
let mut parser = Parser::new();
parser.set_language(get_language("c")).unwrap();
let tree = parser.parse("x + y;", None).unwrap();
let tree = parser.parse("int w = x + y;", None).unwrap();
let translation_unit_node = tree.root_node();
let binary_expression_node = translation_unit_node
.named_child(0)
let declaration_node = translation_unit_node.named_child(0).unwrap();
let binary_expression_node = declaration_node
.child_by_field_name("declarator")
.unwrap()
.named_child(0)
.child_by_field_name("value")
.unwrap();
assert_eq!(binary_expression_node.field_name_for_child(0), Some("left"));
@ -385,10 +387,52 @@ fn test_node_named_child_with_aliases_and_extras() {
assert_eq!(root.named_child(4).unwrap().kind(), "C");
}
#[test]
fn test_node_descendant_count() {
let tree = parse_json_example();
let value_node = tree.root_node();
let all_nodes = get_all_nodes(&tree);
assert_eq!(value_node.descendant_count(), all_nodes.len());
let mut cursor = value_node.walk();
for (i, node) in all_nodes.iter().enumerate() {
cursor.goto_descendant(i);
assert_eq!(cursor.node(), *node, "index {i}");
}
for (i, node) in all_nodes.iter().enumerate().rev() {
cursor.goto_descendant(i);
assert_eq!(cursor.node(), *node, "rev index {i}");
}
}
#[test]
fn test_descendant_count_single_node_tree() {
let mut parser = Parser::new();
parser
.set_language(get_language("embedded-template"))
.unwrap();
let tree = parser.parse("hello", None).unwrap();
let nodes = get_all_nodes(&tree);
assert_eq!(nodes.len(), 2);
assert_eq!(tree.root_node().descendant_count(), 2);
let mut cursor = tree.root_node().walk();
cursor.goto_descendant(0);
assert_eq!(cursor.depth(), 0);
assert_eq!(cursor.node(), nodes[0]);
cursor.goto_descendant(1);
assert_eq!(cursor.depth(), 1);
assert_eq!(cursor.node(), nodes[1]);
}
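
In short, the contract these tests check is that descendant indices follow pre-order: index 0 is the node the cursor started on, index 1 is its first child, and so on. A minimal sketch, reusing the tree from above:

let mut cursor = tree.root_node().walk();
cursor.goto_descendant(0); // stays on the root node itself
assert_eq!(cursor.depth(), 0);
cursor.goto_descendant(1); // first node after the root, in pre-order
assert_eq!(cursor.depth(), 1);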
#[test]
fn test_node_descendant_for_range() {
let tree = parse_json_example();
let array_node = tree.root_node().child(0).unwrap();
let array_node = tree.root_node();
// Leaf node exactly matches the given bounds - byte query
let colon_index = JSON_EXAMPLE.find(":").unwrap();
@ -508,7 +552,7 @@ fn test_node_edit() {
let edit = get_random_edit(&mut rand, &mut code);
let mut tree2 = tree.clone();
let edit = perform_edit(&mut tree2, &mut code, &edit);
let edit = perform_edit(&mut tree2, &mut code, &edit).unwrap();
for node in nodes_before.iter_mut() {
node.edit(&edit);
}
@ -841,15 +885,17 @@ fn get_all_nodes(tree: &Tree) -> Vec<Node> {
let mut visited_children = false;
let mut cursor = tree.walk();
loop {
result.push(cursor.node());
if !visited_children && cursor.goto_first_child() {
continue;
} else if cursor.goto_next_sibling() {
visited_children = false;
} else if cursor.goto_parent() {
visited_children = true;
if !visited_children {
result.push(cursor.node());
if !cursor.goto_first_child() {
visited_children = true;
}
} else {
break;
if cursor.goto_next_sibling() {
visited_children = false;
} else if !cursor.goto_parent() {
break;
}
}
}
return result;

View file

@ -0,0 +1,104 @@
// For some reason `Command::spawn` doesn't work in the CI env for many exotic arches.
#![cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]
use crate::{
generate::{generate_parser_for_grammar, load_grammar_file},
tests::helpers::fixtures::{fixtures_dir, get_test_language},
};
use std::{
env::VarError,
process::{Command, Stdio},
};
use tree_sitter::Parser;
// The `sanitizing` cfg is required to avoid running tests under a specific sanitizer,
// because they don't work well with subprocesses _(it's an assumption)_.
//
// Below are two alternative examples of how to disable tests for some arches,
// in case excluding the whole mod from compilation doesn't work well.
//
// XXX: Also, maybe it makes sense to keep such tests ignored by default
// to avoid surprises, and enable them on CI by passing an extra option explicitly:
//
// > cargo test -- --include-ignored
//
// #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]
// #[cfg_attr(not(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing))), ignore)]
//
#[test]
fn test_grammar_that_should_hang_and_not_segfault() {
let parent_sleep_millis = 1000;
let test_name = "test_grammar_that_should_hang_and_not_segfault";
let test_var = "CARGO_HANG_TEST";
eprintln!(" {test_name}");
let tests_exec_path = std::env::args()
.nth(0)
.expect("Failed get get tests executable path");
match std::env::var(test_var) {
Ok(v) if v == test_name => {
eprintln!(" child process id {}", std::process::id());
hang_test();
}
Err(VarError::NotPresent) => {
eprintln!(" parent process id {}", std::process::id());
if true {
let mut command = Command::new(tests_exec_path);
command.arg(test_name).env(test_var, test_name);
if std::env::args().any(|x| x == "--nocapture") {
command.arg("--nocapture");
} else {
command.stdout(Stdio::null()).stderr(Stdio::null());
}
match command.spawn() {
Ok(mut child) => {
std::thread::sleep(std::time::Duration::from_millis(parent_sleep_millis));
match child.try_wait() {
Ok(Some(status)) if status.success() => {
panic!("Child wasn't hang and exited successfully")
}
Ok(Some(status)) => panic!(
"Child wasn't hang and exited with status code: {:?}",
status.code()
),
_ => (),
}
if let Err(e) = child.kill() {
eprintln!(
"Failed to kill hang test sub process id: {}, error: {e}",
child.id()
);
}
}
Err(e) => panic!("{e}"),
}
}
}
Err(e) => panic!("Env var error: {e}"),
_ => unreachable!(),
}
fn hang_test() {
let test_grammar_dir = fixtures_dir()
.join("test_grammars")
.join("get_col_should_hang_not_crash");
let grammar_json = load_grammar_file(&test_grammar_dir.join("grammar.js"), None).unwrap();
let (parser_name, parser_code) =
generate_parser_for_grammar(grammar_json.as_str()).unwrap();
let language =
get_test_language(&parser_name, &parser_code, Some(test_grammar_dir.as_path()));
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let code_that_should_hang = "\nHello";
parser.parse(code_that_should_hang, None).unwrap();
}
}

View file

@ -15,6 +15,7 @@ use std::{
thread, time,
};
use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range};
use tree_sitter_proc_macro::retry;
#[test]
fn test_parsing_simple_string() {
@ -149,7 +150,7 @@ fn test_parsing_with_custom_utf8_input() {
)
);
assert_eq!(root.kind(), "source_file");
assert_eq!(root.has_error(), false);
assert!(!root.has_error());
assert_eq!(root.child(0).unwrap().kind(), "function_item");
}
@ -188,7 +189,7 @@ fn test_parsing_with_custom_utf16_input() {
"(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))"
);
assert_eq!(root.kind(), "source_file");
assert_eq!(root.has_error(), false);
assert!(!root.has_error());
assert_eq!(root.child(0).unwrap().kind(), "function_item");
}
@ -277,7 +278,10 @@ fn test_parsing_invalid_chars_at_eof() {
let mut parser = Parser::new();
parser.set_language(get_language("json")).unwrap();
let tree = parser.parse(b"\xdf", None).unwrap();
assert_eq!(tree.root_node().to_sexp(), "(ERROR (UNEXPECTED INVALID))");
assert_eq!(
tree.root_node().to_sexp(),
"(document (ERROR (UNEXPECTED INVALID)))"
);
}
#[test]
@ -340,7 +344,8 @@ fn test_parsing_after_editing_beginning_of_code() {
deleted_length: 0,
inserted_text: b" || 5".to_vec(),
},
);
)
.unwrap();
let mut recorder = ReadRecorder::new(&code);
let tree = parser
@ -387,7 +392,8 @@ fn test_parsing_after_editing_end_of_code() {
deleted_length: 0,
inserted_text: b".d".to_vec(),
},
);
)
.unwrap();
let mut recorder = ReadRecorder::new(&code);
let tree = parser
@ -466,7 +472,8 @@ h + i
deleted_length: 0,
inserted_text: b"1234".to_vec(),
},
);
)
.unwrap();
assert_eq!(
code,
@ -511,7 +518,7 @@ fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() {
let tree = parser.parse(&source, None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(module (expression_statement (assignment left: (identifier) right: (expression_list (identifier) (string)))))"
"(module (expression_statement (assignment left: (identifier) right: (expression_list (identifier) (string (string_start) (string_content) (string_end))))))"
);
// Delete a suffix of the source code, starting in the middle of the string
@ -530,12 +537,12 @@ fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() {
let undo = invert_edit(&source, &edit);
let mut tree2 = tree.clone();
perform_edit(&mut tree2, &mut source, &edit);
perform_edit(&mut tree2, &mut source, &edit).unwrap();
tree2 = parser.parse(&source, Some(&tree2)).unwrap();
assert!(tree2.root_node().has_error());
let mut tree3 = tree2.clone();
perform_edit(&mut tree3, &mut source, &undo);
perform_edit(&mut tree3, &mut source, &undo).unwrap();
tree3 = parser.parse(&source, Some(&tree3)).unwrap();
assert_eq!(tree3.root_node().to_sexp(), tree.root_node().to_sexp(),);
}
@ -644,6 +651,7 @@ fn test_parsing_cancelled_by_another_thread() {
// Timeouts
#[test]
#[retry(10)]
fn test_parsing_with_a_timeout() {
let mut parser = Parser::new();
parser.set_language(get_language("json")).unwrap();
@ -662,8 +670,12 @@ fn test_parsing_with_a_timeout() {
None,
);
assert!(tree.is_none());
#[cfg(not(target_arch = "sparc64"))]
assert!(start_time.elapsed().as_micros() < 2000);
#[cfg(target_arch = "sparc64")]
assert!(start_time.elapsed().as_micros() < 8000);
// Continue parsing, but pause after 1 ms of processing.
parser.set_timeout_micros(5000);
let start_time = time::Instant::now();
@ -701,6 +713,7 @@ fn test_parsing_with_a_timeout() {
}
#[test]
#[retry(10)]
fn test_parsing_with_a_timeout_and_a_reset() {
let mut parser = Parser::new();
parser.set_language(get_language("json")).unwrap();
@ -756,6 +769,7 @@ fn test_parsing_with_a_timeout_and_a_reset() {
}
#[test]
#[retry(10)]
fn test_parsing_with_a_timeout_and_implicit_reset() {
allocations::record(|| {
let mut parser = Parser::new();
@ -789,6 +803,7 @@ fn test_parsing_with_a_timeout_and_implicit_reset() {
}
#[test]
#[retry(10)]
fn test_parsing_with_timeout_and_no_completion() {
allocations::record(|| {
let mut parser = Parser::new();
@ -828,7 +843,7 @@ fn test_parsing_with_one_included_range() {
concat!(
"(program (expression_statement (call_expression ",
"function: (member_expression object: (identifier) property: (property_identifier)) ",
"arguments: (arguments (string)))))",
"arguments: (arguments (string (string_fragment))))))",
)
);
assert_eq!(
@ -1177,7 +1192,7 @@ fn test_parsing_with_a_newly_included_range() {
.set_included_ranges(&[simple_range(range1_start, range1_end)])
.unwrap();
let tree = parser
.parse_with(&mut chunked_input(&source_code, 3), None)
.parse_with(&mut chunked_input(source_code, 3), None)
.unwrap();
assert_eq!(
tree.root_node().to_sexp(),
@ -1196,7 +1211,7 @@ fn test_parsing_with_a_newly_included_range() {
])
.unwrap();
let tree2 = parser
.parse_with(&mut chunked_input(&source_code, 3), Some(&tree))
.parse_with(&mut chunked_input(source_code, 3), Some(&tree))
.unwrap();
assert_eq!(
tree2.root_node().to_sexp(),
@ -1220,7 +1235,7 @@ fn test_parsing_with_a_newly_included_range() {
simple_range(range3_start, range3_end),
])
.unwrap();
let tree3 = parser.parse(&source_code, Some(&tree)).unwrap();
let tree3 = parser.parse(source_code, Some(&tree)).unwrap();
assert_eq!(
tree3.root_node().to_sexp(),
concat!(
@ -1297,6 +1312,85 @@ fn test_parsing_with_included_ranges_and_missing_tokens() {
assert_eq!(root.child(3).unwrap().start_byte(), 4);
}
#[test]
fn test_grammars_that_can_hang_on_eof() {
let (parser_name, parser_code) = generate_parser_for_grammar(
r#"
{
"name": "test_single_null_char_regex",
"rules": {
"source_file": {
"type": "SEQ",
"members": [
{ "type": "STRING", "value": "\"" },
{ "type": "PATTERN", "value": "[\\x00]*" },
{ "type": "STRING", "value": "\"" }
]
}
},
"extras": [ { "type": "PATTERN", "value": "\\s" } ]
}
"#,
)
.unwrap();
let mut parser = Parser::new();
parser
.set_language(get_test_language(&parser_name, &parser_code, None))
.unwrap();
parser.parse("\"", None).unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(
r#"
{
"name": "test_null_char_with_next_char_regex",
"rules": {
"source_file": {
"type": "SEQ",
"members": [
{ "type": "STRING", "value": "\"" },
{ "type": "PATTERN", "value": "[\\x00-\\x01]*" },
{ "type": "STRING", "value": "\"" }
]
}
},
"extras": [ { "type": "PATTERN", "value": "\\s" } ]
}
"#,
)
.unwrap();
parser
.set_language(get_test_language(&parser_name, &parser_code, None))
.unwrap();
parser.parse("\"", None).unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(
r#"
{
"name": "test_null_char_with_range_regex",
"rules": {
"source_file": {
"type": "SEQ",
"members": [
{ "type": "STRING", "value": "\"" },
{ "type": "PATTERN", "value": "[\\x00-\\x7F]*" },
{ "type": "STRING", "value": "\"" }
]
}
},
"extras": [ { "type": "PATTERN", "value": "\\s" } ]
}
"#,
)
.unwrap();
parser
.set_language(get_test_language(&parser_name, &parser_code, None))
.unwrap();
parser.parse("\"", None).unwrap();
}
fn simple_range(start: usize, end: usize) -> Range {
Range {
start_byte: start,

View file

@ -0,0 +1,15 @@
[package]
name = "tree-sitter-tests-proc-macro"
version = "0.0.0"
edition = "2021"
publish = false
rust-version.workspace = true
[lib]
proc-macro = true
[dependencies]
proc-macro2 = "1.0.63"
quote = "1"
rand = "0.8.5"
syn = { version = "1", features = ["full"] }

View file

@ -0,0 +1,137 @@
use proc_macro::TokenStream;
use proc_macro2::Span;
use quote::quote;
use syn::{
parse::{Parse, ParseStream},
parse_macro_input, Error, Expr, Ident, ItemFn, LitInt, Token,
};
#[proc_macro_attribute]
pub fn retry(args: TokenStream, input: TokenStream) -> TokenStream {
let count = parse_macro_input!(args as LitInt);
let input = parse_macro_input!(input as ItemFn);
let attrs = input.attrs.clone();
let name = input.sig.ident.clone();
TokenStream::from(quote! {
#(#attrs),*
fn #name() {
#input
for i in 0..=#count {
let result = std::panic::catch_unwind(|| {
#name();
});
if result.is_ok() {
return;
}
if i == #count {
std::panic::resume_unwind(result.unwrap_err());
}
}
}
})
}
#[proc_macro_attribute]
pub fn test_with_seed(args: TokenStream, input: TokenStream) -> TokenStream {
struct Args {
retry: LitInt,
seed: Expr,
seed_fn: Option<Ident>,
}
impl Parse for Args {
fn parse(input: ParseStream) -> syn::Result<Self> {
let mut retry = None;
let mut seed = None;
let mut seed_fn = None;
while !input.is_empty() {
let name = input.parse::<Ident>()?;
match name.to_string().as_str() {
"retry" => {
input.parse::<Token![=]>()?;
retry.replace(input.parse()?);
}
"seed" => {
input.parse::<Token![=]>()?;
seed.replace(input.parse()?);
}
"seed_fn" => {
input.parse::<Token![=]>()?;
seed_fn.replace(input.parse()?);
}
x => {
return Err(Error::new(
name.span(),
format!("Unsupported parameter `{x}`"),
))
}
}
if !input.is_empty() {
input.parse::<Token![,]>()?;
}
}
if retry.is_none() {
retry.replace(LitInt::new("0", Span::mixed_site()));
}
Ok(Args {
retry: retry.expect("`retry` parameter is required"),
seed: seed.expect("`seed` parameter is required"),
seed_fn,
})
}
}
let Args {
retry,
seed,
seed_fn,
} = parse_macro_input!(args as Args);
let seed_fn = seed_fn.iter();
let func = parse_macro_input!(input as ItemFn);
let attrs = func.attrs.clone();
let name = func.sig.ident.clone();
// dbg!(quote::ToTokens::into_token_stream(&func));
TokenStream::from(quote! {
#[test]
#(#attrs),*
fn #name() {
#func
let mut seed = #seed;
for i in 0..=#retry {
let result = std::panic::catch_unwind(|| {
#name(seed);
});
if result.is_ok() {
return;
}
if i == #retry {
std::panic::resume_unwind(result.unwrap_err());
}
#(
seed = #seed_fn();
)*
if i < #retry {
println!("\nRetry {}/{} with a new seed {}", i + 1, #retry, seed);
}
}
}
})
}
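
Taken together, a hypothetical usage sketch for the two attributes; the test bodies and the new_seed helper are illustrative, not from this change:

// Re-run a flaky test up to 10 extra times before letting the panic through.
#[test]
#[retry(10)]
fn flaky_test() {
    assert!(std::path::Path::new("/tmp").exists());
}

// `test_with_seed` generates the #[test] wrapper itself and passes the seed
// through, drawing a fresh seed from `new_seed` on each retry.
fn new_seed() -> usize {
    42 // stand-in for a real seed source
}

#[test_with_seed(retry = 3, seed = 0, seed_fn = new_seed)]
fn seeded_test(seed: usize) {
    assert!(seed < usize::MAX);
}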

File diff suppressed because it is too large

View file

@ -9,7 +9,7 @@ use std::{
use tree_sitter::Point;
use tree_sitter_tags::{c_lib as c, Error, TagsConfiguration, TagsContext};
const PYTHON_TAG_QUERY: &'static str = r#"
const PYTHON_TAG_QUERY: &str = r#"
(
(function_definition
name: (identifier) @name
@ -39,7 +39,7 @@ const PYTHON_TAG_QUERY: &'static str = r#"
attribute: (identifier) @name)) @reference.call
"#;
const JS_TAG_QUERY: &'static str = r#"
const JS_TAG_QUERY: &str = r#"
(
(comment)* @doc .
(class_declaration
@ -68,7 +68,7 @@ const JS_TAG_QUERY: &'static str = r#"
function: (identifier) @name) @reference.call
"#;
const RUBY_TAG_QUERY: &'static str = r#"
const RUBY_TAG_QUERY: &str = r#"
(method
name: (_) @name) @definition.method
@ -359,25 +359,29 @@ fn test_tags_via_c_api() {
);
let c_scope_name = CString::new(scope_name).unwrap();
let result = c::ts_tagger_add_language(
tagger,
c_scope_name.as_ptr(),
language,
JS_TAG_QUERY.as_ptr(),
ptr::null(),
JS_TAG_QUERY.len() as u32,
0,
);
let result = unsafe {
c::ts_tagger_add_language(
tagger,
c_scope_name.as_ptr(),
language,
JS_TAG_QUERY.as_ptr(),
ptr::null(),
JS_TAG_QUERY.len() as u32,
0,
)
};
assert_eq!(result, c::TSTagsError::Ok);
let result = c::ts_tagger_tag(
tagger,
c_scope_name.as_ptr(),
source_code.as_ptr(),
source_code.len() as u32,
buffer,
ptr::null(),
);
let result = unsafe {
c::ts_tagger_tag(
tagger,
c_scope_name.as_ptr(),
source_code.as_ptr(),
source_code.len() as u32,
buffer,
ptr::null(),
)
};
assert_eq!(result, c::TSTagsError::Ok);
let tags = unsafe {
slice::from_raw_parts(
@ -419,8 +423,10 @@ fn test_tags_via_c_api() {
]
);
c::ts_tags_buffer_delete(buffer);
c::ts_tagger_delete(tagger);
unsafe {
c::ts_tags_buffer_delete(buffer);
c::ts_tagger_delete(tagger);
}
});
}

View file

@ -12,7 +12,7 @@ fn test_highlight_test_with_basic_test() {
Some("injections.scm"),
&[
"function".to_string(),
"variable.parameter".to_string(),
"variable".to_string(),
"keyword".to_string(),
],
);
@ -22,7 +22,8 @@ fn test_highlight_test_with_basic_test() {
" // ^ function",
" // ^ keyword",
" return d + e;",
" // ^ variable.parameter",
" // ^ variable",
" // ^ !variable",
"};",
]
.join("\n");
@ -32,18 +33,10 @@ fn test_highlight_test_with_basic_test() {
assert_eq!(
assertions,
&[
Assertion {
position: Point::new(1, 5),
expected_capture_name: "function".to_string()
},
Assertion {
position: Point::new(1, 11),
expected_capture_name: "keyword".to_string()
},
Assertion {
position: Point::new(4, 9),
expected_capture_name: "variable.parameter".to_string()
},
Assertion::new(1, 5, false, String::from("function")),
Assertion::new(1, 11, false, String::from("keyword")),
Assertion::new(4, 9, false, String::from("variable")),
Assertion::new(4, 11, true, String::from("variable")),
]
);
@ -60,6 +53,7 @@ fn test_highlight_test_with_basic_test() {
(Point::new(1, 19), Point::new(1, 20), Highlight(1)), // "d"
(Point::new(4, 2), Point::new(4, 8), Highlight(2)), // "return"
(Point::new(4, 9), Point::new(4, 10), Highlight(1)), // "d"
(Point::new(4, 13), Point::new(4, 14), Highlight(1)), // "e"
]
);
}

View file

@ -16,6 +16,7 @@ fn test_tags_test_with_basic_test() {
" # ^ reference.call",
" return d(e)",
" # ^ reference.call",
" # ^ !variable.parameter",
"",
]
.join("\n");
@ -26,18 +27,10 @@ fn test_tags_test_with_basic_test() {
assert_eq!(
assertions,
&[
Assertion {
position: Point::new(1, 4),
expected_capture_name: "definition.function".to_string(),
},
Assertion {
position: Point::new(3, 9),
expected_capture_name: "reference.call".to_string(),
},
Assertion {
position: Point::new(5, 11),
expected_capture_name: "reference.call".to_string(),
},
Assertion::new(1, 4, false, String::from("definition.function")),
Assertion::new(3, 9, false, String::from("reference.call")),
Assertion::new(5, 11, false, String::from("reference.call")),
Assertion::new(5, 13, true, String::from("variable.parameter")),
]
);

View file

@ -0,0 +1,173 @@
use std::{iter, sync::Arc};
use crate::tests::helpers::fixtures::get_language;
use tree_sitter::{Language, Node, Parser, Point, Query, QueryCursor, TextProvider, Tree};
fn parse_text(text: impl AsRef<[u8]>) -> (Tree, Language) {
let language = get_language("c");
let mut parser = Parser::new();
parser.set_language(language).unwrap();
(parser.parse(text, None).unwrap(), language)
}
fn parse_text_with<T, F>(callback: &mut F) -> (Tree, Language)
where
T: AsRef<[u8]>,
F: FnMut(usize, Point) -> T,
{
let language = get_language("c");
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse_with(callback, None).unwrap();
// eprintln!("{}", tree.clone().root_node().to_sexp());
assert_eq!("comment", tree.clone().root_node().child(0).unwrap().kind());
(tree, language)
}
fn tree_query<I: AsRef<[u8]>>(tree: &Tree, text: impl TextProvider<I>, language: Language) {
let query = Query::new(language, "((comment) @c (#eq? @c \"// comment\"))").unwrap();
let mut cursor = QueryCursor::new();
let mut captures = cursor.captures(&query, tree.root_node(), text);
let (match_, idx) = captures.next().unwrap();
let capture = match_.captures[idx];
assert_eq!(capture.index as usize, idx);
assert_eq!("comment", capture.node.kind());
}
fn check_parsing<I: AsRef<[u8]>>(
parser_text: impl AsRef<[u8]>,
text_provider: impl TextProvider<I>,
) {
let (tree, language) = parse_text(parser_text);
tree_query(&tree, text_provider, language);
}
fn check_parsing_callback<T, F, I: AsRef<[u8]>>(
parser_callback: &mut F,
text_provider: impl TextProvider<I>,
) where
T: AsRef<[u8]>,
F: FnMut(usize, Point) -> T,
{
let (tree, language) = parse_text_with(parser_callback);
tree_query(&tree, text_provider, language);
}
#[test]
fn test_text_provider_for_str_slice() {
let text: &str = "// comment";
check_parsing(text, text.as_bytes());
check_parsing(text.as_bytes(), text.as_bytes());
}
#[test]
fn test_text_provider_for_string() {
let text: String = "// comment".to_owned();
check_parsing(text.clone(), text.as_bytes());
check_parsing(text.as_bytes(), text.as_bytes());
check_parsing(<_ as AsRef<[u8]>>::as_ref(&text), text.as_bytes());
}
#[test]
fn test_text_provider_for_box_of_str_slice() {
let text: Box<str> = "// comment".to_owned().into_boxed_str();
check_parsing(text.as_bytes(), text.as_bytes());
check_parsing(<_ as AsRef<str>>::as_ref(&text), text.as_bytes());
check_parsing(text.as_ref(), text.as_ref().as_bytes());
check_parsing(text.as_ref(), text.as_bytes());
}
#[test]
fn test_text_provider_for_box_of_bytes_slice() {
let text: Box<[u8]> = "// comment".to_owned().into_boxed_str().into_boxed_bytes();
check_parsing(text.as_ref(), text.as_ref());
check_parsing(text.as_ref(), &*text);
check_parsing(&*text, &*text);
}
#[test]
fn test_text_provider_for_vec_of_bytes() {
let text: Vec<u8> = "// comment".to_owned().into_bytes();
check_parsing(&*text, &*text);
}
#[test]
fn test_text_provider_for_arc_of_bytes_slice() {
let text: Vec<u8> = "// comment".to_owned().into_bytes();
let text: Arc<[u8]> = Arc::from(text);
check_parsing(&*text, &*text);
check_parsing(text.as_ref(), text.as_ref());
check_parsing(text.clone(), text.as_ref());
}
#[test]
fn test_text_provider_callback_with_str_slice() {
let text: &str = "// comment";
check_parsing(text, |_node: Node<'_>| iter::once(text));
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then(|| text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| iter::once(text),
);
}
#[test]
fn test_text_provider_callback_with_owned_string_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then(|| text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice: String = text.to_owned();
iter::once(slice)
},
);
}
#[test]
fn test_text_provider_callback_with_owned_bytes_vec_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then(|| text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice: Vec<u8> = text.to_owned().into_bytes();
iter::once(slice)
},
);
}
#[test]
fn test_text_provider_callback_with_owned_arc_of_bytes_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then(|| text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice: Arc<[u8]> = text.to_owned().into_bytes().into();
iter::once(slice)
},
);
}
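
The same machinery accepts any closure over nodes; a minimal sketch, reusing this file's check_parsing helper, of a per-node TextProvider that slices the original source by the node's byte range:

let text: &str = "// comment";
check_parsing(text, |node: Node<'_>| {
    iter::once(&text.as_bytes()[node.byte_range()])
});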

View file

@ -306,7 +306,7 @@ fn test_tree_cursor() {
.parse(
"
struct Stuff {
a: A;
a: A,
b: Option<B>,
}
",
@ -331,6 +331,88 @@ fn test_tree_cursor() {
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "field_declaration_list");
assert_eq!(cursor.node().is_named(), true);
assert!(cursor.goto_last_child());
assert_eq!(cursor.node().kind(), "}");
assert_eq!(cursor.node().is_named(), false);
assert_eq!(cursor.node().start_position(), Point { row: 4, column: 16 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), ",");
assert_eq!(cursor.node().is_named(), false);
assert_eq!(cursor.node().start_position(), Point { row: 3, column: 32 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "field_declaration");
assert_eq!(cursor.node().is_named(), true);
assert_eq!(cursor.node().start_position(), Point { row: 3, column: 20 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), ",");
assert_eq!(cursor.node().is_named(), false);
assert_eq!(cursor.node().start_position(), Point { row: 2, column: 24 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "field_declaration");
assert_eq!(cursor.node().is_named(), true);
assert_eq!(cursor.node().start_position(), Point { row: 2, column: 20 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "{");
assert_eq!(cursor.node().is_named(), false);
assert_eq!(cursor.node().start_position(), Point { row: 1, column: 29 });
let mut copy = tree.walk();
copy.reset_to(cursor);
assert_eq!(copy.node().kind(), "{");
assert_eq!(copy.node().is_named(), false);
assert!(copy.goto_parent());
assert_eq!(copy.node().kind(), "field_declaration_list");
assert_eq!(copy.node().is_named(), true);
assert!(copy.goto_parent());
assert_eq!(copy.node().kind(), "struct_item");
}
#[test]
fn test_tree_cursor_previous_sibling() {
let mut parser = Parser::new();
parser.set_language(get_language("rust")).unwrap();
let text = "
// Hi there
// This is fun!
// Another one!
";
let tree = parser.parse(text, None).unwrap();
let mut cursor = tree.walk();
assert_eq!(cursor.node().kind(), "source_file");
assert!(cursor.goto_last_child());
assert_eq!(cursor.node().kind(), "line_comment");
assert_eq!(
cursor.node().utf8_text(text.as_bytes()).unwrap(),
"// Another one!"
);
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "line_comment");
assert_eq!(
cursor.node().utf8_text(text.as_bytes()).unwrap(),
"// This is fun!"
);
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "line_comment");
assert_eq!(
cursor.node().utf8_text(text.as_bytes()).unwrap(),
"// Hi there"
);
assert!(!cursor.goto_previous_sibling());
}
#[test]
@ -620,7 +702,7 @@ fn get_changed_ranges(
source_code: &mut Vec<u8>,
edit: Edit,
) -> Vec<Range> {
perform_edit(tree, source_code, &edit);
perform_edit(tree, source_code, &edit).unwrap();
let new_tree = parser.parse(&source_code, Some(tree)).unwrap();
let result = tree.changed_ranges(&new_tree).collect();
*tree = new_tree;

View file

@ -1,9 +1,7 @@
use anyhow::Result;
use std::io;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::thread;
use tree_sitter::Parser;
use tree_sitter::{Parser, Tree};
#[cfg(unix)]
use anyhow::{anyhow, Context};
@ -13,55 +11,86 @@ use std::path::PathBuf;
use std::process::{Child, ChildStdin, Command, Stdio};
#[cfg(unix)]
const HTML_HEADER: &[u8] = b"<!DOCTYPE html>\n<style>svg { width: 100%; }</style>\n\n";
const HTML_HEADER: &[u8] = b"
<!DOCTYPE html>
pub fn cancel_on_stdin() -> Arc<AtomicUsize> {
<style>
svg { width: 100%; }
</style>
";
pub fn cancel_on_signal() -> Arc<AtomicUsize> {
let result = Arc::new(AtomicUsize::new(0));
if atty::is(atty::Stream::Stdin) {
thread::spawn({
let flag = result.clone();
move || {
let mut line = String::new();
io::stdin().read_line(&mut line).unwrap();
flag.store(1, Ordering::Relaxed);
}
});
}
ctrlc::set_handler({
let flag = result.clone();
move || {
flag.store(1, Ordering::Relaxed);
}
})
.expect("Error setting Ctrl-C handler");
result
}
#[cfg(windows)]
pub struct LogSession();
pub struct LogSession;
#[cfg(unix)]
pub struct LogSession(PathBuf, Option<Child>, Option<ChildStdin>);
pub struct LogSession {
path: PathBuf,
dot_process: Option<Child>,
dot_process_stdin: Option<ChildStdin>,
}
#[cfg(windows)]
pub fn print_tree_graph(_tree: &Tree, _path: &str) -> Result<()> {
Ok(())
}
#[cfg(windows)]
pub fn log_graphs(_parser: &mut Parser, _path: &str) -> Result<LogSession> {
Ok(LogSession())
Ok(LogSession)
}
#[cfg(unix)]
pub fn print_tree_graph(tree: &Tree, path: &str) -> Result<()> {
let session = LogSession::new(path)?;
tree.print_dot_graph(session.dot_process_stdin.as_ref().unwrap());
Ok(())
}
#[cfg(unix)]
pub fn log_graphs(parser: &mut Parser, path: &str) -> Result<LogSession> {
use std::io::Write;
let session = LogSession::new(path)?;
parser.print_dot_graphs(session.dot_process_stdin.as_ref().unwrap());
Ok(session)
}
let mut dot_file = std::fs::File::create(path)?;
dot_file.write(HTML_HEADER)?;
let mut dot_process = Command::new("dot")
.arg("-Tsvg")
.stdin(Stdio::piped())
.stdout(dot_file)
.spawn()
.with_context(|| "Failed to run the `dot` command. Check that graphviz is installed.")?;
let dot_stdin = dot_process
.stdin
.take()
.ok_or_else(|| anyhow!("Failed to open stdin for `dot` process."))?;
parser.print_dot_graphs(&dot_stdin);
Ok(LogSession(
PathBuf::from(path),
Some(dot_process),
Some(dot_stdin),
))
#[cfg(unix)]
impl LogSession {
fn new(path: &str) -> Result<Self> {
use std::io::Write;
let mut dot_file = std::fs::File::create(path)?;
dot_file.write(HTML_HEADER)?;
let mut dot_process = Command::new("dot")
.arg("-Tsvg")
.stdin(Stdio::piped())
.stdout(dot_file)
.spawn()
.with_context(|| {
"Failed to run the `dot` command. Check that graphviz is installed."
})?;
let dot_stdin = dot_process
.stdin
.take()
.ok_or_else(|| anyhow!("Failed to open stdin for `dot` process."))?;
Ok(Self {
path: PathBuf::from(path),
dot_process: Some(dot_process),
dot_process_stdin: Some(dot_stdin),
})
}
}
#[cfg(unix)]
@ -69,13 +98,13 @@ impl Drop for LogSession {
fn drop(&mut self) {
use std::fs;
drop(self.2.take().unwrap());
let output = self.1.take().unwrap().wait_with_output().unwrap();
drop(self.dot_process_stdin.take().unwrap());
let output = self.dot_process.take().unwrap().wait_with_output().unwrap();
if output.status.success() {
if cfg!(target_os = "macos")
&& fs::metadata(&self.0).unwrap().len() > HTML_HEADER.len() as u64
&& fs::metadata(&self.path).unwrap().len() > HTML_HEADER.len() as u64
{
Command::new("open").arg(&self.0).output().unwrap();
Command::new("open").arg(&self.path).output().unwrap();
}
} else {
eprintln!(

View file

@ -1,5 +1,6 @@
use super::generate::parse_grammar::GrammarJSON;
use anyhow::{anyhow, Context, Result};
use path_slash::PathExt as _;
use std::{
ffi::{OsStr, OsString},
fs,
@ -60,7 +61,7 @@ pub fn compile_language_to_wasm(
volume_string = OsString::from(parent);
volume_string.push(":/src:Z");
command.arg("--workdir");
command.arg(&Path::new("/src").join(filename));
command.arg(Path::new("/src").join(filename).to_slash_lossy().as_ref());
} else {
volume_string = OsString::from(language_dir);
volume_string.push(":/src:Z");
@ -84,6 +85,11 @@ pub fn compile_language_to_wasm(
// Run `emcc` in a container using the `emscripten-slim` image
command.args(&[EMSCRIPTEN_TAG, "emcc"]);
} else {
if force_docker {
return Err(anyhow!(
"You must have docker on your PATH to run this command with --docker"
));
}
return Err(anyhow!(
"You must have either emcc or docker on your PATH to run this command"
));
@ -116,14 +122,18 @@ pub fn compile_language_to_wasm(
let scanner_cpp_path = src.join("scanner.cpp");
if language_dir.join(&scanner_cc_path).exists() {
command.arg("-xc++").arg(&scanner_cc_path);
command
.arg("-xc++")
.arg(scanner_cc_path.to_slash_lossy().as_ref());
} else if language_dir.join(&scanner_cpp_path).exists() {
command.arg("-xc++").arg(&scanner_cpp_path);
command
.arg("-xc++")
.arg(scanner_cpp_path.to_slash_lossy().as_ref());
} else if language_dir.join(&scanner_c_path).exists() {
command.arg(&scanner_c_path);
command.arg(scanner_c_path.to_slash_lossy().as_ref());
}
command.arg(&parser_c_path);
command.arg(parser_c_path.to_slash_lossy().as_ref());
let output = command
.output()

View file

@ -1,2 +1,3 @@
source 'https://rubygems.org'
gem 'github-pages', group: :jekyll_plugins
gem 'github-pages', group: :jekyll_plugins
gem "webrick"

View file

@ -1,258 +1,260 @@
GEM
remote: https://rubygems.org/
specs:
activesupport (4.2.9)
i18n (~> 0.7)
minitest (~> 5.1)
thread_safe (~> 0.3, >= 0.3.4)
tzinfo (~> 1.1)
addressable (2.8.0)
public_suffix (>= 2.0.2, < 5.0)
activesupport (7.0.4.3)
concurrent-ruby (~> 1.0, >= 1.0.2)
i18n (>= 1.6, < 2)
minitest (>= 5.1)
tzinfo (~> 2.0)
addressable (2.8.1)
public_suffix (>= 2.0.2, < 6.0)
coffee-script (2.4.1)
coffee-script-source
execjs
coffee-script-source (1.11.1)
colorator (1.1.0)
commonmarker (0.17.8)
ruby-enum (~> 0.5)
concurrent-ruby (1.0.5)
ethon (0.14.0)
commonmarker (0.23.10)
concurrent-ruby (1.2.2)
dnsruby (1.61.9)
simpleidn (~> 0.1)
em-websocket (0.5.3)
eventmachine (>= 0.12.9)
http_parser.rb (~> 0)
ethon (0.16.0)
ffi (>= 1.15.0)
execjs (2.7.0)
faraday (1.5.1)
faraday-em_http (~> 1.0)
faraday-em_synchrony (~> 1.0)
faraday-excon (~> 1.1)
faraday-httpclient (~> 1.0.1)
faraday-net_http (~> 1.0)
faraday-net_http_persistent (~> 1.1)
faraday-patron (~> 1.0)
multipart-post (>= 1.2, < 3)
eventmachine (1.2.7)
execjs (2.8.1)
faraday (2.7.4)
faraday-net_http (>= 2.0, < 3.1)
ruby2_keywords (>= 0.0.4)
faraday-em_http (1.0.0)
faraday-em_synchrony (1.0.0)
faraday-excon (1.1.0)
faraday-httpclient (1.0.1)
faraday-net_http (1.0.1)
faraday-net_http_persistent (1.2.0)
faraday-patron (1.0.0)
ffi (1.15.3)
faraday-net_http (3.0.2)
ffi (1.15.5)
forwardable-extended (2.6.0)
gemoji (3.0.0)
github-pages (177)
activesupport (= 4.2.9)
github-pages-health-check (= 1.3.5)
jekyll (= 3.6.2)
jekyll-avatar (= 0.5.0)
jekyll-coffeescript (= 1.0.2)
jekyll-commonmark-ghpages (= 0.1.5)
gemoji (3.0.1)
github-pages (228)
github-pages-health-check (= 1.17.9)
jekyll (= 3.9.3)
jekyll-avatar (= 0.7.0)
jekyll-coffeescript (= 1.1.1)
jekyll-commonmark-ghpages (= 0.4.0)
jekyll-default-layout (= 0.1.4)
jekyll-feed (= 0.9.2)
jekyll-gist (= 1.4.1)
jekyll-github-metadata (= 2.9.3)
jekyll-mentions (= 1.2.0)
jekyll-optional-front-matter (= 0.3.0)
jekyll-feed (= 0.15.1)
jekyll-gist (= 1.5.0)
jekyll-github-metadata (= 2.13.0)
jekyll-include-cache (= 0.2.1)
jekyll-mentions (= 1.6.0)
jekyll-optional-front-matter (= 0.3.2)
jekyll-paginate (= 1.1.0)
jekyll-readme-index (= 0.2.0)
jekyll-redirect-from (= 0.12.1)
jekyll-relative-links (= 0.5.2)
jekyll-remote-theme (= 0.2.3)
jekyll-sass-converter (= 1.5.0)
jekyll-seo-tag (= 2.3.0)
jekyll-sitemap (= 1.1.1)
jekyll-swiss (= 0.4.0)
jekyll-theme-architect (= 0.1.0)
jekyll-theme-cayman (= 0.1.0)
jekyll-theme-dinky (= 0.1.0)
jekyll-theme-hacker (= 0.1.0)
jekyll-theme-leap-day (= 0.1.0)
jekyll-theme-merlot (= 0.1.0)
jekyll-theme-midnight (= 0.1.0)
jekyll-theme-minimal (= 0.1.0)
jekyll-theme-modernist (= 0.1.0)
jekyll-theme-primer (= 0.5.2)
jekyll-theme-slate (= 0.1.0)
jekyll-theme-tactile (= 0.1.0)
jekyll-theme-time-machine (= 0.1.0)
jekyll-titles-from-headings (= 0.5.0)
jemoji (= 0.8.1)
kramdown (= 1.16.2)
liquid (= 4.0.0)
listen (= 3.0.6)
jekyll-readme-index (= 0.3.0)
jekyll-redirect-from (= 0.16.0)
jekyll-relative-links (= 0.6.1)
jekyll-remote-theme (= 0.4.3)
jekyll-sass-converter (= 1.5.2)
jekyll-seo-tag (= 2.8.0)
jekyll-sitemap (= 1.4.0)
jekyll-swiss (= 1.0.0)
jekyll-theme-architect (= 0.2.0)
jekyll-theme-cayman (= 0.2.0)
jekyll-theme-dinky (= 0.2.0)
jekyll-theme-hacker (= 0.2.0)
jekyll-theme-leap-day (= 0.2.0)
jekyll-theme-merlot (= 0.2.0)
jekyll-theme-midnight (= 0.2.0)
jekyll-theme-minimal (= 0.2.0)
jekyll-theme-modernist (= 0.2.0)
jekyll-theme-primer (= 0.6.0)
jekyll-theme-slate (= 0.2.0)
jekyll-theme-tactile (= 0.2.0)
jekyll-theme-time-machine (= 0.2.0)
jekyll-titles-from-headings (= 0.5.3)
jemoji (= 0.12.0)
kramdown (= 2.3.2)
kramdown-parser-gfm (= 1.1.0)
liquid (= 4.0.4)
mercenary (~> 0.3)
minima (= 2.1.1)
nokogiri (>= 1.8.1, < 2.0)
rouge (= 2.2.1)
minima (= 2.5.1)
nokogiri (>= 1.13.6, < 2.0)
rouge (= 3.26.0)
terminal-table (~> 1.4)
github-pages-health-check (1.3.5)
github-pages-health-check (1.17.9)
addressable (~> 2.3)
net-dns (~> 0.8)
dnsruby (~> 1.60)
octokit (~> 4.0)
public_suffix (~> 2.0)
typhoeus (~> 0.7)
html-pipeline (2.7.1)
public_suffix (>= 3.0, < 5.0)
typhoeus (~> 1.3)
html-pipeline (2.14.3)
activesupport (>= 2)
nokogiri (>= 1.4)
i18n (0.9.5)
http_parser.rb (0.8.0)
i18n (1.12.0)
concurrent-ruby (~> 1.0)
jekyll (3.6.2)
jekyll (3.9.3)
addressable (~> 2.4)
colorator (~> 1.0)
em-websocket (~> 0.5)
i18n (>= 0.7, < 2)
jekyll-sass-converter (~> 1.0)
jekyll-watch (~> 1.1)
kramdown (~> 1.14)
jekyll-watch (~> 2.0)
kramdown (>= 1.17, < 3)
liquid (~> 4.0)
mercenary (~> 0.3.3)
pathutil (~> 0.9)
rouge (>= 1.7, < 3)
rouge (>= 1.7, < 4)
safe_yaml (~> 1.0)
jekyll-avatar (0.5.0)
jekyll (~> 3.0)
jekyll-coffeescript (1.0.2)
jekyll-avatar (0.7.0)
jekyll (>= 3.0, < 5.0)
jekyll-coffeescript (1.1.1)
coffee-script (~> 2.2)
coffee-script-source (~> 1.11.1)
jekyll-commonmark (1.1.0)
commonmarker (~> 0.14)
jekyll (>= 3.0, < 4.0)
jekyll-commonmark-ghpages (0.1.5)
commonmarker (~> 0.17.6)
jekyll-commonmark (~> 1)
rouge (~> 2)
jekyll-commonmark (1.4.0)
commonmarker (~> 0.22)
jekyll-commonmark-ghpages (0.4.0)
commonmarker (~> 0.23.7)
jekyll (~> 3.9.0)
jekyll-commonmark (~> 1.4.0)
rouge (>= 2.0, < 5.0)
jekyll-default-layout (0.1.4)
jekyll (~> 3.0)
jekyll-feed (0.9.2)
jekyll (~> 3.3)
jekyll-gist (1.4.1)
jekyll-feed (0.15.1)
jekyll (>= 3.7, < 5.0)
jekyll-gist (1.5.0)
octokit (~> 4.2)
jekyll-github-metadata (2.9.3)
jekyll (~> 3.1)
jekyll-github-metadata (2.13.0)
jekyll (>= 3.4, < 5.0)
octokit (~> 4.0, != 4.4.0)
jekyll-mentions (1.2.0)
activesupport (~> 4.0)
jekyll-include-cache (0.2.1)
jekyll (>= 3.7, < 5.0)
jekyll-mentions (1.6.0)
html-pipeline (~> 2.3)
jekyll (~> 3.0)
jekyll-optional-front-matter (0.3.0)
jekyll (~> 3.0)
jekyll (>= 3.7, < 5.0)
jekyll-optional-front-matter (0.3.2)
jekyll (>= 3.0, < 5.0)
jekyll-paginate (1.1.0)
jekyll-readme-index (0.2.0)
jekyll (~> 3.0)
jekyll-redirect-from (0.12.1)
jekyll (~> 3.3)
jekyll-relative-links (0.5.2)
jekyll (~> 3.3)
jekyll-remote-theme (0.2.3)
jekyll (~> 3.5)
rubyzip (>= 1.2.1, < 3.0)
typhoeus (>= 0.7, < 2.0)
jekyll-sass-converter (1.5.0)
jekyll-readme-index (0.3.0)
jekyll (>= 3.0, < 5.0)
jekyll-redirect-from (0.16.0)
jekyll (>= 3.3, < 5.0)
jekyll-relative-links (0.6.1)
jekyll (>= 3.3, < 5.0)
jekyll-remote-theme (0.4.3)
addressable (~> 2.0)
jekyll (>= 3.5, < 5.0)
jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0)
rubyzip (>= 1.3.0, < 3.0)
jekyll-sass-converter (1.5.2)
sass (~> 3.4)
jekyll-seo-tag (2.3.0)
jekyll (~> 3.3)
jekyll-sitemap (1.1.1)
jekyll (~> 3.3)
jekyll-swiss (0.4.0)
jekyll-theme-architect (0.1.0)
jekyll (~> 3.5)
jekyll-seo-tag (2.8.0)
jekyll (>= 3.8, < 5.0)
jekyll-sitemap (1.4.0)
jekyll (>= 3.7, < 5.0)
jekyll-swiss (1.0.0)
jekyll-theme-architect (0.2.0)
jekyll (> 3.5, < 5.0)
jekyll-seo-tag (~> 2.0)
jekyll-theme-cayman (0.1.0)
jekyll (~> 3.5)
jekyll-theme-cayman (0.2.0)
jekyll (> 3.5, < 5.0)
jekyll-seo-tag (~> 2.0)
jekyll-theme-dinky (0.1.0)
jekyll (~> 3.5)
jekyll-theme-dinky (0.2.0)
jekyll (> 3.5, < 5.0)
jekyll-seo-tag (~> 2.0)
jekyll-theme-hacker (0.1.0)
jekyll (~> 3.5)
jekyll-theme-hacker (0.2.0)
jekyll (> 3.5, < 5.0)
jekyll-seo-tag (~> 2.0)
jekyll-theme-leap-day (0.1.0)
jekyll (~> 3.5)
jekyll-theme-leap-day (0.2.0)
jekyll (> 3.5, < 5.0)
jekyll-seo-tag (~> 2.0)
jekyll-theme-merlot (0.1.0)
jekyll (~> 3.5)
jekyll-theme-merlot (0.2.0)
jekyll (> 3.5, < 5.0)
jekyll-seo-tag (~> 2.0)
jekyll-theme-midnight (0.1.0)
jekyll (~> 3.5)
jekyll-theme-midnight (0.2.0)
jekyll (> 3.5, < 5.0)
jekyll-seo-tag (~> 2.0)
jekyll-theme-minimal (0.1.0)
jekyll (~> 3.5)
jekyll-theme-minimal (0.2.0)
jekyll (> 3.5, < 5.0)
jekyll-seo-tag (~> 2.0)
jekyll-theme-modernist (0.1.0)
jekyll (~> 3.5)
jekyll-theme-modernist (0.2.0)
jekyll (> 3.5, < 5.0)
jekyll-seo-tag (~> 2.0)
jekyll-theme-primer (0.5.2)
jekyll (~> 3.5)
jekyll-theme-primer (0.6.0)
jekyll (> 3.5, < 5.0)
jekyll-github-metadata (~> 2.9)
jekyll-seo-tag (~> 2.2)
jekyll-theme-slate (0.1.0)
jekyll (~> 3.5)
jekyll-seo-tag (~> 2.0)
jekyll-theme-tactile (0.1.0)
jekyll (~> 3.5)
jekyll-theme-slate (0.2.0)
jekyll (> 3.5, < 5.0)
jekyll-seo-tag (~> 2.0)
jekyll-theme-time-machine (0.1.0)
jekyll (~> 3.5)
jekyll-theme-tactile (0.2.0)
jekyll (> 3.5, < 5.0)
jekyll-seo-tag (~> 2.0)
jekyll-titles-from-headings (0.5.0)
jekyll (~> 3.3)
jekyll-watch (1.5.1)
jekyll-theme-time-machine (0.2.0)
jekyll (> 3.5, < 5.0)
jekyll-seo-tag (~> 2.0)
jekyll-titles-from-headings (0.5.3)
jekyll (>= 3.3, < 5.0)
jekyll-watch (2.2.1)
listen (~> 3.0)
jemoji (0.8.1)
activesupport (~> 4.0, >= 4.2.9)
jemoji (0.12.0)
gemoji (~> 3.0)
html-pipeline (~> 2.2)
jekyll (>= 3.0)
kramdown (1.16.2)
liquid (4.0.0)
listen (3.0.6)
rb-fsevent (>= 0.9.3)
rb-inotify (>= 0.9.7)
jekyll (>= 3.0, < 5.0)
kramdown (2.3.2)
rexml
kramdown-parser-gfm (1.1.0)
kramdown (~> 2.0)
liquid (4.0.4)
listen (3.8.0)
rb-fsevent (~> 0.10, >= 0.10.3)
rb-inotify (~> 0.9, >= 0.9.10)
mercenary (0.3.6)
mini_portile2 (2.8.0)
minima (2.1.1)
jekyll (~> 3.3)
minitest (5.11.3)
multipart-post (2.1.1)
net-dns (0.9.0)
nokogiri (1.13.3)
mini_portile2 (~> 2.8.0)
minima (2.5.1)
jekyll (>= 3.5, < 5.0)
jekyll-feed (~> 0.9)
jekyll-seo-tag (~> 2.1)
minitest (5.18.0)
nokogiri (1.14.3-x86_64-linux)
racc (~> 1.4)
octokit (4.21.0)
faraday (>= 0.9)
sawyer (~> 0.8.0, >= 0.5.3)
octokit (4.25.1)
faraday (>= 1, < 3)
sawyer (~> 0.9)
pathutil (0.16.2)
forwardable-extended (~> 2.6)
public_suffix (2.0.5)
racc (1.6.0)
rb-fsevent (0.11.0)
public_suffix (4.0.7)
racc (1.6.2)
rb-fsevent (0.11.2)
rb-inotify (0.10.1)
ffi (~> 1.0)
rouge (2.2.1)
ruby-enum (0.7.2)
i18n
ruby2_keywords (0.0.4)
rubyzip (2.0.0)
rexml (3.2.5)
rouge (3.26.0)
ruby2_keywords (0.0.5)
rubyzip (2.3.2)
safe_yaml (1.0.5)
sass (3.7.4)
sass-listen (~> 4.0.0)
sass-listen (4.0.0)
rb-fsevent (~> 0.9, >= 0.9.4)
rb-inotify (~> 0.9, >= 0.9.7)
sawyer (0.8.2)
sawyer (0.9.2)
addressable (>= 2.3.5)
faraday (> 0.8, < 2.0)
faraday (>= 0.17.3, < 3)
simpleidn (0.2.1)
unf (~> 0.1.4)
terminal-table (1.8.0)
unicode-display_width (~> 1.1, >= 1.1.1)
thread_safe (0.3.6)
typhoeus (0.8.0)
ethon (>= 0.8.0)
tzinfo (1.2.5)
thread_safe (~> 0.1)
unicode-display_width (1.3.0)
typhoeus (1.4.0)
ethon (>= 0.9.0)
tzinfo (2.0.6)
concurrent-ruby (~> 1.0)
unf (0.1.4)
unf_ext
unf_ext (0.0.8.2)
unicode-display_width (1.8.0)
webrick (1.8.1)
PLATFORMS
ruby
x86_64-linux
DEPENDENCIES
github-pages
webrick
BUNDLED WITH
1.16.1
2.4.8

View file

@ -9,95 +9,142 @@ Tree-sitter is a parser generator tool and an incremental parsing library. It ca
* **General** enough to parse any programming language
* **Fast** enough to parse on every keystroke in a text editor
* **Robust** enough to provide useful results even in the presence of syntax errors
* **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application
* **Dependency-free** so that the runtime library (which is written in pure [C](https://github.com/tree-sitter/tree-sitter/tree/master/lib)) can be embedded in any application
### Language Bindings
There are currently bindings that allow Tree-sitter to be used from the following languages:
* [Go](https://github.com/smacker/go-tree-sitter)
* [Haskell](https://github.com/tree-sitter/haskell-tree-sitter)
* [Java](https://github.com/serenadeai/java-tree-sitter)
* [JavaScript (Node.js)](https://github.com/tree-sitter/node-tree-sitter)
* [JavaScript (Wasm)](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web)
* [Kotlin](https://github.com/oxisto/kotlintree)
* [Lua](https://github.com/euclidianAce/ltreesitter)
* [OCaml](https://github.com/returntocorp/ocaml-tree-sitter-core)
* [Perl](https://metacpan.org/pod/Text::Treesitter)
* [Python](https://github.com/tree-sitter/py-tree-sitter)
* [Ruby](https://github.com/tree-sitter/ruby-tree-sitter)
* [Ruby](https://github.com/calicoday/ruby-tree-sitter-ffi)
* [Rust](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust)
* [Swift](https://github.com/ChimeHQ/SwiftTreeSitter)
* [Kotlin](https://github.com/oxisto/kotlintree)
* [Java](https://github.com/serenadeai/java-tree-sitter)
### Available Parsers
Parsers for these languages are fairly complete:
### Parsers
* [Ada](https://github.com/briot/tree-sitter-ada)
* [Agda](https://github.com/tree-sitter/tree-sitter-agda)
* [Apex](https://github.com/aheber/tree-sitter-sfapex)
* [Bash](https://github.com/tree-sitter/tree-sitter-bash)
* [Beancount](https://github.com/zwpaper/tree-sitter-beancount)
* [Cap'n Proto](https://github.com/amaanq/tree-sitter-capnp)
* [C](https://github.com/tree-sitter/tree-sitter-c)
* [C#](https://github.com/tree-sitter/tree-sitter-c-sharp)
* [C++](https://github.com/tree-sitter/tree-sitter-cpp)
* [C#](https://github.com/tree-sitter/tree-sitter-c-sharp)
* [Clojure](https://github.com/sogaiu/tree-sitter-clojure)
* [CMake](https://github.com/uyha/tree-sitter-cmake)
* [Comment](https://github.com/stsewd/tree-sitter-comment)
* [Common Lisp](https://github.com/theHamsta/tree-sitter-commonlisp)
* [CSS](https://github.com/tree-sitter/tree-sitter-css)
* [CUDA](https://github.com/theHamsta/tree-sitter-cuda)
* [Dart](https://github.com/UserNobody14/tree-sitter-dart)
* [D](https://github.com/gdamore/tree-sitter-d)
* [Dockerfile](https://github.com/camdencheek/tree-sitter-dockerfile)
* [DOT](https://github.com/rydesun/tree-sitter-dot)
* [Elixir](https://github.com/elixir-lang/tree-sitter-elixir)
* [Elm](https://github.com/elm-tooling/tree-sitter-elm)
* [Emacs Lisp](https://github.com/Wilfred/tree-sitter-elisp)
* [Eno](https://github.com/eno-lang/tree-sitter-eno)
* [ERB / EJS](https://github.com/tree-sitter/tree-sitter-embedded-template)
* [Erlang](https://github.com/WhatsApp/tree-sitter-erlang/)
* [Fennel](https://github.com/travonted/tree-sitter-fennel)
* [Fish](https://github.com/ram02z/tree-sitter-fish)
* [Formula](https://github.com/siraben/tree-sitter-formula)
* [Fortran](https://github.com/stadelmanma/tree-sitter-fortran)
* [gitattributes](https://github.com/ObserverOfTime/tree-sitter-gitattributes)
* [gitignore](https://github.com/shunsambongi/tree-sitter-gitignore)
* [Gleam](https://github.com/gleam-lang/tree-sitter-gleam)
* [GLSL (OpenGL Shading Language)](https://github.com/theHamsta/tree-sitter-glsl)
* [Go](https://github.com/tree-sitter/tree-sitter-go)
* [Go mod](https://github.com/camdencheek/tree-sitter-go-mod)
* [Go work](https://github.com/omertuc/tree-sitter-go-work)
* [Graphql](https://github.com/bkegley/tree-sitter-graphql)
* [Hack](https://github.com/slackhq/tree-sitter-hack)
* [Haskell](https://github.com/tree-sitter/tree-sitter-haskell)
* [HCL](https://github.com/MichaHoffmann/tree-sitter-hcl)
* [HTML](https://github.com/tree-sitter/tree-sitter-html)
* [Java](https://github.com/tree-sitter/tree-sitter-java)
* [JavaScript](https://github.com/tree-sitter/tree-sitter-javascript)
* [jq](https://github.com/flurie/tree-sitter-jq)
* [JSON5](https://github.com/Joakker/tree-sitter-json5)
* [JSON](https://github.com/tree-sitter/tree-sitter-json)
* [Julia](https://github.com/tree-sitter/tree-sitter-julia)
* [Kotlin](https://github.com/fwcd/tree-sitter-kotlin)
* [LALRPOP](https://github.com/traxys/tree-sitter-lalrpop)
* [Latex](https://github.com/latex-lsp/tree-sitter-latex)
* [Lean](https://github.com/Julian/tree-sitter-lean)
* [LLVM](https://github.com/benwilliamgraham/tree-sitter-llvm)
* [LLVM MachineIR](https://github.com/Flakebi/tree-sitter-llvm-mir)
* [LLVM TableGen](https://github.com/Flakebi/tree-sitter-tablegen)
* [Lua](https://github.com/Azganoth/tree-sitter-lua)
* [Make](https://github.com/alemuller/tree-sitter-make)
* [Markdown](https://github.com/ikatyang/tree-sitter-markdown)
* [Markdown](https://github.com/MDeiml/tree-sitter-markdown)
* [Meson](https://github.com/Decodetalkers/tree-sitter-meson)
* [Meson](https://github.com/staysail/tree-sitter-meson)
* [Motorola 68000 Assembly](https://github.com/grahambates/tree-sitter-m68k)
* [Nix](https://github.com/cstrahan/tree-sitter-nix)
* [Objective-C](https://github.com/jiyee/tree-sitter-objc)
* [OCaml](https://github.com/tree-sitter/tree-sitter-ocaml)
* [Org](https://github.com/milisims/tree-sitter-org)
* [Pascal](https://github.com/Isopod/tree-sitter-pascal)
* [Perl](https://github.com/ganezdragon/tree-sitter-perl)
* [Perl](https://github.com/tree-sitter-perl/tree-sitter-perl)
* [Perl POD](https://github.com/tree-sitter-perl/tree-sitter-pod)
* [PHP](https://github.com/tree-sitter/tree-sitter-php)
* [Portable Game Notation](https://github.com/rolandwalker/tree-sitter-pgn)
* [PowerShell](https://github.com/PowerShell/tree-sitter-PowerShell)
* [Protocol Buffers](https://github.com/mitchellh/tree-sitter-proto)
* [Python](https://github.com/tree-sitter/tree-sitter-python)
* [QML](https://github.com/yuja/tree-sitter-qmljs)
* [Racket](https://github.com/6cdh/tree-sitter-racket)
* [Rasi](https://github.com/Fymyte/tree-sitter-rasi)
* [re2c](https://github.com/alemuller/tree-sitter-re2c)
* [Regex](https://github.com/tree-sitter/tree-sitter-regex)
* [Rego](https://github.com/FallenAngel97/tree-sitter-rego)
* [reStructuredText](https://github.com/stsewd/tree-sitter-rst)
* [R](https://github.com/r-lib/tree-sitter-r)
* [Ruby](https://github.com/tree-sitter/tree-sitter-ruby)
* [Rust](https://github.com/tree-sitter/tree-sitter-rust)
* [R](https://github.com/r-lib/tree-sitter-r)
* [Scala](https://github.com/tree-sitter/tree-sitter-scala)
* [Scheme](https://github.com/6cdh/tree-sitter-scheme)
* [Scss](https://github.com/serenadeai/tree-sitter-scss)
* [S-expressions](https://github.com/AbstractMachinesLab/tree-sitter-sexp)
* [Smali](https://github.com/amaanq/tree-sitter-smali)
* [Smali](https://git.sr.ht/~yotam/tree-sitter-smali)
* [Sourcepawn](https://github.com/nilshelmig/tree-sitter-sourcepawn)
* [SPARQL](https://github.com/BonaBeavis/tree-sitter-sparql)
* [SQL - BigQuery](https://github.com/takegue/tree-sitter-sql-bigquery)
* [SQL - PostgreSQL](https://github.com/m-novikov/tree-sitter-sql)
* [SQL - SQLite](https://github.com/dhcmrlchtdj/tree-sitter-sqlite)
* [SSH](https://github.com/metio/tree-sitter-ssh-client-config)
* [Svelte](https://github.com/Himujjal/tree-sitter-svelte)
* [Swift](https://github.com/alex-pinkus/tree-sitter-swift)
* [SystemRDL](https://github.com/SystemRDL/tree-sitter-systemrdl)
* [Thrift](https://github.com/duskmoon314/tree-sitter-thrift)
* [TOML](https://github.com/ikatyang/tree-sitter-toml)
* [Tree-sitter Query](https://github.com/nvim-treesitter/tree-sitter-query)
* [Turtle](https://github.com/BonaBeavis/tree-sitter-turtle)
* [Twig](https://github.com/gbprod/tree-sitter-twig)
* [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript)
* [Verilog](https://github.com/tree-sitter/tree-sitter-verilog)
* [VHDL](https://github.com/alemuller/tree-sitter-vhdl)
* [Vue](https://github.com/ikatyang/tree-sitter-vue)
* [YAML](https://github.com/ikatyang/tree-sitter-yaml)
* [WASM](https://github.com/wasm-lsp/tree-sitter-wasm)
* [WGSL WebGPU Shading Language](https://github.com/mehmetoguzderin/tree-sitter-wgsl)
Parsers for these languages are in development:
* [Agda](https://github.com/tree-sitter/tree-sitter-agda)
* [Elixir](https://github.com/elixir-lang/tree-sitter-elixir)
* [Dockerfile](https://github.com/camdencheek/tree-sitter-dockerfile)
* [Go mod](https://github.com/camdencheek/tree-sitter-go-mod)
* [Hack](https://github.com/slackhq/tree-sitter-hack)
* [Haskell](https://github.com/tree-sitter/tree-sitter-haskell)
* [Julia](https://github.com/tree-sitter/tree-sitter-julia)
* [Kotlin](https://github.com/fwcd/tree-sitter-kotlin)
* [Nix](https://github.com/cstrahan/tree-sitter-nix)
* [Objective-C](https://github.com/jiyee/tree-sitter-objc)
* [Org](https://github.com/milisims/tree-sitter-org)
* [Perl](https://github.com/ganezdragon/tree-sitter-perl)
* [Protocol Buffers](https://github.com/mitchellh/tree-sitter-proto)
* [Racket](https://github.com/6cdh/tree-sitter-racket)
* [Scala](https://github.com/tree-sitter/tree-sitter-scala)
* [Sourcepawn](https://github.com/nilshelmig/tree-sitter-sourcepawn)
* [SQL](https://github.com/m-novikov/tree-sitter-sql)
* [YAML](https://github.com/ikatyang/tree-sitter-yaml)
* [YANG](https://github.com/Hubro/tree-sitter-yang)
* [Zig](https://github.com/maxxnino/tree-sitter-zig)
### Talks on Tree-sitter
@ -109,9 +156,9 @@ Parsers for these languages are in development:
The design of Tree-sitter was greatly influenced by the following research papers:
- [Practical Algorithms for Incremental Software Development Environments](https://www2.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-946.pdf)
- [Context Aware Scanning for Parsing Extensible Languages](https://www-users.cse.umn.edu/~evw/pubs/vanwyk07gpce/vanwyk07gpce.pdf)
- [Efficient and Flexible Incremental Parsing](http://harmonia.cs.berkeley.edu/papers/twagner-parsing.pdf)
- [Incremental Analysis of Real Programming Languages](http://harmonia.cs.berkeley.edu/papers/twagner-glr.pdf)
- [Error Detection and Recovery in LR Parsers](http://what-when-how.com/compiler-writing/bottom-up-parsing-compiler-writing-part-13)
- [Error Recovery for LR Parsers](https://apps.dtic.mil/sti/pdfs/ADA043470.pdf)
* [Practical Algorithms for Incremental Software Development Environments](https://www2.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-946.pdf)
* [Context Aware Scanning for Parsing Extensible Languages](https://www-users.cse.umn.edu/~evw/pubs/vanwyk07gpce/vanwyk07gpce.pdf)
* [Efficient and Flexible Incremental Parsing](https://harmonia.cs.berkeley.edu/papers/twagner-parsing.pdf)
* [Incremental Analysis of Real Programming Languages](https://harmonia.cs.berkeley.edu/papers/twagner-glr.pdf)
* [Error Detection and Recovery in LR Parsers](https://what-when-how.com/compiler-writing/bottom-up-parsing-compiler-writing-part-13)
* [Error Recovery for LR Parsers](https://apps.dtic.mil/sti/pdfs/ADA043470.pdf)
View file
@ -290,7 +290,7 @@ This `ts_node_edit` function is _only_ needed in the case where you have retriev
### Multi-language Documents
Sometimes, different parts of a file may be written in different languages. For example, templating languages like [EJS](http://ejs.co) and [ERB](https://ruby-doc.org/stdlib-2.5.1/libdoc/erb/rdoc/ERB.html) allow you to generate HTML by writing a mixture of HTML and another language like JavaScript or Ruby.
Sometimes, different parts of a file may be written in different languages. For example, templating languages like [EJS](https://ejs.co) and [ERB](https://ruby-doc.org/stdlib-2.5.1/libdoc/erb/rdoc/ERB.html) allow you to generate HTML by writing a mixture of HTML and another language like JavaScript or Ruby.
Tree-sitter handles these types of documents by allowing you to create a syntax tree based on the text in certain _ranges_ of a file.
@ -442,13 +442,13 @@ Many code analysis tasks involve searching for patterns in syntax trees. Tree-si
A _query_ consists of one or more _patterns_, where each pattern is an [S-expression](https://en.wikipedia.org/wiki/S-expression) that matches a certain set of nodes in a syntax tree. The expression to match a given node consists of a pair of parentheses containing two things: the node's type, and optionally, a series of other S-expressions that match the node's children. For example, this pattern would match any `binary_expression` node whose children are both `number_literal` nodes:
```
```scheme
(binary_expression (number_literal) (number_literal))
```
Children can also be omitted. For example, this would match any `binary_expression` where at least _one_ of the children is a `string_literal` node:
```
```scheme
(binary_expression (string_literal))
```
@ -456,7 +456,7 @@ Children can also be omitted. For example, this would match any `binary_expressi
In general, it's a good idea to make patterns more specific by specifying [field names](#node-field-names) associated with child nodes. You do this by prefixing a child pattern with a field name followed by a colon. For example, this pattern would match an `assignment_expression` node where the `left` child is a `member_expression` whose `object` is a `call_expression`.
```
```scheme
(assignment_expression
left: (member_expression
object: (call_expression)))
@ -464,9 +464,9 @@ In general, it's a good idea to make patterns more specific by specifying [field
#### Negated Fields
You can also constrain a pattern so that it only matches nodes that *lack* a certain field. To do this, add a field name prefixed by a `!` within the parent pattern. For example, this pattern would match a class declaration with no type parameters:
You can also constrain a pattern so that it only matches nodes that _lack_ a certain field. To do this, add a field name prefixed by a `!` within the parent pattern. For example, this pattern would match a class declaration with no type parameters:
```
```scheme
(class_declaration
name: (identifier) @class_name
!type_parameters)
@ -476,7 +476,7 @@ You can also constrain a pattern so that it only matches nodes that *lack* a cer
The parenthesized syntax for writing nodes only applies to [named nodes](#named-vs-anonymous-nodes). To match specific anonymous nodes, you write their name between double quotes. For example, this pattern would match any `binary_expression` where the operator is `!=` and the right side is `null`:
```
```scheme
(binary_expression
operator: "!="
right: (null))
@ -488,7 +488,7 @@ When matching patterns, you may want to process specific nodes within the patter
For example, this pattern would match any assignment of a `function` to an `identifier`, and it would associate the name `the-function-name` with the identifier:
```
```scheme
(assignment_expression
left: (identifier) @the-function-name
right: (function))
@ -496,7 +496,7 @@ For example, this pattern would match any assignment of a `function` to an `iden
And this pattern would match all method definitions, associating the name `the-method-name` with the method name, `the-class-name` with the containing class name:
```
```scheme
(class_declaration
name: (identifier) @the-class-name
body: (class_body
@ -510,13 +510,13 @@ You can match a repeating sequence of sibling nodes using the postfix `+` and `*
For example, this pattern would match a sequence of one or more comments:
```
```scheme
(comment)+
```
This pattern would match a class declaration, capturing all of the decorators if any were present:
```
```scheme
(class_declaration
(decorator)* @the-decorator
name: (identifier) @the-name)
@ -524,7 +524,7 @@ This pattern would match a class declaration, capturing all of the decorators if
You can also mark a node as optional using the `?` operator. For example, this pattern would match all function calls, capturing a string argument if one was present:
```
```scheme
(call_expression
function: (identifier) @the-function
arguments: (arguments (string)? @the-string-arg))
@ -534,7 +534,7 @@ You can also mark a node as optional using the `?` operator. For example, this p
You can also use parentheses for grouping a sequence of _sibling_ nodes. For example, this pattern would match a comment followed by a function declaration:
```
```scheme
(
(comment)
(function_declaration)
@ -543,7 +543,7 @@ You can also use parentheses for grouping a sequence of _sibling_ nodes. For exa
Any of the quantification operators mentioned above (`+`, `*`, and `?`) can also be applied to groups. For example, this pattern would match a comma-separated series of numbers:
```
```scheme
(
(number)
("," (number))*
@ -558,7 +558,7 @@ This is similar to _character classes_ from regular expressions (`[abc]` matches
For example, this pattern would match a call to either a variable or an object property.
In the case of a variable, capture it as `@function`, and in the case of a property, capture it as `@method`:
```
```scheme
(call_expression
function: [
(identifier) @function
@ -569,7 +569,7 @@ In the case of a variable, capture it as `@function`, and in the case of a prope
This pattern would match a set of possible keyword tokens, capturing them as `@keyword`:
```
```scheme
[
"break"
"delete"
@ -592,7 +592,7 @@ and `_` will match any named or anonymous node.
For example, this pattern would match any node inside a call:
```
```scheme
(call (_) @call.inner)
```
@ -602,7 +602,7 @@ The anchor operator, `.`, is used to constrain the ways in which child patterns
When `.` is placed before the _first_ child within a parent pattern, the child will only match when it is the first named node in the parent. For example, the below pattern matches a given `array` node at most once, assigning the `@the-element` capture to the first `identifier` node in the parent `array`:
```
```scheme
(array . (identifier) @the-element)
```
@ -610,13 +610,13 @@ Without this anchor, the pattern would match once for every identifier in the ar
Similarly, an anchor placed after a pattern's _last_ child will cause that child pattern to only match nodes that are the last named child of their parent. The below pattern matches only nodes that are the last named child within a `block`.
```
```scheme
(block (_) @last-expression .)
```
Finally, an anchor _between_ two child patterns will cause the patterns to only match nodes that are immediate siblings. The pattern below, given a long dotted name like `a.b.c.d`, will only match pairs of consecutive identifiers: `a, b`, `b, c`, and `c, d`.
```
```scheme
(dotted_name
(identifier) @prev-id
.
@ -629,20 +629,38 @@ The restrictions placed on a pattern by an anchor operator ignore anonymous node
#### Predicates
You can also specify arbitrary metadata and conditions associated with a pattern by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions start with a _predicate name_ beginning with a `#` character. After that, they can contain an arbitrary number of `@`-prefixed capture names or strings.
You can also specify arbitrary metadata and conditions associated with a pattern
by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions
start with a _predicate name_ beginning with a `#` character. After that, they can
contain an arbitrary number of `@`-prefixed capture names or strings.
For example, this pattern would match identifier whose names is written in `SCREAMING_SNAKE_CASE`:
Tree-sitter's CLI supports the following predicates by default:
```
(
(identifier) @constant
(#match? @constant "^[A-Z][A-Z_]+")
)
##### eq?, not-eq?, any-eq?, any-not-eq?
This family of predicates allows you to match against a single capture or string
value.
The first argument must be a capture, but the second can be either a capture, to
compare the two captures' text, or a string, to compare the first capture's text
against.
The base predicate is "#eq?", but its complement "#not-eq?" can be used to _not_
match a value.
Consider the following example targeting C:
```scheme
((identifier) @variable.builtin
(#eq? @variable.builtin "self"))
```
And this pattern would match key-value pairs where the `value` is an identifier with the same name as the key:
This pattern would match any identifier whose text is `self`.
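Here is a minimal sketch of the "#not-eq?" complement described above (the capture name is illustrative):

```scheme
((identifier) @variable
  (#not-eq? @variable "self"))
```

This would match every identifier whose text is anything other than `self`.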
```
And this pattern would match key-value pairs where the `value` is an identifier
with the same name as the key:
```scheme
(
(pair
key: (property_identifier) @key-name
@ -651,7 +669,87 @@ And this pattern would match key-value pairs where the `value` is an identifier
)
```
_Note_ - Predicates are not handled directly by the Tree-sitter C library. They are just exposed in a structured form so that higher-level code can perform the filtering. However, higher-level bindings to Tree-sitter like [the Rust crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) implement a few common predicates like `#eq?` and `#match?`.
The prefix "any-" is meant for use with quantified captures. Here's
an example finding a segment of empty comments
```scheme
((comment)+ @comment.empty
(#any-eq? @comment.empty "//"))
```
Note that "#any-eq?" will match a quantified capture if
_any_ of the nodes match the predicate, while by default a quantified capture
will only match if _all_ the nodes match the predicate.
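The negated form "#any-not-eq?" follows the same rule: it accepts a quantified capture when at least one node's text differs from the given value. A minimal sketch (the capture name is illustrative):

```scheme
((comment)+ @comment.nonempty
  (#any-not-eq? @comment.nonempty "//"))
```

This would match a run of comments as long as at least one of them is not empty.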
##### match?, not-match?, any-match?, any-not-match?
These predicates are similar to the eq? predicates, but they use regular expressions
to match against the capture's text.
The first argument must be a capture, and the second must be a string containing
a regular expression.
For example, this pattern would match identifiers whose names are written in `SCREAMING_SNAKE_CASE`:
```scheme
((identifier) @constant
(#match? @constant "^[A-Z][A-Z_]+"))
```
Here's an example finding potential documentation comments in C:
```scheme
((comment)+ @comment.documentation
(#match? @comment.documentation "^///\s+.*"))
```
Here's another example finding Cgo comments to potentially inject with C:
```scheme
((comment)+ @injection.content
.
(import_declaration
(import_spec path: (interpreted_string_literal) @_import_c))
(#eq? @_import_c "\"C\"")
(#match? @injection.content "^//"))
```
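The negated forms work the same way. A minimal sketch (the capture name and regex are illustrative) matching identifiers that do _not_ begin with an uppercase letter:

```scheme
((identifier) @variable
  (#not-match? @variable "^[A-Z]"))
```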
##### any-of?, not-any-of?
The "any-of?" predicate allows you to match a capture against multiple strings,
and will match if the capture's text is equal to any of the strings.
Consider this example that targets JavaScript:
```scheme
((identifier) @variable.builtin
(#any-of? @variable.builtin
"arguments"
"module"
"console"
"window"
"document"))
```
This will match any of the builtin variables in JavaScript.
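Conversely, "#not-any-of?" matches only when the capture's text equals _none_ of the given strings. A minimal sketch (the strings are illustrative):

```scheme
((identifier) @variable
  (#not-any-of? @variable "arguments" "module" "console"))
```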
_Note_ — Predicates are not handled directly by the Tree-sitter C library.
They are just exposed in a structured form so that higher-level code can perform
the filtering. However, higher-level bindings to Tree-sitter like
[the Rust Crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust)
or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web)
do implement a few common predicates like the `#eq?`, `#match?`, and `#any-of?`
predicates explained above.
To recap the predicates Tree-sitter's bindings support:
- `#eq?` checks for a direct match against a capture or string
- `#match?` checks for a match against a regular expression
- `#any-of?` checks for a match against a list of strings
- Adding `not-` to the beginning of any of these predicates will negate the match
- By default, a quantified capture will only match if _all_ of the nodes match the predicate
- Adding `any-` before the `eq` or `match` predicates will instead match if any of the nodes match the predicate
### The Query API
View file
@ -46,7 +46,7 @@ npm install --save nan
npm install --save-dev tree-sitter-cli
```
The last command will install the CLI into the `node_modules` folder in your working directory. An executable program called `tree-sitter` will be created inside of `node_modules/.bin/`. You may want to follow the Node.js convention of adding that folder to your your `PATH` so that you can easily run this program when working in this directory.
The last command will install the CLI into the `node_modules` folder in your working directory. An executable program called `tree-sitter` will be created inside of `node_modules/.bin/`. You may want to follow the Node.js convention of adding that folder to your `PATH` so that you can easily run this program when working in this directory.
```sh
# In your shell profile script
@ -81,9 +81,16 @@ echo 'hello' > example-file
tree-sitter parse example-file
```
Alternatively, in Windows PowerShell:
```pwsh
"hello" | Out-File example-file -Encoding utf8
tree-sitter parse example-file
```
This should print the following:
```
```text
(source_file [0, 0] - [1, 0])
```
@ -116,7 +123,7 @@ For each rule that you add to the grammar, you should first create a *test* that
For example, you might have a file called `test/corpus/statements.txt` that contains a series of entries like this:
```
```text
==================
Return statements
==================
@ -142,7 +149,7 @@ func x() int {
The expected output section can also *optionally* show the [*field names*][field-names-section] associated with each child node. To include field names in your tests, you write a node's field name followed by a colon, before the node itself in the S-expression:
```
```text
(source_file
(function_definition
name: (identifier)
@ -152,6 +159,26 @@ func x() int {
(return_statement (number)))))
```
* If your language's syntax conflicts with the `===` and `---` test separators, you can optionally add an arbitrary identical suffix (in the below example, `|||`) to disambiguate them:
```text
==================|||
Basic module
==================|||
---- MODULE Test ----
increment(n) == n + 1
====
---|||
(source_file
(module (identifier)
(operator (identifier)
(parameter_list (identifier))
(plus (identifier_ref) (number)))))
```
These tests are important. They serve as the parser's API documentation, and they can be run every time you change the grammar to verify that everything still parses correctly.
By default, the `tree-sitter test` command runs all of the tests in your `corpus` or `test/corpus/` folder. To run a particular test, you can use the `-f` flag:
@ -174,7 +201,7 @@ The `tree-sitter test` command will *also* run any syntax highlighting tests in
You can run your parser on an arbitrary file using `tree-sitter parse`. This will print the resulting the syntax tree, including nodes' ranges and field names, like this:
```
```text
(source_file [0, 0] - [3, 0]
(function_declaration [0, 0] - [2, 1]
name: (identifier [0, 5] - [0, 9])
@ -222,10 +249,10 @@ In addition to the `name` and `rules` fields, grammars have a few other optional
* **`inline`** - an array of rule names that should be automatically *removed* from the grammar by replacing all of their usages with a copy of their definition. This is useful for rules that are used in multiple places but for which you *don't* want to create syntax tree nodes at runtime.
* **`conflicts`** - an array of arrays of rule names. Each inner array represents a set of rules that's involved in an *LR(1) conflict* that is *intended to exist* in the grammar. When these conflicts occur at runtime, Tree-sitter will use the GLR algorithm to explore all of the possible interpretations. If *multiple* parses end up succeeding, Tree-sitter will pick the subtree whose corresponding rule has the highest total *dynamic precedence*.
* **`externals`** - an array of token names which can be returned by an [*external scanner*](#external-scanners). External scanners allow you to write custom C code which runs during the lexing process in order to handle lexical rules (e.g. Python's indentation tokens) that cannot be described by regular expressions.
* **`precedences`** - an array of arrays of strings, where each array of strings defines named precedence levels in descending order. These names can be used in the `prec` functions to define precedence relative only to other names in the array, rather than globally. Can only be used with parse precedence, not lexical precedence.
* **`word`** - the name of a token that will match keywords for the purpose of the [keyword extraction](#keyword-extraction) optimization.
* **`supertypes`** - an array of hidden rule names which should be considered to be 'supertypes' in the generated [*node types* file][static-node-types].
## Writing the Grammar
Writing a grammar requires creativity. There are an infinite number of CFGs (context-free grammars) that can be used to describe any given language. In order to produce a good Tree-sitter parser, you need to create a grammar with two important properties:
@ -349,7 +376,7 @@ return x + y;
According to the specification, this line is a `ReturnStatement`, the fragment `x + y` is an `AdditiveExpression`, and `x` and `y` are both `IdentifierReferences`. The relationship between these constructs is captured by a complex series of production rules:
```
```text
ReturnStatement -> 'return' Expression
Expression -> AssignmentExpression
AssignmentExpression -> ConditionalExpression
@ -406,7 +433,7 @@ To produce a readable syntax tree, we'd like to model JavaScript expressions usi
Of course, this flat structure is highly ambiguous. If we try to generate a parser, Tree-sitter gives us an error message:
```
```text
Error: Unresolved conflict for symbol sequence:
'-' _expression • '*' …
@ -442,7 +469,7 @@ For an expression like `-a * b`, it's not clear whether the `-` operator applies
Applying a higher precedence in `unary_expression` fixes that conflict, but there is still another conflict:
```
```text
Error: Unresolved conflict for symbol sequence:
_expression '*' _expression • '*' …
@ -498,11 +525,11 @@ Tree-sitter's parsing process is divided into two phases: parsing (which is desc
### Conflicting Tokens
Grammars often contain multiple tokens that can match the same characters. For example, a grammar might contain the tokens (`"if"` and `/[a-z]+/`). Tree-sitter differentiates between these conflicting tokens in a few ways:
Grammars often contain multiple tokens that can match the same characters. For example, a grammar might contain the tokens (`"if"` and `/[a-z]+/`). Tree-sitter differentiates between these conflicting tokens in a few ways.
1. **Context-aware Lexing** - Tree-sitter performs lexing on-demand, during the parsing process. At any given position in a source document, the lexer only tries to recognize tokens that are *valid* at that position in the document.
2. **Lexical Precedence** - When the precedence functions described [above](#the-grammar-dsl) are used within the `token` function, the given precedence values serve as instructions to the lexer. If there are two valid tokens that match the characters at a given position in the document, Tree-sitter will select the one with the higher precedence.
2. **Lexical Precedence** - When the precedence functions described [above](#the-grammar-dsl) are used *within* the `token` function, the given explicit precedence values serve as instructions to the lexer. If there are two valid tokens that match the characters at a given position in the document, Tree-sitter will select the one with the higher precedence.
3. **Match Length** - If multiple valid tokens with the same precedence match the characters at a given position in a document, Tree-sitter will select the token that matches the [longest sequence of characters][longest-match].
@ -510,6 +537,12 @@ Grammars often contain multiple tokens that can match the same characters. For e
5. **Rule Order** - If none of the above criteria can be used to select one token over another, Tree-sitter will prefer the token that appears earlier in the grammar.
If there is an external scanner, it may have [an additional impact](#other-external-scanner-details) on regular tokens defined in the grammar.
### Lexical Precedence vs. Parse Precedence
One common mistake involves not distinguishing *lexical precedence* from *parse precedence*. Parse precedence determines which rule is chosen to interpret a given sequence of tokens. *Lexical precedence* determines which token is chosen to interpret the text at a given position, and it is a lower-level operation that is done first. The above list fully captures Tree-sitter's lexical precedence rules, and you will probably refer back to this section of the documentation more often than any other. Most of the time when you really get stuck, you're dealing with a lexical precedence problem. Pay particular attention to the difference in meaning between using `prec` inside of the `token` function versus outside of it. The *lexical precedence* syntax is `token(prec(N, ...))`.
### Keywords
Many languages have a set of *keyword* tokens (e.g. `if`, `for`, `return`), as well as a more general token (e.g. `identifier`) that matches any word, including many of the keyword strings. For example, JavaScript has a keyword `instanceof`, which is used as a binary operator, like this:
@ -568,6 +601,7 @@ Aside from improving error detection, keyword extraction also has performance be
### External Scanners
Many languages have some tokens whose structure is impossible or inconvenient to describe with a regular expression. Some examples:
* [Indent and dedent][indent-tokens] tokens in Python
* [Heredocs][heredoc] in Bash and Ruby
* [Percent strings][percent-string] in Ruby
@ -592,7 +626,7 @@ grammar({
Then, add another C or C++ source file to your project. Currently, its path must be `src/scanner.c` or `src/scanner.cc` for the CLI to recognize it. Be sure to add this file to the `sources` section of your `binding.gyp` file so that it will be included when your project is compiled by Node.js, and uncomment the appropriate block in your `bindings/rust/build.rs` file so that it will be included in your Rust crate.
In this new source file, define an [`enum`][enum] type containing the names of all of your external tokens. The ordering of this enum must match the order in your grammar's `externals` array.
In this new source file, define an [`enum`][enum] type containing the names of all of your external tokens. The ordering of this enum must match the order in your grammar's `externals` array; the actual names do not matter.
```c
#include <tree_sitter/parser.h>
@ -616,7 +650,6 @@ void * tree_sitter_my_language_external_scanner_create() {
This function should create your scanner object. It will be called once each time your language is set on a parser. Often, you will want to allocate memory on the heap and return a pointer to it. If your external scanner doesn't need to maintain any state, it's ok to return `NULL`.
#### Destroy
```c
@ -655,6 +688,7 @@ void tree_sitter_my_language_external_scanner_deserialize(
```
This function should *restore* the state of your scanner based on the bytes that were previously written by the `serialize` function. It is called with a pointer to your scanner, a pointer to the buffer of bytes, and the number of bytes that should be read.
It is good practice to explicitly erase your scanner state variables at the start of this function, before restoring their values from the byte buffer.
#### Scan
@ -672,15 +706,16 @@ This function is responsible for recognizing external tokens. It should return `
* **`int32_t lookahead`** - The current next character in the input stream, represented as a 32-bit unicode code point.
* **`TSSymbol result_symbol`** - The symbol that was recognized. Your scan function should *assign* to this field one of the values from the `TokenType` enum, described above.
* **`void (*advance)(TSLexer *, bool skip)`** - A function for advancing to the next character. If you pass `true` for the second argument, the current character will be treated as whitespace.
* **`void (*advance)(TSLexer *, bool skip)`** - A function for advancing to the next character. If you pass `true` for the second argument, the current character will be treated as whitespace; whitespace won't be included in the text range associated with tokens emitted by the external scanner.
* **`void (*mark_end)(TSLexer *)`** - A function for marking the end of the recognized token. This allows matching tokens that require multiple characters of lookahead. By default (if you don't call `mark_end`), any character that you moved past using the `advance` function will be included in the size of the token. But once you call `mark_end`, then any later calls to `advance` will *not* increase the size of the returned token. You can call `mark_end` multiple times to increase the size of the token.
* **`uint32_t (*get_column)(TSLexer *)`** - A function for querying the current column position of the lexer. It returns the number of codepoints since the start of the current line. The codepoint position is recalculated on every call to this function by reading from the start of the line.
* **`bool (*is_at_included_range_start)(TSLexer *)`** - A function for checking if the parser has just skipped some characters in the document. When parsing an embedded document using the `ts_parser_set_included_ranges` function (described in the [multi-language document section][multi-language-section]), your scanner may want to apply some special behavior when moving to a disjoint part of the document. For example, in [EJS documents][ejs], the JavaScript parser uses this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`.
* **`bool (*is_at_included_range_start)(const TSLexer *)`** - A function for checking whether the parser has just skipped some characters in the document. When parsing an embedded document using the `ts_parser_set_included_ranges` function (described in the [multi-language document section][multi-language-section]), the scanner may want to apply some special behavior when moving to a disjoint part of the document. For example, in [EJS documents][ejs], the JavaScript parser uses this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`.
* **`bool (*eof)(const TSLexer *)`** - A function for determining whether the lexer is at the end of the file. The value of `lookahead` will be `0` at the end of a file, but this function should be used instead of checking for that value because the `0` or "NUL" value is also a valid character that could be present in the file being parsed.
The third argument to the `scan` function is an array of booleans that indicates which of your external tokens are currently expected by the parser. You should only look for a given token if it is valid according to this array. At the same time, you cannot backtrack, so you may need to combine certain pieces of logic.
```c
if (valid_symbols[INDENT] || valid_symbol[DEDENT]) {
if (valid_symbols[INDENT] || valid_symbols[DEDENT]) {
// ... logic that is common to both `INDENT` and `DEDENT`
@ -694,8 +729,23 @@ if (valid_symbols[INDENT] || valid_symbol[DEDENT]) {
}
```
#### Other External Scanner Details
If a token in the `externals` array is valid at a given position in the parse, the external scanner will be called first before anything else is done. This means the external scanner functions as a powerful override of Tree-sitter's lexing behavior, and can be used to solve problems that can't be cracked with ordinary lexical, parse, or dynamic precedence.
If a syntax error is encountered during regular parsing, Tree-sitter's first action during error recovery will be to call the external scanner's `scan` function with all tokens marked valid. The scanner should detect this case and handle it appropriately. One simple method of detection is to add an unused token to the end of the `externals` array, for example `externals: $ => [$.token1, $.token2, $.error_sentinel]`, then check whether that token is marked valid to determine whether Tree-sitter is in error correction mode.
If you put terminal keywords in the `externals` array, for example `externals: $ => ['if', 'then', 'else']`, then any time those terminals are present in the grammar they will be tokenized by the external scanner. It is similar to writing `externals: [$.if_keyword, $.then_keyword, $.else_keyword]` then using `alias($.if_keyword, 'if')` in the grammar.
If you use literal keywords in the `externals` array, lexing works in two steps: the external scanner is called first, and if it sets a resulting token and returns `true`, the token is considered recognized and Tree-sitter moves on to the next token. But the external scanner may return `false`, in which case Tree-sitter falls back to the internal lexing mechanism.
If a keyword is defined in the `externals` array in rule-referencing form, like `$.if_keyword`, and there is no additional definition of that rule in the grammar rules (e.g. `if_keyword: $ => 'if'`), then falling back to the internal lexer isn't possible, because Tree-sitter doesn't know the actual keyword text; it is entirely the external scanner's responsibility to recognize such tokens.
External scanners are a common cause of infinite loops.
Be very careful when emitting zero-width tokens from your external scanner, and if you consume characters in a loop, be sure to use the `eof` function to check whether you are at the end of the file.
[ambiguous-grammar]: https://en.wikipedia.org/wiki/Ambiguous_grammar
[antlr]: http://www.antlr.org/
[antlr]: https://www.antlr.org
[bison-dprec]: https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
[bison]: https://en.wikipedia.org/wiki/GNU_bison
[c-linkage]: https://en.cppreference.com/w/cpp/language/language_linkage
View file
@ -9,8 +9,6 @@ Syntax highlighting is a very common feature in applications that deal with code
This document explains how the Tree-sitter syntax highlighting system works, using the command line interface. If you are using the `tree-sitter-highlight` library (either from C or from Rust), all of these concepts are still applicable, but the configuration data is provided using in-memory objects, rather than files.
**Note - If you are working on syntax highlighting in the [Atom](https://atom.io/) text editor, you should consult [the grammar-creation page](https://flight-manual.atom.io/hacking-atom/sections/creating-a-grammar/) of the Atom Flight Manual, *not* this document. Atom currently uses a different syntax highlighting system that is also based on Tree-sitter, but is older than the one described here.**
## Overview
All of the files needed to highlight a given language are normally included in the same git repository as the Tree-sitter grammar for that language (for example, [`tree-sitter-javascript`](https://github.com/tree-sitter/tree-sitter-javascript), [`tree-sitter-ruby`](https://github.com/tree-sitter/tree-sitter-ruby)). In order to run syntax highlighting from the command-line, three types of files are needed:
@ -27,9 +25,9 @@ The Tree-sitter CLI automatically creates two directories in your home folder.
These directories are created in the "normal" place for your platform:
- On Linux, `~/.config/tree-sitter` and `~/.cache/tree-sitter`
- On Mac, `~/Library/Application Support/tree-sitter` and `~/Library/Caches/tree-sitter`
- On Windows, `C:\Users\[username]\AppData\Roaming\tree-sitter` and `C:\Users\[username]\AppData\Local\tree-sitter`
* On Linux, `~/.config/tree-sitter` and `~/.cache/tree-sitter`
* On Mac, `~/Library/Application Support/tree-sitter` and `~/Library/Caches/tree-sitter`
* On Windows, `C:\Users\[username]\AppData\Roaming\tree-sitter` and `C:\Users\[username]\AppData\Local\tree-sitter`
The CLI will work if there's no config file present, falling back on default values for each configuration option. To create a config file that you can edit, run this command:
@ -63,6 +61,7 @@ In your config file, the `"theme"` value is an object whose keys are dot-separat
#### Highlight Names
A theme can contain multiple keys that share a common subsequence. Examples:
* `variable` and `variable.parameter`
* `function`, `function.builtin`, and `function.method`
@ -160,7 +159,7 @@ func increment(a int) int {
With this syntax tree:
```
```scheme
(source_file
(function_declaration
name: (identifier)
@ -180,6 +179,7 @@ With this syntax tree:
#### Example Query
Suppose we wanted to render this code with the following colors:
* keywords `func` and `return` in purple
* function `increment` in blue
* type `int` in green
@ -187,7 +187,7 @@ Suppose we wanted to render this code with the following colors:
We can assign each of these categories a *highlight name* using a query like this:
```
```scheme
; highlights.scm
"func" @keyword
@ -254,7 +254,7 @@ list = [item]
With this syntax tree:
```
```scheme
(program
(method
name: (identifier)
@ -297,7 +297,7 @@ There are several different types of names within this method:
Let's write some queries that let us clearly distinguish between these types of names. First, set up the highlighting query, as described in the previous section. We'll assign distinct colors to method calls, method definitions, and formal parameters:
```
```scheme
; highlights.scm
(call method: (identifier) @function.method)
@ -314,7 +314,7 @@ Let's write some queries that let us clearly distinguish between these types of
Then, we'll set up a local variable query to keep track of the variables and scopes. Here, we're indicating that methods and blocks create local *scopes*, parameters and assignments create *definitions*, and other identifiers should be considered *references*:
```
```scheme
; locals.scm
(method) @local.scope
@ -347,6 +347,7 @@ Running `tree-sitter highlight` on this ruby file would produce output like this
### Language Injection
Some source files contain code written in multiple different languages. Examples include:
* HTML files, which can contain JavaScript inside of `<script>` tags and CSS inside of `<style>` tags
* [ERB](https://en.wikipedia.org/wiki/ERuby) files, which contain Ruby inside of `<% %>` tags, and HTML outside of those tags
* PHP files, which can contain HTML outside of the `<?php` and `?>` tags
@ -361,8 +362,18 @@ All of these examples can be modeled in terms of a *parent* syntax tree and one
The language injection behavior can also be configured by some properties associated with patterns:
* `injection.language` - can be used to hard-code the name of a specific language.
* `injection.combined` - indicates that *all* of the matching nodes in the tree should have their content parsed as *one* nested document.
* `injection.include-children` - indicates that the `@injection.content` node's *entire* text should be re-parsed, including the text of its child nodes. By default, child nodes' text will be *excluded* from the injected document.
* `injection.combined` - indicates that *all* of the matching nodes in the tree
should have their content parsed as *one* nested document.
* `injection.include-children` - indicates that the `@injection.content` node's
*entire* text should be re-parsed, including the text of its child nodes. By default,
child nodes' text will be *excluded* from the injected document.
* `injection.self` - indicates that the `@injection.content` node should be parsed
using the same language as the node itself. This is useful for cases where the
node's language is not known until runtime (e.g. when it is inherited from another language).
* `injection.parent` - indicates that the `@injection.content` node should be parsed
using the language of the node's parent document. This is only meant for injections
that need to refer back to the parent language to parse the node's text inside
the injected language (see the sketch after this list).
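As a rough sketch of how the last two properties might be used (the node names below are hypothetical, and `#set!` is used because these are pattern properties rather than filtering predicates):

```scheme
; Re-parse a macro body using the same language as the current document.
((macro_body) @injection.content
  (#set! injection.self))

; Re-parse an embedded fragment using the language of the parent document.
((preprocessor_arg) @injection.content
  (#set! injection.parent))
```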
#### Examples
@ -376,7 +387,7 @@ BASH
With this syntax tree:
```
```scheme
(program
(method_call
method: (identifier)
@ -390,7 +401,7 @@ With this syntax tree:
The following query would specify that the contents of the heredoc should be parsed using a language named "BASH" (because that is the text of the `heredoc_end` node):
```
```scheme
(heredoc_body
(heredoc_end) @injection.language) @injection.content
```
@ -398,7 +409,7 @@ The following query would specify that the contents of the heredoc should be par
You can also force the language using the `#set!` predicate.
For example, this will force the language to be always `ruby`.
```
```scheme
((heredoc_body) @injection.content
(#set! injection.language "ruby"))
```
@ -427,6 +438,9 @@ var abc = function(d) {
// ^ string
// ^ variable
}
baz();
// ^ !variable
};
```
@ -437,3 +451,5 @@ From the Sublime text docs:
> **Caret**: ^ this will test the following selector against the scope on the most recent non-test line. It will test it at the same column the ^ is in. Consecutive ^s will test each column against the selector.
>
> **Arrow**: <- this will test the following selector against the scope on the most recent non-test line. It will test it at the same column as the comment character is in.
Note that an exclamation mark (`!`) can be used to negate a selector. For example, `!keyword` will match any scope that is not the `keyword` class.
View file
@ -13,7 +13,7 @@ syntax trees up-to-date as the source code changes. `libtree-sitter` is designed
The CLI is
used to generate a parser for a language by supplying a [context-free grammar](https://en.wikipedia.org/wiki/Context-free_grammar) describing the
language. The CLI is a build tool; it is no longer needed once a parser has been generated. It is written in Rust, and is available on [crates.io](https://crates.io), [npm](http://npmjs.com), and as a pre-built binary [on GitHub](https://github.com/tree-sitter/tree-sitter/releases/latest).
language. The CLI is a build tool; it is no longer needed once a parser has been generated. It is written in Rust, and is available on [crates.io](https://crates.io), [npm](https://npmjs.com), and as a pre-built binary [on GitHub](https://github.com/tree-sitter/tree-sitter/releases/latest).
## The CLI
@ -21,7 +21,7 @@ The `tree-sitter` CLI's most important feature is the `generate` subcommand. Thi
### Parsing a Grammar
First, Tree-sitter must must evaluate the JavaScript code in `grammar.js` and convert the grammar to a JSON format. It does this by shelling out to `node`. The format of the grammars is formally specified by the JSON schema in [grammar-schema.json](https://github.com/tree-sitter/tree-sitter/blob/master/cli/src/generate/grammar-schema.json). The parsing is implemented in [parse_grammar.rs](https://github.com/tree-sitter/tree-sitter/blob/master/cli/src/generate/parse_grammar.rs).
First, Tree-sitter must evaluate the JavaScript code in `grammar.js` and convert the grammar to a JSON format. It does this by shelling out to `node`. The format of the grammars is formally specified by the JSON schema in [grammar-schema.json](https://github.com/tree-sitter/tree-sitter/blob/master/cli/src/generate/grammar-schema.json). The parsing is implemented in [parse_grammar.rs](https://github.com/tree-sitter/tree-sitter/blob/master/cli/src/generate/parse_grammar.rs).
### Grammar Rules
@ -35,8 +35,6 @@ At the end of these transformations, the initial grammar is split into two gramm
### Building Parse Tables
## The Runtime
WIP
View file
@ -96,18 +96,18 @@ script/test -l javascript -e Arrays
The main [`tree-sitter/tree-sitter`](https://github.com/tree-sitter/tree-sitter) repository contains the source code for several packages that are published to package registries for different languages:
- Rust crates on [crates.io](https://crates.io):
- [`tree-sitter`](https://crates.io/crates/tree-sitter) - A Rust binding to the core library
- [`tree-sitter-highlight`](https://crates.io/crates/tree-sitter-highlight) - The syntax-highlighting library
- [`tree-sitter-cli`](https://crates.io/crates/tree-sitter-cli) - The command-line tool
- JavaScript modules on [npmjs.com](https://npmjs.com):
- [`web-tree-sitter`](https://www.npmjs.com/package/web-tree-sitter) - A WASM-based JavaScript binding to the core library
- [`tree-sitter-cli`](https://www.npmjs.com/package/tree-sitter-cli) - The command-line tool
* Rust crates on [crates.io](https://crates.io):
* [`tree-sitter`](https://crates.io/crates/tree-sitter) - A Rust binding to the core library
* [`tree-sitter-highlight`](https://crates.io/crates/tree-sitter-highlight) - The syntax-highlighting library
* [`tree-sitter-cli`](https://crates.io/crates/tree-sitter-cli) - The command-line tool
* JavaScript modules on [npmjs.com](https://npmjs.com):
* [`web-tree-sitter`](https://www.npmjs.com/package/web-tree-sitter) - A WASM-based JavaScript binding to the core library
* [`tree-sitter-cli`](https://www.npmjs.com/package/tree-sitter-cli) - The command-line tool
There are also several other dependent repositories that contain other published packages:
- [`tree-sitter/node-tree-sitter`](https://github.com/tree-sitter/node-tree-sitter) - Node.js bindings to the core library, published as [`tree-sitter`](https://www.npmjs.com/package/tree-sitter) on npmjs.com
- [`tree-sitter/py-tree-sitter`](https://github.com/tree-sitter/py-tree-sitter) - Python bindings to the core library, published as [`tree-sitter`](https://pypi.org/project/tree-sitter) on [PyPI.org](https://pypi.org).
* [`tree-sitter/node-tree-sitter`](https://github.com/tree-sitter/node-tree-sitter) - Node.js bindings to the core library, published as [`tree-sitter`](https://www.npmjs.com/package/tree-sitter) on npmjs.com
* [`tree-sitter/py-tree-sitter`](https://github.com/tree-sitter/py-tree-sitter) - Python bindings to the core library, published as [`tree-sitter`](https://pypi.org/project/tree-sitter) on [PyPI.org](https://pypi.org).
## Publishing New Releases

View file
## Tagging and captures
*Tagging* is the act of identifying the entities that can be named in a program. We use Tree-sitter queries to find those entities. Having found them, you use a syntax capture to label the entity and its name.
_Tagging_ is the act of identifying the entities that can be named in a program. We use Tree-sitter queries to find those entities. Having found them, you use a syntax capture to label the entity and its name.
The essence of a given tag lies in two pieces of data: the _role_ of the entity that is matched (i.e. whether it is a definition or a reference) and the _kind_ of that entity, which describes how the entity is used (i.e. whether it's a class definition, function call, variable reference, and so on). Our convention is to use a syntax capture following the `@role.kind` capture name format, and another inner capture, always called `@name`, that pulls out the name of a given identifier.
@ -19,14 +19,14 @@ You may optionally include a capture named `@doc` to bind a docstring. For conve
This [query](https://github.com/tree-sitter/tree-sitter-python/blob/78c4e9b6b2f08e1be23b541ffced47b15e2972ad/queries/tags.scm#L4-L5) recognizes Python function definitions and captures their declared name. The `function_definition` syntax node is defined in the [Python Tree-sitter grammar](https://github.com/tree-sitter/tree-sitter-python/blob/78c4e9b6b2f08e1be23b541ffced47b15e2972ad/grammar.js#L354).
``` scheme
```scheme
(function_definition
name: (identifier) @name) @definition.function
```
A more sophisticated query can be found in the [JavaScript Tree-sitter repository](https://github.com/tree-sitter/tree-sitter-javascript/blob/fdeb68ac8d2bd5a78b943528bb68ceda3aade2eb/queries/tags.scm#L63-L70):
``` scheme
```scheme
(assignment_expression
left: [
(identifier) @name
@ -39,7 +39,7 @@ A more sophisticated query can be found in the [JavaScript Tree-sitter repositor
An even more sophisticated query is in the [Ruby Tree-sitter repository](https://github.com/tree-sitter/tree-sitter-ruby/blob/1ebfdb288842dae5a9233e2509a135949023dd82/queries/tags.scm#L24-L43), which uses built-in functions to strip the Ruby comment character (`#`) from the docstrings associated with a class or singleton-class declaration, then selects only the docstrings adjacent to the node matched as `@definition.class`.
``` scheme
```scheme
(
(comment)* @doc
.
@ -79,7 +79,7 @@ The below table describes a standard vocabulary for kinds and roles during the t
You can use the `tree-sitter tags` command to test out a tags query file, passing as arguments one or more files to tag. We can run this tool from within the Tree-sitter Ruby repository, over code in a file called `test.rb`:
``` ruby
```ruby
module Foo
class Bar
# won't be included
@ -93,7 +93,7 @@ end
Invoking `tree-sitter tags test.rb` produces the following console output, representing matched entities' name, role, location, first line, and docstring:
```
```text
test.rb
Foo | module def (0, 7) - (0, 10) `module Foo`
Bar | class def (1, 8) - (1, 11) `class Bar`
View file
@ -1,10 +1,10 @@
[package]
name = "tree-sitter-highlight"
description = "Library for performing syntax highlighting with Tree-sitter"
version = "0.20.1"
version = "0.20.2"
authors = [
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
"Tim Clem <timothy.clem@gmail.com>",
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
"Tim Clem <timothy.clem@gmail.com>",
]
license = "MIT"
readme = "README.md"
@ -12,13 +12,15 @@ edition = "2018"
keywords = ["incremental", "parsing", "syntax", "highlighting"]
categories = ["parsing", "text-editors"]
repository = "https://github.com/tree-sitter/tree-sitter"
rust-version.workspace = true
[lib]
crate-type = ["lib", "staticlib"]
[dependencies]
regex = "1"
thiserror = "1.0"
lazy_static = "1.4.0"
regex = "1.9.1"
thiserror = "1.0.43"
[dependencies.tree-sitter]
version = "0.20"
View file
@ -1,8 +1,9 @@
# `tree-sitter-highlight`
# Tree-sitter Highlight
[![Build Status](https://travis-ci.org/tree-sitter/tree-sitter.svg?branch=master)](https://travis-ci.org/tree-sitter/tree-sitter)
[![Build status](https://ci.appveyor.com/api/projects/status/vtmbd6i92e97l55w/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/tree-sitter/branch/master)
[![Crates.io](https://img.shields.io/crates/v/tree-sitter-highlight.svg)](https://crates.io/crates/tree-sitter-highlight)
[![crates.io badge]][crates.io]
[crates.io]: https://crates.io/crates/tree-sitter-highlight
[crates.io badge]: https://img.shields.io/crates/v/tree-sitter-highlight.svg?color=%23B48723
### Usage
@ -10,15 +11,14 @@ Add this crate, and the language-specific crates for whichever languages you wan
```toml
[dependencies]
tree-sitter-highlight = "0.19"
tree-sitter-html = "0.19"
tree-sitter-highlight = "^0.20"
tree-sitter-javascript = "0.19"
```
Define the list of highlight names that you will recognize:
```rust
let highlight_names = &[
let highlight_names = [
"attribute",
"constant",
"function.builtin",
@ -45,29 +45,21 @@ Create a highlighter. You need one of these for each thread that you're using fo
```rust
use tree_sitter_highlight::Highlighter;
let highlighter = Highlighter::new();
let mut highlighter = Highlighter::new();
```
Load some highlighting queries from the `queries` directory of some language repositories:
Load some highlighting queries from the `queries` directory of the language repository:
```rust
use tree_sitter_highlight::HighlightConfiguration;
let html_language = unsafe { tree_sitter_html() };
let javascript_language = unsafe { tree_sitter_javascript() };
let javascript_language = tree_sitter_javascript::language();
let html_config = HighlightConfiguration::new(
tree_sitter_html::language(),
tree_sitter_html::HIGHLIGHTS_QUERY,
tree_sitter_html::INJECTIONS_QUERY,
"",
).unwrap();
let javascript_config = HighlightConfiguration::new(
tree_sitter_javascript::language(),
tree_sitter_javascript::HIGHLIGHTS_QUERY,
tree_sitter_javascript::INJECTIONS_QUERY,
tree_sitter_javascript::LCOALS_QUERY,
let mut javascript_config = HighlightConfiguration::new(
javascript_language,
tree_sitter_javascript::HIGHLIGHT_QUERY,
tree_sitter_javascript::INJECTION_QUERY,
tree_sitter_javascript::LOCALS_QUERY,
).unwrap();
```
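
From here, usage continues by telling the configuration which highlight names to recognize and then running the highlighter; a minimal sketch, assuming the 0.20-era API and the `highlight_names` and `highlighter` values defined earlier:

```rust
use tree_sitter_highlight::HighlightEvent;

javascript_config.configure(&highlight_names);

let highlights = highlighter.highlight(
    &javascript_config,
    b"const x = new Y();",
    None,
    |_| None,
).unwrap();

for event in highlights {
    match event.unwrap() {
        // A slice of the source text, identified by byte offsets.
        HighlightEvent::Source { start, end } => eprintln!("source: {start}-{end}"),
        // `Highlight` is an index into the `highlight_names` list.
        HighlightEvent::HighlightStart(s) => eprintln!("highlight style started: {s:?}"),
        HighlightEvent::HighlightEnd => eprintln!("highlight ended"),
    }
}
```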
View file
@ -48,7 +48,8 @@ TSHighlightError ts_highlighter_add_language(
const char *locals_query,
uint32_t highlight_query_len,
uint32_t injection_query_len,
uint32_t locals_query_len
uint32_t locals_query_len,
bool apply_all_captures
);
// Compute syntax highlighting for a given document. You must first
View file
@ -29,25 +29,30 @@ pub enum ErrorCode {
InvalidUtf8,
InvalidRegex,
InvalidQuery,
InvalidLanguageName,
}
/// Create a new [`TSHighlighter`] instance.
///
/// # Safety
///
/// The caller must ensure that the `highlight_names` and `attribute_strings` arrays are valid for
/// the lifetime of the returned [`TSHighlighter`] instance, and are non-null.
#[no_mangle]
pub extern "C" fn ts_highlighter_new(
pub unsafe extern "C" fn ts_highlighter_new(
highlight_names: *const *const c_char,
attribute_strings: *const *const c_char,
highlight_count: u32,
) -> *mut TSHighlighter {
let highlight_names =
unsafe { slice::from_raw_parts(highlight_names, highlight_count as usize) };
let attribute_strings =
unsafe { slice::from_raw_parts(attribute_strings, highlight_count as usize) };
let highlight_names = slice::from_raw_parts(highlight_names, highlight_count as usize);
let attribute_strings = slice::from_raw_parts(attribute_strings, highlight_count as usize);
let highlight_names = highlight_names
.into_iter()
.map(|s| unsafe { CStr::from_ptr(*s).to_string_lossy().to_string() })
.iter()
.map(|s| CStr::from_ptr(*s).to_string_lossy().to_string())
.collect::<Vec<_>>();
let attribute_strings = attribute_strings
.into_iter()
.map(|s| unsafe { CStr::from_ptr(*s).to_bytes() })
.iter()
.map(|s| CStr::from_ptr(*s).to_bytes())
.collect();
let carriage_return_index = highlight_names.iter().position(|s| s == "carriage-return");
Box::into_raw(Box::new(TSHighlighter {
@ -58,9 +63,21 @@ pub extern "C" fn ts_highlighter_new(
}))
}
/// Add a language to a [`TSHighlighter`] instance.
///
/// Returns an [`ErrorCode`] indicating whether the language was added successfully or not.
///
/// # Safety
///
/// `this` must be non-null and must be a valid pointer to a [`TSHighlighter`] instance
/// created by [`ts_highlighter_new`].
///
/// The caller must ensure that any `*const c_char` (C-style string) parameters are valid for the lifetime of
/// the [`TSHighlighter`] instance, and are non-null.
#[no_mangle]
pub extern "C" fn ts_highlighter_add_language(
pub unsafe extern "C" fn ts_highlighter_add_language(
this: *mut TSHighlighter,
language_name: *const c_char,
scope_name: *const c_char,
injection_regex: *const c_char,
language: Language,
@ -70,10 +87,11 @@ pub extern "C" fn ts_highlighter_add_language(
highlight_query_len: u32,
injection_query_len: u32,
locals_query_len: u32,
apply_all_captures: bool,
) -> ErrorCode {
let f = move || {
let this = unwrap_mut_ptr(this);
let scope_name = unsafe { CStr::from_ptr(scope_name) };
let scope_name = CStr::from_ptr(scope_name);
let scope_name = scope_name
.to_str()
.or(Err(ErrorCode::InvalidUtf8))?
@ -81,38 +99,45 @@ pub extern "C" fn ts_highlighter_add_language(
let injection_regex = if injection_regex.is_null() {
None
} else {
let pattern = unsafe { CStr::from_ptr(injection_regex) };
let pattern = CStr::from_ptr(injection_regex);
let pattern = pattern.to_str().or(Err(ErrorCode::InvalidUtf8))?;
Some(Regex::new(pattern).or(Err(ErrorCode::InvalidRegex))?)
};
let highlight_query = unsafe {
slice::from_raw_parts(highlight_query as *const u8, highlight_query_len as usize)
};
let highlight_query =
slice::from_raw_parts(highlight_query as *const u8, highlight_query_len as usize);
let highlight_query = str::from_utf8(highlight_query).or(Err(ErrorCode::InvalidUtf8))?;
let injection_query = if injection_query_len > 0 {
let query = unsafe {
slice::from_raw_parts(injection_query as *const u8, injection_query_len as usize)
};
let query =
slice::from_raw_parts(injection_query as *const u8, injection_query_len as usize);
str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))?
} else {
""
};
let locals_query = if locals_query_len > 0 {
let query = unsafe {
slice::from_raw_parts(locals_query as *const u8, locals_query_len as usize)
};
let query = slice::from_raw_parts(locals_query as *const u8, locals_query_len as usize);
str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))?
} else {
""
};
let mut config =
HighlightConfiguration::new(language, highlight_query, injection_query, locals_query)
.or(Err(ErrorCode::InvalidQuery))?;
config.configure(&this.highlight_names.as_slice());
let lang = CStr::from_ptr(language_name)
.to_str()
.or(Err(ErrorCode::InvalidLanguageName))?;
let mut config = HighlightConfiguration::new(
language,
lang,
highlight_query,
injection_query,
locals_query,
apply_all_captures,
)
.or(Err(ErrorCode::InvalidQuery))?;
config.configure(this.highlight_names.as_slice());
this.languages.insert(scope_name, (injection_regex, config));
Ok(())
@ -132,42 +157,102 @@ pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer {
}))
}
/// Deletes a [`TSHighlighter`] instance.
///
/// # Safety
///
/// `this` must be non-null and must be a valid pointer to a [`TSHighlighter`] instance
/// created by [`ts_highlighter_new`].
///
/// It cannot be used after this function is called.
#[no_mangle]
pub extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) {
drop(unsafe { Box::from_raw(this) })
pub unsafe extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) {
drop(Box::from_raw(this))
}
/// Deletes a [`TSHighlightBuffer`] instance.
///
/// # Safety
///
/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance
/// created by [`ts_highlight_buffer_new`]
///
/// It cannot be used after this function is called.
#[no_mangle]
pub extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) {
drop(unsafe { Box::from_raw(this) })
pub unsafe extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) {
drop(Box::from_raw(this))
}
/// Get the HTML content of a [`TSHighlightBuffer`] instance as a raw pointer.
///
/// # Safety
///
/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance
/// created by [`ts_highlight_buffer_new`].
///
/// The returned pointer, a C-style string, must not outlive the [`TSHighlightBuffer`] instance, else the
/// data will point to garbage.
///
/// To get the length of the HTML content, use [`ts_highlight_buffer_len`].
#[no_mangle]
pub extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 {
pub unsafe extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 {
let this = unwrap_ptr(this);
this.renderer.html.as_slice().as_ptr()
}
/// Get the line offsets of a [`TSHighlightBuffer`] instance as a C-style array.
///
/// # Safety
///
/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance
/// created by [`ts_highlight_buffer_new`].
///
/// The returned pointer, a C-style array of [`u32`]s, must not outlive the [`TSHighlightBuffer`] instance, else the
/// data will point to garbage.
///
/// To get the length of the array, use [`ts_highlight_buffer_line_count`].
#[no_mangle]
pub extern "C" fn ts_highlight_buffer_line_offsets(this: *const TSHighlightBuffer) -> *const u32 {
pub unsafe extern "C" fn ts_highlight_buffer_line_offsets(
this: *const TSHighlightBuffer,
) -> *const u32 {
let this = unwrap_ptr(this);
this.renderer.line_offsets.as_slice().as_ptr()
}
/// Get the length of the HTML content of a [`TSHighlightBuffer`] instance.
///
/// # Safety
///
/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance
/// created by [`ts_highlight_buffer_new`].
#[no_mangle]
pub extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 {
pub unsafe extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 {
let this = unwrap_ptr(this);
this.renderer.html.len() as u32
}
/// Get the number of lines in a [`TSHighlightBuffer`] instance.
///
/// # Safety
///
/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance
/// created by [`ts_highlight_buffer_new`].
#[no_mangle]
pub extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 {
pub unsafe extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 {
let this = unwrap_ptr(this);
this.renderer.line_offsets.len() as u32
}
/// Highlight a string of source code.
///
/// # Safety
///
/// The caller must ensure that `scope_name`, `source_code`, `output`, and `cancellation_flag` are valid for
/// the lifetime of the [`TSHighlighter`] instance, and are non-null.
///
/// `this` must be a non-null pointer to a [`TSHighlighter`] instance created by [`ts_highlighter_new`]
#[no_mangle]
pub extern "C" fn ts_highlighter_highlight(
pub unsafe extern "C" fn ts_highlighter_highlight(
this: *const TSHighlighter,
scope_name: *const c_char,
source_code: *const c_char,
@ -177,10 +262,9 @@ pub extern "C" fn ts_highlighter_highlight(
) -> ErrorCode {
let this = unwrap_ptr(this);
let output = unwrap_mut_ptr(output);
let scope_name = unwrap(unsafe { CStr::from_ptr(scope_name).to_str() });
let source_code =
unsafe { slice::from_raw_parts(source_code as *const u8, source_code_len as usize) };
let cancellation_flag = unsafe { cancellation_flag.as_ref() };
let scope_name = unwrap(CStr::from_ptr(scope_name).to_str());
let source_code = slice::from_raw_parts(source_code as *const u8, source_code_len as usize);
let cancellation_flag = cancellation_flag.as_ref();
this.highlight(source_code, scope_name, output, cancellation_flag)
}
@ -225,15 +309,8 @@ impl TSHighlighter {
.renderer
.render(highlights, source_code, &|s| self.attribute_strings[s.0]);
match result {
Err(Error::Cancelled) => {
return ErrorCode::Timeout;
}
Err(Error::InvalidLanguage) => {
return ErrorCode::InvalidLanguage;
}
Err(Error::Unknown) => {
return ErrorCode::Timeout;
}
Err(Error::Cancelled) | Err(Error::Unknown) => ErrorCode::Timeout,
Err(Error::InvalidLanguage) => ErrorCode::InvalidLanguage,
Ok(()) => ErrorCode::Ok,
}
} else {
@ -242,15 +319,15 @@ impl TSHighlighter {
}
}
fn unwrap_ptr<'a, T>(result: *const T) -> &'a T {
unsafe { result.as_ref() }.unwrap_or_else(|| {
unsafe fn unwrap_ptr<'a, T>(result: *const T) -> &'a T {
result.as_ref().unwrap_or_else(|| {
eprintln!("{}:{} - pointer must not be null", file!(), line!());
abort();
})
}
fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T {
unsafe { result.as_mut() }.unwrap_or_else(|| {
unsafe fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T {
result.as_mut().unwrap_or_else(|| {
eprintln!("{}:{} - pointer must not be null", file!(), line!());
abort();
})
View file
@ -1,7 +1,11 @@
#![doc = include_str!("../README.md")]
pub mod c_lib;
pub mod util;
pub use c_lib as c;
use lazy_static::lazy_static;
use std::collections::HashSet;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{iter, mem, ops, str, usize};
use thiserror::Error;
@ -14,6 +18,65 @@ const CANCELLATION_CHECK_INTERVAL: usize = 100;
const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024;
const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000;
lazy_static! {
static ref STANDARD_CAPTURE_NAMES: HashSet<&'static str> = vec![
"attribute",
"boolean",
"carriage-return",
"comment",
"comment.documentation",
"constant",
"constant.builtin",
"constructor",
"constructor.builtin",
"embedded",
"error",
"escape",
"function",
"function.builtin",
"keyword",
"markup",
"markup.bold",
"markup.heading",
"markup.italic",
"markup.link",
"markup.link.url",
"markup.list",
"markup.list.checked",
"markup.list.numbered",
"markup.list.unchecked",
"markup.list.unnumbered",
"markup.quote",
"markup.raw",
"markup.raw.block",
"markup.raw.inline",
"markup.strikethrough",
"module",
"number",
"operator",
"property",
"property.builtin",
"punctuation",
"punctuation.bracket",
"punctuation.delimiter",
"punctuation.special",
"string",
"string.escape",
"string.regexp",
"string.special",
"string.special.symbol",
"tag",
"type",
"type.builtin",
"variable",
"variable.builtin",
"variable.member",
"variable.parameter",
]
.into_iter()
.collect();
}
/// Indicates which highlight should be applied to a region of source code.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Highlight(pub usize);
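// Note: the wrapped `usize` is an index into the list of names passed to
// `HighlightConfiguration::configure`, so `Highlight(0)` denotes the first
// recognized highlight name (e.g. "attribute" in the README example).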
@ -42,7 +105,9 @@ pub enum HighlightEvent {
/// This struct is immutable and can be shared between threads.
pub struct HighlightConfiguration {
pub language: Language,
pub language_name: String,
pub query: Query,
pub apply_all_captures: bool,
combined_injections_query: Option<Query>,
locals_pattern_index: usize,
highlights_pattern_index: usize,
@ -92,6 +157,7 @@ where
F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
{
source: &'a [u8],
language_name: &'a str,
byte_offset: usize,
highlighter: &'a mut Highlighter,
injection_callback: F,
@ -100,12 +166,13 @@ where
iter_count: usize,
next_event: Option<HighlightEvent>,
last_highlight_range: Option<(usize, usize, usize)>,
apply_all_captures: bool,
}
struct HighlightIterLayer<'a> {
_tree: Tree,
cursor: QueryCursor,
captures: iter::Peekable<QueryCaptures<'a, 'a, &'a [u8]>>,
captures: iter::Peekable<QueryCaptures<'a, 'a, &'a [u8], &'a [u8]>>,
config: &'a HighlightConfiguration,
highlight_end_stack: Vec<usize>,
scope_stack: Vec<LocalScope<'a>>,
@ -135,6 +202,7 @@ impl Highlighter {
) -> Result<impl Iterator<Item = Result<HighlightEvent, Error>> + 'a, Error> {
let layers = HighlightIterLayer::new(
source,
None,
self,
cancellation_flag,
&mut injection_callback,
@ -150,14 +218,16 @@ impl Highlighter {
assert_ne!(layers.len(), 0);
let mut result = HighlightIter {
source,
language_name: &config.language_name,
byte_offset: 0,
injection_callback,
cancellation_flag,
highlighter: self,
iter_count: 0,
layers: layers,
layers,
next_event: None,
last_highlight_range: None,
apply_all_captures: config.apply_all_captures,
};
result.sort_layers();
Ok(result)
@ -181,9 +251,11 @@ impl HighlightConfiguration {
/// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
pub fn new(
language: Language,
name: impl Into<String>,
highlights_query: &str,
injection_query: &str,
locals_query: &str,
apply_all_captures: bool,
) -> Result<Self, QueryError> {
// Concatenate the query strings, keeping track of the start offset of each section.
let mut query_source = String::new();
@ -249,7 +321,7 @@ impl HighlightConfiguration {
let mut local_scope_capture_index = None;
for (i, name) in query.capture_names().iter().enumerate() {
let i = Some(i as u32);
match name.as_str() {
match *name {
"injection.content" => injection_content_capture_index = i,
"injection.language" => injection_language_capture_index = i,
"local.definition" => local_def_capture_index = i,
@ -263,7 +335,9 @@ impl HighlightConfiguration {
let highlight_indices = vec![None; query.capture_names().len()];
Ok(HighlightConfiguration {
language,
language_name: name.into(),
query,
apply_all_captures,
combined_injections_query,
locals_pattern_index,
highlights_pattern_index,
@ -279,7 +353,7 @@ impl HighlightConfiguration {
}
/// Get a slice containing all of the highlight names used in the configuration.
pub fn names(&self) -> &[String] {
pub fn names(&self) -> &[&str] {
self.query.capture_names()
}
@ -321,6 +395,22 @@ impl HighlightConfiguration {
best_index.map(Highlight)
}));
}
// Return the list of this configuration's capture names that are neither present in the
// list of predefined 'canonical' names nor start with an underscore (denoting 'private' captures
// used as part of capture internals).
pub fn nonconformant_capture_names(&self, capture_names: &HashSet<&str>) -> Vec<&str> {
let capture_names = if capture_names.is_empty() {
&*STANDARD_CAPTURE_NAMES
} else {
&capture_names
};
self.names()
.iter()
.filter(|&n| !(n.starts_with('_') || capture_names.contains(n)))
.map(|n| *n)
.collect()
}
}
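// A usage sketch (illustrative, not part of the surrounding code): passing an
// empty set makes the check fall back to the STANDARD_CAPTURE_NAMES list above.
//
//     let unknown = config.nonconformant_capture_names(&HashSet::new());
//     for name in unknown {
//         eprintln!("unrecognized capture name: {name}");
//     }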
impl<'a> HighlightIterLayer<'a> {
@ -331,6 +421,7 @@ impl<'a> HighlightIterLayer<'a> {
/// added to the returned vector.
fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>(
source: &'a [u8],
parent_name: Option<&str>,
highlighter: &mut Highlighter,
cancellation_flag: Option<&'a AtomicUsize>,
injection_callback: &mut F,
@ -363,8 +454,13 @@ impl<'a> HighlightIterLayer<'a> {
cursor.matches(combined_injections_query, tree.root_node(), source);
for mat in matches {
let entry = &mut injections_by_pattern_index[mat.pattern_index];
let (language_name, content_node, include_children) =
injection_for_match(config, combined_injections_query, &mat, source);
let (language_name, content_node, include_children) = injection_for_match(
config,
parent_name,
combined_injections_query,
&mat,
source,
);
if language_name.is_some() {
entry.0 = language_name;
}
@ -685,8 +781,13 @@ where
// If this capture represents an injection, then process the injection.
if match_.pattern_index < layer.config.locals_pattern_index {
let (language_name, content_node, include_children) =
injection_for_match(&layer.config, &layer.config.query, &match_, &self.source);
let (language_name, content_node, include_children) = injection_for_match(
layer.config,
Some(self.language_name),
&layer.config.query,
&match_,
self.source,
);
// Explicitly remove this match so that none of its other captures will remain
// in the stream of captures.
@ -704,6 +805,7 @@ where
if !ranges.is_empty() {
match HighlightIterLayer::new(
self.source,
Some(self.language_name),
self.highlighter,
self.cancellation_flag,
&mut self.injection_callback,
@ -858,7 +960,13 @@ where
while let Some((next_match, next_capture_index)) = layer.captures.peek() {
let next_capture = next_match.captures[*next_capture_index];
if next_capture.node == capture.node {
layer.captures.next();
if self.apply_all_captures {
match_.remove();
capture = next_capture;
match_ = layer.captures.next().unwrap().0;
} else {
layer.captures.next();
}
} else {
break;
}
@ -1024,7 +1132,8 @@ impl HtmlRenderer {
}
fn injection_for_match<'a>(
config: &HighlightConfiguration,
config: &'a HighlightConfiguration,
parent_name: Option<&'a str>,
query: &'a Query,
query_match: &QueryMatch<'a, 'a>,
source: &'a [u8],
@ -1034,6 +1143,7 @@ fn injection_for_match<'a>(
let mut language_name = None;
let mut content_node = None;
for capture in query_match.captures {
let index = Some(capture.index);
if index == language_capture_index {
@ -1051,7 +1161,25 @@ fn injection_for_match<'a>(
// that sets the injection.language key.
"injection.language" => {
if language_name.is_none() {
language_name = prop.value.as_ref().map(|s| s.as_ref())
language_name = prop.value.as_ref().map(|s| s.as_ref());
}
}
// Setting the `injection.self` key can be used to specify that the
// language name should be the same as the language of the current
// layer.
"injection.self" => {
if language_name.is_none() {
language_name = Some(config.language_name.as_str());
}
}
// Setting the `injection.parent` key can be used to specify that
// the language name should be the same as the language of the
// parent layer
"injection.parent" => {
if language_name.is_none() {
language_name = parent_name;
}
}
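// A hedged illustration (the query text is an assumption, not taken from this
// code): an injections query can opt into these keys with `#set!`, e.g.
//
//     ((comment) @injection.content
//      (#set! injection.parent))
//
// which injects the parent layer's language into comments, while
// `(#set! injection.self)` re-injects the current layer's own language.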
Some files were not shown because too many files have changed in this diff.