Compare commits: master...scanner-ra (1 commit)

Commit: 588c1a5610

483 changed files with 22,016 additions and 43,785 deletions

@@ -10,9 +10,6 @@ insert_final_newline = true
 [*.rs]
 indent_size = 4
 
-[*.{zig,zon}]
-indent_size = 4
-
 [Makefile]
 indent_style = tab
 indent_size = 8

.envrc — 1 change

@@ -1 +0,0 @@
-use flake

.gitattributes (vendored) — 1 change

@@ -3,4 +3,5 @@
 /lib/src/unicode/*.h linguist-vendored
 /lib/src/unicode/LICENSE linguist-vendored
 
+/cli/src/generate/prepare_grammar/*.json -diff
 Cargo.lock -diff

.github/FUNDING.yml (vendored) — 15 changes

@@ -1,15 +0,0 @@
-# These are supported funding model platforms
-
-github: tree-sitter
-patreon: # Replace with a single Patreon username
-open_collective: tree-sitter # Replace with a single Open Collective username
-ko_fi: amaanq
-tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
-community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
-liberapay: # Replace with a single Liberapay username
-issuehunt: # Replace with a single IssueHunt username
-lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
-polar: # Replace with a single Polar username
-buy_me_a_coffee: # Replace with a single Buy Me a Coffee username
-thanks_dev: # Replace with a single thanks.dev username
-custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']

.github/ISSUE_TEMPLATE/bug_report.yml (vendored) — 2 changes

@@ -1,6 +1,6 @@
 name: Bug Report
 description: Report a problem
-type: Bug
+labels: [bug]
 body:
 - type: textarea
 attributes:

.github/ISSUE_TEMPLATE/feature_request.yml (vendored) — 2 changes

@@ -1,6 +1,6 @@
 name: Feature request
 description: Request an enhancement
-type: Feature
+labels: [enhancement]
 body:
 - type: markdown
 attributes:

.github/actions/cache/action.yml (vendored) — 6 changes

@@ -17,9 +17,7 @@ runs:
 test/fixtures/grammars
 target/release/tree-sitter-*.wasm
 key: fixtures-${{ join(matrix.*, '_') }}-${{ hashFiles(
-'crates/generate/src/**',
-'lib/src/parser.h',
-'lib/src/array.h',
-'lib/src/alloc.h',
+'cli/generate/src/**',
+'xtask/src/*',
 'test/fixtures/grammars/*/**/src/*.c',
 '.github/actions/cache/action.yml') }}

.github/cliff.toml (vendored) — 4 changes

@@ -16,13 +16,13 @@ body = """
 {% for commit in commits%}\
 {% if not commit.scope %}\
 - {{ commit.message | upper_first }}\
-{% if commit.remote.pr_number %} (<https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}/pull/{{ commit.remote.pr_number }}>){%- endif %}
+{% if commit.github.pr_number %} (<https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}/pull/{{ commit.github.pr_number }}>){%- endif %}
 {% endif %}\
 {% endfor %}\
 {% for group, commits in commits | group_by(attribute="scope") %}\
 {% for commit in commits %}\
 - **{{commit.scope}}**: {{ commit.message | upper_first }}\
-{% if commit.remote.pr_number %} (<https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}/pull/{{ commit.remote.pr_number }}>){%- endif %}
+{% if commit.github.pr_number %} (<https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}/pull/{{ commit.github.pr_number }}>){%- endif %}
 {% endfor %}\
 {% endfor %}
 {% endfor %}

.github/dependabot.yml (vendored) — 27 changes

@@ -4,8 +4,6 @@ updates:
 directory: "/"
 schedule:
 interval: "weekly"
-cooldown:
-default-days: 3
 commit-message:
 prefix: "build(deps)"
 labels:
@@ -14,16 +12,10 @@ updates:
 groups:
 cargo:
 patterns: ["*"]
-ignore:
-- dependency-name: "*"
-update-types: ["version-update:semver-major", "version-update:semver-minor"]
-
 - package-ecosystem: "github-actions"
 directory: "/"
 schedule:
 interval: "weekly"
-cooldown:
-default-days: 3
 commit-message:
 prefix: "ci"
 labels:
@@ -32,22 +24,3 @@ updates:
 groups:
 actions:
 patterns: ["*"]
-
-- package-ecosystem: "npm"
-versioning-strategy: increase
-directories:
-- "/crates/npm"
-- "/crates/eslint"
-- "/lib/binding_web"
-schedule:
-interval: "weekly"
-cooldown:
-default-days: 3
-commit-message:
-prefix: "build(deps)"
-labels:
-- "dependencies"
-- "npm"
-groups:
-npm:
-patterns: ["*"]

.github/scripts/close_spam.js (vendored) — 29 changes

@@ -1,29 +0,0 @@
-module.exports = async ({ github, context }) => {
-let target = context.payload.issue;
-if (target) {
-await github.rest.issues.update({
-...context.repo,
-issue_number: target.number,
-state: "closed",
-state_reason: "not_planned",
-title: "[spam]",
-body: "",
-type: null,
-});
-} else {
-target = context.payload.pull_request;
-await github.rest.pulls.update({
-...context.repo,
-pull_number: target.number,
-state: "closed",
-title: "[spam]",
-body: "",
-});
-}
-
-await github.rest.issues.lock({
-...context.repo,
-issue_number: target.number,
-lock_reason: "spam",
-});
-};

.github/scripts/cross.sh (vendored, new executable file) — 3 changes

@@ -0,0 +1,3 @@
+#!/bin/bash -eu
+
+exec docker run --rm -v /home/runner:/home/runner -w "$PWD" "$CROSS_IMAGE" "$@"

.github/scripts/make.sh (vendored, new executable file) — 9 changes

@@ -0,0 +1,9 @@
+#!/bin/bash -eu
+
+tree_sitter="$ROOT"/target/"$TARGET"/release/tree-sitter
+
+if [[ $BUILD_CMD == cross ]]; then
+cross.sh make CC="$CC" AR="$AR" "$@"
+else
+exec make "$@"
+fi

.github/scripts/tree-sitter.sh (vendored, new executable file) — 9 changes

@@ -0,0 +1,9 @@
+#!/bin/bash -eu
+
+tree_sitter="$ROOT"/target/"$TARGET"/release/tree-sitter
+
+if [[ $BUILD_CMD == cross ]]; then
+cross.sh "$CROSS_RUNNER" "$tree_sitter" "$@"
+else
+exec "$tree_sitter" "$@"
+fi

.github/scripts/wasm_stdlib.js (vendored) — 25 changes

@@ -1,25 +0,0 @@
-module.exports = async ({ github, context, core }) => {
-if (context.eventName !== 'pull_request') return;
-
-const prNumber = context.payload.pull_request.number;
-const owner = context.repo.owner;
-const repo = context.repo.repo;
-
-const { data: files } = await github.rest.pulls.listFiles({
-owner,
-repo,
-pull_number: prNumber
-});
-
-const changedFiles = files.map(file => file.filename);
-
-const wasmStdLibSrc = 'crates/language/wasm/';
-const dirChanged = changedFiles.some(file => file.startsWith(wasmStdLibSrc));
-
-if (!dirChanged) return;
-
-const wasmStdLibHeader = 'lib/src/wasm/wasm-stdlib.h';
-const requiredChanged = changedFiles.includes(wasmStdLibHeader);
-
-if (!requiredChanged) core.setFailed(`Changes detected in ${wasmStdLibSrc} but ${wasmStdLibHeader} was not modified.`);
-};

.github/workflows/backport.yml (vendored) — 6 changes

@@ -14,17 +14,17 @@ jobs:
 runs-on: ubuntu-latest
 steps:
 - name: Checkout repository
-uses: actions/checkout@v6
+uses: actions/checkout@v4
 
 - name: Create app token
-uses: actions/create-github-app-token@v2
+uses: actions/create-github-app-token@v1
 id: app-token
 with:
 app-id: ${{ vars.BACKPORT_APP }}
 private-key: ${{ secrets.BACKPORT_KEY }}
 
 - name: Create backport PR
-uses: korthout/backport-action@v4
+uses: korthout/backport-action@v3
 with:
 pull_title: "${pull_title}"
 label_pattern: "^ci:backport ([^ ]+)$"

.github/workflows/bindgen.yml (vendored) — 8 changes

@@ -2,21 +2,15 @@ name: Check Bindgen Output
 
 on:
 pull_request:
-paths:
-- lib/include/tree_sitter/api.h
-- lib/binding_rust/bindings.rs
 push:
 branches: [master]
-paths:
-- lib/include/tree_sitter/api.h
-- lib/binding_rust/bindings.rs
 
 jobs:
 check-bindgen:
 runs-on: ubuntu-latest
 steps:
 - name: Checkout repository
-uses: actions/checkout@v6
+uses: actions/checkout@v4
 
 - name: Set up stable Rust toolchain
 uses: actions-rust-lang/setup-rust-toolchain@v1

.github/workflows/build.yml (vendored) — 301 changes

@@ -1,5 +1,10 @@
 name: Build & Test
 
+env:
+CARGO_TERM_COLOR: always
+RUSTFLAGS: "-D warnings"
+CROSS_DEBUG: 1
+
 on:
 workflow_call:
 inputs:
@@ -26,41 +31,38 @@ jobs:
 - windows-x86
 - macos-arm64
 - macos-x64
-- wasm32
 
 include:
 # When adding a new `target`:
 # 1. Define a new platform alias above
-# 2. Add a new record to the matrix map in `crates/cli/npm/install.js`
-- { platform: linux-arm64 , target: aarch64-unknown-linux-gnu , os: ubuntu-24.04-arm }
-- { platform: linux-arm , target: armv7-unknown-linux-gnueabihf , os: ubuntu-24.04-arm }
-- { platform: linux-x64 , target: x86_64-unknown-linux-gnu , os: ubuntu-24.04 }
-- { platform: linux-x86 , target: i686-unknown-linux-gnu , os: ubuntu-24.04 }
-- { platform: linux-powerpc64 , target: powerpc64-unknown-linux-gnu , os: ubuntu-24.04 }
-- { platform: windows-arm64 , target: aarch64-pc-windows-msvc , os: windows-11-arm }
-- { platform: windows-x64 , target: x86_64-pc-windows-msvc , os: windows-2025 }
-- { platform: windows-x86 , target: i686-pc-windows-msvc , os: windows-2025 }
-- { platform: macos-arm64 , target: aarch64-apple-darwin , os: macos-15 }
-- { platform: macos-x64 , target: x86_64-apple-darwin , os: macos-15-intel }
-- { platform: wasm32 , target: wasm32-unknown-unknown , os: ubuntu-24.04 }
+# 2. Add a new record to the matrix map in `cli/npm/install.js`
+- { platform: linux-arm64 , target: aarch64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
+- { platform: linux-arm , target: arm-unknown-linux-gnueabi , os: ubuntu-latest , use-cross: true }
+- { platform: linux-x64 , target: x86_64-unknown-linux-gnu , os: ubuntu-20.04 , features: wasm } # See #2272
+- { platform: linux-x86 , target: i686-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
+- { platform: linux-powerpc64 , target: powerpc64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
+- { platform: windows-arm64 , target: aarch64-pc-windows-msvc , os: windows-latest }
+- { platform: windows-x64 , target: x86_64-pc-windows-msvc , os: windows-latest , features: wasm }
+- { platform: windows-x86 , target: i686-pc-windows-msvc , os: windows-latest }
+- { platform: macos-arm64 , target: aarch64-apple-darwin , os: macos-14 , features: wasm }
+- { platform: macos-x64 , target: x86_64-apple-darwin , os: macos-13 , features: wasm }
 
-# Extra features
-- { platform: linux-arm64 , features: wasm }
-- { platform: linux-x64 , features: wasm }
-- { platform: macos-arm64 , features: wasm }
-- { platform: macos-x64 , features: wasm }
+# Cross compilers for C library
+- { platform: linux-arm64 , cc: aarch64-linux-gnu-gcc , ar: aarch64-linux-gnu-ar }
+- { platform: linux-arm , cc: arm-linux-gnueabi-gcc , ar: arm-linux-gnueabi-ar }
+- { platform: linux-x86 , cc: i686-linux-gnu-gcc , ar: i686-linux-gnu-ar }
+- { platform: linux-powerpc64 , cc: powerpc64-linux-gnu-gcc , ar: powerpc64-linux-gnu-ar }
 
-# Cross-compilation
-- { platform: linux-arm , cross: true }
-- { platform: linux-x86 , cross: true }
-- { platform: linux-powerpc64 , cross: true }
+# Prevent race condition (see #2041)
+- { platform: windows-x64 , rust-test-threads: 1 }
+- { platform: windows-x86 , rust-test-threads: 1 }
 
-# Compile-only
-- { platform: wasm32 , no-run: true }
+# Can't natively run CLI on Github runner's host
+- { platform: windows-arm64 , no-run: true }
 
 env:
-CARGO_TERM_COLOR: always
-RUSTFLAGS: -D warnings
+BUILD_CMD: cargo
+SUFFIX: ${{ contains(matrix.target, 'windows') && '.exe' || '' }}
 
 defaults:
 run:
@@ -68,28 +70,13 @@ jobs:
 
 steps:
 - name: Checkout repository
-uses: actions/checkout@v6
+uses: actions/checkout@v4
 
-- name: Set up cross-compilation
-if: matrix.cross
-run: |
-for target in armv7-unknown-linux-gnueabihf i686-unknown-linux-gnu powerpc64-unknown-linux-gnu; do
-camel_target=${target//-/_}; target_cc=${target/-unknown/}
-printf 'CC_%s=%s\n' "$camel_target" "${target_cc/v7/}-gcc"
-printf 'AR_%s=%s\n' "$camel_target" "${target_cc/v7/}-ar"
-printf 'CARGO_TARGET_%s_LINKER=%s\n' "${camel_target^^}" "${target_cc/v7/}-gcc"
-done >> $GITHUB_ENV
-{
-printf 'CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER=qemu-arm -L /usr/arm-linux-gnueabihf\n'
-printf 'CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64 -L /usr/powerpc64-linux-gnu\n'
-} >> $GITHUB_ENV
-
-- name: Get emscripten version
-if: contains(matrix.features, 'wasm')
-run: printf 'EMSCRIPTEN_VERSION=%s\n' "$(<crates/loader/emscripten-version)" >> $GITHUB_ENV
+- name: Read Emscripten version
+run: printf 'EMSCRIPTEN_VERSION=%s\n' "$(<cli/loader/emscripten-version)" >> $GITHUB_ENV
 
 - name: Install Emscripten
-if: contains(matrix.features, 'wasm')
+if: ${{ !matrix.no-run && !matrix.use-cross }}
 uses: mymindstorm/setup-emsdk@v14
 with:
 version: ${{ env.EMSCRIPTEN_VERSION }}
@@ -99,84 +86,58 @@ jobs:
 with:
 target: ${{ matrix.target }}
 
-- name: Install cross-compilation toolchain
-if: matrix.cross
+- name: Install cross
+if: ${{ matrix.use-cross }}
+run: cargo install cross --git https://github.com/cross-rs/cross
+
+- name: Configure cross
+if: ${{ matrix.use-cross }}
 run: |
-sudo apt-get update -qy
-if [[ $PLATFORM == linux-arm ]]; then
-sudo apt-get install -qy {binutils,gcc}-arm-linux-gnueabihf qemu-user
-elif [[ $PLATFORM == linux-x86 ]]; then
-sudo apt-get install -qy {binutils,gcc}-i686-linux-gnu
-elif [[ $PLATFORM == linux-powerpc64 ]]; then
-sudo apt-get install -qy {binutils,gcc}-powerpc64-linux-gnu qemu-user
+printf '%s\n' > Cross.toml \
+'[target.${{ matrix.target }}]' \
+'image = "ghcr.io/cross-rs/${{ matrix.target }}:edge"' \
+'[build]' \
+'pre-build = [' \
+' "dpkg --add-architecture $CROSS_DEB_ARCH",' \
+' "curl -fsSL https://deb.nodesource.com/setup_22.x | bash -",' \
+' "apt-get update && apt-get -y install libssl-dev nodejs"' \
+']'
+cat - Cross.toml <<< 'Cross.toml:'
+printf '%s\n' >> $GITHUB_ENV \
+"CROSS_CONFIG=$PWD/Cross.toml" \
+"CROSS_IMAGE=ghcr.io/cross-rs/${{ matrix.target }}:edge"
+
+- name: Set up environment
+env:
+RUST_TEST_THREADS: ${{ matrix.rust-test-threads }}
+USE_CROSS: ${{ matrix.use-cross }}
+TARGET: ${{ matrix.target }}
+CC: ${{ matrix.cc }}
+AR: ${{ matrix.ar }}
+run: |
+PATH="$PWD/.github/scripts:$PATH"
+printf '%s/.github/scripts\n' "$PWD" >> $GITHUB_PATH
+
+printf '%s\n' >> $GITHUB_ENV \
+'TREE_SITTER=tree-sitter.sh' \
+"TARGET=$TARGET" \
+"ROOT=$PWD"
+
+[[ -n $RUST_TEST_THREADS ]] && \
+printf 'RUST_TEST_THREADS=%s\n' "$RUST_TEST_THREADS" >> $GITHUB_ENV
+
+[[ -n $CC ]] && printf 'CC=%s\n' "$CC" >> $GITHUB_ENV
+[[ -n $AR ]] && printf 'AR=%s\n' "$AR" >> $GITHUB_ENV
+
+if [[ $USE_CROSS == true ]]; then
+printf 'BUILD_CMD=cross\n' >> $GITHUB_ENV
+runner=$(cross.sh bash -c "env | sed -n 's/^CARGO_TARGET_.*_RUNNER=//p'")
+[[ -n $runner ]] && printf 'CROSS_RUNNER=%s\n' "$runner" >> $GITHUB_ENV
 fi
-env:
-PLATFORM: ${{ matrix.platform }}
-
-- name: Install MinGW and Clang (Windows x64 MSYS2)
-if: matrix.platform == 'windows-x64'
-uses: msys2/setup-msys2@v2
-with:
-update: true
-install: |
-mingw-w64-x86_64-toolchain
-mingw-w64-x86_64-clang
-mingw-w64-x86_64-make
-mingw-w64-x86_64-cmake
-
-# TODO: Remove RUSTFLAGS="--cap-lints allow" once we use a wasmtime release that addresses
-# the `mismatched-lifetime-syntaxes` lint
-- name: Build wasmtime library (Windows x64 MSYS2)
-if: contains(matrix.features, 'wasm') && matrix.platform == 'windows-x64'
-run: |
-mkdir -p target
-WASMTIME_VERSION=$(cargo metadata --format-version=1 --locked --features wasm | \
-jq -r '.packages[] | select(.name == "wasmtime-c-api-impl") | .version')
-curl -LSs "$WASMTIME_REPO/archive/refs/tags/v${WASMTIME_VERSION}.tar.gz" | tar xzf - -C target
-cd target/wasmtime-${WASMTIME_VERSION}
-cmake -S crates/c-api -B target/c-api \
--DCMAKE_INSTALL_PREFIX="$PWD/artifacts" \
--DWASMTIME_DISABLE_ALL_FEATURES=ON \
--DWASMTIME_FEATURE_CRANELIFT=ON \
--DWASMTIME_TARGET='x86_64-pc-windows-gnu'
-cmake --build target/c-api && cmake --install target/c-api
-printf 'CMAKE_PREFIX_PATH=%s\n' "$PWD/artifacts" >> $GITHUB_ENV
-env:
-WASMTIME_REPO: https://github.com/bytecodealliance/wasmtime
-RUSTFLAGS: ${{ env.RUSTFLAGS }} --cap-lints allow
-
-- name: Build C library (Windows x64 MSYS2 CMake)
-if: matrix.platform == 'windows-x64'
-shell: msys2 {0}
-run: |
-cmake -G Ninja -S . -B build/static \
--DBUILD_SHARED_LIBS=OFF \
--DCMAKE_BUILD_TYPE=Debug \
--DCMAKE_COMPILE_WARNING_AS_ERROR=ON \
--DTREE_SITTER_FEATURE_WASM=$WASM \
--DCMAKE_C_COMPILER=clang
-cmake --build build/static
-
-cmake -G Ninja -S . -B build/shared \
--DBUILD_SHARED_LIBS=ON \
--DCMAKE_BUILD_TYPE=Debug \
--DCMAKE_COMPILE_WARNING_AS_ERROR=ON \
--DTREE_SITTER_FEATURE_WASM=$WASM \
--DCMAKE_C_COMPILER=clang
-cmake --build build/shared
-rm -rf \
-build/{static,shared} \
-"${CMAKE_PREFIX_PATH}/artifacts" \
-target/wasmtime-${WASMTIME_VERSION}
-env:
-WASM: ${{ contains(matrix.features, 'wasm') && 'ON' || 'OFF' }}
-
-# TODO: Remove RUSTFLAGS="--cap-lints allow" once we use a wasmtime release that addresses
-# the `mismatched-lifetime-syntaxes` lint
 - name: Build wasmtime library
-if: contains(matrix.features, 'wasm')
+if: ${{ !matrix.use-cross && contains(matrix.features, 'wasm') }}
 run: |
-mkdir -p target
 WASMTIME_VERSION=$(cargo metadata --format-version=1 --locked --features wasm | \
 jq -r '.packages[] | select(.name == "wasmtime-c-api-impl") | .version')
 curl -LSs "$WASMTIME_REPO/archive/refs/tags/v${WASMTIME_VERSION}.tar.gz" | tar xzf - -C target
@@ -190,122 +151,86 @@ jobs:
 printf 'CMAKE_PREFIX_PATH=%s\n' "$PWD/artifacts" >> $GITHUB_ENV
 env:
 WASMTIME_REPO: https://github.com/bytecodealliance/wasmtime
-RUSTFLAGS: ${{ env.RUSTFLAGS }} --cap-lints allow
 
 - name: Build C library (make)
-if: runner.os != 'Windows'
-run: |
-if [[ $PLATFORM == linux-arm ]]; then
-CC=arm-linux-gnueabihf-gcc; AR=arm-linux-gnueabihf-ar
-elif [[ $PLATFORM == linux-x86 ]]; then
-CC=i686-linux-gnu-gcc; AR=i686-linux-gnu-ar
-elif [[ $PLATFORM == linux-powerpc64 ]]; then
-CC=powerpc64-linux-gnu-gcc; AR=powerpc64-linux-gnu-ar
-else
-CC=gcc; AR=ar
-fi
-make -j CFLAGS="$CFLAGS" CC=$CC AR=$AR
+if: ${{ runner.os != 'Windows' }}
+run: make.sh -j CFLAGS="$CFLAGS"
 env:
-PLATFORM: ${{ matrix.platform }}
 CFLAGS: -g -Werror -Wall -Wextra -Wshadow -Wpedantic -Werror=incompatible-pointer-types
 
 - name: Build C library (CMake)
-if: "!matrix.cross"
+if: ${{ !matrix.use-cross }}
 run: |
-cmake -S . -B build/static \
+cmake -S lib -B build/static \
 -DBUILD_SHARED_LIBS=OFF \
 -DCMAKE_BUILD_TYPE=Debug \
 -DCMAKE_COMPILE_WARNING_AS_ERROR=ON \
 -DTREE_SITTER_FEATURE_WASM=$WASM
 cmake --build build/static --verbose
 
-cmake -S . -B build/shared \
+cmake -S lib -B build/shared \
 -DBUILD_SHARED_LIBS=ON \
 -DCMAKE_BUILD_TYPE=Debug \
 -DCMAKE_COMPILE_WARNING_AS_ERROR=ON \
 -DTREE_SITTER_FEATURE_WASM=$WASM
 cmake --build build/shared --verbose
 env:
-CC: ${{ contains(matrix.platform, 'linux') && 'clang' || '' }}
+CC: ${{ contains(matrix.target, 'linux') && 'clang' || '' }}
 WASM: ${{ contains(matrix.features, 'wasm') && 'ON' || 'OFF' }}
 
-- name: Build Wasm library
-if: contains(matrix.features, 'wasm')
-shell: bash
-run: |
-cd lib/binding_web
-npm ci
-CJS=true npm run build
-CJS=true npm run build:debug
-npm run build
-npm run build:debug
-
-- name: Check no_std builds
-if: inputs.run-test && !matrix.no-run
-working-directory: lib
-shell: bash
-run: cargo check --no-default-features --target='${{ matrix.target }}'
+- name: Build wasm library
+# No reason to build on the same Github runner hosts many times
+if: ${{ !matrix.no-run && !matrix.use-cross }}
+run: $BUILD_CMD run -p xtask -- build-wasm
 
 - name: Build target
-run: cargo build --release --target='${{ matrix.target }}' --features='${{ matrix.features }}' $PACKAGE
-env:
-PACKAGE: ${{ matrix.platform == 'wasm32' && '-p tree-sitter' || '' }}
+run: $BUILD_CMD build --release --target=${{ matrix.target }} --features=${{ matrix.features }}
 
 - name: Cache fixtures
 id: cache
-if: inputs.run-test && !matrix.no-run
+if: ${{ !matrix.no-run && inputs.run-test }}
 uses: ./.github/actions/cache
 
 - name: Fetch fixtures
-if: inputs.run-test && !matrix.no-run
-run: cargo run -p xtask --target='${{ matrix.target }}' -- fetch-fixtures
+if: ${{ !matrix.no-run && inputs.run-test }}
+run: $BUILD_CMD run -p xtask -- fetch-fixtures
 
 - name: Generate fixtures
-if: inputs.run-test && !matrix.no-run && steps.cache.outputs.cache-hit != 'true'
-run: cargo run -p xtask --target='${{ matrix.target }}' -- generate-fixtures
+if: ${{ !matrix.no-run && inputs.run-test && steps.cache.outputs.cache-hit != 'true' }}
+run: $BUILD_CMD run -p xtask -- generate-fixtures
 
 - name: Generate Wasm fixtures
-if: inputs.run-test && !matrix.no-run && contains(matrix.features, 'wasm') && steps.cache.outputs.cache-hit != 'true'
-run: cargo run -p xtask --target='${{ matrix.target }}' -- generate-fixtures --wasm
+if: ${{ !matrix.no-run && !matrix.use-cross && inputs.run-test && steps.cache.outputs.cache-hit != 'true' }}
+run: $BUILD_CMD run -p xtask -- generate-fixtures --wasm
 
 - name: Run main tests
-if: inputs.run-test && !matrix.no-run
-run: cargo test --target='${{ matrix.target }}' --features='${{ matrix.features }}'
+if: ${{ !matrix.no-run && inputs.run-test }}
+run: $BUILD_CMD test --target=${{ matrix.target }} --features=${{ matrix.features }}
 
-- name: Run Wasm tests
-if: inputs.run-test && !matrix.no-run && contains(matrix.features, 'wasm')
-run: cargo run -p xtask --target='${{ matrix.target }}' -- test-wasm
+- name: Run wasm tests
+if: ${{ !matrix.no-run && !matrix.use-cross && inputs.run-test }}
+run: $BUILD_CMD run -p xtask -- test-wasm
 
+- name: Run benchmarks
+# Cross-compiled benchmarks are pointless
+if: ${{ !matrix.no-run && !matrix.use-cross && inputs.run-test }}
+run: $BUILD_CMD bench benchmark -p tree-sitter-cli --target=${{ matrix.target }}
+
 - name: Upload CLI artifact
-if: "!matrix.no-run"
-uses: actions/upload-artifact@v6
+uses: actions/upload-artifact@v4
 with:
 name: tree-sitter.${{ matrix.platform }}
-path: target/${{ matrix.target }}/release/tree-sitter${{ contains(matrix.target, 'windows') && '.exe' || '' }}
+path: target/${{ matrix.target }}/release/tree-sitter${{ env.SUFFIX }}
 if-no-files-found: error
 retention-days: 7
 
 - name: Upload Wasm artifacts
-if: matrix.platform == 'linux-x64'
-uses: actions/upload-artifact@v6
+if: ${{ matrix.platform == 'linux-x64' }}
+uses: actions/upload-artifact@v4
 with:
 name: tree-sitter.wasm
 path: |
-lib/binding_web/web-tree-sitter.js
-lib/binding_web/web-tree-sitter.js.map
-lib/binding_web/web-tree-sitter.cjs
-lib/binding_web/web-tree-sitter.cjs.map
-lib/binding_web/web-tree-sitter.wasm
-lib/binding_web/web-tree-sitter.wasm.map
-lib/binding_web/debug/web-tree-sitter.cjs
-lib/binding_web/debug/web-tree-sitter.cjs.map
-lib/binding_web/debug/web-tree-sitter.js
-lib/binding_web/debug/web-tree-sitter.js.map
-lib/binding_web/debug/web-tree-sitter.wasm
-lib/binding_web/debug/web-tree-sitter.wasm.map
-lib/binding_web/lib/*.c
-lib/binding_web/lib/*.h
-lib/binding_web/lib/*.ts
-lib/binding_web/src/*.ts
+lib/binding_web/tree-sitter.js
+lib/binding_web/tree-sitter.wasm
 if-no-files-found: error
 retention-days: 7

.github/workflows/ci.yml (vendored) — 26 changes

@@ -2,20 +2,8 @@ name: CI
 
 on:
 pull_request:
-paths-ignore:
-- docs/**
-- "**/README.md"
-- CONTRIBUTING.md
-- LICENSE
-- cli/src/templates
 push:
 branches: [master]
-paths-ignore:
-- docs/**
-- "**/README.md"
-- CONTRIBUTING.md
-- LICENSE
-- cli/src/templates
 
 concurrency:
 group: ${{ github.workflow }}-${{ github.ref }}
@@ -26,24 +14,24 @@ jobs:
 runs-on: ubuntu-latest
 steps:
 - name: Checkout repository
-uses: actions/checkout@v6
+uses: actions/checkout@v4
 
 - name: Set up stable Rust toolchain
 uses: actions-rust-lang/setup-rust-toolchain@v1
 with:
 toolchain: stable
 
+- name: Set up nightly Rust toolchain
+uses: actions-rust-lang/setup-rust-toolchain@v1
+with:
+toolchain: nightly
 components: clippy, rustfmt
 
 - name: Lint files
-run: |
-make lint
-make lint-web
+run: make lint
 
 sanitize:
 uses: ./.github/workflows/sanitize.yml
 
 build:
 uses: ./.github/workflows/build.yml
 
-check-wasm-stdlib:
-uses: ./.github/workflows/wasm_stdlib.yml

.github/workflows/docs.yml (vendored) — 50 changes

@@ -1,50 +0,0 @@
-name: Deploy Docs
-on:
-push:
-branches: [master]
-paths: [docs/**]
-workflow_dispatch:
-
-jobs:
-deploy-docs:
-runs-on: ubuntu-latest
-
-permissions:
-contents: write
-pages: write
-id-token: write
-
-steps:
-- name: Checkout repository
-uses: actions/checkout@v6
-
-- name: Set up Rust
-uses: actions-rust-lang/setup-rust-toolchain@v1
-
-- name: Install mdbook
-env:
-GH_TOKEN: ${{ github.token }}
-run: |
-jq_expr='.assets[] | select(.name | contains("x86_64-unknown-linux-gnu")) | .browser_download_url'
-url=$(gh api repos/rust-lang/mdbook/releases/tags/v0.4.52 --jq "$jq_expr")
-mkdir mdbook
-curl -sSL "$url" | tar -xz -C mdbook
-printf '%s/mdbook\n' "$PWD" >> "$GITHUB_PATH"
-
-- name: Install mdbook-admonish
-run: cargo install mdbook-admonish
-
-- name: Build Book
-run: mdbook build docs
-
-- name: Setup Pages
-uses: actions/configure-pages@v5
-
-- name: Upload artifact
-uses: actions/upload-pages-artifact@v4
-with:
-path: docs/book
-
-- name: Deploy to GitHub Pages
-id: deployment
-uses: actions/deploy-pages@v4

.github/workflows/nvim_ts.yml (vendored) — 13 changes

@@ -3,10 +3,7 @@ name: nvim-treesitter parser tests
 on:
 pull_request:
 paths:
-- 'crates/cli/**'
-- 'crates/config/**'
-- 'crates/generate/**'
-- 'crates/loader/**'
+- 'cli/**'
 - '.github/workflows/nvim_ts.yml'
 workflow_dispatch:
 
@@ -16,7 +13,7 @@ concurrency:
 
 jobs:
 check_compilation:
-timeout-minutes: 30
+timeout-minutes: 20
 strategy:
 fail-fast: false
 matrix:
@@ -28,9 +25,9 @@ jobs:
 NVIM: ${{ matrix.os == 'windows-latest' && 'nvim-win64\\bin\\nvim.exe' || 'nvim' }}
 NVIM_TS_DIR: nvim-treesitter
 steps:
-- uses: actions/checkout@v6
+- uses: actions/checkout@v4
 
-- uses: actions/checkout@v6
+- uses: actions/checkout@v4
 with:
 repository: nvim-treesitter/nvim-treesitter
 path: ${{ env.NVIM_TS_DIR }}
@@ -58,7 +55,7 @@ jobs:
 
 - if: matrix.type == 'build'
 name: Compile parsers
-run: $NVIM -l ./scripts/install-parsers.lua --max-jobs=10
+run: $NVIM -l ./scripts/install-parsers.lua
 working-directory: ${{ env.NVIM_TS_DIR }}
 shell: bash
 

.github/workflows/release.yml (vendored) — 63 changes

@@ -17,15 +17,13 @@ jobs:
 runs-on: ubuntu-latest
 needs: build
 permissions:
-id-token: write
-attestations: write
 contents: write
 steps:
 - name: Checkout repository
-uses: actions/checkout@v6
+uses: actions/checkout@v4
 
 - name: Download build artifacts
-uses: actions/download-artifact@v7
+uses: actions/download-artifact@v4
 with:
 path: artifacts
 
@@ -35,13 +33,9 @@ jobs:
 
 - name: Prepare release artifacts
 run: |
-mkdir -p target web
-mv artifacts/tree-sitter.wasm/* web/
-
-tar -czf target/web-tree-sitter.tar.gz -C web .
-
+mkdir -p target
+mv artifacts/tree-sitter.wasm/* target/
 rm -r artifacts/tree-sitter.wasm
-
 for platform in $(cd artifacts; ls | sed 's/^tree-sitter\.//'); do
 exe=$(ls artifacts/tree-sitter.$platform/tree-sitter*)
 gzip --stdout --name $exe > target/tree-sitter-$platform.gz
@@ -49,65 +43,47 @@ jobs:
 rm -rf artifacts
 ls -l target/
 
-- name: Generate attestations
-uses: actions/attest-build-provenance@v3
-with:
-subject-path: |
-target/tree-sitter-*.gz
-target/web-tree-sitter.tar.gz
-
 - name: Create release
 run: |-
-gh release create $GITHUB_REF_NAME \
+gh release create \
 target/tree-sitter-*.gz \
-target/web-tree-sitter.tar.gz
+target/tree-sitter.wasm \
+target/tree-sitter.js
 env:
 GH_TOKEN: ${{ github.token }}
 
 crates_io:
 name: Publish packages to Crates.io
 runs-on: ubuntu-latest
-environment: crates
-permissions:
-id-token: write
-contents: read
 needs: release
 steps:
 - name: Checkout repository
-uses: actions/checkout@v6
+uses: actions/checkout@v4
 
 - name: Set up Rust
 uses: actions-rust-lang/setup-rust-toolchain@v1
 
-- name: Set up registry token
-id: auth
-uses: rust-lang/crates-io-auth-action@v1
-
 - name: Publish crates to Crates.io
 uses: katyo/publish-crates@v2
 with:
-registry-token: ${{ steps.auth.outputs.token }}
+registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }}
 
 npm:
 name: Publish packages to npmjs.com
 runs-on: ubuntu-latest
-environment: npm
-permissions:
-id-token: write
-contents: read
 needs: release
 strategy:
 fail-fast: false
 matrix:
-directory: [crates/cli/npm, lib/binding_web]
+directory: [cli/npm, lib/binding_web]
 steps:
-- name: Checkout repository
-uses: actions/checkout@v6
+- name: CHeckout repository
+uses: actions/checkout@v4
 
 - name: Set up Node
-uses: actions/setup-node@v6
+uses: actions/setup-node@v4
 with:
-node-version: 24
+node-version: 20
 registry-url: https://registry.npmjs.org
 
 - name: Set up Rust
@@ -115,15 +91,10 @@ jobs:
 
 - name: Build wasm
 if: matrix.directory == 'lib/binding_web'
-run: |
-cd ${{ matrix.directory }}
-npm ci
-npm run build
-npm run build:debug
-CJS=true npm run build
-CJS=true npm run build:debug
-npm run build:dts
+run: cargo xtask build-wasm
 
 - name: Publish to npmjs.com
 working-directory: ${{ matrix.directory }}
 run: npm publish
+env:
+NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}

.github/workflows/response.yml (vendored) — 8 changes

@@ -17,13 +17,13 @@ jobs:
 runs-on: ubuntu-latest
 steps:
 - name: Checkout script
-uses: actions/checkout@v6
+uses: actions/checkout@v4
 with:
 sparse-checkout: .github/scripts/close_unresponsive.js
 sparse-checkout-cone-mode: false
 
 - name: Run script
-uses: actions/github-script@v8
+uses: actions/github-script@v7
 with:
 script: |
 const script = require('./.github/scripts/close_unresponsive.js')
@@ -35,13 +35,13 @@ jobs:
 runs-on: ubuntu-latest
 steps:
 - name: Checkout script
-uses: actions/checkout@v6
+uses: actions/checkout@v4
 with:
 sparse-checkout: .github/scripts/remove_response_label.js
 sparse-checkout-cone-mode: false
 
 - name: Run script
-uses: actions/github-script@v8
+uses: actions/github-script@v7
 with:
 script: |
 const script = require('./.github/scripts/remove_response_label.js')

.github/workflows/reviewers_remove.yml (vendored) — 4 changes

@@ -12,13 +12,13 @@ jobs:
 runs-on: ubuntu-latest
 steps:
 - name: Checkout script
-uses: actions/checkout@v6
+uses: actions/checkout@v4
 with:
 sparse-checkout: .github/scripts/reviewers_remove.js
 sparse-checkout-cone-mode: false
 
 - name: Run script
-uses: actions/github-script@v8
+uses: actions/github-script@v7
 with:
 script: |
 const script = require('./.github/scripts/reviewers_remove.js')

.github/workflows/sanitize.yml (vendored) — 2 changes

@@ -15,7 +15,7 @@ jobs:
 TREE_SITTER: ${{ github.workspace }}/target/release/tree-sitter
 steps:
 - name: Checkout repository
-uses: actions/checkout@v6
+uses: actions/checkout@v4
 
 - name: Install UBSAN library
 run: sudo apt-get update -y && sudo apt-get install -y libubsan1

.github/workflows/spam.yml (vendored) — 29 changes

@@ -1,29 +0,0 @@
-name: Close as spam
-
-on:
-issues:
-types: [labeled]
-pull_request_target:
-types: [labeled]
-
-permissions:
-issues: write
-pull-requests: write
-
-jobs:
-spam:
-runs-on: ubuntu-latest
-if: github.event.label.name == 'spam'
-steps:
-- name: Checkout script
-uses: actions/checkout@v6
-with:
-sparse-checkout: .github/scripts/close_spam.js
-sparse-checkout-cone-mode: false
-
-- name: Run script
-uses: actions/github-script@v8
-with:
-script: |
-const script = require('./.github/scripts/close_spam.js')
-await script({github, context})

.github/workflows/wasm_exports.yml (vendored) — 41 changes

@@ -1,41 +0,0 @@
-name: Check Wasm Exports
-
-on:
-pull_request:
-paths:
-- lib/include/tree_sitter/api.h
-- lib/binding_web/**
-- xtask/src/**
-push:
-branches: [master]
-paths:
-- lib/include/tree_sitter/api.h
-- lib/binding_rust/bindings.rs
-- CMakeLists.txt
-
-jobs:
-check-wasm-exports:
-runs-on: ubuntu-latest
-steps:
-- name: Checkout repository
-uses: actions/checkout@v6
-
-- name: Set up stable Rust toolchain
-uses: actions-rust-lang/setup-rust-toolchain@v1
-with:
-toolchain: stable
-
-- name: Install wasm-objdump
-run: sudo apt-get update -y && sudo apt-get install -y wabt
-
-- name: Build C library (make)
-run: make -j CFLAGS="$CFLAGS"
-env:
-CFLAGS: -g -Werror -Wall -Wextra -Wshadow -Wpedantic -Werror=incompatible-pointer-types
-
-- name: Build Wasm Library
-working-directory: lib/binding_web
-run: npm ci && npm run build:debug
-
-- name: Check Wasm exports
-run: cargo xtask check-wasm-exports

.github/workflows/wasm_stdlib.yml (vendored) — 19 changes

@@ -1,19 +0,0 @@
-name: Check Wasm Stdlib build
-
-on:
-workflow_call:
-
-jobs:
-check:
-runs-on: ubuntu-latest
-steps:
-- name: Checkout repository
-uses: actions/checkout@v6
-
-- name: Check directory changes
-uses: actions/github-script@v8
-with:
-script: |
-const scriptPath = `${process.env.GITHUB_WORKSPACE}/.github/scripts/wasm_stdlib.js`;
-const script = require(scriptPath);
-return script({ github, context, core });

.gitignore (vendored) — 6 changes

@@ -1,12 +1,10 @@
 log*.html
-.direnv
 
 .idea
 *.xcodeproj
 .vscode
 .cache
 .zig-cache
-.direnv
 
 profile*
 fuzz-results
@@ -14,6 +12,7 @@ test/fuzz/out
 test/fixtures/grammars/*
 !test/fixtures/grammars/.gitkeep
 
+package-lock.json
 node_modules
 
 docs/assets/js/tree-sitter.js
@@ -26,7 +25,6 @@ docs/assets/js/tree-sitter.js
 *.dylib
 *.so
 *.so.[0-9]*
-*.dll
 *.o
 *.obj
 *.exp
@@ -36,5 +34,3 @@ docs/assets/js/tree-sitter.js
 .build
 build
 zig-*
-
-/result

@@ -1,11 +0,0 @@
-{
-"lsp": {
-"rust-analyzer": {
-"initialization_options": {
-"cargo": {
-"features": "all"
-}
-}
-}
-}
-}

@@ -1 +1 @@
-See [docs/src/6-contributing.md](./docs/src/6-contributing.md)
+See [section-6-contributing.md](./docs/section-6-contributing.md)

Cargo.lock (generated) — 1,958 changes
File diff suppressed because it is too large.

Cargo.toml — 118 changes

@@ -1,26 +1,22 @@
 [workspace]
-default-members = ["crates/cli"]
+default-members = ["cli"]
 members = [
-"crates/cli",
-"crates/config",
-"crates/generate",
-"crates/highlight",
-"crates/loader",
-"crates/tags",
-"crates/xtask",
-"crates/language",
+"cli",
+"cli/config",
+"cli/loader",
 "lib",
+"lib/language",
+"tags",
+"highlight",
+"xtask",
 ]
 resolver = "2"
 
 [workspace.package]
-version = "0.27.0"
-authors = [
-"Max Brunsfeld <maxbrunsfeld@gmail.com>",
-"Amaan Qureshi <amaanq12@gmail.com>",
-]
+version = "0.25.0"
+authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
 edition = "2021"
-rust-version = "1.85"
+rust-version = "1.74.1"
 homepage = "https://tree-sitter.github.io/tree-sitter"
 repository = "https://github.com/tree-sitter/tree-sitter"
 license = "MIT"
@@ -60,8 +56,6 @@ missing_errors_doc = "allow"
 missing_panics_doc = "allow"
 module_name_repetitions = "allow"
 multiple_crate_versions = "allow"
-needless_for_each = "allow"
-obfuscated_if_else = "allow"
 option_if_let_else = "allow"
 or_fun_call = "allow"
 range_plus_one = "allow"
@@ -78,9 +72,6 @@ unnecessary_wraps = "allow"
 unused_self = "allow"
 used_underscore_items = "allow"
 
-[workspace.lints.rust]
-mismatched_lifetime_syntaxes = "allow"
-
 [profile.optimize]
 inherits = "release"
 strip = true # Automatically strip symbols from the binary.
@@ -102,62 +93,61 @@ incremental = true
 codegen-units = 256
 
 [workspace.dependencies]
-ansi_colours = "1.2.3"
-anstyle = "1.0.13"
-anyhow = "1.0.100"
-bstr = "1.12.0"
-cc = "1.2.53"
-clap = { version = "4.5.54", features = [
+anstyle = "1.0.8"
+anyhow = "1.0.89"
+bstr = "1.11.0"
+cc = "1.2.1"
+clap = { version = "4.5.21", features = [
 "cargo",
 "derive",
 "env",
 "help",
-"string",
 "unstable-styles",
 ] }
-clap_complete = "4.5.65"
-clap_complete_nushell = "4.5.10"
-crc32fast = "1.5.0"
-ctor = "0.2.9"
-ctrlc = { version = "3.5.0", features = ["termination"] }
+clap_complete = "4.5.38"
+ctor = "0.2.8"
+ctrlc = { version = "3.4.5", features = ["termination"] }
 dialoguer = { version = "0.11.0", features = ["fuzzy-select"] }
-etcetera = "0.11.0"
-fs4 = "0.12.0"
-glob = "0.3.3"
+dirs = "5.0.1"
+filetime = "0.2.25"
+fs4 = "0.9.1"
+git2 = "0.19.0"
+glob = "0.3.1"
 heck = "0.5.0"
 html-escape = "0.2.13"
-indexmap = "2.12.1"
-indoc = "2.0.6"
-libloading = "0.9.0"
-log = { version = "0.4.28", features = ["std"] }
-memchr = "2.7.6"
-once_cell = "1.21.3"
+indexmap = "2.5.0"
+indoc = "2.0.5"
+lazy_static = "1.5.0"
+libloading = "0.8.5"
+log = { version = "0.4.22", features = ["std"] }
+memchr = "2.7.4"
+once_cell = "1.19.0"
+path-slash = "0.2.1"
 pretty_assertions = "1.4.1"
 rand = "0.8.5"
-regex = "1.11.3"
-regex-syntax = "0.8.6"
-rustc-hash = "2.1.1"
-schemars = "1.0.5"
-semver = { version = "1.0.27", features = ["serde"] }
-serde = { version = "1.0.219", features = ["derive"] }
-serde_json = { version = "1.0.149", features = ["preserve_order"] }
-similar = "2.7.0"
-smallbitvec = "2.6.0"
+regex = "1.10.6"
+regex-syntax = "0.8.4"
+rustc-hash = "2.0.0"
+semver = { version = "1.0.23", features = ["serde"] }
+serde = { version = "1.0.215", features = ["derive"] }
+serde_derive = "1.0.210"
+serde_json = { version = "1.0.133", features = ["preserve_order"] }
+similar = "2.6.0"
+smallbitvec = "2.5.3"
 streaming-iterator = "0.1.9"
-tempfile = "3.23.0"
-thiserror = "2.0.17"
+tempfile = "3.14.0"
+thiserror = "1.0.69"
 tiny_http = "0.12.0"
-topological-sort = "0.2.2"
-unindent = "0.2.4"
+toml = "0.8.19"
+unindent = "0.2.3"
+url = { version = "2.5.2", features = ["serde"] }
 walkdir = "2.5.0"
-wasmparser = "0.243.0"
-webbrowser = "1.0.5"
+wasmparser = "0.218.0"
+webbrowser = "1.0.2"
 
-tree-sitter = { version = "0.27.0", path = "./lib" }
-tree-sitter-generate = { version = "0.27.0", path = "./crates/generate" }
-tree-sitter-loader = { version = "0.27.0", path = "./crates/loader" }
-tree-sitter-config = { version = "0.27.0", path = "./crates/config" }
-tree-sitter-highlight = { version = "0.27.0", path = "./crates/highlight" }
-tree-sitter-tags = { version = "0.27.0", path = "./crates/tags" }
+tree-sitter = { version = "0.25.0", path = "./lib" }
+tree-sitter-generate = { version = "0.25.0", path = "./cli/generate" }
+tree-sitter-loader = { version = "0.25.0", path = "./cli/loader" }
+tree-sitter-config = { version = "0.25.0", path = "./cli/config" }
+tree-sitter-highlight = { version = "0.25.0", path = "./highlight" }
+tree-sitter-tags = { version = "0.25.0", path = "./tags" }
 
-tree-sitter-language = { version = "0.1", path = "./crates/language" }

2 LICENSE
@@ -1,6 +1,6 @@
 The MIT License (MIT)
 
-Copyright (c) 2018 Max Brunsfeld
+Copyright (c) 2018-2024 Max Brunsfeld
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
57 Makefile
@@ -1,4 +1,8 @@
-VERSION := 0.27.0
+ifeq ($(OS),Windows_NT)
+$(error Windows is not supported)
+endif
+
+VERSION := 0.25.0
 DESCRIPTION := An incremental parsing system for programming tools
 HOMEPAGE_URL := https://tree-sitter.github.io/tree-sitter/

@@ -6,7 +10,6 @@ HOMEPAGE_URL := https://tree-sitter.github.io/tree-sitter/
 PREFIX ?= /usr/local
 INCLUDEDIR ?= $(PREFIX)/include
 LIBDIR ?= $(PREFIX)/lib
-BINDIR ?= $(PREFIX)/bin
 PCLIBDIR ?= $(LIBDIR)/pkgconfig
 
 # collect sources

@@ -24,7 +27,7 @@ OBJ := $(SRC:.c=.o)
 ARFLAGS := rcs
 CFLAGS ?= -O3 -Wall -Wextra -Wshadow -Wpedantic -Werror=incompatible-pointer-types
 override CFLAGS += -std=c11 -fPIC -fvisibility=hidden
-override CFLAGS += -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE -D_BSD_SOURCE -D_DARWIN_C_SOURCE
+override CFLAGS += -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE
 override CFLAGS += -Ilib/src -Ilib/src/wasm -Ilib/include
 
 # ABI versioning

@@ -32,25 +35,20 @@ SONAME_MAJOR := $(word 1,$(subst ., ,$(VERSION)))
 SONAME_MINOR := $(word 2,$(subst ., ,$(VERSION)))
 
 # OS-specific bits
-MACHINE := $(shell $(CC) -dumpmachine)
-
-ifneq ($(findstring darwin,$(MACHINE)),)
+ifneq ($(findstring darwin,$(shell $(CC) -dumpmachine)),)
     SOEXT = dylib
     SOEXTVER_MAJOR = $(SONAME_MAJOR).$(SOEXT)
     SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).$(SOEXT)
     LINKSHARED += -dynamiclib -Wl,-install_name,$(LIBDIR)/libtree-sitter.$(SOEXTVER)
-else ifneq ($(findstring mingw32,$(MACHINE)),)
-    SOEXT = dll
-    LINKSHARED += -s -shared -Wl,--out-implib,libtree-sitter.dll.a
 else
     SOEXT = so
     SOEXTVER_MAJOR = $(SOEXT).$(SONAME_MAJOR)
     SOEXTVER = $(SOEXT).$(SONAME_MAJOR).$(SONAME_MINOR)
     LINKSHARED += -shared -Wl,-soname,libtree-sitter.$(SOEXTVER)
+endif
 ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),)
     PCLIBDIR := $(PREFIX)/libdata/pkgconfig
 endif
-endif
 
 all: libtree-sitter.a libtree-sitter.$(SOEXT) tree-sitter.pc

@@ -63,10 +61,6 @@ ifneq ($(STRIP),)
     $(STRIP) $@
 endif
 
-ifneq ($(findstring mingw32,$(MACHINE)),)
-libtree-sitter.dll.a: libtree-sitter.$(SOEXT)
-endif
-
 tree-sitter.pc: lib/tree-sitter.pc.in
     sed -e 's|@PROJECT_VERSION@|$(VERSION)|' \
         -e 's|@CMAKE_INSTALL_LIBDIR@|$(LIBDIR:$(PREFIX)/%=%)|' \

@@ -75,27 +69,17 @@ tree-sitter.pc: lib/tree-sitter.pc.in
         -e 's|@PROJECT_HOMEPAGE_URL@|$(HOMEPAGE_URL)|' \
         -e 's|@CMAKE_INSTALL_PREFIX@|$(PREFIX)|' $< > $@
 
-shared: libtree-sitter.$(SOEXT)
-
-static: libtree-sitter.a
-
 clean:
-    $(RM) $(OBJ) tree-sitter.pc libtree-sitter.a libtree-sitter.$(SOEXT) libtree-stitter.dll.a
+    $(RM) $(OBJ) tree-sitter.pc libtree-sitter.a libtree-sitter.$(SOEXT)
 
 install: all
     install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)'
     install -m644 lib/include/tree_sitter/api.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/api.h
     install -m644 tree-sitter.pc '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc
     install -m644 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a
-ifneq ($(findstring mingw32,$(MACHINE)),)
-    install -d '$(DESTDIR)$(BINDIR)'
-    install -m755 libtree-sitter.dll '$(DESTDIR)$(BINDIR)'/libtree-sitter.dll
-    install -m755 libtree-sitter.dll.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.dll.a
-else
     install -m755 libtree-sitter.$(SOEXT) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER)
-    cd '$(DESTDIR)$(LIBDIR)' && ln -sf libtree-sitter.$(SOEXTVER) libtree-sitter.$(SOEXTVER_MAJOR)
-    cd '$(DESTDIR)$(LIBDIR)' && ln -sf libtree-sitter.$(SOEXTVER_MAJOR) libtree-sitter.$(SOEXT)
-endif
+    ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR)
+    ln -sf libtree-sitter.$(SOEXTVER_MAJOR) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT)
 
 uninstall:
     $(RM) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a \

@@ -104,9 +88,8 @@ uninstall:
         '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) \
         '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/api.h \
         '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc
-    rmdir '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter
 
-.PHONY: all shared static install uninstall clean
+.PHONY: all install uninstall clean
 
 ##### Dev targets #####

@@ -116,24 +99,20 @@ test:
    cargo xtask generate-fixtures
    cargo xtask test
 
-test-wasm:
-    cargo xtask generate-fixtures --wasm
+test_wasm:
+    cargo xtask generate-fixtures-wasm
     cargo xtask test-wasm
 
 lint:
     cargo update --workspace --locked --quiet
     cargo check --workspace --all-targets
-    cargo fmt --all --check
-    cargo clippy --workspace --all-targets -- -D warnings
-
-lint-web:
-    npm --prefix lib/binding_web ci
-    npm --prefix lib/binding_web run lint
+    cargo +nightly fmt --all --check
+    cargo +nightly clippy --workspace --all-targets -- -D warnings
 
 format:
-    cargo fmt --all
+    cargo +nightly fmt --all
 
 changelog:
     @git-cliff --config .github/cliff.toml --prepend CHANGELOG.md --latest --github-token $(shell gh auth token)
 
-.PHONY: test test-wasm lint format changelog
+.PHONY: test test_wasm lint format changelog
Package.swift
@@ -14,21 +14,11 @@ let package = Package(
     targets: [
         .target(name: "TreeSitter",
                 path: "lib",
-                exclude: [
-                    "src/unicode/ICU_SHA",
-                    "src/unicode/README.md",
-                    "src/unicode/LICENSE",
-                    "src/wasm/stdlib-symbols.txt",
-                    "src/lib.c",
-                ],
-                sources: ["src"],
-                publicHeadersPath: "include",
+                sources: ["src/lib.c"],
                 cSettings: [
                     .headerSearchPath("src"),
                     .define("_POSIX_C_SOURCE", to: "200112L"),
                     .define("_DEFAULT_SOURCE"),
-                    .define("_BSD_SOURCE"),
-                    .define("_DARWIN_C_SOURCE"),
                 ]),
     ],
     cLanguageStandard: .c11
README.md
@@ -14,8 +14,8 @@ Tree-sitter is a parser generator tool and an incremental parsing library. It ca
 ## Links
 - [Documentation](https://tree-sitter.github.io)
 - [Rust binding](lib/binding_rust/README.md)
-- [Wasm binding](lib/binding_web/README.md)
-- [Command-line interface](crates/cli/README.md)
+- [WASM binding](lib/binding_web/README.md)
+- [Command-line interface](cli/README.md)
 
 [discord]: https://img.shields.io/discord/1063097320771698699?logo=discord&label=discord
 [matrix]: https://img.shields.io/matrix/tree-sitter-chat%3Amatrix.org?logo=matrix&label=matrix
218 build.zig
@@ -1,142 +1,116 @@
 const std = @import("std");
 
 pub fn build(b: *std.Build) !void {
     const target = b.standardTargetOptions(.{});
     const optimize = b.standardOptimizeOption(.{});
 
     const wasm = b.option(bool, "enable-wasm", "Enable Wasm support") orelse false;
     const shared = b.option(bool, "build-shared", "Build a shared library") orelse false;
     const amalgamated = b.option(bool, "amalgamated", "Build using an amalgamated source") orelse false;
 
-    const lib: *std.Build.Step.Compile = b.addLibrary(.{
-        .name = "tree-sitter",
-        .linkage = if (shared) .dynamic else .static,
-        .root_module = b.createModule(.{
-            .target = target,
-            .optimize = optimize,
-            .link_libc = true,
-            .pic = if (shared) true else null,
-        }),
-    });
+    const lib: *std.Build.Step.Compile = if (!shared) b.addStaticLibrary(.{
+        .name = "tree-sitter",
+        .target = target,
+        .optimize = optimize,
+        .link_libc = true,
+    }) else b.addSharedLibrary(.{
+        .name = "tree-sitter",
+        .pic = true,
+        .target = target,
+        .optimize = optimize,
+        .link_libc = true,
+    });
 
     if (amalgamated) {
         lib.addCSourceFile(.{
             .file = b.path("lib/src/lib.c"),
             .flags = &.{"-std=c11"},
         });
     } else {
-        const files = try findSourceFiles(b);
-        defer b.allocator.free(files);
         lib.addCSourceFiles(.{
             .root = b.path("lib/src"),
-            .files = files,
+            .files = try findSourceFiles(b),
             .flags = &.{"-std=c11"},
         });
     }
 
     lib.addIncludePath(b.path("lib/include"));
     lib.addIncludePath(b.path("lib/src"));
     lib.addIncludePath(b.path("lib/src/wasm"));
 
     lib.root_module.addCMacro("_POSIX_C_SOURCE", "200112L");
     lib.root_module.addCMacro("_DEFAULT_SOURCE", "");
-    lib.root_module.addCMacro("_BSD_SOURCE", "");
-    lib.root_module.addCMacro("_DARWIN_C_SOURCE", "");
 
     if (wasm) {
         if (b.lazyDependency(wasmtimeDep(target.result), .{})) |wasmtime| {
             lib.root_module.addCMacro("TREE_SITTER_FEATURE_WASM", "");
             lib.addSystemIncludePath(wasmtime.path("include"));
             lib.addLibraryPath(wasmtime.path("lib"));
-            if (shared) lib.linkSystemLibrary("wasmtime");
+            lib.linkSystemLibrary("wasmtime");
         }
     }
 
     lib.installHeadersDirectory(b.path("lib/include"), ".", .{});
 
     b.installArtifact(lib);
 }
 
-/// Get the name of the wasmtime dependency for this target.
-pub fn wasmtimeDep(target: std.Target) []const u8 {
+fn wasmtimeDep(target: std.Target) []const u8 {
     const arch = target.cpu.arch;
     const os = target.os.tag;
     const abi = target.abi;
-    return @as(?[]const u8, switch (os) {
+    return switch (os) {
         .linux => switch (arch) {
             .x86_64 => switch (abi) {
                 .gnu => "wasmtime_c_api_x86_64_linux",
                 .musl => "wasmtime_c_api_x86_64_musl",
                 .android => "wasmtime_c_api_x86_64_android",
-                else => null,
+                else => null
             },
             .aarch64 => switch (abi) {
                 .gnu => "wasmtime_c_api_aarch64_linux",
-                .musl => "wasmtime_c_api_aarch64_musl",
                 .android => "wasmtime_c_api_aarch64_android",
-                else => null,
+                else => null
             },
-            .x86 => switch (abi) {
-                .gnu => "wasmtime_c_api_i686_linux",
-                else => null,
-            },
-            .arm => switch (abi) {
-                .gnueabi => "wasmtime_c_api_armv7_linux",
-                else => null,
-            },
-            .s390x => switch (abi) {
-                .gnu => "wasmtime_c_api_s390x_linux",
-                else => null,
-            },
-            .riscv64 => switch (abi) {
-                .gnu => "wasmtime_c_api_riscv64gc_linux",
-                else => null,
-            },
-            else => null,
+            .s390x => "wasmtime_c_api_s390x_linux",
+            .riscv64 => "wasmtime_c_api_riscv64gc_linux",
+            else => null
         },
         .windows => switch (arch) {
             .x86_64 => switch (abi) {
                 .gnu => "wasmtime_c_api_x86_64_mingw",
                 .msvc => "wasmtime_c_api_x86_64_windows",
-                else => null,
-            },
-            .aarch64 => switch (abi) {
-                .msvc => "wasmtime_c_api_aarch64_windows",
-                else => null,
+                else => null
             },
-            .x86 => switch (abi) {
-                .msvc => "wasmtime_c_api_i686_windows",
-                else => null,
-            },
-            else => null,
+            else => null
         },
         .macos => switch (arch) {
             .x86_64 => "wasmtime_c_api_x86_64_macos",
             .aarch64 => "wasmtime_c_api_aarch64_macos",
-            else => null,
+            else => null
         },
-        else => null,
-    }) orelse std.debug.panic(
+        else => null
+    } orelse std.debug.panic(
         "Unsupported target for wasmtime: {s}-{s}-{s}",
-        .{ @tagName(arch), @tagName(os), @tagName(abi) },
+        .{ @tagName(arch), @tagName(os), @tagName(abi) }
     );
 }
 
 fn findSourceFiles(b: *std.Build) ![]const []const u8 {
-    var sources: std.ArrayListUnmanaged([]const u8) = .empty;
+    var sources = std.ArrayList([]const u8).init(b.allocator);
 
     var dir = try b.build_root.handle.openDir("lib/src", .{ .iterate = true });
     var iter = dir.iterate();
     defer dir.close();
 
     while (try iter.next()) |entry| {
         if (entry.kind != .file) continue;
         const file = entry.name;
         const ext = std.fs.path.extension(file);
         if (std.mem.eql(u8, ext, ".c") and !std.mem.eql(u8, file, "lib.c")) {
-            try sources.append(b.allocator, b.dupe(file));
+            try sources.append(b.dupe(file));
         }
     }
 
-    return sources.toOwnedSlice(b.allocator);
+    return sources.items;
 }
157 build.zig.zon
@@ -1,96 +1,69 @@
 .{
-    .name = .tree_sitter,
-    .fingerprint = 0x841224b447ac0d4f,
-    .version = "0.27.0",
-    .minimum_zig_version = "0.14.1",
+    .name = "tree-sitter",
+    .version = "0.25.0",
     .paths = .{
         "build.zig",
         "build.zig.zon",
         "lib/src",
         "lib/include",
         "README.md",
         "LICENSE",
     },
     .dependencies = .{
-        .wasmtime_c_api_aarch64_android = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-aarch64-android-c-api.tar.xz", .hash = "N-V-__8AAIfPIgdw2YnV3QyiFQ2NHdrxrXzzCdjYJyxJDOta", .lazy = true },
-        .wasmtime_c_api_aarch64_linux = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-aarch64-linux-c-api.tar.xz", .hash = "N-V-__8AAIt97QZi7Pf7nNJ2mVY6uxA80Klyuvvtop3pLMRK", .lazy = true },
-        .wasmtime_c_api_aarch64_macos = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-aarch64-macos-c-api.tar.xz", .hash = "N-V-__8AAAO48QQf91w9RmmUDHTja8DrXZA1n6Bmc8waW3qe", .lazy = true },
-        .wasmtime_c_api_aarch64_musl = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-aarch64-musl-c-api.tar.xz", .hash = "N-V-__8AAI196wa9pwADoA2RbCDp5F7bKQg1iOPq6gIh8-FH", .lazy = true },
-        .wasmtime_c_api_aarch64_windows = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-aarch64-windows-c-api.zip", .hash = "N-V-__8AAC9u4wXfqd1Q6XyQaC8_DbQZClXux60Vu5743N05", .lazy = true },
-        .wasmtime_c_api_armv7_linux = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-armv7-linux-c-api.tar.xz", .hash = "N-V-__8AAHXe8gWs3s83Cc5G6SIq0_jWxj8fGTT5xG4vb6-x", .lazy = true },
-        .wasmtime_c_api_i686_linux = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-i686-linux-c-api.tar.xz", .hash = "N-V-__8AAN2pzgUUfulRCYnipSfis9IIYHoTHVlieLRmKuct", .lazy = true },
-        .wasmtime_c_api_i686_windows = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-i686-windows-c-api.zip", .hash = "N-V-__8AAJu0YAUUTFBLxFIOi-MSQVezA6MMkpoFtuaf2Quf", .lazy = true },
-        .wasmtime_c_api_riscv64gc_linux = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-riscv64gc-linux-c-api.tar.xz", .hash = "N-V-__8AAG8m-gc3E3AIImtTZ3l1c7HC6HUWazQ9OH5KACX4", .lazy = true },
-        .wasmtime_c_api_s390x_linux = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-s390x-linux-c-api.tar.xz", .hash = "N-V-__8AAH314gd-gE4IBp2uvAL3gHeuW1uUZjMiLLeUdXL_", .lazy = true },
-        .wasmtime_c_api_x86_64_android = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-x86_64-android-c-api.tar.xz", .hash = "N-V-__8AAIPNRwfNkznebrcGb0IKUe7f35bkuZEYOjcx6q3f", .lazy = true },
-        .wasmtime_c_api_x86_64_linux = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-x86_64-linux-c-api.tar.xz", .hash = "N-V-__8AAI8EDwcyTtk_Afhk47SEaqfpoRqGkJeZpGs69ChF", .lazy = true },
-        .wasmtime_c_api_x86_64_macos = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-x86_64-macos-c-api.tar.xz", .hash = "N-V-__8AAGtGNgVaOpHSxC22IjrampbRIy6lLwscdcAE8nG1", .lazy = true },
-        .wasmtime_c_api_x86_64_mingw = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-x86_64-mingw-c-api.zip", .hash = "N-V-__8AAPS2PAbVix50L6lnddlgazCPTz3whLUFk1qnRtnZ", .lazy = true },
-        .wasmtime_c_api_x86_64_musl = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-x86_64-musl-c-api.tar.xz", .hash = "N-V-__8AAF-WEQe0nzvi09PgusM5i46FIuCKJmIDWUleWgQ3", .lazy = true },
-        .wasmtime_c_api_x86_64_windows = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-x86_64-windows-c-api.zip", .hash = "N-V-__8AAKGNXwbpJQsn0_6kwSIVDDWifSg8cBzf7T2RzsC9", .lazy = true },
+        .wasmtime_c_api_aarch64_android = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v26.0.1/wasmtime-v26.0.1-aarch64-android-c-api.tar.xz", .hash = "12208b1c6fc26df81b3bf6b82ba38a2099bcbfb3eea21b93c9cca797d8f0067d891f", .lazy = true },
+        .wasmtime_c_api_aarch64_linux = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v26.0.1/wasmtime-v26.0.1-aarch64-linux-c-api.tar.xz", .hash = "12209aaa1bd480ad8674b8d9cc89300e8b045f0fc626938b64158a09e87597705a45", .lazy = true },
+        .wasmtime_c_api_aarch64_macos = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v26.0.1/wasmtime-v26.0.1-aarch64-macos-c-api.tar.xz", .hash = "12206de8f3ce815b0cd9fd735fc61ac73f338e7601e973916b06ae050b4fa7118baf", .lazy = true },
+        .wasmtime_c_api_riscv64gc_linux = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v26.0.1/wasmtime-v26.0.1-riscv64gc-linux-c-api.tar.xz", .hash = "122005e52855c8be82f574b6f35c1e2f5bc6d74ec1e12f16852654e4edd6ac7e2fc1", .lazy = true },
+        .wasmtime_c_api_s390x_linux = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v26.0.1/wasmtime-v26.0.1-s390x-linux-c-api.tar.xz", .hash = "1220a4643445f5e67daffe6473c8e68267682aa92e4d612355b7ac6d46be41d8511e", .lazy = true },
+        .wasmtime_c_api_x86_64_android = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v26.0.1/wasmtime-v26.0.1-x86_64-android-c-api.tar.xz", .hash = "122082a6f5db4787a639d8fa587087d3452aa53a92137fef701dfd2be4d62a70102f", .lazy = true },
+        .wasmtime_c_api_x86_64_linux = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v26.0.1/wasmtime-v26.0.1-x86_64-linux-c-api.tar.xz", .hash = "12201e8daa6057abd4ce5d25d29a053f4be66a81b695f32f65a14f999bf075ddc0f2", .lazy = true },
+        .wasmtime_c_api_x86_64_macos = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v26.0.1/wasmtime-v26.0.1-x86_64-macos-c-api.tar.xz", .hash = "122063a6a6811cf6a3ae6838a61abb66ff4c348447c657a5ed2348c0d310efc2edbb", .lazy = true },
+        .wasmtime_c_api_x86_64_mingw = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v26.0.1/wasmtime-v26.0.1-x86_64-mingw-c-api.zip", .hash = "1220bdd5c3711af386ca07795c7ee8917f58365b0bb6b95255424aa86e08a7fcb4fa", .lazy = true },
+        .wasmtime_c_api_x86_64_musl = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v26.0.1/wasmtime-v26.0.1-x86_64-musl-c-api.tar.xz", .hash = "12200037419e1a5f8a529d42e0ec289919dc5baf06981bc98295e61df4976563566d", .lazy = true },
+        .wasmtime_c_api_x86_64_windows = .{ .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v26.0.1/wasmtime-v26.0.1-x86_64-windows-c-api.zip", .hash = "122069341103b7d16b1f47c3bb96101614af0845ba63a0664e5cc857e9feb369a772", .lazy = true },
     },
 }
@@ -8,18 +8,13 @@ rust-version.workspace = true
 readme = "README.md"
 homepage.workspace = true
 repository.workspace = true
-documentation = "https://docs.rs/tree-sitter-cli"
 license.workspace = true
 keywords.workspace = true
 categories.workspace = true
-include = ["build.rs", "README.md", "LICENSE", "benches/*", "src/**"]
 
 [lints]
 workspace = true
 
-[lib]
-path = "src/tree_sitter_cli.rs"
-
 [[bin]]
 name = "tree-sitter"
 path = "src/main.rs"

@@ -30,38 +25,40 @@ name = "benchmark"
 harness = false
 
 [features]
-default = ["qjs-rt"]
 wasm = ["tree-sitter/wasm", "tree-sitter-loader/wasm"]
-qjs-rt = ["tree-sitter-generate/qjs-rt"]
 
 [dependencies]
-ansi_colours.workspace = true
 anstyle.workspace = true
 anyhow.workspace = true
 bstr.workspace = true
 clap.workspace = true
 clap_complete.workspace = true
-clap_complete_nushell.workspace = true
-crc32fast.workspace = true
 ctor.workspace = true
 ctrlc.workspace = true
 dialoguer.workspace = true
+dirs.workspace = true
+filetime.workspace = true
 glob.workspace = true
 heck.workspace = true
 html-escape.workspace = true
+indexmap.workspace = true
 indoc.workspace = true
+lazy_static.workspace = true
 log.workspace = true
 memchr.workspace = true
 rand.workspace = true
 regex.workspace = true
-schemars.workspace = true
+regex-syntax.workspace = true
+rustc-hash.workspace = true
 semver.workspace = true
 serde.workspace = true
+serde_derive.workspace = true
 serde_json.workspace = true
 similar.workspace = true
+smallbitvec.workspace = true
 streaming-iterator.workspace = true
-thiserror.workspace = true
 tiny_http.workspace = true
+url.workspace = true
 walkdir.workspace = true
 wasmparser.workspace = true
 webbrowser.workspace = true

@@ -75,7 +72,7 @@ tree-sitter-tags.workspace = true
 
 [dev-dependencies]
 encoding_rs = "0.8.35"
-widestring = "1.2.1"
+widestring = "1.1.0"
 tree_sitter_proc_macro = { path = "src/tests/proc_macro", package = "tree-sitter-tests-proc-macro" }
 
 tempfile.workspace = true
@@ -7,8 +7,7 @@
 [npmjs.com]: https://www.npmjs.org/package/tree-sitter-cli
 [npmjs.com badge]: https://img.shields.io/npm/v/tree-sitter-cli.svg?color=%23BF4A4A
 
-The Tree-sitter CLI allows you to develop, test, and use Tree-sitter grammars from the command line. It works on `MacOS`,
-`Linux`, and `Windows`.
+The Tree-sitter CLI allows you to develop, test, and use Tree-sitter grammars from the command line. It works on `MacOS`, `Linux`, and `Windows`.
 
 ### Installation

@@ -35,11 +34,9 @@ The `tree-sitter` binary itself has no dependencies, but specific commands have
 ### Commands
 
-* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current
-  working directory. See [the documentation] for more information.
+* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. See [the documentation] for more information.
 
-* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory.
-  See [the documentation] for more information.
+* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation] for more information.
 
 * `parse` - The `tree-sitter parse` command will parse a file (or list of files) using Tree-sitter parsers.
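For reference, and not part of this diff: the same parsing that `tree-sitter parse` performs can be driven from the Rust library. This is a minimal sketch; the `tree_sitter_rust` grammar crate and the `example.rs` file name are assumptions for illustration only.

```rust
// Sketch only: parse a file and print its syntax tree as an S-expression.
// Assumes `tree-sitter` and a grammar crate such as `tree-sitter-rust` are dependencies.
use tree_sitter::Parser;

fn main() {
    let source = std::fs::read_to_string("example.rs").expect("failed to read file");

    let mut parser = Parser::new();
    parser
        .set_language(&tree_sitter_rust::LANGUAGE.into())
        .expect("incompatible language version");

    let tree = parser.parse(source.as_bytes(), None).expect("parsing failed");
    println!("{}", tree.root_node().to_sexp());
}
```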
@@ -3,77 +3,70 @@ use std::{
     env, fs,
     path::{Path, PathBuf},
     str,
-    sync::LazyLock,
     time::Instant,
 };
 
 use anyhow::Context;
-use log::info;
+use lazy_static::lazy_static;
 use tree_sitter::{Language, Parser, Query};
 use tree_sitter_loader::{CompileConfig, Loader};
 
 include!("../src/tests/helpers/dirs.rs");
 
-static LANGUAGE_FILTER: LazyLock<Option<String>> =
-    LazyLock::new(|| env::var("TREE_SITTER_BENCHMARK_LANGUAGE_FILTER").ok());
-static EXAMPLE_FILTER: LazyLock<Option<String>> =
-    LazyLock::new(|| env::var("TREE_SITTER_BENCHMARK_EXAMPLE_FILTER").ok());
-static REPETITION_COUNT: LazyLock<usize> = LazyLock::new(|| {
-    env::var("TREE_SITTER_BENCHMARK_REPETITION_COUNT")
-        .map(|s| s.parse::<usize>().unwrap())
-        .unwrap_or(5)
-});
-static TEST_LOADER: LazyLock<Loader> =
-    LazyLock::new(|| Loader::with_parser_lib_path(SCRATCH_DIR.clone()));
-
-#[allow(clippy::type_complexity)]
-static EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR: LazyLock<
-    BTreeMap<PathBuf, (Vec<PathBuf>, Vec<PathBuf>)>,
-> = LazyLock::new(|| {
-    fn process_dir(result: &mut BTreeMap<PathBuf, (Vec<PathBuf>, Vec<PathBuf>)>, dir: &Path) {
-        if dir.join("grammar.js").exists() {
-            let relative_path = dir.strip_prefix(GRAMMARS_DIR.as_path()).unwrap();
-            let (example_paths, query_paths) = result.entry(relative_path.to_owned()).or_default();
+lazy_static! {
+    static ref LANGUAGE_FILTER: Option<String> =
+        env::var("TREE_SITTER_BENCHMARK_LANGUAGE_FILTER").ok();
+    static ref EXAMPLE_FILTER: Option<String> =
+        env::var("TREE_SITTER_BENCHMARK_EXAMPLE_FILTER").ok();
+    static ref REPETITION_COUNT: usize = env::var("TREE_SITTER_BENCHMARK_REPETITION_COUNT")
+        .map(|s| s.parse::<usize>().unwrap())
+        .unwrap_or(5);
+    static ref TEST_LOADER: Loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone());
+    static ref EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR: BTreeMap<PathBuf, (Vec<PathBuf>, Vec<PathBuf>)> = {
+        fn process_dir(result: &mut BTreeMap<PathBuf, (Vec<PathBuf>, Vec<PathBuf>)>, dir: &Path) {
+            if dir.join("grammar.js").exists() {
+                let relative_path = dir.strip_prefix(GRAMMARS_DIR.as_path()).unwrap();
+                let (example_paths, query_paths) =
+                    result.entry(relative_path.to_owned()).or_default();
 
         (body of `process_dir` unchanged on both sides, apart from indentation:)
             if let Ok(example_files) = fs::read_dir(dir.join("examples")) {
                 example_paths.extend(example_files.filter_map(|p| {
                     let p = p.unwrap().path();
                     if p.is_file() { Some(p) } else { None }
                 }));
             }
 
             if let Ok(query_files) = fs::read_dir(dir.join("queries")) {
                 query_paths.extend(query_files.filter_map(|p| {
                     let p = p.unwrap().path();
                     if p.is_file() { Some(p) } else { None }
                 }));
             }
         } else {
             for entry in fs::read_dir(dir).unwrap() {
                 let entry = entry.unwrap().path();
                 if entry.is_dir() {
                     process_dir(result, &entry);
                 }
             }
         }
     }
 
-    let mut result = BTreeMap::new();
-    process_dir(&mut result, &GRAMMARS_DIR);
-    result
-});
+        let mut result = BTreeMap::new();
+        process_dir(&mut result, &GRAMMARS_DIR);
+        result
+    };
+}
 
 fn main() {
-    tree_sitter_cli::logger::init();
-
     let max_path_length = EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR
         .values()
         .flat_map(|(e, q)| {

@@ -84,7 +77,7 @@ fn main() {
         .max()
         .unwrap_or(0);
 
-    info!("Benchmarking with {} repetitions", *REPETITION_COUNT);
+    eprintln!("Benchmarking with {} repetitions", *REPETITION_COUNT);
 
     let mut parser = Parser::new();
     let mut all_normal_speeds = Vec::new();

@@ -101,11 +94,11 @@ fn main() {
             }
         }
 
-        info!("\nLanguage: {language_name}");
+        eprintln!("\nLanguage: {language_name}");
         let language = get_language(language_path);
         parser.set_language(&language).unwrap();
 
-        info!("  Constructing Queries");
+        eprintln!("  Constructing Queries");
         for path in query_paths {
             if let Some(filter) = EXAMPLE_FILTER.as_ref() {
                 if !path.to_str().unwrap().contains(filter.as_str()) {

@@ -115,12 +108,12 @@ fn main() {
 
             parse(path, max_path_length, |source| {
                 Query::new(&language, str::from_utf8(source).unwrap())
-                    .with_context(|| format!("Query file path: {}", path.display()))
+                    .with_context(|| format!("Query file path: {path:?}"))
                     .expect("Failed to parse query");
             });
         }
 
-        info!("  Parsing Valid Code:");
+        eprintln!("  Parsing Valid Code:");
         let mut normal_speeds = Vec::new();
         for example_path in example_paths {
             if let Some(filter) = EXAMPLE_FILTER.as_ref() {

@@ -134,7 +127,7 @@ fn main() {
             }));
         }
 
-        info!("  Parsing Invalid Code (mismatched languages):");
+        eprintln!("  Parsing Invalid Code (mismatched languages):");
         let mut error_speeds = Vec::new();
         for (other_language_path, (example_paths, _)) in
             EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR.iter()

@@ -155,30 +148,30 @@ fn main() {
         }
 
         if let Some((average_normal, worst_normal)) = aggregate(&normal_speeds) {
-            info!("    Average Speed (normal): {average_normal} bytes/ms");
-            info!("    Worst Speed (normal):   {worst_normal} bytes/ms");
+            eprintln!("    Average Speed (normal): {average_normal} bytes/ms");
+            eprintln!("    Worst Speed (normal):   {worst_normal} bytes/ms");
         }
 
         if let Some((average_error, worst_error)) = aggregate(&error_speeds) {
-            info!("    Average Speed (errors): {average_error} bytes/ms");
-            info!("    Worst Speed (errors):   {worst_error} bytes/ms");
+            eprintln!("    Average Speed (errors): {average_error} bytes/ms");
+            eprintln!("    Worst Speed (errors):   {worst_error} bytes/ms");
         }
 
         all_normal_speeds.extend(normal_speeds);
         all_error_speeds.extend(error_speeds);
     }
 
-    info!("\n  Overall");
+    eprintln!("\n  Overall");
     if let Some((average_normal, worst_normal)) = aggregate(&all_normal_speeds) {
-        info!("    Average Speed (normal): {average_normal} bytes/ms");
-        info!("    Worst Speed (normal):   {worst_normal} bytes/ms");
+        eprintln!("    Average Speed (normal): {average_normal} bytes/ms");
+        eprintln!("    Worst Speed (normal):   {worst_normal} bytes/ms");
     }
 
     if let Some((average_error, worst_error)) = aggregate(&all_error_speeds) {
-        info!("    Average Speed (errors): {average_error} bytes/ms");
-        info!("    Worst Speed (errors):   {worst_error} bytes/ms");
+        eprintln!("    Average Speed (errors): {average_error} bytes/ms");
+        eprintln!("    Worst Speed (errors):   {worst_error} bytes/ms");
     }
-    info!("");
+    eprintln!();
 }
 
 fn aggregate(speeds: &[usize]) -> Option<(usize, usize)> {

@@ -197,8 +190,14 @@ fn aggregate(speeds: &[usize]) -> Option<(usize, usize)> {
 }
 
 fn parse(path: &Path, max_path_length: usize, mut action: impl FnMut(&[u8])) -> usize {
+    eprint!(
+        "    {:width$}\t",
+        path.file_name().unwrap().to_str().unwrap(),
+        width = max_path_length
+    );
+
     let source_code = fs::read(path)
-        .with_context(|| format!("Failed to read {}", path.display()))
+        .with_context(|| format!("Failed to read {path:?}"))
         .unwrap();
     let time = Instant::now();
     for _ in 0..*REPETITION_COUNT {

@@ -207,9 +206,8 @@ fn parse(path: &Path, max_path_length: usize, mut action: impl FnMut(&[u8])) ->
     let duration = time.elapsed() / (*REPETITION_COUNT as u32);
     let duration_ns = duration.as_nanos();
     let speed = ((source_code.len() as u128) * 1_000_000) / duration_ns;
-    info!(
-        "    {:max_path_length$}\ttime {:>7.2} ms\t\tspeed {speed:>6} bytes/ms",
-        path.file_name().unwrap().to_str().unwrap(),
+    eprintln!(
+        "time {:>7.2} ms\t\tspeed {speed:>6} bytes/ms",
         (duration_ns as f64) / 1e6,
     );
     speed as usize

@@ -219,6 +217,6 @@ fn get_language(path: &Path) -> Language {
     let src_path = GRAMMARS_DIR.join(path).join("src");
     TEST_LOADER
         .load_language_at_path(CompileConfig::new(&src_path, None, None))
-        .with_context(|| format!("Failed to load language at path {}", src_path.display()))
+        .with_context(|| format!("Failed to load language at path {src_path:?}"))
         .unwrap()
 }
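Not part of the diff: the main difference between the two sides of the benchmark above is how the lazily-initialized statics are declared, `lazy_static!` on the branch versus the standard library's `std::sync::LazyLock` (stable since Rust 1.80). A minimal self-contained sketch of the `LazyLock` form:

```rust
use std::{env, sync::LazyLock};

// Initialized on first access; no external crate needed (Rust 1.80+).
static REPETITION_COUNT: LazyLock<usize> = LazyLock::new(|| {
    env::var("TREE_SITTER_BENCHMARK_REPETITION_COUNT")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(5)
});

fn main() {
    // Dereferencing the LazyLock runs the initializer exactly once.
    println!("repetitions: {}", *REPETITION_COUNT);
}
```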
@@ -52,14 +52,16 @@ fn main() {
 
 fn web_playground_files_present() -> bool {
     let paths = [
-        "../../docs/src/assets/js/playground.js",
-        "../../lib/binding_web/web-tree-sitter.js",
-        "../../lib/binding_web/web-tree-sitter.wasm",
+        "../docs/assets/js/playground.js",
+        "../lib/binding_web/tree-sitter.js",
+        "../lib/binding_web/tree-sitter.wasm",
     ];
 
     paths.iter().all(|p| Path::new(p).exists())
 }
 
+// When updating this function, don't forget to also update generate/build.rs which has a
+// near-identical function.
 fn read_git_sha() -> Option<String> {
     let crate_path = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());
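For context, and not part of this diff: `read_git_sha` in the project's build script reads the `.git` directory directly. A much simpler, hedged sketch of the same idea is shown below; it assumes `git` is on `PATH`, and the `BUILD_SHA` environment variable name is made up for illustration.

```rust
// build.rs sketch only; not the project's actual implementation.
use std::process::Command;

fn main() {
    let sha = Command::new("git")
        .args(["rev-parse", "HEAD"])
        .output()
        .ok()
        .filter(|out| out.status.success())
        .and_then(|out| String::from_utf8(out.stdout).ok())
        .map(|s| s.trim().to_string());

    if let Some(sha) = sha {
        // The crate can then read it with option_env!("BUILD_SHA").
        println!("cargo:rustc-env=BUILD_SHA={sha}");
    }
    // Re-run the build script when HEAD moves.
    println!("cargo:rerun-if-changed=.git/HEAD");
}
```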
@@ -8,20 +8,15 @@ rust-version.workspace = true
 readme = "README.md"
 homepage.workspace = true
 repository.workspace = true
-documentation = "https://docs.rs/tree-sitter-config"
 license.workspace = true
 keywords.workspace = true
 categories.workspace = true
 
-[lib]
-path = "src/tree_sitter_config.rs"
-
 [lints]
 workspace = true
 
 [dependencies]
-etcetera.workspace = true
-log.workspace = true
+anyhow.workspace = true
+dirs.workspace = true
 serde.workspace = true
 serde_json.workspace = true
-thiserror.workspace = true
@ -1,54 +1,10 @@
|
||||||
#![cfg_attr(not(any(test, doctest)), doc = include_str!("../README.md"))]
|
#![doc = include_str!("../README.md")]
|
||||||
|
|
||||||
use std::{
|
use std::{env, fs, path::PathBuf};
|
||||||
env, fs,
|
|
||||||
path::{Path, PathBuf},
|
|
||||||
};
|
|
||||||
|
|
||||||
use etcetera::BaseStrategy as _;
|
use anyhow::{anyhow, Context, Result};
|
||||||
use log::warn;
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use thiserror::Error;
|
|
||||||
|
|
||||||
pub type ConfigResult<T> = Result<T, ConfigError>;
|
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
|
||||||
pub enum ConfigError {
|
|
||||||
#[error("Bad JSON config {0} -- {1}")]
|
|
||||||
ConfigRead(String, serde_json::Error),
|
|
||||||
#[error(transparent)]
|
|
||||||
HomeDir(#[from] etcetera::HomeDirError),
|
|
||||||
#[error(transparent)]
|
|
||||||
IO(IoError),
|
|
||||||
#[error(transparent)]
|
|
||||||
Serialization(#[from] serde_json::Error),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
|
||||||
pub struct IoError {
|
|
||||||
pub error: std::io::Error,
|
|
||||||
pub path: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl IoError {
|
|
||||||
fn new(error: std::io::Error, path: Option<&Path>) -> Self {
|
|
||||||
Self {
|
|
||||||
error,
|
|
||||||
path: path.map(|p| p.to_string_lossy().to_string()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Display for IoError {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
write!(f, "{}", self.error)?;
|
|
||||||
if let Some(ref path) = self.path {
|
|
||||||
write!(f, " ({path})")?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Holds the contents of tree-sitter's configuration file.
|
/// Holds the contents of tree-sitter's configuration file.
|
||||||
///
|
///
|
||||||
|
|
@ -65,7 +21,7 @@ pub struct Config {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Config {
|
impl Config {
|
||||||
pub fn find_config_file() -> ConfigResult<Option<PathBuf>> {
|
pub fn find_config_file() -> Result<Option<PathBuf>> {
|
||||||
if let Ok(path) = env::var("TREE_SITTER_DIR") {
|
if let Ok(path) = env::var("TREE_SITTER_DIR") {
|
||||||
let mut path = PathBuf::from(path);
|
let mut path = PathBuf::from(path);
|
||||||
path.push("config.json");
|
path.push("config.json");
|
||||||
|
|
@ -82,28 +38,8 @@ impl Config {
|
||||||
return Ok(Some(xdg_path));
|
return Ok(Some(xdg_path));
|
||||||
}
|
}
|
||||||
|
|
||||||
if cfg!(target_os = "macos") {
|
let legacy_path = dirs::home_dir()
|
||||||
let legacy_apple_path = etcetera::base_strategy::Apple::new()?
|
.ok_or_else(|| anyhow!("Cannot determine home directory"))?
|
||||||
.data_dir() // `$HOME/Library/Application Support/`
|
|
||||||
.join("tree-sitter")
|
|
||||||
.join("config.json");
|
|
||||||
if legacy_apple_path.is_file() {
|
|
||||||
let xdg_dir = xdg_path.parent().unwrap();
|
|
||||||
fs::create_dir_all(xdg_dir)
|
|
||||||
.map_err(|e| ConfigError::IO(IoError::new(e, Some(xdg_dir))))?;
|
|
||||||
fs::rename(&legacy_apple_path, &xdg_path).map_err(|e| {
|
|
||||||
ConfigError::IO(IoError::new(e, Some(legacy_apple_path.as_path())))
|
|
||||||
})?;
|
|
||||||
warn!(
|
|
||||||
"Your config.json file has been automatically migrated from \"{}\" to \"{}\"",
|
|
||||||
legacy_apple_path.display(),
|
|
||||||
xdg_path.display()
|
|
||||||
);
|
|
||||||
return Ok(Some(xdg_path));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let legacy_path = etcetera::home_dir()?
|
|
||||||
.join(".tree-sitter")
|
.join(".tree-sitter")
|
||||||
.join("config.json");
|
.join("config.json");
|
||||||
if legacy_path.is_file() {
|
if legacy_path.is_file() {
|
||||||
|
|
@ -113,9 +49,9 @@ impl Config {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn xdg_config_file() -> ConfigResult<PathBuf> {
|
fn xdg_config_file() -> Result<PathBuf> {
|
||||||
let xdg_path = etcetera::choose_base_strategy()?
|
let xdg_path = dirs::config_dir()
|
||||||
.config_dir()
|
.ok_or_else(|| anyhow!("Cannot determine config directory"))?
|
||||||
.join("tree-sitter")
|
.join("tree-sitter")
|
||||||
.join("config.json");
|
.join("config.json");
|
||||||
Ok(xdg_path)
|
Ok(xdg_path)
|
||||||
|
|
@ -127,10 +63,10 @@ impl Config {
|
||||||
/// - Location specified by the path parameter if provided
|
/// - Location specified by the path parameter if provided
|
||||||
/// - `$TREE_SITTER_DIR/config.json`, if the `TREE_SITTER_DIR` environment variable is set
|
/// - `$TREE_SITTER_DIR/config.json`, if the `TREE_SITTER_DIR` environment variable is set
|
||||||
/// - `tree-sitter/config.json` in your default user configuration directory, as determined by
|
/// - `tree-sitter/config.json` in your default user configuration directory, as determined by
|
||||||
/// [`etcetera::choose_base_strategy`](https://docs.rs/etcetera/*/etcetera/#basestrategy)
|
/// [`dirs::config_dir`](https://docs.rs/dirs/*/dirs/fn.config_dir.html)
|
||||||
/// - `$HOME/.tree-sitter/config.json` as a fallback from where tree-sitter _used_ to store
|
/// - `$HOME/.tree-sitter/config.json` as a fallback from where tree-sitter _used_ to store
|
||||||
/// its configuration
|
/// its configuration
|
||||||
pub fn load(path: Option<PathBuf>) -> ConfigResult<Self> {
|
pub fn load(path: Option<PathBuf>) -> Result<Self> {
|
||||||
let location = if let Some(path) = path {
|
let location = if let Some(path) = path {
|
||||||
path
|
path
|
||||||
} else if let Some(path) = Self::find_config_file()? {
|
} else if let Some(path) = Self::find_config_file()? {
|
||||||
|
|
@ -140,9 +76,9 @@ impl Config {
|
||||||
};
|
};
|
||||||
|
|
||||||
let content = fs::read_to_string(&location)
|
let content = fs::read_to_string(&location)
|
||||||
.map_err(|e| ConfigError::IO(IoError::new(e, Some(location.as_path()))))?;
|
.with_context(|| format!("Failed to read {}", &location.to_string_lossy()))?;
|
||||||
let config = serde_json::from_str(&content)
|
let config = serde_json::from_str(&content)
|
||||||
.map_err(|e| ConfigError::ConfigRead(location.to_string_lossy().to_string(), e))?;
|
.with_context(|| format!("Bad JSON config {}", &location.to_string_lossy()))?;
|
||||||
Ok(Self { location, config })
|
Ok(Self { location, config })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -152,7 +88,7 @@ impl Config {
|
||||||
/// disk.
|
/// disk.
|
||||||
///
|
///
|
||||||
/// (Note that this is typically only done by the `tree-sitter init-config` command.)
|
/// (Note that this is typically only done by the `tree-sitter init-config` command.)
|
||||||
pub fn initial() -> ConfigResult<Self> {
|
pub fn initial() -> Result<Self> {
|
||||||
let location = if let Ok(path) = env::var("TREE_SITTER_DIR") {
|
let location = if let Ok(path) = env::var("TREE_SITTER_DIR") {
|
||||||
let mut path = PathBuf::from(path);
|
let mut path = PathBuf::from(path);
|
||||||
path.push("config.json");
|
path.push("config.json");
|
||||||
|
|
@ -165,20 +101,17 @@ impl Config {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Saves this configuration to the file that it was originally loaded from.
|
/// Saves this configuration to the file that it was originally loaded from.
|
||||||
pub fn save(&self) -> ConfigResult<()> {
|
pub fn save(&self) -> Result<()> {
|
||||||
let json = serde_json::to_string_pretty(&self.config)?;
|
let json = serde_json::to_string_pretty(&self.config)?;
|
||||||
let config_dir = self.location.parent().unwrap();
|
fs::create_dir_all(self.location.parent().unwrap())?;
|
||||||
fs::create_dir_all(config_dir)
|
fs::write(&self.location, json)?;
|
||||||
.map_err(|e| ConfigError::IO(IoError::new(e, Some(config_dir))))?;
|
|
||||||
fs::write(&self.location, json)
|
|
||||||
.map_err(|e| ConfigError::IO(IoError::new(e, Some(self.location.as_path()))))?;
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parses a component-specific configuration from the configuration file. The type `C` must
|
/// Parses a component-specific configuration from the configuration file. The type `C` must
|
||||||
/// be [deserializable](https://docs.rs/serde/*/serde/trait.Deserialize.html) from a JSON
|
/// be [deserializable](https://docs.rs/serde/*/serde/trait.Deserialize.html) from a JSON
|
||||||
/// object, and must only include the fields relevant to that component.
|
/// object, and must only include the fields relevant to that component.
|
||||||
pub fn get<C>(&self) -> ConfigResult<C>
|
pub fn get<C>(&self) -> Result<C>
|
||||||
where
|
where
|
||||||
C: for<'de> Deserialize<'de>,
|
C: for<'de> Deserialize<'de>,
|
||||||
{
|
{
|
||||||
|
|
@ -189,7 +122,7 @@ impl Config {
|
||||||
/// Adds a component-specific configuration to the configuration file. The type `C` must be
|
/// Adds a component-specific configuration to the configuration file. The type `C` must be
|
||||||
/// [serializable](https://docs.rs/serde/*/serde/trait.Serialize.html) into a JSON object, and
|
/// [serializable](https://docs.rs/serde/*/serde/trait.Serialize.html) into a JSON object, and
|
||||||
/// must only include the fields relevant to that component.
|
/// must only include the fields relevant to that component.
|
||||||
pub fn add<C>(&mut self, config: C) -> ConfigResult<()>
|
pub fn add<C>(&mut self, config: C) -> Result<()>
|
||||||
where
|
where
|
||||||
C: Serialize,
|
C: Serialize,
|
||||||
{
|
{
|
||||||
|
|
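The config.rs hunks above move between two common Rust error-handling styles: typed errors (a `thiserror`-derived enum mapped explicitly with `map_err`) on one side, and `anyhow` context chaining on the other. The sketch below is illustrative only — it assumes nothing about the real `ConfigError`/`ConfigResult` definitions in this diff and uses a made-up `ReadError` type purely to contrast the two patterns on a file read.

```rust
use std::{fs, path::Path};

use anyhow::Context;
use thiserror::Error;

// Hypothetical error type, not the one from the diff.
#[derive(Debug, Error)]
enum ReadError {
    #[error("failed to read {path}")]
    Io {
        path: String,
        #[source]
        source: std::io::Error,
    },
}

// Typed-error style: callers can match on the variant programmatically.
fn read_typed(path: &Path) -> Result<String, ReadError> {
    fs::read_to_string(path).map_err(|source| ReadError::Io {
        path: path.display().to_string(),
        source,
    })
}

// anyhow style: the error is opaque but carries a human-readable context chain.
fn read_with_context(path: &Path) -> anyhow::Result<String> {
    fs::read_to_string(path).with_context(|| format!("Failed to read {}", path.display()))
}
```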
@ -305,9 +305,9 @@
|
||||||
"peer": true
|
"peer": true
|
||||||
},
|
},
|
||||||
"node_modules/brace-expansion": {
|
"node_modules/brace-expansion": {
|
||||||
"version": "1.1.12",
|
"version": "1.1.11",
|
||||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||||
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"peer": true,
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
|
@ -805,9 +805,9 @@
|
||||||
"peer": true
|
"peer": true
|
||||||
},
|
},
|
||||||
"node_modules/js-yaml": {
|
"node_modules/js-yaml": {
|
||||||
"version": "4.1.1",
|
"version": "4.1.0",
|
||||||
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz",
|
||||||
"integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==",
|
"integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"peer": true,
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
|
@ -4,8 +4,7 @@
|
||||||
"description": "Eslint configuration for Tree-sitter grammar files",
|
"description": "Eslint configuration for Tree-sitter grammar files",
|
||||||
"repository": {
|
"repository": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "git+https://github.com/tree-sitter/tree-sitter.git",
|
"url": "git+https://github.com/tree-sitter/tree-sitter.git"
|
||||||
"directory": "crates/cli/eslint"
|
|
||||||
},
|
},
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"author": "Amaan Qureshi <amaanq12@gmail.com>",
|
"author": "Amaan Qureshi <amaanq12@gmail.com>",
|
||||||
|
|
@ -21,9 +20,5 @@
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"eslint": ">= 9"
|
"eslint": ">= 9"
|
||||||
},
|
|
||||||
"scripts": {
|
|
||||||
"prepack": "cp ../../../LICENSE .",
|
|
||||||
"postpack": "rm LICENSE"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -8,44 +8,27 @@ rust-version.workspace = true
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
homepage.workspace = true
|
homepage.workspace = true
|
||||||
repository.workspace = true
|
repository.workspace = true
|
||||||
documentation = "https://docs.rs/tree-sitter-generate"
|
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
keywords.workspace = true
|
keywords.workspace = true
|
||||||
categories.workspace = true
|
categories.workspace = true
|
||||||
|
|
||||||
[lib]
|
|
||||||
path = "src/generate.rs"
|
|
||||||
|
|
||||||
[lints]
|
[lints]
|
||||||
workspace = true
|
workspace = true
|
||||||
|
|
||||||
[features]
|
|
||||||
default = ["qjs-rt"]
|
|
||||||
load = ["dep:semver"]
|
|
||||||
qjs-rt = ["load", "rquickjs", "pathdiff"]
|
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
bitflags = "2.9.4"
|
anyhow.workspace = true
|
||||||
dunce = "1.0.5"
|
heck.workspace = true
|
||||||
indexmap.workspace = true
|
indexmap.workspace = true
|
||||||
indoc.workspace = true
|
indoc.workspace = true
|
||||||
|
lazy_static.workspace = true
|
||||||
log.workspace = true
|
log.workspace = true
|
||||||
pathdiff = { version = "0.2.3", optional = true }
|
|
||||||
regex.workspace = true
|
regex.workspace = true
|
||||||
regex-syntax.workspace = true
|
regex-syntax.workspace = true
|
||||||
rquickjs = { version = "0.11.0", optional = true, features = [
|
|
||||||
"bindgen",
|
|
||||||
"loader",
|
|
||||||
"macro",
|
|
||||||
"phf",
|
|
||||||
] }
|
|
||||||
rustc-hash.workspace = true
|
rustc-hash.workspace = true
|
||||||
semver = { workspace = true, optional = true }
|
semver.workspace = true
|
||||||
serde.workspace = true
|
serde.workspace = true
|
||||||
serde_json.workspace = true
|
serde_json.workspace = true
|
||||||
smallbitvec.workspace = true
|
smallbitvec.workspace = true
|
||||||
thiserror.workspace = true
|
url.workspace = true
|
||||||
topological-sort.workspace = true
|
|
||||||
|
|
||||||
[dev-dependencies]
|
tree-sitter.workspace = true
|
||||||
tempfile.workspace = true
|
|
||||||
32
cli/generate/build.rs
Normal file
|
|
@ -0,0 +1,32 @@
|
||||||
|
use std::{env, path::PathBuf, process::Command};
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
if let Some(git_sha) = read_git_sha() {
|
||||||
|
println!("cargo:rustc-env=BUILD_SHA={git_sha}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is copied from the build.rs in parent directory. This should be updated if the
|
||||||
|
// parent build.rs gets fixes.
|
||||||
|
fn read_git_sha() -> Option<String> {
|
||||||
|
let crate_path = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());
|
||||||
|
|
||||||
|
if !crate_path
|
||||||
|
.parent()?
|
||||||
|
.parent()
|
||||||
|
.is_some_and(|p| p.join(".git").exists())
|
||||||
|
{
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
Command::new("git")
|
||||||
|
.args(["rev-parse", "HEAD"])
|
||||||
|
.current_dir(crate_path)
|
||||||
|
.output()
|
||||||
|
.map_or(None, |output| {
|
||||||
|
if !output.status.success() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(String::from_utf8_lossy(&output.stdout).to_string())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
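The new build script above exports the git SHA through a `cargo:rustc-env` directive. A crate built with this script can then read the value at compile time via `option_env!`; the snippet below is a hedged illustration of that mechanism, not code taken from this repository.

```rust
// Illustrative only: consuming the BUILD_SHA environment variable set by the
// build script. `option_env!` yields `None` when the variable was not
// exported (e.g. when building outside a git checkout).
const BUILD_SHA: Option<&str> = option_env!("BUILD_SHA");

fn version_string(base: &str) -> String {
    match BUILD_SHA {
        // Show only a short prefix of the commit hash.
        Some(sha) => format!("{base} ({})", &sha[..sha.len().min(8)]),
        None => base.to_string(),
    }
}
```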
@ -3,7 +3,7 @@ use std::{
|
||||||
mem,
|
mem,
|
||||||
};
|
};
|
||||||
|
|
||||||
use log::debug;
|
use log::info;
|
||||||
|
|
||||||
use super::{coincident_tokens::CoincidentTokenIndex, token_conflicts::TokenConflictMap};
|
use super::{coincident_tokens::CoincidentTokenIndex, token_conflicts::TokenConflictMap};
|
||||||
use crate::{
|
use crate::{
|
||||||
|
|
@ -43,17 +43,15 @@ pub fn build_lex_table(
|
||||||
let tokens = state
|
let tokens = state
|
||||||
.terminal_entries
|
.terminal_entries
|
||||||
.keys()
|
.keys()
|
||||||
.copied()
|
|
||||||
.chain(state.reserved_words.iter())
|
|
||||||
.filter_map(|token| {
|
.filter_map(|token| {
|
||||||
if token.is_terminal() {
|
if token.is_terminal() {
|
||||||
if keywords.contains(&token) {
|
if keywords.contains(token) {
|
||||||
syntax_grammar.word_token
|
syntax_grammar.word_token
|
||||||
} else {
|
} else {
|
||||||
Some(token)
|
Some(*token)
|
||||||
}
|
}
|
||||||
} else if token.is_eof() {
|
} else if token.is_eof() {
|
||||||
Some(token)
|
Some(*token)
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
@ -176,8 +174,9 @@ impl<'a> LexTableBuilder<'a> {
|
||||||
let (state_id, is_new) = self.add_state(nfa_states, eof_valid);
|
let (state_id, is_new) = self.add_state(nfa_states, eof_valid);
|
||||||
|
|
||||||
if is_new {
|
if is_new {
|
||||||
debug!(
|
info!(
|
||||||
"entry point state: {state_id}, tokens: {:?}",
|
"entry point state: {}, tokens: {:?}",
|
||||||
|
state_id,
|
||||||
tokens
|
tokens
|
||||||
.iter()
|
.iter()
|
||||||
.map(|t| &self.lexical_grammar.variables[t.index].name)
|
.map(|t| &self.lexical_grammar.variables[t.index].name)
|
||||||
|
|
@ -358,7 +357,9 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
|
||||||
&mut group_ids_by_state_id,
|
&mut group_ids_by_state_id,
|
||||||
1,
|
1,
|
||||||
lex_states_differ,
|
lex_states_differ,
|
||||||
) {}
|
) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
let mut new_states = Vec::with_capacity(state_ids_by_group_id.len());
|
let mut new_states = Vec::with_capacity(state_ids_by_group_id.len());
|
||||||
for state_ids in &state_ids_by_group_id {
|
for state_ids in &state_ids_by_group_id {
|
||||||
|
|
@ -1,21 +1,22 @@
|
||||||
use std::{
|
use std::{
|
||||||
cmp::Ordering,
|
cmp::Ordering,
|
||||||
collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque},
|
collections::{BTreeMap, HashMap, HashSet, VecDeque},
|
||||||
|
fmt::Write,
|
||||||
hash::BuildHasherDefault,
|
hash::BuildHasherDefault,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use anyhow::{anyhow, Result};
|
||||||
use indexmap::{map::Entry, IndexMap};
|
use indexmap::{map::Entry, IndexMap};
|
||||||
use log::warn;
|
|
||||||
use rustc_hash::FxHasher;
|
use rustc_hash::FxHasher;
|
||||||
use serde::Serialize;
|
|
||||||
use thiserror::Error;
|
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
item::{ParseItem, ParseItemSet, ParseItemSetCore, ParseItemSetEntry},
|
item::{ParseItem, ParseItemSet, ParseItemSetCore},
|
||||||
item_set_builder::ParseItemSetBuilder,
|
item_set_builder::ParseItemSetBuilder,
|
||||||
};
|
};
|
||||||
use crate::{
|
use crate::{
|
||||||
grammars::{LexicalGrammar, PrecedenceEntry, ReservedWordSetId, SyntaxGrammar, VariableType},
|
grammars::{
|
||||||
|
InlinedProductionMap, LexicalGrammar, PrecedenceEntry, SyntaxGrammar, VariableType,
|
||||||
|
},
|
||||||
node_types::VariableInfo,
|
node_types::VariableInfo,
|
||||||
rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet},
|
rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet},
|
||||||
tables::{
|
tables::{
|
||||||
|
|
@ -65,208 +66,8 @@ struct ParseTableBuilder<'a> {
|
||||||
parse_table: ParseTable,
|
parse_table: ParseTable,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type BuildTableResult<T> = Result<T, ParseTableBuilderError>;
|
|
||||||
|
|
||||||
#[derive(Debug, Error, Serialize)]
|
|
||||||
pub enum ParseTableBuilderError {
|
|
||||||
#[error("Unresolved conflict for symbol sequence:\n\n{0}")]
|
|
||||||
Conflict(#[from] ConflictError),
|
|
||||||
#[error("Extra rules must have unambiguous endings. Conflicting rules: {0}")]
|
|
||||||
AmbiguousExtra(#[from] AmbiguousExtraError),
|
|
||||||
#[error(
|
|
||||||
"The non-terminal rule `{0}` is used in a non-terminal `extra` rule, which is not allowed."
|
|
||||||
)]
|
|
||||||
ImproperNonTerminalExtra(String),
|
|
||||||
#[error("State count `{0}` exceeds the max value {max}.", max=u16::MAX)]
|
|
||||||
StateCount(usize),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Default, Debug, Serialize, Error)]
|
|
||||||
pub struct ConflictError {
|
|
||||||
pub symbol_sequence: Vec<String>,
|
|
||||||
pub conflicting_lookahead: String,
|
|
||||||
pub possible_interpretations: Vec<Interpretation>,
|
|
||||||
pub possible_resolutions: Vec<Resolution>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Default, Debug, Serialize, Error)]
|
|
||||||
pub struct Interpretation {
|
|
||||||
pub preceding_symbols: Vec<String>,
|
|
||||||
pub variable_name: String,
|
|
||||||
pub production_step_symbols: Vec<String>,
|
|
||||||
pub step_index: u32,
|
|
||||||
pub done: bool,
|
|
||||||
pub conflicting_lookahead: String,
|
|
||||||
pub precedence: Option<String>,
|
|
||||||
pub associativity: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
|
||||||
pub enum Resolution {
|
|
||||||
Precedence { symbols: Vec<String> },
|
|
||||||
Associativity { symbols: Vec<String> },
|
|
||||||
AddConflict { symbols: Vec<String> },
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Error)]
|
|
||||||
pub struct AmbiguousExtraError {
|
|
||||||
pub parent_symbols: Vec<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Display for ConflictError {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
|
||||||
for symbol in &self.symbol_sequence {
|
|
||||||
write!(f, " {symbol}")?;
|
|
||||||
}
|
|
||||||
writeln!(f, " • {} …\n", self.conflicting_lookahead)?;
|
|
||||||
|
|
||||||
writeln!(f, "Possible interpretations:\n")?;
|
|
||||||
let mut interpretations = self
|
|
||||||
.possible_interpretations
|
|
||||||
.iter()
|
|
||||||
.map(|i| {
|
|
||||||
let line = i.to_string();
|
|
||||||
let prec_line = if let (Some(precedence), Some(associativity)) =
|
|
||||||
(&i.precedence, &i.associativity)
|
|
||||||
{
|
|
||||||
Some(format!(
|
|
||||||
"(precedence: {precedence}, associativity: {associativity})",
|
|
||||||
))
|
|
||||||
} else {
|
|
||||||
i.precedence
|
|
||||||
.as_ref()
|
|
||||||
.map(|precedence| format!("(precedence: {precedence})"))
|
|
||||||
};
|
|
||||||
|
|
||||||
(line, prec_line)
|
|
||||||
})
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
let max_interpretation_length = interpretations
|
|
||||||
.iter()
|
|
||||||
.map(|i| i.0.chars().count())
|
|
||||||
.max()
|
|
||||||
.unwrap();
|
|
||||||
interpretations.sort_unstable();
|
|
||||||
for (i, (line, prec_suffix)) in interpretations.into_iter().enumerate() {
|
|
||||||
write!(f, " {}:", i + 1).unwrap();
|
|
||||||
write!(f, "{line}")?;
|
|
||||||
if let Some(prec_suffix) = prec_suffix {
|
|
||||||
write!(
|
|
||||||
f,
|
|
||||||
"{:1$}",
|
|
||||||
"",
|
|
||||||
max_interpretation_length.saturating_sub(line.chars().count()) + 2
|
|
||||||
)?;
|
|
||||||
write!(f, "{prec_suffix}")?;
|
|
||||||
}
|
|
||||||
writeln!(f)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
writeln!(f, "\nPossible resolutions:\n")?;
|
|
||||||
for (i, resolution) in self.possible_resolutions.iter().enumerate() {
|
|
||||||
writeln!(f, " {}: {resolution}", i + 1)?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Display for Interpretation {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
|
||||||
for symbol in &self.preceding_symbols {
|
|
||||||
write!(f, " {symbol}")?;
|
|
||||||
}
|
|
||||||
write!(f, " ({}", self.variable_name)?;
|
|
||||||
for (i, symbol) in self.production_step_symbols.iter().enumerate() {
|
|
||||||
if i == self.step_index as usize {
|
|
||||||
write!(f, " •")?;
|
|
||||||
}
|
|
||||||
write!(f, " {symbol}")?;
|
|
||||||
}
|
|
||||||
write!(f, ")")?;
|
|
||||||
if self.done {
|
|
||||||
write!(f, " • {} …", self.conflicting_lookahead)?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Display for Resolution {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
|
||||||
match self {
|
|
||||||
Self::Precedence { symbols } => {
|
|
||||||
write!(f, "Specify a higher precedence in ")?;
|
|
||||||
for (i, symbol) in symbols.iter().enumerate() {
|
|
||||||
if i > 0 {
|
|
||||||
write!(f, " and ")?;
|
|
||||||
}
|
|
||||||
write!(f, "`{symbol}`")?;
|
|
||||||
}
|
|
||||||
write!(f, " than in the other rules.")?;
|
|
||||||
}
|
|
||||||
Self::Associativity { symbols } => {
|
|
||||||
write!(f, "Specify a left or right associativity in ")?;
|
|
||||||
for (i, symbol) in symbols.iter().enumerate() {
|
|
||||||
if i > 0 {
|
|
||||||
write!(f, ", ")?;
|
|
||||||
}
|
|
||||||
write!(f, "`{symbol}`")?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Self::AddConflict { symbols } => {
|
|
||||||
write!(f, "Add a conflict for these rules: ")?;
|
|
||||||
for (i, symbol) in symbols.iter().enumerate() {
|
|
||||||
if i > 0 {
|
|
||||||
write!(f, ", ")?;
|
|
||||||
}
|
|
||||||
write!(f, "`{symbol}`")?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Display for AmbiguousExtraError {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
|
||||||
for (i, symbol) in self.parent_symbols.iter().enumerate() {
|
|
||||||
if i > 0 {
|
|
||||||
write!(f, ", ")?;
|
|
||||||
}
|
|
||||||
write!(f, "{symbol}")?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> ParseTableBuilder<'a> {
|
impl<'a> ParseTableBuilder<'a> {
|
||||||
fn new(
|
fn build(mut self) -> Result<(ParseTable, Vec<ParseStateInfo<'a>>)> {
|
||||||
syntax_grammar: &'a SyntaxGrammar,
|
|
||||||
lexical_grammar: &'a LexicalGrammar,
|
|
||||||
item_set_builder: ParseItemSetBuilder<'a>,
|
|
||||||
variable_info: &'a [VariableInfo],
|
|
||||||
) -> Self {
|
|
||||||
Self {
|
|
||||||
syntax_grammar,
|
|
||||||
lexical_grammar,
|
|
||||||
item_set_builder,
|
|
||||||
variable_info,
|
|
||||||
non_terminal_extra_states: Vec::new(),
|
|
||||||
state_ids_by_item_set: IndexMap::default(),
|
|
||||||
core_ids_by_core: HashMap::new(),
|
|
||||||
parse_state_info_by_id: Vec::new(),
|
|
||||||
parse_state_queue: VecDeque::new(),
|
|
||||||
actual_conflicts: syntax_grammar.expected_conflicts.iter().cloned().collect(),
|
|
||||||
parse_table: ParseTable {
|
|
||||||
states: Vec::new(),
|
|
||||||
symbols: Vec::new(),
|
|
||||||
external_lex_states: Vec::new(),
|
|
||||||
production_infos: Vec::new(),
|
|
||||||
max_aliased_production_length: 1,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build(mut self) -> BuildTableResult<(ParseTable, Vec<ParseStateInfo<'a>>)> {
|
|
||||||
// Ensure that the empty alias sequence has index 0.
|
// Ensure that the empty alias sequence has index 0.
|
||||||
self.parse_table
|
self.parse_table
|
||||||
.production_infos
|
.production_infos
|
||||||
|
|
@ -279,13 +80,10 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
self.add_parse_state(
|
self.add_parse_state(
|
||||||
&Vec::new(),
|
&Vec::new(),
|
||||||
&Vec::new(),
|
&Vec::new(),
|
||||||
ParseItemSet {
|
ParseItemSet::with(std::iter::once((
|
||||||
entries: vec![ParseItemSetEntry {
|
ParseItem::start(),
|
||||||
item: ParseItem::start(),
|
std::iter::once(Symbol::end()).collect(),
|
||||||
lookaheads: std::iter::once(Symbol::end()).collect(),
|
))),
|
||||||
following_reserved_word_set: ReservedWordSetId::default(),
|
|
||||||
}],
|
|
||||||
},
|
|
||||||
);
|
);
|
||||||
|
|
||||||
// Compute the possible item sets for non-terminal extras.
|
// Compute the possible item sets for non-terminal extras.
|
||||||
|
|
@ -301,35 +99,23 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
non_terminal_extra_item_sets_by_first_terminal
|
non_terminal_extra_item_sets_by_first_terminal
|
||||||
.entry(production.first_symbol().unwrap())
|
.entry(production.first_symbol().unwrap())
|
||||||
.or_insert_with(ParseItemSet::default)
|
.or_insert_with(ParseItemSet::default)
|
||||||
.insert(ParseItem {
|
.insert(
|
||||||
variable_index: extra_non_terminal.index as u32,
|
ParseItem {
|
||||||
production,
|
variable_index: extra_non_terminal.index as u32,
|
||||||
step_index: 1,
|
production,
|
||||||
has_preceding_inherited_fields: false,
|
step_index: 1,
|
||||||
})
|
has_preceding_inherited_fields: false,
|
||||||
.lookaheads
|
},
|
||||||
.insert(Symbol::end_of_nonterminal_extra());
|
&std::iter::once(Symbol::end_of_nonterminal_extra()).collect(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let non_terminal_sets_len = non_terminal_extra_item_sets_by_first_terminal.len();
|
|
||||||
self.non_terminal_extra_states
|
|
||||||
.reserve(non_terminal_sets_len);
|
|
||||||
self.parse_state_info_by_id.reserve(non_terminal_sets_len);
|
|
||||||
self.parse_table.states.reserve(non_terminal_sets_len);
|
|
||||||
self.parse_state_queue.reserve(non_terminal_sets_len);
|
|
||||||
// Add a state for each starting terminal of a non-terminal extra rule.
|
// Add a state for each starting terminal of a non-terminal extra rule.
|
||||||
for (terminal, item_set) in non_terminal_extra_item_sets_by_first_terminal {
|
for (terminal, item_set) in non_terminal_extra_item_sets_by_first_terminal {
|
||||||
if terminal.is_non_terminal() {
|
self.non_terminal_extra_states
|
||||||
Err(ParseTableBuilderError::ImproperNonTerminalExtra(
|
.push((terminal, self.parse_table.states.len()));
|
||||||
self.symbol_name(&terminal),
|
self.add_parse_state(&Vec::new(), &Vec::new(), item_set);
|
||||||
))?;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add the parse state, and *then* push the terminal and the state id into the
|
|
||||||
// list of nonterminal extra states
|
|
||||||
let state_id = self.add_parse_state(&Vec::new(), &Vec::new(), item_set);
|
|
||||||
self.non_terminal_extra_states.push((terminal, state_id));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
while let Some(entry) = self.parse_state_queue.pop_front() {
|
while let Some(entry) = self.parse_state_queue.pop_front() {
|
||||||
|
|
@ -346,21 +132,17 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
if !self.actual_conflicts.is_empty() {
|
if !self.actual_conflicts.is_empty() {
|
||||||
warn!(
|
println!("Warning: unnecessary conflicts");
|
||||||
"unnecessary conflicts:\n {}",
|
for conflict in &self.actual_conflicts {
|
||||||
&self
|
println!(
|
||||||
.actual_conflicts
|
" {}",
|
||||||
.iter()
|
conflict
|
||||||
.map(|conflict| {
|
.iter()
|
||||||
conflict
|
.map(|symbol| format!("`{}`", self.symbol_name(symbol)))
|
||||||
.iter()
|
.collect::<Vec<_>>()
|
||||||
.map(|symbol| format!("`{}`", self.symbol_name(symbol)))
|
.join(", ")
|
||||||
.collect::<Vec<_>>()
|
);
|
||||||
.join(", ")
|
}
|
||||||
})
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.join("\n ")
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((self.parse_table, self.parse_state_info_by_id))
|
Ok((self.parse_table, self.parse_state_info_by_id))
|
||||||
|
|
@ -394,7 +176,6 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
external_lex_state_id: 0,
|
external_lex_state_id: 0,
|
||||||
terminal_entries: IndexMap::default(),
|
terminal_entries: IndexMap::default(),
|
||||||
nonterminal_entries: IndexMap::default(),
|
nonterminal_entries: IndexMap::default(),
|
||||||
reserved_words: TokenSet::default(),
|
|
||||||
core_id,
|
core_id,
|
||||||
});
|
});
|
||||||
self.parse_state_queue.push_back(ParseStateQueueEntry {
|
self.parse_state_queue.push_back(ParseStateQueueEntry {
|
||||||
|
|
@ -413,7 +194,7 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
mut preceding_auxiliary_symbols: AuxiliarySymbolSequence,
|
mut preceding_auxiliary_symbols: AuxiliarySymbolSequence,
|
||||||
state_id: ParseStateId,
|
state_id: ParseStateId,
|
||||||
item_set: &ParseItemSet<'a>,
|
item_set: &ParseItemSet<'a>,
|
||||||
) -> BuildTableResult<()> {
|
) -> Result<()> {
|
||||||
let mut terminal_successors = BTreeMap::new();
|
let mut terminal_successors = BTreeMap::new();
|
||||||
let mut non_terminal_successors = BTreeMap::new();
|
let mut non_terminal_successors = BTreeMap::new();
|
||||||
let mut lookaheads_with_conflicts = TokenSet::new();
|
let mut lookaheads_with_conflicts = TokenSet::new();
|
||||||
|
|
@ -421,18 +202,13 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
|
|
||||||
// Each item in the item set contributes to either a Shift action or a Reduce
|
// Each item in the item set contributes to either a Shift action or a Reduce
|
||||||
// action in this state.
|
// action in this state.
|
||||||
for ParseItemSetEntry {
|
for (item, lookaheads) in &item_set.entries {
|
||||||
item,
|
|
||||||
lookaheads,
|
|
||||||
following_reserved_word_set: reserved_lookaheads,
|
|
||||||
} in &item_set.entries
|
|
||||||
{
|
|
||||||
// If the item is unfinished, then this state has a transition for the item's
|
// If the item is unfinished, then this state has a transition for the item's
|
||||||
// next symbol. Advance the item to its next step and insert the resulting
|
// next symbol. Advance the item to its next step and insert the resulting
|
||||||
// item into the successor item set.
|
// item into the successor item set.
|
||||||
if let Some(next_symbol) = item.symbol() {
|
if let Some(next_symbol) = item.symbol() {
|
||||||
let mut successor = item.successor();
|
let mut successor = item.successor();
|
||||||
let successor_set = if next_symbol.is_non_terminal() {
|
if next_symbol.is_non_terminal() {
|
||||||
let variable = &self.syntax_grammar.variables[next_symbol.index];
|
let variable = &self.syntax_grammar.variables[next_symbol.index];
|
||||||
|
|
||||||
// Keep track of where auxiliary non-terminals (repeat symbols) are
|
// Keep track of where auxiliary non-terminals (repeat symbols) are
|
||||||
|
|
@ -461,16 +237,13 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
non_terminal_successors
|
non_terminal_successors
|
||||||
.entry(next_symbol)
|
.entry(next_symbol)
|
||||||
.or_insert_with(ParseItemSet::default)
|
.or_insert_with(ParseItemSet::default)
|
||||||
|
.insert(successor, lookaheads);
|
||||||
} else {
|
} else {
|
||||||
terminal_successors
|
terminal_successors
|
||||||
.entry(next_symbol)
|
.entry(next_symbol)
|
||||||
.or_insert_with(ParseItemSet::default)
|
.or_insert_with(ParseItemSet::default)
|
||||||
};
|
.insert(successor, lookaheads);
|
||||||
let successor_entry = successor_set.insert(successor);
|
}
|
||||||
successor_entry.lookaheads.insert_all(lookaheads);
|
|
||||||
successor_entry.following_reserved_word_set = successor_entry
|
|
||||||
.following_reserved_word_set
|
|
||||||
.max(*reserved_lookaheads);
|
|
||||||
}
|
}
|
||||||
// If the item is finished, then add a Reduce action to this state based
|
// If the item is finished, then add a Reduce action to this state based
|
||||||
// on this item.
|
// on this item.
|
||||||
|
|
@ -597,7 +370,7 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add actions for the grammar's `extra` symbols.
|
// Finally, add actions for the grammar's `extra` symbols.
|
||||||
let state = &mut self.parse_table.states[state_id];
|
let state = &mut self.parse_table.states[state_id];
|
||||||
let is_end_of_non_terminal_extra = state.is_end_of_non_terminal_extra();
|
let is_end_of_non_terminal_extra = state.is_end_of_non_terminal_extra();
|
||||||
|
|
||||||
|
|
@ -609,7 +382,7 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
let parent_symbols = item_set
|
let parent_symbols = item_set
|
||||||
.entries
|
.entries
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|ParseItemSetEntry { item, .. }| {
|
.filter_map(|(item, _)| {
|
||||||
if !item.is_augmented() && item.step_index > 0 {
|
if !item.is_augmented() && item.step_index > 0 {
|
||||||
Some(item.variable_index)
|
Some(item.variable_index)
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -617,18 +390,15 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.collect::<HashSet<_>>();
|
.collect::<HashSet<_>>();
|
||||||
let parent_symbol_names = parent_symbols
|
let mut message =
|
||||||
.iter()
|
"Extra rules must have unambiguous endings. Conflicting rules: ".to_string();
|
||||||
.map(|&variable_index| {
|
for (i, variable_index) in parent_symbols.iter().enumerate() {
|
||||||
self.syntax_grammar.variables[variable_index as usize]
|
if i > 0 {
|
||||||
.name
|
message += ", ";
|
||||||
.clone()
|
}
|
||||||
})
|
message += &self.syntax_grammar.variables[*variable_index as usize].name;
|
||||||
.collect::<Vec<_>>();
|
}
|
||||||
|
return Err(anyhow!(message));
|
||||||
Err(AmbiguousExtraError {
|
|
||||||
parent_symbols: parent_symbol_names,
|
|
||||||
})?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Add actions for the start tokens of each non-terminal extra rule.
|
// Add actions for the start tokens of each non-terminal extra rule.
|
||||||
|
|
@ -666,30 +436,6 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(keyword_capture_token) = self.syntax_grammar.word_token {
|
|
||||||
let reserved_word_set_id = item_set
|
|
||||||
.entries
|
|
||||||
.iter()
|
|
||||||
.filter_map(|entry| {
|
|
||||||
if let Some(next_step) = entry.item.step() {
|
|
||||||
if next_step.symbol == keyword_capture_token {
|
|
||||||
Some(next_step.reserved_word_set_id)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
} else if entry.lookaheads.contains(&keyword_capture_token) {
|
|
||||||
Some(entry.following_reserved_word_set)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.max();
|
|
||||||
if let Some(reserved_word_set_id) = reserved_word_set_id {
|
|
||||||
state.reserved_words =
|
|
||||||
self.syntax_grammar.reserved_word_sets[reserved_word_set_id.0].clone();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -701,7 +447,7 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
preceding_auxiliary_symbols: &[AuxiliarySymbolInfo],
|
preceding_auxiliary_symbols: &[AuxiliarySymbolInfo],
|
||||||
conflicting_lookahead: Symbol,
|
conflicting_lookahead: Symbol,
|
||||||
reduction_info: &ReductionInfo,
|
reduction_info: &ReductionInfo,
|
||||||
) -> BuildTableResult<()> {
|
) -> Result<()> {
|
||||||
let entry = self.parse_table.states[state_id]
|
let entry = self.parse_table.states[state_id]
|
||||||
.terminal_entries
|
.terminal_entries
|
||||||
.get_mut(&conflicting_lookahead)
|
.get_mut(&conflicting_lookahead)
|
||||||
|
|
@ -715,11 +461,8 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
// precedence, and there can still be SHIFT/REDUCE conflicts.
|
// precedence, and there can still be SHIFT/REDUCE conflicts.
|
||||||
let mut considered_associativity = false;
|
let mut considered_associativity = false;
|
||||||
let mut shift_precedence = Vec::<(&Precedence, Symbol)>::new();
|
let mut shift_precedence = Vec::<(&Precedence, Symbol)>::new();
|
||||||
let mut conflicting_items = BTreeSet::new();
|
let mut conflicting_items = HashSet::new();
|
||||||
for ParseItemSetEntry {
|
for (item, lookaheads) in &item_set.entries {
|
||||||
item, lookaheads, ..
|
|
||||||
} in &item_set.entries
|
|
||||||
{
|
|
||||||
if let Some(step) = item.step() {
|
if let Some(step) = item.step() {
|
||||||
if item.step_index > 0
|
if item.step_index > 0
|
||||||
&& self
|
&& self
|
||||||
|
|
@ -856,55 +599,93 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut conflict_error = ConflictError::default();
|
let mut msg = "Unresolved conflict for symbol sequence:\n\n".to_string();
|
||||||
for symbol in preceding_symbols {
|
for symbol in preceding_symbols {
|
||||||
conflict_error
|
write!(&mut msg, " {}", self.symbol_name(symbol)).unwrap();
|
||||||
.symbol_sequence
|
|
||||||
.push(self.symbol_name(symbol));
|
|
||||||
}
|
}
|
||||||
conflict_error.conflicting_lookahead = self.symbol_name(&conflicting_lookahead);
|
|
||||||
|
|
||||||
let interpretations = conflicting_items
|
writeln!(
|
||||||
|
&mut msg,
|
||||||
|
" • {} …\n",
|
||||||
|
self.symbol_name(&conflicting_lookahead)
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(&mut msg, "Possible interpretations:\n").unwrap();
|
||||||
|
|
||||||
|
let mut interpretations = conflicting_items
|
||||||
.iter()
|
.iter()
|
||||||
.map(|item| {
|
.map(|item| {
|
||||||
let preceding_symbols = preceding_symbols
|
let mut line = String::new();
|
||||||
|
for preceding_symbol in preceding_symbols
|
||||||
.iter()
|
.iter()
|
||||||
.take(preceding_symbols.len() - item.step_index as usize)
|
.take(preceding_symbols.len() - item.step_index as usize)
|
||||||
.map(|symbol| self.symbol_name(symbol))
|
{
|
||||||
.collect::<Vec<_>>();
|
write!(&mut line, " {}", self.symbol_name(preceding_symbol)).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
let variable_name = self.syntax_grammar.variables[item.variable_index as usize]
|
write!(
|
||||||
.name
|
&mut line,
|
||||||
.clone();
|
" ({}",
|
||||||
|
&self.syntax_grammar.variables[item.variable_index as usize].name
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let production_step_symbols = item
|
for (j, step) in item.production.steps.iter().enumerate() {
|
||||||
.production
|
if j as u32 == item.step_index {
|
||||||
.steps
|
write!(&mut line, " •").unwrap();
|
||||||
.iter()
|
}
|
||||||
.map(|step| self.symbol_name(&step.symbol))
|
write!(&mut line, " {}", self.symbol_name(&step.symbol)).unwrap();
|
||||||
.collect::<Vec<_>>();
|
}
|
||||||
|
|
||||||
let precedence = match item.precedence() {
|
write!(&mut line, ")").unwrap();
|
||||||
Precedence::None => None,
|
|
||||||
_ => Some(item.precedence().to_string()),
|
if item.is_done() {
|
||||||
|
write!(
|
||||||
|
&mut line,
|
||||||
|
" • {} …",
|
||||||
|
self.symbol_name(&conflicting_lookahead)
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
let precedence = item.precedence();
|
||||||
|
let associativity = item.associativity();
|
||||||
|
|
||||||
|
let prec_line = if let Some(associativity) = associativity {
|
||||||
|
Some(format!(
|
||||||
|
"(precedence: {precedence}, associativity: {associativity:?})",
|
||||||
|
))
|
||||||
|
} else if !precedence.is_none() {
|
||||||
|
Some(format!("(precedence: {precedence})"))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
let associativity = item.associativity().map(|assoc| format!("{assoc:?}"));
|
(line, prec_line)
|
||||||
|
|
||||||
Interpretation {
|
|
||||||
preceding_symbols,
|
|
||||||
variable_name,
|
|
||||||
production_step_symbols,
|
|
||||||
step_index: item.step_index,
|
|
||||||
done: item.is_done(),
|
|
||||||
conflicting_lookahead: self.symbol_name(&conflicting_lookahead),
|
|
||||||
precedence,
|
|
||||||
associativity,
|
|
||||||
}
|
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
conflict_error.possible_interpretations = interpretations;
|
|
||||||
|
|
||||||
|
let max_interpretation_length = interpretations
|
||||||
|
.iter()
|
||||||
|
.map(|i| i.0.chars().count())
|
||||||
|
.max()
|
||||||
|
.unwrap();
|
||||||
|
interpretations.sort_unstable();
|
||||||
|
for (i, (line, prec_suffix)) in interpretations.into_iter().enumerate() {
|
||||||
|
write!(&mut msg, " {}:", i + 1).unwrap();
|
||||||
|
msg += &line;
|
||||||
|
if let Some(prec_suffix) = prec_suffix {
|
||||||
|
for _ in line.chars().count()..max_interpretation_length {
|
||||||
|
msg.push(' ');
|
||||||
|
}
|
||||||
|
msg += " ";
|
||||||
|
msg += &prec_suffix;
|
||||||
|
}
|
||||||
|
msg.push('\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut resolution_count = 0;
|
||||||
|
writeln!(&mut msg, "\nPossible resolutions:\n").unwrap();
|
||||||
let mut shift_items = Vec::new();
|
let mut shift_items = Vec::new();
|
||||||
let mut reduce_items = Vec::new();
|
let mut reduce_items = Vec::new();
|
||||||
for item in conflicting_items {
|
for item in conflicting_items {
|
||||||
|
|
@ -917,57 +698,76 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
shift_items.sort_unstable();
|
shift_items.sort_unstable();
|
||||||
reduce_items.sort_unstable();
|
reduce_items.sort_unstable();
|
||||||
|
|
||||||
let get_rule_names = |items: &[&ParseItem]| -> Vec<String> {
|
let list_rule_names = |mut msg: &mut String, items: &[&ParseItem]| {
|
||||||
let mut last_rule_id = None;
|
let mut last_rule_id = None;
|
||||||
let mut result = Vec::with_capacity(items.len());
|
|
||||||
for item in items {
|
for item in items {
|
||||||
if last_rule_id == Some(item.variable_index) {
|
if last_rule_id == Some(item.variable_index) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
last_rule_id = Some(item.variable_index);
|
|
||||||
result.push(self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)));
|
|
||||||
}
|
|
||||||
|
|
||||||
result
|
if last_rule_id.is_some() {
|
||||||
|
write!(&mut msg, " and").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
last_rule_id = Some(item.variable_index);
|
||||||
|
write!(
|
||||||
|
msg,
|
||||||
|
" `{}`",
|
||||||
|
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if actual_conflict.len() > 1 {
|
if actual_conflict.len() > 1 {
|
||||||
if !shift_items.is_empty() {
|
if !shift_items.is_empty() {
|
||||||
let names = get_rule_names(&shift_items);
|
resolution_count += 1;
|
||||||
conflict_error
|
write!(
|
||||||
.possible_resolutions
|
&mut msg,
|
||||||
.push(Resolution::Precedence { symbols: names });
|
" {resolution_count}: Specify a higher precedence in",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
list_rule_names(&mut msg, &shift_items);
|
||||||
|
writeln!(&mut msg, " than in the other rules.").unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
for item in &reduce_items {
|
for item in &reduce_items {
|
||||||
let name = self.symbol_name(&Symbol::non_terminal(item.variable_index as usize));
|
resolution_count += 1;
|
||||||
conflict_error
|
writeln!(
|
||||||
.possible_resolutions
|
&mut msg,
|
||||||
.push(Resolution::Precedence {
|
" {resolution_count}: Specify a higher precedence in `{}` than in the other rules.",
|
||||||
symbols: vec![name],
|
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
|
||||||
});
|
)
|
||||||
|
.unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if considered_associativity {
|
if considered_associativity {
|
||||||
let names = get_rule_names(&reduce_items);
|
resolution_count += 1;
|
||||||
conflict_error
|
write!(
|
||||||
.possible_resolutions
|
&mut msg,
|
||||||
.push(Resolution::Associativity { symbols: names });
|
" {resolution_count}: Specify a left or right associativity in",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
list_rule_names(&mut msg, &reduce_items);
|
||||||
|
writeln!(&mut msg).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
conflict_error
|
resolution_count += 1;
|
||||||
.possible_resolutions
|
write!(
|
||||||
.push(Resolution::AddConflict {
|
&mut msg,
|
||||||
symbols: actual_conflict
|
" {resolution_count}: Add a conflict for these rules: ",
|
||||||
.iter()
|
)
|
||||||
.map(|s| self.symbol_name(s))
|
.unwrap();
|
||||||
.collect(),
|
for (i, symbol) in actual_conflict.iter().enumerate() {
|
||||||
});
|
if i > 0 {
|
||||||
|
write!(&mut msg, ", ").unwrap();
|
||||||
|
}
|
||||||
|
write!(&mut msg, "`{}`", self.symbol_name(symbol)).unwrap();
|
||||||
|
}
|
||||||
|
writeln!(&mut msg).unwrap();
|
||||||
|
|
||||||
self.actual_conflicts.insert(actual_conflict);
|
Err(anyhow!(msg))
|
||||||
|
|
||||||
Err(conflict_error)?
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn compare_precedence(
|
fn compare_precedence(
|
||||||
|
|
@ -1036,7 +836,7 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
let parent_symbols = item_set
|
let parent_symbols = item_set
|
||||||
.entries
|
.entries
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|ParseItemSetEntry { item, .. }| {
|
.filter_map(|(item, _)| {
|
||||||
let variable_index = item.variable_index as usize;
|
let variable_index = item.variable_index as usize;
|
||||||
if item.symbol() == Some(symbol)
|
if item.symbol() == Some(symbol)
|
||||||
&& !self.syntax_grammar.variables[variable_index].is_auxiliary()
|
&& !self.syntax_grammar.variables[variable_index].is_auxiliary()
|
||||||
|
|
@ -1124,24 +924,84 @@ impl<'a> ParseTableBuilder<'a> {
|
||||||
if variable.kind == VariableType::Named {
|
if variable.kind == VariableType::Named {
|
||||||
variable.name.clone()
|
variable.name.clone()
|
||||||
} else {
|
} else {
|
||||||
format!("'{}'", variable.name)
|
format!("'{}'", &variable.name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn populate_following_tokens(
|
||||||
|
result: &mut [TokenSet],
|
||||||
|
grammar: &SyntaxGrammar,
|
||||||
|
inlines: &InlinedProductionMap,
|
||||||
|
builder: &ParseItemSetBuilder,
|
||||||
|
) {
|
||||||
|
let productions = grammar
|
||||||
|
.variables
|
||||||
|
.iter()
|
||||||
|
.flat_map(|v| &v.productions)
|
||||||
|
.chain(&inlines.productions);
|
||||||
|
let all_tokens = (0..result.len())
|
||||||
|
.map(Symbol::terminal)
|
||||||
|
.collect::<TokenSet>();
|
||||||
|
for production in productions {
|
||||||
|
for i in 1..production.steps.len() {
|
||||||
|
let left_tokens = builder.last_set(&production.steps[i - 1].symbol);
|
||||||
|
let right_tokens = builder.first_set(&production.steps[i].symbol);
|
||||||
|
for left_token in left_tokens.iter() {
|
||||||
|
if left_token.is_terminal() {
|
||||||
|
result[left_token.index].insert_all_terminals(right_tokens);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for extra in &grammar.extra_symbols {
|
||||||
|
if extra.is_terminal() {
|
||||||
|
for entry in result.iter_mut() {
|
||||||
|
entry.insert(*extra);
|
||||||
|
}
|
||||||
|
result[extra.index].clone_from(&all_tokens);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn build_parse_table<'a>(
|
pub fn build_parse_table<'a>(
|
||||||
syntax_grammar: &'a SyntaxGrammar,
|
syntax_grammar: &'a SyntaxGrammar,
|
||||||
lexical_grammar: &'a LexicalGrammar,
|
lexical_grammar: &'a LexicalGrammar,
|
||||||
item_set_builder: ParseItemSetBuilder<'a>,
|
inlines: &'a InlinedProductionMap,
|
||||||
variable_info: &'a [VariableInfo],
|
variable_info: &'a [VariableInfo],
|
||||||
) -> BuildTableResult<(ParseTable, Vec<ParseStateInfo<'a>>)> {
|
) -> Result<(ParseTable, Vec<TokenSet>, Vec<ParseStateInfo<'a>>)> {
|
||||||
ParseTableBuilder::new(
|
let actual_conflicts = syntax_grammar.expected_conflicts.iter().cloned().collect();
|
||||||
|
let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
|
||||||
|
let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()];
|
||||||
|
populate_following_tokens(
|
||||||
|
&mut following_tokens,
|
||||||
|
syntax_grammar,
|
||||||
|
inlines,
|
||||||
|
&item_set_builder,
|
||||||
|
);
|
||||||
|
|
||||||
|
let (table, item_sets) = ParseTableBuilder {
|
||||||
syntax_grammar,
|
syntax_grammar,
|
||||||
lexical_grammar,
|
lexical_grammar,
|
||||||
item_set_builder,
|
item_set_builder,
|
||||||
variable_info,
|
variable_info,
|
||||||
)
|
non_terminal_extra_states: Vec::new(),
|
||||||
.build()
|
actual_conflicts,
|
||||||
|
state_ids_by_item_set: IndexMap::default(),
|
||||||
|
core_ids_by_core: HashMap::new(),
|
||||||
|
parse_state_info_by_id: Vec::new(),
|
||||||
|
parse_state_queue: VecDeque::new(),
|
||||||
|
parse_table: ParseTable {
|
||||||
|
states: Vec::new(),
|
||||||
|
symbols: Vec::new(),
|
||||||
|
external_lex_states: Vec::new(),
|
||||||
|
production_infos: Vec::new(),
|
||||||
|
max_aliased_production_length: 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
.build()?;
|
||||||
|
|
||||||
|
Ok((table, following_tokens, item_sets))
|
||||||
}
|
}
|
||||||
|
|
@ -2,31 +2,30 @@ use std::{
|
||||||
cmp::Ordering,
|
cmp::Ordering,
|
||||||
fmt,
|
fmt,
|
||||||
hash::{Hash, Hasher},
|
hash::{Hash, Hasher},
|
||||||
sync::LazyLock,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
grammars::{
|
grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar},
|
||||||
LexicalGrammar, Production, ProductionStep, ReservedWordSetId, SyntaxGrammar,
|
|
||||||
NO_RESERVED_WORDS,
|
|
||||||
},
|
|
||||||
rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet},
|
rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet},
|
||||||
};
|
};
|
||||||
|
|
||||||
static START_PRODUCTION: LazyLock<Production> = LazyLock::new(|| Production {
|
lazy_static! {
|
||||||
dynamic_precedence: 0,
|
static ref START_PRODUCTION: Production = Production {
|
||||||
steps: vec![ProductionStep {
|
dynamic_precedence: 0,
|
||||||
symbol: Symbol {
|
steps: vec![ProductionStep {
|
||||||
index: 0,
|
symbol: Symbol {
|
||||||
kind: SymbolType::NonTerminal,
|
index: 0,
|
||||||
},
|
kind: SymbolType::NonTerminal,
|
||||||
precedence: Precedence::None,
|
},
|
||||||
associativity: None,
|
precedence: Precedence::None,
|
||||||
alias: None,
|
associativity: None,
|
||||||
field_name: None,
|
alias: None,
|
||||||
reserved_word_set_id: NO_RESERVED_WORDS,
|
field_name: None,
|
||||||
}],
|
}],
|
||||||
});
|
};
|
||||||
|
}
|
||||||
|
|
||||||
/// A [`ParseItem`] represents an in-progress match of a single production in a grammar.
|
/// A [`ParseItem`] represents an in-progress match of a single production in a grammar.
|
||||||
#[derive(Clone, Copy, Debug)]
|
#[derive(Clone, Copy, Debug)]
|
||||||
|
|
@ -59,14 +58,7 @@ pub struct ParseItem<'a> {
|
||||||
/// to a state in the final parse table.
|
/// to a state in the final parse table.
|
||||||
#[derive(Clone, Debug, PartialEq, Eq, Default)]
|
#[derive(Clone, Debug, PartialEq, Eq, Default)]
|
||||||
pub struct ParseItemSet<'a> {
|
pub struct ParseItemSet<'a> {
|
||||||
pub entries: Vec<ParseItemSetEntry<'a>>,
|
pub entries: Vec<(ParseItem<'a>, TokenSet)>,
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
|
||||||
pub struct ParseItemSetEntry<'a> {
|
|
||||||
pub item: ParseItem<'a>,
|
|
||||||
pub lookaheads: TokenSet,
|
|
||||||
pub following_reserved_word_set: ReservedWordSetId,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A [`ParseItemSetCore`] is like a [`ParseItemSet`], but without the lookahead
|
/// A [`ParseItemSetCore`] is like a [`ParseItemSet`], but without the lookahead
|
||||||
|
|
@ -160,26 +152,30 @@ impl<'a> ParseItem<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> ParseItemSet<'a> {
|
impl<'a> ParseItemSet<'a> {
|
||||||
pub fn insert(&mut self, item: ParseItem<'a>) -> &mut ParseItemSetEntry<'a> {
|
pub fn with(elements: impl IntoIterator<Item = (ParseItem<'a>, TokenSet)>) -> Self {
|
||||||
match self.entries.binary_search_by(|e| e.item.cmp(&item)) {
|
let mut result = Self::default();
|
||||||
|
for (item, lookaheads) in elements {
|
||||||
|
result.insert(item, &lookaheads);
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn insert(&mut self, item: ParseItem<'a>, lookaheads: &TokenSet) -> &mut TokenSet {
|
||||||
|
match self.entries.binary_search_by(|(i, _)| i.cmp(&item)) {
|
||||||
Err(i) => {
|
Err(i) => {
|
||||||
self.entries.insert(
|
self.entries.insert(i, (item, lookaheads.clone()));
|
||||||
i,
|
&mut self.entries[i].1
|
||||||
ParseItemSetEntry {
|
}
|
||||||
item,
|
Ok(i) => {
|
||||||
lookaheads: TokenSet::new(),
|
self.entries[i].1.insert_all(lookaheads);
|
||||||
following_reserved_word_set: ReservedWordSetId::default(),
|
&mut self.entries[i].1
|
||||||
},
|
|
||||||
);
|
|
||||||
&mut self.entries[i]
|
|
||||||
}
|
}
|
||||||
Ok(i) => &mut self.entries[i],
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn core(&self) -> ParseItemSetCore<'a> {
|
pub fn core(&self) -> ParseItemSetCore<'a> {
|
||||||
ParseItemSetCore {
|
ParseItemSetCore {
|
||||||
entries: self.entries.iter().map(|e| e.item).collect(),
|
entries: self.entries.iter().map(|e| e.0).collect(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -192,42 +188,35 @@ impl fmt::Display for ParseItemDisplay<'_> {
|
||||||
write!(
|
write!(
|
||||||
f,
|
f,
|
||||||
"{} →",
|
"{} →",
|
||||||
self.1.variables[self.0.variable_index as usize].name
|
&self.1.variables[self.0.variable_index as usize].name
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i, step) in self.0.production.steps.iter().enumerate() {
|
for (i, step) in self.0.production.steps.iter().enumerate() {
|
||||||
if i == self.0.step_index as usize {
|
if i == self.0.step_index as usize {
|
||||||
write!(f, " •")?;
|
write!(f, " •")?;
|
||||||
if !step.precedence.is_none()
|
if let Some(associativity) = step.associativity {
|
||||||
|| step.associativity.is_some()
|
if step.precedence.is_none() {
|
||||||
|| step.reserved_word_set_id != ReservedWordSetId::default()
|
write!(f, " ({associativity:?})")?;
|
||||||
{
|
} else {
|
||||||
write!(f, " (")?;
|
write!(f, " ({} {associativity:?})", step.precedence)?;
|
||||||
if !step.precedence.is_none() {
|
|
||||||
write!(f, " {}", step.precedence)?;
|
|
||||||
}
|
}
|
||||||
if let Some(associativity) = step.associativity {
|
} else if !step.precedence.is_none() {
|
||||||
write!(f, " {associativity:?}")?;
|
write!(f, " ({})", step.precedence)?;
|
||||||
}
|
|
||||||
if step.reserved_word_set_id != ReservedWordSetId::default() {
|
|
||||||
write!(f, "reserved: {}", step.reserved_word_set_id)?;
|
|
||||||
}
|
|
||||||
write!(f, " )")?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
write!(f, " ")?;
|
write!(f, " ")?;
|
||||||
if step.symbol.is_terminal() {
|
if step.symbol.is_terminal() {
|
||||||
if let Some(variable) = self.2.variables.get(step.symbol.index) {
|
if let Some(variable) = self.2.variables.get(step.symbol.index) {
|
||||||
write!(f, "{}", variable.name)?;
|
write!(f, "{}", &variable.name)?;
|
||||||
} else {
|
} else {
|
||||||
write!(f, "terminal-{}", step.symbol.index)?;
|
write!(f, "terminal-{}", step.symbol.index)?;
|
||||||
}
|
}
|
||||||
} else if step.symbol.is_external() {
|
} else if step.symbol.is_external() {
|
||||||
write!(f, "{}", self.1.external_tokens[step.symbol.index].name)?;
|
write!(f, "{}", &self.1.external_tokens[step.symbol.index].name)?;
|
||||||
} else {
|
} else {
|
||||||
write!(f, "{}", self.1.variables[step.symbol.index].name)?;
|
write!(f, "{}", &self.1.variables[step.symbol.index].name)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(alias) = &step.alias {
|
if let Some(alias) = &step.alias {
|
||||||
|
|
@ -254,32 +243,6 @@ impl fmt::Display for ParseItemDisplay<'_> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const fn escape_invisible(c: char) -> Option<&'static str> {
|
|
||||||
Some(match c {
|
|
||||||
'\n' => "\\n",
|
|
||||||
'\r' => "\\r",
|
|
||||||
'\t' => "\\t",
|
|
||||||
'\0' => "\\0",
|
|
||||||
'\\' => "\\\\",
|
|
||||||
'\x0b' => "\\v",
|
|
||||||
'\x0c' => "\\f",
|
|
||||||
_ => return None,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn display_variable_name(source: &str) -> String {
|
|
||||||
source
|
|
||||||
.chars()
|
|
||||||
.fold(String::with_capacity(source.len()), |mut acc, c| {
|
|
||||||
if let Some(esc) = escape_invisible(c) {
|
|
||||||
acc.push_str(esc);
|
|
||||||
} else {
|
|
||||||
acc.push(c);
|
|
||||||
}
|
|
||||||
acc
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for TokenSetDisplay<'_> {
|
impl fmt::Display for TokenSetDisplay<'_> {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||||
write!(f, "[")?;
|
write!(f, "[")?;
|
||||||
|
|
@ -290,14 +253,14 @@ impl fmt::Display for TokenSetDisplay<'_> {
|
||||||
|
|
||||||
if symbol.is_terminal() {
|
if symbol.is_terminal() {
|
||||||
if let Some(variable) = self.2.variables.get(symbol.index) {
|
if let Some(variable) = self.2.variables.get(symbol.index) {
|
||||||
write!(f, "{}", display_variable_name(&variable.name))?;
|
write!(f, "{}", &variable.name)?;
|
||||||
} else {
|
} else {
|
||||||
write!(f, "terminal-{}", symbol.index)?;
|
write!(f, "terminal-{}", symbol.index)?;
|
||||||
}
|
}
|
||||||
} else if symbol.is_external() {
|
} else if symbol.is_external() {
|
||||||
write!(f, "{}", self.1.external_tokens[symbol.index].name)?;
|
write!(f, "{}", &self.1.external_tokens[symbol.index].name)?;
|
||||||
} else {
|
} else {
|
||||||
write!(f, "{}", self.1.variables[symbol.index].name)?;
|
write!(f, "{}", &self.1.variables[symbol.index].name)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
write!(f, "]")?;
|
write!(f, "]")?;
|
||||||
|
|
@ -307,21 +270,13 @@ impl fmt::Display for TokenSetDisplay<'_> {
|
||||||
|
|
||||||
impl fmt::Display for ParseItemSetDisplay<'_> {
|
impl fmt::Display for ParseItemSetDisplay<'_> {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||||
for entry in &self.0.entries {
|
for (item, lookaheads) in &self.0.entries {
|
||||||
write!(
|
writeln!(
|
||||||
f,
|
f,
|
||||||
"{}\t{}",
|
"{}\t{}",
|
||||||
ParseItemDisplay(&entry.item, self.1, self.2),
|
ParseItemDisplay(item, self.1, self.2),
|
||||||
TokenSetDisplay(&entry.lookaheads, self.1, self.2),
|
TokenSetDisplay(lookaheads, self.1, self.2)
|
||||||
)?;
|
)?;
|
||||||
if entry.following_reserved_word_set != ReservedWordSetId::default() {
|
|
||||||
write!(
|
|
||||||
f,
|
|
||||||
"\treserved word set: {}",
|
|
||||||
entry.following_reserved_word_set
|
|
||||||
)?;
|
|
||||||
}
|
|
||||||
writeln!(f)?;
|
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
@ -341,7 +296,7 @@ impl Hash for ParseItem<'_> {
|
||||||
// this item, unless any of the following are true:
|
// this item, unless any of the following are true:
|
||||||
// * the children have fields
|
// * the children have fields
|
||||||
// * the children have aliases
|
// * the children have aliases
|
||||||
// * the children are hidden and represent rules that have fields.
|
// * the children are hidden and
|
||||||
// See the docs for `has_preceding_inherited_fields`.
|
// See the docs for `has_preceding_inherited_fields`.
|
||||||
for step in &self.production.steps[0..self.step_index as usize] {
|
for step in &self.production.steps[0..self.step_index as usize] {
|
||||||
step.alias.hash(hasher);
|
step.alias.hash(hasher);
|
||||||
|
|
@ -444,10 +399,9 @@ impl Eq for ParseItem<'_> {}
|
||||||
impl Hash for ParseItemSet<'_> {
|
impl Hash for ParseItemSet<'_> {
|
||||||
fn hash<H: Hasher>(&self, hasher: &mut H) {
|
fn hash<H: Hasher>(&self, hasher: &mut H) {
|
||||||
hasher.write_usize(self.entries.len());
|
hasher.write_usize(self.entries.len());
|
||||||
for entry in &self.entries {
|
for (item, lookaheads) in &self.entries {
|
||||||
entry.item.hash(hasher);
|
item.hash(hasher);
|
||||||
entry.lookaheads.hash(hasher);
|
lookaheads.hash(hasher);
|
||||||
entry.following_reserved_word_set.hash(hasher);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -3,9 +3,9 @@ use std::{
|
||||||
fmt,
|
fmt,
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, ParseItemSetEntry, TokenSetDisplay};
|
use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSetDisplay};
|
||||||
use crate::{
|
use crate::{
|
||||||
grammars::{InlinedProductionMap, LexicalGrammar, ReservedWordSetId, SyntaxGrammar},
|
grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar},
|
||||||
rules::{Symbol, SymbolType, TokenSet},
|
rules::{Symbol, SymbolType, TokenSet},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -15,10 +15,9 @@ struct TransitiveClosureAddition<'a> {
|
||||||
info: FollowSetInfo,
|
info: FollowSetInfo,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
struct FollowSetInfo {
|
struct FollowSetInfo {
|
||||||
lookaheads: TokenSet,
|
lookaheads: TokenSet,
|
||||||
reserved_lookaheads: ReservedWordSetId,
|
|
||||||
propagates_lookaheads: bool,
|
propagates_lookaheads: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -26,7 +25,6 @@ pub struct ParseItemSetBuilder<'a> {
|
||||||
syntax_grammar: &'a SyntaxGrammar,
|
syntax_grammar: &'a SyntaxGrammar,
|
||||||
lexical_grammar: &'a LexicalGrammar,
|
lexical_grammar: &'a LexicalGrammar,
|
||||||
first_sets: HashMap<Symbol, TokenSet>,
|
first_sets: HashMap<Symbol, TokenSet>,
|
||||||
reserved_first_sets: HashMap<Symbol, ReservedWordSetId>,
|
|
||||||
last_sets: HashMap<Symbol, TokenSet>,
|
last_sets: HashMap<Symbol, TokenSet>,
|
||||||
inlines: &'a InlinedProductionMap,
|
inlines: &'a InlinedProductionMap,
|
||||||
transitive_closure_additions: Vec<Vec<TransitiveClosureAddition<'a>>>,
|
transitive_closure_additions: Vec<Vec<TransitiveClosureAddition<'a>>>,
|
||||||
|
|
@ -48,7 +46,6 @@ impl<'a> ParseItemSetBuilder<'a> {
|
||||||
syntax_grammar,
|
syntax_grammar,
|
||||||
lexical_grammar,
|
lexical_grammar,
|
||||||
first_sets: HashMap::new(),
|
first_sets: HashMap::new(),
|
||||||
reserved_first_sets: HashMap::new(),
|
|
||||||
last_sets: HashMap::new(),
|
last_sets: HashMap::new(),
|
||||||
inlines,
|
inlines,
|
||||||
transitive_closure_additions: vec![Vec::new(); syntax_grammar.variables.len()],
|
transitive_closure_additions: vec![Vec::new(); syntax_grammar.variables.len()],
|
||||||
|
|
@ -57,7 +54,8 @@ impl<'a> ParseItemSetBuilder<'a> {
|
||||||
// For each grammar symbol, populate the FIRST and LAST sets: the set of
|
// For each grammar symbol, populate the FIRST and LAST sets: the set of
|
||||||
// terminals that appear at the beginning and end of that symbol's productions,
|
// terminals that appear at the beginning and end of that symbol's productions,
|
||||||
// respectively.
|
// respectively.
|
||||||
// For a terminal symbol, the FIRST and LAST sets just consist of the
|
//
|
||||||
|
// For a terminal symbol, the FIRST and LAST set just consists of the
|
||||||
// terminal itself.
|
// terminal itself.
|
||||||
for i in 0..lexical_grammar.variables.len() {
|
for i in 0..lexical_grammar.variables.len() {
|
||||||
let symbol = Symbol::terminal(i);
|
let symbol = Symbol::terminal(i);
|
||||||
|
|
@@ -65,9 +63,6 @@ impl<'a> ParseItemSetBuilder<'a> {
             set.insert(symbol);
             result.first_sets.insert(symbol, set.clone());
             result.last_sets.insert(symbol, set);
-            result
-                .reserved_first_sets
-                .insert(symbol, ReservedWordSetId::default());
         }
 
         for i in 0..syntax_grammar.external_tokens.len() {
@@ -76,15 +71,12 @@ impl<'a> ParseItemSetBuilder<'a> {
             set.insert(symbol);
             result.first_sets.insert(symbol, set.clone());
             result.last_sets.insert(symbol, set);
-            result
-                .reserved_first_sets
-                .insert(symbol, ReservedWordSetId::default());
         }
 
-        // The FIRST set of a non-terminal `i` is the union of the FIRST sets
-        // of all the symbols that appear at the beginnings of i's productions. Some
-        // of these symbols may themselves be non-terminals, so this is a recursive
-        // definition.
+        // The FIRST set of a non-terminal `i` is the union of the following sets:
+        // * the set of all terminals that appear at the beginnings of i's productions
+        // * the FIRST sets of all the non-terminals that appear at the beginnings of i's
+        //   productions
         //
         // Rather than computing these sets using recursion, we use an explicit stack
         // called `symbols_to_process`.
@@ -92,36 +84,37 @@ impl<'a> ParseItemSetBuilder<'a> {
         let mut processed_non_terminals = HashSet::new();
         for i in 0..syntax_grammar.variables.len() {
             let symbol = Symbol::non_terminal(i);
-            let first_set = result.first_sets.entry(symbol).or_default();
-            let reserved_first_set = result.reserved_first_sets.entry(symbol).or_default();
 
+            let first_set = result
+                .first_sets
+                .entry(symbol)
+                .or_insert_with(TokenSet::new);
             processed_non_terminals.clear();
             symbols_to_process.clear();
             symbols_to_process.push(symbol);
-            while let Some(sym) = symbols_to_process.pop() {
-                for production in &syntax_grammar.variables[sym.index].productions {
-                    if let Some(step) = production.steps.first() {
-                        if step.symbol.is_terminal() || step.symbol.is_external() {
-                            first_set.insert(step.symbol);
-                        } else if processed_non_terminals.insert(step.symbol) {
-                            symbols_to_process.push(step.symbol);
-                        }
-                        *reserved_first_set = (*reserved_first_set).max(step.reserved_word_set_id);
+            while let Some(current_symbol) = symbols_to_process.pop() {
+                if current_symbol.is_terminal() || current_symbol.is_external() {
+                    first_set.insert(current_symbol);
+                } else if processed_non_terminals.insert(current_symbol) {
+                    for production in &syntax_grammar.variables[current_symbol.index].productions {
+                        if let Some(step) = production.steps.first() {
+                            symbols_to_process.push(step.symbol);
+                        }
                     }
                 }
             }
 
             // The LAST set is defined in a similar way to the FIRST set.
-            let last_set = result.last_sets.entry(symbol).or_default();
+            let last_set = result.last_sets.entry(symbol).or_insert_with(TokenSet::new);
             processed_non_terminals.clear();
             symbols_to_process.clear();
             symbols_to_process.push(symbol);
-            while let Some(sym) = symbols_to_process.pop() {
-                for production in &syntax_grammar.variables[sym.index].productions {
-                    if let Some(step) = production.steps.last() {
-                        if step.symbol.is_terminal() || step.symbol.is_external() {
-                            last_set.insert(step.symbol);
-                        } else if processed_non_terminals.insert(step.symbol) {
-                            symbols_to_process.push(step.symbol);
-                        }
+            while let Some(current_symbol) = symbols_to_process.pop() {
+                if current_symbol.is_terminal() || current_symbol.is_external() {
+                    last_set.insert(current_symbol);
+                } else if processed_non_terminals.insert(current_symbol) {
+                    for production in &syntax_grammar.variables[current_symbol.index].productions {
+                        if let Some(step) = production.steps.last() {
+                            symbols_to_process.push(step.symbol);
+                        }
                     }
                 }
             }
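Both sides of the hunk above compute FIRST and LAST sets iteratively rather than recursively, driving the traversal with an explicit stack and a "processed" set. Below is a minimal, self-contained sketch of that idea for FIRST sets only; the `Grammar` and `Sym` types are hypothetical stand-ins for the real `SyntaxGrammar` and `Symbol`, not the crate's actual API.

```rust
use std::collections::{HashMap, HashSet};

/// Hypothetical stand-ins for the generator's SyntaxGrammar / Symbol types.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
enum Sym {
    Terminal(usize),
    NonTerminal(usize),
}

struct Grammar {
    /// productions[i] lists the productions of non-terminal `i`,
    /// each production given as its sequence of symbols.
    productions: Vec<Vec<Vec<Sym>>>,
}

/// Compute the FIRST set of every non-terminal without recursion,
/// using an explicit stack of symbols still to process.
fn first_sets(grammar: &Grammar) -> HashMap<usize, HashSet<usize>> {
    let mut result = HashMap::new();
    for i in 0..grammar.productions.len() {
        let mut first = HashSet::new();
        let mut processed = HashSet::new();
        let mut stack = vec![Sym::NonTerminal(i)];
        while let Some(symbol) = stack.pop() {
            match symbol {
                // A terminal is its own FIRST set.
                Sym::Terminal(t) => {
                    first.insert(t);
                }
                // For a non-terminal, enqueue the first symbol of each of its
                // productions, visiting each non-terminal at most once.
                Sym::NonTerminal(n) => {
                    if processed.insert(n) {
                        for production in &grammar.productions[n] {
                            if let Some(&step) = production.first() {
                                stack.push(step);
                            }
                        }
                    }
                }
            }
        }
        result.insert(i, first);
    }
    result
}

fn main() {
    // Toy grammar: S -> A "a" | "b" ; A -> "c"  (terminals 0="a", 1="b", 2="c")
    let grammar = Grammar {
        productions: vec![
            vec![vec![Sym::NonTerminal(1), Sym::Terminal(0)], vec![Sym::Terminal(1)]],
            vec![vec![Sym::Terminal(2)]],
        ],
    };
    println!("{:?}", first_sets(&grammar)); // FIRST(S) = {1, 2}, FIRST(A) = {2}
}
```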
@@ -131,75 +124,67 @@ impl<'a> ParseItemSetBuilder<'a> {
         // To compute an item set's transitive closure, we find each item in the set
         // whose next symbol is a non-terminal, and we add new items to the set for
-        // each of that symbol's productions. These productions might themselves begin
+        // each of that symbols' productions. These productions might themselves begin
         // with non-terminals, so the process continues recursively. In this process,
         // the total set of entries that get added depends only on two things:
-        //
-        // * the non-terminal symbol that occurs next in each item
-        //
-        // * the set of terminals that can follow that non-terminal symbol in the item
+        // * the set of non-terminal symbols that occur at each item's current position
+        // * the set of terminals that occurs after each of these non-terminal symbols
         //
         // So we can avoid a lot of duplicated recursive work by precomputing, for each
         // non-terminal symbol `i`, a final list of *additions* that must be made to an
-        // item set when symbol `i` occurs as the next symbol in one if its core items.
-        // The structure of a precomputed *addition* is as follows:
-        //
-        // * `item` - the new item that must be added as part of the expansion of the symbol `i`.
-        //
-        // * `lookaheads` - the set of possible lookahead tokens that can always come after `item`
-        //   in an expansion of symbol `i`.
-        //
-        // * `reserved_lookaheads` - the set of reserved lookahead lookahead tokens that can
-        //   always come after `item` in the expansion of symbol `i`.
-        //
+        // item set when `i` occurs as the next symbol in one if its core items. The
+        // structure of an *addition* is as follows:
+        // * `item` - the new item that must be added as part of the expansion of `i`
+        // * `lookaheads` - lookahead tokens that can always come after that item in the expansion
+        //   of `i`
         // * `propagates_lookaheads` - a boolean indicating whether or not `item` can occur at the
-        //   *end* of the expansion of symbol `i`, so that i's own current lookahead tokens can
-        //   occur after `item`.
+        //   *end* of the expansion of `i`, so that i's own current lookahead tokens can occur
+        //   after `item`.
         //
-        // Rather than computing these additions recursively, we use an explicit stack.
-        let empty_lookaheads = TokenSet::new();
-        let mut stack = Vec::new();
-        let mut follow_set_info_by_non_terminal = HashMap::<usize, FollowSetInfo>::new();
+        // Again, rather than computing these additions recursively, we use an explicit
+        // stack called `entries_to_process`.
         for i in 0..syntax_grammar.variables.len() {
+            let empty_lookaheads = TokenSet::new();
+            let mut entries_to_process = vec![(i, &empty_lookaheads, true)];
 
             // First, build up a map whose keys are all of the non-terminals that can
             // appear at the beginning of non-terminal `i`, and whose values store
-            // information about the tokens that can follow those non-terminals.
-            stack.clear();
-            stack.push((i, &empty_lookaheads, ReservedWordSetId::default(), true));
-            follow_set_info_by_non_terminal.clear();
-            while let Some((sym_ix, lookaheads, reserved_word_set_id, propagates_lookaheads)) =
-                stack.pop()
-            {
-                let mut did_add = false;
-                let info = follow_set_info_by_non_terminal.entry(sym_ix).or_default();
-                did_add |= info.lookaheads.insert_all(lookaheads);
-                if reserved_word_set_id > info.reserved_lookaheads {
-                    info.reserved_lookaheads = reserved_word_set_id;
-                    did_add = true;
-                }
-                did_add |= propagates_lookaheads && !info.propagates_lookaheads;
-                info.propagates_lookaheads |= propagates_lookaheads;
-                if !did_add {
-                    continue;
-                }
+            // information about the tokens that can follow each non-terminal.
+            let mut follow_set_info_by_non_terminal = HashMap::new();
+            while let Some(entry) = entries_to_process.pop() {
+                let (variable_index, lookaheads, propagates_lookaheads) = entry;
+                let existing_info = follow_set_info_by_non_terminal
+                    .entry(variable_index)
+                    .or_insert_with(|| FollowSetInfo {
+                        lookaheads: TokenSet::new(),
+                        propagates_lookaheads: false,
+                    });
 
-                for production in &syntax_grammar.variables[sym_ix].productions {
-                    if let Some(symbol) = production.first_symbol() {
-                        if symbol.is_non_terminal() {
-                            if let Some(next_step) = production.steps.get(1) {
-                                stack.push((
-                                    symbol.index,
-                                    &result.first_sets[&next_step.symbol],
-                                    result.reserved_first_sets[&next_step.symbol],
-                                    false,
-                                ));
-                            } else {
-                                stack.push((
-                                    symbol.index,
-                                    lookaheads,
-                                    reserved_word_set_id,
-                                    propagates_lookaheads,
-                                ));
+                let did_add_follow_set_info;
+                if propagates_lookaheads {
+                    did_add_follow_set_info = !existing_info.propagates_lookaheads;
+                    existing_info.propagates_lookaheads = true;
+                } else {
+                    did_add_follow_set_info = existing_info.lookaheads.insert_all(lookaheads);
+                }
+
+                if did_add_follow_set_info {
+                    for production in &syntax_grammar.variables[variable_index].productions {
+                        if let Some(symbol) = production.first_symbol() {
+                            if symbol.is_non_terminal() {
+                                if production.steps.len() == 1 {
+                                    entries_to_process.push((
+                                        symbol.index,
+                                        lookaheads,
+                                        propagates_lookaheads,
+                                    ));
+                                } else {
+                                    entries_to_process.push((
+                                        symbol.index,
+                                        &result.first_sets[&production.steps[1].symbol],
+                                        false,
+                                    ));
+                                }
                             }
                         }
                     }
                 }
             }
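The comment rewritten in this hunk describes precomputing, per non-terminal, the list of *additions* needed to close an item set. The sketch below illustrates how such precomputed additions could be applied when a core item's next symbol is a non-terminal; `Addition` and `ItemSet` are deliberately simplified stand-ins, not the crate's actual `ParseItem`/`TokenSet` types.

```rust
use std::collections::HashMap;

/// Illustrative types; the real generator stores ParseItems and TokenSets.
#[derive(Clone)]
struct Addition {
    item: usize,             // id of the item to add
    lookaheads: Vec<usize>,  // tokens that always follow that item
    propagates_lookaheads: bool,
}

#[derive(Default)]
struct ItemSet {
    // item id -> lookahead tokens
    entries: HashMap<usize, Vec<usize>>,
}

/// Expand one core item whose next symbol is the non-terminal `nt`,
/// using the additions precomputed for `nt` instead of recursing.
fn expand(
    set: &mut ItemSet,
    additions_by_non_terminal: &[Vec<Addition>],
    nt: usize,
    following_tokens: &[usize],
) {
    for addition in &additions_by_non_terminal[nt] {
        let lookaheads = set.entries.entry(addition.item).or_default();
        lookaheads.extend_from_slice(&addition.lookaheads);
        // If the added item can end an expansion of `nt`, the tokens that
        // follow `nt` in the core item also follow the added item.
        if addition.propagates_lookaheads {
            lookaheads.extend_from_slice(following_tokens);
        }
    }
}

fn main() {
    let additions = vec![vec![Addition { item: 7, lookaheads: vec![1], propagates_lookaheads: true }]];
    let mut set = ItemSet::default();
    expand(&mut set, &additions, 0, &[2, 3]);
    println!("{:?}", set.entries); // {7: [1, 2, 3]}
}
```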
@@ -209,7 +194,7 @@ impl<'a> ParseItemSetBuilder<'a> {
             // Store all of those non-terminals' productions, along with their associated
             // lookahead info, as *additions* associated with non-terminal `i`.
             let additions_for_non_terminal = &mut result.transitive_closure_additions[i];
-            for (&variable_index, follow_set_info) in &follow_set_info_by_non_terminal {
+            for (variable_index, follow_set_info) in follow_set_info_by_non_terminal {
                 let variable = &syntax_grammar.variables[variable_index];
                 let non_terminal = Symbol::non_terminal(variable_index);
                 let variable_index = variable_index as u32;
@@ -254,23 +239,20 @@ impl<'a> ParseItemSetBuilder<'a> {
 
     pub fn transitive_closure(&self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> {
         let mut result = ParseItemSet::default();
-        for entry in &item_set.entries {
+        for (item, lookaheads) in &item_set.entries {
             if let Some(productions) = self
                 .inlines
-                .inlined_productions(entry.item.production, entry.item.step_index)
+                .inlined_productions(item.production, item.step_index)
             {
                 for production in productions {
                     self.add_item(
                         &mut result,
-                        &ParseItemSetEntry {
-                            item: entry.item.substitute_production(production),
-                            lookaheads: entry.lookaheads.clone(),
-                            following_reserved_word_set: entry.following_reserved_word_set,
-                        },
+                        item.substitute_production(production),
+                        lookaheads,
                     );
                 }
             } else {
-                self.add_item(&mut result, entry);
+                self.add_item(&mut result, *item, lookaheads);
             }
         }
         result
@@ -280,64 +262,30 @@ impl<'a> ParseItemSetBuilder<'a> {
         &self.first_sets[symbol]
     }
 
-    pub fn reserved_first_set(&self, symbol: &Symbol) -> Option<&TokenSet> {
-        let id = *self.reserved_first_sets.get(symbol)?;
-        Some(&self.syntax_grammar.reserved_word_sets[id.0])
-    }
-
     pub fn last_set(&self, symbol: &Symbol) -> &TokenSet {
         &self.last_sets[symbol]
     }
 
-    fn add_item(&self, set: &mut ParseItemSet<'a>, entry: &ParseItemSetEntry<'a>) {
-        if let Some(step) = entry.item.step() {
+    fn add_item(&self, set: &mut ParseItemSet<'a>, item: ParseItem<'a>, lookaheads: &TokenSet) {
+        if let Some(step) = item.step() {
             if step.symbol.is_non_terminal() {
-                let next_step = entry.item.successor().step();
+                let next_step = item.successor().step();
 
                 // Determine which tokens can follow this non-terminal.
-                let (following_tokens, following_reserved_tokens) =
-                    if let Some(next_step) = next_step {
-                        (
-                            self.first_sets.get(&next_step.symbol).unwrap(),
-                            *self.reserved_first_sets.get(&next_step.symbol).unwrap(),
-                        )
-                    } else {
-                        (&entry.lookaheads, entry.following_reserved_word_set)
-                    };
+                let following_tokens = next_step.map_or(lookaheads, |next_step| {
+                    self.first_sets.get(&next_step.symbol).unwrap()
+                });
 
                 // Use the pre-computed *additions* to expand the non-terminal.
                 for addition in &self.transitive_closure_additions[step.symbol.index] {
-                    let entry = set.insert(addition.item);
-                    entry.lookaheads.insert_all(&addition.info.lookaheads);
-
-                    if let Some(word_token) = self.syntax_grammar.word_token {
-                        if addition.info.lookaheads.contains(&word_token) {
-                            entry.following_reserved_word_set = entry
-                                .following_reserved_word_set
-                                .max(addition.info.reserved_lookaheads);
-                        }
-                    }
-
+                    let lookaheads = set.insert(addition.item, &addition.info.lookaheads);
                     if addition.info.propagates_lookaheads {
-                        entry.lookaheads.insert_all(following_tokens);
-
-                        if let Some(word_token) = self.syntax_grammar.word_token {
-                            if following_tokens.contains(&word_token) {
-                                entry.following_reserved_word_set = entry
-                                    .following_reserved_word_set
-                                    .max(following_reserved_tokens);
-                            }
-                        }
+                        lookaheads.insert_all(following_tokens);
                     }
                 }
             }
         }
 
-        let e = set.insert(entry.item);
-        e.lookaheads.insert_all(&entry.lookaheads);
-        e.following_reserved_word_set = e
-            .following_reserved_word_set
-            .max(entry.following_reserved_word_set);
+        set.insert(item, lookaheads);
     }
 }
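The `add_item` change above swaps an `if let`/`else` for `Option::map_or` when picking the tokens that can follow a non-terminal. A toy comparison of the two equivalent idioms; the types here are simplified and only the shape mirrors the diff.

```rust
use std::collections::HashMap;

// Closure-based form, as on the right-hand side of the hunk.
fn following_tokens<'a>(
    next_step: Option<usize>,
    first_sets: &'a HashMap<usize, Vec<usize>>,
    lookaheads: &'a Vec<usize>,
) -> &'a Vec<usize> {
    next_step.map_or(lookaheads, |symbol| first_sets.get(&symbol).unwrap())
}

// Equivalent explicit form, as on the left-hand side.
fn following_tokens_explicit<'a>(
    next_step: Option<usize>,
    first_sets: &'a HashMap<usize, Vec<usize>>,
    lookaheads: &'a Vec<usize>,
) -> &'a Vec<usize> {
    if let Some(symbol) = next_step {
        first_sets.get(&symbol).unwrap()
    } else {
        lookaheads
    }
}

fn main() {
    let mut first_sets = HashMap::new();
    first_sets.insert(5, vec![10, 11]);
    let lookaheads = vec![42];
    assert_eq!(following_tokens(Some(5), &first_sets, &lookaheads), &vec![10, 11]);
    assert_eq!(following_tokens_explicit(None, &first_sets, &lookaheads), &vec![42]);
    println!("ok");
}
```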
@@ -3,7 +3,7 @@ use std::{
     mem,
 };
 
-use log::debug;
+use log::info;
 
 use super::token_conflicts::TokenConflictMap;
 use crate::{
@@ -11,7 +11,6 @@ use crate::{
     grammars::{LexicalGrammar, SyntaxGrammar, VariableType},
     rules::{AliasMap, Symbol, TokenSet},
     tables::{GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry},
-    OptLevel,
 };
 
 pub fn minimize_parse_table(
pub fn minimize_parse_table(
|
pub fn minimize_parse_table(
|
||||||
|
|
@ -21,7 +20,6 @@ pub fn minimize_parse_table(
|
||||||
simple_aliases: &AliasMap,
|
simple_aliases: &AliasMap,
|
||||||
token_conflict_map: &TokenConflictMap,
|
token_conflict_map: &TokenConflictMap,
|
||||||
keywords: &TokenSet,
|
keywords: &TokenSet,
|
||||||
optimizations: OptLevel,
|
|
||||||
) {
|
) {
|
||||||
let mut minimizer = Minimizer {
|
let mut minimizer = Minimizer {
|
||||||
parse_table,
|
parse_table,
|
||||||
|
|
@@ -31,9 +29,7 @@ pub fn minimize_parse_table(
         keywords,
         simple_aliases,
     };
-    if optimizations.contains(OptLevel::MergeStates) {
-        minimizer.merge_compatible_states();
-    }
+    minimizer.merge_compatible_states();
     minimizer.remove_unit_reductions();
     minimizer.remove_unused_states();
     minimizer.reorder_states_by_descending_size();
@@ -155,7 +151,9 @@ impl Minimizer<'_> {
                 &mut group_ids_by_state_id,
                 0,
                 |left, right, groups| self.state_successors_differ(left, right, groups),
-            ) {}
+            ) {
+                continue;
+            }
 
             let error_group_index = state_ids_by_group_id
                 .iter()
@@ -172,12 +170,17 @@ impl Minimizer<'_> {
         let mut new_states = Vec::with_capacity(state_ids_by_group_id.len());
         for state_ids in &state_ids_by_group_id {
             // Initialize the new state based on the first old state in the group.
-            let mut parse_state = mem::take(&mut self.parse_table.states[state_ids[0]]);
+            let mut parse_state = ParseState::default();
+            mem::swap(&mut parse_state, &mut self.parse_table.states[state_ids[0]]);
 
             // Extend the new state with all of the actions from the other old states
             // in the group.
             for state_id in &state_ids[1..] {
-                let other_parse_state = mem::take(&mut self.parse_table.states[*state_id]);
+                let mut other_parse_state = ParseState::default();
+                mem::swap(
+                    &mut other_parse_state,
+                    &mut self.parse_table.states[*state_id],
+                );
 
                 parse_state
                     .terminal_entries
@@ -185,12 +188,6 @@ impl Minimizer<'_> {
                 parse_state
                     .nonterminal_entries
                     .extend(other_parse_state.nonterminal_entries);
-                parse_state
-                    .reserved_words
-                    .insert_all(&other_parse_state.reserved_words);
-                for symbol in parse_state.terminal_entries.keys() {
-                    parse_state.reserved_words.remove(symbol);
-                }
             }
 
             // Update the new state's outgoing references using the new grouping.
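These two hunks merge a group of compatible parse states by moving each old state out of the table and extending the first one with the others' entries (`mem::take` on one side, an explicit `mem::swap` with a default value on the other; both leave an empty state behind). A stand-alone sketch of that move-and-extend pattern, with an illustrative `ParseState` rather than the real one:

```rust
use std::{collections::HashMap, mem};

#[derive(Default, Debug)]
struct ParseState {
    terminal_entries: HashMap<usize, &'static str>,
    nonterminal_entries: HashMap<usize, usize>,
}

/// Merge a group of states into one, taking each old state out of the table
/// so its maps can be moved rather than cloned.
fn merge_group(states: &mut [ParseState], group: &[usize]) -> ParseState {
    // `mem::take` leaves a Default value behind, exactly like swapping in a
    // freshly constructed `ParseState::default()`.
    let mut merged = mem::take(&mut states[group[0]]);
    for &state_id in &group[1..] {
        let other = mem::take(&mut states[state_id]);
        merged.terminal_entries.extend(other.terminal_entries);
        merged.nonterminal_entries.extend(other.nonterminal_entries);
    }
    merged
}

fn main() {
    let mut states = vec![ParseState::default(), ParseState::default()];
    states[0].terminal_entries.insert(1, "shift");
    states[1].terminal_entries.insert(2, "reduce");
    let merged = merge_group(&mut states, &[0, 1]);
    println!("{merged:?}");
}
```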
@ -219,14 +216,24 @@ impl Minimizer<'_> {
|
||||||
) {
|
) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
} else if self.token_conflicts(left_state.id, right_state.id, right_state, *token) {
|
} else if self.token_conflicts(
|
||||||
|
left_state.id,
|
||||||
|
right_state.id,
|
||||||
|
right_state.terminal_entries.keys(),
|
||||||
|
*token,
|
||||||
|
) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for token in right_state.terminal_entries.keys() {
|
for token in right_state.terminal_entries.keys() {
|
||||||
if !left_state.terminal_entries.contains_key(token)
|
if !left_state.terminal_entries.contains_key(token)
|
||||||
&& self.token_conflicts(left_state.id, right_state.id, left_state, *token)
|
&& self.token_conflicts(
|
||||||
|
left_state.id,
|
||||||
|
right_state.id,
|
||||||
|
left_state.terminal_entries.keys(),
|
||||||
|
*token,
|
||||||
|
)
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
@ -248,7 +255,7 @@ impl Minimizer<'_> {
|
||||||
let group1 = group_ids_by_state_id[*s1];
|
let group1 = group_ids_by_state_id[*s1];
|
||||||
let group2 = group_ids_by_state_id[*s2];
|
let group2 = group_ids_by_state_id[*s2];
|
||||||
if group1 != group2 {
|
if group1 != group2 {
|
||||||
debug!(
|
info!(
|
||||||
"split states {} {} - successors for {} are split: {s1} {s2}",
|
"split states {} {} - successors for {} are split: {s1} {s2}",
|
||||||
state1.id,
|
state1.id,
|
||||||
state2.id,
|
state2.id,
|
||||||
|
|
@ -264,12 +271,12 @@ impl Minimizer<'_> {
|
||||||
for (symbol, s1) in &state1.nonterminal_entries {
|
for (symbol, s1) in &state1.nonterminal_entries {
|
||||||
if let Some(s2) = state2.nonterminal_entries.get(symbol) {
|
if let Some(s2) = state2.nonterminal_entries.get(symbol) {
|
||||||
match (s1, s2) {
|
match (s1, s2) {
|
||||||
(GotoAction::ShiftExtra, GotoAction::ShiftExtra) => {}
|
(GotoAction::ShiftExtra, GotoAction::ShiftExtra) => continue,
|
||||||
(GotoAction::Goto(s1), GotoAction::Goto(s2)) => {
|
(GotoAction::Goto(s1), GotoAction::Goto(s2)) => {
|
||||||
let group1 = group_ids_by_state_id[*s1];
|
let group1 = group_ids_by_state_id[*s1];
|
||||||
let group2 = group_ids_by_state_id[*s2];
|
let group2 = group_ids_by_state_id[*s2];
|
||||||
if group1 != group2 {
|
if group1 != group2 {
|
||||||
debug!(
|
info!(
|
||||||
"split states {} {} - successors for {} are split: {s1} {s2}",
|
"split states {} {} - successors for {} are split: {s1} {s2}",
|
||||||
state1.id,
|
state1.id,
|
||||||
state2.id,
|
state2.id,
|
||||||
|
|
@ -299,14 +306,16 @@ impl Minimizer<'_> {
|
||||||
let actions1 = &entry1.actions;
|
let actions1 = &entry1.actions;
|
||||||
let actions2 = &entry2.actions;
|
let actions2 = &entry2.actions;
|
||||||
if actions1.len() != actions2.len() {
|
if actions1.len() != actions2.len() {
|
||||||
debug!(
|
info!(
|
||||||
"split states {state_id1} {state_id2} - differing action counts for token {}",
|
"split states {state_id1} {state_id2} - differing action counts for token {}",
|
||||||
self.symbol_name(token)
|
self.symbol_name(token)
|
||||||
);
|
);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (action1, action2) in actions1.iter().zip(actions2.iter()) {
|
for (i, action1) in actions1.iter().enumerate() {
|
||||||
|
let action2 = &actions2[i];
|
||||||
|
|
||||||
// Two shift actions are equivalent if their destinations are in the same group.
|
// Two shift actions are equivalent if their destinations are in the same group.
|
||||||
if let (
|
if let (
|
||||||
ParseAction::Shift {
|
ParseAction::Shift {
|
||||||
|
|
@ -324,13 +333,13 @@ impl Minimizer<'_> {
|
||||||
if group1 == group2 && is_repetition1 == is_repetition2 {
|
if group1 == group2 && is_repetition1 == is_repetition2 {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
debug!(
|
info!(
|
||||||
"split states {state_id1} {state_id2} - successors for {} are split: {s1} {s2}",
|
"split states {state_id1} {state_id2} - successors for {} are split: {s1} {s2}",
|
||||||
self.symbol_name(token),
|
self.symbol_name(token),
|
||||||
);
|
);
|
||||||
return true;
|
return true;
|
||||||
} else if action1 != action2 {
|
} else if action1 != action2 {
|
||||||
debug!(
|
info!(
|
||||||
"split states {state_id1} {state_id2} - unequal actions for {}",
|
"split states {state_id1} {state_id2} - unequal actions for {}",
|
||||||
self.symbol_name(token),
|
self.symbol_name(token),
|
||||||
);
|
);
|
||||||
|
|
@ -341,32 +350,28 @@ impl Minimizer<'_> {
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
fn token_conflicts(
|
fn token_conflicts<'b>(
|
||||||
&self,
|
&self,
|
||||||
left_id: ParseStateId,
|
left_id: ParseStateId,
|
||||||
right_id: ParseStateId,
|
right_id: ParseStateId,
|
||||||
right_state: &ParseState,
|
existing_tokens: impl Iterator<Item = &'b Symbol>,
|
||||||
new_token: Symbol,
|
new_token: Symbol,
|
||||||
) -> bool {
|
) -> bool {
|
||||||
if new_token == Symbol::end_of_nonterminal_extra() {
|
if new_token == Symbol::end_of_nonterminal_extra() {
|
||||||
debug!("split states {left_id} {right_id} - end of non-terminal extra",);
|
info!("split states {left_id} {right_id} - end of non-terminal extra",);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do not add external tokens; they could conflict lexically with any of the state's
|
// Do not add external tokens; they could conflict lexically with any of the state's
|
||||||
// existing lookahead tokens.
|
// existing lookahead tokens.
|
||||||
if new_token.is_external() {
|
if new_token.is_external() {
|
||||||
debug!(
|
info!(
|
||||||
"split states {left_id} {right_id} - external token {}",
|
"split states {left_id} {right_id} - external token {}",
|
||||||
self.symbol_name(&new_token),
|
self.symbol_name(&new_token),
|
||||||
);
|
);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if right_state.reserved_words.contains(&new_token) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Do not add tokens which are both internal and external. Their validity could
|
// Do not add tokens which are both internal and external. Their validity could
|
||||||
// influence the behavior of the external scanner.
|
// influence the behavior of the external scanner.
|
||||||
if self
|
if self
|
||||||
|
|
@ -375,7 +380,7 @@ impl Minimizer<'_> {
|
||||||
.iter()
|
.iter()
|
||||||
.any(|external| external.corresponding_internal_token == Some(new_token))
|
.any(|external| external.corresponding_internal_token == Some(new_token))
|
||||||
{
|
{
|
||||||
debug!(
|
info!(
|
||||||
"split states {left_id} {right_id} - internal/external token {}",
|
"split states {left_id} {right_id} - internal/external token {}",
|
||||||
self.symbol_name(&new_token),
|
self.symbol_name(&new_token),
|
||||||
);
|
);
|
||||||
|
|
@ -383,30 +388,23 @@ impl Minimizer<'_> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do not add a token if it conflicts with an existing token.
|
// Do not add a token if it conflicts with an existing token.
|
||||||
for token in right_state.terminal_entries.keys().copied() {
|
for token in existing_tokens {
|
||||||
if !token.is_terminal() {
|
if token.is_terminal()
|
||||||
continue;
|
&& !(self.syntax_grammar.word_token == Some(*token)
|
||||||
}
|
&& self.keywords.contains(&new_token))
|
||||||
if self.syntax_grammar.word_token == Some(token) && self.keywords.contains(&new_token) {
|
&& !(self.syntax_grammar.word_token == Some(new_token)
|
||||||
continue;
|
&& self.keywords.contains(token))
|
||||||
}
|
&& (self
|
||||||
if self.syntax_grammar.word_token == Some(new_token) && self.keywords.contains(&token) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if self
|
|
||||||
.token_conflict_map
|
|
||||||
.does_conflict(new_token.index, token.index)
|
|
||||||
|| self
|
|
||||||
.token_conflict_map
|
.token_conflict_map
|
||||||
.does_match_same_string(new_token.index, token.index)
|
.does_conflict(new_token.index, token.index)
|
||||||
|
|| self
|
||||||
|
.token_conflict_map
|
||||||
|
.does_match_same_string(new_token.index, token.index))
|
||||||
{
|
{
|
||||||
debug!(
|
info!(
|
||||||
"split states {} {} - token {} conflicts with {}",
|
"split states {left_id} {right_id} - token {} conflicts with {}",
|
||||||
left_id,
|
|
||||||
right_id,
|
|
||||||
self.symbol_name(&new_token),
|
self.symbol_name(&new_token),
|
||||||
self.symbol_name(&token),
|
self.symbol_name(token),
|
||||||
);
|
);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
@@ -8,16 +8,14 @@ mod token_conflicts;
 
 use std::collections::{BTreeSet, HashMap};
 
+use anyhow::Result;
 pub use build_lex_table::LARGE_CHARACTER_RANGE_COUNT;
-use build_parse_table::BuildTableResult;
-pub use build_parse_table::ParseTableBuilderError;
-use log::{debug, info};
+use log::info;
 
 use self::{
     build_lex_table::build_lex_table,
     build_parse_table::{build_parse_table, ParseStateInfo},
     coincident_tokens::CoincidentTokenIndex,
-    item_set_builder::ParseItemSetBuilder,
     minimize_parse_table::minimize_parse_table,
     token_conflicts::TokenConflictMap,
 };
@@ -27,13 +25,13 @@ use crate::{
     node_types::VariableInfo,
     rules::{AliasMap, Symbol, SymbolType, TokenSet},
     tables::{LexTable, ParseAction, ParseTable, ParseTableEntry},
-    OptLevel,
 };
 
 pub struct Tables {
     pub parse_table: ParseTable,
     pub main_lex_table: LexTable,
     pub keyword_lex_table: LexTable,
+    pub word_token: Option<Symbol>,
     pub large_character_sets: Vec<(Option<Symbol>, CharacterSet)>,
 }
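The `Tables` struct on the added side carries the grammar's `word_token` alongside the separate `keyword_lex_table`. As a loose, hypothetical illustration of why that pairing matters (this is the general keyword-extraction idea, not the generator's actual code): the main lexer recognizes a whole "word", and the token is then reclassified against the much smaller keyword set.

```rust
/// Illustrative only: once the main lexer has matched the grammar's word
/// token, the matched text can be checked against the keyword set to decide
/// whether it is really a keyword or an ordinary identifier.
fn classify<'a>(word: &'a str, keywords: &[&'a str]) -> &'a str {
    if keywords.contains(&word) {
        "keyword"
    } else {
        "identifier"
    }
}

fn main() {
    let keywords = ["if", "while", "return"];
    for candidate in ["if", "foo", "return"] {
        println!("{candidate}: {}", classify(candidate, &keywords));
    }
}
```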
@ -44,17 +42,9 @@ pub fn build_tables(
|
||||||
variable_info: &[VariableInfo],
|
variable_info: &[VariableInfo],
|
||||||
inlines: &InlinedProductionMap,
|
inlines: &InlinedProductionMap,
|
||||||
report_symbol_name: Option<&str>,
|
report_symbol_name: Option<&str>,
|
||||||
optimizations: OptLevel,
|
) -> Result<Tables> {
|
||||||
) -> BuildTableResult<Tables> {
|
let (mut parse_table, following_tokens, parse_state_info) =
|
||||||
let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
|
build_parse_table(syntax_grammar, lexical_grammar, inlines, variable_info)?;
|
||||||
let following_tokens =
|
|
||||||
get_following_tokens(syntax_grammar, lexical_grammar, inlines, &item_set_builder);
|
|
||||||
let (mut parse_table, parse_state_info) = build_parse_table(
|
|
||||||
syntax_grammar,
|
|
||||||
lexical_grammar,
|
|
||||||
item_set_builder,
|
|
||||||
variable_info,
|
|
||||||
)?;
|
|
||||||
let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
|
let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
|
||||||
let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
|
let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
|
||||||
let keywords = identify_keywords(
|
let keywords = identify_keywords(
|
||||||
|
|
@ -80,7 +70,6 @@ pub fn build_tables(
|
||||||
simple_aliases,
|
simple_aliases,
|
||||||
&token_conflict_map,
|
&token_conflict_map,
|
||||||
&keywords,
|
&keywords,
|
||||||
optimizations,
|
|
||||||
);
|
);
|
||||||
let lex_tables = build_lex_table(
|
let lex_tables = build_lex_table(
|
||||||
&mut parse_table,
|
&mut parse_table,
|
||||||
|
|
@ -103,59 +92,15 @@ pub fn build_tables(
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
if parse_table.states.len() > u16::MAX as usize {
|
|
||||||
Err(ParseTableBuilderError::StateCount(parse_table.states.len()))?;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(Tables {
|
Ok(Tables {
|
||||||
parse_table,
|
parse_table,
|
||||||
main_lex_table: lex_tables.main_lex_table,
|
main_lex_table: lex_tables.main_lex_table,
|
||||||
keyword_lex_table: lex_tables.keyword_lex_table,
|
keyword_lex_table: lex_tables.keyword_lex_table,
|
||||||
large_character_sets: lex_tables.large_character_sets,
|
large_character_sets: lex_tables.large_character_sets,
|
||||||
|
word_token: syntax_grammar.word_token,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_following_tokens(
|
|
||||||
syntax_grammar: &SyntaxGrammar,
|
|
||||||
lexical_grammar: &LexicalGrammar,
|
|
||||||
inlines: &InlinedProductionMap,
|
|
||||||
builder: &ParseItemSetBuilder,
|
|
||||||
) -> Vec<TokenSet> {
|
|
||||||
let mut result = vec![TokenSet::new(); lexical_grammar.variables.len()];
|
|
||||||
let productions = syntax_grammar
|
|
||||||
.variables
|
|
||||||
.iter()
|
|
||||||
.flat_map(|v| &v.productions)
|
|
||||||
.chain(&inlines.productions);
|
|
||||||
let all_tokens = (0..result.len())
|
|
||||||
.map(Symbol::terminal)
|
|
||||||
.collect::<TokenSet>();
|
|
||||||
for production in productions {
|
|
||||||
for i in 1..production.steps.len() {
|
|
||||||
let left_tokens = builder.last_set(&production.steps[i - 1].symbol);
|
|
||||||
let right_tokens = builder.first_set(&production.steps[i].symbol);
|
|
||||||
let right_reserved_tokens = builder.reserved_first_set(&production.steps[i].symbol);
|
|
||||||
for left_token in left_tokens.iter() {
|
|
||||||
if left_token.is_terminal() {
|
|
||||||
result[left_token.index].insert_all_terminals(right_tokens);
|
|
||||||
if let Some(reserved_tokens) = right_reserved_tokens {
|
|
||||||
result[left_token.index].insert_all_terminals(reserved_tokens);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for extra in &syntax_grammar.extra_symbols {
|
|
||||||
if extra.is_terminal() {
|
|
||||||
for entry in &mut result {
|
|
||||||
entry.insert(*extra);
|
|
||||||
}
|
|
||||||
result[extra.index] = all_tokens.clone();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
result
|
|
||||||
}
|
|
||||||
|
|
||||||
fn populate_error_state(
|
fn populate_error_state(
|
||||||
parse_table: &mut ParseTable,
|
parse_table: &mut ParseTable,
|
||||||
syntax_grammar: &SyntaxGrammar,
|
syntax_grammar: &SyntaxGrammar,
|
||||||
|
|
@ -179,7 +124,7 @@ fn populate_error_state(
|
||||||
if conflicts_with_other_tokens {
|
if conflicts_with_other_tokens {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
debug!(
|
info!(
|
||||||
"error recovery - token {} has no conflicts",
|
"error recovery - token {} has no conflicts",
|
||||||
lexical_grammar.variables[i].name
|
lexical_grammar.variables[i].name
|
||||||
);
|
);
|
||||||
|
|
@ -205,14 +150,14 @@ fn populate_error_state(
|
||||||
!coincident_token_index.contains(symbol, *t)
|
!coincident_token_index.contains(symbol, *t)
|
||||||
&& token_conflict_map.does_conflict(symbol.index, t.index)
|
&& token_conflict_map.does_conflict(symbol.index, t.index)
|
||||||
}) {
|
}) {
|
||||||
debug!(
|
info!(
|
||||||
"error recovery - exclude token {} because of conflict with {}",
|
"error recovery - exclude token {} because of conflict with {}",
|
||||||
lexical_grammar.variables[i].name, lexical_grammar.variables[t.index].name
|
lexical_grammar.variables[i].name, lexical_grammar.variables[t.index].name
|
||||||
);
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
debug!(
|
info!(
|
||||||
"error recovery - include token {}",
|
"error recovery - include token {}",
|
||||||
lexical_grammar.variables[i].name
|
lexical_grammar.variables[i].name
|
||||||
);
|
);
|
||||||
|
|
@ -345,7 +290,7 @@ fn identify_keywords(
|
||||||
&& token_conflict_map.does_match_same_string(i, word_token.index)
|
&& token_conflict_map.does_match_same_string(i, word_token.index)
|
||||||
&& !token_conflict_map.does_match_different_string(i, word_token.index)
|
&& !token_conflict_map.does_match_different_string(i, word_token.index)
|
||||||
{
|
{
|
||||||
debug!(
|
info!(
|
||||||
"Keywords - add candidate {}",
|
"Keywords - add candidate {}",
|
||||||
lexical_grammar.variables[i].name
|
lexical_grammar.variables[i].name
|
||||||
);
|
);
|
||||||
|
|
@ -364,7 +309,7 @@ fn identify_keywords(
|
||||||
if other_token != *token
|
if other_token != *token
|
||||||
&& token_conflict_map.does_match_same_string(other_token.index, token.index)
|
&& token_conflict_map.does_match_same_string(other_token.index, token.index)
|
||||||
{
|
{
|
||||||
debug!(
|
info!(
|
||||||
"Keywords - exclude {} because it matches the same string as {}",
|
"Keywords - exclude {} because it matches the same string as {}",
|
||||||
lexical_grammar.variables[token.index].name,
|
lexical_grammar.variables[token.index].name,
|
||||||
lexical_grammar.variables[other_token.index].name
|
lexical_grammar.variables[other_token.index].name
|
||||||
|
|
@ -406,7 +351,7 @@ fn identify_keywords(
|
||||||
word_token.index,
|
word_token.index,
|
||||||
other_index,
|
other_index,
|
||||||
) {
|
) {
|
||||||
debug!(
|
info!(
|
||||||
"Keywords - exclude {} because of conflict with {}",
|
"Keywords - exclude {} because of conflict with {}",
|
||||||
lexical_grammar.variables[token.index].name,
|
lexical_grammar.variables[token.index].name,
|
||||||
lexical_grammar.variables[other_index].name
|
lexical_grammar.variables[other_index].name
|
||||||
|
|
@ -415,7 +360,7 @@ fn identify_keywords(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
debug!(
|
info!(
|
||||||
"Keywords - include {}",
|
"Keywords - include {}",
|
||||||
lexical_grammar.variables[token.index].name,
|
lexical_grammar.variables[token.index].name,
|
||||||
);
|
);
|
||||||
|
|
@ -469,9 +414,9 @@ fn report_state_info<'a>(
|
||||||
for (i, state) in parse_table.states.iter().enumerate() {
|
for (i, state) in parse_table.states.iter().enumerate() {
|
||||||
all_state_indices.insert(i);
|
all_state_indices.insert(i);
|
||||||
let item_set = &parse_state_info[state.id];
|
let item_set = &parse_state_info[state.id];
|
||||||
for entry in &item_set.1.entries {
|
for (item, _) in &item_set.1.entries {
|
||||||
if !entry.item.is_augmented() {
|
if !item.is_augmented() {
|
||||||
symbols_with_state_indices[entry.item.variable_index as usize]
|
symbols_with_state_indices[item.variable_index as usize]
|
||||||
.1
|
.1
|
||||||
.insert(i);
|
.insert(i);
|
||||||
}
|
}
|
||||||
|
|
@ -487,14 +432,14 @@ fn report_state_info<'a>(
|
||||||
.max()
|
.max()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
for (symbol, states) in &symbols_with_state_indices {
|
for (symbol, states) in &symbols_with_state_indices {
|
||||||
info!(
|
eprintln!(
|
||||||
"{:width$}\t{}",
|
"{:width$}\t{}",
|
||||||
syntax_grammar.variables[symbol.index].name,
|
syntax_grammar.variables[symbol.index].name,
|
||||||
states.len(),
|
states.len(),
|
||||||
width = max_symbol_name_length
|
width = max_symbol_name_length
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
info!("");
|
eprintln!();
|
||||||
|
|
||||||
let state_indices = if report_symbol_name == "*" {
|
let state_indices = if report_symbol_name == "*" {
|
||||||
Some(&all_state_indices)
|
Some(&all_state_indices)
|
||||||
|
|
@ -517,27 +462,22 @@ fn report_state_info<'a>(
|
||||||
for state_index in state_indices {
|
for state_index in state_indices {
|
||||||
let id = parse_table.states[state_index].id;
|
let id = parse_table.states[state_index].id;
|
||||||
let (preceding_symbols, item_set) = &parse_state_info[id];
|
let (preceding_symbols, item_set) = &parse_state_info[id];
|
||||||
info!("state index: {state_index}");
|
eprintln!("state index: {state_index}");
|
||||||
info!("state id: {id}");
|
eprintln!("state id: {id}");
|
||||||
info!(
|
eprint!("symbol sequence:");
|
||||||
"symbol sequence: {}",
|
for symbol in preceding_symbols {
|
||||||
preceding_symbols
|
let name = if symbol.is_terminal() {
|
||||||
.iter()
|
&lexical_grammar.variables[symbol.index].name
|
||||||
.map(|symbol| {
|
} else if symbol.is_external() {
|
||||||
if symbol.is_terminal() {
|
&syntax_grammar.external_tokens[symbol.index].name
|
||||||
lexical_grammar.variables[symbol.index].name.clone()
|
} else {
|
||||||
} else if symbol.is_external() {
|
&syntax_grammar.variables[symbol.index].name
|
||||||
syntax_grammar.external_tokens[symbol.index].name.clone()
|
};
|
||||||
} else {
|
eprint!(" {name}");
|
||||||
syntax_grammar.variables[symbol.index].name.clone()
|
}
|
||||||
}
|
eprintln!(
|
||||||
})
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.join(" ")
|
|
||||||
);
|
|
||||||
info!(
|
|
||||||
"\nitems:\n{}",
|
"\nitems:\n{}",
|
||||||
item::ParseItemSetDisplay(item_set, syntax_grammar, lexical_grammar),
|
self::item::ParseItemSetDisplay(item_set, syntax_grammar, lexical_grammar,),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@@ -28,7 +28,7 @@ pub struct TokenConflictMap<'a> {
 
 impl<'a> TokenConflictMap<'a> {
     /// Create a token conflict map based on a lexical grammar, which describes the structure
-    /// of each token, and a `following_token` map, which indicates which tokens may be appear
+    /// each token, and a `following_token` map, which indicates which tokens may be appear
     /// immediately after each other token.
     ///
     /// This analyzes the possible kinds of overlap between each pair of tokens and stores
@@ -3,7 +3,7 @@ pub fn split_state_id_groups<S>(
     state_ids_by_group_id: &mut Vec<Vec<usize>>,
     group_ids_by_state_id: &mut [usize],
     start_group_id: usize,
-    mut should_split: impl FnMut(&S, &S, &[usize]) -> bool,
+    mut f: impl FnMut(&S, &S, &[usize]) -> bool,
 ) -> bool {
     let mut result = false;
 
@@ -33,7 +33,7 @@ pub fn split_state_id_groups<S>(
             }
             let right_state = &states[right_state_id];
 
-            if should_split(left_state, right_state, group_ids_by_state_id) {
+            if f(left_state, right_state, group_ids_by_state_id) {
                 split_state_ids.push(right_state_id);
             }
 
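`split_state_id_groups` is the minimizer's generic partition-refinement step; this hunk only renames its predicate parameter. Below is a toy version of the same refine-until-fixpoint loop over a made-up state type and predicate, to show the overall shape; it is a sketch, not the helper's real implementation.

```rust
/// Toy partition refinement: states start in one group and are split until the
/// `should_split` predicate no longer separates any pair within a group.
fn split_groups(
    values: &[u32],
    groups: &mut Vec<Vec<usize>>,
    group_of: &mut [usize],
    mut should_split: impl FnMut(u32, u32, &[usize]) -> bool,
) -> bool {
    let mut changed = false;
    let mut group_index = 0;
    while group_index < groups.len() {
        let leader = groups[group_index][0];
        // Decide which members split away from the group's leader.
        let mut split_off = Vec::new();
        let mut kept = Vec::new();
        for &id in &groups[group_index] {
            if id != leader && should_split(values[leader], values[id], group_of) {
                split_off.push(id);
            } else {
                kept.push(id);
            }
        }
        if !split_off.is_empty() {
            changed = true;
            let new_group = groups.len();
            for &id in &split_off {
                group_of[id] = new_group;
            }
            groups[group_index] = kept;
            groups.push(split_off);
        }
        group_index += 1;
    }
    changed
}

fn main() {
    let values = [1, 1, 2, 3, 2];
    let mut groups = vec![(0..values.len()).collect::<Vec<_>>()];
    let mut group_of = vec![0; values.len()];
    // Iterate to a fixpoint, like the minimizer's `while split_state_id_groups(...)` loop.
    while split_groups(&values, &mut groups, &mut group_of, |a, b, _| a != b) {}
    println!("{groups:?}"); // [[0, 1], [2, 4], [3]]
}
```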
@ -16,7 +16,6 @@ function alias(rule, value) {
|
||||||
result.value = value.symbol.name;
|
result.value = value.symbol.name;
|
||||||
return result;
|
return result;
|
||||||
case Object:
|
case Object:
|
||||||
case GrammarSymbol:
|
|
||||||
if (typeof value.type === 'string' && value.type === 'SYMBOL') {
|
if (typeof value.type === 'string' && value.type === 'SYMBOL') {
|
||||||
result.named = true;
|
result.named = true;
|
||||||
result.value = value.name;
|
result.value = value.name;
|
||||||
|
|
@ -70,7 +69,7 @@ function prec(number, rule) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
prec.left = function (number, rule) {
|
prec.left = function(number, rule) {
|
||||||
if (rule == null) {
|
if (rule == null) {
|
||||||
rule = number;
|
rule = number;
|
||||||
number = 0;
|
number = 0;
|
||||||
|
|
@ -92,7 +91,7 @@ prec.left = function (number, rule) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
prec.right = function (number, rule) {
|
prec.right = function(number, rule) {
|
||||||
if (rule == null) {
|
if (rule == null) {
|
||||||
rule = number;
|
rule = number;
|
||||||
number = 0;
|
number = 0;
|
||||||
|
|
@ -114,7 +113,7 @@ prec.right = function (number, rule) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
prec.dynamic = function (number, rule) {
|
prec.dynamic = function(number, rule) {
|
||||||
checkPrecedence(number);
|
checkPrecedence(number);
|
||||||
checkArguments(
|
checkArguments(
|
||||||
arguments,
|
arguments,
|
||||||
|
|
@ -154,26 +153,11 @@ function seq(...elements) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
class GrammarSymbol {
|
|
||||||
constructor(name) {
|
|
||||||
this.type = "SYMBOL";
|
|
||||||
this.name = name;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function reserved(wordset, rule) {
|
|
||||||
if (typeof wordset !== 'string') {
|
|
||||||
throw new Error('Invalid reserved word set name: ' + wordset)
|
|
||||||
}
|
|
||||||
return {
|
|
||||||
type: "RESERVED",
|
|
||||||
content: normalize(rule),
|
|
||||||
context_name: wordset,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function sym(name) {
|
function sym(name) {
|
||||||
return new GrammarSymbol(name);
|
return {
|
||||||
|
type: "SYMBOL",
|
||||||
|
name
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function token(value) {
|
function token(value) {
|
||||||
|
|
@ -184,7 +168,7 @@ function token(value) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
token.immediate = function (value) {
|
token.immediate = function(value) {
|
||||||
checkArguments(arguments, arguments.length, token.immediate, 'token.immediate', '', 'literal');
|
checkArguments(arguments, arguments.length, token.immediate, 'token.immediate', '', 'literal');
|
||||||
return {
|
return {
|
||||||
type: "IMMEDIATE_TOKEN",
|
type: "IMMEDIATE_TOKEN",
|
||||||
|
|
@ -211,11 +195,6 @@ function normalize(value) {
|
||||||
type: 'PATTERN',
|
type: 'PATTERN',
|
||||||
value: value.source
|
value: value.source
|
||||||
};
|
};
|
||||||
case RustRegex:
|
|
||||||
return {
|
|
||||||
type: 'PATTERN',
|
|
||||||
value: value.value
|
|
||||||
};
|
|
||||||
case ReferenceError:
|
case ReferenceError:
|
||||||
throw value
|
throw value
|
||||||
default:
|
default:
|
||||||
|
|
@ -257,7 +236,6 @@ function grammar(baseGrammar, options) {
|
||||||
inline: [],
|
inline: [],
|
||||||
supertypes: [],
|
supertypes: [],
|
||||||
precedences: [],
|
precedences: [],
|
||||||
reserved: {},
|
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
baseGrammar = baseGrammar.grammar;
|
baseGrammar = baseGrammar.grammar;
|
||||||
|
|
@ -331,28 +309,6 @@ function grammar(baseGrammar, options) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let reserved = baseGrammar.reserved;
|
|
||||||
if (options.reserved) {
|
|
||||||
if (typeof options.reserved !== "object") {
|
|
||||||
throw new Error("Grammar's 'reserved' property must be an object.");
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const reservedWordSetName of Object.keys(options.reserved)) {
|
|
||||||
const reservedWordSetFn = options.reserved[reservedWordSetName]
|
|
||||||
if (typeof reservedWordSetFn !== "function") {
|
|
||||||
throw new Error(`Grammar reserved word sets must all be functions. '${reservedWordSetName}' is not.`);
|
|
||||||
}
|
|
||||||
|
|
||||||
const reservedTokens = reservedWordSetFn.call(ruleBuilder, ruleBuilder, baseGrammar.reserved[reservedWordSetName]);
|
|
||||||
|
|
||||||
if (!Array.isArray(reservedTokens)) {
|
|
||||||
throw new Error(`Grammar's reserved word set functions must all return arrays of rules. '${reservedWordSetName}' does not.`);
|
|
||||||
}
|
|
||||||
|
|
||||||
reserved[reservedWordSetName] = reservedTokens.map(normalize);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let extras = baseGrammar.extras.slice();
|
let extras = baseGrammar.extras.slice();
|
||||||
if (options.extras) {
|
if (options.extras) {
|
||||||
if (typeof options.extras !== "function") {
|
if (typeof options.extras !== "function") {
|
||||||
|
|
@ -483,17 +439,10 @@ function grammar(baseGrammar, options) {
|
||||||
externals,
|
externals,
|
||||||
inline,
|
inline,
|
||||||
supertypes,
|
supertypes,
|
||||||
reserved,
|
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
class RustRegex {
|
|
||||||
constructor(value) {
|
|
||||||
this.value = value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function checkArguments(args, ruleCount, caller, callerName, suffix = '', argType = 'rule') {
|
function checkArguments(args, ruleCount, caller, callerName, suffix = '', argType = 'rule') {
|
||||||
// Allow for .map() usage where additional arguments are index and the entire array.
|
// Allow for .map() usage where additional arguments are index and the entire array.
|
||||||
const isMapCall = ruleCount === 3 && typeof args[1] === 'number' && Array.isArray(args[2]);
|
const isMapCall = ruleCount === 3 && typeof args[1] === 'number' && Array.isArray(args[2]);
|
||||||
|
|
@ -517,7 +466,6 @@ function checkPrecedence(value) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function getEnv(name) {
|
function getEnv(name) {
|
||||||
if (globalThis.native) return globalThis.__ts_grammar_path;
|
|
||||||
if (globalThis.process) return process.env[name]; // Node/Bun
|
if (globalThis.process) return process.env[name]; // Node/Bun
|
||||||
if (globalThis.Deno) return Deno.env.get(name); // Deno
|
if (globalThis.Deno) return Deno.env.get(name); // Deno
|
||||||
throw Error("Unsupported JS runtime");
|
throw Error("Unsupported JS runtime");
|
||||||
|
|
@ -530,31 +478,20 @@ globalThis.optional = optional;
|
||||||
globalThis.prec = prec;
|
globalThis.prec = prec;
|
||||||
globalThis.repeat = repeat;
|
globalThis.repeat = repeat;
|
||||||
globalThis.repeat1 = repeat1;
|
globalThis.repeat1 = repeat1;
|
||||||
globalThis.reserved = reserved;
|
|
||||||
globalThis.seq = seq;
|
globalThis.seq = seq;
|
||||||
globalThis.sym = sym;
|
globalThis.sym = sym;
|
||||||
globalThis.token = token;
|
globalThis.token = token;
|
||||||
globalThis.grammar = grammar;
|
globalThis.grammar = grammar;
|
||||||
globalThis.field = field;
|
globalThis.field = field;
|
||||||
globalThis.RustRegex = RustRegex;
|
|
||||||
|
|
||||||
const grammarPath = getEnv("TREE_SITTER_GRAMMAR_PATH");
|
|
||||||
let result = await import(grammarPath);
|
|
||||||
let grammarObj = result.default?.grammar ?? result.grammar;
|
|
||||||
|
|
||||||
if (globalThis.native && !grammarObj) {
|
|
||||||
grammarObj = module.exports.grammar;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
const result = await import(getEnv("TREE_SITTER_GRAMMAR_PATH"));
|
||||||
const object = {
|
const object = {
|
||||||
"$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/grammar.schema.json",
|
"$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/grammar.schema.json",
|
||||||
...grammarObj,
|
...(result.default?.grammar ?? result.grammar)
|
||||||
};
|
};
|
||||||
const output = JSON.stringify(object);
|
const output = JSON.stringify(object);
|
||||||
|
|
||||||
if (globalThis.native) {
|
if (globalThis.process) { // Node/Bun
|
||||||
globalThis.output = output;
|
|
||||||
} else if (globalThis.process) { // Node/Bun
|
|
||||||
process.stdout.write(output);
|
process.stdout.write(output);
|
||||||
} else if (globalThis.Deno) { // Deno
|
} else if (globalThis.Deno) { // Deno
|
||||||
Deno.stdout.writeSync(new TextEncoder().encode(output));
|
Deno.stdout.writeSync(new TextEncoder().encode(output));
|
||||||
1
cli/generate/src/grammar_files.rs
Normal file
1
cli/generate/src/grammar_files.rs
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
|
||||||
|
|
@@ -2,7 +2,7 @@ use std::{collections::HashMap, fmt};
 
 use super::{
     nfa::Nfa,
-    rules::{Alias, Associativity, Precedence, Rule, Symbol, TokenSet},
+    rules::{Alias, Associativity, Precedence, Rule, Symbol},
 };
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
@@ -39,13 +39,6 @@ pub struct InputGrammar {
     pub variables_to_inline: Vec<String>,
     pub supertype_symbols: Vec<String>,
     pub word_token: Option<String>,
-    pub reserved_words: Vec<ReservedWordContext<Rule>>,
-}
-
-#[derive(Debug, Default, PartialEq, Eq)]
-pub struct ReservedWordContext<T> {
-    pub name: String,
-    pub reserved_words: Vec<T>,
 }
 
 // Extracted lexical grammar
@@ -73,20 +66,8 @@ pub struct ProductionStep {
     pub associativity: Option<Associativity>,
     pub alias: Option<Alias>,
     pub field_name: Option<String>,
-    pub reserved_word_set_id: ReservedWordSetId,
 }
 
-#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)]
-pub struct ReservedWordSetId(pub usize);
-
-impl fmt::Display for ReservedWordSetId {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        self.0.fmt(f)
-    }
-}
-
-pub const NO_RESERVED_WORDS: ReservedWordSetId = ReservedWordSetId(usize::MAX);
-
 #[derive(Clone, Debug, Default, PartialEq, Eq)]
 pub struct Production {
     pub steps: Vec<ProductionStep>,
@@ -123,44 +104,51 @@ pub struct SyntaxGrammar {
     pub variables_to_inline: Vec<Symbol>,
     pub word_token: Option<Symbol>,
     pub precedence_orderings: Vec<Vec<PrecedenceEntry>>,
-    pub reserved_word_sets: Vec<TokenSet>,
 }
 
 #[cfg(test)]
 impl ProductionStep {
     #[must_use]
-    pub fn new(symbol: Symbol) -> Self {
+    pub const fn new(symbol: Symbol) -> Self {
         Self {
             symbol,
             precedence: Precedence::None,
             associativity: None,
             alias: None,
             field_name: None,
-            reserved_word_set_id: ReservedWordSetId::default(),
         }
     }
 
-    pub fn with_prec(
-        mut self,
-        precedence: Precedence,
-        associativity: Option<Associativity>,
-    ) -> Self {
-        self.precedence = precedence;
-        self.associativity = associativity;
-        self
+    pub fn with_prec(self, precedence: Precedence, associativity: Option<Associativity>) -> Self {
+        Self {
+            symbol: self.symbol,
+            precedence,
+            associativity,
+            alias: self.alias,
+            field_name: self.field_name,
+        }
     }
 
-    pub fn with_alias(mut self, value: &str, is_named: bool) -> Self {
-        self.alias = Some(Alias {
-            value: value.to_string(),
-            is_named,
-        });
-        self
+    pub fn with_alias(self, value: &str, is_named: bool) -> Self {
+        Self {
+            symbol: self.symbol,
+            precedence: self.precedence,
+            associativity: self.associativity,
+            alias: Some(Alias {
+                value: value.to_string(),
+                is_named,
+            }),
+            field_name: self.field_name,
+        }
     }
 
-    pub fn with_field_name(mut self, name: &str) -> Self {
-        self.field_name = Some(name.to_string());
-        self
+    pub fn with_field_name(self, name: &str) -> Self {
+        Self {
+            symbol: self.symbol,
+            precedence: self.precedence,
+            associativity: self.associativity,
+            alias: self.alias,
+            field_name: Some(name.to_string()),
+        }
     }
 }
@@ -253,7 +241,7 @@ impl InlinedProductionMap {
         step_index: u32,
     ) -> Option<impl Iterator<Item = &'a Production> + 'a> {
         self.production_map
-            .get(&(std::ptr::from_ref::<Production>(production), step_index))
+            .get(&(production as *const Production, step_index))
            .map(|production_indices| {
                production_indices
                    .iter()
257 cli/generate/src/lib.rs Normal file
@@ -0,0 +1,257 @@
|
||||||
|
use std::{
|
||||||
|
env, fs,
|
||||||
|
io::Write,
|
||||||
|
path::{Path, PathBuf},
|
||||||
|
process::{Command, Stdio},
|
||||||
|
};
|
||||||
|
|
||||||
|
use anyhow::{anyhow, Context, Result};
|
||||||
|
use build_tables::build_tables;
|
||||||
|
use grammars::InputGrammar;
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
use parse_grammar::parse_grammar;
|
||||||
|
use prepare_grammar::prepare_grammar;
|
||||||
|
use regex::{Regex, RegexBuilder};
|
||||||
|
use render::render_c_code;
|
||||||
|
use semver::Version;
|
||||||
|
|
||||||
|
mod build_tables;
|
||||||
|
mod dedup;
|
||||||
|
mod grammar_files;
|
||||||
|
mod grammars;
|
||||||
|
mod nfa;
|
||||||
|
mod node_types;
|
||||||
|
pub mod parse_grammar;
|
||||||
|
mod prepare_grammar;
|
||||||
|
mod render;
|
||||||
|
mod rules;
|
||||||
|
mod tables;
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*")
|
||||||
|
.multi_line(true)
|
||||||
|
.build()
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
struct GeneratedParser {
|
||||||
|
c_code: String,
|
||||||
|
node_types_json: String,
|
||||||
|
header_tail: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const ALLOC_HEADER: &str = include_str!("templates/alloc.h");
|
||||||
|
pub const ARRAY_HEADER: &str = include_str!("templates/array.h");
|
||||||
|
|
||||||
|
pub fn generate_parser_in_directory(
|
||||||
|
repo_path: &Path,
|
||||||
|
out_path: Option<&str>,
|
||||||
|
grammar_path: Option<&str>,
|
||||||
|
abi_version: usize,
|
||||||
|
report_symbol_name: Option<&str>,
|
||||||
|
js_runtime: Option<&str>,
|
||||||
|
) -> Result<()> {
|
||||||
|
let mut repo_path = repo_path.to_owned();
|
||||||
|
let mut grammar_path = grammar_path;
|
||||||
|
|
||||||
|
// Populate a new empty grammar directory.
|
||||||
|
if let Some(path) = grammar_path {
|
||||||
|
let path = PathBuf::from(path);
|
||||||
|
if !path
|
||||||
|
.try_exists()
|
||||||
|
.with_context(|| "Some error with specified path")?
|
||||||
|
{
|
||||||
|
fs::create_dir_all(&path)?;
|
||||||
|
grammar_path = None;
|
||||||
|
repo_path = path;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let grammar_path = grammar_path.map_or_else(|| repo_path.join("grammar.js"), PathBuf::from);
|
||||||
|
|
||||||
|
// Read the grammar file.
|
||||||
|
let grammar_json = load_grammar_file(&grammar_path, js_runtime)?;
|
||||||
|
|
||||||
|
let src_path = out_path.map_or_else(|| repo_path.join("src"), PathBuf::from);
|
||||||
|
let header_path = src_path.join("tree_sitter");
|
||||||
|
|
||||||
|
// Ensure that the output directories exist.
|
||||||
|
fs::create_dir_all(&src_path)?;
|
||||||
|
fs::create_dir_all(&header_path)?;
|
||||||
|
|
||||||
|
if grammar_path.file_name().unwrap() != "grammar.json" {
|
||||||
|
fs::write(src_path.join("grammar.json"), &grammar_json)
|
||||||
|
.with_context(|| format!("Failed to write grammar.json to {src_path:?}"))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse and preprocess the grammar.
|
||||||
|
let input_grammar = parse_grammar(&grammar_json)?;
|
||||||
|
|
||||||
|
// Generate the parser and related files.
|
||||||
|
let GeneratedParser {
|
||||||
|
c_code,
|
||||||
|
node_types_json,
|
||||||
|
header_tail,
|
||||||
|
} = generate_parser_for_grammar_with_opts(&input_grammar, abi_version, report_symbol_name)?;
|
||||||
|
|
||||||
|
write_file(&src_path.join("parser.c"), c_code)?;
|
||||||
|
write_file(&src_path.join("node-types.json"), node_types_json)?;
|
||||||
|
write_file(&header_path.join("alloc.h"), ALLOC_HEADER)?;
|
||||||
|
write_file(&header_path.join("array.h"), ARRAY_HEADER)?;
|
||||||
|
write_file(
|
||||||
|
&header_path.join("parser.h"),
|
||||||
|
format!("{}\n{header_tail}", tree_sitter::PARSER_HEADER),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> {
|
||||||
|
let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
|
||||||
|
let input_grammar = parse_grammar(&grammar_json)?;
|
||||||
|
let parser =
|
||||||
|
generate_parser_for_grammar_with_opts(&input_grammar, tree_sitter::LANGUAGE_VERSION, None)?;
|
||||||
|
Ok((input_grammar.name, parser.c_code))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn generate_parser_for_grammar_with_opts(
|
||||||
|
input_grammar: &InputGrammar,
|
||||||
|
abi_version: usize,
|
||||||
|
report_symbol_name: Option<&str>,
|
||||||
|
) -> Result<GeneratedParser> {
|
||||||
|
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
|
||||||
|
prepare_grammar(input_grammar)?;
|
||||||
|
let variable_info =
|
||||||
|
node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?;
|
||||||
|
let node_types_json = node_types::generate_node_types_json(
|
||||||
|
&syntax_grammar,
|
||||||
|
&lexical_grammar,
|
||||||
|
&simple_aliases,
|
||||||
|
&variable_info,
|
||||||
|
);
|
||||||
|
let tables = build_tables(
|
||||||
|
&syntax_grammar,
|
||||||
|
&lexical_grammar,
|
||||||
|
&simple_aliases,
|
||||||
|
&variable_info,
|
||||||
|
&inlines,
|
||||||
|
report_symbol_name,
|
||||||
|
)?;
|
||||||
|
let (c_code, header_tail) = render_c_code(
|
||||||
|
&input_grammar.name,
|
||||||
|
tables,
|
||||||
|
syntax_grammar,
|
||||||
|
lexical_grammar,
|
||||||
|
simple_aliases,
|
||||||
|
abi_version,
|
||||||
|
);
|
||||||
|
Ok(GeneratedParser {
|
||||||
|
c_code,
|
||||||
|
node_types_json: serde_json::to_string_pretty(&node_types_json).unwrap(),
|
||||||
|
header_tail,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn load_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
|
||||||
|
if grammar_path.is_dir() {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"Path to a grammar file with `.js` or `.json` extension is required"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
match grammar_path.extension().and_then(|e| e.to_str()) {
|
||||||
|
Some("js") => Ok(load_js_grammar_file(grammar_path, js_runtime)
|
||||||
|
.with_context(|| "Failed to load grammar.js")?),
|
||||||
|
Some("json") => {
|
||||||
|
Ok(fs::read_to_string(grammar_path).with_context(|| "Failed to load grammar.json")?)
|
||||||
|
}
|
||||||
|
_ => Err(anyhow!("Unknown grammar file extension: {grammar_path:?}",)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
|
||||||
|
let grammar_path = fs::canonicalize(grammar_path)?;
|
||||||
|
|
||||||
|
#[cfg(windows)]
|
||||||
|
let grammar_path = url::Url::from_file_path(grammar_path)
|
||||||
|
.expect("Failed to convert path to URL")
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
let js_runtime = js_runtime.unwrap_or("node");
|
||||||
|
|
||||||
|
let mut js_command = Command::new(js_runtime);
|
||||||
|
match js_runtime {
|
||||||
|
"node" => {
|
||||||
|
js_command.args(["--input-type=module", "-"]);
|
||||||
|
}
|
||||||
|
"bun" => {
|
||||||
|
js_command.arg("-");
|
||||||
|
}
|
||||||
|
"deno" => {
|
||||||
|
js_command.args(["run", "--allow-all", "-"]);
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut js_process = js_command
|
||||||
|
.env("TREE_SITTER_GRAMMAR_PATH", grammar_path)
|
||||||
|
.stdin(Stdio::piped())
|
||||||
|
.stdout(Stdio::piped())
|
||||||
|
.spawn()
|
||||||
|
.with_context(|| format!("Failed to run `{js_runtime}`"))?;
|
||||||
|
|
||||||
|
let mut js_stdin = js_process
|
||||||
|
.stdin
|
||||||
|
.take()
|
||||||
|
.with_context(|| format!("Failed to open stdin for {js_runtime}"))?;
|
||||||
|
let cli_version = Version::parse(env!("CARGO_PKG_VERSION"))
|
||||||
|
.with_context(|| "Could not parse this package's version as semver.")?;
|
||||||
|
write!(
|
||||||
|
js_stdin,
|
||||||
|
"globalThis.TREE_SITTER_CLI_VERSION_MAJOR = {};
|
||||||
|
globalThis.TREE_SITTER_CLI_VERSION_MINOR = {};
|
||||||
|
globalThis.TREE_SITTER_CLI_VERSION_PATCH = {};",
|
||||||
|
cli_version.major, cli_version.minor, cli_version.patch,
|
||||||
|
)
|
||||||
|
.with_context(|| format!("Failed to write tree-sitter version to {js_runtime}'s stdin"))?;
|
||||||
|
js_stdin
|
||||||
|
.write(include_bytes!("./dsl.js"))
|
||||||
|
.with_context(|| format!("Failed to write grammar dsl to {js_runtime}'s stdin"))?;
|
||||||
|
drop(js_stdin);
|
||||||
|
|
||||||
|
let output = js_process
|
||||||
|
.wait_with_output()
|
||||||
|
.with_context(|| format!("Failed to read output from {js_runtime}"))?;
|
||||||
|
match output.status.code() {
|
||||||
|
None => panic!("{js_runtime} process was killed"),
|
||||||
|
Some(0) => {
|
||||||
|
let stdout = String::from_utf8(output.stdout)
|
||||||
|
.with_context(|| format!("Got invalid UTF8 from {js_runtime}"))?;
|
||||||
|
|
||||||
|
let mut grammar_json = &stdout[..];
|
||||||
|
|
||||||
|
if let Some(pos) = stdout.rfind('\n') {
|
||||||
|
// If there's a newline, split the last line from the rest of the output
|
||||||
|
let node_output = &stdout[..pos];
|
||||||
|
grammar_json = &stdout[pos + 1..];
|
||||||
|
|
||||||
|
let mut stdout = std::io::stdout().lock();
|
||||||
|
stdout.write_all(node_output.as_bytes())?;
|
||||||
|
stdout.write_all(b"\n")?;
|
||||||
|
stdout.flush()?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(serde_json::to_string_pretty(
|
||||||
|
&serde_json::from_str::<serde_json::Value>(grammar_json)
|
||||||
|
.with_context(|| "Failed to parse grammar JSON")?,
|
||||||
|
)
|
||||||
|
.with_context(|| "Failed to serialize grammar JSON")?
|
||||||
|
+ "\n")
|
||||||
|
}
|
||||||
|
Some(code) => Err(anyhow!("{js_runtime} process exited with status {code}")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn write_file(path: &Path, body: impl AsRef<[u8]>) -> Result<()> {
|
||||||
|
fs::write(path, body)
|
||||||
|
.with_context(|| format!("Failed to write {:?}", path.file_name().unwrap()))
|
||||||
|
}
|
||||||
|
|
@@ -434,7 +434,6 @@ impl Nfa {
     }
 
     pub fn last_state_id(&self) -> u32 {
-        assert!(!self.states.is_empty());
         self.states.len() as u32 - 1
     }
 }
@@ -950,19 +949,20 @@ mod tests {
            assert_eq!(
                left.remove_intersection(&mut right),
                row.intersection,
-                "row {i}a: {:?} && {:?}",
+                "row {}a: {:?} && {:?}",
+                i,
                row.left,
                row.right
            );
            assert_eq!(
                left, row.left_only,
-                "row {i}a: {:?} - {:?}",
-                row.left, row.right
+                "row {}a: {:?} - {:?}",
+                i, row.left, row.right
            );
            assert_eq!(
                right, row.right_only,
-                "row {i}a: {:?} - {:?}",
-                row.right, row.left
+                "row {}a: {:?} - {:?}",
+                i, row.right, row.left
            );
 
            let mut left = row.left.clone();
@@ -970,25 +970,27 @@ mod tests {
            assert_eq!(
                right.remove_intersection(&mut left),
                row.intersection,
-                "row {i}b: {:?} && {:?}",
+                "row {}b: {:?} && {:?}",
+                i,
                row.left,
                row.right
            );
            assert_eq!(
                left, row.left_only,
-                "row {i}b: {:?} - {:?}",
-                row.left, row.right
+                "row {}b: {:?} - {:?}",
+                i, row.left, row.right
            );
            assert_eq!(
                right, row.right_only,
-                "row {i}b: {:?} - {:?}",
-                row.right, row.left
+                "row {}b: {:?} - {:?}",
+                i, row.right, row.left
            );
 
            assert_eq!(
                row.left.clone().difference(row.right.clone()),
                row.left_only,
-                "row {i}b: {:?} -- {:?}",
+                "row {}b: {:?} -- {:?}",
+                i,
                row.left,
                row.right
            );
@@ -1,7 +1,10 @@
-use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
+use std::{
+    cmp::Ordering,
+    collections::{BTreeMap, HashMap, HashSet},
+};
 
+use anyhow::{anyhow, Result};
 use serde::Serialize;
-use thiserror::Error;
 
 use super::{
     grammars::{LexicalGrammar, SyntaxGrammar, VariableType},
@@ -29,15 +32,12 @@ pub struct VariableInfo {
 }
 
 #[derive(Debug, Serialize, PartialEq, Eq, Default, PartialOrd, Ord)]
-#[cfg(feature = "load")]
 pub struct NodeInfoJSON {
     #[serde(rename = "type")]
     kind: String,
     named: bool,
     #[serde(skip_serializing_if = "std::ops::Not::not")]
     root: bool,
-    #[serde(skip_serializing_if = "std::ops::Not::not")]
-    extra: bool,
     #[serde(skip_serializing_if = "Option::is_none")]
     fields: Option<BTreeMap<String, FieldInfoJSON>>,
     #[serde(skip_serializing_if = "Option::is_none")]
@@ -47,7 +47,6 @@ pub struct NodeInfoJSON {
 }
 
 #[derive(Clone, Debug, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[cfg(feature = "load")]
 pub struct NodeTypeJSON {
     #[serde(rename = "type")]
     kind: String,
@@ -55,7 +54,6 @@ pub struct NodeTypeJSON {
 }
 
 #[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)]
-#[cfg(feature = "load")]
 pub struct FieldInfoJSON {
     multiple: bool,
     required: bool,
@@ -69,7 +67,6 @@ pub struct ChildQuantity {
     multiple: bool,
 }
 
-#[cfg(feature = "load")]
 impl Default for FieldInfoJSON {
     fn default() -> Self {
         Self {
@@ -105,7 +102,7 @@ impl ChildQuantity {
         }
     }
 
-    const fn append(&mut self, other: Self) {
+    fn append(&mut self, other: Self) {
         if other.exists {
             if self.exists || other.multiple {
                 self.multiple = true;
@@ -117,7 +114,7 @@ impl ChildQuantity {
         }
     }
 
-    const fn union(&mut self, other: Self) -> bool {
+    fn union(&mut self, other: Self) -> bool {
         let mut result = false;
         if !self.exists && other.exists {
             result = true;
@@ -135,14 +132,6 @@ impl ChildQuantity {
     }
 }
 
-pub type VariableInfoResult<T> = Result<T, VariableInfoError>;
-
-#[derive(Debug, Error, Serialize)]
-pub enum VariableInfoError {
-    #[error("Grammar error: Supertype symbols must always have a single visible child, but `{0}` can have multiple")]
-    InvalidSupertype(String),
-}
-
 /// Compute a summary of the public-facing structure of each variable in the
 /// grammar. Each variable in the grammar corresponds to a distinct public-facing
 /// node type.
@@ -168,7 +157,7 @@ pub fn get_variable_info(
     syntax_grammar: &SyntaxGrammar,
     lexical_grammar: &LexicalGrammar,
     default_aliases: &AliasMap,
-) -> VariableInfoResult<Vec<VariableInfo>> {
+) -> Result<Vec<VariableInfo>> {
     let child_type_is_visible = |t: &ChildType| {
         variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous
     };
@@ -349,7 +338,13 @@ pub fn get_variable_info(
     for supertype_symbol in &syntax_grammar.supertype_symbols {
         if result[supertype_symbol.index].has_multi_step_production {
             let variable = &syntax_grammar.variables[supertype_symbol.index];
-            Err(VariableInfoError::InvalidSupertype(variable.name.clone()))?;
+            return Err(anyhow!(
+                concat!(
+                    "Grammar error: Supertype symbols must always ",
+                    "have a single visible child, but `{}` can have multiple"
+                ),
+                variable.name
+            ));
         }
     }
@@ -374,105 +369,12 @@ pub fn get_variable_info(
     Ok(result)
 }
 
-fn get_aliases_by_symbol(
-    syntax_grammar: &SyntaxGrammar,
-    default_aliases: &AliasMap,
-) -> HashMap<Symbol, BTreeSet<Option<Alias>>> {
-    let mut aliases_by_symbol = HashMap::new();
-    for (symbol, alias) in default_aliases {
-        aliases_by_symbol.insert(*symbol, {
-            let mut aliases = BTreeSet::new();
-            aliases.insert(Some(alias.clone()));
-            aliases
-        });
-    }
-    for extra_symbol in &syntax_grammar.extra_symbols {
-        if !default_aliases.contains_key(extra_symbol) {
-            aliases_by_symbol
-                .entry(*extra_symbol)
-                .or_insert_with(BTreeSet::new)
-                .insert(None);
-        }
-    }
-    for variable in &syntax_grammar.variables {
-        for production in &variable.productions {
-            for step in &production.steps {
-                aliases_by_symbol
-                    .entry(step.symbol)
-                    .or_insert_with(BTreeSet::new)
-                    .insert(
-                        step.alias
-                            .as_ref()
-                            .or_else(|| default_aliases.get(&step.symbol))
-                            .cloned(),
-                    );
-            }
-        }
-    }
-    aliases_by_symbol.insert(
-        Symbol::non_terminal(0),
-        std::iter::once(&None).cloned().collect(),
-    );
-    aliases_by_symbol
-}
-
-pub fn get_supertype_symbol_map(
-    syntax_grammar: &SyntaxGrammar,
-    default_aliases: &AliasMap,
-    variable_info: &[VariableInfo],
-) -> BTreeMap<Symbol, Vec<ChildType>> {
-    let aliases_by_symbol = get_aliases_by_symbol(syntax_grammar, default_aliases);
-    let mut supertype_symbol_map = BTreeMap::new();
-
-    let mut symbols_by_alias = HashMap::new();
-    for (symbol, aliases) in &aliases_by_symbol {
-        for alias in aliases.iter().flatten() {
-            symbols_by_alias
-                .entry(alias)
-                .or_insert_with(Vec::new)
-                .push(*symbol);
-        }
-    }
-
-    for (i, info) in variable_info.iter().enumerate() {
-        let symbol = Symbol::non_terminal(i);
-        if syntax_grammar.supertype_symbols.contains(&symbol) {
-            let subtypes = info.children.types.clone();
-            supertype_symbol_map.insert(symbol, subtypes);
-        }
-    }
-    supertype_symbol_map
-}
-
-#[cfg(feature = "load")]
-pub type SuperTypeCycleResult<T> = Result<T, SuperTypeCycleError>;
-
-#[derive(Debug, Error, Serialize)]
-pub struct SuperTypeCycleError {
-    items: Vec<String>,
-}
-
-impl std::fmt::Display for SuperTypeCycleError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "Dependency cycle detected in node types:")?;
-        for (i, item) in self.items.iter().enumerate() {
-            write!(f, " {item}")?;
-            if i < self.items.len() - 1 {
-                write!(f, ",")?;
-            }
-        }
-
-        Ok(())
-    }
-}
-
-#[cfg(feature = "load")]
 pub fn generate_node_types_json(
     syntax_grammar: &SyntaxGrammar,
     lexical_grammar: &LexicalGrammar,
     default_aliases: &AliasMap,
     variable_info: &[VariableInfo],
-) -> SuperTypeCycleResult<Vec<NodeInfoJSON>> {
+) -> Vec<NodeInfoJSON> {
     let mut node_types_json = BTreeMap::new();
 
     let child_type_to_node_type = |child_type: &ChildType| match child_type {
@ -528,32 +430,41 @@ pub fn generate_node_types_json(
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let aliases_by_symbol = get_aliases_by_symbol(syntax_grammar, default_aliases);
|
let mut aliases_by_symbol = HashMap::new();
|
||||||
|
for (symbol, alias) in default_aliases {
|
||||||
let empty = BTreeSet::new();
|
aliases_by_symbol.insert(*symbol, {
|
||||||
let extra_names = syntax_grammar
|
let mut aliases = HashSet::new();
|
||||||
.extra_symbols
|
aliases.insert(Some(alias.clone()));
|
||||||
.iter()
|
aliases
|
||||||
.flat_map(|symbol| {
|
});
|
||||||
|
}
|
||||||
|
for extra_symbol in &syntax_grammar.extra_symbols {
|
||||||
|
if !default_aliases.contains_key(extra_symbol) {
|
||||||
aliases_by_symbol
|
aliases_by_symbol
|
||||||
.get(symbol)
|
.entry(*extra_symbol)
|
||||||
.unwrap_or(&empty)
|
.or_insert_with(HashSet::new)
|
||||||
.iter()
|
.insert(None);
|
||||||
.map(|alias| {
|
}
|
||||||
alias.as_ref().map_or(
|
}
|
||||||
match symbol.kind {
|
for variable in &syntax_grammar.variables {
|
||||||
SymbolType::NonTerminal => &syntax_grammar.variables[symbol.index].name,
|
for production in &variable.productions {
|
||||||
SymbolType::Terminal => &lexical_grammar.variables[symbol.index].name,
|
for step in &production.steps {
|
||||||
SymbolType::External => {
|
aliases_by_symbol
|
||||||
&syntax_grammar.external_tokens[symbol.index].name
|
.entry(step.symbol)
|
||||||
}
|
.or_insert_with(HashSet::new)
|
||||||
_ => unreachable!(),
|
.insert(
|
||||||
},
|
step.alias
|
||||||
|alias| &alias.value,
|
.as_ref()
|
||||||
)
|
.or_else(|| default_aliases.get(&step.symbol))
|
||||||
})
|
.cloned(),
|
||||||
})
|
);
|
||||||
.collect::<HashSet<_>>();
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
aliases_by_symbol.insert(
|
||||||
|
Symbol::non_terminal(0),
|
||||||
|
std::iter::once(&None).cloned().collect(),
|
||||||
|
);
|
||||||
|
|
||||||
let mut subtype_map = Vec::new();
|
let mut subtype_map = Vec::new();
|
||||||
for (i, info) in variable_info.iter().enumerate() {
|
for (i, info) in variable_info.iter().enumerate() {
|
||||||
|
|
@ -567,7 +478,6 @@ pub fn generate_node_types_json(
|
||||||
kind: variable.name.clone(),
|
kind: variable.name.clone(),
|
||||||
named: true,
|
named: true,
|
||||||
root: false,
|
root: false,
|
||||||
extra: extra_names.contains(&variable.name),
|
|
||||||
fields: None,
|
fields: None,
|
||||||
children: None,
|
children: None,
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
|
|
@ -589,7 +499,7 @@ pub fn generate_node_types_json(
|
||||||
} else if !syntax_grammar.variables_to_inline.contains(&symbol) {
|
} else if !syntax_grammar.variables_to_inline.contains(&symbol) {
|
||||||
// If a rule is aliased under multiple names, then its information
|
// If a rule is aliased under multiple names, then its information
|
||||||
// contributes to multiple entries in the final JSON.
|
// contributes to multiple entries in the final JSON.
|
||||||
for alias in aliases_by_symbol.get(&symbol).unwrap_or(&BTreeSet::new()) {
|
for alias in aliases_by_symbol.get(&symbol).unwrap_or(&HashSet::new()) {
|
||||||
let kind;
|
let kind;
|
||||||
let is_named;
|
let is_named;
|
||||||
if let Some(alias) = alias {
|
if let Some(alias) = alias {
|
||||||
|
|
@ -611,7 +521,6 @@ pub fn generate_node_types_json(
|
||||||
kind: kind.clone(),
|
kind: kind.clone(),
|
||||||
named: is_named,
|
named: is_named,
|
||||||
root: i == 0,
|
root: i == 0,
|
||||||
extra: extra_names.contains(&kind),
|
|
||||||
fields: Some(BTreeMap::new()),
|
fields: Some(BTreeMap::new()),
|
||||||
children: None,
|
children: None,
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
|
|
@ -650,33 +559,15 @@ pub fn generate_node_types_json(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort the subtype map topologically so that subtypes are listed before their supertypes.
|
// Sort the subtype map so that subtypes are listed before their supertypes.
|
||||||
let mut sorted_kinds = Vec::with_capacity(subtype_map.len());
|
|
||||||
let mut top_sort = topological_sort::TopologicalSort::<String>::new();
|
|
||||||
for (supertype, subtypes) in &subtype_map {
|
|
||||||
for subtype in subtypes {
|
|
||||||
top_sort.add_dependency(subtype.kind.clone(), supertype.kind.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
loop {
|
|
||||||
let mut next_kinds = top_sort.pop_all();
|
|
||||||
match (next_kinds.is_empty(), top_sort.is_empty()) {
|
|
||||||
(true, true) => break,
|
|
||||||
(true, false) => {
|
|
||||||
let mut items = top_sort.collect::<Vec<String>>();
|
|
||||||
items.sort();
|
|
||||||
return Err(SuperTypeCycleError { items });
|
|
||||||
}
|
|
||||||
(false, _) => {
|
|
||||||
next_kinds.sort();
|
|
||||||
sorted_kinds.extend(next_kinds);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
subtype_map.sort_by(|a, b| {
|
subtype_map.sort_by(|a, b| {
|
||||||
let a_idx = sorted_kinds.iter().position(|n| n.eq(&a.0.kind)).unwrap();
|
if b.1.contains(&a.0) {
|
||||||
let b_idx = sorted_kinds.iter().position(|n| n.eq(&b.0.kind)).unwrap();
|
Ordering::Less
|
||||||
a_idx.cmp(&b_idx)
|
} else if a.1.contains(&b.0) {
|
||||||
|
Ordering::Greater
|
||||||
|
} else {
|
||||||
|
Ordering::Equal
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
for node_type_json in node_types_json.values_mut() {
|
for node_type_json in node_types_json.values_mut() {
|
||||||
|
|
@ -700,6 +591,7 @@ pub fn generate_node_types_json(
|
||||||
|
|
||||||
let mut anonymous_node_types = Vec::new();
|
let mut anonymous_node_types = Vec::new();
|
||||||
|
|
||||||
|
let empty = HashSet::new();
|
||||||
let regular_tokens = lexical_grammar
|
let regular_tokens = lexical_grammar
|
||||||
.variables
|
.variables
|
||||||
.iter()
|
.iter()
|
||||||
|
|
@ -744,7 +636,6 @@ pub fn generate_node_types_json(
|
||||||
kind: name.clone(),
|
kind: name.clone(),
|
||||||
named: true,
|
named: true,
|
||||||
root: false,
|
root: false,
|
||||||
extra: extra_names.contains(&name),
|
|
||||||
fields: None,
|
fields: None,
|
||||||
children: None,
|
children: None,
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
|
|
@ -762,7 +653,6 @@ pub fn generate_node_types_json(
|
||||||
kind: name.clone(),
|
kind: name.clone(),
|
||||||
named: false,
|
named: false,
|
||||||
root: false,
|
root: false,
|
||||||
extra: extra_names.contains(&name),
|
|
||||||
fields: None,
|
fields: None,
|
||||||
children: None,
|
children: None,
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
|
|
@ -783,15 +673,11 @@ pub fn generate_node_types_json(
|
||||||
a_is_leaf.cmp(&b_is_leaf)
|
a_is_leaf.cmp(&b_is_leaf)
|
||||||
})
|
})
|
||||||
.then_with(|| a.kind.cmp(&b.kind))
|
.then_with(|| a.kind.cmp(&b.kind))
|
||||||
.then_with(|| a.named.cmp(&b.named))
|
|
||||||
.then_with(|| a.root.cmp(&b.root))
|
|
||||||
.then_with(|| a.extra.cmp(&b.extra))
|
|
||||||
});
|
});
|
||||||
result.dedup();
|
result.dedup();
|
||||||
Ok(result)
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "load")]
|
|
||||||
fn process_supertypes(info: &mut FieldInfoJSON, subtype_map: &[(NodeTypeJSON, Vec<NodeTypeJSON>)]) {
|
fn process_supertypes(info: &mut FieldInfoJSON, subtype_map: &[(NodeTypeJSON, Vec<NodeTypeJSON>)]) {
|
||||||
for (supertype, subtypes) in subtype_map {
|
for (supertype, subtypes) in subtype_map {
|
||||||
if info.types.contains(supertype) {
|
if info.types.contains(supertype) {
|
||||||
|
|
@ -828,17 +714,17 @@ fn extend_sorted<'a, T>(vec: &mut Vec<T>, values: impl IntoIterator<Item = &'a T
|
||||||
where
|
where
|
||||||
T: 'a + Clone + Eq + Ord,
|
T: 'a + Clone + Eq + Ord,
|
||||||
{
|
{
|
||||||
values.into_iter().fold(false, |acc, value| {
|
values.into_iter().any(|value| {
|
||||||
if let Err(i) = vec.binary_search(value) {
|
if let Err(i) = vec.binary_search(value) {
|
||||||
vec.insert(i, value.clone());
|
vec.insert(i, value.clone());
|
||||||
true
|
true
|
||||||
} else {
|
} else {
|
||||||
acc
|
false
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(all(test, feature = "load"))]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::{
|
use crate::{
|
||||||
|
|
@ -875,8 +761,7 @@ mod tests {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
..Default::default()
|
..Default::default()
|
||||||
})
|
});
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
assert_eq!(node_types.len(), 3);
|
assert_eq!(node_types.len(), 3);
|
||||||
|
|
||||||
|
|
@ -886,7 +771,6 @@ mod tests {
|
||||||
kind: "v1".to_string(),
|
kind: "v1".to_string(),
|
||||||
named: true,
|
named: true,
|
||||||
root: true,
|
root: true,
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
children: None,
|
children: None,
|
||||||
fields: Some(
|
fields: Some(
|
||||||
|
|
@ -925,7 +809,6 @@ mod tests {
|
||||||
kind: ";".to_string(),
|
kind: ";".to_string(),
|
||||||
named: false,
|
named: false,
|
||||||
root: false,
|
root: false,
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
children: None,
|
children: None,
|
||||||
fields: None
|
fields: None
|
||||||
|
|
@ -937,7 +820,6 @@ mod tests {
|
||||||
kind: "v2".to_string(),
|
kind: "v2".to_string(),
|
||||||
named: true,
|
named: true,
|
||||||
root: false,
|
root: false,
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
children: None,
|
children: None,
|
||||||
fields: None
|
fields: None
|
||||||
|
|
@ -965,9 +847,7 @@ mod tests {
|
||||||
},
|
},
|
||||||
// This rule is not reachable from the start symbol, but
|
// This rule is not reachable from the start symbol, but
|
||||||
// it is reachable from the 'extra_symbols' so it
|
// it is reachable from the 'extra_symbols' so it
|
||||||
// should be present in the node_types.
|
// should be present in the node_types
|
||||||
// But because it's only a literal, it will get replaced by
|
|
||||||
// a lexical variable.
|
|
||||||
Variable {
|
Variable {
|
||||||
name: "v3".to_string(),
|
name: "v3".to_string(),
|
||||||
kind: VariableType::Named,
|
kind: VariableType::Named,
|
||||||
|
|
@ -975,8 +855,7 @@ mod tests {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
..Default::default()
|
..Default::default()
|
||||||
})
|
});
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
assert_eq!(node_types.len(), 4);
|
assert_eq!(node_types.len(), 4);
|
||||||
|
|
||||||
|
|
@ -986,7 +865,6 @@ mod tests {
|
||||||
kind: "v1".to_string(),
|
kind: "v1".to_string(),
|
||||||
named: true,
|
named: true,
|
||||||
root: true,
|
root: true,
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
children: None,
|
children: None,
|
||||||
fields: Some(
|
fields: Some(
|
||||||
|
|
@ -1025,7 +903,6 @@ mod tests {
|
||||||
kind: ";".to_string(),
|
kind: ";".to_string(),
|
||||||
named: false,
|
named: false,
|
||||||
root: false,
|
root: false,
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
children: None,
|
children: None,
|
||||||
fields: None
|
fields: None
|
||||||
|
|
@ -1037,7 +914,6 @@ mod tests {
|
||||||
kind: "v2".to_string(),
|
kind: "v2".to_string(),
|
||||||
named: true,
|
named: true,
|
||||||
root: false,
|
root: false,
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
children: None,
|
children: None,
|
||||||
fields: None
|
fields: None
|
||||||
|
|
@ -1049,119 +925,6 @@ mod tests {
|
||||||
kind: "v3".to_string(),
|
kind: "v3".to_string(),
|
||||||
named: true,
|
named: true,
|
||||||
root: false,
|
root: false,
|
||||||
extra: true,
|
|
||||||
subtypes: None,
|
|
||||||
children: None,
|
|
||||||
fields: None
|
|
||||||
}
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_node_types_deeper_extras() {
|
|
||||||
let node_types = get_node_types(&InputGrammar {
|
|
||||||
extra_symbols: vec![Rule::named("v3")],
|
|
||||||
variables: vec![
|
|
||||||
Variable {
|
|
||||||
name: "v1".to_string(),
|
|
||||||
kind: VariableType::Named,
|
|
||||||
rule: Rule::seq(vec![
|
|
||||||
Rule::field("f1".to_string(), Rule::named("v2")),
|
|
||||||
Rule::field("f2".to_string(), Rule::string(";")),
|
|
||||||
]),
|
|
||||||
},
|
|
||||||
Variable {
|
|
||||||
name: "v2".to_string(),
|
|
||||||
kind: VariableType::Named,
|
|
||||||
rule: Rule::string("x"),
|
|
||||||
},
|
|
||||||
// This rule is not reachable from the start symbol, but
|
|
||||||
// it is reachable from the 'extra_symbols' so it
|
|
||||||
// should be present in the node_types.
|
|
||||||
// Because it is not just a literal, it won't get replaced
|
|
||||||
// by a lexical variable.
|
|
||||||
Variable {
|
|
||||||
name: "v3".to_string(),
|
|
||||||
kind: VariableType::Named,
|
|
||||||
rule: Rule::seq(vec![Rule::string("y"), Rule::repeat(Rule::string("z"))]),
|
|
||||||
},
|
|
||||||
],
|
|
||||||
..Default::default()
|
|
||||||
})
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
assert_eq!(node_types.len(), 6);
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
node_types[0],
|
|
||||||
NodeInfoJSON {
|
|
||||||
kind: "v1".to_string(),
|
|
||||||
named: true,
|
|
||||||
root: true,
|
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
|
||||||
children: None,
|
|
||||||
fields: Some(
|
|
||||||
vec![
|
|
||||||
(
|
|
||||||
"f1".to_string(),
|
|
||||||
FieldInfoJSON {
|
|
||||||
multiple: false,
|
|
||||||
required: true,
|
|
||||||
types: vec![NodeTypeJSON {
|
|
||||||
kind: "v2".to_string(),
|
|
||||||
named: true,
|
|
||||||
}]
|
|
||||||
}
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"f2".to_string(),
|
|
||||||
FieldInfoJSON {
|
|
||||||
multiple: false,
|
|
||||||
required: true,
|
|
||||||
types: vec![NodeTypeJSON {
|
|
||||||
kind: ";".to_string(),
|
|
||||||
named: false,
|
|
||||||
}]
|
|
||||||
}
|
|
||||||
),
|
|
||||||
]
|
|
||||||
.into_iter()
|
|
||||||
.collect()
|
|
||||||
)
|
|
||||||
}
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
node_types[1],
|
|
||||||
NodeInfoJSON {
|
|
||||||
kind: "v3".to_string(),
|
|
||||||
named: true,
|
|
||||||
root: false,
|
|
||||||
extra: true,
|
|
||||||
subtypes: None,
|
|
||||||
children: None,
|
|
||||||
fields: Some(BTreeMap::default())
|
|
||||||
}
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
node_types[2],
|
|
||||||
NodeInfoJSON {
|
|
||||||
kind: ";".to_string(),
|
|
||||||
named: false,
|
|
||||||
root: false,
|
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
|
||||||
children: None,
|
|
||||||
fields: None
|
|
||||||
}
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
node_types[3],
|
|
||||||
NodeInfoJSON {
|
|
||||||
kind: "v2".to_string(),
|
|
||||||
named: true,
|
|
||||||
root: false,
|
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
children: None,
|
children: None,
|
||||||
fields: None
|
fields: None
|
||||||
|
|
@ -1200,8 +963,7 @@ mod tests {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
..Default::default()
|
..Default::default()
|
||||||
})
|
});
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
node_types[0],
|
node_types[0],
|
||||||
|
|
@ -1209,7 +971,6 @@ mod tests {
|
||||||
kind: "_v2".to_string(),
|
kind: "_v2".to_string(),
|
||||||
named: true,
|
named: true,
|
||||||
root: false,
|
root: false,
|
||||||
extra: false,
|
|
||||||
fields: None,
|
fields: None,
|
||||||
children: None,
|
children: None,
|
||||||
subtypes: Some(vec![
|
subtypes: Some(vec![
|
||||||
|
|
@ -1234,7 +995,6 @@ mod tests {
|
||||||
kind: "v1".to_string(),
|
kind: "v1".to_string(),
|
||||||
named: true,
|
named: true,
|
||||||
root: true,
|
root: true,
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
children: None,
|
children: None,
|
||||||
fields: Some(
|
fields: Some(
|
||||||
|
|
@ -1290,8 +1050,7 @@ mod tests {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
..Default::default()
|
..Default::default()
|
||||||
})
|
});
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
node_types[0],
|
node_types[0],
|
||||||
|
|
@ -1299,7 +1058,6 @@ mod tests {
|
||||||
kind: "v1".to_string(),
|
kind: "v1".to_string(),
|
||||||
named: true,
|
named: true,
|
||||||
root: true,
|
root: true,
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
children: Some(FieldInfoJSON {
|
children: Some(FieldInfoJSON {
|
||||||
multiple: true,
|
multiple: true,
|
||||||
|
|
@ -1338,7 +1096,6 @@ mod tests {
|
||||||
kind: "v2".to_string(),
|
kind: "v2".to_string(),
|
||||||
named: true,
|
named: true,
|
||||||
root: false,
|
root: false,
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
children: Some(FieldInfoJSON {
|
children: Some(FieldInfoJSON {
|
||||||
multiple: false,
|
multiple: false,
|
||||||
|
|
@ -1376,8 +1133,7 @@ mod tests {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
..Default::default()
|
..Default::default()
|
||||||
})
|
});
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
node_types[0],
|
node_types[0],
|
||||||
|
|
@ -1385,7 +1141,6 @@ mod tests {
|
||||||
kind: "v1".to_string(),
|
kind: "v1".to_string(),
|
||||||
named: true,
|
named: true,
|
||||||
root: true,
|
root: true,
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
children: Some(FieldInfoJSON {
|
children: Some(FieldInfoJSON {
|
||||||
multiple: true,
|
multiple: true,
|
||||||
|
|
@ -1451,8 +1206,7 @@ mod tests {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
..Default::default()
|
..Default::default()
|
||||||
})
|
});
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
assert_eq!(node_types.iter().find(|t| t.kind == "foo_identifier"), None);
|
assert_eq!(node_types.iter().find(|t| t.kind == "foo_identifier"), None);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
|
@ -1461,7 +1215,6 @@ mod tests {
|
||||||
kind: "identifier".to_string(),
|
kind: "identifier".to_string(),
|
||||||
named: true,
|
named: true,
|
||||||
root: false,
|
root: false,
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
children: None,
|
children: None,
|
||||||
fields: None,
|
fields: None,
|
||||||
|
|
@ -1473,7 +1226,6 @@ mod tests {
|
||||||
kind: "type_identifier".to_string(),
|
kind: "type_identifier".to_string(),
|
||||||
named: true,
|
named: true,
|
||||||
root: false,
|
root: false,
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
children: None,
|
children: None,
|
||||||
fields: None,
|
fields: None,
|
||||||
|
|
@ -1508,8 +1260,7 @@ mod tests {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
..Default::default()
|
..Default::default()
|
||||||
})
|
});
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
node_types[0],
|
node_types[0],
|
||||||
|
|
@ -1517,7 +1268,6 @@ mod tests {
|
||||||
kind: "a".to_string(),
|
kind: "a".to_string(),
|
||||||
named: true,
|
named: true,
|
||||||
root: true,
|
root: true,
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
children: Some(FieldInfoJSON {
|
children: Some(FieldInfoJSON {
|
||||||
multiple: true,
|
multiple: true,
|
||||||
|
|
@ -1558,8 +1308,7 @@ mod tests {
|
||||||
]),
|
]),
|
||||||
}],
|
}],
|
||||||
..Default::default()
|
..Default::default()
|
||||||
})
|
});
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
node_types,
|
node_types,
|
||||||
|
|
@ -1567,7 +1316,6 @@ mod tests {
|
||||||
kind: "script".to_string(),
|
kind: "script".to_string(),
|
||||||
named: true,
|
named: true,
|
||||||
root: true,
|
root: true,
|
||||||
extra: false,
|
|
||||||
fields: Some(BTreeMap::new()),
|
fields: Some(BTreeMap::new()),
|
||||||
children: None,
|
children: None,
|
||||||
subtypes: None
|
subtypes: None
|
||||||
|
|
@ -1607,8 +1355,7 @@ mod tests {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
..Default::default()
|
..Default::default()
|
||||||
})
|
});
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&node_types
|
&node_types
|
||||||
|
|
@ -1626,7 +1373,6 @@ mod tests {
|
||||||
kind: "a".to_string(),
|
kind: "a".to_string(),
|
||||||
named: true,
|
named: true,
|
||||||
root: false,
|
root: false,
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
children: None,
|
children: None,
|
||||||
fields: Some(
|
fields: Some(
|
||||||
|
|
@ -1683,7 +1429,6 @@ mod tests {
|
||||||
kind: "script".to_string(),
|
kind: "script".to_string(),
|
||||||
named: true,
|
named: true,
|
||||||
root: true,
|
root: true,
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
// Only one node
|
// Only one node
|
||||||
children: Some(FieldInfoJSON {
|
children: Some(FieldInfoJSON {
|
||||||
|
|
@ -1727,8 +1472,7 @@ mod tests {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
..Default::default()
|
..Default::default()
|
||||||
})
|
});
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
node_types.iter().map(|n| &n.kind).collect::<Vec<_>>(),
|
node_types.iter().map(|n| &n.kind).collect::<Vec<_>>(),
|
||||||
|
|
@ -1740,7 +1484,6 @@ mod tests {
|
||||||
kind: "b".to_string(),
|
kind: "b".to_string(),
|
||||||
named: true,
|
named: true,
|
||||||
root: false,
|
root: false,
|
||||||
extra: false,
|
|
||||||
subtypes: None,
|
subtypes: None,
|
||||||
children: Some(FieldInfoJSON {
|
children: Some(FieldInfoJSON {
|
||||||
multiple: true,
|
multiple: true,
|
||||||
|
|
@ -2055,7 +1798,7 @@ mod tests {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_node_types(grammar: &InputGrammar) -> SuperTypeCycleResult<Vec<NodeInfoJSON>> {
|
fn get_node_types(grammar: &InputGrammar) -> Vec<NodeInfoJSON> {
|
||||||
let (syntax_grammar, lexical_grammar, _, default_aliases) =
|
let (syntax_grammar, lexical_grammar, _, default_aliases) =
|
||||||
prepare_grammar(grammar).unwrap();
|
prepare_grammar(grammar).unwrap();
|
||||||
let variable_info =
|
let variable_info =
|
||||||
343 cli/generate/src/parse_grammar.rs Normal file
@@ -0,0 +1,343 @@
|
||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
|
use anyhow::{anyhow, Result};
|
||||||
|
use serde::Deserialize;
|
||||||
|
use serde_json::{Map, Value};
|
||||||
|
|
||||||
|
use super::{
|
||||||
|
grammars::{InputGrammar, PrecedenceEntry, Variable, VariableType},
|
||||||
|
rules::{Precedence, Rule},
|
||||||
|
};
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
#[serde(tag = "type")]
|
||||||
|
#[allow(non_camel_case_types)]
|
||||||
|
#[allow(clippy::upper_case_acronyms)]
|
||||||
|
enum RuleJSON {
|
||||||
|
ALIAS {
|
||||||
|
content: Box<RuleJSON>,
|
||||||
|
named: bool,
|
||||||
|
value: String,
|
||||||
|
},
|
||||||
|
BLANK,
|
||||||
|
STRING {
|
||||||
|
value: String,
|
||||||
|
},
|
||||||
|
PATTERN {
|
||||||
|
value: String,
|
||||||
|
flags: Option<String>,
|
||||||
|
},
|
||||||
|
SYMBOL {
|
||||||
|
name: String,
|
||||||
|
},
|
||||||
|
CHOICE {
|
||||||
|
members: Vec<RuleJSON>,
|
||||||
|
},
|
||||||
|
FIELD {
|
||||||
|
name: String,
|
||||||
|
content: Box<RuleJSON>,
|
||||||
|
},
|
||||||
|
SEQ {
|
||||||
|
members: Vec<RuleJSON>,
|
||||||
|
},
|
||||||
|
REPEAT {
|
||||||
|
content: Box<RuleJSON>,
|
||||||
|
},
|
||||||
|
REPEAT1 {
|
||||||
|
content: Box<RuleJSON>,
|
||||||
|
},
|
||||||
|
PREC_DYNAMIC {
|
||||||
|
value: i32,
|
||||||
|
content: Box<RuleJSON>,
|
||||||
|
},
|
||||||
|
PREC_LEFT {
|
||||||
|
value: PrecedenceValueJSON,
|
||||||
|
content: Box<RuleJSON>,
|
||||||
|
},
|
||||||
|
PREC_RIGHT {
|
||||||
|
value: PrecedenceValueJSON,
|
||||||
|
content: Box<RuleJSON>,
|
||||||
|
},
|
||||||
|
PREC {
|
||||||
|
value: PrecedenceValueJSON,
|
||||||
|
content: Box<RuleJSON>,
|
||||||
|
},
|
||||||
|
TOKEN {
|
||||||
|
content: Box<RuleJSON>,
|
||||||
|
},
|
||||||
|
IMMEDIATE_TOKEN {
|
||||||
|
content: Box<RuleJSON>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
#[serde(untagged)]
|
||||||
|
enum PrecedenceValueJSON {
|
||||||
|
Integer(i32),
|
||||||
|
Name(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct GrammarJSON {
|
||||||
|
pub name: String,
|
||||||
|
rules: Map<String, Value>,
|
||||||
|
#[serde(default)]
|
||||||
|
precedences: Vec<Vec<RuleJSON>>,
|
||||||
|
#[serde(default)]
|
||||||
|
conflicts: Vec<Vec<String>>,
|
||||||
|
#[serde(default)]
|
||||||
|
externals: Vec<RuleJSON>,
|
||||||
|
#[serde(default)]
|
||||||
|
extras: Vec<RuleJSON>,
|
||||||
|
#[serde(default)]
|
||||||
|
inline: Vec<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
supertypes: Vec<String>,
|
||||||
|
word: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn rule_is_referenced(rule: &Rule, target: &str) -> bool {
|
||||||
|
match rule {
|
||||||
|
Rule::NamedSymbol(name) => name == target,
|
||||||
|
Rule::Choice(rules) | Rule::Seq(rules) => {
|
||||||
|
rules.iter().any(|r| rule_is_referenced(r, target))
|
||||||
|
}
|
||||||
|
Rule::Metadata { rule, .. } => rule_is_referenced(rule, target),
|
||||||
|
Rule::Repeat(inner) => rule_is_referenced(inner, target),
|
||||||
|
Rule::Blank | Rule::String(_) | Rule::Pattern(_, _) | Rule::Symbol(_) => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn variable_is_used(
|
||||||
|
grammar_rules: &[(String, Rule)],
|
||||||
|
other_rules: (&[Rule], &[Rule]),
|
||||||
|
target_name: &str,
|
||||||
|
in_progress: &mut HashSet<String>,
|
||||||
|
) -> bool {
|
||||||
|
let root = &grammar_rules.first().unwrap().0;
|
||||||
|
if target_name == root {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if other_rules
|
||||||
|
.0
|
||||||
|
.iter()
|
||||||
|
.chain(other_rules.1.iter())
|
||||||
|
.any(|rule| rule_is_referenced(rule, target_name))
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
in_progress.insert(target_name.to_string());
|
||||||
|
let result = grammar_rules
|
||||||
|
.iter()
|
||||||
|
.filter(|(key, _)| *key != target_name)
|
||||||
|
.any(|(name, rule)| {
|
||||||
|
if !rule_is_referenced(rule, target_name) || in_progress.contains(name) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
variable_is_used(grammar_rules, other_rules, name, in_progress)
|
||||||
|
});
|
||||||
|
in_progress.remove(target_name);
|
||||||
|
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
|
||||||
|
let mut grammar_json = serde_json::from_str::<GrammarJSON>(input)?;
|
||||||
|
|
||||||
|
let mut extra_symbols =
|
||||||
|
grammar_json
|
||||||
|
.extras
|
||||||
|
.into_iter()
|
||||||
|
.try_fold(Vec::new(), |mut acc, item| {
|
||||||
|
let rule = parse_rule(item);
|
||||||
|
if let Rule::String(ref value) = rule {
|
||||||
|
if value.is_empty() {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"Rules in the `extras` array must not contain empty strings"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
acc.push(rule);
|
||||||
|
Ok(acc)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let mut external_tokens = grammar_json
|
||||||
|
.externals
|
||||||
|
.into_iter()
|
||||||
|
.map(parse_rule)
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let mut precedence_orderings = Vec::with_capacity(grammar_json.precedences.len());
|
||||||
|
for list in grammar_json.precedences {
|
||||||
|
let mut ordering = Vec::with_capacity(list.len());
|
||||||
|
for entry in list {
|
||||||
|
ordering.push(match entry {
|
||||||
|
RuleJSON::STRING { value } => PrecedenceEntry::Name(value),
|
||||||
|
RuleJSON::SYMBOL { name } => PrecedenceEntry::Symbol(name),
|
||||||
|
_ => {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"Invalid rule in precedences array. Only strings and symbols are allowed"
|
||||||
|
))
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
precedence_orderings.push(ordering);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut variables = Vec::with_capacity(grammar_json.rules.len());
|
||||||
|
|
||||||
|
let rules = grammar_json
|
||||||
|
.rules
|
||||||
|
.into_iter()
|
||||||
|
.map(|(n, r)| Ok((n, parse_rule(serde_json::from_value(r)?))))
|
||||||
|
.collect::<Result<Vec<_>>>()?;
|
||||||
|
|
||||||
|
let mut in_progress = HashSet::new();
|
||||||
|
|
||||||
|
for (name, rule) in &rules {
|
||||||
|
if !variable_is_used(
|
||||||
|
&rules,
|
||||||
|
(&extra_symbols, &external_tokens),
|
||||||
|
name,
|
||||||
|
&mut in_progress,
|
||||||
|
) && grammar_json.word.as_ref().is_some_and(|w| w != name)
|
||||||
|
{
|
||||||
|
grammar_json.conflicts.retain(|r| !r.contains(name));
|
||||||
|
grammar_json.supertypes.retain(|r| r != name);
|
||||||
|
grammar_json.inline.retain(|r| r != name);
|
||||||
|
extra_symbols.retain(|r| !rule_is_referenced(r, name));
|
||||||
|
external_tokens.retain(|r| !rule_is_referenced(r, name));
|
||||||
|
precedence_orderings.retain(|r| {
|
||||||
|
!r.iter().any(|e| {
|
||||||
|
let PrecedenceEntry::Symbol(s) = e else {
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
s == name
|
||||||
|
})
|
||||||
|
});
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
variables.push(Variable {
|
||||||
|
name: name.clone(),
|
||||||
|
kind: VariableType::Named,
|
||||||
|
rule: rule.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(InputGrammar {
|
||||||
|
name: grammar_json.name,
|
||||||
|
word_token: grammar_json.word,
|
||||||
|
expected_conflicts: grammar_json.conflicts,
|
||||||
|
supertype_symbols: grammar_json.supertypes,
|
||||||
|
variables_to_inline: grammar_json.inline,
|
||||||
|
precedence_orderings,
|
||||||
|
variables,
|
||||||
|
extra_symbols,
|
||||||
|
external_tokens,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_rule(json: RuleJSON) -> Rule {
|
||||||
|
match json {
|
||||||
|
RuleJSON::ALIAS {
|
||||||
|
content,
|
||||||
|
value,
|
||||||
|
named,
|
||||||
|
} => Rule::alias(parse_rule(*content), value, named),
|
||||||
|
RuleJSON::BLANK => Rule::Blank,
|
||||||
|
RuleJSON::STRING { value } => Rule::String(value),
|
||||||
|
RuleJSON::PATTERN { value, flags } => Rule::Pattern(
|
||||||
|
value,
|
||||||
|
flags.map_or(String::new(), |f| {
|
||||||
|
f.matches(|c| {
|
||||||
|
if c == 'i' {
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
// silently ignore unicode flags
|
||||||
|
if c != 'u' && c != 'v' {
|
||||||
|
eprintln!("Warning: unsupported flag {c}");
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),
|
||||||
|
RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()),
|
||||||
|
RuleJSON::FIELD { content, name } => Rule::field(name, parse_rule(*content)),
|
||||||
|
RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()),
|
||||||
|
RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)),
|
||||||
|
RuleJSON::REPEAT { content } => {
|
||||||
|
Rule::choice(vec![Rule::repeat(parse_rule(*content)), Rule::Blank])
|
||||||
|
}
|
||||||
|
RuleJSON::PREC { value, content } => Rule::prec(value.into(), parse_rule(*content)),
|
||||||
|
RuleJSON::PREC_LEFT { value, content } => {
|
||||||
|
Rule::prec_left(value.into(), parse_rule(*content))
|
||||||
|
}
|
||||||
|
RuleJSON::PREC_RIGHT { value, content } => {
|
||||||
|
Rule::prec_right(value.into(), parse_rule(*content))
|
||||||
|
}
|
||||||
|
RuleJSON::PREC_DYNAMIC { value, content } => {
|
||||||
|
Rule::prec_dynamic(value, parse_rule(*content))
|
||||||
|
}
|
||||||
|
RuleJSON::TOKEN { content } => Rule::token(parse_rule(*content)),
|
||||||
|
RuleJSON::IMMEDIATE_TOKEN { content } => Rule::immediate_token(parse_rule(*content)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<PrecedenceValueJSON> for Precedence {
|
||||||
|
fn from(val: PrecedenceValueJSON) -> Self {
|
||||||
|
match val {
|
||||||
|
PrecedenceValueJSON::Integer(i) => Self::Integer(i),
|
||||||
|
PrecedenceValueJSON::Name(i) => Self::Name(i),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_parse_grammar() {
|
||||||
|
let grammar = parse_grammar(
|
||||||
|
r#"{
|
||||||
|
"name": "my_lang",
|
||||||
|
"rules": {
|
||||||
|
"file": {
|
||||||
|
"type": "REPEAT1",
|
||||||
|
"content": {
|
||||||
|
"type": "SYMBOL",
|
||||||
|
"name": "statement"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"statement": {
|
||||||
|
"type": "STRING",
|
||||||
|
"value": "foo"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}"#,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(grammar.name, "my_lang");
|
||||||
|
assert_eq!(
|
||||||
|
grammar.variables,
|
||||||
|
vec![
|
||||||
|
Variable {
|
||||||
|
name: "file".to_string(),
|
||||||
|
kind: VariableType::Named,
|
||||||
|
rule: Rule::repeat(Rule::NamedSymbol("statement".to_string()))
|
||||||
|
},
|
||||||
|
Variable {
|
||||||
|
name: "statement".to_string(),
|
||||||
|
kind: VariableType::Named,
|
||||||
|
rule: Rule::String("foo".to_string())
|
||||||
|
},
|
||||||
|
]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
|
+use anyhow::{anyhow, Context, Result};
+use indoc::indoc;
 use regex_syntax::{
 hir::{Class, Hir, HirKind},
 ParserBuilder,
 };
-use serde::Serialize;
-use thiserror::Error;

 use super::ExtractedLexicalGrammar;
 use crate::{

@@ -18,40 +18,6 @@ struct NfaBuilder {
 precedence_stack: Vec<i32>,
 }

-pub type ExpandTokensResult<T> = Result<T, ExpandTokensError>;
-
-#[derive(Debug, Error, Serialize)]
-pub enum ExpandTokensError {
-#[error(
-"The rule `{0}` matches the empty string.
-Tree-sitter does not support syntactic rules that match the empty string
-unless they are used only as the grammar's start rule.
-"
-)]
-EmptyString(String),
-#[error(transparent)]
-Processing(ExpandTokensProcessingError),
-#[error(transparent)]
-ExpandRule(ExpandRuleError),
-}
-
-#[derive(Debug, Error, Serialize)]
-pub struct ExpandTokensProcessingError {
-rule: String,
-error: ExpandRuleError,
-}
-
-impl std::fmt::Display for ExpandTokensProcessingError {
-fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-writeln!(
-f,
-"Error processing rule {}: Grammar error: Unexpected rule {:?}",
-self.rule, self.error
-)?;
-Ok(())
-}
-}
-
 fn get_implicit_precedence(rule: &Rule) -> i32 {
 match rule {
 Rule::String(_) => 2,

@@ -75,7 +41,7 @@ const fn get_completion_precedence(rule: &Rule) -> i32 {
 0
 }

-pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> ExpandTokensResult<LexicalGrammar> {
+pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
 let mut builder = NfaBuilder {
 nfa: Nfa::new(),
 is_sep: true,

@@ -89,10 +55,17 @@ pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> ExpandTokensResult
 Rule::repeat(Rule::choice(grammar.separators))
 };

-let mut variables = Vec::with_capacity(grammar.variables.len());
+let mut variables = Vec::new();
 for (i, variable) in grammar.variables.into_iter().enumerate() {
 if variable.rule.is_empty() {
-Err(ExpandTokensError::EmptyString(variable.name.clone()))?;
+return Err(anyhow!(
+indoc! {"
+The rule `{}` matches the empty string.
+Tree-sitter does not support syntactic rules that match the empty string
+unless they are used only as the grammar's start rule.
+"},
+variable.name
+));
 }

 let is_immediate_token = match &variable.rule {

@@ -108,19 +81,12 @@ pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> ExpandTokensResult
 let last_state_id = builder.nfa.last_state_id();
 builder
 .expand_rule(&variable.rule, last_state_id)
-.map_err(|e| {
-ExpandTokensError::Processing(ExpandTokensProcessingError {
-rule: variable.name.clone(),
-error: e,
-})
-})?;
+.with_context(|| format!("Error processing rule {}", variable.name))?;

 if !is_immediate_token {
 builder.is_sep = true;
 let last_state_id = builder.nfa.last_state_id();
-builder
-.expand_rule(&separator_rule, last_state_id)
-.map_err(ExpandTokensError::ExpandRule)?;
+builder.expand_rule(&separator_rule, last_state_id)?;
 }

 variables.push(LexicalVariable {

@@ -137,30 +103,8 @@ pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> ExpandTokensResult
 })
 }

-pub type ExpandRuleResult<T> = Result<T, ExpandRuleError>;
-
-#[derive(Debug, Error, Serialize)]
-pub enum ExpandRuleError {
-#[error("Grammar error: Unexpected rule {0:?}")]
-UnexpectedRule(Rule),
-#[error("{0}")]
-Parse(String),
-#[error(transparent)]
-ExpandRegex(ExpandRegexError),
-}
-
-pub type ExpandRegexResult<T> = Result<T, ExpandRegexError>;
-
-#[derive(Debug, Error, Serialize)]
-pub enum ExpandRegexError {
-#[error("{0}")]
-Utf8(String),
-#[error("Regex error: Assertions are not supported")]
-Assertion,
-}
-
 impl NfaBuilder {
-fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> ExpandRuleResult<bool> {
+fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> Result<bool> {
 match rule {
 Rule::Pattern(s, f) => {
 // With unicode enabled, `\w`, `\s` and `\d` expand to character sets that are much

@@ -180,21 +124,18 @@ impl NfaBuilder {
 .unicode(true)
 .utf8(false)
 .build();
-let hir = parser
-.parse(&s)
-.map_err(|e| ExpandRuleError::Parse(e.to_string()))?;
+let hir = parser.parse(&s)?;
 self.expand_regex(&hir, next_state_id)
-.map_err(ExpandRuleError::ExpandRegex)
 }
 Rule::String(s) => {
 for c in s.chars().rev() {
-self.push_advance(CharacterSet::from_char(c), next_state_id);
+self.push_advance(CharacterSet::empty().add_char(c), next_state_id);
 next_state_id = self.nfa.last_state_id();
 }
 Ok(!s.is_empty())
 }
 Rule::Choice(elements) => {
-let mut alternative_state_ids = Vec::with_capacity(elements.len());
+let mut alternative_state_ids = Vec::new();
 for element in elements {
 if self.expand_rule(element, next_state_id)? {
 alternative_state_ids.push(self.nfa.last_state_id());

@@ -248,19 +189,15 @@ impl NfaBuilder {
 result
 }
 Rule::Blank => Ok(false),
-_ => Err(ExpandRuleError::UnexpectedRule(rule.clone()))?,
+_ => Err(anyhow!("Grammar error: Unexpected rule {rule:?}")),
 }
 }

-fn expand_regex(&mut self, hir: &Hir, mut next_state_id: u32) -> ExpandRegexResult<bool> {
+fn expand_regex(&mut self, hir: &Hir, mut next_state_id: u32) -> Result<bool> {
 match hir.kind() {
 HirKind::Empty => Ok(false),
 HirKind::Literal(literal) => {
-for character in std::str::from_utf8(&literal.0)
-.map_err(|e| ExpandRegexError::Utf8(e.to_string()))?
-.chars()
-.rev()
-{
+for character in std::str::from_utf8(&literal.0)?.chars().rev() {
 let char_set = CharacterSet::from_char(character);
 self.push_advance(char_set, next_state_id);
 next_state_id = self.nfa.last_state_id();

@@ -297,7 +234,7 @@ impl NfaBuilder {
 Ok(true)
 }
 },
-HirKind::Look(_) => Err(ExpandRegexError::Assertion)?,
+HirKind::Look(_) => Err(anyhow!("Regex error: Assertions are not supported")),
 HirKind::Repetition(repetition) => match (repetition.min, repetition.max) {
 (0, Some(1)) => self.expand_zero_or_one(&repetition.sub, next_state_id),
 (1, None) => self.expand_one_or_more(&repetition.sub, next_state_id),

@@ -337,7 +274,7 @@ impl NfaBuilder {
 Ok(result)
 }
 HirKind::Alternation(alternations) => {
-let mut alternative_state_ids = Vec::with_capacity(alternations.len());
+let mut alternative_state_ids = Vec::new();
 for hir in alternations {
 if self.expand_regex(hir, next_state_id)? {
 alternative_state_ids.push(self.nfa.last_state_id());

@@ -356,7 +293,7 @@ impl NfaBuilder {
 }
 }

-fn expand_one_or_more(&mut self, hir: &Hir, next_state_id: u32) -> ExpandRegexResult<bool> {
+fn expand_one_or_more(&mut self, hir: &Hir, next_state_id: u32) -> Result<bool> {
 self.nfa.states.push(NfaState::Accept {
 variable_index: 0,
 precedence: 0,

@@ -372,7 +309,7 @@ impl NfaBuilder {
 }
 }

-fn expand_zero_or_one(&mut self, hir: &Hir, next_state_id: u32) -> ExpandRegexResult<bool> {
+fn expand_zero_or_one(&mut self, hir: &Hir, next_state_id: u32) -> Result<bool> {
 if self.expand_regex(hir, next_state_id)? {
 self.push_split(next_state_id);
 Ok(true)

@@ -381,7 +318,7 @@ impl NfaBuilder {
 }
 }

-fn expand_zero_or_more(&mut self, hir: &Hir, next_state_id: u32) -> ExpandRegexResult<bool> {
+fn expand_zero_or_more(&mut self, hir: &Hir, next_state_id: u32) -> Result<bool> {
 if self.expand_one_or_more(hir, next_state_id)? {
 self.push_split(next_state_id);
 Ok(true)

@@ -390,12 +327,7 @@ impl NfaBuilder {
 }
 }

-fn expand_count(
-&mut self,
-hir: &Hir,
-count: u32,
-mut next_state_id: u32,
-) -> ExpandRegexResult<bool> {
+fn expand_count(&mut self, hir: &Hir, count: u32, mut next_state_id: u32) -> Result<bool> {
 let mut result = false;
 for _ in 0..count {
 if self.expand_regex(hir, next_state_id)? {

@@ -69,7 +69,9 @@ pub(super) fn extract_default_aliases(
 SymbolType::External => &mut external_status_list[symbol.index],
 SymbolType::NonTerminal => &mut non_terminal_status_list[symbol.index],
 SymbolType::Terminal => &mut terminal_status_list[symbol.index],
-SymbolType::End | SymbolType::EndOfNonTerminalExtra => panic!("Unexpected end token"),
+SymbolType::End | SymbolType::EndOfNonTerminalExtra => {
+panic!("Unexpected end token")
+}
 };
 status.appears_unaliased = true;
 }

@@ -1,63 +1,16 @@
-use std::collections::HashMap;
+use std::{collections::HashMap, mem};

-use serde::Serialize;
-use thiserror::Error;
+use anyhow::{anyhow, Result};

 use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
 use crate::{
-grammars::{ExternalToken, ReservedWordContext, Variable, VariableType},
+grammars::{ExternalToken, Variable, VariableType},
 rules::{MetadataParams, Rule, Symbol, SymbolType},
 };

-pub type ExtractTokensResult<T> = Result<T, ExtractTokensError>;
-
-#[derive(Debug, Error, Serialize)]
-pub enum ExtractTokensError {
-#[error(
-"The rule `{0}` contains an empty string.
-
-Tree-sitter does not support syntactic rules that contain an empty string
-unless they are used only as the grammar's start rule.
-"
-)]
-EmptyString(String),
-#[error("Rule '{0}' cannot be used as both an external token and a non-terminal rule")]
-ExternalTokenNonTerminal(String),
-#[error("Non-symbol rules cannot be used as external tokens")]
-NonSymbolExternalToken,
-#[error(transparent)]
-WordToken(NonTerminalWordTokenError),
-#[error("Reserved word '{0}' must be a token")]
-NonTokenReservedWord(String),
-}
-
-#[derive(Debug, Error, Serialize)]
-pub struct NonTerminalWordTokenError {
-pub symbol_name: String,
-pub conflicting_symbol_name: Option<String>,
-}
-
-impl std::fmt::Display for NonTerminalWordTokenError {
-fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-write!(
-f,
-"Non-terminal symbol '{}' cannot be used as the word token",
-self.symbol_name
-)?;
-if let Some(conflicting_name) = &self.conflicting_symbol_name {
-writeln!(
-f,
-", because its rule is duplicated in '{conflicting_name}'",
-)
-} else {
-writeln!(f)
-}
-}
-}
-
 pub(super) fn extract_tokens(
 mut grammar: InternedGrammar,
-) -> ExtractTokensResult<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> {
+) -> Result<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> {
 let mut extractor = TokenExtractor {
 current_variable_name: String::new(),
 current_variable_token_count: 0,

@@ -85,7 +38,7 @@ pub(super) fn extract_tokens(
 // that pointed to that variable will need to be updated to point to the
 // variable in the lexical grammar. Symbols that pointed to later variables
 // will need to have their indices decremented.
-let mut variables = Vec::with_capacity(grammar.variables.len());
+let mut variables = Vec::new();
 let mut symbol_replacer = SymbolReplacer {
 replacements: HashMap::new(),
 };

@@ -152,14 +105,15 @@ pub(super) fn extract_tokens(
 }
 }

-let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len());
+let mut external_tokens = Vec::new();
 for external_token in grammar.external_tokens {
 let rule = symbol_replacer.replace_symbols_in_rule(&external_token.rule);
 if let Rule::Symbol(symbol) = rule {
 if symbol.is_non_terminal() {
-Err(ExtractTokensError::ExternalTokenNonTerminal(
-variables[symbol.index].name.clone(),
-))?;
+return Err(anyhow!(
+"Rule '{}' cannot be used as both an external token and a non-terminal rule",
+&variables[symbol.index].name,
+));
 }

 if symbol.is_external() {

@@ -176,59 +130,22 @@ pub(super) fn extract_tokens(
 });
 }
 } else {
-Err(ExtractTokensError::NonSymbolExternalToken)?;
+return Err(anyhow!(
+"Non-symbol rules cannot be used as external tokens"
+));
 }
 }

-let word_token = if let Some(token) = grammar.word_token {
+let mut word_token = None;
+if let Some(token) = grammar.word_token {
 let token = symbol_replacer.replace_symbol(token);
 if token.is_non_terminal() {
-let word_token_variable = &variables[token.index];
-let conflicting_symbol_name = variables
-.iter()
-.enumerate()
-.find(|(i, v)| *i != token.index && v.rule == word_token_variable.rule)
-.map(|(_, v)| v.name.clone());
-
-Err(ExtractTokensError::WordToken(NonTerminalWordTokenError {
-symbol_name: word_token_variable.name.clone(),
-conflicting_symbol_name,
-}))?;
+return Err(anyhow!(
+"Non-terminal symbol '{}' cannot be used as the word token",
+&variables[token.index].name
+));
 }
-Some(token)
-} else {
-None
-};
-
-let mut reserved_word_contexts = Vec::with_capacity(grammar.reserved_word_sets.len());
-for reserved_word_context in grammar.reserved_word_sets {
-let mut reserved_words = Vec::with_capacity(reserved_word_contexts.len());
-for reserved_rule in reserved_word_context.reserved_words {
-if let Rule::Symbol(symbol) = reserved_rule {
-reserved_words.push(symbol_replacer.replace_symbol(symbol));
-} else if let Some(index) = lexical_variables
-.iter()
-.position(|v| v.rule == reserved_rule)
-{
-reserved_words.push(Symbol::terminal(index));
-} else {
-let rule = if let Rule::Metadata { rule, .. } = &reserved_rule {
-rule.as_ref()
-} else {
-&reserved_rule
-};
-let token_name = match rule {
-Rule::String(s) => s.clone(),
-Rule::Pattern(p, _) => p.clone(),
-_ => "unknown".to_string(),
-};
-Err(ExtractTokensError::NonTokenReservedWord(token_name))?;
-}
-}
-reserved_word_contexts.push(ReservedWordContext {
-name: reserved_word_context.name,
-reserved_words,
-});
+word_token = Some(token);
 }

 Ok((

@@ -241,7 +158,6 @@ pub(super) fn extract_tokens(
 external_tokens,
 word_token,
 precedence_orderings: grammar.precedence_orderings,
-reserved_word_sets: reserved_word_contexts,
 },
 ExtractedLexicalGrammar {
 variables: lexical_variables,

@@ -267,16 +183,18 @@ impl TokenExtractor {
 &mut self,
 is_first: bool,
 variable: &mut Variable,
-) -> ExtractTokensResult<()> {
+) -> Result<()> {
 self.current_variable_name.clear();
 self.current_variable_name.push_str(&variable.name);
 self.current_variable_token_count = 0;
 self.is_first_rule = is_first;
-variable.rule = self.extract_tokens_in_rule(&variable.rule)?;
+let mut rule = Rule::Blank;
+mem::swap(&mut rule, &mut variable.rule);
+variable.rule = self.extract_tokens_in_rule(&rule)?;
 Ok(())
 }

-fn extract_tokens_in_rule(&mut self, input: &Rule) -> ExtractTokensResult<Rule> {
+fn extract_tokens_in_rule(&mut self, input: &Rule) -> Result<Rule> {
 match input {
 Rule::String(name) => Ok(self.extract_token(input, Some(name))?.into()),
 Rule::Pattern(..) => Ok(self.extract_token(input, None)?.into()),

@@ -285,11 +203,10 @@ impl TokenExtractor {
 let mut params = params.clone();
 params.is_token = false;

-let string_value = if let Rule::String(value) = rule.as_ref() {
-Some(value)
-} else {
-None
-};
+let mut string_value = None;
+if let Rule::String(value) = rule.as_ref() {
+string_value = Some(value);
+}

 let rule_to_extract = if params == MetadataParams::default() {
 rule.as_ref()

@@ -312,27 +229,19 @@ impl TokenExtractor {
 elements
 .iter()
 .map(|e| self.extract_tokens_in_rule(e))
-.collect::<ExtractTokensResult<Vec<_>>>()?,
+.collect::<Result<Vec<_>>>()?,
 )),
 Rule::Choice(elements) => Ok(Rule::Choice(
 elements
 .iter()
 .map(|e| self.extract_tokens_in_rule(e))
-.collect::<ExtractTokensResult<Vec<_>>>()?,
+.collect::<Result<Vec<_>>>()?,
 )),
-Rule::Reserved { rule, context_name } => Ok(Rule::Reserved {
-rule: Box::new(self.extract_tokens_in_rule(rule)?),
-context_name: context_name.clone(),
-}),
 _ => Ok(input.clone()),
 }
 }

-fn extract_token(
-&mut self,
-rule: &Rule,
-string_value: Option<&String>,
-) -> ExtractTokensResult<Symbol> {
+fn extract_token(&mut self, rule: &Rule, string_value: Option<&String>) -> Result<Symbol> {
 for (i, variable) in self.extracted_variables.iter_mut().enumerate() {
 if variable.rule == *rule {
 self.extracted_usage_counts[i] += 1;

@@ -343,9 +252,14 @@ impl TokenExtractor {
 let index = self.extracted_variables.len();
 let variable = if let Some(string_value) = string_value {
 if string_value.is_empty() && !self.is_first_rule {
-Err(ExtractTokensError::EmptyString(
-self.current_variable_name.clone(),
-))?;
+return Err(anyhow!(
+"The rule `{}` contains an empty string.
+
+Tree-sitter does not support syntactic rules that contain an empty string
+unless they are used only as the grammar's start rule.
+",
+self.current_variable_name
+));
 }
 Variable {
 name: string_value.clone(),

@@ -357,7 +271,7 @@ impl TokenExtractor {
 Variable {
 name: format!(
 "{}_token{}",
-self.current_variable_name, self.current_variable_token_count
+&self.current_variable_name, self.current_variable_token_count
 ),
 kind: VariableType::Auxiliary,
 rule: rule.clone(),

@@ -391,10 +305,6 @@ impl SymbolReplacer {
 params: params.clone(),
 rule: Box::new(self.replace_symbols_in_rule(rule)),
 },
-Rule::Reserved { rule, context_name } => Rule::Reserved {
-rule: Box::new(self.replace_symbols_in_rule(rule)),
-context_name: context_name.clone(),
-},
 _ => rule.clone(),
 }
 }

@@ -590,13 +500,14 @@ mod test {
 ]);
 grammar.external_tokens = vec![Variable::named("rule_1", Rule::non_terminal(1))];

-let result = extract_tokens(grammar);
-assert!(result.is_err(), "Expected an error but got no error");
-let err = result.err().unwrap();
-assert_eq!(
-err.to_string(),
-"Rule 'rule_1' cannot be used as both an external token and a non-terminal rule"
-);
+match extract_tokens(grammar) {
+Err(e) => {
+assert_eq!(e.to_string(), "Rule 'rule_1' cannot be used as both an external token and a non-terminal rule");
+}
+_ => {
+panic!("Expected an error but got no error");
+}
+}
 }

 #[test]

@@ -1,96 +1,48 @@
-use std::collections::HashMap;
-use serde::Serialize;
-use thiserror::Error;
+use anyhow::{anyhow, Result};
+use indoc::indoc;

 use super::ExtractedSyntaxGrammar;
 use crate::{
-grammars::{
-Production, ProductionStep, ReservedWordSetId, SyntaxGrammar, SyntaxVariable, Variable,
-},
-rules::{Alias, Associativity, Precedence, Rule, Symbol, TokenSet},
+grammars::{Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable},
+rules::{Alias, Associativity, Precedence, Rule, Symbol},
 };

-pub type FlattenGrammarResult<T> = Result<T, FlattenGrammarError>;
-
-#[derive(Debug, Error, Serialize)]
-pub enum FlattenGrammarError {
-#[error("No such reserved word set: {0}")]
-NoReservedWordSet(String),
-#[error(
-"The rule `{0}` matches the empty string.
-
-Tree-sitter does not support syntactic rules that match the empty string
-unless they are used only as the grammar's start rule.
-"
-)]
-EmptyString(String),
-#[error("Rule `{0}` cannot be inlined because it contains a reference to itself")]
-RecursiveInline(String),
-}
-
 struct RuleFlattener {
 production: Production,
-reserved_word_set_ids: HashMap<String, ReservedWordSetId>,
 precedence_stack: Vec<Precedence>,
 associativity_stack: Vec<Associativity>,
-reserved_word_stack: Vec<ReservedWordSetId>,
 alias_stack: Vec<Alias>,
 field_name_stack: Vec<String>,
 }

 impl RuleFlattener {
-const fn new(reserved_word_set_ids: HashMap<String, ReservedWordSetId>) -> Self {
+const fn new() -> Self {
 Self {
 production: Production {
 steps: Vec::new(),
 dynamic_precedence: 0,
 },
-reserved_word_set_ids,
 precedence_stack: Vec::new(),
 associativity_stack: Vec::new(),
-reserved_word_stack: Vec::new(),
 alias_stack: Vec::new(),
 field_name_stack: Vec::new(),
 }
 }

-fn flatten_variable(&mut self, variable: Variable) -> FlattenGrammarResult<SyntaxVariable> {
-let choices = extract_choices(variable.rule);
-let mut productions = Vec::with_capacity(choices.len());
-for rule in choices {
-let production = self.flatten_rule(rule)?;
-if !productions.contains(&production) {
-productions.push(production);
-}
-}
-Ok(SyntaxVariable {
-name: variable.name,
-kind: variable.kind,
-productions,
-})
+fn flatten(mut self, rule: Rule) -> Production {
+self.apply(rule, true);
+self.production
 }

-fn flatten_rule(&mut self, rule: Rule) -> FlattenGrammarResult<Production> {
-self.production = Production::default();
-self.alias_stack.clear();
-self.reserved_word_stack.clear();
-self.precedence_stack.clear();
-self.associativity_stack.clear();
-self.field_name_stack.clear();
-self.apply(rule, true)?;
-Ok(self.production.clone())
-}
-
-fn apply(&mut self, rule: Rule, at_end: bool) -> FlattenGrammarResult<bool> {
+fn apply(&mut self, rule: Rule, at_end: bool) -> bool {
 match rule {
 Rule::Seq(members) => {
 let mut result = false;
 let last_index = members.len() - 1;
 for (i, member) in members.into_iter().enumerate() {
-result |= self.apply(member, i == last_index && at_end)?;
+result |= self.apply(member, i == last_index && at_end);
 }
-Ok(result)
+result
 }
 Rule::Metadata { rule, params } => {
 let mut has_precedence = false;

@@ -121,7 +73,7 @@ impl RuleFlattener {
 self.production.dynamic_precedence = params.dynamic_precedence;
 }

-let did_push = self.apply(*rule, at_end)?;
+let did_push = self.apply(*rule, at_end);

 if has_precedence {
 self.precedence_stack.pop();

@@ -150,20 +102,7 @@ impl RuleFlattener {
 self.field_name_stack.pop();
 }

-Ok(did_push)
-}
-Rule::Reserved { rule, context_name } => {
-self.reserved_word_stack.push(
-self.reserved_word_set_ids
-.get(&context_name)
-.copied()
-.ok_or_else(|| {
-FlattenGrammarError::NoReservedWordSet(context_name.clone())
-})?,
-);
-let did_push = self.apply(*rule, at_end)?;
-self.reserved_word_stack.pop();
-Ok(did_push)
+did_push
 }
 Rule::Symbol(symbol) => {
 self.production.steps.push(ProductionStep {

@@ -174,17 +113,12 @@ impl RuleFlattener {
 .cloned()
 .unwrap_or(Precedence::None),
 associativity: self.associativity_stack.last().copied(),
-reserved_word_set_id: self
-.reserved_word_stack
-.last()
-.copied()
-.unwrap_or(ReservedWordSetId::default()),
 alias: self.alias_stack.last().cloned(),
 field_name: self.field_name_stack.last().cloned(),
 });
-Ok(true)
+true
 }
-_ => Ok(false),
+_ => false,
 }
 }
 }

@@ -195,7 +129,7 @@ fn extract_choices(rule: Rule) -> Vec<Rule> {
 let mut result = vec![Rule::Blank];
 for element in elements {
 let extraction = extract_choices(element);
-let mut next_result = Vec::with_capacity(result.len());
+let mut next_result = Vec::new();
 for entry in result {
 for extraction_entry in &extraction {
 next_result.push(Rule::Seq(vec![entry.clone(), extraction_entry.clone()]));

@@ -206,7 +140,7 @@ fn extract_choices(rule: Rule) -> Vec<Rule> {
 result
 }
 Rule::Choice(elements) => {
-let mut result = Vec::with_capacity(elements.len());
+let mut result = Vec::new();
 for element in elements {
 for rule in extract_choices(element) {
 result.push(rule);

@@ -221,17 +155,25 @@ fn extract_choices(rule: Rule) -> Vec<Rule> {
 params: params.clone(),
 })
 .collect(),
-Rule::Reserved { rule, context_name } => extract_choices(*rule)
-.into_iter()
-.map(|rule| Rule::Reserved {
-rule: Box::new(rule),
-context_name: context_name.clone(),
-})
-.collect(),
 _ => vec![rule],
 }
 }

+fn flatten_variable(variable: Variable) -> SyntaxVariable {
+let mut productions = Vec::new();
+for rule in extract_choices(variable.rule) {
+let production = RuleFlattener::new().flatten(rule);
+if !productions.contains(&production) {
+productions.push(production);
+}
+}
+SyntaxVariable {
+name: variable.name,
+kind: variable.kind,
+productions,
+}
+}
+
 fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
 for variable in variables {
 for production in &variable.productions {

@@ -245,48 +187,37 @@ fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
 false
 }

-pub(super) fn flatten_grammar(
-grammar: ExtractedSyntaxGrammar,
-) -> FlattenGrammarResult<SyntaxGrammar> {
-let mut reserved_word_set_ids_by_name = HashMap::new();
-for (ix, set) in grammar.reserved_word_sets.iter().enumerate() {
-reserved_word_set_ids_by_name.insert(set.name.clone(), ReservedWordSetId(ix));
+pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
+let mut variables = Vec::new();
+for variable in grammar.variables {
+variables.push(flatten_variable(variable));
 }

-let mut flattener = RuleFlattener::new(reserved_word_set_ids_by_name);
-let variables = grammar
-.variables
-.into_iter()
-.map(|variable| flattener.flatten_variable(variable))
-.collect::<FlattenGrammarResult<Vec<_>>>()?;
-
 for (i, variable) in variables.iter().enumerate() {
 let symbol = Symbol::non_terminal(i);
-let used = symbol_is_used(&variables, symbol);
-
 for production in &variable.productions {
-if used && production.steps.is_empty() {
-Err(FlattenGrammarError::EmptyString(variable.name.clone()))?;
+if production.steps.is_empty() && symbol_is_used(&variables, symbol) {
+return Err(anyhow!(
+indoc! {"
+The rule `{}` matches the empty string.
+
+Tree-sitter does not support syntactic rules that match the empty string
+unless they are used only as the grammar's start rule.
+"},
+variable.name
+));
 }

 if grammar.variables_to_inline.contains(&symbol)
 && production.steps.iter().any(|step| step.symbol == symbol)
 {
-Err(FlattenGrammarError::RecursiveInline(variable.name.clone()))?;
+return Err(anyhow!(
+"Rule `{}` cannot be inlined because it contains a reference to itself.",
+variable.name,
+));
 }
 }
 }
-let mut reserved_word_sets = grammar
-.reserved_word_sets
-.into_iter()
-.map(|set| set.reserved_words.into_iter().collect())
-.collect::<Vec<_>>();
-
-// If no default reserved word set is specified, there are no reserved words.
-if reserved_word_sets.is_empty() {
-reserved_word_sets.push(TokenSet::default());
-}
-
 Ok(SyntaxGrammar {
 extra_symbols: grammar.extra_symbols,
 expected_conflicts: grammar.expected_conflicts,

@@ -295,7 +226,6 @@ pub(super) fn flatten_grammar(
 external_tokens: grammar.external_tokens,
 supertype_symbols: grammar.supertype_symbols,
 word_token: grammar.word_token,
-reserved_word_sets,
 variables,
 })
 }

@@ -307,31 +237,28 @@ mod tests {

 #[test]
 fn test_flatten_grammar() {
-let mut flattener = RuleFlattener::new(HashMap::default());
-let result = flattener
-.flatten_variable(Variable {
-name: "test".to_string(),
-kind: VariableType::Named,
-rule: Rule::seq(vec![
-Rule::non_terminal(1),
-Rule::prec_left(
-Precedence::Integer(101),
-Rule::seq(vec![
-Rule::non_terminal(2),
-Rule::choice(vec![
-Rule::prec_right(
-Precedence::Integer(102),
-Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
-),
-Rule::non_terminal(5),
-]),
-Rule::non_terminal(6),
+let result = flatten_variable(Variable {
+name: "test".to_string(),
+kind: VariableType::Named,
+rule: Rule::seq(vec![
+Rule::non_terminal(1),
+Rule::prec_left(
+Precedence::Integer(101),
+Rule::seq(vec![
+Rule::non_terminal(2),
+Rule::choice(vec![
+Rule::prec_right(
+Precedence::Integer(102),
+Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
+),
+Rule::non_terminal(5),
 ]),
-),
-Rule::non_terminal(7),
-]),
-})
-.unwrap();
+Rule::non_terminal(6),
+]),
+),
+Rule::non_terminal(7),
+]),
+});

 assert_eq!(
 result.productions,

@@ -368,31 +295,28 @@ mod tests {

 #[test]
 fn test_flatten_grammar_with_maximum_dynamic_precedence() {
-let mut flattener = RuleFlattener::new(HashMap::default());
-let result = flattener
-.flatten_variable(Variable {
-name: "test".to_string(),
-kind: VariableType::Named,
-rule: Rule::seq(vec![
-Rule::non_terminal(1),
-Rule::prec_dynamic(
-101,
-Rule::seq(vec![
-Rule::non_terminal(2),
-Rule::choice(vec![
-Rule::prec_dynamic(
-102,
-Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
-),
-Rule::non_terminal(5),
-]),
-Rule::non_terminal(6),
+let result = flatten_variable(Variable {
+name: "test".to_string(),
+kind: VariableType::Named,
+rule: Rule::seq(vec![
+Rule::non_terminal(1),
+Rule::prec_dynamic(
+101,
+Rule::seq(vec![
+Rule::non_terminal(2),
+Rule::choice(vec![
+Rule::prec_dynamic(
+102,
+Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
+),
+Rule::non_terminal(5),
 ]),
-),
-Rule::non_terminal(7),
-]),
-})
-.unwrap();
+Rule::non_terminal(6),
+]),
+),
+Rule::non_terminal(7),
+]),
+});

 assert_eq!(
 result.productions,

@@ -424,17 +348,14 @@ mod tests {

 #[test]
 fn test_flatten_grammar_with_final_precedence() {
-let mut flattener = RuleFlattener::new(HashMap::default());
-let result = flattener
-.flatten_variable(Variable {
-name: "test".to_string(),
-kind: VariableType::Named,
-rule: Rule::prec_left(
-Precedence::Integer(101),
-Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
-),
-})
-.unwrap();
+let result = flatten_variable(Variable {
+name: "test".to_string(),
+kind: VariableType::Named,
+rule: Rule::prec_left(
+Precedence::Integer(101),
+Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
+),
+});

 assert_eq!(
 result.productions,

@@ -449,16 +370,14 @@ mod tests {
 }]
 );

-let result = flattener
-.flatten_variable(Variable {
-name: "test".to_string(),
-kind: VariableType::Named,
-rule: Rule::prec_left(
-Precedence::Integer(101),
-Rule::seq(vec![Rule::non_terminal(1)]),
-),
-})
-.unwrap();
+let result = flatten_variable(Variable {
+name: "test".to_string(),
+kind: VariableType::Named,
+rule: Rule::prec_left(
+Precedence::Integer(101),
+Rule::seq(vec![Rule::non_terminal(1)]),
+),
+});

 assert_eq!(
 result.productions,

@@ -472,21 +391,18 @@ mod tests {

 #[test]
 fn test_flatten_grammar_with_field_names() {
-let mut flattener = RuleFlattener::new(HashMap::default());
-let result = flattener
-.flatten_variable(Variable {
-name: "test".to_string(),
-kind: VariableType::Named,
-rule: Rule::seq(vec![
-Rule::field("first-thing".to_string(), Rule::terminal(1)),
-Rule::terminal(2),
-Rule::choice(vec![
-Rule::Blank,
-Rule::field("second-thing".to_string(), Rule::terminal(3)),
-]),
-]),
-})
-.unwrap();
+let result = flatten_variable(Variable {
+name: "test".to_string(),
+kind: VariableType::Named,
+rule: Rule::seq(vec![
+Rule::field("first-thing".to_string(), Rule::terminal(1)),
+Rule::terminal(2),
+Rule::choice(vec![
+Rule::Blank,
+Rule::field("second-thing".to_string(), Rule::terminal(3)),
+]),
+]),
+});

 assert_eq!(
 result.productions,

@@ -520,7 +436,6 @@ mod tests {
 external_tokens: Vec::new(),
 supertype_symbols: Vec::new(),
 word_token: None,
-reserved_word_sets: Vec::new(),
 variables: vec![Variable {
 name: "test".to_string(),
 kind: VariableType::Named,

@@ -534,7 +449,7 @@ mod tests {

 assert_eq!(
 result.unwrap_err().to_string(),
-"Rule `test` cannot be inlined because it contains a reference to itself",
+"Rule `test` cannot be inlined because it contains a reference to itself.",
 );
 }
 }

@@ -1,34 +1,16 @@
-use log::warn;
-use serde::Serialize;
-use thiserror::Error;
+use anyhow::{anyhow, Result};

 use super::InternedGrammar;
 use crate::{
-grammars::{InputGrammar, ReservedWordContext, Variable, VariableType},
+grammars::{InputGrammar, Variable, VariableType},
 rules::{Rule, Symbol},
 };

-pub type InternSymbolsResult<T> = Result<T, InternSymbolsError>;
-
-#[derive(Debug, Error, Serialize)]
-pub enum InternSymbolsError {
-#[error("A grammar's start rule must be visible.")]
-HiddenStartRule,
-#[error("Undefined symbol `{0}`")]
-Undefined(String),
-#[error("Undefined symbol `{0}` in grammar's supertypes array")]
-UndefinedSupertype(String),
-#[error("Undefined symbol `{0}` in grammar's conflicts array")]
-UndefinedConflict(String),
-#[error("Undefined symbol `{0}` as grammar's word token")]
-UndefinedWordToken(String),
-}
-
-pub(super) fn intern_symbols(grammar: &InputGrammar) -> InternSymbolsResult<InternedGrammar> {
+pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar> {
 let interner = Interner { grammar };

 if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden {
-Err(InternSymbolsError::HiddenStartRule)?;
+return Err(anyhow!("A grammar's start rule must be visible."));
 }

 let mut variables = Vec::with_capacity(grammar.variables.len());

@@ -59,31 +41,17 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> InternSymbolsResult<Inte
 let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len());
 for supertype_symbol_name in &grammar.supertype_symbols {
 supertype_symbols.push(interner.intern_name(supertype_symbol_name).ok_or_else(|| {
-InternSymbolsError::UndefinedSupertype(supertype_symbol_name.clone())
+anyhow!("Undefined symbol `{supertype_symbol_name}` in grammar's supertypes array")
 })?);
 }

-let mut reserved_words = Vec::with_capacity(grammar.reserved_words.len());
-for reserved_word_set in &grammar.reserved_words {
-let mut interned_set = Vec::with_capacity(reserved_word_set.reserved_words.len());
-for rule in &reserved_word_set.reserved_words {
-interned_set.push(interner.intern_rule(rule, None)?);
-}
-reserved_words.push(ReservedWordContext {
-name: reserved_word_set.name.clone(),
-reserved_words: interned_set,
-});
-}
-
-let mut expected_conflicts = Vec::with_capacity(grammar.expected_conflicts.len());
+let mut expected_conflicts = Vec::new();
 for conflict in &grammar.expected_conflicts {
 let mut interned_conflict = Vec::with_capacity(conflict.len());
 for name in conflict {
-interned_conflict.push(
-interner
-.intern_name(name)
-.ok_or_else(|| InternSymbolsError::UndefinedConflict(name.clone()))?,
-);
+interned_conflict.push(interner.intern_name(name).ok_or_else(|| {
+anyhow!("Undefined symbol `{name}` in grammar's conflicts array")
+})?);
 }
 expected_conflicts.push(interned_conflict);
 }

@@ -95,15 +63,14 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> InternSymbolsResult<Inte
 }
 }

-let word_token = if let Some(name) = grammar.word_token.as_ref() {
-Some(
+let mut word_token = None;
+if let Some(name) = grammar.word_token.as_ref() {
+word_token = Some(
 interner
 .intern_name(name)
-.ok_or_else(|| InternSymbolsError::UndefinedWordToken(name.clone()))?,
-)
-} else {
-None
-};
+.ok_or_else(|| anyhow!("Undefined symbol `{name}` as grammar's word token"))?,
+);
+}

 for (i, variable) in variables.iter_mut().enumerate() {
 if supertype_symbols.contains(&Symbol::non_terminal(i)) {

@@ -120,7 +87,6 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> InternSymbolsResult<Inte
 supertype_symbols,
 word_token,
 precedence_orderings: grammar.precedence_orderings.clone(),
-reserved_word_sets: reserved_words,
 })
 }

@@ -129,10 +95,10 @@ struct Interner<'a> {
 }

 impl Interner<'_> {
-fn intern_rule(&self, rule: &Rule, name: Option<&str>) -> InternSymbolsResult<Rule> {
+fn intern_rule(&self, rule: &Rule, name: Option<&str>) -> Result<Rule> {
 match rule {
 Rule::Choice(elements) => {
-self.check_single(elements, name, "choice");
+self.check_single(elements, name);
 let mut result = Vec::with_capacity(elements.len());
 for element in elements {
 result.push(self.intern_rule(element, name)?);

@@ -140,7 +106,7 @@ impl Interner<'_> {
 Ok(Rule::Choice(result))
 }
 Rule::Seq(elements) => {
-self.check_single(elements, name, "seq");
+self.check_single(elements, name);
 let mut result = Vec::with_capacity(elements.len());
 for element in elements {
 result.push(self.intern_rule(element, name)?);

@@ -152,12 +118,8 @@ impl Interner<'_> {
 rule: Box::new(self.intern_rule(rule, name)?),
 params: params.clone(),
 }),
-Rule::Reserved { rule, context_name } => Ok(Rule::Reserved {
-rule: Box::new(self.intern_rule(rule, name)?),
-context_name: context_name.clone(),
-}),
 Rule::NamedSymbol(name) => self.intern_name(name).map_or_else(
-|| Err(InternSymbolsError::Undefined(name.clone())),
+|| Err(anyhow!("Undefined symbol `{name}`")),
 |symbol| Ok(Rule::Symbol(symbol)),
 ),
 _ => Ok(rule.clone()),

@@ -184,10 +146,10 @@ impl Interner<'_> {

 // In the case of a seq or choice rule of 1 element in a hidden rule, weird
 // inconsistent behavior with queries can occur. So we should warn the user about it.
-fn check_single(&self, elements: &[Rule], name: Option<&str>, kind: &str) {
+fn check_single(&self, elements: &[Rule], name: Option<&str>) {
 if elements.len() == 1 && matches!(elements[0], Rule::String(_) | Rule::Pattern(_, _)) {
-warn!(
-"rule {} contains a `{kind}` rule with a single element. This is unnecessary.",
+eprintln!(
+"Warning: rule {} contains a `seq` or `choice` rule with a single element. This is unnecessary.",
 name.unwrap_or_default()
 );
 }

@@ -278,9 +240,10 @@ mod tests {
 fn test_grammar_with_undefined_symbols() {
 let result = intern_symbols(&build_grammar(vec![Variable::named("x", Rule::named("y"))]));

-assert!(result.is_err(), "Expected an error but got none");
-let e = result.err().unwrap();
-assert_eq!(e.to_string(), "Undefined symbol `y`");
+match result {
+Err(e) => assert_eq!(e.to_string(), "Undefined symbol `y`"),
+_ => panic!("Expected an error but got none"),
+}
 }

 fn build_grammar(variables: Vec<Variable>) -> InputGrammar {

@@ -8,18 +8,11 @@ mod process_inlines;
use std::{
|
use std::{
|
||||||
cmp::Ordering,
|
cmp::Ordering,
|
||||||
collections::{hash_map, BTreeSet, HashMap, HashSet},
|
collections::{hash_map, HashMap, HashSet},
|
||||||
mem,
|
mem,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub use expand_tokens::ExpandTokensError;
|
use anyhow::{anyhow, Result};
|
||||||
pub use extract_tokens::ExtractTokensError;
|
|
||||||
pub use flatten_grammar::FlattenGrammarError;
|
|
||||||
use indexmap::IndexMap;
|
|
||||||
pub use intern_symbols::InternSymbolsError;
|
|
||||||
pub use process_inlines::ProcessInlinesError;
|
|
||||||
use serde::Serialize;
|
|
||||||
use thiserror::Error;
|
|
||||||
|
|
||||||
pub use self::expand_tokens::expand_tokens;
|
pub use self::expand_tokens::expand_tokens;
|
||||||
use self::{
|
use self::{
|
||||||
|
|
@ -34,7 +27,6 @@ use super::{
|
||||||
},
|
},
|
||||||
rules::{AliasMap, Precedence, Rule, Symbol},
|
rules::{AliasMap, Precedence, Rule, Symbol},
|
||||||
};
|
};
|
||||||
use crate::grammars::ReservedWordContext;
|
|
||||||
|
|
||||||
pub struct IntermediateGrammar<T, U> {
|
pub struct IntermediateGrammar<T, U> {
|
||||||
variables: Vec<Variable>,
|
variables: Vec<Variable>,
|
||||||
|
|
@@ -45,7 +37,6 @@ pub struct IntermediateGrammar<T, U> {
|
||||||
variables_to_inline: Vec<Symbol>,
|
variables_to_inline: Vec<Symbol>,
|
||||||
supertype_symbols: Vec<Symbol>,
|
supertype_symbols: Vec<Symbol>,
|
||||||
word_token: Option<Symbol>,
|
word_token: Option<Symbol>,
|
||||||
reserved_word_sets: Vec<ReservedWordContext<T>>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type InternedGrammar = IntermediateGrammar<Rule, Variable>;
|
pub type InternedGrammar = IntermediateGrammar<Rule, Variable>;
|
||||||
|
|
@@ -69,96 +60,21 @@ impl<T, U> Default for IntermediateGrammar<T, U> {
|
||||||
variables_to_inline: Vec::default(),
|
variables_to_inline: Vec::default(),
|
||||||
supertype_symbols: Vec::default(),
|
supertype_symbols: Vec::default(),
|
||||||
word_token: Option::default(),
|
word_token: Option::default(),
|
||||||
reserved_word_sets: Vec::default(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type PrepareGrammarResult<T> = Result<T, PrepareGrammarError>;
|
|
||||||
|
|
||||||
#[derive(Debug, Error, Serialize)]
|
|
||||||
#[error(transparent)]
|
|
||||||
pub enum PrepareGrammarError {
|
|
||||||
ValidatePrecedences(#[from] ValidatePrecedenceError),
|
|
||||||
ValidateIndirectRecursion(#[from] IndirectRecursionError),
|
|
||||||
InternSymbols(#[from] InternSymbolsError),
|
|
||||||
ExtractTokens(#[from] ExtractTokensError),
|
|
||||||
FlattenGrammar(#[from] FlattenGrammarError),
|
|
||||||
ExpandTokens(#[from] ExpandTokensError),
|
|
||||||
ProcessInlines(#[from] ProcessInlinesError),
|
|
||||||
}
|
|
||||||
|
|
||||||
pub type ValidatePrecedenceResult<T> = Result<T, ValidatePrecedenceError>;
|
|
||||||
|
|
||||||
#[derive(Debug, Error, Serialize)]
|
|
||||||
#[error(transparent)]
|
|
||||||
pub enum ValidatePrecedenceError {
|
|
||||||
Undeclared(#[from] UndeclaredPrecedenceError),
|
|
||||||
Ordering(#[from] ConflictingPrecedenceOrderingError),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Error, Serialize)]
|
|
||||||
pub struct IndirectRecursionError(pub Vec<String>);
|
|
||||||
|
|
||||||
impl std::fmt::Display for IndirectRecursionError {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
write!(f, "Grammar contains an indirectly recursive rule: ")?;
|
|
||||||
for (i, symbol) in self.0.iter().enumerate() {
|
|
||||||
if i > 0 {
|
|
||||||
write!(f, " -> ")?;
|
|
||||||
}
|
|
||||||
write!(f, "{symbol}")?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Error, Serialize)]
|
|
||||||
pub struct UndeclaredPrecedenceError {
|
|
||||||
pub precedence: String,
|
|
||||||
pub rule: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Display for UndeclaredPrecedenceError {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
write!(
|
|
||||||
f,
|
|
||||||
"Undeclared precedence '{}' in rule '{}'",
|
|
||||||
self.precedence, self.rule
|
|
||||||
)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Error, Serialize)]
|
|
||||||
pub struct ConflictingPrecedenceOrderingError {
|
|
||||||
pub precedence_1: String,
|
|
||||||
pub precedence_2: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Display for ConflictingPrecedenceOrderingError {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
write!(
|
|
||||||
f,
|
|
||||||
"Conflicting orderings for precedences {} and {}",
|
|
||||||
self.precedence_1, self.precedence_2
|
|
||||||
)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Transform an input grammar into separate components that are ready
|
/// Transform an input grammar into separate components that are ready
|
||||||
/// for parse table construction.
|
/// for parse table construction.
|
||||||
pub fn prepare_grammar(
|
pub fn prepare_grammar(
|
||||||
input_grammar: &InputGrammar,
|
input_grammar: &InputGrammar,
|
||||||
) -> PrepareGrammarResult<(
|
) -> Result<(
|
||||||
SyntaxGrammar,
|
SyntaxGrammar,
|
||||||
LexicalGrammar,
|
LexicalGrammar,
|
||||||
InlinedProductionMap,
|
InlinedProductionMap,
|
||||||
AliasMap,
|
AliasMap,
|
||||||
)> {
|
)> {
|
||||||
validate_precedences(input_grammar)?;
|
validate_precedences(input_grammar)?;
|
||||||
validate_indirect_recursion(input_grammar)?;
|
|
||||||
|
|
||||||
let interned_grammar = intern_symbols(input_grammar)?;
|
let interned_grammar = intern_symbols(input_grammar)?;
|
||||||
let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
|
let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
|
||||||
|
|
@@ -170,94 +86,13 @@ pub fn prepare_grammar(
|
||||||
Ok((syntax_grammar, lexical_grammar, inlines, default_aliases))
|
Ok((syntax_grammar, lexical_grammar, inlines, default_aliases))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check for indirect recursion cycles in the grammar that can cause infinite loops while
|
|
||||||
/// parsing. An indirect recursion cycle occurs when a non-terminal can derive itself through
|
|
||||||
/// a chain of single-symbol productions (e.g., A -> B, B -> A).
|
|
||||||
fn validate_indirect_recursion(grammar: &InputGrammar) -> Result<(), IndirectRecursionError> {
|
|
||||||
let mut epsilon_transitions: IndexMap<&str, BTreeSet<String>> = IndexMap::new();
|
|
||||||
|
|
||||||
for variable in &grammar.variables {
|
|
||||||
let productions = get_single_symbol_productions(&variable.rule);
|
|
||||||
// Filter out rules that *directly* reference themselves, as this doesn't
|
|
||||||
// cause a parsing loop.
|
|
||||||
let filtered: BTreeSet<String> = productions
|
|
||||||
.into_iter()
|
|
||||||
.filter(|s| s != &variable.name)
|
|
||||||
.collect();
|
|
||||||
epsilon_transitions.insert(variable.name.as_str(), filtered);
|
|
||||||
}
|
|
||||||
|
|
||||||
for start_symbol in epsilon_transitions.keys() {
|
|
||||||
let mut visited = BTreeSet::new();
|
|
||||||
let mut path = Vec::new();
|
|
||||||
if let Some((start_idx, end_idx)) =
|
|
||||||
get_cycle(start_symbol, &epsilon_transitions, &mut visited, &mut path)
|
|
||||||
{
|
|
||||||
let cycle_symbols = path[start_idx..=end_idx]
|
|
||||||
.iter()
|
|
||||||
.map(|s| (*s).to_string())
|
|
||||||
.collect();
|
|
||||||
return Err(IndirectRecursionError(cycle_symbols));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_single_symbol_productions(rule: &Rule) -> BTreeSet<String> {
|
|
||||||
match rule {
|
|
||||||
Rule::NamedSymbol(name) => BTreeSet::from([name.clone()]),
|
|
||||||
Rule::Choice(choices) => choices
|
|
||||||
.iter()
|
|
||||||
.flat_map(get_single_symbol_productions)
|
|
||||||
.collect(),
|
|
||||||
Rule::Metadata { rule, .. } => get_single_symbol_productions(rule),
|
|
||||||
_ => BTreeSet::new(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Perform a depth-first search to detect cycles in single state transitions.
|
|
||||||
fn get_cycle<'a>(
|
|
||||||
current: &'a str,
|
|
||||||
transitions: &'a IndexMap<&'a str, BTreeSet<String>>,
|
|
||||||
visited: &mut BTreeSet<&'a str>,
|
|
||||||
path: &mut Vec<&'a str>,
|
|
||||||
) -> Option<(usize, usize)> {
|
|
||||||
if let Some(first_idx) = path.iter().position(|s| *s == current) {
|
|
||||||
path.push(current);
|
|
||||||
return Some((first_idx, path.len() - 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
if visited.contains(current) {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
path.push(current);
|
|
||||||
visited.insert(current);
|
|
||||||
|
|
||||||
if let Some(next_symbols) = transitions.get(current) {
|
|
||||||
for next in next_symbols {
|
|
||||||
if let Some(cycle) = get_cycle(next, transitions, visited, path) {
|
|
||||||
return Some(cycle);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
path.pop();
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
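Reviewer note: the new `validate_indirect_recursion` pass above reduces to a depth-first search over single-symbol productions. Below is a minimal, self-contained sketch of that search for readers skimming the hunk; the plain `BTreeMap`/`BTreeSet` transition table and the `find_cycle` name are stand-ins for the `IndexMap` and `get_cycle` used in the real code, so treat it as illustration only.

use std::collections::{BTreeMap, BTreeSet};

// Depth-first search over single-symbol transitions; returns the indices in
// `path` that bound the detected cycle, mirroring the `get_cycle` logic above.
fn find_cycle<'a>(
    current: &'a str,
    transitions: &'a BTreeMap<&'a str, BTreeSet<&'a str>>,
    visited: &mut BTreeSet<&'a str>,
    path: &mut Vec<&'a str>,
) -> Option<(usize, usize)> {
    if let Some(first_idx) = path.iter().position(|s| *s == current) {
        path.push(current);
        return Some((first_idx, path.len() - 1));
    }
    if !visited.insert(current) {
        return None;
    }
    path.push(current);
    if let Some(next_symbols) = transitions.get(current) {
        for &next in next_symbols {
            if let Some(cycle) = find_cycle(next, transitions, visited, path) {
                return Some(cycle);
            }
        }
    }
    path.pop();
    None
}

fn main() {
    // A -> B and B -> A form an indirect recursion cycle; a rule that only
    // references itself directly would have been filtered out beforehand.
    let transitions = BTreeMap::from([
        ("A", BTreeSet::from(["B"])),
        ("B", BTreeSet::from(["A"])),
    ]);
    let (mut visited, mut path) = (BTreeSet::new(), Vec::new());
    assert_eq!(
        find_cycle("A", &transitions, &mut visited, &mut path),
        Some((0, 2)) // path ends as ["A", "B", "A"]
    );
}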
/// Check that all of the named precedences used in the grammar are declared
|
/// Check that all of the named precedences used in the grammar are declared
|
||||||
/// within the `precedences` lists, and also that there are no conflicting
|
/// within the `precedences` lists, and also that there are no conflicting
|
||||||
/// precedence orderings declared in those lists.
|
/// precedence orderings declared in those lists.
|
||||||
fn validate_precedences(grammar: &InputGrammar) -> ValidatePrecedenceResult<()> {
|
fn validate_precedences(grammar: &InputGrammar) -> Result<()> {
|
||||||
// Check that no rule contains a named precedence that is not present in
|
// Check that no rule contains a named precedence that is not present in
|
||||||
// any of the `precedences` lists.
|
// any of the `precedences` lists.
|
||||||
fn validate(
|
fn validate(rule_name: &str, rule: &Rule, names: &HashSet<&String>) -> Result<()> {
|
||||||
rule_name: &str,
|
|
||||||
rule: &Rule,
|
|
||||||
names: &HashSet<&String>,
|
|
||||||
) -> ValidatePrecedenceResult<()> {
|
|
||||||
match rule {
|
match rule {
|
||||||
Rule::Repeat(rule) => validate(rule_name, rule, names),
|
Rule::Repeat(rule) => validate(rule_name, rule, names),
|
||||||
Rule::Seq(elements) | Rule::Choice(elements) => elements
|
Rule::Seq(elements) | Rule::Choice(elements) => elements
|
||||||
|
|
@@ -266,10 +101,7 @@ fn validate_precedences(grammar: &InputGrammar) -> ValidatePrecedenceResult<()>
|
||||||
Rule::Metadata { rule, params } => {
|
Rule::Metadata { rule, params } => {
|
||||||
if let Precedence::Name(n) = &params.precedence {
|
if let Precedence::Name(n) = &params.precedence {
|
||||||
if !names.contains(n) {
|
if !names.contains(n) {
|
||||||
Err(UndeclaredPrecedenceError {
|
return Err(anyhow!("Undeclared precedence '{n}' in rule '{rule_name}'"));
|
||||||
precedence: n.clone(),
|
|
||||||
rule: rule_name.to_string(),
|
|
||||||
})?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
validate(rule_name, rule, names)?;
|
validate(rule_name, rule, names)?;
|
||||||
|
|
@@ -299,10 +131,9 @@ fn validate_precedences(grammar: &InputGrammar) -> ValidatePrecedenceResult<()>
|
||||||
}
|
}
|
||||||
hash_map::Entry::Occupied(e) => {
|
hash_map::Entry::Occupied(e) => {
|
||||||
if e.get() != &ordering {
|
if e.get() != &ordering {
|
||||||
Err(ConflictingPrecedenceOrderingError {
|
return Err(anyhow!(
|
||||||
precedence_1: entry1.to_string(),
|
"Conflicting orderings for precedences {entry1} and {entry2}",
|
||||||
precedence_2: entry2.to_string(),
|
));
|
||||||
})?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@@ -1,7 +1,6 @@
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use serde::Serialize;
|
use anyhow::{anyhow, Result};
|
||||||
use thiserror::Error;
|
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
grammars::{InlinedProductionMap, LexicalGrammar, Production, ProductionStep, SyntaxGrammar},
|
grammars::{InlinedProductionMap, LexicalGrammar, Production, ProductionStep, SyntaxGrammar},
|
||||||
|
|
@@ -70,13 +69,12 @@ impl InlinedProductionMapBuilder {
|
||||||
let production_map = production_indices_by_step_id
|
let production_map = production_indices_by_step_id
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|(step_id, production_indices)| {
|
.map(|(step_id, production_indices)| {
|
||||||
let production =
|
let production = step_id.variable_index.map_or_else(
|
||||||
core::ptr::from_ref::<Production>(step_id.variable_index.map_or_else(
|
|| &productions[step_id.production_index],
|
||||||
|| &productions[step_id.production_index],
|
|variable_index| {
|
||||||
|variable_index| {
|
&grammar.variables[variable_index].productions[step_id.production_index]
|
||||||
&grammar.variables[variable_index].productions[step_id.production_index]
|
},
|
||||||
},
|
) as *const Production;
|
||||||
));
|
|
||||||
((production, step_id.step_index as u32), production_indices)
|
((production, step_id.step_index as u32), production_indices)
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
@@ -189,38 +187,29 @@ impl InlinedProductionMapBuilder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type ProcessInlinesResult<T> = Result<T, ProcessInlinesError>;
|
|
||||||
|
|
||||||
#[derive(Debug, Error, Serialize)]
|
|
||||||
pub enum ProcessInlinesError {
|
|
||||||
#[error("External token `{0}` cannot be inlined")]
|
|
||||||
ExternalToken(String),
|
|
||||||
#[error("Token `{0}` cannot be inlined")]
|
|
||||||
Token(String),
|
|
||||||
#[error("Rule `{0}` cannot be inlined because it is the first rule")]
|
|
||||||
FirstRule(String),
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(super) fn process_inlines(
|
pub(super) fn process_inlines(
|
||||||
grammar: &SyntaxGrammar,
|
grammar: &SyntaxGrammar,
|
||||||
lexical_grammar: &LexicalGrammar,
|
lexical_grammar: &LexicalGrammar,
|
||||||
) -> ProcessInlinesResult<InlinedProductionMap> {
|
) -> Result<InlinedProductionMap> {
|
||||||
for symbol in &grammar.variables_to_inline {
|
for symbol in &grammar.variables_to_inline {
|
||||||
match symbol.kind {
|
match symbol.kind {
|
||||||
SymbolType::External => {
|
SymbolType::External => {
|
||||||
Err(ProcessInlinesError::ExternalToken(
|
return Err(anyhow!(
|
||||||
grammar.external_tokens[symbol.index].name.clone(),
|
"External token `{}` cannot be inlined",
|
||||||
))?;
|
grammar.external_tokens[symbol.index].name
|
||||||
|
))
|
||||||
}
|
}
|
||||||
SymbolType::Terminal => {
|
SymbolType::Terminal => {
|
||||||
Err(ProcessInlinesError::Token(
|
return Err(anyhow!(
|
||||||
lexical_grammar.variables[symbol.index].name.clone(),
|
"Token `{}` cannot be inlined",
|
||||||
))?;
|
lexical_grammar.variables[symbol.index].name,
|
||||||
|
))
|
||||||
}
|
}
|
||||||
SymbolType::NonTerminal if symbol.index == 0 => {
|
SymbolType::NonTerminal if symbol.index == 0 => {
|
||||||
Err(ProcessInlinesError::FirstRule(
|
return Err(anyhow!(
|
||||||
grammar.variables[symbol.index].name.clone(),
|
"Rule `{}` cannot be inlined because it is the first rule",
|
||||||
))?;
|
grammar.variables[symbol.index].name,
|
||||||
|
))
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
|
|
@@ -549,9 +538,10 @@ mod tests {
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
|
|
||||||
let result = process_inlines(&grammar, &lexical_grammar);
|
if let Err(error) = process_inlines(&grammar, &lexical_grammar) {
|
||||||
assert!(result.is_err(), "expected an error, but got none");
|
assert_eq!(error.to_string(), "Token `something` cannot be inlined");
|
||||||
let err = result.err().unwrap();
|
} else {
|
||||||
assert_eq!(err.to_string(), "Token `something` cannot be inlined",);
|
panic!("expected an error, but got none");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@@ -1,19 +1,15 @@
|
||||||
use std::{
|
use std::{
|
||||||
cmp,
|
cmp,
|
||||||
collections::{BTreeMap, BTreeSet, HashMap, HashSet},
|
collections::{HashMap, HashSet},
|
||||||
fmt::Write,
|
fmt::Write,
|
||||||
mem::swap,
|
mem::swap,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::LANGUAGE_VERSION;
|
|
||||||
use indoc::indoc;
|
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
build_tables::Tables,
|
build_tables::Tables,
|
||||||
grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType},
|
grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType},
|
||||||
nfa::CharacterSet,
|
nfa::CharacterSet,
|
||||||
node_types::ChildType,
|
rules::{Alias, AliasMap, Symbol, SymbolType},
|
||||||
rules::{Alias, AliasMap, Symbol, SymbolType, TokenSet},
|
|
||||||
tables::{
|
tables::{
|
||||||
AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable,
|
AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable,
|
||||||
ParseTableEntry,
|
ParseTableEntry,
|
||||||
|
|
@@ -21,11 +17,12 @@ use super::{
|
||||||
};
|
};
|
||||||
|
|
||||||
const SMALL_STATE_THRESHOLD: usize = 64;
|
const SMALL_STATE_THRESHOLD: usize = 64;
|
||||||
pub const ABI_VERSION_MIN: usize = 14;
|
const ABI_VERSION_MIN: usize = 14;
|
||||||
pub const ABI_VERSION_MAX: usize = LANGUAGE_VERSION;
|
const ABI_VERSION_MAX: usize = tree_sitter::LANGUAGE_VERSION;
|
||||||
const ABI_VERSION_WITH_RESERVED_WORDS: usize = 15;
|
const ABI_VERSION_WITH_METADATA: usize = 15;
|
||||||
|
const BUILD_VERSION: &str = env!("CARGO_PKG_VERSION");
|
||||||
|
const BUILD_SHA: Option<&'static str> = option_env!("BUILD_SHA");
|
||||||
|
|
||||||
#[clippy::format_args]
|
|
||||||
macro_rules! add {
|
macro_rules! add {
|
||||||
($this: tt, $($arg: tt)*) => {{
|
($this: tt, $($arg: tt)*) => {{
|
||||||
$this.buffer.write_fmt(format_args!($($arg)*)).unwrap();
|
$this.buffer.write_fmt(format_args!($($arg)*)).unwrap();
|
||||||
|
|
@@ -34,15 +31,12 @@ macro_rules! add {
|
||||||
|
|
||||||
macro_rules! add_whitespace {
|
macro_rules! add_whitespace {
|
||||||
($this:tt) => {{
|
($this:tt) => {{
|
||||||
// 4 bytes per char, 2 spaces per indent level
|
|
||||||
$this.buffer.reserve(4 * 2 * $this.indent_level);
|
|
||||||
for _ in 0..$this.indent_level {
|
for _ in 0..$this.indent_level {
|
||||||
write!(&mut $this.buffer, " ").unwrap();
|
write!(&mut $this.buffer, " ").unwrap();
|
||||||
}
|
}
|
||||||
}};
|
}};
|
||||||
}
|
}
|
||||||
|
|
||||||
#[clippy::format_args]
|
|
||||||
macro_rules! add_line {
|
macro_rules! add_line {
|
||||||
($this: tt, $($arg: tt)*) => {
|
($this: tt, $($arg: tt)*) => {
|
||||||
add_whitespace!($this);
|
add_whitespace!($this);
|
||||||
|
|
@@ -64,9 +58,9 @@ macro_rules! dedent {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
|
||||||
struct Generator {
|
struct Generator {
|
||||||
buffer: String,
|
buffer: String,
|
||||||
|
header_buffer: String,
|
||||||
indent_level: usize,
|
indent_level: usize,
|
||||||
language_name: String,
|
language_name: String,
|
||||||
parse_table: ParseTable,
|
parse_table: ParseTable,
|
||||||
|
|
@@ -75,6 +69,7 @@ struct Generator {
|
||||||
large_character_sets: Vec<(Option<Symbol>, CharacterSet)>,
|
large_character_sets: Vec<(Option<Symbol>, CharacterSet)>,
|
||||||
large_character_set_info: Vec<LargeCharacterSetInfo>,
|
large_character_set_info: Vec<LargeCharacterSetInfo>,
|
||||||
large_state_count: usize,
|
large_state_count: usize,
|
||||||
|
keyword_capture_token: Option<Symbol>,
|
||||||
syntax_grammar: SyntaxGrammar,
|
syntax_grammar: SyntaxGrammar,
|
||||||
lexical_grammar: LexicalGrammar,
|
lexical_grammar: LexicalGrammar,
|
||||||
default_aliases: AliasMap,
|
default_aliases: AliasMap,
|
||||||
|
|
@@ -83,13 +78,10 @@ struct Generator {
|
||||||
alias_ids: HashMap<Alias, String>,
|
alias_ids: HashMap<Alias, String>,
|
||||||
unique_aliases: Vec<Alias>,
|
unique_aliases: Vec<Alias>,
|
||||||
symbol_map: HashMap<Symbol, Symbol>,
|
symbol_map: HashMap<Symbol, Symbol>,
|
||||||
reserved_word_sets: Vec<TokenSet>,
|
|
||||||
reserved_word_set_ids_by_parse_state: Vec<usize>,
|
|
||||||
field_names: Vec<String>,
|
field_names: Vec<String>,
|
||||||
supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
|
|
||||||
supertype_map: BTreeMap<String, Vec<ChildType>>,
|
#[allow(unused)]
|
||||||
abi_version: usize,
|
abi_version: usize,
|
||||||
metadata: Option<Metadata>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct LargeCharacterSetInfo {
|
struct LargeCharacterSetInfo {
|
||||||
|
|
@@ -97,14 +89,8 @@ struct LargeCharacterSetInfo {
|
||||||
is_used: bool,
|
is_used: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Metadata {
|
|
||||||
major_version: u8,
|
|
||||||
minor_version: u8,
|
|
||||||
patch_version: u8,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Generator {
|
impl Generator {
|
||||||
fn generate(mut self) -> String {
|
fn generate(mut self) -> (String, String) {
|
||||||
self.init();
|
self.init();
|
||||||
self.add_header();
|
self.add_header();
|
||||||
self.add_includes();
|
self.add_includes();
|
||||||
|
|
@@ -128,17 +114,13 @@ impl Generator {
|
||||||
self.add_non_terminal_alias_map();
|
self.add_non_terminal_alias_map();
|
||||||
self.add_primary_state_id_list();
|
self.add_primary_state_id_list();
|
||||||
|
|
||||||
if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS && !self.supertype_map.is_empty() {
|
|
||||||
self.add_supertype_map();
|
|
||||||
}
|
|
||||||
|
|
||||||
let buffer_offset_before_lex_functions = self.buffer.len();
|
let buffer_offset_before_lex_functions = self.buffer.len();
|
||||||
|
|
||||||
let mut main_lex_table = LexTable::default();
|
let mut main_lex_table = LexTable::default();
|
||||||
swap(&mut main_lex_table, &mut self.main_lex_table);
|
swap(&mut main_lex_table, &mut self.main_lex_table);
|
||||||
self.add_lex_function("ts_lex", main_lex_table);
|
self.add_lex_function("ts_lex", main_lex_table);
|
||||||
|
|
||||||
if self.syntax_grammar.word_token.is_some() {
|
if self.keyword_capture_token.is_some() {
|
||||||
let mut keyword_lex_table = LexTable::default();
|
let mut keyword_lex_table = LexTable::default();
|
||||||
swap(&mut keyword_lex_table, &mut self.keyword_lex_table);
|
swap(&mut keyword_lex_table, &mut self.keyword_lex_table);
|
||||||
self.add_lex_function("ts_lex_keywords", keyword_lex_table);
|
self.add_lex_function("ts_lex_keywords", keyword_lex_table);
|
||||||
|
|
@@ -154,13 +136,7 @@ impl Generator {
|
||||||
}
|
}
|
||||||
self.buffer.push_str(&lex_functions);
|
self.buffer.push_str(&lex_functions);
|
||||||
|
|
||||||
self.add_lex_modes();
|
self.add_lex_modes_list();
|
||||||
|
|
||||||
if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS && self.reserved_word_sets.len() > 1
|
|
||||||
{
|
|
||||||
self.add_reserved_word_sets();
|
|
||||||
}
|
|
||||||
|
|
||||||
self.add_parse_table();
|
self.add_parse_table();
|
||||||
|
|
||||||
if !self.syntax_grammar.external_tokens.is_empty() {
|
if !self.syntax_grammar.external_tokens.is_empty() {
|
||||||
|
|
@@ -171,7 +147,7 @@ impl Generator {
|
||||||
|
|
||||||
self.add_parser_export();
|
self.add_parser_export();
|
||||||
|
|
||||||
self.buffer
|
(self.buffer, self.header_buffer)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn init(&mut self) {
|
fn init(&mut self) {
|
||||||
|
|
@@ -241,24 +217,33 @@ impl Generator {
|
||||||
for alias in &production_info.alias_sequence {
|
for alias in &production_info.alias_sequence {
|
||||||
// Generate a mapping from aliases to C identifiers.
|
// Generate a mapping from aliases to C identifiers.
|
||||||
if let Some(alias) = &alias {
|
if let Some(alias) = &alias {
|
||||||
// Some aliases match an existing symbol in the grammar.
|
let existing_symbol = self.parse_table.symbols.iter().copied().find(|symbol| {
|
||||||
let alias_id =
|
self.default_aliases.get(symbol).map_or_else(
|
||||||
if let Some(existing_symbol) = self.symbols_for_alias(alias).first() {
|
|| {
|
||||||
self.symbol_ids[&self.symbol_map[existing_symbol]].clone()
|
let (name, kind) = self.metadata_for_symbol(*symbol);
|
||||||
}
|
name == alias.value && kind == alias.kind()
|
||||||
// Other aliases don't match any existing symbol, and need their own
|
},
|
||||||
// identifiers.
|
|default_alias| default_alias == alias,
|
||||||
else {
|
)
|
||||||
if let Err(i) = self.unique_aliases.binary_search(alias) {
|
});
|
||||||
self.unique_aliases.insert(i, alias.clone());
|
|
||||||
}
|
|
||||||
|
|
||||||
if alias.is_named {
|
// Some aliases match an existing symbol in the grammar.
|
||||||
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
|
let alias_id = if let Some(existing_symbol) = existing_symbol {
|
||||||
} else {
|
self.symbol_ids[&self.symbol_map[&existing_symbol]].clone()
|
||||||
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
|
}
|
||||||
}
|
// Other aliases don't match any existing symbol, and need their own
|
||||||
};
|
// identifiers.
|
||||||
|
else {
|
||||||
|
if let Err(i) = self.unique_aliases.binary_search(alias) {
|
||||||
|
self.unique_aliases.insert(i, alias.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
if alias.is_named {
|
||||||
|
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
|
||||||
|
} else {
|
||||||
|
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
|
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
|
||||||
}
|
}
|
||||||
|
|
@@ -282,34 +267,6 @@ impl Generator {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assign an id to each unique reserved word set
|
|
||||||
self.reserved_word_sets.push(TokenSet::new());
|
|
||||||
for state in &self.parse_table.states {
|
|
||||||
let id = if let Some(ix) = self
|
|
||||||
.reserved_word_sets
|
|
||||||
.iter()
|
|
||||||
.position(|set| *set == state.reserved_words)
|
|
||||||
{
|
|
||||||
ix
|
|
||||||
} else {
|
|
||||||
self.reserved_word_sets.push(state.reserved_words.clone());
|
|
||||||
self.reserved_word_sets.len() - 1
|
|
||||||
};
|
|
||||||
self.reserved_word_set_ids_by_parse_state.push(id);
|
|
||||||
}
|
|
||||||
|
|
||||||
if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
|
|
||||||
for (supertype, subtypes) in &self.supertype_symbol_map {
|
|
||||||
if let Some(supertype) = self.symbol_ids.get(supertype) {
|
|
||||||
self.supertype_map
|
|
||||||
.entry(supertype.clone())
|
|
||||||
.or_insert_with(|| subtypes.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
self.supertype_symbol_map.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Determine which states should use the "small state" representation, and which should
|
// Determine which states should use the "small state" representation, and which should
|
||||||
// use the normal array representation.
|
// use the normal array representation.
|
||||||
let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2);
|
let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2);
|
||||||
|
|
@@ -325,7 +282,14 @@ impl Generator {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn add_header(&mut self) {
|
fn add_header(&mut self) {
|
||||||
add_line!(self, "/* Automatically @generated by tree-sitter */",);
|
let version = BUILD_SHA.map_or_else(
|
||||||
|
|| BUILD_VERSION.to_string(),
|
||||||
|
|build_sha| format!("{BUILD_VERSION} ({build_sha})"),
|
||||||
|
);
|
||||||
|
add_line!(
|
||||||
|
self,
|
||||||
|
"/* Automatically generated by tree-sitter v{version} */",
|
||||||
|
);
|
||||||
add_line!(self, "");
|
add_line!(self, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@@ -390,7 +354,7 @@ impl Generator {
|
||||||
self.parse_table.symbols.len()
|
self.parse_table.symbols.len()
|
||||||
);
|
);
|
||||||
add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len());
|
add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len());
|
||||||
add_line!(self, "#define TOKEN_COUNT {token_count}");
|
add_line!(self, "#define TOKEN_COUNT {}", token_count);
|
||||||
add_line!(
|
add_line!(
|
||||||
self,
|
self,
|
||||||
"#define EXTERNAL_TOKEN_COUNT {}",
|
"#define EXTERNAL_TOKEN_COUNT {}",
|
||||||
|
|
@@ -402,22 +366,11 @@ impl Generator {
|
||||||
"#define MAX_ALIAS_SEQUENCE_LENGTH {}",
|
"#define MAX_ALIAS_SEQUENCE_LENGTH {}",
|
||||||
self.parse_table.max_aliased_production_length
|
self.parse_table.max_aliased_production_length
|
||||||
);
|
);
|
||||||
add_line!(
|
|
||||||
self,
|
|
||||||
"#define MAX_RESERVED_WORD_SET_SIZE {}",
|
|
||||||
self.reserved_word_sets
|
|
||||||
.iter()
|
|
||||||
.map(TokenSet::len)
|
|
||||||
.max()
|
|
||||||
.unwrap()
|
|
||||||
);
|
|
||||||
|
|
||||||
add_line!(
|
add_line!(
|
||||||
self,
|
self,
|
||||||
"#define PRODUCTION_ID_COUNT {}",
|
"#define PRODUCTION_ID_COUNT {}",
|
||||||
self.parse_table.production_infos.len()
|
self.parse_table.production_infos.len()
|
||||||
);
|
);
|
||||||
add_line!(self, "#define SUPERTYPE_COUNT {}", self.supertype_map.len());
|
|
||||||
add_line!(self, "");
|
add_line!(self, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@@ -679,32 +632,31 @@ impl Generator {
|
||||||
&mut next_flat_field_map_index,
|
&mut next_flat_field_map_index,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut field_map_ids = Vec::with_capacity(self.parse_table.production_infos.len());
|
let mut field_map_ids = Vec::new();
|
||||||
for production_info in &self.parse_table.production_infos {
|
for production_info in &self.parse_table.production_infos {
|
||||||
if production_info.field_map.is_empty() {
|
if production_info.field_map.is_empty() {
|
||||||
field_map_ids.push((0, 0));
|
field_map_ids.push((0, 0));
|
||||||
} else {
|
} else {
|
||||||
let mut flat_field_map = Vec::with_capacity(production_info.field_map.len());
|
let mut flat_field_map = Vec::new();
|
||||||
for (field_name, locations) in &production_info.field_map {
|
for (field_name, locations) in &production_info.field_map {
|
||||||
for location in locations {
|
for location in locations {
|
||||||
flat_field_map.push((field_name.clone(), *location));
|
flat_field_map.push((field_name.clone(), *location));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let field_map_len = flat_field_map.len();
|
|
||||||
field_map_ids.push((
|
field_map_ids.push((
|
||||||
self.get_field_map_id(
|
self.get_field_map_id(
|
||||||
flat_field_map,
|
flat_field_map.clone(),
|
||||||
&mut flat_field_maps,
|
&mut flat_field_maps,
|
||||||
&mut next_flat_field_map_index,
|
&mut next_flat_field_map_index,
|
||||||
),
|
),
|
||||||
field_map_len,
|
flat_field_map.len(),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
add_line!(
|
add_line!(
|
||||||
self,
|
self,
|
||||||
"static const TSMapSlice ts_field_map_slices[PRODUCTION_ID_COUNT] = {{",
|
"static const TSFieldMapSlice ts_field_map_slices[PRODUCTION_ID_COUNT] = {{",
|
||||||
);
|
);
|
||||||
indent!(self);
|
indent!(self);
|
||||||
for (production_id, (row_id, length)) in field_map_ids.into_iter().enumerate() {
|
for (production_id, (row_id, length)) in field_map_ids.into_iter().enumerate() {
|
||||||
|
|
@@ -743,83 +695,6 @@ impl Generator {
|
||||||
add_line!(self, "");
|
add_line!(self, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
fn add_supertype_map(&mut self) {
|
|
||||||
add_line!(
|
|
||||||
self,
|
|
||||||
"static const TSSymbol ts_supertype_symbols[SUPERTYPE_COUNT] = {{"
|
|
||||||
);
|
|
||||||
indent!(self);
|
|
||||||
for supertype in self.supertype_map.keys() {
|
|
||||||
add_line!(self, "{supertype},");
|
|
||||||
}
|
|
||||||
dedent!(self);
|
|
||||||
add_line!(self, "}};\n");
|
|
||||||
|
|
||||||
add_line!(
|
|
||||||
self,
|
|
||||||
"static const TSMapSlice ts_supertype_map_slices[] = {{",
|
|
||||||
);
|
|
||||||
indent!(self);
|
|
||||||
let mut row_id = 0;
|
|
||||||
let mut supertype_ids = vec![0];
|
|
||||||
let mut supertype_string_map = BTreeMap::new();
|
|
||||||
for (supertype, subtypes) in &self.supertype_map {
|
|
||||||
supertype_string_map.insert(
|
|
||||||
supertype,
|
|
||||||
subtypes
|
|
||||||
.iter()
|
|
||||||
.flat_map(|s| match s {
|
|
||||||
ChildType::Normal(symbol) => vec![self.symbol_ids.get(symbol).cloned()],
|
|
||||||
ChildType::Aliased(alias) => {
|
|
||||||
self.alias_ids.get(alias).cloned().map_or_else(
|
|
||||||
|| {
|
|
||||||
self.symbols_for_alias(alias)
|
|
||||||
.into_iter()
|
|
||||||
.map(|s| self.symbol_ids.get(&s).cloned())
|
|
||||||
.collect()
|
|
||||||
},
|
|
||||||
|a| vec![Some(a)],
|
|
||||||
)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.flatten()
|
|
||||||
.collect::<BTreeSet<String>>(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
for (supertype, subtypes) in &supertype_string_map {
|
|
||||||
let length = subtypes.len();
|
|
||||||
add_line!(
|
|
||||||
self,
|
|
||||||
"[{supertype}] = {{.index = {row_id}, .length = {length}}},",
|
|
||||||
);
|
|
||||||
row_id += length;
|
|
||||||
supertype_ids.push(row_id);
|
|
||||||
}
|
|
||||||
dedent!(self);
|
|
||||||
add_line!(self, "}};");
|
|
||||||
add_line!(self, "");
|
|
||||||
|
|
||||||
add_line!(
|
|
||||||
self,
|
|
||||||
"static const TSSymbol ts_supertype_map_entries[] = {{",
|
|
||||||
);
|
|
||||||
indent!(self);
|
|
||||||
for (i, (_, subtypes)) in supertype_string_map.iter().enumerate() {
|
|
||||||
let row_index = supertype_ids[i];
|
|
||||||
add_line!(self, "[{row_index}] =");
|
|
||||||
indent!(self);
|
|
||||||
for subtype in subtypes {
|
|
||||||
add_whitespace!(self);
|
|
||||||
add!(self, "{subtype},\n");
|
|
||||||
}
|
|
||||||
dedent!(self);
|
|
||||||
}
|
|
||||||
|
|
||||||
dedent!(self);
|
|
||||||
add_line!(self, "}};");
|
|
||||||
add_line!(self, "");
|
|
||||||
}
|
|
||||||
|
|
||||||
fn add_lex_function(&mut self, name: &str, lex_table: LexTable) {
|
fn add_lex_function(&mut self, name: &str, lex_table: LexTable) {
|
||||||
add_line!(
|
add_line!(
|
||||||
self,
|
self,
|
||||||
|
|
@@ -877,7 +752,7 @@ impl Generator {
|
||||||
&& chars.ranges().all(|r| {
|
&& chars.ranges().all(|r| {
|
||||||
let start = *r.start() as u32;
|
let start = *r.start() as u32;
|
||||||
let end = *r.end() as u32;
|
let end = *r.end() as u32;
|
||||||
end <= start + 1 && u16::try_from(end).is_ok()
|
end <= start + 1 && end <= u16::MAX as u32
|
||||||
})
|
})
|
||||||
{
|
{
|
||||||
leading_simple_transition_count += 1;
|
leading_simple_transition_count += 1;
|
||||||
|
|
@@ -965,7 +840,10 @@ impl Generator {
|
||||||
large_char_set_ix = Some(char_set_ix);
|
large_char_set_ix = Some(char_set_ix);
|
||||||
}
|
}
|
||||||
|
|
||||||
let line_break = format!("\n{}", " ".repeat(self.indent_level + 2));
|
let mut line_break = "\n".to_string();
|
||||||
|
for _ in 0..self.indent_level + 2 {
|
||||||
|
line_break.push_str(" ");
|
||||||
|
}
|
||||||
|
|
||||||
let has_positive_condition = large_char_set_ix.is_some() || !asserted_chars.is_empty();
|
let has_positive_condition = large_char_set_ix.is_some() || !asserted_chars.is_empty();
|
||||||
let has_negative_condition = !negated_chars.is_empty();
|
let has_negative_condition = !negated_chars.is_empty();
|
||||||
|
|
@@ -992,7 +870,7 @@ impl Generator {
|
||||||
add!(
|
add!(
|
||||||
self,
|
self,
|
||||||
"set_contains({}, {}, lookahead)",
|
"set_contains({}, {}, lookahead)",
|
||||||
char_set_info.constant_name,
|
&char_set_info.constant_name,
|
||||||
large_set.range_count(),
|
large_set.range_count(),
|
||||||
);
|
);
|
||||||
if check_eof {
|
if check_eof {
|
||||||
|
|
@@ -1057,6 +935,7 @@ impl Generator {
|
||||||
}
|
}
|
||||||
self.add_character(end);
|
self.add_character(end);
|
||||||
add!(self, ")");
|
add!(self, ")");
|
||||||
|
continue;
|
||||||
} else if end == start {
|
} else if end == start {
|
||||||
add!(self, "lookahead == ");
|
add!(self, "lookahead == ");
|
||||||
self.add_character(start);
|
self.add_character(start);
|
||||||
|
|
@@ -1107,8 +986,21 @@ impl Generator {
|
||||||
|
|
||||||
add_line!(
|
add_line!(
|
||||||
self,
|
self,
|
||||||
"static const TSCharacterRange {}[] = {{",
|
"const TSCharacterRange {}[{}] = {{",
|
||||||
info.constant_name
|
info.constant_name,
|
||||||
|
characters.range_count()
|
||||||
|
);
|
||||||
|
|
||||||
|
self.header_buffer += &format!(
|
||||||
|
"extern const TSCharacterRange {}[{}];\n",
|
||||||
|
info.constant_name,
|
||||||
|
characters.range_count()
|
||||||
|
);
|
||||||
|
|
||||||
|
self.header_buffer += &format!(
|
||||||
|
"static const uint32_t {}_length = {};\n\n",
|
||||||
|
info.constant_name,
|
||||||
|
characters.range_count()
|
||||||
);
|
);
|
||||||
|
|
||||||
indent!(self);
|
indent!(self);
|
||||||
|
|
@@ -1142,66 +1034,25 @@ impl Generator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn add_lex_modes(&mut self) {
|
fn add_lex_modes_list(&mut self) {
|
||||||
add_line!(
|
add_line!(
|
||||||
self,
|
self,
|
||||||
"static const {} ts_lex_modes[STATE_COUNT] = {{",
|
"static const TSLexMode ts_lex_modes[STATE_COUNT] = {{"
|
||||||
if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
|
|
||||||
"TSLexerMode"
|
|
||||||
} else {
|
|
||||||
"TSLexMode"
|
|
||||||
}
|
|
||||||
);
|
);
|
||||||
indent!(self);
|
indent!(self);
|
||||||
for (i, state) in self.parse_table.states.iter().enumerate() {
|
for (i, state) in self.parse_table.states.iter().enumerate() {
|
||||||
add_whitespace!(self);
|
|
||||||
add!(self, "[{i}] = {{");
|
|
||||||
if state.is_end_of_non_terminal_extra() {
|
if state.is_end_of_non_terminal_extra() {
|
||||||
add!(self, "(TSStateId)(-1),");
|
add_line!(self, "[{i}] = {{(TSStateId)(-1)}},");
|
||||||
|
} else if state.external_lex_state_id > 0 {
|
||||||
|
add_line!(
|
||||||
|
self,
|
||||||
|
"[{i}] = {{.lex_state = {}, .external_lex_state = {}}},",
|
||||||
|
state.lex_state_id,
|
||||||
|
state.external_lex_state_id
|
||||||
|
);
|
||||||
} else {
|
} else {
|
||||||
add!(self, ".lex_state = {}", state.lex_state_id);
|
add_line!(self, "[{i}] = {{.lex_state = {}}},", state.lex_state_id);
|
||||||
|
|
||||||
if state.external_lex_state_id > 0 {
|
|
||||||
add!(
|
|
||||||
self,
|
|
||||||
", .external_lex_state = {}",
|
|
||||||
state.external_lex_state_id
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
|
|
||||||
let reserved_word_set_id = self.reserved_word_set_ids_by_parse_state[i];
|
|
||||||
if reserved_word_set_id != 0 {
|
|
||||||
add!(self, ", .reserved_word_set_id = {reserved_word_set_id}");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
add!(self, "}},\n");
|
|
||||||
}
|
|
||||||
dedent!(self);
|
|
||||||
add_line!(self, "}};");
|
|
||||||
add_line!(self, "");
|
|
||||||
}
|
|
||||||
|
|
||||||
fn add_reserved_word_sets(&mut self) {
|
|
||||||
add_line!(
|
|
||||||
self,
|
|
||||||
"static const TSSymbol ts_reserved_words[{}][MAX_RESERVED_WORD_SET_SIZE] = {{",
|
|
||||||
self.reserved_word_sets.len(),
|
|
||||||
);
|
|
||||||
indent!(self);
|
|
||||||
for (id, set) in self.reserved_word_sets.iter().enumerate() {
|
|
||||||
if id == 0 {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
add_line!(self, "[{id}] = {{");
|
|
||||||
indent!(self);
|
|
||||||
for token in set.iter() {
|
|
||||||
add_line!(self, "{},", self.symbol_ids[&token]);
|
|
||||||
}
|
|
||||||
dedent!(self);
|
|
||||||
add_line!(self, "}},");
|
|
||||||
}
|
}
|
||||||
dedent!(self);
|
dedent!(self);
|
||||||
add_line!(self, "}};");
|
add_line!(self, "}};");
|
||||||
|
|
@@ -1255,7 +1106,7 @@ impl Generator {
|
||||||
indent!(self);
|
indent!(self);
|
||||||
for i in 0..self.parse_table.external_lex_states.len() {
|
for i in 0..self.parse_table.external_lex_states.len() {
|
||||||
if !self.parse_table.external_lex_states[i].is_empty() {
|
if !self.parse_table.external_lex_states[i].is_empty() {
|
||||||
add_line!(self, "[{i}] = {{");
|
add_line!(self, "[{}] = {{", i);
|
||||||
indent!(self);
|
indent!(self);
|
||||||
for token in self.parse_table.external_lex_states[i].iter() {
|
for token in self.parse_table.external_lex_states[i].iter() {
|
||||||
add_line!(
|
add_line!(
|
||||||
|
|
@@ -1277,7 +1128,6 @@ impl Generator {
|
||||||
let mut parse_table_entries = HashMap::new();
|
let mut parse_table_entries = HashMap::new();
|
||||||
let mut next_parse_action_list_index = 0;
|
let mut next_parse_action_list_index = 0;
|
||||||
|
|
||||||
// Parse action list zero is for the default value, when a symbol is not valid.
|
|
||||||
self.get_parse_action_list_id(
|
self.get_parse_action_list_id(
|
||||||
&ParseTableEntry {
|
&ParseTableEntry {
|
||||||
actions: Vec::new(),
|
actions: Vec::new(),
|
||||||
|
|
@ -1303,7 +1153,7 @@ impl Generator {
|
||||||
.enumerate()
|
.enumerate()
|
||||||
.take(self.large_state_count)
|
.take(self.large_state_count)
|
||||||
{
|
{
|
||||||
add_line!(self, "[STATE({i})] = {{");
|
add_line!(self, "[{i}] = {{");
|
||||||
indent!(self);
|
indent!(self);
|
||||||
|
|
||||||
// Ensure the entries are in a deterministic order, since they are
|
// Ensure the entries are in a deterministic order, since they are
|
||||||
|
|
@@ -1335,11 +1185,9 @@ impl Generator {
|
||||||
);
|
);
|
||||||
add_line!(self, "[{}] = ACTIONS({entry_id}),", self.symbol_ids[symbol]);
|
add_line!(self, "[{}] = ACTIONS({entry_id}),", self.symbol_ids[symbol]);
|
||||||
}
|
}
|
||||||
|
|
||||||
dedent!(self);
|
dedent!(self);
|
||||||
add_line!(self, "}},");
|
add_line!(self, "}},");
|
||||||
}
|
}
|
||||||
|
|
||||||
dedent!(self);
|
dedent!(self);
|
||||||
add_line!(self, "}};");
|
add_line!(self, "}};");
|
||||||
add_line!(self, "");
|
add_line!(self, "");
|
||||||
|
|
@@ -1348,16 +1196,11 @@ impl Generator {
|
||||||
add_line!(self, "static const uint16_t ts_small_parse_table[] = {{");
|
add_line!(self, "static const uint16_t ts_small_parse_table[] = {{");
|
||||||
indent!(self);
|
indent!(self);
|
||||||
|
|
||||||
let mut next_table_index = 0;
|
let mut index = 0;
|
||||||
let mut small_state_indices = Vec::with_capacity(
|
let mut small_state_indices = Vec::new();
|
||||||
self.parse_table
|
|
||||||
.states
|
|
||||||
.len()
|
|
||||||
.saturating_sub(self.large_state_count),
|
|
||||||
);
|
|
||||||
let mut symbols_by_value = HashMap::<(usize, SymbolType), Vec<Symbol>>::new();
|
let mut symbols_by_value = HashMap::<(usize, SymbolType), Vec<Symbol>>::new();
|
||||||
for state in self.parse_table.states.iter().skip(self.large_state_count) {
|
for state in self.parse_table.states.iter().skip(self.large_state_count) {
|
||||||
small_state_indices.push(next_table_index);
|
small_state_indices.push(index);
|
||||||
symbols_by_value.clear();
|
symbols_by_value.clear();
|
||||||
|
|
||||||
terminal_entries.clear();
|
terminal_entries.clear();
|
||||||
|
|
@ -1396,16 +1239,10 @@ impl Generator {
|
||||||
(symbols.len(), *kind, *value, symbols[0])
|
(symbols.len(), *kind, *value, symbols[0])
|
||||||
});
|
});
|
||||||
|
|
||||||
add_line!(
|
add_line!(self, "[{index}] = {},", values_with_symbols.len());
|
||||||
self,
|
|
||||||
"[{next_table_index}] = {},",
|
|
||||||
values_with_symbols.len()
|
|
||||||
);
|
|
||||||
indent!(self);
|
indent!(self);
|
||||||
next_table_index += 1;
|
|
||||||
|
|
||||||
for ((value, kind), symbols) in &mut values_with_symbols {
|
for ((value, kind), symbols) in &mut values_with_symbols {
|
||||||
next_table_index += 2 + symbols.len();
|
|
||||||
if *kind == SymbolType::NonTerminal {
|
if *kind == SymbolType::NonTerminal {
|
||||||
add_line!(self, "STATE({value}), {},", symbols.len());
|
add_line!(self, "STATE({value}), {},", symbols.len());
|
||||||
} else {
|
} else {
|
||||||
|
|
@@ -1421,6 +1258,11 @@ impl Generator {
|
||||||
}
|
}
|
||||||
|
|
||||||
dedent!(self);
|
dedent!(self);
|
||||||
|
|
||||||
|
index += 1 + values_with_symbols
|
||||||
|
.iter()
|
||||||
|
.map(|(_, symbols)| 2 + symbols.len())
|
||||||
|
.sum::<usize>();
|
||||||
}
|
}
|
||||||
|
|
||||||
dedent!(self);
|
dedent!(self);
|
||||||
|
|
@@ -1549,7 +1391,7 @@ impl Generator {
|
||||||
indent!(self);
|
indent!(self);
|
||||||
add_line!(self, "static const TSLanguage language = {{");
|
add_line!(self, "static const TSLanguage language = {{");
|
||||||
indent!(self);
|
indent!(self);
|
||||||
add_line!(self, ".abi_version = LANGUAGE_VERSION,");
|
add_line!(self, ".version = LANGUAGE_VERSION,");
|
||||||
|
|
||||||
// Quantities
|
// Quantities
|
||||||
add_line!(self, ".symbol_count = SYMBOL_COUNT,");
|
add_line!(self, ".symbol_count = SYMBOL_COUNT,");
|
||||||
|
|
@@ -1559,9 +1401,6 @@ impl Generator {
|
||||||
add_line!(self, ".state_count = STATE_COUNT,");
|
add_line!(self, ".state_count = STATE_COUNT,");
|
||||||
add_line!(self, ".large_state_count = LARGE_STATE_COUNT,");
|
add_line!(self, ".large_state_count = LARGE_STATE_COUNT,");
|
||||||
add_line!(self, ".production_id_count = PRODUCTION_ID_COUNT,");
|
add_line!(self, ".production_id_count = PRODUCTION_ID_COUNT,");
|
||||||
if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
|
|
||||||
add_line!(self, ".supertype_count = SUPERTYPE_COUNT,");
|
|
||||||
}
|
|
||||||
add_line!(self, ".field_count = FIELD_COUNT,");
|
add_line!(self, ".field_count = FIELD_COUNT,");
|
||||||
add_line!(
|
add_line!(
|
||||||
self,
|
self,
|
||||||
|
|
@@ -1583,11 +1422,6 @@ impl Generator {
|
||||||
add_line!(self, ".field_map_slices = ts_field_map_slices,");
|
add_line!(self, ".field_map_slices = ts_field_map_slices,");
|
||||||
add_line!(self, ".field_map_entries = ts_field_map_entries,");
|
add_line!(self, ".field_map_entries = ts_field_map_entries,");
|
||||||
}
|
}
|
||||||
if !self.supertype_map.is_empty() && self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
|
|
||||||
add_line!(self, ".supertype_map_slices = ts_supertype_map_slices,");
|
|
||||||
add_line!(self, ".supertype_map_entries = ts_supertype_map_entries,");
|
|
||||||
add_line!(self, ".supertype_symbols = ts_supertype_symbols,");
|
|
||||||
}
|
|
||||||
add_line!(self, ".symbol_metadata = ts_symbol_metadata,");
|
add_line!(self, ".symbol_metadata = ts_symbol_metadata,");
|
||||||
add_line!(self, ".public_symbol_map = ts_symbol_map,");
|
add_line!(self, ".public_symbol_map = ts_symbol_map,");
|
||||||
add_line!(self, ".alias_map = ts_non_terminal_alias_map,");
|
add_line!(self, ".alias_map = ts_non_terminal_alias_map,");
|
||||||
|
|
@@ -1596,9 +1430,9 @@ impl Generator {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Lexing
|
// Lexing
|
||||||
add_line!(self, ".lex_modes = (const void*)ts_lex_modes,");
|
add_line!(self, ".lex_modes = ts_lex_modes,");
|
||||||
add_line!(self, ".lex_fn = ts_lex,");
|
add_line!(self, ".lex_fn = ts_lex,");
|
||||||
if let Some(keyword_capture_token) = self.syntax_grammar.word_token {
|
if let Some(keyword_capture_token) = self.keyword_capture_token {
|
||||||
add_line!(self, ".keyword_lex_fn = ts_lex_keywords,");
|
add_line!(self, ".keyword_lex_fn = ts_lex_keywords,");
|
||||||
add_line!(
|
add_line!(
|
||||||
self,
|
self,
|
||||||
|
|
@@ -1623,40 +1457,8 @@ impl Generator {
|
||||||
|
|
||||||
add_line!(self, ".primary_state_ids = ts_primary_state_ids,");
|
add_line!(self, ".primary_state_ids = ts_primary_state_ids,");
|
||||||
|
|
||||||
if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
|
if self.abi_version >= ABI_VERSION_WITH_METADATA {
|
||||||
add_line!(self, ".name = \"{}\",", self.language_name);
|
add_line!(self, ".name = \"{}\",", self.language_name);
|
||||||
|
|
||||||
if self.reserved_word_sets.len() > 1 {
|
|
||||||
add_line!(self, ".reserved_words = &ts_reserved_words[0][0],");
|
|
||||||
}
|
|
||||||
|
|
||||||
add_line!(
|
|
||||||
self,
|
|
||||||
".max_reserved_word_set_size = {},",
|
|
||||||
self.reserved_word_sets
|
|
||||||
.iter()
|
|
||||||
.map(TokenSet::len)
|
|
||||||
.max()
|
|
||||||
.unwrap()
|
|
||||||
);
|
|
||||||
|
|
||||||
let Some(metadata) = &self.metadata else {
|
|
||||||
panic!(
|
|
||||||
indoc! {"
|
|
||||||
Metadata is required to generate ABI version {}.
|
|
||||||
This means that your grammar doesn't have a tree-sitter.json config file with an appropriate version field in the metadata table.
|
|
||||||
"},
|
|
||||||
self.abi_version
|
|
||||||
);
|
|
||||||
};
|
|
||||||
|
|
||||||
add_line!(self, ".metadata = {{");
|
|
||||||
indent!(self);
|
|
||||||
add_line!(self, ".major_version = {},", metadata.major_version);
|
|
||||||
add_line!(self, ".minor_version = {},", metadata.minor_version);
|
|
||||||
add_line!(self, ".patch_version = {},", metadata.patch_version);
|
|
||||||
dedent!(self);
|
|
||||||
add_line!(self, "}},");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
dedent!(self);
|
dedent!(self);
|
||||||
|
|
@@ -1758,23 +1560,6 @@ impl Generator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn symbols_for_alias(&self, alias: &Alias) -> Vec<Symbol> {
|
|
||||||
self.parse_table
|
|
||||||
.symbols
|
|
||||||
.iter()
|
|
||||||
.copied()
|
|
||||||
.filter(move |symbol| {
|
|
||||||
self.default_aliases.get(symbol).map_or_else(
|
|
||||||
|| {
|
|
||||||
let (name, kind) = self.metadata_for_symbol(*symbol);
|
|
||||||
name == alias.value && kind == alias.kind()
|
|
||||||
},
|
|
||||||
|default_alias| default_alias == alias,
|
|
||||||
)
|
|
||||||
})
|
|
||||||
.collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn sanitize_identifier(&self, name: &str) -> String {
|
fn sanitize_identifier(&self, name: &str) -> String {
|
||||||
let mut result = String::with_capacity(name.len());
|
let mut result = String::with_capacity(name.len());
|
||||||
for c in name.chars() {
|
for c in name.chars() {
|
||||||
|
|
@@ -1850,11 +1635,11 @@ impl Generator {
|
||||||
'\u{007F}' => "DEL",
|
'\u{007F}' => "DEL",
|
||||||
'\u{FEFF}' => "BOM",
|
'\u{FEFF}' => "BOM",
|
||||||
'\u{0080}'..='\u{FFFF}' => {
|
'\u{0080}'..='\u{FFFF}' => {
|
||||||
write!(result, "u{:04x}", c as u32).unwrap();
|
result.push_str(&format!("u{:04x}", c as u32));
|
||||||
break 'special_chars;
|
break 'special_chars;
|
||||||
}
|
}
|
||||||
'\u{10000}'..='\u{10FFFF}' => {
|
'\u{10000}'..='\u{10FFFF}' => {
|
||||||
write!(result, "U{:08x}", c as u32).unwrap();
|
result.push_str(&format!("U{:08x}", c as u32));
|
||||||
break 'special_chars;
|
break 'special_chars;
|
||||||
}
|
}
|
||||||
'0'..='9' | 'a'..='z' | 'A'..='Z' | '_' => unreachable!(),
|
'0'..='9' | 'a'..='z' | 'A'..='Z' | '_' => unreachable!(),
|
||||||
|
|
@@ -1885,9 +1670,11 @@ impl Generator {
|
||||||
'\r' => result += "\\r",
|
'\r' => result += "\\r",
|
||||||
'\t' => result += "\\t",
|
'\t' => result += "\\t",
|
||||||
'\0' => result += "\\0",
|
'\0' => result += "\\0",
|
||||||
'\u{0001}'..='\u{001f}' => write!(result, "\\x{:02x}", c as u32).unwrap(),
|
'\u{0001}'..='\u{001f}' => result += &format!("\\x{:02x}", c as u32),
|
||||||
'\u{007F}'..='\u{FFFF}' => write!(result, "\\u{:04x}", c as u32).unwrap(),
|
'\u{007F}'..='\u{FFFF}' => result += &format!("\\u{:04x}", c as u32),
|
||||||
'\u{10000}'..='\u{10FFFF}' => write!(result, "\\U{:08x}", c as u32).unwrap(),
|
'\u{10000}'..='\u{10FFFF}' => {
|
||||||
|
result.push_str(&format!("\\U{:08x}", c as u32));
|
||||||
|
}
|
||||||
_ => result.push(c),
|
_ => result.push(c),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@@ -1940,32 +1727,34 @@ pub fn render_c_code(
|
||||||
lexical_grammar: LexicalGrammar,
|
lexical_grammar: LexicalGrammar,
|
||||||
default_aliases: AliasMap,
|
default_aliases: AliasMap,
|
||||||
abi_version: usize,
|
abi_version: usize,
|
||||||
semantic_version: Option<(u8, u8, u8)>,
|
) -> (String, String) {
|
||||||
supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
|
|
||||||
) -> String {
|
|
||||||
assert!(
|
assert!(
|
||||||
(ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version),
|
(ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version),
|
||||||
"This version of Tree-sitter can only generate parsers with ABI version {ABI_VERSION_MIN} - {ABI_VERSION_MAX}, not {abi_version}",
|
"This version of Tree-sitter can only generate parsers with ABI version {ABI_VERSION_MIN} - {ABI_VERSION_MAX}, not {abi_version}",
|
||||||
);
|
);
|
||||||
|
|
||||||
Generator {
|
Generator {
|
||||||
|
buffer: String::new(),
|
||||||
|
header_buffer: String::new(),
|
||||||
|
indent_level: 0,
|
||||||
language_name: name.to_string(),
|
language_name: name.to_string(),
|
||||||
|
large_state_count: 0,
|
||||||
parse_table: tables.parse_table,
|
parse_table: tables.parse_table,
|
||||||
main_lex_table: tables.main_lex_table,
|
main_lex_table: tables.main_lex_table,
|
||||||
keyword_lex_table: tables.keyword_lex_table,
|
keyword_lex_table: tables.keyword_lex_table,
|
||||||
|
keyword_capture_token: tables.word_token,
|
||||||
large_character_sets: tables.large_character_sets,
|
large_character_sets: tables.large_character_sets,
|
||||||
large_character_set_info: Vec::new(),
|
large_character_set_info: Vec::new(),
|
||||||
syntax_grammar,
|
syntax_grammar,
|
||||||
lexical_grammar,
|
lexical_grammar,
|
||||||
default_aliases,
|
default_aliases,
|
||||||
|
symbol_ids: HashMap::new(),
|
||||||
|
symbol_order: HashMap::new(),
|
||||||
|
alias_ids: HashMap::new(),
|
||||||
|
symbol_map: HashMap::new(),
|
||||||
|
unique_aliases: Vec::new(),
|
||||||
|
field_names: Vec::new(),
|
||||||
abi_version,
|
abi_version,
|
||||||
metadata: semantic_version.map(|(major_version, minor_version, patch_version)| Metadata {
|
|
||||||
major_version,
|
|
||||||
minor_version,
|
|
||||||
patch_version,
|
|
||||||
}),
|
|
||||||
supertype_symbol_map,
|
|
||||||
..Default::default()
|
|
||||||
}
|
}
|
||||||
.generate()
|
.generate()
|
||||||
}
|
}
|
||||||
|
|
@@ -1,11 +1,10 @@
|
||||||
use std::{collections::BTreeMap, fmt};
|
use std::{collections::HashMap, fmt};
|
||||||
|
|
||||||
use serde::Serialize;
|
|
||||||
use smallbitvec::SmallBitVec;
|
use smallbitvec::SmallBitVec;
|
||||||
|
|
||||||
use super::grammars::VariableType;
|
use super::grammars::VariableType;
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||||
pub enum SymbolType {
|
pub enum SymbolType {
|
||||||
External,
|
External,
|
||||||
End,
|
End,
|
||||||
|
|
@@ -14,19 +13,19 @@ pub enum SymbolType {
|
||||||
NonTerminal,
|
NonTerminal,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||||
pub enum Associativity {
|
pub enum Associativity {
|
||||||
Left,
|
Left,
|
||||||
Right,
|
Right,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
|
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||||
pub struct Alias {
|
pub struct Alias {
|
||||||
pub value: String,
|
pub value: String,
|
||||||
pub is_named: bool,
|
pub is_named: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Default, Serialize)]
|
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
|
||||||
pub enum Precedence {
|
pub enum Precedence {
|
||||||
#[default]
|
#[default]
|
||||||
None,
|
None,
|
||||||
|
|
@@ -34,50 +33,48 @@ pub enum Precedence {
|
||||||
Name(String),
|
Name(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type AliasMap = BTreeMap<Symbol, Alias>;
|
pub type AliasMap = HashMap<Symbol, Alias>;
|
||||||
|
|
||||||
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Serialize)]
|
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
|
||||||
pub struct MetadataParams {
|
pub struct MetadataParams {
|
||||||
pub precedence: Precedence,
|
pub precedence: Precedence,
|
||||||
pub dynamic_precedence: i32,
|
pub dynamic_precedence: i32,
|
||||||
pub associativity: Option<Associativity>,
|
pub associativity: Option<Associativity>,
|
||||||
pub is_token: bool,
|
pub is_token: bool,
|
||||||
|
pub is_string: bool,
|
||||||
|
pub is_active: bool,
|
||||||
pub is_main_token: bool,
|
pub is_main_token: bool,
|
||||||
pub alias: Option<Alias>,
|
pub alias: Option<Alias>,
|
||||||
pub field_name: Option<String>,
|
pub field_name: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||||
pub struct Symbol {
|
pub struct Symbol {
|
||||||
pub kind: SymbolType,
|
pub kind: SymbolType,
|
||||||
pub index: usize,
|
pub index: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize)]
|
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||||
pub enum Rule {
|
pub enum Rule {
|
||||||
Blank,
|
Blank,
|
||||||
String(String),
|
String(String),
|
||||||
Pattern(String, String),
|
Pattern(String, String),
|
||||||
NamedSymbol(String),
|
NamedSymbol(String),
|
||||||
Symbol(Symbol),
|
Symbol(Symbol),
|
||||||
Choice(Vec<Self>),
|
Choice(Vec<Rule>),
|
||||||
Metadata {
|
Metadata {
|
||||||
params: MetadataParams,
|
params: MetadataParams,
|
||||||
rule: Box<Self>,
|
rule: Box<Rule>,
|
||||||
},
|
|
||||||
Repeat(Box<Self>),
|
|
||||||
Seq(Vec<Self>),
|
|
||||||
Reserved {
|
|
||||||
rule: Box<Self>,
|
|
||||||
context_name: String,
|
|
||||||
},
|
},
|
||||||
|
Repeat(Box<Rule>),
|
||||||
|
Seq(Vec<Rule>),
|
||||||
}
|
}
|
||||||
|
|
||||||
 // Because tokens are represented as small (~400 max) unsigned integers,
 // sets of tokens can be efficiently represented as bit vectors with each
 // index corresponding to a token, and each value representing whether or not
 // the token is present in the set.
-#[derive(Default, Clone, PartialEq, Eq, Hash)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub struct TokenSet {
     terminal_bits: SmallBitVec,
     external_bits: SmallBitVec,
@@ -85,32 +82,6 @@ pub struct TokenSet {
     end_of_nonterminal_extra: bool,
 }
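The comment above describes the representation behind `TokenSet`: token ids are small integers, so a set of tokens is just a bit vector indexed by token id. A minimal stand-alone sketch of that idea follows; it is not the actual `TokenSet` (which keeps separate `SmallBitVec`s for terminal and external tokens plus `eof`/`end_of_nonterminal_extra` flags), just an illustration under those assumptions.

```rust
/// Sketch of a bit-vector token set: one bit per possible token id.
#[derive(Default, Clone, PartialEq, Eq)]
struct SmallTokenSet {
    bits: Vec<u64>, // bit `i % 64` of word `i / 64` <=> token `i` is present
}

impl SmallTokenSet {
    /// Returns true if the token was newly added.
    fn insert(&mut self, token: usize) -> bool {
        let (word, bit) = (token / 64, token % 64);
        if word >= self.bits.len() {
            self.bits.resize(word + 1, 0);
        }
        let mask = 1u64 << bit;
        let newly_added = (self.bits[word] & mask) == 0;
        self.bits[word] |= mask;
        newly_added
    }

    fn contains(&self, token: usize) -> bool {
        let (word, bit) = (token / 64, token % 64);
        self.bits
            .get(word)
            .is_some_and(|&w| (w & (1u64 << bit)) != 0)
    }

    /// Set union; returns true if any new token was added, mirroring the
    /// `insert_all_*` style of the real type.
    fn insert_all(&mut self, other: &Self) -> bool {
        if other.bits.len() > self.bits.len() {
            self.bits.resize(other.bits.len(), 0);
        }
        let mut changed = false;
        for (dst, src) in self.bits.iter_mut().zip(&other.bits) {
            let merged = *dst | *src;
            changed |= merged != *dst;
            *dst = merged;
        }
        changed
    }
}
```

Union then reduces to a word-wise bitwise OR, which is why operations such as `insert_all_terminals` in the real type only need to walk two bit vectors.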
|
|
||||||
impl fmt::Debug for TokenSet {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
f.debug_list().entries(self.iter()).finish()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl PartialOrd for TokenSet {
|
|
||||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
|
||||||
Some(self.cmp(other))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Ord for TokenSet {
|
|
||||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
|
||||||
self.terminal_bits
|
|
||||||
.iter()
|
|
||||||
.cmp(other.terminal_bits.iter())
|
|
||||||
.then_with(|| self.external_bits.iter().cmp(other.external_bits.iter()))
|
|
||||||
.then_with(|| self.eof.cmp(&other.eof))
|
|
||||||
.then_with(|| {
|
|
||||||
self.end_of_nonterminal_extra
|
|
||||||
.cmp(&other.end_of_nonterminal_extra)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Rule {
|
impl Rule {
|
||||||
pub fn field(name: String, content: Self) -> Self {
|
pub fn field(name: String, content: Self) -> Self {
|
||||||
add_metadata(content, move |params| {
|
add_metadata(content, move |params| {
|
||||||
|
|
@ -183,9 +154,7 @@ impl Rule {
|
||||||
match self {
|
match self {
|
||||||
Self::Blank | Self::Pattern(..) | Self::NamedSymbol(_) | Self::Symbol(_) => false,
|
Self::Blank | Self::Pattern(..) | Self::NamedSymbol(_) | Self::Symbol(_) => false,
|
||||||
Self::String(string) => string.is_empty(),
|
Self::String(string) => string.is_empty(),
|
||||||
Self::Metadata { rule, .. } | Self::Repeat(rule) | Self::Reserved { rule, .. } => {
|
Self::Metadata { rule, .. } | Self::Repeat(rule) => rule.is_empty(),
|
||||||
rule.is_empty()
|
|
||||||
}
|
|
||||||
Self::Choice(rules) => rules.iter().any(Self::is_empty),
|
Self::Choice(rules) => rules.iter().any(Self::is_empty),
|
||||||
Self::Seq(rules) => rules.iter().all(Self::is_empty),
|
Self::Seq(rules) => rules.iter().all(Self::is_empty),
|
||||||
}
|
}
|
||||||
|
|
@ -306,6 +275,7 @@ impl Symbol {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<Symbol> for Rule {
|
impl From<Symbol> for Rule {
|
||||||
|
#[must_use]
|
||||||
fn from(symbol: Symbol) -> Self {
|
fn from(symbol: Symbol) -> Self {
|
||||||
Self::Symbol(symbol)
|
Self::Symbol(symbol)
|
||||||
}
|
}
|
||||||
|
|
@ -424,9 +394,6 @@ impl TokenSet {
|
||||||
};
|
};
|
||||||
if other.index < vec.len() && vec[other.index] {
|
if other.index < vec.len() && vec[other.index] {
|
||||||
vec.set(other.index, false);
|
vec.set(other.index, false);
|
||||||
while vec.last() == Some(false) {
|
|
||||||
vec.pop();
|
|
||||||
}
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
false
|
false
|
||||||
|
|
@ -439,13 +406,6 @@ impl TokenSet {
|
||||||
&& !self.external_bits.iter().any(|a| a)
|
&& !self.external_bits.iter().any(|a| a)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn len(&self) -> usize {
|
|
||||||
self.eof as usize
|
|
||||||
+ self.end_of_nonterminal_extra as usize
|
|
||||||
+ self.terminal_bits.iter().filter(|b| *b).count()
|
|
||||||
+ self.external_bits.iter().filter(|b| *b).count()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn insert_all_terminals(&mut self, other: &Self) -> bool {
|
pub fn insert_all_terminals(&mut self, other: &Self) -> bool {
|
||||||
let mut result = false;
|
let mut result = false;
|
||||||
if other.terminal_bits.len() > self.terminal_bits.len() {
|
if other.terminal_bits.len() > self.terminal_bits.len() {
|
||||||
|
|
@ -47,7 +47,6 @@ pub struct ParseState {
|
||||||
pub id: ParseStateId,
|
pub id: ParseStateId,
|
||||||
pub terminal_entries: IndexMap<Symbol, ParseTableEntry, BuildHasherDefault<FxHasher>>,
|
pub terminal_entries: IndexMap<Symbol, ParseTableEntry, BuildHasherDefault<FxHasher>>,
|
||||||
pub nonterminal_entries: IndexMap<Symbol, GotoAction, BuildHasherDefault<FxHasher>>,
|
pub nonterminal_entries: IndexMap<Symbol, GotoAction, BuildHasherDefault<FxHasher>>,
|
||||||
pub reserved_words: TokenSet,
|
|
||||||
pub lex_state_id: usize,
|
pub lex_state_id: usize,
|
||||||
pub external_lex_state_id: usize,
|
pub external_lex_state_id: usize,
|
||||||
pub core_id: usize,
|
pub core_id: usize,
|
||||||
|
|
@ -65,7 +64,7 @@ pub struct ProductionInfo {
|
||||||
pub field_map: BTreeMap<String, Vec<FieldLocation>>,
|
pub field_map: BTreeMap<String, Vec<FieldLocation>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Default, PartialEq, Eq)]
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
pub struct ParseTable {
|
pub struct ParseTable {
|
||||||
pub states: Vec<ParseState>,
|
pub states: Vec<ParseState>,
|
||||||
pub symbols: Vec<Symbol>,
|
pub symbols: Vec<Symbol>,
|
||||||
|
|
@ -8,7 +8,6 @@ rust-version.workspace = true
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
homepage.workspace = true
|
homepage.workspace = true
|
||||||
repository.workspace = true
|
repository.workspace = true
|
||||||
documentation = "https://docs.rs/tree-sitter-loader"
|
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
keywords.workspace = true
|
keywords.workspace = true
|
||||||
categories.workspace = true
|
categories.workspace = true
|
||||||
|
|
@ -17,30 +16,31 @@ categories.workspace = true
|
||||||
all-features = true
|
all-features = true
|
||||||
rustdoc-args = ["--cfg", "docsrs"]
|
rustdoc-args = ["--cfg", "docsrs"]
|
||||||
|
|
||||||
[lib]
|
|
||||||
path = "src/loader.rs"
|
|
||||||
|
|
||||||
[lints]
|
[lints]
|
||||||
workspace = true
|
workspace = true
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
wasm = ["tree-sitter/wasm"]
|
wasm = ["tree-sitter/wasm"]
|
||||||
|
# TODO: For backward compatibility these must be enabled by default,
|
||||||
|
# consider removing for the next semver incompatible release
|
||||||
default = ["tree-sitter-highlight", "tree-sitter-tags"]
|
default = ["tree-sitter-highlight", "tree-sitter-tags"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
anyhow.workspace = true
|
||||||
cc.workspace = true
|
cc.workspace = true
|
||||||
etcetera.workspace = true
|
dirs.workspace = true
|
||||||
fs4.workspace = true
|
fs4.workspace = true
|
||||||
indoc.workspace = true
|
indoc.workspace = true
|
||||||
|
lazy_static.workspace = true
|
||||||
libloading.workspace = true
|
libloading.workspace = true
|
||||||
log.workspace = true
|
|
||||||
once_cell.workspace = true
|
once_cell.workspace = true
|
||||||
|
path-slash.workspace = true
|
||||||
regex.workspace = true
|
regex.workspace = true
|
||||||
semver.workspace = true
|
semver.workspace = true
|
||||||
serde.workspace = true
|
serde.workspace = true
|
||||||
serde_json.workspace = true
|
serde_json.workspace = true
|
||||||
tempfile.workspace = true
|
tempfile.workspace = true
|
||||||
thiserror.workspace = true
|
url.workspace = true
|
||||||
|
|
||||||
tree-sitter = { workspace = true }
|
tree-sitter = { workspace = true }
|
||||||
tree-sitter-highlight = { workspace = true, optional = true }
|
tree-sitter-highlight = { workspace = true, optional = true }
|
||||||
|
|
@ -7,4 +7,7 @@ fn main() {
|
||||||
"cargo:rustc-env=BUILD_HOST={}",
|
"cargo:rustc-env=BUILD_HOST={}",
|
||||||
std::env::var("HOST").unwrap()
|
std::env::var("HOST").unwrap()
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let emscripten_version = std::fs::read_to_string("emscripten-version").unwrap();
|
||||||
|
println!("cargo:rustc-env=EMSCRIPTEN_VERSION={emscripten_version}");
|
||||||
}
|
}
|
||||||
1 cli/loader/emscripten-version Normal file
|
|
@@ -0,0 +1 @@
+3.1.64
File diff suppressed because it is too large
46 crates/cli/npm/dsl.d.ts → cli/npm/dsl.d.ts vendored
|
|
@ -10,7 +10,6 @@ type PrecRightRule = { type: 'PREC_RIGHT'; content: Rule; value: number };
|
||||||
type PrecRule = { type: 'PREC'; content: Rule; value: number };
|
type PrecRule = { type: 'PREC'; content: Rule; value: number };
|
||||||
type Repeat1Rule = { type: 'REPEAT1'; content: Rule };
|
type Repeat1Rule = { type: 'REPEAT1'; content: Rule };
|
||||||
type RepeatRule = { type: 'REPEAT'; content: Rule };
|
type RepeatRule = { type: 'REPEAT'; content: Rule };
|
||||||
type ReservedRule = { type: 'RESERVED'; content: Rule; context_name: string };
|
|
||||||
type SeqRule = { type: 'SEQ'; members: Rule[] };
|
type SeqRule = { type: 'SEQ'; members: Rule[] };
|
||||||
type StringRule = { type: 'STRING'; value: string };
|
type StringRule = { type: 'STRING'; value: string };
|
||||||
type SymbolRule<Name extends string> = { type: 'SYMBOL'; name: Name };
|
type SymbolRule<Name extends string> = { type: 'SYMBOL'; name: Name };
|
||||||
|
|
@ -29,19 +28,12 @@ type Rule =
|
||||||
| PrecRule
|
| PrecRule
|
||||||
| Repeat1Rule
|
| Repeat1Rule
|
||||||
| RepeatRule
|
| RepeatRule
|
||||||
| ReservedRule
|
|
||||||
| SeqRule
|
| SeqRule
|
||||||
| StringRule
|
| StringRule
|
||||||
| SymbolRule<string>
|
| SymbolRule<string>
|
||||||
| TokenRule;
|
| TokenRule;
|
||||||
|
|
||||||
declare class RustRegex {
|
type RuleOrLiteral = Rule | RegExp | string;
|
||||||
value: string;
|
|
||||||
|
|
||||||
constructor(pattern: string);
|
|
||||||
}
|
|
||||||
|
|
||||||
type RuleOrLiteral = Rule | RegExp | RustRegex | string;
|
|
||||||
|
|
||||||
type GrammarSymbols<RuleName extends string> = {
|
type GrammarSymbols<RuleName extends string> = {
|
||||||
[name in RuleName]: SymbolRule<name>;
|
[name in RuleName]: SymbolRule<name>;
|
||||||
|
|
@ -113,7 +105,7 @@ interface Grammar<
|
||||||
* @param $ grammar rules
|
* @param $ grammar rules
|
||||||
* @param previous array of externals from the base schema, if any
|
* @param previous array of externals from the base schema, if any
|
||||||
*
|
*
|
||||||
* @see https://tree-sitter.github.io/tree-sitter/creating-parsers/4-external-scanners
|
* @see https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners
|
||||||
*/
|
*/
|
||||||
externals?: (
|
externals?: (
|
||||||
$: Record<string, SymbolRule<string>>,
|
$: Record<string, SymbolRule<string>>,
|
||||||
|
|
@ -151,7 +143,7 @@ interface Grammar<
|
||||||
*
|
*
|
||||||
* @param $ grammar rules
|
* @param $ grammar rules
|
||||||
*
|
*
|
||||||
* @see https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types
|
* @see https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
|
||||||
*/
|
*/
|
||||||
supertypes?: (
|
supertypes?: (
|
||||||
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
|
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
|
||||||
|
|
@ -164,20 +156,9 @@ interface Grammar<
|
||||||
*
|
*
|
||||||
* @param $ grammar rules
|
* @param $ grammar rules
|
||||||
*
|
*
|
||||||
* @see https://tree-sitter.github.io/tree-sitter/creating-parsers/3-writing-the-grammar#keyword-extraction
|
* @see https://tree-sitter.github.io/tree-sitter/creating-parsers#keyword-extraction
|
||||||
*/
|
*/
|
||||||
word?: ($: GrammarSymbols<RuleName | BaseGrammarRuleName>) => RuleOrLiteral;
|
word?: ($: GrammarSymbols<RuleName | BaseGrammarRuleName>) => RuleOrLiteral;
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Mapping of names to reserved word sets. The first reserved word set is the
|
|
||||||
* global word set, meaning it applies to every rule in every parse state.
|
|
||||||
* The other word sets can be used with the `reserved` function.
|
|
||||||
*/
|
|
||||||
reserved?: Record<
|
|
||||||
string,
|
|
||||||
($: GrammarSymbols<RuleName | BaseGrammarRuleName>) => RuleOrLiteral[]
|
|
||||||
>;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type GrammarSchema<RuleName extends string> = {
|
type GrammarSchema<RuleName extends string> = {
|
||||||
|
|
@ -262,7 +243,7 @@ declare function optional(rule: RuleOrLiteral): ChoiceRule;
|
||||||
* @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
|
* @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
|
||||||
*/
|
*/
|
||||||
declare const prec: {
|
declare const prec: {
|
||||||
(value: string | number, rule: RuleOrLiteral): PrecRule;
|
(value: String | number, rule: RuleOrLiteral): PrecRule;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Marks the given rule as left-associative (and optionally applies a
|
* Marks the given rule as left-associative (and optionally applies a
|
||||||
|
|
@ -278,7 +259,7 @@ declare const prec: {
|
||||||
* @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
|
* @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
|
||||||
*/
|
*/
|
||||||
left(rule: RuleOrLiteral): PrecLeftRule;
|
left(rule: RuleOrLiteral): PrecLeftRule;
|
||||||
left(value: string | number, rule: RuleOrLiteral): PrecLeftRule;
|
left(value: String | number, rule: RuleOrLiteral): PrecLeftRule;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Marks the given rule as right-associative (and optionally applies a
|
* Marks the given rule as right-associative (and optionally applies a
|
||||||
|
|
@ -294,7 +275,7 @@ declare const prec: {
|
||||||
* @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
|
* @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
|
||||||
*/
|
*/
|
||||||
right(rule: RuleOrLiteral): PrecRightRule;
|
right(rule: RuleOrLiteral): PrecRightRule;
|
||||||
right(value: string | number, rule: RuleOrLiteral): PrecRightRule;
|
right(value: String | number, rule: RuleOrLiteral): PrecRightRule;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Marks the given rule with a numerical precedence which will be used to
|
* Marks the given rule with a numerical precedence which will be used to
|
||||||
|
|
@ -311,7 +292,7 @@ declare const prec: {
|
||||||
*
|
*
|
||||||
* @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
|
* @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
|
||||||
*/
|
*/
|
||||||
dynamic(value: string | number, rule: RuleOrLiteral): PrecDynamicRule;
|
dynamic(value: String | number, rule: RuleOrLiteral): PrecDynamicRule;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -331,15 +312,6 @@ declare function repeat(rule: RuleOrLiteral): RepeatRule;
|
||||||
*/
|
*/
|
||||||
declare function repeat1(rule: RuleOrLiteral): Repeat1Rule;
|
declare function repeat1(rule: RuleOrLiteral): Repeat1Rule;
|
||||||
|
|
||||||
/**
|
|
||||||
* Overrides the global reserved word set for a given rule. The word set name
|
|
||||||
* should be defined in the `reserved` field in the grammar.
|
|
||||||
*
|
|
||||||
* @param wordset name of the reserved word set
|
|
||||||
* @param rule rule that will use the reserved word set
|
|
||||||
*/
|
|
||||||
declare function reserved(wordset: string, rule: RuleOrLiteral): ReservedRule;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a rule that matches any number of other rules, one after another.
|
* Creates a rule that matches any number of other rules, one after another.
|
||||||
* It is analogous to simply writing multiple symbols next to each other
|
* It is analogous to simply writing multiple symbols next to each other
|
||||||
|
|
@ -358,7 +330,7 @@ declare function sym<Name extends string>(name: Name): SymbolRule<Name>;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Marks the given rule as producing only a single token. Tree-sitter's
|
* Marks the given rule as producing only a single token. Tree-sitter's
|
||||||
* default is to treat each string or RegExp literal in the grammar as a
|
* default is to treat each String or RegExp literal in the grammar as a
|
||||||
* separate token. Each token is matched separately by the lexer and
|
* separate token. Each token is matched separately by the lexer and
|
||||||
* returned as its own leaf node in the tree. The token function allows
|
* returned as its own leaf node in the tree. The token function allows
|
||||||
* you to express a complex rule using the DSL functions (rather
|
* you to express a complex rule using the DSL functions (rather
|
||||||
3 crates/cli/npm/install.js → cli/npm/install.js Normal file → Executable file
|
|
@ -6,8 +6,7 @@ const http = require('http');
|
||||||
const https = require('https');
|
const https = require('https');
|
||||||
const packageJSON = require('./package.json');
|
const packageJSON = require('./package.json');
|
||||||
|
|
||||||
https.globalAgent.keepAlive = false;
|
// Look to a results table in https://github.com/tree-sitter/tree-sitter/issues/2196
|
||||||
|
|
||||||
const matrix = {
|
const matrix = {
|
||||||
platform: {
|
platform: {
|
||||||
'darwin': {
|
'darwin': {
|
||||||
|
|
@@ -1,33 +1,24 @@
 {
   "name": "tree-sitter-cli",
-  "version": "0.27.0",
-  "author": {
-    "name": "Max Brunsfeld",
-    "email": "maxbrunsfeld@gmail.com"
-  },
-  "maintainers": [
-    {
-      "name": "Amaan Qureshi",
-      "email": "amaanq12@gmail.com"
-    }
-  ],
+  "version": "0.25.0",
+  "author": "Max Brunsfeld",
   "license": "MIT",
   "repository": {
     "type": "git",
-    "url": "git+https://github.com/tree-sitter/tree-sitter.git",
-    "directory": "crates/cli/npm"
+    "url": "https://github.com/tree-sitter/tree-sitter.git"
   },
   "description": "CLI for generating fast incremental parsers",
   "keywords": [
     "parser",
     "lexer"
   ],
+  "main": "lib/api/index.js",
   "engines": {
     "node": ">=12.0.0"
   },
   "scripts": {
     "install": "node install.js",
-    "prepack": "cp ../../../LICENSE ../README.md .",
+    "prepack": "cp ../../LICENSE ../README.md .",
     "postpack": "rm LICENSE README.md"
   },
   "bin": {

@ -40,11 +40,7 @@ extern "C" {
|
||||||
fn free(ptr: *mut c_void);
|
fn free(ptr: *mut c_void);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn record<T>(f: impl FnOnce() -> T) -> T {
|
pub fn record<T>(f: impl FnOnce() -> T) -> Result<T, String> {
|
||||||
record_checked(f).unwrap()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn record_checked<T>(f: impl FnOnce() -> T) -> Result<T, String> {
|
|
||||||
RECORDER.with(|recorder| {
|
RECORDER.with(|recorder| {
|
||||||
recorder.enabled.store(true, SeqCst);
|
recorder.enabled.store(true, SeqCst);
|
||||||
recorder.allocation_count.store(0, SeqCst);
|
recorder.allocation_count.store(0, SeqCst);
|
||||||
|
|
@ -97,49 +93,30 @@ fn record_dealloc(ptr: *mut c_void) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/// # Safety
|
unsafe extern "C" fn ts_record_malloc(size: usize) -> *mut c_void {
|
||||||
///
|
|
||||||
/// The caller must ensure that the returned pointer is eventually
|
|
||||||
/// freed by calling `ts_record_free`.
|
|
||||||
#[must_use]
|
|
||||||
pub unsafe extern "C" fn ts_record_malloc(size: usize) -> *mut c_void {
|
|
||||||
let result = malloc(size);
|
let result = malloc(size);
|
||||||
record_alloc(result);
|
record_alloc(result);
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
/// # Safety
|
unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void {
|
||||||
///
|
|
||||||
/// The caller must ensure that the returned pointer is eventually
|
|
||||||
/// freed by calling `ts_record_free`.
|
|
||||||
#[must_use]
|
|
||||||
pub unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void {
|
|
||||||
let result = calloc(count, size);
|
let result = calloc(count, size);
|
||||||
record_alloc(result);
|
record_alloc(result);
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
/// # Safety
|
unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void {
|
||||||
///
|
|
||||||
/// The caller must ensure that the returned pointer is eventually
|
|
||||||
/// freed by calling `ts_record_free`.
|
|
||||||
#[must_use]
|
|
||||||
pub unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void {
|
|
||||||
let result = realloc(ptr, size);
|
let result = realloc(ptr, size);
|
||||||
if ptr.is_null() {
|
if ptr.is_null() {
|
||||||
record_alloc(result);
|
record_alloc(result);
|
||||||
} else if !core::ptr::eq(ptr, result) {
|
} else if ptr != result {
|
||||||
record_dealloc(ptr);
|
record_dealloc(ptr);
|
||||||
record_alloc(result);
|
record_alloc(result);
|
||||||
}
|
}
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
/// # Safety
|
unsafe extern "C" fn ts_record_free(ptr: *mut c_void) {
|
||||||
///
|
|
||||||
/// The caller must ensure that `ptr` was allocated by a previous call
|
|
||||||
/// to `ts_record_malloc`, `ts_record_calloc`, or `ts_record_realloc`.
|
|
||||||
pub unsafe extern "C" fn ts_record_free(ptr: *mut c_void) {
|
|
||||||
record_dealloc(ptr);
|
record_dealloc(ptr);
|
||||||
free(ptr);
|
free(ptr);
|
||||||
}
|
}
|
||||||
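The safety comments above spell out the contract of these allocator shims: every pointer handed out by `ts_record_malloc`/`ts_record_calloc`/`ts_record_realloc` must eventually pass through `ts_record_free`, and a `realloc` that moves a block counts as one free plus one allocation. A rough, hypothetical sketch of that bookkeeping is below; the names and structure are illustrative and are not the crate's actual recorder.

```rust
use std::{collections::HashSet, sync::Mutex};

/// Hypothetical recorder illustrating the bookkeeping behind `record_checked`:
/// track live allocations so a test can verify everything was freed again.
#[derive(Default)]
struct AllocationRecorder {
    live: Mutex<HashSet<usize>>, // addresses of outstanding allocations
}

impl AllocationRecorder {
    fn record_alloc(&self, ptr: *mut u8) {
        self.live.lock().unwrap().insert(ptr as usize);
    }

    fn record_dealloc(&self, ptr: *mut u8) {
        self.live.lock().unwrap().remove(&(ptr as usize));
    }

    /// Mirrors the realloc rule in the diff: a null old pointer is a fresh
    /// allocation; a block that moved counts as one free plus one allocation.
    fn record_realloc(&self, old: *mut u8, new: *mut u8) {
        if old.is_null() {
            self.record_alloc(new);
        } else if old != new {
            self.record_dealloc(old);
            self.record_alloc(new);
        }
    }

    fn check_leaks(&self) -> Result<(), String> {
        let outstanding = self.live.lock().unwrap().len();
        if outstanding == 0 {
            Ok(())
        } else {
            Err(format!("{outstanding} allocation(s) were not freed"))
        }
    }
}
```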
|
|
@ -23,7 +23,7 @@ pub fn check_consistent_sizes(tree: &Tree, input: &[u8]) {
|
||||||
let mut some_child_has_changes = false;
|
let mut some_child_has_changes = false;
|
||||||
let mut actual_named_child_count = 0;
|
let mut actual_named_child_count = 0;
|
||||||
for i in 0..node.child_count() {
|
for i in 0..node.child_count() {
|
||||||
let child = node.child(i as u32).unwrap();
|
let child = node.child(i).unwrap();
|
||||||
assert!(child.start_byte() >= last_child_end_byte);
|
assert!(child.start_byte() >= last_child_end_byte);
|
||||||
assert!(child.start_position() >= last_child_end_point);
|
assert!(child.start_position() >= last_child_end_point);
|
||||||
check(child, line_offsets);
|
check(child, line_offsets);
|
||||||
|
|
@ -1,11 +1,6 @@
|
||||||
use std::{
|
use std::{collections::HashMap, env, fs, path::Path};
|
||||||
collections::HashMap,
|
|
||||||
env, fs,
|
|
||||||
path::{Path, PathBuf},
|
|
||||||
sync::LazyLock,
|
|
||||||
};
|
|
||||||
|
|
||||||
use log::{error, info};
|
use lazy_static::lazy_static;
|
||||||
use rand::Rng;
|
use rand::Rng;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use tree_sitter::{Language, Parser};
|
use tree_sitter::{Language, Parser};
|
||||||
|
|
@ -25,30 +20,19 @@ use crate::{
|
||||||
random::Rand,
|
random::Rand,
|
||||||
},
|
},
|
||||||
parse::perform_edit,
|
parse::perform_edit,
|
||||||
test::{parse_tests, strip_sexp_fields, DiffKey, TestDiff, TestEntry},
|
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
|
||||||
};
|
};
|
||||||
|
|
||||||
pub static LOG_ENABLED: LazyLock<bool> = LazyLock::new(|| env::var("TREE_SITTER_LOG").is_ok());
|
lazy_static! {
|
||||||
|
pub static ref LOG_ENABLED: bool = env::var("TREE_SITTER_LOG").is_ok();
|
||||||
pub static LOG_GRAPH_ENABLED: LazyLock<bool> =
|
pub static ref LOG_GRAPH_ENABLED: bool = env::var("TREE_SITTER_LOG_GRAPHS").is_ok();
|
||||||
LazyLock::new(|| env::var("TREE_SITTER_LOG_GRAPHS").is_ok());
|
pub static ref LANGUAGE_FILTER: Option<String> = env::var("TREE_SITTER_LANGUAGE").ok();
|
||||||
|
pub static ref EXAMPLE_INCLUDE: Option<Regex> = regex_env_var("TREE_SITTER_EXAMPLE_INCLUDE");
|
||||||
pub static LANGUAGE_FILTER: LazyLock<Option<String>> =
|
pub static ref EXAMPLE_EXCLUDE: Option<Regex> = regex_env_var("TREE_SITTER_EXAMPLE_EXCLUDE");
|
||||||
LazyLock::new(|| env::var("TREE_SITTER_LANGUAGE").ok());
|
pub static ref START_SEED: usize = new_seed();
|
||||||
|
pub static ref EDIT_COUNT: usize = int_env_var("TREE_SITTER_EDITS").unwrap_or(3);
|
||||||
pub static EXAMPLE_INCLUDE: LazyLock<Option<Regex>> =
|
pub static ref ITERATION_COUNT: usize = int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10);
|
||||||
LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_INCLUDE"));
|
}
|
||||||
|
|
||||||
pub static EXAMPLE_EXCLUDE: LazyLock<Option<Regex>> =
|
|
||||||
LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_EXCLUDE"));
|
|
||||||
|
|
||||||
pub static START_SEED: LazyLock<usize> = LazyLock::new(new_seed);
|
|
||||||
|
|
||||||
pub static EDIT_COUNT: LazyLock<usize> =
|
|
||||||
LazyLock::new(|| int_env_var("TREE_SITTER_EDITS").unwrap_or(3));
|
|
||||||
|
|
||||||
pub static ITERATION_COUNT: LazyLock<usize> =
|
|
||||||
LazyLock::new(|| int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10));
|
|
||||||
|
|
||||||
fn int_env_var(name: &'static str) -> Option<usize> {
|
fn int_env_var(name: &'static str) -> Option<usize> {
|
||||||
env::var(name).ok().and_then(|e| e.parse().ok())
|
env::var(name).ok().and_then(|e| e.parse().ok())
|
||||||
|
|
@ -62,15 +46,13 @@ fn regex_env_var(name: &'static str) -> Option<Regex> {
|
||||||
pub fn new_seed() -> usize {
|
pub fn new_seed() -> usize {
|
||||||
int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
|
int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
|
||||||
let mut rng = rand::thread_rng();
|
let mut rng = rand::thread_rng();
|
||||||
let seed = rng.gen::<usize>();
|
rng.gen::<usize>()
|
||||||
info!("Seed: {seed}");
|
|
||||||
seed
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct FuzzOptions {
|
pub struct FuzzOptions {
|
||||||
pub skipped: Option<Vec<String>>,
|
pub skipped: Option<Vec<String>>,
|
||||||
pub subdir: Option<PathBuf>,
|
pub subdir: Option<String>,
|
||||||
pub edits: usize,
|
pub edits: usize,
|
||||||
pub iterations: usize,
|
pub iterations: usize,
|
||||||
pub include: Option<Regex>,
|
pub include: Option<Regex>,
|
||||||
|
|
@ -109,12 +91,12 @@ pub fn fuzz_language_corpus(
|
||||||
let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus");
|
let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus");
|
||||||
|
|
||||||
if !corpus_dir.exists() || !corpus_dir.is_dir() {
|
if !corpus_dir.exists() || !corpus_dir.is_dir() {
|
||||||
error!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file.");
|
eprintln!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 {
|
if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 {
|
||||||
error!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory.");
|
eprintln!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -150,7 +132,7 @@ pub fn fuzz_language_corpus(
|
||||||
let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
|
let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
|
||||||
|
|
||||||
if log_seed {
|
if log_seed {
|
||||||
info!(" start seed: {start_seed}");
|
println!(" start seed: {start_seed}");
|
||||||
}
|
}
|
||||||
|
|
||||||
println!();
|
println!();
|
||||||
|
|
@ -164,7 +146,7 @@ pub fn fuzz_language_corpus(
|
||||||
|
|
||||||
println!(" {test_index}. {test_name}");
|
println!(" {test_index}. {test_name}");
|
||||||
|
|
||||||
let passed = allocations::record_checked(|| {
|
let passed = allocations::record(|| {
|
||||||
let mut log_session = None;
|
let mut log_session = None;
|
||||||
let mut parser = get_parser(&mut log_session, "log.html");
|
let mut parser = get_parser(&mut log_session, "log.html");
|
||||||
parser.set_language(language).unwrap();
|
parser.set_language(language).unwrap();
|
||||||
|
|
@ -183,8 +165,8 @@ pub fn fuzz_language_corpus(
|
||||||
|
|
||||||
if actual_output != test.output {
|
if actual_output != test.output {
|
||||||
println!("Incorrect initial parse for {test_name}");
|
println!("Incorrect initial parse for {test_name}");
|
||||||
DiffKey::print();
|
print_diff_key();
|
||||||
println!("{}", TestDiff::new(&actual_output, &test.output));
|
print_diff(&actual_output, &test.output, true);
|
||||||
println!();
|
println!();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -192,7 +174,7 @@ pub fn fuzz_language_corpus(
|
||||||
true
|
true
|
||||||
})
|
})
|
||||||
.unwrap_or_else(|e| {
|
.unwrap_or_else(|e| {
|
||||||
error!("{e}");
|
eprintln!("Error: {e}");
|
||||||
false
|
false
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
@ -208,7 +190,7 @@ pub fn fuzz_language_corpus(
|
||||||
|
|
||||||
for trial in 0..options.iterations {
|
for trial in 0..options.iterations {
|
||||||
let seed = start_seed + trial;
|
let seed = start_seed + trial;
|
||||||
let passed = allocations::record_checked(|| {
|
let passed = allocations::record(|| {
|
||||||
let mut rand = Rand::new(seed);
|
let mut rand = Rand::new(seed);
|
||||||
let mut log_session = None;
|
let mut log_session = None;
|
||||||
let mut parser = get_parser(&mut log_session, "log.html");
|
let mut parser = get_parser(&mut log_session, "log.html");
|
||||||
|
|
@ -217,20 +199,19 @@ pub fn fuzz_language_corpus(
|
||||||
let mut input = test.input.clone();
|
let mut input = test.input.clone();
|
||||||
|
|
||||||
if options.log_graphs {
|
if options.log_graphs {
|
||||||
info!("{}\n", String::from_utf8_lossy(&input));
|
eprintln!("{}\n", String::from_utf8_lossy(&input));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Perform a random series of edits and reparse.
|
// Perform a random series of edits and reparse.
|
||||||
let edit_count = rand.unsigned(*EDIT_COUNT);
|
let mut undo_stack = Vec::new();
|
||||||
let mut undo_stack = Vec::with_capacity(edit_count);
|
for _ in 0..=rand.unsigned(*EDIT_COUNT) {
|
||||||
for _ in 0..=edit_count {
|
|
||||||
let edit = get_random_edit(&mut rand, &input);
|
let edit = get_random_edit(&mut rand, &input);
|
||||||
undo_stack.push(invert_edit(&input, &edit));
|
undo_stack.push(invert_edit(&input, &edit));
|
||||||
perform_edit(&mut tree, &mut input, &edit).unwrap();
|
perform_edit(&mut tree, &mut input, &edit).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
if log_seed {
|
if log_seed {
|
||||||
info!(" {test_index}.{trial:<2} seed: {seed}");
|
println!(" {test_index}.{trial:<2} seed: {seed}");
|
||||||
}
|
}
|
||||||
|
|
||||||
if dump_edits {
|
if dump_edits {
|
||||||
|
|
@ -244,7 +225,7 @@ pub fn fuzz_language_corpus(
|
||||||
}
|
}
|
||||||
|
|
||||||
if options.log_graphs {
|
if options.log_graphs {
|
||||||
info!("{}\n", String::from_utf8_lossy(&input));
|
eprintln!("{}\n", String::from_utf8_lossy(&input));
|
||||||
}
|
}
|
||||||
|
|
||||||
set_included_ranges(&mut parser, &input, test.template_delimiters);
|
set_included_ranges(&mut parser, &input, test.template_delimiters);
|
||||||
|
|
@ -253,7 +234,7 @@ pub fn fuzz_language_corpus(
|
||||||
// Check that the new tree is consistent.
|
// Check that the new tree is consistent.
|
||||||
check_consistent_sizes(&tree2, &input);
|
check_consistent_sizes(&tree2, &input);
|
||||||
if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
|
if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
|
||||||
error!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
|
println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -262,7 +243,7 @@ pub fn fuzz_language_corpus(
|
||||||
perform_edit(&mut tree2, &mut input, &edit).unwrap();
|
perform_edit(&mut tree2, &mut input, &edit).unwrap();
|
||||||
}
|
}
|
||||||
if options.log_graphs {
|
if options.log_graphs {
|
||||||
info!("{}\n", String::from_utf8_lossy(&input));
|
eprintln!("{}\n", String::from_utf8_lossy(&input));
|
||||||
}
|
}
|
||||||
|
|
||||||
set_included_ranges(&mut parser, &test.input, test.template_delimiters);
|
set_included_ranges(&mut parser, &test.input, test.template_delimiters);
|
||||||
|
|
@ -276,8 +257,8 @@ pub fn fuzz_language_corpus(
|
||||||
|
|
||||||
if actual_output != test.output && !test.error {
|
if actual_output != test.output && !test.error {
|
||||||
println!("Incorrect parse for {test_name} - seed {seed}");
|
println!("Incorrect parse for {test_name} - seed {seed}");
|
||||||
DiffKey::print();
|
print_diff_key();
|
||||||
println!("{}", TestDiff::new(&actual_output, &test.output));
|
print_diff(&actual_output, &test.output, true);
|
||||||
println!();
|
println!();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -285,13 +266,13 @@ pub fn fuzz_language_corpus(
|
||||||
// Check that the edited tree is consistent.
|
// Check that the edited tree is consistent.
|
||||||
check_consistent_sizes(&tree3, &input);
|
check_consistent_sizes(&tree3, &input);
|
||||||
if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
|
if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
|
||||||
error!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
|
println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
true
|
true
|
||||||
}).unwrap_or_else(|e| {
|
}).unwrap_or_else(|e| {
|
||||||
error!("{e}");
|
eprintln!("Error: {e}");
|
||||||
false
|
false
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
@ -303,17 +284,17 @@ pub fn fuzz_language_corpus(
|
||||||
}
|
}
|
||||||
|
|
||||||
if failure_count != 0 {
|
if failure_count != 0 {
|
||||||
info!("{failure_count} {language_name} corpus tests failed fuzzing");
|
eprintln!("{failure_count} {language_name} corpus tests failed fuzzing");
|
||||||
}
|
}
|
||||||
|
|
||||||
skipped.retain(|_, v| *v == 0);
|
skipped.retain(|_, v| *v == 0);
|
||||||
|
|
||||||
if !skipped.is_empty() {
|
if !skipped.is_empty() {
|
||||||
info!("Non matchable skip definitions:");
|
println!("Non matchable skip definitions:");
|
||||||
for k in skipped.keys() {
|
for k in skipped.keys() {
|
||||||
info!(" {k}");
|
println!(" {k}");
|
||||||
}
|
}
|
||||||
panic!("Non matchable skip definitions need to be removed");
|
panic!("Non matchable skip definitions needs to be removed");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -20,8 +20,8 @@ impl Rand {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn words(&mut self, max_count: usize) -> Vec<u8> {
|
pub fn words(&mut self, max_count: usize) -> Vec<u8> {
|
||||||
|
let mut result = Vec::new();
|
||||||
let word_count = self.unsigned(max_count);
|
let word_count = self.unsigned(max_count);
|
||||||
let mut result = Vec::with_capacity(2 * word_count);
|
|
||||||
for i in 0..word_count {
|
for i in 0..word_count {
|
||||||
if i > 0 {
|
if i > 0 {
|
||||||
if self.unsigned(5) == 0 {
|
if self.unsigned(5) == 0 {
|
||||||
|
|
@ -1,24 +1,22 @@
|
||||||
use std::{
|
use std::{
|
||||||
collections::{BTreeMap, HashSet},
|
collections::HashMap,
|
||||||
fmt::Write,
|
fmt::Write,
|
||||||
fs,
|
fs,
|
||||||
io::{self, Write as _},
|
io::{self, Write as _},
|
||||||
path::{self, Path, PathBuf},
|
path, str,
|
||||||
str,
|
sync::atomic::AtomicUsize,
|
||||||
sync::{atomic::AtomicUsize, Arc},
|
|
||||||
time::Instant,
|
time::Instant,
|
||||||
};
|
};
|
||||||
|
|
||||||
use ansi_colours::{ansi256_from_rgb, rgb_from_ansi256};
|
|
||||||
use anstyle::{Ansi256Color, AnsiColor, Color, Effects, RgbColor};
|
use anstyle::{Ansi256Color, AnsiColor, Color, Effects, RgbColor};
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use log::{info, warn};
|
use lazy_static::lazy_static;
|
||||||
use serde::{ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer};
|
use serde::{ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer};
|
||||||
use serde_json::{json, Value};
|
use serde_json::{json, Value};
|
||||||
use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer};
|
use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer};
|
||||||
use tree_sitter_loader::Loader;
|
use tree_sitter_loader::Loader;
|
||||||
|
|
||||||
pub const HTML_HEAD_HEADER: &str = "
|
pub const HTML_HEADER: &str = "
|
||||||
<!doctype HTML>
|
<!doctype HTML>
|
||||||
<head>
|
<head>
|
||||||
<title>Tree-sitter Highlighting</title>
|
<title>Tree-sitter Highlighting</title>
|
||||||
|
|
@ -35,9 +33,7 @@ pub const HTML_HEAD_HEADER: &str = "
|
||||||
.line {
|
.line {
|
||||||
white-space: pre;
|
white-space: pre;
|
||||||
}
|
}
|
||||||
</style>";
|
</style>
|
||||||
|
|
||||||
pub const HTML_BODY_HEADER: &str = "
|
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
";
|
";
|
||||||
|
|
@ -46,6 +42,11 @@ pub const HTML_FOOTER: &str = "
|
||||||
</body>
|
</body>
|
||||||
";
|
";
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
static ref CSS_STYLES_BY_COLOR_ID: Vec<String> =
|
||||||
|
serde_json::from_str(include_str!("../vendor/xterm-colors.json")).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Default)]
|
#[derive(Debug, Default)]
|
||||||
pub struct Style {
|
pub struct Style {
|
||||||
pub ansi: anstyle::Style,
|
pub ansi: anstyle::Style,
|
||||||
|
|
@ -83,9 +84,9 @@ impl<'de> Deserialize<'de> for Theme {
|
||||||
{
|
{
|
||||||
let mut styles = Vec::new();
|
let mut styles = Vec::new();
|
||||||
let mut highlight_names = Vec::new();
|
let mut highlight_names = Vec::new();
|
||||||
if let Ok(colors) = BTreeMap::<String, Value>::deserialize(deserializer) {
|
if let Ok(colors) = HashMap::<String, Value>::deserialize(deserializer) {
|
||||||
styles.reserve(colors.len());
|
|
||||||
highlight_names.reserve(colors.len());
|
highlight_names.reserve(colors.len());
|
||||||
|
styles.reserve(colors.len());
|
||||||
for (name, style_value) in colors {
|
for (name, style_value) in colors {
|
||||||
let mut style = Style::default();
|
let mut style = Style::default();
|
||||||
parse_style(&mut style, style_value);
|
parse_style(&mut style, style_value);
|
||||||
|
|
@ -128,7 +129,7 @@ impl Serialize for Theme {
|
||||||
|| effects.contains(Effects::ITALIC)
|
|| effects.contains(Effects::ITALIC)
|
||||||
|| effects.contains(Effects::UNDERLINE)
|
|| effects.contains(Effects::UNDERLINE)
|
||||||
{
|
{
|
||||||
let mut style_json = BTreeMap::new();
|
let mut style_json = HashMap::new();
|
||||||
if let Some(color) = color {
|
if let Some(color) = color {
|
||||||
style_json.insert("color", color);
|
style_json.insert("color", color);
|
||||||
}
|
}
|
||||||
|
|
@ -155,32 +156,28 @@ impl Serialize for Theme {
|
||||||
impl Default for Theme {
|
impl Default for Theme {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
serde_json::from_value(json!({
|
serde_json::from_value(json!({
|
||||||
"attribute": {"color": 124, "italic": true},
|
"attribute": {"color": 124, "italic": true},
|
||||||
"comment": {"color": 245, "italic": true},
|
"comment": {"color": 245, "italic": true},
|
||||||
"constant": 94,
|
"constant.builtin": {"color": 94, "bold": true},
|
||||||
"constant.builtin": {"color": 94, "bold": true},
|
"constant": 94,
|
||||||
"constructor": 136,
|
"constructor": 136,
|
||||||
"embedded": null,
|
"embedded": null,
|
||||||
"function": 26,
|
"function.builtin": {"color": 26, "bold": true},
|
||||||
"function.builtin": {"color": 26, "bold": true},
|
"function": 26,
|
||||||
"keyword": 56,
|
"keyword": 56,
|
||||||
"module": 136,
|
"number": {"color": 94, "bold": true},
|
||||||
"number": {"color": 94, "bold": true},
|
"module": 136,
|
||||||
"operator": {"color": 239, "bold": true},
|
"property": 124,
|
||||||
"property": 124,
|
"operator": {"color": 239, "bold": true},
|
||||||
"property.builtin": {"color": 124, "bold": true},
|
"punctuation.bracket": 239,
|
||||||
"punctuation": 239,
|
"punctuation.delimiter": 239,
|
||||||
"punctuation.bracket": 239,
|
"string.special": 30,
|
||||||
"punctuation.delimiter": 239,
|
"string": 28,
|
||||||
"punctuation.special": 239,
|
"tag": 18,
|
||||||
"string": 28,
|
"type": 23,
|
||||||
"string.special": 30,
|
"type.builtin": {"color": 23, "bold": true},
|
||||||
"tag": 18,
|
"variable.builtin": {"bold": true},
|
||||||
"type": 23,
|
"variable.parameter": {"underline": true}
|
||||||
"type.builtin": {"color": 23, "bold": true},
|
|
||||||
"variable": 252,
|
|
||||||
"variable.builtin": {"color": 252, "bold": true},
|
|
||||||
"variable.parameter": {"color": 252, "underline": true}
|
|
||||||
}))
|
}))
|
||||||
.unwrap()
|
.unwrap()
|
||||||
}
|
}
|
||||||
|
|
@ -223,8 +220,9 @@ fn parse_style(style: &mut Style, json: Value) {
|
||||||
|
|
||||||
if let Some(Color::Rgb(RgbColor(red, green, blue))) = style.ansi.get_fg_color() {
|
if let Some(Color::Rgb(RgbColor(red, green, blue))) = style.ansi.get_fg_color() {
|
||||||
if !terminal_supports_truecolor() {
|
if !terminal_supports_truecolor() {
|
||||||
let ansi256 = Color::Ansi256(Ansi256Color(ansi256_from_rgb((red, green, blue))));
|
style.ansi = style
|
||||||
style.ansi = style.ansi.fg_color(Some(ansi256));
|
.ansi
|
||||||
|
.fg_color(Some(closest_xterm_color(red, green, blue)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -270,7 +268,7 @@ fn hex_string_to_rgb(s: &str) -> Option<(u8, u8, u8)> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn style_to_css(style: anstyle::Style) -> String {
|
fn style_to_css(style: anstyle::Style) -> String {
|
||||||
let mut result = String::new();
|
let mut result = "style='".to_string();
|
||||||
let effects = style.get_effects();
|
let effects = style.get_effects();
|
||||||
if effects.contains(Effects::UNDERLINE) {
|
if effects.contains(Effects::UNDERLINE) {
|
||||||
write!(&mut result, "text-decoration: underline;").unwrap();
|
write!(&mut result, "text-decoration: underline;").unwrap();
|
||||||
|
|
@ -284,6 +282,7 @@ fn style_to_css(style: anstyle::Style) -> String {
|
||||||
if let Some(color) = style.get_fg_color() {
|
if let Some(color) = style.get_fg_color() {
|
||||||
write_color(&mut result, color);
|
write_color(&mut result, color);
|
||||||
}
|
}
|
||||||
|
result.push('\'');
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -301,8 +300,7 @@ fn write_color(buffer: &mut String, color: Color) {
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
},
|
},
|
||||||
Color::Ansi256(Ansi256Color(n)) => {
|
Color::Ansi256(Ansi256Color(n)) => {
|
||||||
let (r, g, b) = rgb_from_ansi256(n);
|
write!(buffer, "color: {}", CSS_STYLES_BY_COLOR_ID[n as usize]).unwrap();
|
||||||
write!(buffer, "color: #{r:02x}{g:02x}{b:02x}").unwrap();
|
|
||||||
}
|
}
|
||||||
Color::Rgb(RgbColor(r, g, b)) => write!(buffer, "color: #{r:02x}{g:02x}{b:02x}").unwrap(),
|
Color::Rgb(RgbColor(r, g, b)) => write!(buffer, "color: #{r:02x}{g:02x}{b:02x}").unwrap(),
|
||||||
}
|
}
|
||||||
|
|
@ -313,144 +311,115 @@ fn terminal_supports_truecolor() -> bool {
|
||||||
.is_ok_and(|truecolor| truecolor == "truecolor" || truecolor == "24bit")
|
.is_ok_and(|truecolor| truecolor == "truecolor" || truecolor == "24bit")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct HighlightOptions {
|
fn closest_xterm_color(red: u8, green: u8, blue: u8) -> Color {
|
||||||
pub theme: Theme,
|
use std::cmp::{max, min};
|
||||||
pub check: bool,
|
|
||||||
pub captures_path: Option<PathBuf>,
|
let colors = CSS_STYLES_BY_COLOR_ID
|
||||||
pub inline_styles: bool,
|
.iter()
|
||||||
pub html: bool,
|
.enumerate()
|
||||||
pub quiet: bool,
|
.map(|(color_id, hex)| (color_id as u8, hex_string_to_rgb(hex).unwrap()));
|
||||||
pub print_time: bool,
|
|
||||||
pub cancellation_flag: Arc<AtomicUsize>,
|
// Get the xterm color with the minimum Euclidean distance to the target color
|
||||||
|
// i.e. distance = √ (r2 - r1)² + (g2 - g1)² + (b2 - b1)²
|
||||||
|
let distances = colors.map(|(color_id, (r, g, b))| {
|
||||||
|
let r_delta = (max(r, red) - min(r, red)) as u32;
|
||||||
|
let g_delta = (max(g, green) - min(g, green)) as u32;
|
||||||
|
let b_delta = (max(b, blue) - min(b, blue)) as u32;
|
||||||
|
let distance = r_delta.pow(2) + g_delta.pow(2) + b_delta.pow(2);
|
||||||
|
// don't need to actually take the square root for the sake of comparison
|
||||||
|
(color_id, distance)
|
||||||
|
});
|
||||||
|
|
||||||
|
Color::Ansi256(Ansi256Color(
|
||||||
|
distances.min_by(|(_, d1), (_, d2)| d1.cmp(d2)).unwrap().0,
|
||||||
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
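One side of this hunk computes the nearest xterm-256 colour by minimising the Euclidean distance in RGB space (the other side delegates to `ansi256_from_rgb` from the `ansi_colours` crate). A small self-contained sketch of that nearest-colour search is shown here, using a hypothetical `palette` slice in place of the vendored `xterm-colors.json` table.

```rust
/// Sketch of a nearest-palette-colour search: choose the entry with the
/// smallest squared Euclidean distance in RGB space. The square root is
/// monotonic, so it can be skipped when only comparing distances.
fn closest_palette_color(palette: &[(u8, u8, u8)], target: (u8, u8, u8)) -> usize {
    let dist2 = |(r, g, b): (u8, u8, u8)| -> u32 {
        let dr = i32::from(r) - i32::from(target.0);
        let dg = i32::from(g) - i32::from(target.1);
        let db = i32::from(b) - i32::from(target.2);
        (dr * dr + dg * dg + db * db) as u32
    };
    palette
        .iter()
        .enumerate()
        .min_by_key(|&(_, &rgb)| dist2(rgb))
        .map(|(index, _)| index)
        .expect("palette must not be empty")
}
```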
pub fn highlight(
|
pub fn ansi(
|
||||||
loader: &Loader,
|
loader: &Loader,
|
||||||
path: &Path,
|
theme: &Theme,
|
||||||
name: &str,
|
source: &[u8],
|
||||||
config: &HighlightConfiguration,
|
config: &HighlightConfiguration,
|
||||||
print_name: bool,
|
print_time: bool,
|
||||||
opts: &HighlightOptions,
|
cancellation_flag: Option<&AtomicUsize>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
if opts.check {
|
|
||||||
let names = if let Some(path) = opts.captures_path.as_deref() {
|
|
||||||
let file = fs::read_to_string(path)?;
|
|
||||||
let capture_names = file
|
|
||||||
.lines()
|
|
||||||
.filter_map(|line| {
|
|
||||||
if line.trim().is_empty() || line.trim().starts_with(';') {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
line.split(';').next().map(|s| s.trim().trim_matches('"'))
|
|
||||||
})
|
|
||||||
.collect::<HashSet<_>>();
|
|
||||||
config.nonconformant_capture_names(&capture_names)
|
|
||||||
} else {
|
|
||||||
config.nonconformant_capture_names(&HashSet::new())
|
|
||||||
};
|
|
||||||
if names.is_empty() {
|
|
||||||
info!("All highlight captures conform to standards.");
|
|
||||||
} else {
|
|
||||||
warn!(
|
|
||||||
"Non-standard highlight {} detected:\n* {}",
|
|
||||||
if names.len() > 1 {
|
|
||||||
"captures"
|
|
||||||
} else {
|
|
||||||
"capture"
|
|
||||||
},
|
|
||||||
names.join("\n* ")
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let source = fs::read(path)?;
|
|
||||||
let stdout = io::stdout();
|
let stdout = io::stdout();
|
||||||
let mut stdout = stdout.lock();
|
let mut stdout = stdout.lock();
|
||||||
let time = Instant::now();
|
let time = Instant::now();
|
||||||
let mut highlighter = Highlighter::new();
|
let mut highlighter = Highlighter::new();
|
||||||
let events =
|
|
||||||
highlighter.highlight(config, &source, Some(&opts.cancellation_flag), |string| {
|
|
||||||
loader.highlight_config_for_injection_string(string)
|
|
||||||
})?;
|
|
||||||
let theme = &opts.theme;
|
|
||||||
|
|
||||||
if !opts.quiet && print_name {
|
let events = highlighter.highlight(config, source, cancellation_flag, |string| {
|
||||||
writeln!(&mut stdout, "{name}")?;
|
loader.highlight_config_for_injection_string(string)
|
||||||
}
|
})?;
|
||||||
|
|
||||||
if opts.html {
|
let mut style_stack = vec![theme.default_style().ansi];
|
||||||
if !opts.quiet {
|
for event in events {
|
||||||
writeln!(&mut stdout, "{HTML_HEAD_HEADER}")?;
|
match event? {
|
||||||
writeln!(&mut stdout, " <style>")?;
|
HighlightEvent::HighlightStart(highlight) => {
|
||||||
let names = theme.highlight_names.iter();
|
style_stack.push(theme.styles[highlight.0].ansi);
|
||||||
let styles = theme.styles.iter();
|
|
||||||
for (name, style) in names.zip(styles) {
|
|
||||||
if let Some(css) = &style.css {
|
|
||||||
writeln!(&mut stdout, " .{name} {{ {css}; }}")?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
writeln!(&mut stdout, " </style>")?;
|
HighlightEvent::HighlightEnd => {
|
||||||
writeln!(&mut stdout, "{HTML_BODY_HEADER}")?;
|
style_stack.pop();
|
||||||
}
|
|
||||||
|
|
||||||
let mut renderer = HtmlRenderer::new();
|
|
||||||
renderer.render(events, &source, &move |highlight, output| {
|
|
||||||
if opts.inline_styles {
|
|
||||||
output.extend(b"style='");
|
|
||||||
output.extend(
|
|
||||||
theme.styles[highlight.0]
|
|
||||||
.css
|
|
||||||
.as_ref()
|
|
||||||
.map_or_else(|| "".as_bytes(), |css_style| css_style.as_bytes()),
|
|
||||||
);
|
|
||||||
output.extend(b"'");
|
|
||||||
} else {
|
|
||||||
output.extend(b"class='");
|
|
||||||
let mut parts = theme.highlight_names[highlight.0].split('.').peekable();
|
|
||||||
while let Some(part) = parts.next() {
|
|
||||||
output.extend(part.as_bytes());
|
|
||||||
if parts.peek().is_some() {
|
|
||||||
output.extend(b" ");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
output.extend(b"'");
|
|
||||||
}
|
}
|
||||||
})?;
|
HighlightEvent::Source { start, end } => {
|
||||||
|
let style = style_stack.last().unwrap();
|
||||||
if !opts.quiet {
|
write!(&mut stdout, "{style}").unwrap();
|
||||||
writeln!(&mut stdout, "<table>")?;
|
stdout.write_all(&source[start..end])?;
|
||||||
for (i, line) in renderer.lines().enumerate() {
|
write!(&mut stdout, "{style:#}").unwrap();
|
||||||
writeln!(
|
|
||||||
&mut stdout,
|
|
||||||
"<tr><td class=line-number>{}</td><td class=line>{line}</td></tr>",
|
|
||||||
i + 1,
|
|
||||||
)?;
|
|
||||||
}
|
|
||||||
writeln!(&mut stdout, "</table>")?;
|
|
||||||
writeln!(&mut stdout, "{HTML_FOOTER}")?;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
let mut style_stack = vec![theme.default_style().ansi];
|
|
||||||
for event in events {
|
|
||||||
match event? {
|
|
||||||
HighlightEvent::HighlightStart(highlight) => {
|
|
||||||
style_stack.push(theme.styles[highlight.0].ansi);
|
|
||||||
}
|
|
||||||
HighlightEvent::HighlightEnd => {
|
|
||||||
style_stack.pop();
|
|
||||||
}
|
|
||||||
HighlightEvent::Source { start, end } => {
|
|
||||||
let style = style_stack.last().unwrap();
|
|
||||||
write!(&mut stdout, "{style}").unwrap();
|
|
||||||
stdout.write_all(&source[start..end])?;
|
|
||||||
write!(&mut stdout, "{style:#}").unwrap();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if opts.print_time {
|
if print_time {
|
||||||
info!("Time: {}ms", time.elapsed().as_millis());
|
eprintln!("Time: {}ms", time.elapsed().as_millis());
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn html(
|
||||||
|
loader: &Loader,
|
||||||
|
theme: &Theme,
|
||||||
|
source: &[u8],
|
||||||
|
config: &HighlightConfiguration,
|
||||||
|
quiet: bool,
|
||||||
|
print_time: bool,
|
||||||
|
cancellation_flag: Option<&AtomicUsize>,
|
||||||
|
) -> Result<()> {
|
||||||
|
use std::io::Write;
|
||||||
|
|
||||||
|
let stdout = io::stdout();
|
||||||
|
let mut stdout = stdout.lock();
|
||||||
|
let time = Instant::now();
|
||||||
|
let mut highlighter = Highlighter::new();
|
||||||
|
|
||||||
|
let events = highlighter.highlight(config, source, cancellation_flag, |string| {
|
||||||
|
loader.highlight_config_for_injection_string(string)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let mut renderer = HtmlRenderer::new();
|
||||||
|
renderer.render(events, source, &move |highlight| {
|
||||||
|
theme.styles[highlight.0]
|
||||||
|
.css
|
||||||
|
.as_ref()
|
||||||
|
.map_or_else(|| "".as_bytes(), |css_style| css_style.as_bytes())
|
||||||
|
})?;
|
||||||
|
|
||||||
|
if !quiet {
|
||||||
|
writeln!(&mut stdout, "<table>")?;
|
||||||
|
for (i, line) in renderer.lines().enumerate() {
|
||||||
|
writeln!(
|
||||||
|
&mut stdout,
|
||||||
|
"<tr><td class=line-number>{}</td><td class=line>{line}</td></tr>",
|
||||||
|
i + 1,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
writeln!(&mut stdout, "</table>")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
if print_time {
|
||||||
|
eprintln!("Time: {}ms", time.elapsed().as_millis());
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
@ -480,7 +449,7 @@ mod tests {
|
||||||
style.ansi.get_fg_color(),
|
style.ansi.get_fg_color(),
|
||||||
Some(Color::Ansi256(Ansi256Color(36)))
|
Some(Color::Ansi256(Ansi256Color(36)))
|
||||||
);
|
);
|
||||||
assert_eq!(style.css, Some("color: #00af87".to_string()));
|
assert_eq!(style.css, Some("style=\'color: #00af87\'".to_string()));
|
||||||
|
|
||||||
// junglegreen is not an ANSI color and is preserved when the terminal supports it
|
// junglegreen is not an ANSI color and is preserved when the terminal supports it
|
||||||
env::set_var("COLORTERM", "truecolor");
|
env::set_var("COLORTERM", "truecolor");
|
||||||
|
|
@ -489,16 +458,16 @@ mod tests {
|
||||||
style.ansi.get_fg_color(),
|
style.ansi.get_fg_color(),
|
||||||
Some(Color::Rgb(RgbColor(38, 166, 154)))
|
Some(Color::Rgb(RgbColor(38, 166, 154)))
|
||||||
);
|
);
|
||||||
assert_eq!(style.css, Some("color: #26a69a".to_string()));
|
assert_eq!(style.css, Some("style=\'color: #26a69a\'".to_string()));
|
||||||
|
|
||||||
// junglegreen gets approximated as cadetblue when the terminal does not support it
|
// junglegreen gets approximated as darkcyan when the terminal does not support it
|
||||||
env::set_var("COLORTERM", "");
|
env::set_var("COLORTERM", "");
|
||||||
parse_style(&mut style, Value::String(JUNGLE_GREEN.to_string()));
|
parse_style(&mut style, Value::String(JUNGLE_GREEN.to_string()));
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
style.ansi.get_fg_color(),
|
style.ansi.get_fg_color(),
|
||||||
Some(Color::Ansi256(Ansi256Color(72)))
|
Some(Color::Ansi256(Ansi256Color(36)))
|
||||||
);
|
);
|
||||||
assert_eq!(style.css, Some("color: #26a69a".to_string()));
|
assert_eq!(style.css, Some("style=\'color: #26a69a\'".to_string()));
|
||||||
|
|
||||||
if let Ok(environment_variable) = original_environment_variable {
|
if let Ok(environment_variable) = original_environment_variable {
|
||||||
env::set_var("COLORTERM", environment_variable);
|
env::set_var("COLORTERM", environment_variable);
|
||||||
992 cli/src/init.rs Normal file
|
|
@ -0,0 +1,992 @@
|
||||||
|
use std::{
    fs,
    path::{Path, PathBuf},
    str::{self, FromStr},
};

use anyhow::{anyhow, Context, Result};
use heck::{ToKebabCase, ToShoutySnakeCase, ToSnakeCase, ToUpperCamelCase};
use regex::Regex;
use semver::Version;
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use tree_sitter_generate::write_file;
use tree_sitter_loader::{
    Author, Bindings, Grammar, Links, Metadata, PackageJSON, PackageJSONAuthor,
    PackageJSONRepository, PathsJSON, TreeSitterJSON,
};
use url::Url;

const CLI_VERSION: &str = env!("CARGO_PKG_VERSION");
const CLI_VERSION_PLACEHOLDER: &str = "CLI_VERSION";

const ABI_VERSION_MAX: usize = tree_sitter::LANGUAGE_VERSION;
const ABI_VERSION_MAX_PLACEHOLDER: &str = "ABI_VERSION_MAX";

const PARSER_NAME_PLACEHOLDER: &str = "PARSER_NAME";
const CAMEL_PARSER_NAME_PLACEHOLDER: &str = "CAMEL_PARSER_NAME";
const UPPER_PARSER_NAME_PLACEHOLDER: &str = "UPPER_PARSER_NAME";
const LOWER_PARSER_NAME_PLACEHOLDER: &str = "LOWER_PARSER_NAME";

const PARSER_DESCRIPTION_PLACEHOLDER: &str = "PARSER_DESCRIPTION";
const PARSER_LICENSE_PLACEHOLDER: &str = "PARSER_LICENSE";
const PARSER_URL_PLACEHOLDER: &str = "PARSER_URL";
const PARSER_URL_STRIPPED_PLACEHOLDER: &str = "PARSER_URL_STRIPPED";
const PARSER_VERSION_PLACEHOLDER: &str = "PARSER_VERSION";

const AUTHOR_NAME_PLACEHOLDER: &str = "PARSER_AUTHOR_NAME";
const AUTHOR_EMAIL_PLACEHOLDER: &str = "PARSER_AUTHOR_EMAIL";
const AUTHOR_URL_PLACEHOLDER: &str = "PARSER_AUTHOR_URL";

const AUTHOR_BLOCK_JS: &str = "\n  \"author\": {";
const AUTHOR_NAME_PLACEHOLDER_JS: &str = "\n    \"name\": \"PARSER_AUTHOR_NAME\",";
const AUTHOR_EMAIL_PLACEHOLDER_JS: &str = ",\n    \"email\": \"PARSER_AUTHOR_EMAIL\"";
const AUTHOR_URL_PLACEHOLDER_JS: &str = ",\n    \"url\": \"PARSER_AUTHOR_URL\"";

const AUTHOR_BLOCK_PY: &str = "\nauthors = [{";
const AUTHOR_NAME_PLACEHOLDER_PY: &str = "name = \"PARSER_AUTHOR_NAME\"";
const AUTHOR_EMAIL_PLACEHOLDER_PY: &str = ", email = \"PARSER_AUTHOR_EMAIL\"";

const AUTHOR_BLOCK_RS: &str = "\nauthors = [";
const AUTHOR_NAME_PLACEHOLDER_RS: &str = "PARSER_AUTHOR_NAME";
const AUTHOR_EMAIL_PLACEHOLDER_RS: &str = " PARSER_AUTHOR_EMAIL";

const AUTHOR_BLOCK_GRAMMAR: &str = "\n * @author ";
const AUTHOR_NAME_PLACEHOLDER_GRAMMAR: &str = "PARSER_AUTHOR_NAME";
const AUTHOR_EMAIL_PLACEHOLDER_GRAMMAR: &str = " PARSER_AUTHOR_EMAIL";

const GRAMMAR_JS_TEMPLATE: &str = include_str!("./templates/grammar.js");
const PACKAGE_JSON_TEMPLATE: &str = include_str!("./templates/package.json");
const GITIGNORE_TEMPLATE: &str = include_str!("./templates/gitignore");
const GITATTRIBUTES_TEMPLATE: &str = include_str!("./templates/gitattributes");
const EDITORCONFIG_TEMPLATE: &str = include_str!("./templates/.editorconfig");

const RUST_BINDING_VERSION: &str = env!("CARGO_PKG_VERSION");
const RUST_BINDING_VERSION_PLACEHOLDER: &str = "RUST_BINDING_VERSION";

const LIB_RS_TEMPLATE: &str = include_str!("./templates/lib.rs");
const BUILD_RS_TEMPLATE: &str = include_str!("./templates/build.rs");
const CARGO_TOML_TEMPLATE: &str = include_str!("./templates/_cargo.toml");

const INDEX_JS_TEMPLATE: &str = include_str!("./templates/index.js");
const INDEX_D_TS_TEMPLATE: &str = include_str!("./templates/index.d.ts");
const JS_BINDING_CC_TEMPLATE: &str = include_str!("./templates/js-binding.cc");
const BINDING_GYP_TEMPLATE: &str = include_str!("./templates/binding.gyp");
const BINDING_TEST_JS_TEMPLATE: &str = include_str!("./templates/binding_test.js");

const MAKEFILE_TEMPLATE: &str = include_str!("./templates/makefile");
const CMAKELISTS_TXT_TEMPLATE: &str = include_str!("./templates/cmakelists.cmake");
const PARSER_NAME_H_TEMPLATE: &str = include_str!("./templates/PARSER_NAME.h");
const PARSER_NAME_PC_IN_TEMPLATE: &str = include_str!("./templates/PARSER_NAME.pc.in");

const GO_MOD_TEMPLATE: &str = include_str!("./templates/go.mod");
const BINDING_GO_TEMPLATE: &str = include_str!("./templates/binding.go");
const BINDING_TEST_GO_TEMPLATE: &str = include_str!("./templates/binding_test.go");

const SETUP_PY_TEMPLATE: &str = include_str!("./templates/setup.py");
const INIT_PY_TEMPLATE: &str = include_str!("./templates/__init__.py");
const INIT_PYI_TEMPLATE: &str = include_str!("./templates/__init__.pyi");
const PYPROJECT_TOML_TEMPLATE: &str = include_str!("./templates/pyproject.toml");
const PY_BINDING_C_TEMPLATE: &str = include_str!("./templates/py-binding.c");
const TEST_BINDING_PY_TEMPLATE: &str = include_str!("./templates/test_binding.py");

const PACKAGE_SWIFT_TEMPLATE: &str = include_str!("./templates/package.swift");
const TESTS_SWIFT_TEMPLATE: &str = include_str!("./templates/tests.swift");

const TREE_SITTER_JSON_SCHEMA: &str =
    "https://tree-sitter.github.io/tree-sitter/assets/schemas/config.schema.json";

#[must_use]
pub fn path_in_ignore(repo_path: &Path) -> bool {
    [
        "bindings",
        "build",
        "examples",
        "node_modules",
        "queries",
        "script",
        "src",
        "target",
        "test",
        "types",
    ]
    .iter()
    .any(|dir| repo_path.ends_with(dir))
}

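`path_in_ignore` only inspects the final path component, so a minimal sketch of the expected behavior (the repository paths below are hypothetical, not taken from the diff) might be:

// Hypothetical check: only the last component decides whether a path is skipped.
fn path_in_ignore_examples() {
    assert!(path_in_ignore(Path::new("tree-sitter-foo/bindings"))); // generated dir, skipped
    assert!(!path_in_ignore(Path::new("tree-sitter-foo"))); // repository root, not skipped
}
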
#[derive(Serialize, Deserialize, Clone)]
pub struct JsonConfigOpts {
    pub name: String,
    pub camelcase: String,
    pub description: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub repository: Option<Url>,
    pub scope: String,
    pub file_types: Vec<String>,
    pub version: Version,
    pub license: String,
    pub author: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub email: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub url: Option<Url>,
}

impl JsonConfigOpts {
    #[must_use]
    pub fn to_tree_sitter_json(self) -> TreeSitterJSON {
        TreeSitterJSON {
            schema: Some(TREE_SITTER_JSON_SCHEMA.to_string()),
            grammars: vec![Grammar {
                name: self.name.clone(),
                camelcase: Some(self.camelcase),
                scope: self.scope,
                path: None,
                external_files: PathsJSON::Empty,
                file_types: Some(self.file_types),
                highlights: PathsJSON::Empty,
                injections: PathsJSON::Empty,
                locals: PathsJSON::Empty,
                tags: PathsJSON::Empty,
                injection_regex: Some(format!("^{}$", self.name)),
                first_line_regex: None,
                content_regex: None,
            }],
            metadata: Metadata {
                version: self.version,
                license: Some(self.license),
                description: Some(self.description),
                authors: Some(vec![Author {
                    name: self.author,
                    email: self.email,
                    url: self.url.map(|url| url.to_string()),
                }]),
                links: Some(Links {
                    repository: self.repository.unwrap_or_else(|| {
                        Url::parse(&format!(
                            "https://github.com/tree-sitter/tree-sitter-{}",
                            self.name
                        ))
                        .expect("Failed to parse default repository URL")
                    }),
                    homepage: None,
                }),
                namespace: None,
            },
            bindings: Bindings::default(),
        }
    }
}

impl Default for JsonConfigOpts {
    fn default() -> Self {
        Self {
            name: String::new(),
            camelcase: String::new(),
            description: String::new(),
            repository: None,
            scope: String::new(),
            file_types: vec![],
            version: Version::from_str("0.1.0").unwrap(),
            license: String::new(),
            author: String::new(),
            email: None,
            url: None,
        }
    }
}

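For orientation, a minimal sketch of how `JsonConfigOpts` feeds `to_tree_sitter_json`; all field values below are hypothetical and not part of the diff:

// Hypothetical sketch: assemble options for a grammar named "foo" and serialize the result.
fn json_config_example() -> Result<()> {
    let opts = JsonConfigOpts {
        name: "foo".to_string(),
        camelcase: "Foo".to_string(),
        description: "Foo grammar for tree-sitter".to_string(),
        scope: "source.foo".to_string(),
        file_types: vec!["foo".to_string()],
        license: "MIT".to_string(),
        author: "Jane Doe".to_string(),
        ..JsonConfigOpts::default()
    };
    let config = opts.to_tree_sitter_json();
    println!("{}", serde_json::to_string_pretty(&config)?);
    Ok(())
}
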
struct GenerateOpts<'a> {
    author_name: Option<&'a str>,
    author_email: Option<&'a str>,
    author_url: Option<&'a str>,
    license: Option<&'a str>,
    description: Option<&'a str>,
    repository: Option<&'a str>,
    version: &'a Version,
    camel_parser_name: &'a str,
}

// TODO: remove in 0.25
// A return value of true means migration was successful, and false if not.
pub fn migrate_package_json(repo_path: &Path) -> Result<bool> {
    let root_path =
        get_root_path(&repo_path.join("package.json")).unwrap_or_else(|_| repo_path.to_path_buf());
    let (package_json_path, tree_sitter_json_path) = (
        root_path.join("package.json"),
        root_path.join("tree-sitter.json"),
    );

    let old_config = serde_json::from_str::<PackageJSON>(
        &fs::read_to_string(&package_json_path)
            .with_context(|| format!("Failed to read package.json in {}", root_path.display()))?,
    )?;

    if old_config.tree_sitter.is_none() {
        eprintln!("Failed to find `tree-sitter` section in package.json, unable to migrate");
        return Ok(false);
    }

    let name = old_config.name.replace("tree-sitter-", "");

    let new_config = TreeSitterJSON {
        schema: Some(TREE_SITTER_JSON_SCHEMA.to_string()),
        grammars: old_config
            .tree_sitter
            .unwrap()
            .into_iter()
            .map(|l| Grammar {
                name: name.clone(),
                camelcase: Some(name.to_upper_camel_case()),
                scope: l.scope.unwrap_or_else(|| format!("source.{name}")),
                path: Some(l.path),
                external_files: l.external_files,
                file_types: l.file_types,
                highlights: l.highlights,
                injections: l.injections,
                locals: l.locals,
                tags: l.tags,
                injection_regex: l.injection_regex,
                first_line_regex: l.first_line_regex,
                content_regex: l.content_regex,
            })
            .collect(),
        metadata: Metadata {
            version: old_config.version,
            license: old_config
                .license
                .map_or_else(|| Some("MIT".to_string()), Some),
            description: old_config
                .description
                .map_or_else(|| Some(format!("{name} grammar for tree-sitter")), Some),
            authors: {
                let authors = old_config
                    .author
                    .map_or_else(|| vec![].into_iter(), |a| vec![a].into_iter())
                    .chain(old_config.maintainers.unwrap_or_default())
                    .filter_map(|a| match a {
                        PackageJSONAuthor::String(s) => {
                            let mut name = s.trim().to_string();
                            if name.is_empty() {
                                return None;
                            }

                            let mut email = None;
                            let mut url = None;

                            if let Some(url_start) = name.rfind('(') {
                                if let Some(url_end) = name.rfind(')') {
                                    url = Some(name[url_start + 1..url_end].trim().to_string());
                                    name = name[..url_start].trim().to_string();
                                }
                            }

                            if let Some(email_start) = name.rfind('<') {
                                if let Some(email_end) = name.rfind('>') {
                                    email =
                                        Some(name[email_start + 1..email_end].trim().to_string());
                                    name = name[..email_start].trim().to_string();
                                }
                            }

                            Some(Author { name, email, url })
                        }
                        PackageJSONAuthor::Object { name, email, url } => {
                            if name.is_empty() {
                                None
                            } else {
                                Some(Author { name, email, url })
                            }
                        }
                    })
                    .collect::<Vec<_>>();
                if authors.is_empty() {
                    None
                } else {
                    Some(authors)
                }
            },
            links: Some(Links {
                repository: old_config
                    .repository
                    .map(|r| match r {
                        PackageJSONRepository::String(s) => {
                            if let Some(stripped) = s.strip_prefix("github:") {
                                Url::parse(&format!("https://github.com/{stripped}"))
                            } else if Regex::new(r"^[\w.-]+/[\w.-]+$").unwrap().is_match(&s) {
                                Url::parse(&format!("https://github.com/{s}"))
                            } else if let Some(stripped) = s.strip_prefix("gitlab:") {
                                Url::parse(&format!("https://gitlab.com/{stripped}"))
                            } else if let Some(stripped) = s.strip_prefix("bitbucket:") {
                                Url::parse(&format!("https://bitbucket.org/{stripped}"))
                            } else {
                                Url::parse(&s)
                            }
                        }
                        PackageJSONRepository::Object { url, .. } => Url::parse(&url),
                    })
                    .transpose()?
                    .unwrap_or_else(|| {
                        Url::parse(&format!(
                            "https://github.com/tree-sitter/tree-sitter-{name}"
                        ))
                        .expect("Failed to parse default repository URL")
                    }),
                homepage: None,
            }),
            namespace: None,
        },
        bindings: Bindings::default(),
    };

    write_file(
        &tree_sitter_json_path,
        serde_json::to_string_pretty(&new_config)? + "\n",
    )?;

    // Remove the `tree-sitter` field in-place
    let mut package_json = serde_json::from_str::<Map<String, Value>>(
        &fs::read_to_string(&package_json_path)
            .with_context(|| format!("Failed to read package.json in {}", root_path.display()))?,
    )
    .unwrap();
    package_json.remove("tree-sitter");
    write_file(
        &root_path.join("package.json"),
        serde_json::to_string_pretty(&package_json)? + "\n",
    )?;

    println!("Warning: your package.json's `tree-sitter` field has been automatically migrated to the new `tree-sitter.json` config file");
    println!(
        "For more information, visit https://tree-sitter.github.io/tree-sitter/creating-parsers"
    );

    Ok(true)
}

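A brief sketch of what a caller of `migrate_package_json` can expect; the repository path here is hypothetical:

// Hypothetical sketch: migrate an old-style grammar repo in place.
fn migrate_example() -> Result<()> {
    let migrated = migrate_package_json(Path::new("tree-sitter-foo"))?;
    if migrated {
        // tree-sitter.json now holds the grammar list, metadata, and binding flags,
        // and package.json no longer carries a `tree-sitter` field.
    }
    Ok(())
}
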
pub fn generate_grammar_files(
    repo_path: &Path,
    language_name: &str,
    allow_update: bool,
    opts: Option<&JsonConfigOpts>,
) -> Result<()> {
    let dashed_language_name = language_name.to_kebab_case();

    let tree_sitter_config = missing_path_else(
        repo_path.join("tree-sitter.json"),
        true,
        |path| {
            // invariant: opts is always Some when `tree-sitter.json` doesn't exist
            let Some(opts) = opts else { unreachable!() };

            let tree_sitter_json = opts.clone().to_tree_sitter_json();
            write_file(path, serde_json::to_string_pretty(&tree_sitter_json)?)
        },
        |path| {
            // updating the config, if needed
            if let Some(opts) = opts {
                let tree_sitter_json = opts.clone().to_tree_sitter_json();
                write_file(path, serde_json::to_string_pretty(&tree_sitter_json)?)?;
            }
            Ok(())
        },
    )?;

    let tree_sitter_config = serde_json::from_str::<TreeSitterJSON>(
        &fs::read_to_string(tree_sitter_config.as_path())
            .with_context(|| "Failed to read tree-sitter.json")?,
    )?;

    let authors = tree_sitter_config.metadata.authors.as_ref();
    let camel_name = tree_sitter_config.grammars[0]
        .camelcase
        .clone()
        .unwrap_or_else(|| language_name.to_upper_camel_case());

    let generate_opts = GenerateOpts {
        author_name: authors
            .map(|a| a.first().map(|a| a.name.as_str()))
            .unwrap_or_default(),
        author_email: authors
            .map(|a| a.first().and_then(|a| a.email.as_deref()))
            .unwrap_or_default(),
        author_url: authors
            .map(|a| a.first().and_then(|a| a.url.as_deref()))
            .unwrap_or_default(),
        license: tree_sitter_config.metadata.license.as_deref(),
        description: tree_sitter_config.metadata.description.as_deref(),
        repository: tree_sitter_config
            .metadata
            .links
            .as_ref()
            .map(|l| l.repository.as_str()),
        version: &tree_sitter_config.metadata.version,
        camel_parser_name: &camel_name,
    };

    // Create package.json
    missing_path(repo_path.join("package.json"), |path| {
        generate_file(
            path,
            PACKAGE_JSON_TEMPLATE,
            dashed_language_name.as_str(),
            &generate_opts,
        )
    })?;

    // Do not create a grammar.js file in a repo with multiple language configs
    if !tree_sitter_config.has_multiple_language_configs() {
        missing_path(repo_path.join("grammar.js"), |path| {
            generate_file(path, GRAMMAR_JS_TEMPLATE, language_name, &generate_opts)
        })?;
    }

    // Write .gitignore file
    missing_path(repo_path.join(".gitignore"), |path| {
        generate_file(path, GITIGNORE_TEMPLATE, language_name, &generate_opts)
    })?;

    // Write .gitattributes file
    missing_path(repo_path.join(".gitattributes"), |path| {
        generate_file(path, GITATTRIBUTES_TEMPLATE, language_name, &generate_opts)
    })?;

    // Write .editorconfig file
    missing_path(repo_path.join(".editorconfig"), |path| {
        generate_file(path, EDITORCONFIG_TEMPLATE, language_name, &generate_opts)
    })?;

    let bindings_dir = repo_path.join("bindings");

    // Generate Rust bindings
    if tree_sitter_config.bindings.rust {
        missing_path(bindings_dir.join("rust"), create_dir)?.apply(|path| {
            missing_path(path.join("lib.rs"), |path| {
                generate_file(path, LIB_RS_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path(path.join("build.rs"), |path| {
                generate_file(path, BUILD_RS_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path(repo_path.join("Cargo.toml"), |path| {
                generate_file(
                    path,
                    CARGO_TOML_TEMPLATE,
                    dashed_language_name.as_str(),
                    &generate_opts,
                )
            })?;

            Ok(())
        })?;
    }

    // Generate Node bindings
    if tree_sitter_config.bindings.node {
        missing_path(bindings_dir.join("node"), create_dir)?.apply(|path| {
            missing_path_else(
                path.join("index.js"),
                allow_update,
                |path| generate_file(path, INDEX_JS_TEMPLATE, language_name, &generate_opts),
                |path| {
                    let contents = fs::read_to_string(path)?;
                    if !contents.contains("bun") {
                        generate_file(path, INDEX_JS_TEMPLATE, language_name, &generate_opts)?;
                    }
                    Ok(())
                },
            )?;

            missing_path(path.join("index.d.ts"), |path| {
                generate_file(path, INDEX_D_TS_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path(path.join("binding_test.js"), |path| {
                generate_file(
                    path,
                    BINDING_TEST_JS_TEMPLATE,
                    language_name,
                    &generate_opts,
                )
            })?;

            missing_path(path.join("binding.cc"), |path| {
                generate_file(path, JS_BINDING_CC_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path(repo_path.join("binding.gyp"), |path| {
                generate_file(path, BINDING_GYP_TEMPLATE, language_name, &generate_opts)
            })?;

            Ok(())
        })?;
    }

    // Generate C bindings
    if tree_sitter_config.bindings.c {
        missing_path(bindings_dir.join("c"), create_dir)?.apply(|path| {
            missing_path(
                path.join(format!("tree-sitter-{language_name}.h")),
                |path| generate_file(path, PARSER_NAME_H_TEMPLATE, language_name, &generate_opts),
            )?;

            missing_path(
                path.join(format!("tree-sitter-{language_name}.pc.in")),
                |path| {
                    generate_file(
                        path,
                        PARSER_NAME_PC_IN_TEMPLATE,
                        language_name,
                        &generate_opts,
                    )
                },
            )?;

            missing_path(repo_path.join("Makefile"), |path| {
                generate_file(path, MAKEFILE_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path_else(
                repo_path.join("CMakeLists.txt"),
                allow_update,
                |path| generate_file(path, CMAKELISTS_TXT_TEMPLATE, language_name, &generate_opts),
                |path| {
                    let contents = fs::read_to_string(path)?;
                    let old = "add_custom_target(test";
                    if contents.contains(old) {
                        write_file(path, contents.replace(old, "add_custom_target(ts-test"))
                    } else {
                        Ok(())
                    }
                },
            )?;

            Ok(())
        })?;
    }

    // Generate Go bindings
    if tree_sitter_config.bindings.go {
        missing_path(bindings_dir.join("go"), create_dir)?.apply(|path| {
            missing_path(path.join("binding.go"), |path| {
                generate_file(path, BINDING_GO_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path(path.join("binding_test.go"), |path| {
                generate_file(
                    path,
                    BINDING_TEST_GO_TEMPLATE,
                    language_name,
                    &generate_opts,
                )
            })?;

            missing_path(repo_path.join("go.mod"), |path| {
                generate_file(path, GO_MOD_TEMPLATE, language_name, &generate_opts)
            })?;

            Ok(())
        })?;
    }

    // Generate Python bindings
    if tree_sitter_config.bindings.python {
        missing_path(bindings_dir.join("python"), create_dir)?.apply(|path| {
            let lang_path = path.join(format!("tree_sitter_{}", language_name.to_snake_case()));
            missing_path(&lang_path, create_dir)?;

            missing_path(lang_path.join("binding.c"), |path| {
                generate_file(path, PY_BINDING_C_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path(lang_path.join("__init__.py"), |path| {
                generate_file(path, INIT_PY_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path(lang_path.join("__init__.pyi"), |path| {
                generate_file(path, INIT_PYI_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path(lang_path.join("py.typed"), |path| {
                generate_file(path, "", language_name, &generate_opts) // py.typed is empty
            })?;

            missing_path(path.join("tests"), create_dir)?.apply(|path| {
                missing_path(path.join("test_binding.py"), |path| {
                    generate_file(
                        path,
                        TEST_BINDING_PY_TEMPLATE,
                        language_name,
                        &generate_opts,
                    )
                })?;
                Ok(())
            })?;

            missing_path(repo_path.join("setup.py"), |path| {
                generate_file(path, SETUP_PY_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path(repo_path.join("pyproject.toml"), |path| {
                generate_file(
                    path,
                    PYPROJECT_TOML_TEMPLATE,
                    dashed_language_name.as_str(),
                    &generate_opts,
                )
            })?;

            Ok(())
        })?;
    }

    // Generate Swift bindings
    if tree_sitter_config.bindings.swift {
        missing_path(bindings_dir.join("swift"), create_dir)?.apply(|path| {
            let lang_path = path.join(format!("TreeSitter{camel_name}",));
            missing_path(&lang_path, create_dir)?;

            missing_path(lang_path.join(format!("{language_name}.h")), |path| {
                generate_file(path, PARSER_NAME_H_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path(
                path.join(format!("TreeSitter{camel_name}Tests",)),
                create_dir,
            )?
            .apply(|path| {
                missing_path(
                    path.join(format!("TreeSitter{camel_name}Tests.swift")),
                    |path| generate_file(path, TESTS_SWIFT_TEMPLATE, language_name, &generate_opts),
                )?;

                Ok(())
            })?;

            missing_path(repo_path.join("Package.swift"), |path| {
                generate_file(path, PACKAGE_SWIFT_TEMPLATE, language_name, &generate_opts)
            })?;

            Ok(())
        })?;
    }

    Ok(())
}

pub fn get_root_path(path: &Path) -> Result<PathBuf> {
    let mut pathbuf = path.to_owned();
    let filename = path.file_name().unwrap().to_str().unwrap();
    let is_package_json = filename == "package.json";
    loop {
        let json = pathbuf
            .exists()
            .then(|| {
                let contents = fs::read_to_string(pathbuf.as_path())
                    .with_context(|| format!("Failed to read {filename}"))?;
                if is_package_json {
                    serde_json::from_str::<Map<String, Value>>(&contents)
                        .context(format!("Failed to parse {filename}"))
                        .map(|v| v.contains_key("tree-sitter"))
                } else {
                    serde_json::from_str::<TreeSitterJSON>(&contents)
                        .context(format!("Failed to parse {filename}"))
                        .map(|_| true)
                }
            })
            .transpose()?;
        if json == Some(true) {
            return Ok(pathbuf.parent().unwrap().to_path_buf());
        }
        pathbuf.pop(); // filename
        if !pathbuf.pop() {
            return Err(anyhow!(format!(
                concat!(
                    "Failed to locate a {} file,",
                    " please ensure you have one, and if you don't then consult the docs",
                ),
                filename
            )));
        }
        pathbuf.push(filename);
    }
}

fn generate_file(
    path: &Path,
    template: &str,
    language_name: &str,
    generate_opts: &GenerateOpts,
) -> Result<()> {
    let filename = path.file_name().unwrap().to_str().unwrap();

    let mut replacement = template
        .replace(
            CAMEL_PARSER_NAME_PLACEHOLDER,
            generate_opts.camel_parser_name,
        )
        .replace(
            UPPER_PARSER_NAME_PLACEHOLDER,
            &language_name.to_shouty_snake_case(),
        )
        .replace(
            LOWER_PARSER_NAME_PLACEHOLDER,
            &language_name.to_snake_case(),
        )
        .replace(PARSER_NAME_PLACEHOLDER, language_name)
        .replace(CLI_VERSION_PLACEHOLDER, CLI_VERSION)
        .replace(RUST_BINDING_VERSION_PLACEHOLDER, RUST_BINDING_VERSION)
        .replace(ABI_VERSION_MAX_PLACEHOLDER, &ABI_VERSION_MAX.to_string())
        .replace(
            PARSER_VERSION_PLACEHOLDER,
            &generate_opts.version.to_string(),
        );

    if let Some(name) = generate_opts.author_name {
        replacement = replacement.replace(AUTHOR_NAME_PLACEHOLDER, name);
    } else {
        match filename {
            "package.json" => {
                replacement = replacement.replace(AUTHOR_NAME_PLACEHOLDER_JS, "");
            }
            "pyproject.toml" => {
                replacement = replacement.replace(AUTHOR_NAME_PLACEHOLDER_PY, "");
            }
            "grammar.js" => {
                replacement = replacement.replace(AUTHOR_NAME_PLACEHOLDER_GRAMMAR, "");
            }
            "Cargo.toml" => {
                replacement = replacement.replace(AUTHOR_NAME_PLACEHOLDER_RS, "");
            }
            _ => {}
        }
    }

    if let Some(email) = generate_opts.author_email {
        replacement = match filename {
            "Cargo.toml" | "grammar.js" => {
                replacement.replace(AUTHOR_EMAIL_PLACEHOLDER, &format!("<{email}>"))
            }
            _ => replacement.replace(AUTHOR_EMAIL_PLACEHOLDER, email),
        }
    } else {
        match filename {
            "package.json" => {
                replacement = replacement.replace(AUTHOR_EMAIL_PLACEHOLDER_JS, "");
            }
            "pyproject.toml" => {
                replacement = replacement.replace(AUTHOR_EMAIL_PLACEHOLDER_PY, "");
            }
            "grammar.js" => {
                replacement = replacement.replace(AUTHOR_EMAIL_PLACEHOLDER_GRAMMAR, "");
            }
            "Cargo.toml" => {
                replacement = replacement.replace(AUTHOR_EMAIL_PLACEHOLDER_RS, "");
            }
            _ => {}
        }
    }

    if filename == "package.json" {
        if let Some(url) = generate_opts.author_url {
            replacement = replacement.replace(AUTHOR_URL_PLACEHOLDER, url);
        } else {
            replacement = replacement.replace(AUTHOR_URL_PLACEHOLDER_JS, "");
        }
    }

    if generate_opts.author_name.is_none()
        && generate_opts.author_email.is_none()
        && generate_opts.author_url.is_none()
        && filename == "package.json"
    {
        if let Some(start_idx) = replacement.find(AUTHOR_BLOCK_JS) {
            if let Some(end_idx) = replacement[start_idx..]
                .find("},")
                .map(|i| i + start_idx + 2)
            {
                replacement.replace_range(start_idx..end_idx, "");
            }
        }
    } else if generate_opts.author_name.is_none() && generate_opts.author_email.is_none() {
        match filename {
            "pyproject.toml" => {
                if let Some(start_idx) = replacement.find(AUTHOR_BLOCK_PY) {
                    if let Some(end_idx) = replacement[start_idx..]
                        .find("}]")
                        .map(|i| i + start_idx + 2)
                    {
                        replacement.replace_range(start_idx..end_idx, "");
                    } else {
                        println!("none 2");
                    }
                } else {
                    println!("none 1");
                }
            }
            "grammar.js" => {
                if let Some(start_idx) = replacement.find(AUTHOR_BLOCK_GRAMMAR) {
                    if let Some(end_idx) = replacement[start_idx..]
                        .find(" \n")
                        .map(|i| i + start_idx + 1)
                    {
                        replacement.replace_range(start_idx..end_idx, "");
                    } else {
                        println!("none 2");
                    }
                } else {
                    println!("none 1");
                }
            }
            "Cargo.toml" => {
                if let Some(start_idx) = replacement.find(AUTHOR_BLOCK_RS) {
                    if let Some(end_idx) = replacement[start_idx..]
                        .find("\"]")
                        .map(|i| i + start_idx + 2)
                    {
                        replacement.replace_range(start_idx..end_idx, "");
                    }
                }
            }
            _ => {}
        }
    }

    match generate_opts.license {
        Some(license) => replacement = replacement.replace(PARSER_LICENSE_PLACEHOLDER, license),
        _ => replacement = replacement.replace(PARSER_LICENSE_PLACEHOLDER, "MIT"),
    }

    match generate_opts.description {
        Some(description) => {
            replacement = replacement.replace(PARSER_DESCRIPTION_PLACEHOLDER, description);
        }
        _ => {
            replacement = replacement.replace(
                PARSER_DESCRIPTION_PLACEHOLDER,
                &format!(
                    "{} grammar for tree-sitter",
                    generate_opts.camel_parser_name,
                ),
            );
        }
    }

    match generate_opts.repository {
        Some(repository) => {
            replacement = replacement
                .replace(
                    PARSER_URL_STRIPPED_PLACEHOLDER,
                    &repository.replace("https://", "").to_lowercase(),
                )
                .replace(PARSER_URL_PLACEHOLDER, &repository.to_lowercase());
        }
        _ => {
            replacement = replacement
                .replace(
                    PARSER_URL_STRIPPED_PLACEHOLDER,
                    &format!(
                        "github.com/tree-sitter/tree-sitter-{}",
                        language_name.to_lowercase()
                    ),
                )
                .replace(
                    PARSER_URL_PLACEHOLDER,
                    &format!(
                        "https://github.com/tree-sitter/tree-sitter-{}",
                        language_name.to_lowercase()
                    ),
                );
        }
    }

    write_file(path, replacement)
}

fn create_dir(path: &Path) -> Result<()> {
    fs::create_dir_all(path)
        .with_context(|| format!("Failed to create {:?}", path.to_string_lossy()))
}

#[derive(PartialEq, Eq, Debug)]
enum PathState<P>
where
    P: AsRef<Path>,
{
    Exists(P),
    Missing(P),
}

#[allow(dead_code)]
impl<P> PathState<P>
where
    P: AsRef<Path>,
{
    fn exists(&self, mut action: impl FnMut(&Path) -> Result<()>) -> Result<&Self> {
        if let Self::Exists(path) = self {
            action(path.as_ref())?;
        }
        Ok(self)
    }

    fn missing(&self, mut action: impl FnMut(&Path) -> Result<()>) -> Result<&Self> {
        if let Self::Missing(path) = self {
            action(path.as_ref())?;
        }
        Ok(self)
    }

    fn apply(&self, mut action: impl FnMut(&Path) -> Result<()>) -> Result<&Self> {
        action(self.as_path())?;
        Ok(self)
    }

    fn apply_state(&self, mut action: impl FnMut(&Self) -> Result<()>) -> Result<&Self> {
        action(self)?;
        Ok(self)
    }

    fn as_path(&self) -> &Path {
        match self {
            Self::Exists(path) | Self::Missing(path) => path.as_ref(),
        }
    }
}

fn missing_path<P, F>(path: P, mut action: F) -> Result<PathState<P>>
where
    P: AsRef<Path>,
    F: FnMut(&Path) -> Result<()>,
{
    let path_ref = path.as_ref();
    if !path_ref.exists() {
        action(path_ref)?;
        Ok(PathState::Missing(path))
    } else {
        Ok(PathState::Exists(path))
    }
}

fn missing_path_else<P, T, F>(
    path: P,
    allow_update: bool,
    mut action: T,
    mut else_action: F,
) -> Result<PathState<P>>
where
    P: AsRef<Path>,
    T: FnMut(&Path) -> Result<()>,
    F: FnMut(&Path) -> Result<()>,
{
    let path_ref = path.as_ref();
    if !path_ref.exists() {
        action(path_ref)?;
        Ok(PathState::Missing(path))
    } else {
        if allow_update {
            else_action(path_ref)?;
        }
        Ok(PathState::Exists(path))
    }
}

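The `missing_path`/`missing_path_else` helpers and `PathState` drive all of the file generation above. A compact sketch of the pattern, with a hypothetical `queries/highlights.scm` target rather than anything from the diff:

// Hypothetical sketch: create a directory only if it is missing, then generate a file inside it.
fn missing_path_example(repo_path: &Path) -> Result<()> {
    missing_path(repo_path.join("queries"), create_dir)?.apply(|path| {
        missing_path(path.join("highlights.scm"), |path| {
            // generate_file would normally render a template here
            write_file(path, "; highlights go here\n".to_string())
        })?;
        Ok(())
    })?;
    Ok(())
}
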
@ -1,9 +1,8 @@
#![cfg_attr(not(any(test, doctest)), doc = include_str!("../README.md"))]
#![doc = include_str!("../README.md")]

pub mod fuzz;
pub mod highlight;
pub mod init;
pub mod input;
pub mod logger;
pub mod parse;
pub mod playground;

@ -20,5 +19,6 @@ pub mod wasm;
#[cfg(test)]
mod tests;

// To run compile fail tests
#[cfg(doctest)]
mod tests;

30
cli/src/logger.rs
Normal file

@ -0,0 +1,30 @@
use log::{LevelFilter, Log, Metadata, Record};

#[allow(dead_code)]
struct Logger {
    pub filter: Option<String>,
}

impl Log for Logger {
    fn enabled(&self, _: &Metadata) -> bool {
        true
    }

    fn log(&self, record: &Record) {
        eprintln!(
            "[{}] {}",
            record
                .module_path()
                .unwrap_or_default()
                .trim_start_matches("rust_tree_sitter_cli::"),
            record.args()
        );
    }

    fn flush(&self) {}
}

pub fn init() {
    log::set_boxed_logger(Box::new(Logger { filter: None })).unwrap();
    log::set_max_level(LevelFilter::Info);
}

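A small usage sketch for this logger; the log message is illustrative only:

// Hypothetical sketch: install the logger once, then log through the standard `log` macros.
fn logging_example() {
    logger::init();
    log::info!("generated grammar files"); // printed to stderr as "[<module path>] generated grammar files"
}
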
Some files were not shown because too many files have changed in this diff